From 415843476c9a71cbbe26639c2ab1df8c7c72d354 Mon Sep 17 00:00:00 2001
From: Kaushik Deka <55996465+Kaushikdkrikhanu@users.noreply.github.com>
Date: Sat, 3 Aug 2024 11:26:22 -0700
Subject: [PATCH] Feat: Add Vision Input Support for LLM with Vision
 Capabilities (#2848)

* add image feature
* fix-linting
* check model support for images
* add comment
* Add image support to other models
* Add images to chat
* fix linting
* fix test issues
* refactor variable names and import
* fix tests
* fix chat message tests
* fix linting
* add pydantic class message
* use message
* remove redundant comments
* remove redundant comments
* change Message class
* remove unintended change
* fix integration tests using regenerate.sh
* rename image_bas64 to images_url, fix tests
* rename Message.py to message, change reminder append logic, add unit tests
* remove comment, fix error to merge
* codeact_swe_agent
* fix f string
* update eventstream integration tests
* add missing if check in codeact_swe_agent
* update integration tests
* Update frontend/src/components/chat/ChatInput.tsx
* Update frontend/src/components/chat/ChatInput.tsx
* Update frontend/src/components/chat/ChatInput.tsx
* Update frontend/src/components/chat/ChatInput.tsx
* Update frontend/src/components/chat/ChatMessage.tsx

---------

Co-authored-by: tobitege
Co-authored-by: Xingyao Wang
Co-authored-by: sp.wack <83104063+amanape@users.noreply.github.com>
---
 agenthub/browsing_agent/browsing_agent.py | 14 +-
 agenthub/codeact_agent/codeact_agent.py | 83 ++-
 .../codeact_swe_agent/codeact_swe_agent.py | 74 +-
 agenthub/delegator_agent/agent.py | 4 +-
 agenthub/micro/agent.py | 11 +-
 agenthub/planner_agent/agent.py | 15 +-
 agenthub/planner_agent/prompt.py | 10 +-
 frontend/src/components/chat/Chat.test.tsx | 6 +-
 .../src/components/chat/ChatInput.test.tsx | 4 +-
 frontend/src/components/chat/ChatInput.tsx | 97 ++-
 .../components/chat/ChatInterface.test.tsx | 4 +-
 .../src/components/chat/ChatInterface.tsx | 8 +-
 .../src/components/chat/ChatMessage.test.tsx | 29 +-
 frontend/src/components/chat/ChatMessage.tsx | 12 +
 frontend/src/components/chat/message.d.ts | 1 +
 frontend/src/i18n/translation.json | 5 +
 frontend/src/services/actions.ts | 4 +-
 frontend/src/services/chatService.ts | 7 +-
 frontend/src/state/chatSlice.ts | 11 +-
 opendevin/controller/state/state.py | 6 +-
 opendevin/core/message.py | 59 ++
 opendevin/events/action/message.py | 4 +
 opendevin/events/serialization/event.py | 1 +
 opendevin/llm/llm.py | 50 +-
 opendevin/server/listen.py | 2 +-
 opendevin/server/session/session.py | 10 +-
 tests/integration/conftest.py | 4 +-
 .../test_browse_internet/prompt_001.log | 2 +-
 .../CodeActAgent/test_edits/prompt_001.log | 2 +-
 .../CodeActAgent/test_ipython/prompt_001.log | 2 +-
 .../test_ipython_module/prompt_001.log | 2 +-
 .../test_write_simple_script/prompt_001.log | 2 +-
 .../CodeActSWEAgent/test_edits/prompt_001.log | 4 +-
 .../CodeActSWEAgent/test_edits/prompt_002.log | 2 +-
 .../CodeActSWEAgent/test_edits/prompt_003.log | 2 +-
 .../CodeActSWEAgent/test_edits/prompt_004.log | 2 +-
 .../test_ipython/prompt_001.log | 4 +-
 .../test_ipython/prompt_002.log | 2 +-
 .../test_ipython_module/prompt_001.log | 4 +-
 .../test_ipython_module/prompt_002.log | 6 +-
 .../test_ipython_module/prompt_003.log | 6 +-
 .../test_ipython_module/prompt_004.log | 8 +-
 .../test_write_simple_script/prompt_001.log | 4 +-
 .../test_write_simple_script/prompt_002.log | 2 +-
 .../test_write_simple_script/prompt_003.log | 2 +-
 .../test_write_simple_script/prompt_004.log | 2 +-
 .../ManagerAgent/test_edits/prompt_001.log | 2 +-
 .../ManagerAgent/test_edits/prompt_004.log | 2 +-
 .../ManagerAgent/test_edits/prompt_005.log | 2 +-
 .../ManagerAgent/test_edits/prompt_006.log | 2 +-
 .../test_simple_task_rejection/prompt_001.log | 2 +-
 .../test_simple_task_rejection/prompt_008.log | 2 +-
 .../test_write_simple_script/prompt_001.log | 1 +
 .../test_write_simple_script/prompt_002.log | 1 +
 .../test_write_simple_script/prompt_003.log | 1 +
 .../test_write_simple_script/prompt_004.log | 1 +
 .../test_write_simple_script/prompt_005.log | 1 +
 .../test_write_simple_script/prompt_006.log | 1 +
 .../test_write_simple_script/prompt_007.log | 1 +
 .../test_write_simple_script/prompt_008.log | 1 +
 .../test_write_simple_script/prompt_009.log | 1 +
 .../test_write_simple_script/prompt_010.log | 1 +
 .../test_write_simple_script/prompt_011.log | 1 +
 .../test_browse_internet/prompt_005.log | 2 +-
 .../CodeActAgent/test_edits/prompt_002.log | 2 +-
 .../CodeActAgent/test_edits/prompt_003.log | 2 +-
 .../CodeActAgent/test_edits/prompt_004.log | 2 +-
 .../CodeActAgent/test_edits/prompt_005.log | 2 +-
 .../CodeActAgent/test_ipython/prompt_002.log | 2 +-
 .../CodeActAgent/test_ipython/prompt_003.log | 2 +-
 .../test_ipython_module/prompt_002.log | 2 +-
 .../test_ipython_module/prompt_003.log | 2 +-
 .../test_ipython_module/prompt_004.log | 2 +-
 .../test_write_simple_script/prompt_002.log | 2 +-
 .../test_write_simple_script/prompt_003.log | 2 +-
 .../test_write_simple_script/prompt_004.log | 2 +-
 .../CodeActSWEAgent/test_edits/prompt_001.log | 2 +-
 .../CodeActSWEAgent/test_edits/prompt_002.log | 4 +-
 .../CodeActSWEAgent/test_edits/prompt_003.log | 4 +-
 .../CodeActSWEAgent/test_edits/prompt_004.log | 4 +-
 .../CodeActSWEAgent/test_edits/prompt_005.log | 4 +-
 .../test_ipython/prompt_001.log | 2 +-
 .../test_ipython/prompt_002.log | 4 +-
 .../test_ipython_module/prompt_001.log | 2 +-
 .../test_ipython_module/prompt_002.log | 6 +-
 .../test_ipython_module/prompt_003.log | 25 +-
 .../test_ipython_module/prompt_004.log | 636 ++++++++++++++++++
 .../test_ipython_module/response_001.log | 2 +-
 .../test_ipython_module/response_002.log | 16 +-
 .../test_ipython_module/response_003.log | 5 +-
 .../test_ipython_module/response_004.log | 3 +
 .../test_write_simple_script/prompt_001.log | 2 +-
 .../test_write_simple_script/prompt_002.log | 4 +-
 .../test_write_simple_script/prompt_003.log | 4 +-
 .../test_write_simple_script/prompt_004.log | 4 +-
 .../ManagerAgent/test_edits/prompt_001.log | 2 +-
 .../ManagerAgent/test_edits/prompt_004.log | 2 +-
 .../ManagerAgent/test_edits/prompt_005.log | 2 +-
 .../ManagerAgent/test_edits/prompt_006.log | 2 +-
 .../test_simple_task_rejection/prompt_001.log | 2 +-
 .../test_simple_task_rejection/prompt_006.log | 2 +-
 .../test_write_simple_script/prompt_001.log | 1 +
 .../test_write_simple_script/prompt_002.log | 1 +
 .../test_write_simple_script/prompt_003.log | 1 +
 .../test_write_simple_script/prompt_004.log | 1 +
 .../test_write_simple_script/prompt_005.log | 1 +
 .../test_write_simple_script/prompt_006.log | 1 +
 .../test_write_simple_script/prompt_007.log | 1 +
 .../test_write_simple_script/prompt_008.log | 1 +
 .../test_write_simple_script/prompt_009.log | 1 +
 .../test_write_simple_script/prompt_010.log | 1 +
 .../test_write_simple_script/prompt_011.log | 1 +
 tests/unit/test_action_serialization.py | 2 +
 tests/unit/test_json.py | 2 +
 tests/unit/test_message_serialization.py | 64 ++
 tests/unit/test_micro_agents.py | 4 +-
 116 files changed, 1260 insertions(+), 212 deletions(-)
 create mode 100644
opendevin/core/message.py create mode 100644 tests/integration/mock/server_runtime/CodeActSWEAgent/test_ipython_module/prompt_004.log create mode 100644 tests/integration/mock/server_runtime/CodeActSWEAgent/test_ipython_module/response_004.log create mode 100644 tests/unit/test_message_serialization.py diff --git a/agenthub/browsing_agent/browsing_agent.py b/agenthub/browsing_agent/browsing_agent.py index 91a59cd282..448e8b1392 100644 --- a/agenthub/browsing_agent/browsing_agent.py +++ b/agenthub/browsing_agent/browsing_agent.py @@ -7,6 +7,7 @@ from agenthub.browsing_agent.response_parser import BrowsingResponseParser from opendevin.controller.agent import Agent from opendevin.controller.state.state import State from opendevin.core.logger import opendevin_logger as logger +from opendevin.core.message import Message, TextContent from opendevin.events.action import ( Action, AgentFinishAction, @@ -136,7 +137,7 @@ class BrowsingAgent(Agent): - MessageAction(content) - Message action to run (e.g. ask for clarification) - AgentFinishAction() - end the interaction """ - messages = [] + messages: list[Message] = [] prev_actions = [] cur_axtree_txt = '' error_prefix = '' @@ -191,20 +192,23 @@ class BrowsingAgent(Agent): ) return MessageAction('Error encountered when browsing.') - if (goal := state.get_current_user_intent()) is None: + goal, _ = state.get_current_user_intent() + + if goal is None: goal = state.inputs['task'] + system_msg = get_system_message( goal, self.action_space.describe(with_long_description=False, with_examples=True), ) - messages.append({'role': 'system', 'content': system_msg}) + messages.append(Message(role='system', content=[TextContent(text=system_msg)])) prompt = get_prompt(error_prefix, cur_axtree_txt, prev_action_str) - messages.append({'role': 'user', 'content': prompt}) + messages.append(Message(role='user', content=[TextContent(text=prompt)])) logger.debug(prompt) response = self.llm.completion( - messages=messages, + messages=[message.model_dump() for message in messages], temperature=0.0, stop=[')```', ')\n```'], ) diff --git a/agenthub/codeact_agent/codeact_agent.py b/agenthub/codeact_agent/codeact_agent.py index 08af8ca95a..56d1b5f5c9 100644 --- a/agenthub/codeact_agent/codeact_agent.py +++ b/agenthub/codeact_agent/codeact_agent.py @@ -8,6 +8,7 @@ from agenthub.codeact_agent.prompt import ( ) from opendevin.controller.agent import Agent from opendevin.controller.state.state import State +from opendevin.core.message import ImageContent, Message, TextContent from opendevin.events.action import ( Action, AgentDelegateAction, @@ -125,7 +126,7 @@ class CodeActAgent(Agent): return action.thought return '' - def get_action_message(self, action: Action) -> dict[str, str] | None: + def get_action_message(self, action: Action) -> Message | None: if ( isinstance(action, AgentDelegateAction) or isinstance(action, CmdRunAction) @@ -133,39 +134,41 @@ class CodeActAgent(Agent): or isinstance(action, MessageAction) or (isinstance(action, AgentFinishAction) and action.source == 'agent') ): - return { - 'role': 'user' if action.source == 'user' else 'assistant', - 'content': self.action_to_str(action), - } + content = [TextContent(text=self.action_to_str(action))] + + if isinstance(action, MessageAction) and action.images_urls: + content.append(ImageContent(image_urls=action.images_urls)) + + return Message( + role='user' if action.source == 'user' else 'assistant', content=content + ) return None - def get_observation_message(self, obs: Observation) -> dict[str, str] | None: + 
def get_observation_message(self, obs: Observation) -> Message | None: max_message_chars = self.llm.config.max_message_chars if isinstance(obs, CmdOutputObservation): - content = 'OBSERVATION:\n' + truncate_content( - obs.content, max_message_chars - ) - content += ( + text = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars) + text += ( f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]' ) - return {'role': 'user', 'content': content} + return Message(role='user', content=[TextContent(text=text)]) elif isinstance(obs, IPythonRunCellObservation): - content = 'OBSERVATION:\n' + obs.content + text = 'OBSERVATION:\n' + obs.content # replace base64 images with a placeholder - splitted = content.split('\n') + splitted = text.split('\n') for i, line in enumerate(splitted): if '![image](data:image/png;base64,' in line: splitted[i] = ( '![image](data:image/png;base64, ...) already displayed to user' ) - content = '\n'.join(splitted) - content = truncate_content(content, max_message_chars) - return {'role': 'user', 'content': content} + text = '\n'.join(splitted) + text = truncate_content(text, max_message_chars) + return Message(role='user', content=[TextContent(text=text)]) elif isinstance(obs, AgentDelegateObservation): - content = 'OBSERVATION:\n' + truncate_content( + text = 'OBSERVATION:\n' + truncate_content( str(obs.outputs), max_message_chars ) - return {'role': 'user', 'content': content} + return Message(role='user', content=[TextContent(text=text)]) return None def reset(self) -> None: @@ -192,10 +195,10 @@ class CodeActAgent(Agent): return AgentFinishAction() # prepare what we want to send to the LLM - messages: list[dict[str, str]] = self._get_messages(state) + messages = self._get_messages(state) response = self.llm.completion( - messages=messages, + messages=[message.model_dump() for message in messages], stop=[ '', '', @@ -205,10 +208,10 @@ class CodeActAgent(Agent): ) return self.action_parser.parse(response) - def _get_messages(self, state: State) -> list[dict[str, str]]: - messages = [ - {'role': 'system', 'content': self.system_message}, - {'role': 'user', 'content': self.in_context_example}, + def _get_messages(self, state: State) -> list[Message]: + messages: list[Message] = [ + Message(role='system', content=[TextContent(text=self.system_message)]), + Message(role='user', content=[TextContent(text=self.in_context_example)]), ] for event in state.history.get_events(): @@ -225,21 +228,41 @@ class CodeActAgent(Agent): # handle error if the message is the SAME role as the previous message # litellm.exceptions.BadRequestError: litellm.BadRequestError: OpenAIException - Error code: 400 - {'detail': 'Only supports u/a/u/a/u...'} # there should not have two consecutive messages from the same role - if messages and messages[-1]['role'] == message['role']: - messages[-1]['content'] += '\n\n' + message['content'] + if messages and messages[-1].role == message.role: + messages[-1].content.extend(message.content) else: messages.append(message) # the latest user message is important: # we want to remind the agent of the environment constraints latest_user_message = next( - (m for m in reversed(messages) if m['role'] == 'user'), None + ( + m + for m in reversed(messages) + if m.role == 'user' + and any(isinstance(c, TextContent) for c in m.content) + ), + None, ) - # add a reminder to the prompt + # Get the last user text inside content if latest_user_message: - latest_user_message['content'] += ( - f'\n\nENVIRONMENT REMINDER: You have 
{state.max_iterations - state.iteration} turns left to complete the task. When finished reply with ' + latest_user_message_text = next( + ( + t + for t in reversed(latest_user_message.content) + if isinstance(t, TextContent) + ) ) + # add a reminder to the prompt + reminder_text = f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task. When finished reply with .' + + if latest_user_message_text: + latest_user_message_text.text = ( + latest_user_message_text.text + reminder_text + ) + else: + latest_user_message_text = TextContent(text=reminder_text) + latest_user_message.content.append(latest_user_message_text) return messages diff --git a/agenthub/codeact_swe_agent/codeact_swe_agent.py b/agenthub/codeact_swe_agent/codeact_swe_agent.py index 67ed646ee2..b1191ff672 100644 --- a/agenthub/codeact_swe_agent/codeact_swe_agent.py +++ b/agenthub/codeact_swe_agent/codeact_swe_agent.py @@ -7,6 +7,7 @@ from agenthub.codeact_swe_agent.prompt import ( from agenthub.codeact_swe_agent.response_parser import CodeActSWEResponseParser from opendevin.controller.agent import Agent from opendevin.controller.state.state import State +from opendevin.core.message import ImageContent, Message, TextContent from opendevin.events.action import ( Action, AgentFinishAction, @@ -84,40 +85,43 @@ class CodeActSWEAgent(Agent): return action.content return '' - def get_action_message(self, action: Action) -> dict[str, str] | None: + def get_action_message(self, action: Action) -> Message | None: if ( isinstance(action, CmdRunAction) or isinstance(action, IPythonRunCellAction) or isinstance(action, MessageAction) ): - return { - 'role': 'user' if action.source == 'user' else 'assistant', - 'content': self.action_to_str(action), - } + content = [TextContent(text=self.action_to_str(action))] + + if isinstance(action, MessageAction) and action.images_urls: + content.append(ImageContent(image_urls=action.images_urls)) + + return Message( + role='user' if action.source == 'user' else 'assistant', content=content + ) + return None - def get_observation_message(self, obs: Observation) -> dict[str, str] | None: + def get_observation_message(self, obs: Observation) -> Message | None: max_message_chars = self.llm.config.max_message_chars if isinstance(obs, CmdOutputObservation): - content = 'OBSERVATION:\n' + truncate_content( - obs.content, max_message_chars - ) - content += ( + text = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars) + text += ( f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]' ) - return {'role': 'user', 'content': content} + return Message(role='user', content=[TextContent(text=text)]) elif isinstance(obs, IPythonRunCellObservation): - content = 'OBSERVATION:\n' + obs.content + text = 'OBSERVATION:\n' + obs.content # replace base64 images with a placeholder - splitted = content.split('\n') + splitted = text.split('\n') for i, line in enumerate(splitted): if '![image](data:image/png;base64,' in line: splitted[i] = ( '![image](data:image/png;base64, ...) 
already displayed to user' ) - content = '\n'.join(splitted) - content = truncate_content(content, max_message_chars) - return {'role': 'user', 'content': content} + text = '\n'.join(splitted) + text = truncate_content(text, max_message_chars) + return Message(role='user', content=[TextContent(text=text)]) return None def reset(self) -> None: @@ -143,10 +147,10 @@ class CodeActSWEAgent(Agent): return AgentFinishAction() # prepare what we want to send to the LLM - messages: list[dict[str, str]] = self._get_messages(state) + messages: list[Message] = self._get_messages(state) response = self.llm.completion( - messages=messages, + messages=[message.model_dump() for message in messages], stop=[ '', '', @@ -156,10 +160,10 @@ class CodeActSWEAgent(Agent): return self.response_parser.parse(response) - def _get_messages(self, state: State) -> list[dict[str, str]]: - messages = [ - {'role': 'system', 'content': self.system_message}, - {'role': 'user', 'content': self.in_context_example}, + def _get_messages(self, state: State) -> list[Message]: + messages: list[Message] = [ + Message(role='system', content=[TextContent(text=self.system_message)]), + Message(role='user', content=[TextContent(text=self.in_context_example)]), ] for event in state.history.get_events(): @@ -176,21 +180,35 @@ class CodeActSWEAgent(Agent): # handle error if the message is the SAME role as the previous message # litellm.exceptions.BadRequestError: litellm.BadRequestError: OpenAIException - Error code: 400 - {'detail': 'Only supports u/a/u/a/u...'} # there should not have two consecutive messages from the same role - if messages and messages[-1]['role'] == message['role']: - messages[-1]['content'] += '\n\n' + message['content'] + if messages and messages[-1].role == message.role: + messages[-1].content.extend(message.content) else: messages.append(message) # the latest user message is important: # we want to remind the agent of the environment constraints latest_user_message = next( - (m for m in reversed(messages) if m['role'] == 'user'), None + (m for m in reversed(messages) if m.role == 'user'), None ) - # add a reminder to the prompt + # Get the last user text inside content if latest_user_message: - latest_user_message['content'] += ( - f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task.' + latest_user_message_text = next( + ( + t + for t in reversed(latest_user_message.content) + if isinstance(t, TextContent) + ) ) + # add a reminder to the prompt + reminder_text = f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task. When finished reply with .' 
+ + if latest_user_message_text: + latest_user_message_text.text = ( + latest_user_message_text.text + reminder_text + ) + else: + latest_user_message_text = TextContent(text=reminder_text) + latest_user_message.content.append(latest_user_message_text) return messages diff --git a/agenthub/delegator_agent/agent.py b/agenthub/delegator_agent/agent.py index 5a07cfc68c..1e49603bf3 100644 --- a/agenthub/delegator_agent/agent.py +++ b/agenthub/delegator_agent/agent.py @@ -34,7 +34,7 @@ class DelegatorAgent(Agent): """ if self.current_delegate == '': self.current_delegate = 'study' - task = state.get_current_user_intent() + task, _ = state.get_current_user_intent() return AgentDelegateAction( agent='StudyRepoForTaskAgent', inputs={'task': task} ) @@ -45,7 +45,7 @@ class DelegatorAgent(Agent): if not isinstance(last_observation, AgentDelegateObservation): raise Exception('Last observation is not an AgentDelegateObservation') - goal = state.get_current_user_intent() + goal, _ = state.get_current_user_intent() if self.current_delegate == 'study': self.current_delegate = 'coder' return AgentDelegateAction( diff --git a/agenthub/micro/agent.py b/agenthub/micro/agent.py index 941bc9e903..0d305f70ec 100644 --- a/agenthub/micro/agent.py +++ b/agenthub/micro/agent.py @@ -2,6 +2,7 @@ from jinja2 import BaseLoader, Environment from opendevin.controller.agent import Agent from opendevin.controller.state.state import State +from opendevin.core.message import ImageContent, Message, TextContent from opendevin.core.utils import json from opendevin.events.action import Action from opendevin.events.serialization.action import action_from_dict @@ -62,16 +63,20 @@ class MicroAgent(Agent): del self.delegates[self.agent_definition['name']] def step(self, state: State) -> Action: + last_user_message, last_image_urls = state.get_current_user_intent() prompt = self.prompt_template.render( state=state, instructions=instructions, to_json=to_json, history_to_json=self.history_to_json, delegates=self.delegates, - latest_user_message=state.get_current_user_intent(), + latest_user_message=last_user_message, ) - messages = [{'content': prompt, 'role': 'user'}] - resp = self.llm.completion(messages=messages) + content = [TextContent(text=prompt)] + if last_image_urls: + content.append(ImageContent(image_urls=last_image_urls)) + message = Message(role='user', content=content) + resp = self.llm.completion(messages=[message.model_dump()]) action_resp = resp['choices'][0]['message']['content'] action = parse_response(action_resp) return action diff --git a/agenthub/planner_agent/agent.py b/agenthub/planner_agent/agent.py index f3225e6e0e..04e38c0b95 100644 --- a/agenthub/planner_agent/agent.py +++ b/agenthub/planner_agent/agent.py @@ -1,11 +1,12 @@ from agenthub.planner_agent.response_parser import PlannerResponseParser from opendevin.controller.agent import Agent from opendevin.controller.state.state import State +from opendevin.core.message import ImageContent, Message, TextContent from opendevin.events.action import Action, AgentFinishAction from opendevin.llm.llm import LLM from opendevin.runtime.tools import RuntimeTool -from .prompt import get_prompt +from .prompt import get_prompt_and_images class PlannerAgent(Agent): @@ -42,7 +43,13 @@ class PlannerAgent(Agent): 'abandoned', ]: return AgentFinishAction() - prompt = get_prompt(state, self.llm.config.max_message_chars) - messages = [{'content': prompt, 'role': 'user'}] - resp = self.llm.completion(messages=messages) + + prompt, image_urls = get_prompt_and_images( + state, 
self.llm.config.max_message_chars + ) + content = [TextContent(text=prompt)] + if image_urls: + content.append(ImageContent(image_urls=image_urls)) + message = Message(role='user', content=content) + resp = self.llm.completion(messages=[message.model_dump()]) return self.response_parser.parse(resp) diff --git a/agenthub/planner_agent/prompt.py b/agenthub/planner_agent/prompt.py index 9289ac045b..ad26b20cac 100644 --- a/agenthub/planner_agent/prompt.py +++ b/agenthub/planner_agent/prompt.py @@ -115,7 +115,9 @@ def get_hint(latest_action_id: str) -> str: return hints.get(latest_action_id, '') -def get_prompt(state: State, max_message_chars: int) -> str: +def get_prompt_and_images( + state: State, max_message_chars: int +) -> tuple[str, list[str]]: """Gets the prompt for the planner agent. Formatted with the most recent action-observation pairs, current task, and hint based on last action @@ -161,16 +163,16 @@ def get_prompt(state: State, max_message_chars: int) -> str: logger.info('HINT:\n' + hint, extra={'msg_type': 'DETAIL'}) # the last relevant user message (the task) - task = state.get_current_user_intent() + message, image_urls = state.get_current_user_intent() # finally, fill in the prompt return prompt % { - 'task': task, + 'task': message, 'plan': plan_str, 'history': history_str, 'hint': hint, 'plan_status': plan_status, - } + }, image_urls def parse_response(response: str) -> Action: diff --git a/frontend/src/components/chat/Chat.test.tsx b/frontend/src/components/chat/Chat.test.tsx index 5637f81947..6b912c7222 100644 --- a/frontend/src/components/chat/Chat.test.tsx +++ b/frontend/src/components/chat/Chat.test.tsx @@ -5,9 +5,9 @@ import { renderWithProviders } from "test-utils"; import Chat from "./Chat"; const MESSAGES: Message[] = [ - { sender: "assistant", content: "Hello!" }, - { sender: "user", content: "Hi!" }, - { sender: "assistant", content: "How can I help you today?" 
}, + { sender: "assistant", content: "Hello!", imageUrls: [] }, + { sender: "user", content: "Hi!", imageUrls: [] }, + { sender: "assistant", content: "How can I help you today?", imageUrls: [] }, ]; describe("Chat", () => { diff --git a/frontend/src/components/chat/ChatInput.test.tsx b/frontend/src/components/chat/ChatInput.test.tsx index 02b7863080..0947f282ac 100644 --- a/frontend/src/components/chat/ChatInput.test.tsx +++ b/frontend/src/components/chat/ChatInput.test.tsx @@ -55,7 +55,7 @@ describe("ChatInput", () => { await user.type(textarea, "Hello, world!"); await user.click(button); - expect(onSendMessage).toHaveBeenCalledWith("Hello, world!"); + expect(onSendMessage).toHaveBeenCalledWith("Hello, world!", []); // Additionally, check if it was called exactly once expect(onSendMessage).toHaveBeenCalledTimes(1); }); @@ -68,7 +68,7 @@ describe("ChatInput", () => { await user.type(textarea, "Hello, world!"); await user.keyboard("{Enter}"); - expect(onSendMessage).toHaveBeenCalledWith("Hello, world!"); + expect(onSendMessage).toHaveBeenCalledWith("Hello, world!", []); }); it("should NOT send a message when shift + enter is pressed", async () => { diff --git a/frontend/src/components/chat/ChatInput.tsx b/frontend/src/components/chat/ChatInput.tsx index c92dddd1c1..7019c18607 100644 --- a/frontend/src/components/chat/ChatInput.tsx +++ b/frontend/src/components/chat/ChatInput.tsx @@ -1,26 +1,44 @@ import { Textarea } from "@nextui-org/react"; import React from "react"; import { useTranslation } from "react-i18next"; -import { VscArrowUp } from "react-icons/vsc"; +import { VscArrowUp, VscFileMedia } from "react-icons/vsc"; import { twMerge } from "tailwind-merge"; import { I18nKey } from "#/i18n/declaration"; interface ChatInputProps { disabled?: boolean; - onSendMessage: (message: string) => void; + onSendMessage: (message: string, image_urls: string[]) => void; } function ChatInput({ disabled = false, onSendMessage }: ChatInputProps) { const { t } = useTranslation(); const [message, setMessage] = React.useState(""); + const [files, setFiles] = React.useState([]); // This is true when the user is typing in an IME (e.g., Chinese, Japanese) const [isComposing, setIsComposing] = React.useState(false); - const handleSendChatMessage = () => { + const convertImageToBase64 = (file: File): Promise => + new Promise((resolve, reject) => { + const reader = new FileReader(); + reader.onloadend = () => { + resolve(reader.result as string); + }; + reader.onerror = reject; + reader.readAsDataURL(file); + }); + + const handleSendChatMessage = async () => { if (message.trim()) { - onSendMessage(message); + let base64images: string[] = []; + if (files.length > 0) { + base64images = await Promise.all( + files.map((file) => convertImageToBase64(file)), + ); + } + onSendMessage(message, base64images); setMessage(""); + setFiles([]); } }; @@ -33,6 +51,33 @@ function ChatInput({ disabled = false, onSendMessage }: ChatInputProps) { } }; + const handleFileChange = (event: React.ChangeEvent) => { + if (event.target.files) { + setFiles((prev) => [...prev, ...Array.from(event.target.files!)]); + } + }; + + const removeFile = (index: number) => { + setFiles((prevFiles) => prevFiles.filter((_, i) => i !== index)); + }; + + const handlePaste = (event: React.ClipboardEvent) => { + const clipboardItems = Array.from(event.clipboardData.items); + const pastedFiles: File[] = []; + clipboardItems.forEach((item) => { + if (item.type.startsWith("image/")) { + const file = item.getAsFile(); + if (file) { + pastedFiles.push(file); 
+ } + } + }); + if (pastedFiles.length > 0) { + setFiles((prevFiles) => [...prevFiles, ...pastedFiles]); + event.preventDefault(); + } + }; + return (