diff --git a/openhands/agenthub/visualbrowsing_agent/visualbrowsing_agent.py b/openhands/agenthub/visualbrowsing_agent/visualbrowsing_agent.py
index d0c26b0bb4..171ec677ba 100644
--- a/openhands/agenthub/visualbrowsing_agent/visualbrowsing_agent.py
+++ b/openhands/agenthub/visualbrowsing_agent/visualbrowsing_agent.py
@@ -208,7 +208,7 @@ Note:
             # for visualwebarena, webarena and miniwob++ eval, we need to retrieve the initial observation already in browser env
             # initialize and retrieve the first observation by issuing an noop OP
             # For non-benchmark browsing, the browser env starts with a blank page, and the agent is expected to first navigate to desired websites
-            return BrowseInteractiveAction(browser_actions='noop(1000)')
+            return BrowseInteractiveAction(browser_actions='noop(1000)', return_axtree=True)
 
         for event in state.view:
             if isinstance(event, BrowseInteractiveAction):
diff --git a/openhands/events/action/browse.py b/openhands/events/action/browse.py
index c68c27ac0f..556b693a0e 100644
--- a/openhands/events/action/browse.py
+++ b/openhands/events/action/browse.py
@@ -12,6 +12,7 @@ class BrowseURLAction(Action):
     action: str = ActionType.BROWSE
     runnable: ClassVar[bool] = True
     security_risk: ActionSecurityRisk | None = None
+    return_axtree: bool = False
 
     @property
     def message(self) -> str:
@@ -33,6 +34,7 @@ class BrowseInteractiveAction(Action):
     action: str = ActionType.BROWSE_INTERACTIVE
     runnable: ClassVar[bool] = True
     security_risk: ActionSecurityRisk | None = None
+    return_axtree: bool = False
 
     @property
     def message(self) -> str:
diff --git a/openhands/events/observation/browse.py b/openhands/events/observation/browse.py
index 4474cfcb66..dcecd86123 100644
--- a/openhands/events/observation/browse.py
+++ b/openhands/events/observation/browse.py
@@ -1,9 +1,7 @@
 from dataclasses import dataclass, field
 from typing import Any
 
-from browsergym.utils.obs import flatten_axtree_to_str
-
-from openhands.core.schema import ActionType, ObservationType
+from openhands.core.schema import ObservationType
 from openhands.events.observation.observation import Observation
 
 
@@ -53,69 +51,5 @@ class BrowserOutputObservation(Observation):
         if self.screenshot_path:
             ret += f'Screenshot saved to: {self.screenshot_path}\n'
         ret += '--- Agent Observation ---\n'
-        ret += self.get_agent_obs_text()
+        ret += self.content
         return ret
-
-    def get_agent_obs_text(self) -> str:
-        """Get a concise text that will be shown to the agent."""
-        if self.trigger_by_action == ActionType.BROWSE_INTERACTIVE:
-            text = f'[Current URL: {self.url}]\n'
-            text += f'[Focused element bid: {self.focused_element_bid}]\n'
-
-            # Add screenshot path information if available
-            if self.screenshot_path:
-                text += f'[Screenshot saved to: {self.screenshot_path}]\n'
-
-            text += '\n'
-
-            if self.error:
-                text += (
-                    '================ BEGIN error message ===============\n'
-                    'The following error occurred when executing the last action:\n'
-                    f'{self.last_browser_action_error}\n'
-                    '================ END error message ===============\n'
-                )
-            else:
-                text += '[Action executed successfully.]\n'
-            try:
-                # We do not filter visible only here because we want to show the full content
-                # of the web page to the agent for simplicity.
-                # FIXME: handle the case when the web page is too large
-                cur_axtree_txt = self.get_axtree_str(filter_visible_only=False)
-                text += (
-                    f'============== BEGIN accessibility tree ==============\n'
-                    f'{cur_axtree_txt}\n'
-                    f'============== END accessibility tree ==============\n'
-                )
-            except Exception as e:
-                text += (
-                    f'\n[Error encountered when processing the accessibility tree: {e}]'
-                )
-            return text
-
-        elif self.trigger_by_action == ActionType.BROWSE:
-            text = f'[Current URL: {self.url}]\n'
-
-            if self.error:
-                text += (
-                    '================ BEGIN error message ===============\n'
-                    'The following error occurred when trying to visit the URL:\n'
-                    f'{self.last_browser_action_error}\n'
-                    '================ END error message ===============\n'
-                )
-            text += '============== BEGIN webpage content ==============\n'
-            text += self.content
-            text += '\n============== END webpage content ==============\n'
-            return text
-        else:
-            raise ValueError(f'Invalid trigger_by_action: {self.trigger_by_action}')
-
-    def get_axtree_str(self, filter_visible_only: bool = False) -> str:
-        cur_axtree_txt = flatten_axtree_to_str(
-            self.axtree_object,
-            extra_properties=self.extra_element_properties,
-            with_clickable=True,
-            skip_generic=False,
-            filter_visible_only=filter_visible_only,
-        )
-        return str(cur_axtree_txt)
diff --git a/openhands/memory/conversation_memory.py b/openhands/memory/conversation_memory.py
index c0de1877b1..78873efc30 100644
--- a/openhands/memory/conversation_memory.py
+++ b/openhands/memory/conversation_memory.py
@@ -391,7 +391,7 @@ class ConversationMemory:
                 role='user', content=[TextContent(text=obs.content)]
             )  # Content is already truncated by openhands-aci
         elif isinstance(obs, BrowserOutputObservation):
-            text = obs.get_agent_obs_text()
+            text = obs.content
             if (
                 obs.trigger_by_action == ActionType.BROWSE_INTERACTIVE
                 and enable_som_visual_browsing
diff --git a/openhands/runtime/browser/utils.py b/openhands/runtime/browser/utils.py
index 1df1112a61..cb8be5509d 100644
--- a/openhands/runtime/browser/utils.py
+++ b/openhands/runtime/browser/utils.py
@@ -2,7 +2,9 @@ import base64
 import datetime
 import os
 from pathlib import Path
+from typing import Any
 
+from browsergym.utils.obs import flatten_axtree_to_str
 from PIL import Image
 
 from openhands.core.exceptions import BrowserUnavailableException
@@ -14,6 +16,78 @@ from openhands.runtime.browser.browser_env import BrowserEnv
 from openhands.utils.async_utils import call_sync_from_async
 
 
+def get_axtree_str(
+    axtree_object: dict[str, Any],
+    extra_element_properties: dict[str, Any],
+    filter_visible_only: bool = False,
+) -> str:
+    """Flatten the accessibility tree to text via `flatten_axtree_to_str`."""
+    flatten_kwargs: dict[str, Any] = {
+        'extra_properties': extra_element_properties,
+        'with_clickable': True,
+        'skip_generic': False,
+        'filter_visible_only': filter_visible_only,
+    }
+    return str(flatten_axtree_to_str(axtree_object, **flatten_kwargs))
+
+
+def get_agent_obs_text(obs: BrowserOutputObservation) -> str:
+    """Render the text shown to the agent for a browser observation.
+
+    Raises ValueError when `obs.trigger_by_action` is not a browsing action.
+    """
+    trigger = obs.trigger_by_action
+    if trigger not in (ActionType.BROWSE_INTERACTIVE, ActionType.BROWSE):
+        raise ValueError(f'Invalid trigger_by_action: {obs.trigger_by_action}')
+
+    banner_open = '================ BEGIN error message ===============\n'
+    banner_close = '================ END error message ===============\n'
+    parts = [f'[Current URL: {obs.url}]\n']
+    if trigger == ActionType.BROWSE:
+        if obs.error:
+            parts += [
+                banner_open,
+                'The following error occurred when trying to visit the URL:\n',
+                f'{obs.last_browser_action_error}\n',
+                banner_close,
+            ]
+        parts.append('============== BEGIN webpage content ==============\n')
+        parts.append(obs.content)
+        parts.append('\n============== END webpage content ==============\n')
+        return ''.join(parts)
+
+    # Interactive action: header lines, status, then the accessibility tree.
+    parts.append(f'[Focused element bid: {obs.focused_element_bid}]\n')
+    if obs.screenshot_path:
+        parts.append(f'[Screenshot saved to: {obs.screenshot_path}]\n')
+    parts.append('\n')
+    if not obs.error:
+        parts.append('[Action executed successfully.]\n')
+    else:
+        parts += [
+            banner_open,
+            'The following error occurred when executing the last action:\n',
+            f'{obs.last_browser_action_error}\n',
+            banner_close,
+        ]
+    try:
+        # Render the full tree, not only visible nodes, for simplicity.
+        # FIXME: handle the case when the web page is too large
+        tree_text = get_axtree_str(
+            obs.axtree_object, obs.extra_element_properties, filter_visible_only=False
+        )
+        parts.append(
+            '============== BEGIN accessibility tree ==============\n'
+            f'{tree_text}\n'
+            '============== END accessibility tree ==============\n'
+        )
+    except Exception as exc:
+        parts.append(
+            f'\n[Error encountered when processing the accessibility tree: {exc}]'
+        )
+    return ''.join(parts)
+
+
 async def browse(
     action: BrowseURLAction | BrowseInteractiveAction,
     browser: BrowserEnv | None,
@@ -78,7 +152,8 @@ async def browse(
                 image = png_base64_url_to_image(obs.get('screenshot'))
                 image.save(screenshot_path, format='PNG', optimize=True)
 
-        return BrowserOutputObservation(
+        # Create the observation with all data
+        observation = BrowserOutputObservation(
             content=obs['text_content'],  # text content of the page
             url=obs.get('url', ''),  # URL of the page
             screenshot=obs.get('screenshot', None),  # base64-encoded screenshot, png
@@ -103,13 +178,37 @@ async def browse(
             error=True if obs.get('last_action_error', '') else False,  # error flag
             trigger_by_action=action.action,
         )
+
+        # Build the agent-facing text first, while the axtree_object is still available
+        observation.content = get_agent_obs_text(observation)
+
+        # Unless return_axtree is set, drop the DOM and AXTree payloads to save space
+        if not action.return_axtree:
+            observation.dom_object = {}
+            observation.axtree_object = {}
+            observation.extra_element_properties = {}
+
+        return observation
     except Exception as e:
-        return BrowserOutputObservation(
-            content=str(e),
+        error_message = str(e)
+        error_url = asked_url if action.action == ActionType.BROWSE else ''
+
+        # Create error observation
+        observation = BrowserOutputObservation(
+            content=error_message,
             screenshot='',
             screenshot_path=None,
             error=True,
-            last_browser_action_error=str(e),
-            url=asked_url if action.action == ActionType.BROWSE else '',
+            last_browser_action_error=error_message,
+            url=error_url,
             trigger_by_action=action.action,
         )
+
+        # Process the content using get_agent_obs_text regardless of return_axtree value
+        try:
+            observation.content = get_agent_obs_text(observation)
+        except Exception:
+            # If get_agent_obs_text fails, keep the original error message
+            pass
+
+        return observation
diff --git a/openhands/security/invariant/parser.py b/openhands/security/invariant/parser.py
index ba64583edd..01875e099b 100644
--- a/openhands/security/invariant/parser.py
+++ b/openhands/security/invariant/parser.py
@@ -50,6 +50,7 @@ def parse_action(trace: list[TraceElement], action: Action) -> list[TraceElement
         event_dict = event_to_dict(action)
         args = event_dict.get('args', {})
         thought = args.pop('thought', None)
+
         function = Function(name=action.action, arguments=args)
         if thought is not None:
             inv_trace.append(Message(role='assistant', content=thought))
diff --git a/tests/runtime/test_browsergym_envs.py b/tests/runtime/test_browsergym_envs.py
index c3806e484e..ad31d84647 100644
--- a/tests/runtime/test_browsergym_envs.py
+++ b/tests/runtime/test_browsergym_envs.py
@@ -43,7 +43,9 @@ def test_browsergym_eval_env(runtime_cls, temp_dir):
     )
 
     # Test browse
-    action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_GOAL_ACTION)
+    action = BrowseInteractiveAction(
+        browser_actions=BROWSER_EVAL_GET_GOAL_ACTION, return_axtree=False
+    )
     logger.info(action, extra={'msg_type': 'ACTION'})
     obs = runtime.run_action(action)
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
@@ -54,7 +56,7 @@ def test_browsergym_eval_env(runtime_cls, temp_dir):
     assert 'from the list and click Submit' in obs.content
 
     # Make sure the browser can produce observation in eval env
-    action = BrowseInteractiveAction(browser_actions='noop()')
+    action = BrowseInteractiveAction(browser_actions='noop()', return_axtree=False)
     logger.info(action, extra={'msg_type': 'ACTION'})
     obs = runtime.run_action(action)
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
@@ -64,7 +66,9 @@ def test_browsergym_eval_env(runtime_cls, temp_dir):
     )
 
     # Make sure the rewards are working
-    action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_REWARDS_ACTION)
+    action = BrowseInteractiveAction(
+        browser_actions=BROWSER_EVAL_GET_REWARDS_ACTION, return_axtree=False
+    )
     logger.info(action, extra={'msg_type': 'ACTION'})
     obs = runtime.run_action(action)
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
diff --git a/tests/runtime/test_browsing.py b/tests/runtime/test_browsing.py
index 4cf0dcf14d..df42f5dff4 100644
--- a/tests/runtime/test_browsing.py
+++ b/tests/runtime/test_browsing.py
@@ -45,7 +45,7 @@ def test_simple_browse(temp_dir, runtime_cls, run_as_openhands):
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert obs.exit_code == 0
 
-    action_browse = BrowseURLAction(url='http://localhost:8000')
+    action_browse = BrowseURLAction(url='http://localhost:8000', return_axtree=False)
     logger.info(action_browse, extra={'msg_type': 'ACTION'})
     obs = runtime.run_action(action_browse)
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
@@ -116,7 +116,9 @@ def test_read_pdf_browse(temp_dir, runtime_cls, run_as_openhands):
 
         # Browse to the PDF file
         pdf_url = f'{server_url}/view?path=/workspace/test_document.pdf'
-        action_browse = BrowseInteractiveAction(browser_actions=f'goto("{pdf_url}")')
+        action_browse = BrowseInteractiveAction(
+            browser_actions=f'goto("{pdf_url}")', return_axtree=False
+        )
         logger.info(action_browse, extra={'msg_type': 'ACTION'})
         obs = runtime.run_action(action_browse)
         logger.info(obs, extra={'msg_type': 'OBSERVATION'})
@@ -185,7 +187,9 @@ def test_read_png_browse(temp_dir, runtime_cls, run_as_openhands):
 
         # Browse to the PNG file
         png_url = f'{server_url}/view?path=/workspace/test_image.png'
-        action_browse = BrowseInteractiveAction(browser_actions=f'goto("{png_url}")')
+        action_browse = BrowseInteractiveAction(
+            browser_actions=f'goto("{png_url}")', return_axtree=False
+        )
         logger.info(action_browse, extra={'msg_type': 'ACTION'})
         obs = runtime.run_action(action_browse)
         logger.info(obs, extra={'msg_type': 'OBSERVATION'})
diff --git a/tests/unit/test_action_serialization.py b/tests/unit/test_action_serialization.py
index 2b7daaa87f..05459ae850 100644
--- a/tests/unit/test_action_serialization.py
+++ b/tests/unit/test_action_serialization.py
@@ -108,7 +108,11 @@ def test_cmd_run_action_serialization_deserialization():
 def test_browse_url_action_serialization_deserialization():
     original_action_dict = {
         'action': 'browse',
-        'args': {'thought': '', 'url': 'https://www.example.com'},
+        'args': {
+            'thought': '',
+            'url': 'https://www.example.com',
+            'return_axtree': False,
+        },
     }
     serialization_deserialization(original_action_dict, BrowseURLAction)
 
@@ -120,6 +124,7 @@ def test_browse_interactive_action_serialization_deserialization():
             'thought': '',
             'browser_actions': 'goto("https://www.example.com")',
             'browsergym_send_msg_to_user': '',
+            'return_axtree': False,
         },
     }
     serialization_deserialization(original_action_dict, BrowseInteractiveAction)
diff --git a/tests/unit/test_browsing_agent_parser.py b/tests/unit/test_browsing_agent_parser.py
index 351b8e8eaf..6392f90c0a 100644
--- a/tests/unit/test_browsing_agent_parser.py
+++ b/tests/unit/test_browsing_agent_parser.py
@@ -80,3 +80,4 @@ def test_parse_action(
     assert action.browser_actions == expected_browser_actions
     assert action.thought == expected_thought
     assert action.browsergym_send_msg_to_user == expected_msg_content
+    assert action.return_axtree is False  # Default value should be False
diff --git a/tests/unit/test_conversation_memory.py b/tests/unit/test_conversation_memory.py
index 37a7c5a975..7ae0122f38 100644
--- a/tests/unit/test_conversation_memory.py
+++ b/tests/unit/test_conversation_memory.py
@@ -457,11 +457,13 @@ def test_process_events_with_file_read_observation(conversation_memory):
 
 
 def test_process_events_with_browser_output_observation(conversation_memory):
+    formatted_content = '[Current URL: http://example.com]\n\n============== BEGIN webpage content ==============\nPage loaded\n============== END webpage content =============='
+
     obs = BrowserOutputObservation(
         url='http://example.com',
         trigger_by_action='browse',
         screenshot='',
-        content='Page loaded',
+        content=formatted_content,
         error=False,
     )
 
diff --git a/tests/unit/test_function_calling.py b/tests/unit/test_function_calling.py
index 2da8da9858..5865db8090 100644
--- a/tests/unit/test_function_calling.py
+++ b/tests/unit/test_function_calling.py
@@ -178,6 +178,7 @@ def test_browser_valid():
     assert len(actions) == 1
     assert isinstance(actions[0], BrowseInteractiveAction)
     assert actions[0].browser_actions == "click('button-1')"
+    assert actions[0].return_axtree is False  # Default value should be False
 
 
 def test_browser_missing_code():
diff --git a/tests/unit/test_security.py b/tests/unit/test_security.py
index 058a0b8303..44f9d6adc5 100644
--- a/tests/unit/test_security.py
+++ b/tests/unit/test_security.py
@@ -413,6 +413,7 @@ async def test_unsafe_bash_command(temp_dir: str):
                 browser_actions='goto("http://localhost:3000")',
                 thought='browsing to localhost',
                 browsergym_send_msg_to_user='browsergym',
+                return_axtree=False,
             ),
             [
                 Message(
@@ -430,6 +431,7 @@ async def test_unsafe_bash_command(temp_dir: str):
                         arguments={
                             'browser_actions': 'goto("http://localhost:3000")',
                             'browsergym_send_msg_to_user': 'browsergym',
+                            'return_axtree': False,
                         },
                     ),
                 ),
@@ -437,7 +439,9 @@ async def test_unsafe_bash_command(temp_dir: str):
         ),
         (  # Test BrowseURLAction
             BrowseURLAction(
-                url='http://localhost:3000', thought='browsing to localhost'
+                url='http://localhost:3000',
+                thought='browsing to localhost',
+                return_axtree=False,
             ),
             [
                 Message(
@@ -452,7 +456,10 @@ async def test_unsafe_bash_command(temp_dir: str):
                     type='function',
                     function=Function(
                         name=ActionType.BROWSE,
-                        arguments={'url': 'http://localhost:3000'},
+                        arguments={
+                            'url': 'http://localhost:3000',
+                            'return_axtree': False,
+                        },
                     ),
                 ),
             ],