diff --git a/tests/runtime/test_browsing.py b/tests/runtime/test_browsing.py
index 49b1ef7847..b485e2f09c 100644
--- a/tests/runtime/test_browsing.py
+++ b/tests/runtime/test_browsing.py
@@ -1,6 +1,7 @@
"""Browsing-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
import os
+import re
import pytest
from conftest import _close_test_runtime, _load_runtime
@@ -23,10 +24,104 @@ from openhands.events.observation import (
# ============================================================================================================================
-@pytest.mark.skipif(
+# Skip all tests in this module for CLI runtime.
+# `pytestmark` is the module-level name pytest reads to apply a mark to every
+# test collected from this file, which replaces the per-test skipif decorators.
+pytestmark = pytest.mark.skipif(
os.environ.get('TEST_RUNTIME') == 'cli',
reason='CLIRuntime does not support browsing actions',
)
+
+
+def parse_axtree_content(content: str) -> dict[str, str]:
+    """Build a ``bid -> description`` map from serialized accessibility-tree text.
+
+    Lines are examined after stripping surrounding whitespace. Scanning starts
+    after a line containing ``BEGIN accessibility tree`` and stops at the first
+    line containing ``END accessibility tree`` (even if no BEGIN marker was
+    seen, in which case the result is empty).
+
+    Inside the section, a line shaped like ``[bid] description`` opens a new
+    element; any other non-blank line is appended, space-separated, to the
+    element opened most recently. Lines that precede the first bid are dropped.
+    """
+    bid_pattern = re.compile(r'\[([a-zA-Z0-9]+)\]\s*(.*)')
+    entries = {}
+    open_bid = None
+    fragments = []
+    inside_tree = False
+
+    for raw_line in content.split('\n'):
+        stripped = raw_line.strip()
+
+        if 'BEGIN accessibility tree' in stripped:
+            inside_tree = True
+        elif 'END accessibility tree' in stripped:
+            break
+        elif inside_tree and stripped:
+            found = bid_pattern.match(stripped)
+            if found is None:
+                # Continuation text belongs to whichever element is open.
+                if open_bid:
+                    fragments.append(stripped)
+            else:
+                # A new bid closes the element collected so far.
+                if open_bid and fragments:
+                    entries[open_bid] = ' '.join(fragments)
+                open_bid = found.group(1)
+                fragments = [found.group(2).strip()]
+
+    # The final element has no following bid line to trigger its save.
+    if open_bid and fragments:
+        entries[open_bid] = ' '.join(fragments)
+
+    return entries
+
+
+def find_element_by_text(axtree_elements: dict[str, str], text: str) -> str | None:
+    """Return the bid of the first element whose description contains ``text``.
+
+    The match is a case-insensitive substring test; ``text`` is stripped of
+    surrounding whitespace first. Elements are scanned in the mapping's
+    iteration order.
+
+    Returns ``None`` when nothing matches, or when ``text`` is empty after
+    stripping. An empty needle is a substring of every description, so without
+    that guard the lookup would "find" whichever element happens to be first.
+    """
+    needle = text.lower().strip()
+    if not needle:
+        return None
+    return next(
+        (
+            bid
+            for bid, description in axtree_elements.items()
+            if needle in description.lower()
+        ),
+        None,
+    )
+
+
+def find_element_by_id(axtree_elements: dict[str, str], element_id: str) -> str | None:
+    """Return the bid of the first element whose description carries the given HTML id.
+
+    A description matches when it contains ``id="<element_id>"`` or
+    ``id='<element_id>'`` as a literal, case-sensitive substring. Returns
+    ``None`` if no description does.
+    """
+    # Accept either quoting style for the attribute value.
+    quoted_forms = (f'id="{element_id}"', f"id='{element_id}'")
+    for bid, description in axtree_elements.items():
+        if any(form in description for form in quoted_forms):
+            return bid
+    return None
+
+
+def find_element_by_tag_and_attributes(
+    axtree_elements: dict[str, str], tag: str, **attributes
+) -> str | None:
+    """Return the bid of the first element matching a tag and a set of attributes.
+
+    An element matches when its description starts with ``tag`` (compared
+    case-insensitively) and, for every keyword argument, contains
+    ``name="value"`` or ``name='value'`` as a literal substring. Attribute
+    matching is case-sensitive. Returns ``None`` when no element qualifies.
+    """
+    wanted_tag = tag.lower()
+    for bid, description in axtree_elements.items():
+        if not description.lower().startswith(wanted_tag):
+            continue
+
+        # Every requested attribute must appear in one of the two quoting styles.
+        has_all_attributes = all(
+            f'{attr_name}="{attr_value}"' in description
+            or f"{attr_name}='{attr_value}'" in description
+            for attr_name, attr_value in attributes.items()
+        )
+        if has_all_attributes:
+            return bid
+
+    return None
+
+
def test_simple_browse(temp_dir, runtime_cls, run_as_openhands):
runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
@@ -71,10 +166,715 @@ def test_simple_browse(temp_dir, runtime_cls, run_as_openhands):
_close_test_runtime(runtime)
-@pytest.mark.skipif(
- os.environ.get('TEST_RUNTIME') == 'cli',
- reason='CLIRuntime does not support browsing actions',
-)
+def test_browser_navigation_actions(temp_dir, runtime_cls, run_as_openhands):
+ """Test browser navigation actions: goto, go_back, go_forward, noop."""
+ runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
+ try:
+ # Create test HTML pages
+ page1_content = """
+
+
+
+
+
+
+
+
+ """
+
+ # Create HTML file
+ form_path = os.path.join(temp_dir, 'form.html')
+ with open(form_path, 'w') as f:
+ f.write(form_content)
+
+ # Copy to sandbox
+ sandbox_dir = config.workspace_mount_path_in_sandbox
+ runtime.copy_to(form_path, sandbox_dir)
+
+ # Start HTTP server
+ action_cmd = CmdRunAction(
+ command='python3 -m http.server 8000 > server.log 2>&1 &'
+ )
+ logger.info(action_cmd, extra={'msg_type': 'ACTION'})
+ obs = runtime.run_action(action_cmd)
+ logger.info(obs, extra={'msg_type': 'ACTION'})
+ assert obs.exit_code == 0
+
+ # Wait for server to start
+ action_cmd = CmdRunAction(command='sleep 3')
+ logger.info(action_cmd, extra={'msg_type': 'ACTION'})
+ obs = runtime.run_action(action_cmd)
+ logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+ # Navigate to form page
+ action_browse = BrowseInteractiveAction(
+ browser_actions='goto("http://localhost:8000/form.html")',
+ return_axtree=True, # Need axtree to get element bids
+ )
+ logger.info(action_browse, extra={'msg_type': 'ACTION'})
+ obs = runtime.run_action(action_browse)
+ logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+ assert isinstance(obs, BrowserOutputObservation)
+ assert not obs.error
+ assert 'Test Form' in obs.content
+
+ # Parse the axtree to get actual bid values
+ axtree_elements = parse_axtree_content(obs.content)
+
+ # Find elements by their characteristics visible in the axtree
+ text_input_bid = find_element_by_text(axtree_elements, 'Enter text')
+ textarea_bid = find_element_by_text(axtree_elements, 'Enter message')
+ select_bid = find_element_by_text(axtree_elements, 'combobox')
+ button_bid = find_element_by_text(axtree_elements, 'Test Button')
+
+ # Verify we found the correct elements
+ assert text_input_bid is not None, (
+ f'Could not find text input element in axtree. Available elements: {dict(list(axtree_elements.items())[:5])}'
+ )
+ assert textarea_bid is not None, (
+ f'Could not find textarea element in axtree. Available elements: {dict(list(axtree_elements.items())[:5])}'
+ )
+ assert button_bid is not None, (
+ f'Could not find button element in axtree. Available elements: {dict(list(axtree_elements.items())[:5])}'
+ )
+ assert select_bid is not None, (
+ f'Could not find select element in axtree. Available elements: {dict(list(axtree_elements.items())[:5])}'
+ )
+ assert text_input_bid != button_bid, (
+ 'Text input bid should be different from button bid'
+ )
+
+ # Test fill action with real bid values
+ action_browse = BrowseInteractiveAction(
+ browser_actions=f"""
+fill("{text_input_bid}", "Hello World")
+fill("{textarea_bid}", "This is a test message")
+""".strip(),
+ return_axtree=True,
+ )
+ logger.info(action_browse, extra={'msg_type': 'ACTION'})
+ obs = runtime.run_action(action_browse)
+ logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+ assert isinstance(obs, BrowserOutputObservation)
+ # Verify the action executed successfully
+ assert not obs.error, (
+ f'Browser action failed with error: {obs.last_browser_action_error}'
+ )
+
+ # Parse the updated axtree to verify the text was actually filled
+ updated_axtree_elements = parse_axtree_content(obs.content)
+
+ # Check that the text input now contains our text
+ assert text_input_bid in updated_axtree_elements, (
+ f'Text input element {text_input_bid} should be present in updated axtree. Available elements: {list(updated_axtree_elements.keys())[:10]}'
+ )
+ text_input_desc = updated_axtree_elements[text_input_bid]
+ # The filled value should appear in the element description (axtree shows values differently)
+ assert 'Hello World' in text_input_desc or "'Hello World'" in text_input_desc, (
+ f"Text input should contain 'Hello World' but description is: {text_input_desc}"
+ )
+
+ assert textarea_bid in updated_axtree_elements, (
+ f'Textarea element {textarea_bid} should be present in updated axtree. Available elements: {list(updated_axtree_elements.keys())[:10]}'
+ )
+ textarea_desc = updated_axtree_elements[textarea_bid]
+ assert (
+ 'This is a test message' in textarea_desc
+ or "'This is a test message'" in textarea_desc
+ ), f'Textarea should contain test message but description is: {textarea_desc}'
+
+ # Test select_option action with real bid
+ action_browse = BrowseInteractiveAction(
+ browser_actions=f'select_option("{select_bid}", "option2")',
+ return_axtree=True,
+ )
+ logger.info(action_browse, extra={'msg_type': 'ACTION'})
+ obs = runtime.run_action(action_browse)
+ logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+ assert isinstance(obs, BrowserOutputObservation)
+ assert not obs.error, (
+ f'Select option action failed: {obs.last_browser_action_error}'
+ )
+
+ # Verify that option2 is now selected
+ updated_axtree_elements = parse_axtree_content(obs.content)
+ assert select_bid in updated_axtree_elements, (
+ f'Select element {select_bid} should be present in updated axtree. Available elements: {list(updated_axtree_elements.keys())[:10]}'
+ )
+ select_desc = updated_axtree_elements[select_bid]
+ # The selected option should be reflected in the select element description
+ assert 'option2' in select_desc or 'Option 2' in select_desc, (
+ f"Select element should show 'option2' as selected but description is: {select_desc}"
+ )
+
+ # Test click action with real bid
+ action_browse = BrowseInteractiveAction(
+ browser_actions=f'click("{button_bid}")', return_axtree=True
+ )
+ obs = runtime.run_action(action_browse)
+ assert isinstance(obs, BrowserOutputObservation)
+ assert not obs.error, f'Click action failed: {obs.last_browser_action_error}'
+
+ # Verify that the button click triggered the JavaScript and updated the result div
+ updated_axtree_elements = parse_axtree_content(obs.content)
+ # Look for the "Button clicked!" text that should appear in the result div
+ result_found = any(
+ 'Button clicked!' in desc for desc in updated_axtree_elements.values()
+ )
+ assert result_found, (
+ f"Button click should have triggered JavaScript to show 'Button clicked!' but not found in: {dict(list(updated_axtree_elements.items())[:10])}"
+ )
+
+ # Test clear action with real bid
+ action_browse = BrowseInteractiveAction(
+ browser_actions=f'clear("{text_input_bid}")', return_axtree=True
+ )
+ obs = runtime.run_action(action_browse)
+ assert isinstance(obs, BrowserOutputObservation)
+ assert not obs.error, f'Clear action failed: {obs.last_browser_action_error}'
+
+ # Verify that the text input is now empty/cleared
+ updated_axtree_elements = parse_axtree_content(obs.content)
+ assert text_input_bid in updated_axtree_elements
+ text_input_desc = updated_axtree_elements[text_input_bid]
+ # After clearing, the input should not contain the previous text
+ assert 'Hello World' not in text_input_desc, (
+ f'Text input should be cleared but still contains text: {text_input_desc}'
+ )
+ # Check that it's back to showing placeholder text or is empty
+ assert (
+ 'Enter text' in text_input_desc # placeholder text
+ or 'textbox' in text_input_desc.lower() # generic textbox description
+ or text_input_desc.strip() == '' # empty description
+ ), (
+ f'Cleared text input should show placeholder or be empty but description is: {text_input_desc}'
+ )
+
+ # Clean up
+ action_cmd = CmdRunAction(command='pkill -f "python3 -m http.server" || true')
+ logger.info(action_cmd, extra={'msg_type': 'ACTION'})
+ obs = runtime.run_action(action_cmd)
+ logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+ finally:
+ _close_test_runtime(runtime)
+
+
+def test_browser_interactive_actions(temp_dir, runtime_cls, run_as_openhands):
+    """Test browser interactive actions: scroll, hover, fill, press, focus.
+
+    Writes a ``scroll.html`` fixture, copies it into the sandbox workspace,
+    serves it with ``python3 -m http.server`` on port 8000 and drives the page
+    through ``BrowseInteractiveAction``. Element bids are looked up in the
+    returned accessibility tree rather than hard-coded.
+    """
+    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
+    try:
+        # Create a test page with scrollable content
+        # NOTE(review): the markup of this fixture looks stripped in this view;
+        # the lookups below need an element containing 'Hover over me' and an
+        # input exposing 'Focus me and type' -- confirm against the real file.
+        scroll_content = """
+
+
+
+ Scroll Test
+
+
+
+
Interactive Test Page
+
Hover over me
+
+
+
This is a long scrollable page...
+
Middle content
+
Bottom content
+
+
+
+        """
+
+        # Create HTML file
+        scroll_path = os.path.join(temp_dir, 'scroll.html')
+        with open(scroll_path, 'w') as f:
+            f.write(scroll_content)
+
+        # Copy to sandbox
+        sandbox_dir = config.workspace_mount_path_in_sandbox
+        runtime.copy_to(scroll_path, sandbox_dir)
+
+        # Start HTTP server
+        action_cmd = CmdRunAction(
+            command='python3 -m http.server 8000 > server.log 2>&1 &'
+        )
+        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
+        obs = runtime.run_action(action_cmd)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        assert obs.exit_code == 0
+
+        # Wait for server to start
+        action_cmd = CmdRunAction(command='sleep 3')
+        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
+        obs = runtime.run_action(action_cmd)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+        # Navigate to scroll page
+        action_browse = BrowseInteractiveAction(
+            browser_actions='goto("http://localhost:8000/scroll.html")',
+            return_axtree=True,
+        )
+        logger.info(action_browse, extra={'msg_type': 'ACTION'})
+        obs = runtime.run_action(action_browse)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+        assert isinstance(obs, BrowserOutputObservation)
+        assert not obs.error
+        assert 'Interactive Test Page' in obs.content
+
+        # Test scroll action
+        action_browse = BrowseInteractiveAction(
+            browser_actions='scroll(0, 300)',  # Scroll down 300 pixels
+            return_axtree=True,
+        )
+        logger.info(action_browse, extra={'msg_type': 'ACTION'})
+        obs = runtime.run_action(action_browse)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+        assert isinstance(obs, BrowserOutputObservation)
+        assert not obs.error, f'Scroll action failed: {obs.last_browser_action_error}'
+        # Verify the scroll action was recorded correctly
+        assert 'scroll(0, 300)' in obs.last_browser_action, (
+            f'Expected scroll action in browser history but got: {obs.last_browser_action}'
+        )
+
+        # Parse the axtree to get actual bid values for interactive elements
+        axtree_elements = parse_axtree_content(obs.content)
+
+        # Find elements by their characteristics visible in the axtree
+        hover_div_bid = find_element_by_text(axtree_elements, 'Hover over me')
+        focus_input_bid = find_element_by_text(axtree_elements, 'Focus me and type')
+
+        # Verify we found the required elements
+        assert hover_div_bid is not None, (
+            f'Could not find hover div element in axtree. Available elements: {dict(list(axtree_elements.items())[:5])}'
+        )
+        assert focus_input_bid is not None, (
+            f'Could not find focus input element in axtree. Available elements: {dict(list(axtree_elements.items())[:5])}'
+        )
+
+        # Test hover action with real bid
+        action_browse = BrowseInteractiveAction(
+            browser_actions=f'hover("{hover_div_bid}")', return_axtree=True
+        )
+        logger.info(action_browse, extra={'msg_type': 'ACTION'})
+        obs = runtime.run_action(action_browse)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+        assert isinstance(obs, BrowserOutputObservation)
+        assert not obs.error, f'Hover action failed: {obs.last_browser_action_error}'
+
+        # Test focus action with real bid
+        action_browse = BrowseInteractiveAction(
+            browser_actions=f'focus("{focus_input_bid}")', return_axtree=True
+        )
+        logger.info(action_browse, extra={'msg_type': 'ACTION'})
+        obs = runtime.run_action(action_browse)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+        assert isinstance(obs, BrowserOutputObservation)
+        assert not obs.error, f'Focus action failed: {obs.last_browser_action_error}'
+
+        # Verify that the input element is now focused
+        assert obs.focused_element_bid == focus_input_bid, (
+            f'Expected focused element to be {focus_input_bid}, but got {obs.focused_element_bid}'
+        )
+
+        # Test fill action (type in focused input) with real bid
+        action_browse = BrowseInteractiveAction(
+            browser_actions=f'fill("{focus_input_bid}", "TestValue123")',
+            return_axtree=True,
+        )
+        logger.info(action_browse, extra={'msg_type': 'ACTION'})
+        obs = runtime.run_action(action_browse)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+        assert isinstance(obs, BrowserOutputObservation)
+        assert not obs.error, f'Fill action failed: {obs.last_browser_action_error}'
+
+        # Verify that the text was actually entered
+        updated_axtree_elements = parse_axtree_content(obs.content)
+        assert focus_input_bid in updated_axtree_elements, (
+            f'Focus input element {focus_input_bid} should be present in updated axtree. Available elements: {list(updated_axtree_elements.keys())[:10]}'
+        )
+        input_desc = updated_axtree_elements[focus_input_bid]
+        assert 'TestValue123' in input_desc or "'TestValue123'" in input_desc, (
+            f"Input should contain 'TestValue123' but description is: {input_desc}"
+        )
+
+        # Test press action (for pressing individual keys) with real bid
+        action_browse = BrowseInteractiveAction(
+            browser_actions=f'press("{focus_input_bid}", "Backspace")',
+            return_axtree=True,
+        )
+        logger.info(action_browse, extra={'msg_type': 'ACTION'})
+        obs = runtime.run_action(action_browse)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+        assert isinstance(obs, BrowserOutputObservation)
+        assert not obs.error, f'Press action failed: {obs.last_browser_action_error}'
+
+        # Verify the backspace removed the last character (3 from TestValue123)
+        updated_axtree_elements = parse_axtree_content(obs.content)
+        assert focus_input_bid in updated_axtree_elements, (
+            f'Focus input element {focus_input_bid} should be present in updated axtree. Available elements: {list(updated_axtree_elements.keys())[:10]}'
+        )
+        input_desc = updated_axtree_elements[focus_input_bid]
+        assert 'TestValue12' in input_desc or "'TestValue12'" in input_desc, (
+            f"Input should contain 'TestValue12' after backspace but description is: {input_desc}"
+        )
+        # 'TestValue12' is a prefix of 'TestValue123', so the check above also
+        # passes when Backspace had no effect; rule that case out explicitly.
+        assert 'TestValue123' not in input_desc, (
+            f"Backspace should have removed the trailing '3' but description is: {input_desc}"
+        )
+
+        # Test multiple actions in sequence
+        action_browse = BrowseInteractiveAction(
+            browser_actions="""
+scroll(0, -200)
+noop(1000)
+scroll(0, 400)
+""".strip(),
+            return_axtree=False,
+        )
+        logger.info(action_browse, extra={'msg_type': 'ACTION'})
+        obs = runtime.run_action(action_browse)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+        assert isinstance(obs, BrowserOutputObservation)
+        assert not obs.error, (
+            f'Multiple actions sequence failed: {obs.last_browser_action_error}'
+        )
+        # Verify the last action in the sequence was recorded
+        assert (
+            'scroll(0, 400)' in obs.last_browser_action
+            or 'noop(1000)' in obs.last_browser_action
+        ), f'Expected final action from sequence but got: {obs.last_browser_action}'
+
+        # Clean up
+        action_cmd = CmdRunAction(command='pkill -f "python3 -m http.server" || true')
+        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
+        obs = runtime.run_action(action_cmd)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+    finally:
+        _close_test_runtime(runtime)
+
+
+def test_browser_file_upload(temp_dir, runtime_cls, run_as_openhands):
+    """Test browser file upload action.
+
+    Copies an ``upload.html`` fixture and a small text file into the sandbox
+    workspace, serves the page with ``python3 -m http.server`` on port 8000,
+    runs ``upload_file`` against the file input found in the accessibility
+    tree, and checks that the input (and, when an upload button is found, the
+    page) reflects the selected file.
+    """
+    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
+    try:
+        # Create a test file to upload
+        test_file_content = 'This is a test file for upload testing.'
+        test_file_path = os.path.join(temp_dir, 'upload_test.txt')
+        with open(test_file_path, 'w') as f:
+            f.write(test_file_content)
+
+        # Create an upload form page
+        # NOTE(review): the markup of this fixture looks stripped in this view;
+        # the lookups below need a file input and an 'Upload File' button --
+        # confirm against the real file.
+        upload_content = """
+
+
+ File Upload Test
+
+
File Upload Test
+
+
+
+
+
+        """
+
+        # Create HTML file
+        upload_path = os.path.join(temp_dir, 'upload.html')
+        with open(upload_path, 'w') as f:
+            f.write(upload_content)
+
+        # Copy files to sandbox
+        sandbox_dir = config.workspace_mount_path_in_sandbox
+        runtime.copy_to(upload_path, sandbox_dir)
+        runtime.copy_to(test_file_path, sandbox_dir)
+        # Path of the copied file as seen from inside the sandbox. Built from
+        # sandbox_dir so the test follows the configured mount point instead
+        # of assuming it is /workspace.
+        sandbox_upload_path = f'{sandbox_dir}/upload_test.txt'
+
+        # Start HTTP server
+        action_cmd = CmdRunAction(
+            command='python3 -m http.server 8000 > server.log 2>&1 &'
+        )
+        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
+        obs = runtime.run_action(action_cmd)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        assert obs.exit_code == 0
+
+        # Wait for server to start
+        action_cmd = CmdRunAction(command='sleep 3')
+        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
+        obs = runtime.run_action(action_cmd)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+        # Navigate to upload page
+        action_browse = BrowseInteractiveAction(
+            browser_actions='goto("http://localhost:8000/upload.html")',
+            return_axtree=True,
+        )
+        logger.info(action_browse, extra={'msg_type': 'ACTION'})
+        obs = runtime.run_action(action_browse)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+        assert isinstance(obs, BrowserOutputObservation)
+        assert not obs.error
+        assert 'File Upload Test' in obs.content
+
+        # Parse the axtree to get the file input bid
+        axtree_elements = parse_axtree_content(obs.content)
+        # File inputs often show up as buttons in axtree, try multiple strategies
+        file_input_bid = (
+            find_element_by_text(axtree_elements, 'Choose File')
+            or find_element_by_text(axtree_elements, 'No file chosen')
+            or find_element_by_text(axtree_elements, 'Browse')
+            or find_element_by_text(axtree_elements, 'file')
+            or find_element_by_id(axtree_elements, 'file-input')
+        )
+
+        # Also look for button near the file input (Upload File button)
+        upload_button_bid = find_element_by_text(axtree_elements, 'Upload File')
+
+        # Test upload_file action with real bid
+        assert file_input_bid is not None, (
+            f'Could not find file input element in axtree. Available elements: {dict(list(axtree_elements.items())[:10])}'
+        )
+
+        action_browse = BrowseInteractiveAction(
+            browser_actions=f'upload_file("{file_input_bid}", "{sandbox_upload_path}")',
+            return_axtree=True,
+        )
+        logger.info(action_browse, extra={'msg_type': 'ACTION'})
+        obs = runtime.run_action(action_browse)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+        assert isinstance(obs, BrowserOutputObservation)
+        assert not obs.error, (
+            f'File upload action failed: {obs.last_browser_action_error}'
+        )
+
+        # Verify the file input now shows the selected file
+        updated_axtree_elements = parse_axtree_content(obs.content)
+        assert file_input_bid in updated_axtree_elements, (
+            f'File input element {file_input_bid} should be present in updated axtree. Available elements: {list(updated_axtree_elements.keys())[:10]}'
+        )
+        file_input_desc = updated_axtree_elements[file_input_bid]
+        # File inputs typically show the filename when a file is selected
+        assert (
+            'upload_test.txt' in file_input_desc
+            or 'upload_test' in file_input_desc
+            or 'txt' in file_input_desc
+        ), f'File input should show selected file but description is: {file_input_desc}'
+
+        # Test clicking the upload button to trigger the JavaScript function
+        if upload_button_bid:
+            action_browse = BrowseInteractiveAction(
+                browser_actions=f'click("{upload_button_bid}")',
+                return_axtree=True,
+            )
+            logger.info(action_browse, extra={'msg_type': 'ACTION'})
+            obs = runtime.run_action(action_browse)
+            logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+            assert isinstance(obs, BrowserOutputObservation)
+            assert not obs.error, (
+                f'Upload button click failed: {obs.last_browser_action_error}'
+            )
+
+            # Check if the JavaScript function executed and updated the result div
+            final_axtree_elements = parse_axtree_content(obs.content)
+            # Look for the result text that should be set by JavaScript
+            result_found = any(
+                'File selected:' in desc or 'upload_test.txt' in desc
+                for desc in final_axtree_elements.values()
+            )
+            assert result_found, (
+                f'JavaScript upload handler should have updated the page but no result found in: {dict(list(final_axtree_elements.items())[:10])}'
+            )
+
+        # Clean up
+        action_cmd = CmdRunAction(command='pkill -f "python3 -m http.server" || true')
+        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
+        obs = runtime.run_action(action_cmd)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+    finally:
+        _close_test_runtime(runtime)
+
+
def test_read_pdf_browse(temp_dir, runtime_cls, run_as_openhands):
runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
@@ -147,10 +947,6 @@ def test_read_pdf_browse(temp_dir, runtime_cls, run_as_openhands):
_close_test_runtime(runtime)
-@pytest.mark.skipif(
- os.environ.get('TEST_RUNTIME') == 'cli',
- reason='CLIRuntime does not support browsing actions',
-)
def test_read_png_browse(temp_dir, runtime_cls, run_as_openhands):
runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
@@ -218,10 +1014,6 @@ def test_read_png_browse(temp_dir, runtime_cls, run_as_openhands):
_close_test_runtime(runtime)
-@pytest.mark.skipif(
- os.environ.get('TEST_RUNTIME') == 'cli',
- reason='CLIRuntime does not support browsing actions',
-)
def test_download_file(temp_dir, runtime_cls, run_as_openhands):
"""Test downloading a file using the browser."""
runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)