mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
384 lines
14 KiB
Python
384 lines
14 KiB
Python
"""Browsing-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
|
|
|
|
import os
|
|
|
|
import pytest
|
|
from conftest import _close_test_runtime, _load_runtime
|
|
|
|
from openhands.core.logger import openhands_logger as logger
|
|
from openhands.events.action import (
|
|
BrowseInteractiveAction,
|
|
BrowseURLAction,
|
|
CmdRunAction,
|
|
)
|
|
from openhands.events.observation import (
|
|
BrowserOutputObservation,
|
|
CmdOutputObservation,
|
|
FileDownloadObservation,
|
|
)
|
|
|
|
# ============================================================================================================================
|
|
# Browsing tests, without evaluation (poetry install --without evaluation)
|
|
# For eval environments, tests need to run with poetry install
|
|
# ============================================================================================================================
|
|
|
|
|
|
@pytest.mark.skipif(
    os.environ.get('TEST_RUNTIME') == 'cli',
    reason='CLIRuntime does not support browsing actions',
)
def test_simple_browse(temp_dir, runtime_cls, run_as_openhands):
    """Browse a directory listing served by an HTTP server in the sandbox.

    Starts ``python3 -m http.server 8000`` as a background job, opens
    ``http://localhost:8000`` with a ``BrowseURLAction`` and checks the
    returned ``BrowserOutputObservation`` (URL, open pages, last browser
    action and page content).

    The whole scenario runs inside ``try``/``finally`` so the runtime is
    closed even when an assertion fails, like the other tests in this module.
    """
    # The second element returned by _load_runtime (the config) is not needed.
    runtime, _config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Launch the HTTP server in the background; its output is redirected
        # to server.log so it can be inspected (and listed) further down.
        action_cmd = CmdRunAction(
            command='python3 -m http.server 8000 > server.log 2>&1 &'
        )
        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_cmd)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})

        assert isinstance(obs, CmdOutputObservation)
        assert obs.exit_code == 0
        # Presumably the shell's job-control line for the background job.
        assert '[1]' in obs.content

        # Give the server a moment to come up, then dump its log.
        action_cmd = CmdRunAction(command='sleep 3 && cat server.log')
        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_cmd)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert obs.exit_code == 0

        # Browse the server root.
        action_browse = BrowseURLAction(
            url='http://localhost:8000', return_axtree=False
        )
        logger.info(action_browse, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_browse)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})

        assert isinstance(obs, BrowserOutputObservation)
        assert 'http://localhost:8000' in obs.url
        assert not obs.error
        assert obs.open_pages_urls == ['http://localhost:8000/']
        assert obs.active_page_index == 0
        assert obs.last_browser_action == 'goto("http://localhost:8000")'
        assert obs.last_browser_action_error == ''
        assert 'Directory listing for /' in obs.content
        # server.log lives in the served directory, so it must be listed.
        assert 'server.log' in obs.content

        # Clean up the log file created by the server redirect.
        action = CmdRunAction(command='rm -rf server.log')
        logger.info(action, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert obs.exit_code == 0
    finally:
        _close_test_runtime(runtime)
|
|
|
|
|
|
@pytest.mark.skipif(
    os.environ.get('TEST_RUNTIME') == 'cli',
    reason='CLIRuntime does not support browsing actions',
)
def test_read_pdf_browse(temp_dir, runtime_cls, run_as_openhands):
    """Open a PDF through the sandbox file viewer and check the screenshot.

    A three-line PDF is generated on the host with reportlab, copied into the
    sandbox workspace, and opened via the ``/view?path=...`` endpoint of the
    server whose URL is read from ``/tmp/oh-server-url``. The test then checks
    the browser observation text and that a screenshot file was written under
    ``/workspace/.browser_screenshots``.
    """
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)

    def execute(action):
        """Log the action, run it on the runtime, log and return the result."""
        logger.info(action, extra={'msg_type': 'ACTION'})
        result = runtime.run_action(action)
        logger.info(result, extra={'msg_type': 'OBSERVATION'})
        return result

    try:
        # Build the PDF on the host with reportlab.
        from reportlab.lib.pagesizes import letter
        from reportlab.pdfgen import canvas

        pdf_content = 'This is test content for PDF reading test'
        pdf_path = os.path.join(temp_dir, 'test_document.pdf')

        document = canvas.Canvas(pdf_path, pagesize=letter)
        # Several lines of text so the document is not trivially small.
        document.drawString(100, 750, pdf_content)
        document.drawString(100, 700, 'Additional line for PDF structure')
        document.drawString(100, 650, 'Third line to ensure valid PDF')
        # Compression is switched off to keep the file structure simple.
        document.setPageCompression(0)
        document.save()

        # Ship the PDF into the sandbox workspace.
        runtime.copy_to(pdf_path, config.workspace_mount_path_in_sandbox)

        # Verify the file is visible inside the sandbox.
        listing = execute(CmdRunAction(command='ls -alh'))
        assert isinstance(listing, CmdOutputObservation)
        assert listing.exit_code == 0
        assert 'test_document.pdf' in listing.content

        # Read the server URL from inside the sandbox.
        url_obs = execute(CmdRunAction(command='cat /tmp/oh-server-url'))
        assert url_obs.exit_code == 0
        server_url = url_obs.content.strip()

        # Navigate the browser to the PDF via the file viewer endpoint.
        pdf_url = f'{server_url}/view?path=/workspace/test_document.pdf'
        browse_obs = execute(
            BrowseInteractiveAction(
                browser_actions=f'goto("{pdf_url}")', return_axtree=False
            )
        )

        # Check what the browser reported.
        assert isinstance(browse_obs, BrowserOutputObservation)
        observation_text = str(browse_obs)
        screenshot_marker = (
            'Screenshot saved to: /workspace/.browser_screenshots/screenshot_'
        )
        assert '[Action executed successfully.]' in observation_text
        assert 'Canvas' in observation_text
        assert screenshot_marker in observation_text

        # The screenshot must also exist on disk in the sandbox.
        screenshots = execute(
            CmdRunAction(command='ls /workspace/.browser_screenshots')
        )
        assert isinstance(screenshots, CmdOutputObservation)
        assert screenshots.exit_code == 0
        assert 'screenshot_' in screenshots.content
        assert '.png' in screenshots.content
    finally:
        _close_test_runtime(runtime)
|
|
|
|
|
|
@pytest.mark.skipif(
    os.environ.get('TEST_RUNTIME') == 'cli',
    reason='CLIRuntime does not support browsing actions',
)
def test_read_png_browse(temp_dir, runtime_cls, run_as_openhands):
    """Open a PNG through the sandbox file viewer and check the screenshot.

    A small white image with one line of black text is drawn on the host with
    PIL, copied into the sandbox workspace, and opened via the
    ``/view?path=...`` endpoint of the server whose URL is read from
    ``/tmp/oh-server-url``. The test then checks the browser observation text
    and that a screenshot file exists under ``/workspace/.browser_screenshots``.
    """
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)

    def run_logged(action):
        """Run one action on the runtime, logging it and its observation."""
        logger.info(action, extra={'msg_type': 'ACTION'})
        outcome = runtime.run_action(action)
        logger.info(outcome, extra={'msg_type': 'OBSERVATION'})
        return outcome

    try:
        # Draw the test image on the host with PIL.
        from PIL import Image, ImageDraw

        png_path = os.path.join(temp_dir, 'test_image.png')
        caption = 'This is a test PNG image'
        image = Image.new('RGB', (400, 200), color=(255, 255, 255))
        ImageDraw.Draw(image).text((20, 80), caption, fill=(0, 0, 0))
        image.save(png_path)

        # Ship the PNG into the sandbox workspace.
        runtime.copy_to(png_path, config.workspace_mount_path_in_sandbox)

        # Confirm the file is visible inside the sandbox.
        ls_obs = run_logged(CmdRunAction(command='ls -alh'))
        assert isinstance(ls_obs, CmdOutputObservation)
        assert ls_obs.exit_code == 0
        assert 'test_image.png' in ls_obs.content

        # Read the server URL from inside the sandbox.
        url_obs = run_logged(CmdRunAction(command='cat /tmp/oh-server-url'))
        assert url_obs.exit_code == 0
        server_url = url_obs.content.strip()

        # Navigate the browser to the PNG via the file viewer endpoint.
        png_url = f'{server_url}/view?path=/workspace/test_image.png'
        goto_png = BrowseInteractiveAction(
            browser_actions=f'goto("{png_url}")', return_axtree=False
        )
        browser_obs = run_logged(goto_png)

        # Check what the browser reported.
        assert isinstance(browser_obs, BrowserOutputObservation)
        observation_text = str(browser_obs)
        for expected in (
            '[Action executed successfully.]',
            'File Viewer - test_image.png',
            'Screenshot saved to: /workspace/.browser_screenshots/screenshot_',
        ):
            assert expected in observation_text

        # The screenshot must also exist on disk in the sandbox.
        shots_obs = run_logged(
            CmdRunAction(command='ls /workspace/.browser_screenshots')
        )
        assert isinstance(shots_obs, CmdOutputObservation)
        assert shots_obs.exit_code == 0
        assert 'screenshot_' in shots_obs.content
        assert '.png' in shots_obs.content
    finally:
        _close_test_runtime(runtime)
|
|
|
|
|
|
@pytest.mark.skipif(
    os.environ.get('TEST_RUNTIME') == 'cli',
    reason='CLIRuntime does not support browsing actions',
)
def test_download_file(temp_dir, runtime_cls, run_as_openhands):
    """Test downloading a file using the browser.

    A minimal PDF and an HTML page linking to it are written on the host,
    copied into the sandbox workspace and served with
    ``python3 -m http.server 8000``. The browser first opens the HTML page,
    then navigates straight to the PDF URL, which is expected to produce a
    ``FileDownloadObservation`` and a renamed file (``file_1.pdf``) in
    ``/workspace``.
    """
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Minimal PDF content for testing. Every dictionary must be opened
        # with "<<" and closed with ">>"; without the openers the objects and
        # the trailer are not valid PDF dictionaries.
        pdf_content = b"""%PDF-1.4
1 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj
<<
/Type /Pages
/Kids [3 0 R]
/Count 1
>>
endobj
3 0 obj
<<
/Type /Page
/Parent 2 0 R
/MediaBox [0 0 612 792]
>>
endobj
xref
0 4
0000000000 65535 f
0000000010 00000 n
0000000053 00000 n
0000000125 00000 n
trailer
<<
/Size 4
/Root 1 0 R
>>
startxref
212
%%EOF"""

        test_file_name = 'test_download.pdf'
        test_file_path = os.path.join(temp_dir, test_file_name)
        with open(test_file_path, 'wb') as f:
            f.write(pdf_content)

        # Copy the file to the sandbox
        sandbox_dir = config.workspace_mount_path_in_sandbox
        runtime.copy_to(test_file_path, sandbox_dir)

        # Create a simple HTML page with a download link
        html_content = f"""
<!DOCTYPE html>
<html>
<head>
<title>Download Test</title>
</head>
<body>
<h1>Download Test Page</h1>
<p>Click the link below to download the test file:</p>
<a href="/{test_file_name}" download="{test_file_name}" id="download-link">Download Test File</a>
</body>
</html>
"""

        html_file_path = os.path.join(temp_dir, 'download_test.html')
        # Explicit encoding so the fixture does not depend on the host locale.
        with open(html_file_path, 'w', encoding='utf-8') as f:
            f.write(html_content)

        # Copy the HTML file to the sandbox
        runtime.copy_to(html_file_path, sandbox_dir)

        # Verify the files exist in the sandbox
        action_cmd = CmdRunAction(command='ls -alh')
        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_cmd)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert isinstance(obs, CmdOutputObservation)
        assert obs.exit_code == 0
        assert test_file_name in obs.content
        assert 'download_test.html' in obs.content

        # Ensure downloads directory exists
        action_cmd = CmdRunAction(command='mkdir -p /workspace/.downloads')
        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_cmd)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert obs.exit_code == 0

        # Start HTTP server (background job, output captured in server.log)
        action_cmd = CmdRunAction(
            command='python3 -m http.server 8000 > server.log 2>&1 &'
        )
        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_cmd)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert isinstance(obs, CmdOutputObservation)
        assert obs.exit_code == 0

        # Wait for server to start
        action_cmd = CmdRunAction(command='sleep 2')
        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_cmd)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})

        # Browse to the HTML page
        action_browse = BrowseURLAction(
            url='http://localhost:8000/download_test.html'
        )
        logger.info(action_browse, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_browse)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})

        # Verify the browser observation
        assert isinstance(obs, BrowserOutputObservation)
        assert 'http://localhost:8000/download_test.html' in obs.url
        assert not obs.error
        assert 'Download Test Page' in obs.content

        # Go to the PDF file url directly - this should trigger download
        file_url = f'http://localhost:8000/{test_file_name}'
        action_browse = BrowseInteractiveAction(
            browser_actions=f'goto("{file_url}")',
        )
        logger.info(action_browse, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_browse)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})

        # Verify the observation after navigating to the PDF file: a download
        # is reported instead of a regular browser observation.
        downloaded_file_name = 'file_1.pdf'
        assert isinstance(obs, FileDownloadObservation)
        assert 'Location of downloaded file:' in str(obs)
        assert downloaded_file_name in str(obs)  # File is renamed

        # Wait for download to complete
        action_cmd = CmdRunAction(command='sleep 3')
        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_cmd)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})

        # Check if the file was downloaded
        action_cmd = CmdRunAction(command='ls -la /workspace')
        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_cmd)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert isinstance(obs, CmdOutputObservation)
        assert obs.exit_code == 0
        assert downloaded_file_name in obs.content

        # Clean up: stop the server ("|| true" keeps this best-effort) and
        # remove its log file.
        action_cmd = CmdRunAction(
            command='pkill -f "python3 -m http.server" || true'
        )
        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_cmd)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})

        action_cmd = CmdRunAction(command='rm -f server.log')
        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_cmd)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    finally:
        _close_test_runtime(runtime)
|