Add core config to disable browser environment (#9570)

This commit is contained in:
Boxuan Li 2025-07-05 23:20:58 -07:00 committed by GitHub
parent b98615bc1c
commit a6301075ec
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 59 additions and 11 deletions

View File

@ -49,6 +49,9 @@
# Maximum file size for uploads, in megabytes
#file_uploads_max_file_size_mb = 0
# Enable the browser environment
#enable_browser = true
# Maximum budget per task, 0.0 means no limit
#max_budget_per_task = 0.0
@ -226,6 +229,7 @@ model = "gpt-4o"
[agent]
# Whether the browsing tool is enabled
# Note: when this is set to true, enable_browser in the core config must also be true
enable_browsing = true
# Whether the LLM draft editor is enabled

View File

@ -34,6 +34,7 @@ class OpenHandsConfig(BaseModel):
file_store_path: Path to the file store.
file_store_web_hook_url: Optional url for file store web hook
file_store_web_hook_headers: Optional headers for file_store web hook
enable_browser: Whether to enable the browser environment
save_trajectory_path: Either a folder path to store trajectories with auto-generated filenames, or a designated trajectory file path.
save_screenshots_in_trajectory: Whether to save screenshots in trajectory (in encoded image format).
replay_trajectory_path: Path to load trajectory and replay. If provided, trajectory would be replayed first before user's instruction.
@ -68,6 +69,7 @@ class OpenHandsConfig(BaseModel):
file_store_path: str = Field(default='~/.openhands')
file_store_web_hook_url: str | None = Field(default=None)
file_store_web_hook_headers: dict | None = Field(default=None)
enable_browser: bool = Field(default=True)
save_trajectory_path: str | None = Field(default=None)
save_screenshots_in_trajectory: bool = Field(default=False)
replay_trajectory_path: str | None = Field(default=None)

View File

@ -172,6 +172,7 @@ class ActionExecutor:
work_dir: str,
username: str,
user_id: int,
enable_browser: bool,
browsergym_eval_env: str | None,
) -> None:
self.plugins_to_load = plugins_to_load
@ -188,10 +189,16 @@ class ActionExecutor:
self.lock = asyncio.Lock()
self.plugins: dict[str, Plugin] = {}
self.file_editor = OHEditor(workspace_root=self._initial_cwd)
self.enable_browser = enable_browser
self.browser: BrowserEnv | None = None
self.browser_init_task: asyncio.Task | None = None
self.browsergym_eval_env = browsergym_eval_env
if (not self.enable_browser) and self.browsergym_eval_env:
raise BrowserUnavailableException(
'Browser environment is not enabled in config, but browsergym_eval_env is set'
)
self.start_time = time.time()
self.last_execution_time = self.start_time
self._initialized = False
@ -219,6 +226,10 @@ class ActionExecutor:
async def _init_browser_async(self):
"""Initialize the browser asynchronously."""
if not self.enable_browser:
logger.info('Browser environment is not enabled in config')
return
if sys.platform == 'win32':
logger.warning('Browser environment not supported on windows')
return
@ -596,7 +607,7 @@ class ActionExecutor:
async def browse(self, action: BrowseURLAction) -> Observation:
    """Handle a URL browse action.

    Returns an ``ErrorObservation`` when no browser instance exists
    (``self.browser`` is ``None``); otherwise waits for the browser to be
    ready and delegates to ``browse``.
    """
    if self.browser is None:
        # The browser is absent either because the platform is unsupported
        # or because it was disabled in config, so the message covers both.
        # Keep this a single literal: adjacent string literals are silently
        # concatenated by Python.
        return ErrorObservation(
            'Browser functionality is not supported or disabled.'
        )
    await self._ensure_browser_ready()
    return await browse(action, self.browser, self.initial_cwd)
@ -604,7 +615,7 @@ class ActionExecutor:
async def browse_interactive(self, action: BrowseInteractiveAction) -> Observation:
if self.browser is None:
return ErrorObservation(
'Browser functionality is not supported on Windows.'
'Browser functionality is not supported or disabled.'
)
await self._ensure_browser_ready()
browser_observation = await browse(action, self.browser, self.initial_cwd)
@ -666,6 +677,12 @@ if __name__ == '__main__':
'--username', type=str, help='User to run as', default='openhands'
)
parser.add_argument('--user-id', type=int, help='User ID to run as', default=1000)
parser.add_argument(
'--enable-browser',
action=argparse.BooleanOptionalAction,
default=True,
help='Enable the browser environment',
)
parser.add_argument(
'--browsergym-eval-env',
type=str,
@ -703,6 +720,7 @@ if __name__ == '__main__':
work_dir=args.working_dir,
username=args.username,
user_id=args.user_id,
enable_browser=args.enable_browser,
browsergym_eval_env=args.browsergym_eval_env,
)
await client.ainit()

View File

@ -360,6 +360,8 @@ class DockerRuntime(ActionExecutionClient):
)
command = self.get_action_execution_server_startup_command()
self.log('info', f'Starting server with command: {command}')
if self.config.sandbox.enable_gpu:
gpu_ids = self.config.sandbox.cuda_visible_devices
if gpu_ids is None:

View File

@ -73,7 +73,7 @@ def get_user_info() -> tuple[int, str | None]:
return os.getuid(), username
def check_dependencies(code_repo_path: str, env_root_path: str) -> None:
def check_dependencies(code_repo_path: str, check_browser: bool) -> None:
ERROR_MESSAGE = 'Please follow the instructions in https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md to install OpenHands.'
if not os.path.exists(code_repo_path):
raise ValueError(
@ -91,7 +91,6 @@ def check_dependencies(code_repo_path: str, env_root_path: str) -> None:
raise ValueError('Jupyter is not properly installed. ' + ERROR_MESSAGE)
# Check libtmux is installed (skip on Windows)
if sys.platform != 'win32':
logger.debug('Checking dependencies: libtmux')
import libtmux
@ -108,15 +107,12 @@ def check_dependencies(code_repo_path: str, env_root_path: str) -> None:
if 'test' not in pane_output:
raise ValueError('libtmux is not properly installed. ' + ERROR_MESSAGE)
# Skip browser environment check on Windows
if sys.platform != 'win32':
if check_browser:
logger.debug('Checking dependencies: browser')
from openhands.runtime.browser.browser_env import BrowserEnv
browser = BrowserEnv()
browser.close()
else:
logger.warning('Running on Windows - browser environment check skipped.')
class LocalRuntime(ActionExecutionClient):
@ -289,7 +285,7 @@ class LocalRuntime(ActionExecutionClient):
override_username=self._username,
)
self.log('debug', f'Starting server with command: {cmd}')
self.log('info', f'Starting server with command: {cmd}')
env = os.environ.copy()
# Get the code repo path
code_repo_path = os.path.dirname(os.path.dirname(openhands.__file__))
@ -303,7 +299,6 @@ class LocalRuntime(ActionExecutionClient):
# Derive environment paths using sys.executable
interpreter_path = sys.executable
python_bin_path = os.path.dirname(interpreter_path)
env_root_path = os.path.dirname(python_bin_path)
# Prepend the interpreter's bin directory to PATH for subprocesses
env['PATH'] = f'{python_bin_path}{os.pathsep}{env.get("PATH", "")}'
@ -311,7 +306,8 @@ class LocalRuntime(ActionExecutionClient):
# Check dependencies unless skipped via SKIP_DEPENDENCY_CHECK=1
if os.getenv('SKIP_DEPENDENCY_CHECK', '') != '1':
check_dependencies(code_repo_path, env_root_path)
check_browser = self.config.enable_browser and sys.platform != 'win32'
check_dependencies(code_repo_path, check_browser)
self.server_process = subprocess.Popen( # noqa: S603
cmd,

View File

@ -60,4 +60,7 @@ def get_action_execution_server_startup_command(
*browsergym_args,
]
if not app_config.enable_browser:
base_cmd.append('--no-enable-browser')
return base_cmd

View File

@ -212,6 +212,7 @@ def _load_runtime(
runtime_startup_env_vars: dict[str, str] | None = None,
docker_runtime_kwargs: dict[str, str] | None = None,
override_mcp_config: MCPConfig | None = None,
enable_browser: bool = True,
) -> tuple[Runtime, OpenHandsConfig]:
sid = 'rt_' + str(random.randint(100000, 999999))
@ -221,6 +222,7 @@ def _load_runtime(
config = load_openhands_config()
config.run_as_openhands = run_as_openhands
config.enable_browser = enable_browser
config.sandbox.force_rebuild_runtime = force_rebuild_runtime
config.sandbox.keep_runtime_alive = False
config.sandbox.docker_runtime_kwargs = docker_runtime_kwargs

View File

@ -15,6 +15,7 @@ from openhands.events.action import (
from openhands.events.observation import (
BrowserOutputObservation,
CmdOutputObservation,
ErrorObservation,
FileDownloadObservation,
)
@ -122,6 +123,26 @@ def find_element_by_tag_and_attributes(
return None
def test_browser_disabled(temp_dir, runtime_cls, run_as_openhands):
    """Check that browsing yields an error when the browser is disabled.

    Loads a runtime with ``enable_browser=False``, starts a local HTTP
    server, and asserts that a ``BrowseURLAction`` against it returns an
    ``ErrorObservation`` carrying the "not supported or disabled" message.
    """
    runtime, _ = _load_runtime(
        temp_dir, runtime_cls, run_as_openhands, enable_browser=False
    )
    try:
        # Serve something on localhost:8000 before browsing to it
        # (presumably so the error cannot be blamed on an unreachable URL).
        action_cmd = CmdRunAction(
            command='python3 -m http.server 8000 > server.log 2>&1 &'
        )
        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_cmd)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})

        action_browse = BrowseURLAction(
            url='http://localhost:8000', return_axtree=False
        )
        logger.info(action_browse, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_browse)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})

        assert isinstance(obs, ErrorObservation)
        assert 'Browser functionality is not supported or disabled' in obs.content
    finally:
        # Always release the runtime, even when an assertion above fails.
        _close_test_runtime(runtime)
def test_simple_browse(temp_dir, runtime_cls, run_as_openhands):
runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)