[Refactor] split runtime initialization (create, connect, init) in cli scripts (#7036)

2025-12-26 05:48:36 +08:00 · 2025-03-03 00:19:25 +01:00 · 2025-03-03 00:19:25 +01:00 · 395c1ea9e3
commit 395c1ea9e3
parent 91ad59dc24
26 changed files with 107 additions and 21 deletions
--- a/evaluation/benchmarks/EDA/run_infer.py
+++ b/evaluation/benchmarks/EDA/run_infer.py
@@ -24,6 +24,7 @@ from openhands.core.config import (
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import MessageAction
+from openhands.utils.async_utils import call_async_from_sync

 game = None

@@ -121,6 +122,7 @@ def process_instance(

    # Here's how you can run the agent (similar to the `main` function) and get the final task state
    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)

    state: State | None = asyncio.run(
        run_controller(
--- a/evaluation/benchmarks/agent_bench/run_infer.py
+++ b/evaluation/benchmarks/agent_bench/run_infer.py
@@ -34,6 +34,7 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync


 def get_config(
@@ -210,6 +211,7 @@ def process_instance(
    # =============================================

    runtime: Runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)

    initialize_runtime(runtime, instance=instance)

--- a/evaluation/benchmarks/aider_bench/run_infer.py
+++ b/evaluation/benchmarks/aider_bench/run_infer.py
@@ -34,6 +34,7 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync

 # Configure visibility of unit tests to the Agent.
 USE_UNIT_TESTS = os.environ.get('USE_UNIT_TESTS', 'false').lower() == 'true'
@@ -203,7 +204,7 @@ def process_instance(
    # =============================================

    runtime: Runtime = create_runtime(config)
-
+    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime, instance=instance)

    # Here's how you can run the agent (similar to the `main` function) and get the final task state
--- a/evaluation/benchmarks/biocoder/run_infer.py
+++ b/evaluation/benchmarks/biocoder/run_infer.py
@@ -31,6 +31,7 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync

 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
    'CodeActAgent': functools.partial(
@@ -274,6 +275,7 @@ def process_instance(
    instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]

    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime, instance)

    # Here's how you can run the agent (similar to the `main` function) and get the final task state
--- a/evaluation/benchmarks/bird/run_infer.py
+++ b/evaluation/benchmarks/bird/run_infer.py
@@ -34,6 +34,7 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync


 def codeact_user_response(state: State) -> str:
@@ -399,6 +400,7 @@ def process_instance(
    instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]

    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime, instance)

    # Here's how you can run the agent (similar to the `main` function) and get the final task state
--- a/evaluation/benchmarks/browsing_delegation/run_infer.py
+++ b/evaluation/benchmarks/browsing_delegation/run_infer.py
@@ -25,6 +25,7 @@ from openhands.core.config import (
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import MessageAction
+from openhands.utils.async_utils import call_async_from_sync

 # Only CodeActAgent can delegate to BrowsingAgent
 SUPPORTED_AGENT_CLS = {'CodeActAgent'}
@@ -74,6 +75,7 @@ def process_instance(
    )

    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)

    state: State | None = asyncio.run(
        run_controller(
--- a/evaluation/benchmarks/commit0_bench/run_infer.py
+++ b/evaluation/benchmarks/commit0_bench/run_infer.py
@@ -35,6 +35,7 @@ from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation, ErrorObservation
 from openhands.events.serialization.event import event_to_dict
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 from openhands.utils.shutdown_listener import sleep_if_should_continue

 USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
@@ -394,6 +395,7 @@ def process_instance(
        logger.info(f'Starting evaluation for instance {instance.instance_id}.')

    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
    try:
        initialize_runtime(runtime, instance)

--- a/evaluation/benchmarks/discoverybench/run_infer.py
+++ b/evaluation/benchmarks/discoverybench/run_infer.py
@@ -34,6 +34,7 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync

 EVALUATION_LLM = 'gpt-4-1106-preview'

@@ -281,6 +282,7 @@ def process_instance(

    # Here's how you can run the agent (similar to the `main` function) and get the final task state
    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime, instance.data_files)

    state: State | None = asyncio.run(
--- a/evaluation/benchmarks/gaia/run_infer.py
+++ b/evaluation/benchmarks/gaia/run_infer.py
@@ -31,6 +31,7 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync

 DATASET_CACHE_DIR = os.path.join(os.path.dirname(__file__), 'data')

@@ -148,6 +149,7 @@ def process_instance(
    logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})

    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime, instance)

    # Here's how you can run the agent (similar to the `main` function) and get the final task state
--- a/evaluation/benchmarks/gorilla/run_infer.py
+++ b/evaluation/benchmarks/gorilla/run_infer.py
@@ -26,6 +26,7 @@ from openhands.core.config import (
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import MessageAction
+from openhands.utils.async_utils import call_async_from_sync

 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
    'CodeActAgent': codeact_user_response,
@@ -82,6 +83,7 @@ def process_instance(

    # Here's how you can run the agent (similar to the `main` function) and get the final task state
    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
    state: State | None = asyncio.run(
        run_controller(
            config=config,
--- a/evaluation/benchmarks/gpqa/run_infer.py
+++ b/evaluation/benchmarks/gpqa/run_infer.py
@@ -49,6 +49,7 @@ from openhands.events.action import (
    MessageAction,
 )
 from openhands.events.observation import Observation
+from openhands.utils.async_utils import call_async_from_sync

 ACTION_FORMAT = """
 <<FINAL_ANSWER||
@@ -214,6 +215,7 @@ Ok now its time to start solving the question. Good luck!
 """

    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
    state: State | None = asyncio.run(
        run_controller(
            config=config,
--- a/evaluation/benchmarks/humanevalfix/run_infer.py
+++ b/evaluation/benchmarks/humanevalfix/run_infer.py
@@ -39,6 +39,7 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync

 IMPORT_HELPER = {
    'python': [
@@ -232,6 +233,7 @@ def process_instance(

    # Here's how you can run the agent (similar to the `main` function) and get the final task state
    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime, instance)
    state: State | None = asyncio.run(
        run_controller(
--- a/evaluation/benchmarks/logic_reasoning/run_infer.py
+++ b/evaluation/benchmarks/logic_reasoning/run_infer.py
@@ -31,6 +31,7 @@ from openhands.events.action import (
 )
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync

 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
    'CodeActAgent': codeact_user_response,
@@ -206,6 +207,7 @@ def process_instance(
    instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]

    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime, instance)

    # Here's how you can run the agent (similar to the `main` function) and get the final task state
--- a/evaluation/benchmarks/miniwob/run_infer.py
+++ b/evaluation/benchmarks/miniwob/run_infer.py
@@ -41,6 +41,7 @@ from openhands.runtime.browser.browser_env import (
    BROWSER_EVAL_GET_GOAL_ACTION,
    BROWSER_EVAL_GET_REWARDS_ACTION,
 )
+from openhands.utils.async_utils import call_async_from_sync

 SUPPORTED_AGENT_CLS = {'BrowsingAgent', 'CodeActAgent'}

@@ -145,6 +146,7 @@ def process_instance(
        logger.info(f'Starting evaluation for instance {env_id}.')

    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
    task_str, obs = initialize_runtime(runtime)

    task_str += (
--- a/evaluation/benchmarks/mint/run_infer.py
+++ b/evaluation/benchmarks/mint/run_infer.py
@@ -35,6 +35,7 @@ from openhands.events.action import (
 )
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync


 def codeact_user_response_mint(state: State, task: Task, task_config: dict[str, int]):
@@ -184,6 +185,7 @@ def process_instance(
    )

    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime)

    state: State | None = asyncio.run(
--- a/evaluation/benchmarks/ml_bench/run_infer.py
+++ b/evaluation/benchmarks/ml_bench/run_infer.py
@@ -43,6 +43,7 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync

 config = load_app_config()

@@ -234,6 +235,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
    instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]

    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime, instance)

    # Run the agent
--- a/evaluation/benchmarks/scienceagentbench/run_infer.py
+++ b/evaluation/benchmarks/scienceagentbench/run_infer.py
@@ -29,6 +29,7 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync

 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
    'CodeActAgent': codeact_user_response,
@@ -195,6 +196,7 @@ If the program uses some packages that are incompatible, please figure out alter
 """

    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime, instance)

    # Here's how you can run the agent (similar to the `main` function) and get the final task state
--- a/evaluation/benchmarks/swe_bench/run_infer.py
+++ b/evaluation/benchmarks/swe_bench/run_infer.py
@@ -40,6 +40,7 @@ from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation, ErrorObservation
 from openhands.events.serialization.event import event_to_dict
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 from openhands.utils.shutdown_listener import sleep_if_should_continue

 USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
@@ -464,6 +465,7 @@ def process_instance(
            f'This is the {runtime_failure_count + 1}th attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
        )
    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)

    try:
        initialize_runtime(runtime, instance)
--- a/evaluation/benchmarks/the_agent_company/run_infer.py
+++ b/evaluation/benchmarks/the_agent_company/run_infer.py
@@ -28,6 +28,7 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import BrowserOutputObservation, CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync


 def get_config(
@@ -275,7 +276,7 @@ if __name__ == '__main__':
        args.task_image_name, task_short_name, temp_dir, agent_llm_config, agent_config
    )
    runtime: Runtime = create_runtime(config)
-
+    call_async_from_sync(runtime.connect)
    init_task_env(runtime, args.server_hostname, env_llm_config)

    dependencies = load_dependencies(runtime)
--- a/evaluation/benchmarks/toolqa/run_infer.py
+++ b/evaluation/benchmarks/toolqa/run_infer.py
@@ -27,6 +27,7 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync

 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
    'CodeActAgent': codeact_user_response,
@@ -104,6 +105,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
    logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})

    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime)

    # Here's how you can run the agent (similar to the `main` function) and get the final task state
--- a/evaluation/benchmarks/visualwebarena/run_infer.py
+++ b/evaluation/benchmarks/visualwebarena/run_infer.py
@@ -37,6 +37,7 @@ from openhands.runtime.browser.browser_env import (
    BROWSER_EVAL_GET_GOAL_ACTION,
    BROWSER_EVAL_GET_REWARDS_ACTION,
 )
+from openhands.utils.async_utils import call_async_from_sync

 SUPPORTED_AGENT_CLS = {'VisualBrowsingAgent'}
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
@@ -159,6 +160,8 @@ def process_instance(
        logger.info(f'Starting evaluation for instance {env_id}.')

    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
+
    task_str, goal_image_urls = initialize_runtime(runtime)
    initial_user_action = MessageAction(content=task_str, image_urls=goal_image_urls)
    state: State | None = asyncio.run(
--- a/evaluation/benchmarks/webarena/run_infer.py
+++ b/evaluation/benchmarks/webarena/run_infer.py
@@ -36,6 +36,7 @@ from openhands.runtime.browser.browser_env import (
    BROWSER_EVAL_GET_GOAL_ACTION,
    BROWSER_EVAL_GET_REWARDS_ACTION,
 )
+from openhands.utils.async_utils import call_async_from_sync

 SUPPORTED_AGENT_CLS = {'BrowsingAgent'}

@@ -144,6 +145,7 @@ def process_instance(
        logger.info(f'Starting evaluation for instance {env_id}.')

    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
    task_str = initialize_runtime(runtime)

    state: State | None = asyncio.run(
--- a/evaluation/integration_tests/run_infer.py
+++ b/evaluation/integration_tests/run_infer.py
@@ -30,6 +30,7 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import MessageAction
 from openhands.events.serialization.event import event_to_dict
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync

 FAKE_RESPONSES = {
    'CodeActAgent': fake_user_response,
@@ -108,6 +109,7 @@ def process_instance(
    # create sandbox and run the agent
    # =============================================
    runtime: Runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
    try:
        test_class.initialize_runtime(runtime)

--- a/openhands/core/cli.py
+++ b/openhands/core/cli.py
@@ -14,7 +14,12 @@ from openhands.core.config import (
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.loop import run_agent_until_done
 from openhands.core.schema import AgentState
-from openhands.core.setup import create_agent, create_controller, create_runtime
+from openhands.core.setup import (
+    create_agent,
+    create_controller,
+    create_runtime,
+    initialize_repository_for_runtime,
+)
 from openhands.events import EventSource, EventStreamSubscriber
 from openhands.events.action import (
    Action,
@@ -109,7 +114,6 @@ async def main(loop: asyncio.AbstractEventLoop):
        sid=sid,
        headless_mode=True,
        agent=agent,
-        selected_repository=config.sandbox.selected_repo,
    )

    controller, _ = create_controller(agent, runtime, config)
@@ -165,6 +169,14 @@ async def main(loop: asyncio.AbstractEventLoop):

    await runtime.connect()

+    # Initialize repository if needed
+    if config.sandbox.selected_repo:
+        initialize_repository_for_runtime(
+            runtime,
+            agent=agent,
+            selected_repository=config.sandbox.selected_repo,
+        )
+
    if initial_user_action:
        # If there's an initial user action, enqueue it and do not prompt again
        event_stream.add_event(initial_user_action, EventSource.USER)
--- a/openhands/core/main.py
+++ b/openhands/core/main.py
@@ -20,6 +20,7 @@ from openhands.core.setup import (
    create_controller,
    create_runtime,
    generate_sid,
+    initialize_repository_for_runtime,
 )
 from openhands.events import EventSource, EventStreamSubscriber
 from openhands.events.action import MessageAction, NullAction
@@ -29,6 +30,7 @@ from openhands.events.observation import AgentStateChangedObservation
 from openhands.events.serialization import event_from_dict
 from openhands.io import read_input, read_task
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync


 class FakeUserResponseFunc(Protocol):
@@ -97,8 +99,17 @@ async def run_controller(
            sid=sid,
            headless_mode=headless_mode,
            agent=agent,
-            selected_repository=config.sandbox.selected_repo,
        )
+        # Connect to the runtime
+        call_async_from_sync(runtime.connect)
+
+        # Initialize repository if needed
+        if config.sandbox.selected_repo:
+            initialize_repository_for_runtime(
+                runtime,
+                agent=agent,
+                selected_repository=config.sandbox.selected_repo,
+            )

    event_stream = runtime.event_stream

--- a/openhands/core/setup.py
+++ b/openhands/core/setup.py
@@ -21,7 +21,6 @@ from openhands.runtime import get_runtime_cls
 from openhands.runtime.base import Runtime
 from openhands.security import SecurityAnalyzer, options
 from openhands.storage import get_file_store
-from openhands.utils.async_utils import call_async_from_sync


 def create_runtime(
@@ -29,18 +28,19 @@ def create_runtime(
    sid: str | None = None,
    headless_mode: bool = True,
    agent: Agent | None = None,
-    selected_repository: str | None = None,
-    github_token: SecretStr | None = None,
 ) -> Runtime:
    """Create a runtime for the agent to run on.

-    config: The app config.
-    sid: (optional) The session id. IMPORTANT: please don't set this unless you know what you're doing.
-        Set it to incompatible value will cause unexpected behavior on RemoteRuntime.
-    headless_mode: Whether the agent is run in headless mode. `create_runtime` is typically called within evaluation scripts,
-        where we don't want to have the VSCode UI open, so it defaults to True.
-    selected_repository: (optional) The GitHub repository to use.
-    github_token: (optional) The GitHub token to use.
+    Args:
+        config: The app config.
+        sid: (optional) The session id. IMPORTANT: please don't set this unless you know what you're doing.
+            Setting it to an incompatible value will cause unexpected behavior on RemoteRuntime.
+        headless_mode: Whether the agent is run in headless mode. `create_runtime` is typically called within evaluation scripts,
+            where we don't want to have the VSCode UI open, so it defaults to True.
+        agent: (optional) The agent instance to use for configuring the runtime.
+
+    Returns:
+        The created Runtime instance (not yet connected or initialized).
    """
    # if sid is provided on the command line, use it as the name of the event stream
    # otherwise generate it on the basis of the configured jwt_secret
@@ -74,8 +74,30 @@ def create_runtime(
        headless_mode=headless_mode,
    )

-    call_async_from_sync(runtime.connect)
+    logger.debug(
+        f'Runtime created with plugins: {[plugin.name for plugin in runtime.plugins]}'
+    )

+    return runtime
+
+
+def initialize_repository_for_runtime(
+    runtime: Runtime,
+    agent: Agent | None = None,
+    selected_repository: str | None = None,
+    github_token: SecretStr | None = None,
+) -> str | None:
+    """Initialize the repository for the runtime.
+
+    Args:
+        runtime: The runtime to initialize the repository for.
+        agent: (optional) The agent to load microagents for.
+        selected_repository: (optional) The GitHub repository to use.
+        github_token: (optional) The GitHub token to use.
+
+    Returns:
+        The repository directory path if a repository was cloned, None otherwise.
+    """
    # clone selected repository if provided
    repo_directory = None
    github_token = (
@@ -98,11 +120,7 @@ def create_runtime(
        agent.prompt_manager.load_microagents(microagents)
        agent.prompt_manager.set_repository_info(selected_repository, repo_directory)

-    logger.debug(
-        f'Runtime initialized with plugins: {[plugin.name for plugin in runtime.plugins]}'
-    )
-
-    return runtime
+    return repo_directory


 def create_agent(config: AppConfig) -> Agent: