Add selected_repo to command line (#6949)

This commit is contained in:
Engel Nyst 2025-02-26 20:42:59 +01:00 committed by GitHub
parent b38039e626
commit 4f98bce6df
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
29 changed files with 102 additions and 66 deletions

View File

@ -40,6 +40,11 @@ jobs:
python-version: ${{ matrix.python-version }}
cache: "poetry"
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: '22.x'
- name: Comment on PR if 'integration-test' label is present
if: github.event_name == 'pull_request' && github.event.label.name == 'integration-test'
uses: KeisukeYamashita/create-comment@v1

View File

@ -24,7 +24,6 @@ from openhands.core.config import (
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import MessageAction
from openhands.utils.async_utils import call_async_from_sync
game = None
@ -122,7 +121,6 @@ def process_instance(
# Here's how you can run the agent (similar to the `main` function) and get the final task state
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
state: State | None = asyncio.run(
run_controller(

View File

@ -34,7 +34,6 @@ from openhands.core.main import create_runtime, run_controller
from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
def get_config(
@ -211,7 +210,6 @@ def process_instance(
# =============================================
runtime: Runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance=instance)

View File

@ -34,7 +34,6 @@ from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
# Configure visibility of unit tests to the Agent.
USE_UNIT_TESTS = os.environ.get('USE_UNIT_TESTS', 'false').lower() == 'true'
@ -204,7 +203,6 @@ def process_instance(
# =============================================
runtime: Runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance=instance)

View File

@ -31,7 +31,6 @@ from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
'CodeActAgent': functools.partial(
@ -275,7 +274,6 @@ def process_instance(
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
# Here's how you can run the agent (similar to the `main` function) and get the final task state

View File

@ -34,7 +34,6 @@ from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
def codeact_user_response(state: State) -> str:
@ -400,7 +399,6 @@ def process_instance(
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
# Here's how you can run the agent (similar to the `main` function) and get the final task state

View File

@ -35,7 +35,6 @@ from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation, ErrorObservation
from openhands.events.serialization.event import event_to_dict
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
from openhands.utils.shutdown_listener import sleep_if_should_continue
USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
@ -395,7 +394,6 @@ def process_instance(
logger.info(f'Starting evaluation for instance {instance.instance_id}.')
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
try:
initialize_runtime(runtime, instance)

View File

@ -34,7 +34,6 @@ from openhands.core.main import create_runtime, run_controller
from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
EVALUATION_LLM = 'gpt-4-1106-preview'
@ -282,7 +281,6 @@ def process_instance(
# Here's how you can run the agent (similar to the `main` function) and get the final task state
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance.data_files)
state: State | None = asyncio.run(

View File

@ -31,7 +31,6 @@ from openhands.core.main import create_runtime, run_controller
from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
DATASET_CACHE_DIR = os.path.join(os.path.dirname(__file__), 'data')
@ -149,7 +148,6 @@ def process_instance(
logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
# Here's how you can run the agent (similar to the `main` function) and get the final task state

View File

@ -26,7 +26,6 @@ from openhands.core.config import (
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import MessageAction
from openhands.utils.async_utils import call_async_from_sync
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
'CodeActAgent': codeact_user_response,
@ -83,7 +82,6 @@ def process_instance(
# Here's how you can run the agent (similar to the `main` function) and get the final task state
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
state: State | None = asyncio.run(
run_controller(
config=config,

View File

@ -49,7 +49,6 @@ from openhands.events.action import (
MessageAction,
)
from openhands.events.observation import Observation
from openhands.utils.async_utils import call_async_from_sync
ACTION_FORMAT = """
<<FINAL_ANSWER||
@ -215,7 +214,6 @@ Ok now its time to start solving the question. Good luck!
"""
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
state: State | None = asyncio.run(
run_controller(
config=config,

View File

@ -39,7 +39,6 @@ from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
IMPORT_HELPER = {
'python': [
@ -233,7 +232,6 @@ def process_instance(
# Here's how you can run the agent (similar to the `main` function) and get the final task state
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
state: State | None = asyncio.run(
run_controller(

View File

@ -31,7 +31,6 @@ from openhands.events.action import (
)
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
'CodeActAgent': codeact_user_response,
@ -207,7 +206,6 @@ def process_instance(
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
# Here's how you can run the agent (similar to the `main` function) and get the final task state

View File

@ -41,7 +41,6 @@ from openhands.runtime.browser.browser_env import (
BROWSER_EVAL_GET_GOAL_ACTION,
BROWSER_EVAL_GET_REWARDS_ACTION,
)
from openhands.utils.async_utils import call_async_from_sync
SUPPORTED_AGENT_CLS = {'BrowsingAgent', 'CodeActAgent'}
@ -146,7 +145,6 @@ def process_instance(
logger.info(f'Starting evaluation for instance {env_id}.')
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
task_str, obs = initialize_runtime(runtime)
task_str += (

View File

@ -35,7 +35,6 @@ from openhands.events.action import (
)
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
def codeact_user_response_mint(state: State, task: Task, task_config: dict[str, int]):
@ -185,7 +184,6 @@ def process_instance(
)
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime)
state: State | None = asyncio.run(

View File

@ -43,7 +43,6 @@ from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
config = load_app_config()
@ -235,7 +234,6 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
# Run the agent

View File

@ -29,7 +29,6 @@ from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
'CodeActAgent': codeact_user_response,
@ -196,7 +195,6 @@ If the program uses some packages that are incompatible, please figure out alter
"""
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
# Here's how you can run the agent (similar to the `main` function) and get the final task state

View File

@ -40,7 +40,6 @@ from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation, ErrorObservation
from openhands.events.serialization.event import event_to_dict
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
from openhands.utils.shutdown_listener import sleep_if_should_continue
USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
@ -422,7 +421,6 @@ def process_instance(
f'This is the {runtime_failure_count + 1}th attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
)
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
try:
initialize_runtime(runtime, instance)

View File

@ -28,7 +28,6 @@ from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import BrowserOutputObservation, CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
def get_config(
@ -276,7 +275,6 @@ if __name__ == '__main__':
args.task_image_name, task_short_name, temp_dir, agent_llm_config, agent_config
)
runtime: Runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
init_task_env(runtime, args.server_hostname, env_llm_config)

View File

@ -27,7 +27,6 @@ from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
'CodeActAgent': codeact_user_response,
@ -105,7 +104,6 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime)
# Here's how you can run the agent (similar to the `main` function) and get the final task state

View File

@ -37,7 +37,6 @@ from openhands.runtime.browser.browser_env import (
BROWSER_EVAL_GET_GOAL_ACTION,
BROWSER_EVAL_GET_REWARDS_ACTION,
)
from openhands.utils.async_utils import call_async_from_sync
SUPPORTED_AGENT_CLS = {'VisualBrowsingAgent'}
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
@ -160,7 +159,6 @@ def process_instance(
logger.info(f'Starting evaluation for instance {env_id}.')
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
task_str, goal_image_urls = initialize_runtime(runtime)
initial_user_action = MessageAction(content=task_str, image_urls=goal_image_urls)
state: State | None = asyncio.run(

View File

@ -36,7 +36,6 @@ from openhands.runtime.browser.browser_env import (
BROWSER_EVAL_GET_GOAL_ACTION,
BROWSER_EVAL_GET_REWARDS_ACTION,
)
from openhands.utils.async_utils import call_async_from_sync
SUPPORTED_AGENT_CLS = {'BrowsingAgent'}
@ -145,7 +144,6 @@ def process_instance(
logger.info(f'Starting evaluation for instance {env_id}.')
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
task_str = initialize_runtime(runtime)
state: State | None = asyncio.run(

View File

@ -30,7 +30,6 @@ from openhands.core.main import create_runtime, run_controller
from openhands.events.action import MessageAction
from openhands.events.serialization.event import event_to_dict
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
FAKE_RESPONSES = {
'CodeActAgent': fake_user_response,
@ -109,7 +108,6 @@ def process_instance(
# create sandbox and run the agent
# =============================================
runtime: Runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
try:
test_class.initialize_runtime(runtime)

View File

@ -102,9 +102,16 @@ async def main(loop: asyncio.AbstractEventLoop):
sid = str(uuid4())
display_message(f'Session ID: {sid}')
runtime = create_runtime(config, sid=sid, headless_mode=True)
await runtime.connect()
agent = create_agent(runtime, config)
agent = create_agent(config)
runtime = create_runtime(
config,
sid=sid,
headless_mode=True,
agent=agent,
selected_repository=config.sandbox.selected_repo,
)
controller, _ = create_controller(agent, runtime, config)
event_stream = runtime.event_stream

View File

@ -71,5 +71,6 @@ class SandboxConfig(BaseModel):
remote_runtime_resource_factor: int = Field(default=1)
enable_gpu: bool = Field(default=False)
docker_runtime_kwargs: str | None = Field(default=None)
selected_repo: str | None = Field(default=None)
model_config = {'extra': 'forbid'}

View File

@ -475,9 +475,9 @@ def get_parser() -> argparse.ArgumentParser:
parser.add_argument(
'-n',
'--name',
default='',
help='Session name',
type=str,
help='Name for the session',
default='',
)
parser.add_argument(
'--eval-ids',
@ -487,8 +487,15 @@ def get_parser() -> argparse.ArgumentParser:
)
parser.add_argument(
'--no-auto-continue',
help='Disable auto-continue responses in headless mode (i.e. headless will read from stdin instead of auto-continuing)',
action='store_true',
help='Disable automatic "continue" responses in headless mode. Will read from stdin instead.',
default=False,
)
parser.add_argument(
'--selected-repo',
help='GitHub repository to clone (format: owner/repo)',
type=str,
default=None,
)
return parser
@ -555,4 +562,8 @@ def setup_config_from_args(args: argparse.Namespace) -> AppConfig:
if args.max_budget_per_task is not None:
config.max_budget_per_task = args.max_budget_per_task
# Read selected repository in config for use by CLI and main.py
if args.selected_repo is not None:
config.sandbox.selected_repo = args.selected_repo
return config

View File

@ -88,15 +88,20 @@ async def run_controller(
"""
sid = sid or generate_sid(config)
if agent is None:
agent = create_agent(config)
if runtime is None:
runtime = create_runtime(config, sid=sid, headless_mode=headless_mode)
await runtime.connect()
runtime = create_runtime(
config,
sid=sid,
headless_mode=headless_mode,
agent=agent,
selected_repository=config.sandbox.selected_repo,
)
event_stream = runtime.event_stream
if agent is None:
agent = create_agent(runtime, config)
replay_events: list[Event] | None = None
if config.replay_trajectory_path:
logger.info('Trajectory replay is enabled')

View File

@ -1,7 +1,10 @@
import hashlib
import os
import uuid
from typing import Tuple, Type
from pydantic import SecretStr
import openhands.agenthub # noqa F401 (we import this to get the agents registered)
from openhands.controller import AgentController
from openhands.controller.agent import Agent
@ -13,16 +16,21 @@ from openhands.core.logger import openhands_logger as logger
from openhands.events import EventStream
from openhands.events.event import Event
from openhands.llm.llm import LLM
from openhands.microagent.microagent import BaseMicroAgent
from openhands.runtime import get_runtime_cls
from openhands.runtime.base import Runtime
from openhands.security import SecurityAnalyzer, options
from openhands.storage import get_file_store
from openhands.utils.async_utils import call_async_from_sync
def create_runtime(
config: AppConfig,
sid: str | None = None,
headless_mode: bool = True,
agent: Agent | None = None,
selected_repository: str | None = None,
github_token: SecretStr | None = None,
) -> Runtime:
"""Create a runtime for the agent to run on.
@ -31,6 +39,8 @@ def create_runtime(
Setting it to an incompatible value will cause unexpected behavior on RemoteRuntime.
headless_mode: Whether the agent is run in headless mode. `create_runtime` is typically called within evaluation scripts,
where we don't want to have the VSCode UI open, so it defaults to True.
selected_repository: (optional) The GitHub repository to use.
github_token: (optional) The GitHub token to use.
"""
# if sid is provided on the command line, use it as the name of the event stream
# otherwise generate it on the basis of the configured jwt_secret
@ -41,8 +51,17 @@ def create_runtime(
file_store = get_file_store(config.file_store, config.file_store_path)
event_stream = EventStream(session_id, file_store)
# set up the security analyzer
if config.security.security_analyzer:
options.SecurityAnalyzers.get(
config.security.security_analyzer, SecurityAnalyzer
)(event_stream)
# agent class
agent_cls = openhands.agenthub.Agent.get_cls(config.default_agent)
if agent:
agent_cls = type(agent)
else:
agent_cls = openhands.agenthub.Agent.get_cls(config.default_agent)
# runtime and tools
runtime_cls = get_runtime_cls(config.runtime)
@ -55,10 +74,38 @@ def create_runtime(
headless_mode=headless_mode,
)
call_async_from_sync(runtime.connect)
# clone selected repository if provided
repo_directory = None
github_token = (
SecretStr(os.environ.get('GITHUB_TOKEN')) if not github_token else github_token
)
if selected_repository and github_token:
logger.debug(f'Selected repository {selected_repository}.')
repo_directory = runtime.clone_repo(
github_token,
selected_repository,
None,
)
# load microagents from selected repository
if agent and agent.prompt_manager and selected_repository and repo_directory:
agent.prompt_manager.set_runtime_info(runtime)
microagents: list[BaseMicroAgent] = runtime.get_microagents_from_selected_repo(
selected_repository
)
agent.prompt_manager.load_microagents(microagents)
agent.prompt_manager.set_repository_info(selected_repository, repo_directory)
logger.debug(
f'Runtime initialized with plugins: {[plugin.name for plugin in runtime.plugins]}'
)
return runtime
def create_agent(runtime: Runtime, config: AppConfig) -> Agent:
def create_agent(config: AppConfig) -> Agent:
agent_cls: Type[Agent] = Agent.get_cls(config.default_agent)
agent_config = config.get_agent_config(config.default_agent)
llm_config = config.get_llm_config_from_agent(config.default_agent)
@ -66,14 +113,6 @@ def create_agent(runtime: Runtime, config: AppConfig) -> Agent:
llm=LLM(config=llm_config),
config=agent_config,
)
if agent.prompt_manager:
microagents = runtime.get_microagents_from_selected_repo(None)
agent.prompt_manager.load_microagents(microagents)
if config.security.security_analyzer:
options.SecurityAnalyzers.get(
config.security.security_analyzer, SecurityAnalyzer
)(runtime.event_stream)
return agent

View File

@ -20,6 +20,7 @@ def test_parser_default_values():
assert args.llm_config is None
assert args.name == ''
assert not args.no_auto_continue
assert args.selected_repo is None
def test_parser_custom_values():
@ -52,6 +53,8 @@ def test_parser_custom_values():
'-n',
'test_session',
'--no-auto-continue',
'--selected-repo',
'owner/repo',
]
)
@ -69,6 +72,7 @@ def test_parser_custom_values():
assert args.name == 'test_session'
assert args.no_auto_continue
assert args.version
assert args.selected_repo == 'owner/repo'
def test_parser_file_overrides_task():
@ -132,10 +136,18 @@ def test_help_message(capsys):
'-n NAME, --name NAME',
'--config-file CONFIG_FILE',
'--no-auto-continue',
'--selected-repo SELECTED_REPO',
]
for element in expected_elements:
assert element in help_output, f"Expected '{element}' to be in the help message"
option_count = help_output.count(' -')
assert option_count == 18, f'Expected 18 options, found {option_count}'
assert option_count == 19, f'Expected 19 options, found {option_count}'
def test_selected_repo_format():
    """Check that ``--selected-repo`` stores an ``owner/repo`` value as given."""
    repo_arg = 'owner/repo'
    # Parse only the flag under test; every other option keeps its default.
    parsed = get_parser().parse_args(['--selected-repo', repo_arg])
    assert parsed.selected_repo == repo_arg