mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-25 21:36:52 +08:00
V1: Experiment manager (#11388)
Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
parent
fc9a87550d
commit
9d19292619
@ -1,18 +1,47 @@
|
||||
from uuid import UUID
|
||||
|
||||
from experiments.constants import (
|
||||
ENABLE_EXPERIMENT_MANAGER,
|
||||
EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
|
||||
)
|
||||
from experiments.experiment_versions import (
|
||||
handle_condenser_max_step_experiment,
|
||||
handle_system_prompt_experiment,
|
||||
)
|
||||
from experiments.experiment_versions._004_condenser_max_step_experiment import (
|
||||
handle_condenser_max_step_experiment__v1,
|
||||
)
|
||||
|
||||
from openhands.core.config.openhands_config import OpenHandsConfig
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.experiments.experiment_manager import ExperimentManager
|
||||
from openhands.sdk import Agent
|
||||
from openhands.server.session.conversation_init_data import ConversationInitData
|
||||
|
||||
|
||||
class SaaSExperimentManager(ExperimentManager):
|
||||
@staticmethod
|
||||
def run_agent_variant_tests__v1(
|
||||
user_id: str | None, conversation_id: UUID, agent: Agent
|
||||
) -> Agent:
|
||||
if not ENABLE_EXPERIMENT_MANAGER:
|
||||
logger.info(
|
||||
'experiment_manager:run_conversation_variant_test:skipped',
|
||||
extra={'reason': 'experiment_manager_disabled'},
|
||||
)
|
||||
return agent
|
||||
|
||||
agent = handle_condenser_max_step_experiment__v1(
|
||||
user_id, conversation_id, agent
|
||||
)
|
||||
|
||||
if EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT:
|
||||
agent = agent.model_copy(
|
||||
update={'system_prompt_filename': 'system_prompt_long_horizon.j2'}
|
||||
)
|
||||
|
||||
return agent
|
||||
|
||||
@staticmethod
|
||||
def run_conversation_variant_test(
|
||||
user_id, conversation_id, conversation_settings
|
||||
|
||||
@ -5,12 +5,18 @@ This module contains the handler for the condenser max step experiment that test
|
||||
different max_size values for the condenser configuration.
|
||||
"""
|
||||
|
||||
from uuid import UUID
|
||||
|
||||
import posthog
|
||||
from experiments.constants import EXPERIMENT_CONDENSER_MAX_STEP
|
||||
from server.constants import IS_FEATURE_ENV
|
||||
from storage.experiment_assignment_store import ExperimentAssignmentStore
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.sdk import Agent
|
||||
from openhands.sdk.context.condenser import (
|
||||
LLMSummarizingCondenser,
|
||||
)
|
||||
from openhands.server.session.conversation_init_data import ConversationInitData
|
||||
|
||||
|
||||
@ -190,3 +196,37 @@ def handle_condenser_max_step_experiment(
|
||||
return conversation_settings
|
||||
|
||||
return conversation_settings
|
||||
|
||||
|
||||
def handle_condenser_max_step_experiment__v1(
|
||||
user_id: str | None,
|
||||
conversation_id: UUID,
|
||||
agent: Agent,
|
||||
) -> Agent:
|
||||
enabled_variant = _get_condenser_max_step_variant(user_id, str(conversation_id))
|
||||
|
||||
if enabled_variant is None:
|
||||
return agent
|
||||
|
||||
if enabled_variant == 'control':
|
||||
condenser_max_size = 120
|
||||
elif enabled_variant == 'treatment':
|
||||
condenser_max_size = 80
|
||||
else:
|
||||
logger.error(
|
||||
'condenser_max_step_experiment:unknown_variant',
|
||||
extra={
|
||||
'user_id': user_id,
|
||||
'convo_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'reason': 'unknown variant; returning original conversation settings',
|
||||
},
|
||||
)
|
||||
return agent
|
||||
|
||||
condenser_llm = agent.llm.model_copy(update={'usage_id': 'condenser'})
|
||||
condenser = LLMSummarizingCondenser(
|
||||
llm=condenser_llm, max_size=condenser_max_size, keep_first=4
|
||||
)
|
||||
|
||||
return agent.model_copy(update={'condenser': condenser})
|
||||
|
||||
24
enterprise/poetry.lock
generated
24
enterprise/poetry.lock
generated
@ -5737,7 +5737,7 @@ llama = ["llama-index (>=0.12.29,<0.13.0)", "llama-index-core (>=0.12.29,<0.13.0
|
||||
|
||||
[[package]]
|
||||
name = "openhands-agent-server"
|
||||
version = "1.0.0a2"
|
||||
version = "1.0.0a3"
|
||||
description = "OpenHands Agent Server - REST/WebSocket interface for OpenHands AI Agent"
|
||||
optional = false
|
||||
python-versions = ">=3.12"
|
||||
@ -5759,8 +5759,8 @@ wsproto = ">=1.2.0"
|
||||
[package.source]
|
||||
type = "git"
|
||||
url = "https://github.com/All-Hands-AI/agent-sdk.git"
|
||||
reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
||||
resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
||||
reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||
resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||
subdirectory = "openhands-agent-server"
|
||||
|
||||
[[package]]
|
||||
@ -5805,9 +5805,9 @@ memory-profiler = "^0.61.0"
|
||||
numpy = "*"
|
||||
openai = "1.99.9"
|
||||
openhands-aci = "0.3.2"
|
||||
openhands-agent-server = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "512399d896521aee3131eea4bb59087fb9dfa243", subdirectory = "openhands-agent-server"}
|
||||
openhands-sdk = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "512399d896521aee3131eea4bb59087fb9dfa243", subdirectory = "openhands-sdk"}
|
||||
openhands-tools = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "512399d896521aee3131eea4bb59087fb9dfa243", subdirectory = "openhands-tools"}
|
||||
openhands-agent-server = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e", subdirectory = "openhands-agent-server"}
|
||||
openhands-sdk = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e", subdirectory = "openhands-sdk"}
|
||||
openhands-tools = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e", subdirectory = "openhands-tools"}
|
||||
opentelemetry-api = "^1.33.1"
|
||||
opentelemetry-exporter-otlp-proto-grpc = "^1.33.1"
|
||||
pathspec = "^0.12.1"
|
||||
@ -5863,7 +5863,7 @@ url = ".."
|
||||
|
||||
[[package]]
|
||||
name = "openhands-sdk"
|
||||
version = "1.0.0a2"
|
||||
version = "1.0.0a3"
|
||||
description = "OpenHands SDK - Core functionality for building AI agents"
|
||||
optional = false
|
||||
python-versions = ">=3.12"
|
||||
@ -5887,13 +5887,13 @@ boto3 = ["boto3 (>=1.35.0)"]
|
||||
[package.source]
|
||||
type = "git"
|
||||
url = "https://github.com/All-Hands-AI/agent-sdk.git"
|
||||
reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
||||
resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
||||
reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||
resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||
subdirectory = "openhands-sdk"
|
||||
|
||||
[[package]]
|
||||
name = "openhands-tools"
|
||||
version = "1.0.0a2"
|
||||
version = "1.0.0a3"
|
||||
description = "OpenHands Tools - Runtime tools for AI agents"
|
||||
optional = false
|
||||
python-versions = ">=3.12"
|
||||
@ -5914,8 +5914,8 @@ pydantic = ">=2.11.7"
|
||||
[package.source]
|
||||
type = "git"
|
||||
url = "https://github.com/All-Hands-AI/agent-sdk.git"
|
||||
reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
||||
resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
||||
reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||
resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||
subdirectory = "openhands-tools"
|
||||
|
||||
[[package]]
|
||||
|
||||
1
enterprise/tests/unit/experiments/__init__.py
Normal file
1
enterprise/tests/unit/experiments/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
"""Unit tests for experiments module."""
|
||||
@ -0,0 +1,137 @@
|
||||
# tests/test_condenser_max_step_experiment_v1.py
|
||||
|
||||
from unittest.mock import patch
|
||||
from uuid import uuid4
|
||||
|
||||
from experiments.experiment_manager import SaaSExperimentManager
|
||||
|
||||
# SUT imports (update the module path if needed)
|
||||
from experiments.experiment_versions._004_condenser_max_step_experiment import (
|
||||
handle_condenser_max_step_experiment__v1,
|
||||
)
|
||||
from pydantic import SecretStr
|
||||
|
||||
from openhands.sdk import LLM, Agent
|
||||
from openhands.sdk.context.condenser import LLMSummarizingCondenser
|
||||
|
||||
|
||||
def make_agent() -> Agent:
|
||||
"""Build a minimal valid Agent."""
|
||||
llm = LLM(
|
||||
usage_id='primary-llm',
|
||||
model='provider/model',
|
||||
api_key=SecretStr('sk-test'),
|
||||
)
|
||||
return Agent(llm=llm)
|
||||
|
||||
|
||||
def _patch_variant(monkeypatch, return_value):
|
||||
"""Patch the internal variant getter to return a specific value."""
|
||||
monkeypatch.setattr(
|
||||
'experiments.experiment_versions._004_condenser_max_step_experiment._get_condenser_max_step_variant',
|
||||
lambda user_id, conv_id: return_value,
|
||||
raising=True,
|
||||
)
|
||||
|
||||
|
||||
def test_control_variant_sets_condenser_with_max_size_120(monkeypatch):
|
||||
_patch_variant(monkeypatch, 'control')
|
||||
agent = make_agent()
|
||||
conv_id = uuid4()
|
||||
|
||||
result = handle_condenser_max_step_experiment__v1('user-1', conv_id, agent)
|
||||
|
||||
# Should be a new Agent instance with a condenser installed
|
||||
assert result is not agent
|
||||
assert isinstance(result.condenser, LLMSummarizingCondenser)
|
||||
|
||||
# The condenser should have its own LLM (usage_id overridden to "condenser")
|
||||
assert result.condenser.llm.usage_id == 'condenser'
|
||||
# The original agent LLM remains unchanged
|
||||
assert agent.llm.usage_id == 'primary-llm'
|
||||
|
||||
# Control: max_size = 120, keep_first = 4
|
||||
assert result.condenser.max_size == 120
|
||||
assert result.condenser.keep_first == 4
|
||||
|
||||
|
||||
def test_treatment_variant_sets_condenser_with_max_size_80(monkeypatch):
|
||||
_patch_variant(monkeypatch, 'treatment')
|
||||
agent = make_agent()
|
||||
conv_id = uuid4()
|
||||
|
||||
result = handle_condenser_max_step_experiment__v1('user-2', conv_id, agent)
|
||||
|
||||
assert result is not agent
|
||||
assert isinstance(result.condenser, LLMSummarizingCondenser)
|
||||
assert result.condenser.llm.usage_id == 'condenser'
|
||||
assert result.condenser.max_size == 80
|
||||
assert result.condenser.keep_first == 4
|
||||
|
||||
|
||||
def test_none_variant_returns_original_agent_without_changes(monkeypatch):
|
||||
_patch_variant(monkeypatch, None)
|
||||
agent = make_agent()
|
||||
conv_id = uuid4()
|
||||
|
||||
result = handle_condenser_max_step_experiment__v1('user-3', conv_id, agent)
|
||||
|
||||
# No changes—same instance and no condenser attribute added
|
||||
assert result is agent
|
||||
assert getattr(result, 'condenser', None) is None
|
||||
|
||||
|
||||
def test_unknown_variant_returns_original_agent_without_changes(monkeypatch):
|
||||
_patch_variant(monkeypatch, 'weird-variant')
|
||||
agent = make_agent()
|
||||
conv_id = uuid4()
|
||||
|
||||
result = handle_condenser_max_step_experiment__v1('user-4', conv_id, agent)
|
||||
|
||||
assert result is agent
|
||||
assert getattr(result, 'condenser', None) is None
|
||||
|
||||
|
||||
@patch('experiments.experiment_manager.handle_condenser_max_step_experiment__v1')
|
||||
@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', False)
|
||||
def test_run_agent_variant_tests_v1_noop_when_manager_disabled(
|
||||
mock_handle_condenser,
|
||||
):
|
||||
"""If ENABLE_EXPERIMENT_MANAGER is False, the method returns the exact same agent and does not call the handler."""
|
||||
agent = make_agent()
|
||||
conv_id = uuid4()
|
||||
|
||||
result = SaaSExperimentManager.run_agent_variant_tests__v1(
|
||||
user_id='user-123',
|
||||
conversation_id=conv_id,
|
||||
agent=agent,
|
||||
)
|
||||
|
||||
# Same object returned (no copy)
|
||||
assert result is agent
|
||||
# Handler should not have been called
|
||||
mock_handle_condenser.assert_not_called()
|
||||
|
||||
|
||||
@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', True)
|
||||
@patch('experiments.experiment_manager.EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', True)
|
||||
def test_run_agent_variant_tests_v1_calls_handler_and_sets_system_prompt(monkeypatch):
|
||||
"""When enabled, it should call the condenser experiment handler and set the long-horizon system prompt."""
|
||||
agent = make_agent()
|
||||
conv_id = uuid4()
|
||||
|
||||
_patch_variant(monkeypatch, 'treatment')
|
||||
|
||||
result: Agent = SaaSExperimentManager.run_agent_variant_tests__v1(
|
||||
user_id='user-abc',
|
||||
conversation_id=conv_id,
|
||||
agent=agent,
|
||||
)
|
||||
|
||||
# Should be a different instance than the original (copied after handler runs)
|
||||
assert result is not agent
|
||||
assert result.system_prompt_filename == 'system_prompt_long_horizon.j2'
|
||||
|
||||
# The condenser returned by the handler must be preserved after the system-prompt override copy
|
||||
assert isinstance(result.condenser, LLMSummarizingCondenser)
|
||||
assert result.condenser.max_size == 80
|
||||
@ -5,7 +5,7 @@ from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta
|
||||
from time import time
|
||||
from typing import AsyncGenerator, Sequence
|
||||
from uuid import UUID
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import httpx
|
||||
from fastapi import Request
|
||||
@ -52,6 +52,7 @@ from openhands.app_server.services.injector import InjectorState
|
||||
from openhands.app_server.services.jwt_service import JwtService
|
||||
from openhands.app_server.user.user_context import UserContext
|
||||
from openhands.app_server.utils.async_remote_workspace import AsyncRemoteWorkspace
|
||||
from openhands.experiments.experiment_manager import ExperimentManagerImpl
|
||||
from openhands.integrations.provider import ProviderType
|
||||
from openhands.sdk import LocalWorkspace
|
||||
from openhands.sdk.conversation.secret_source import LookupSecret, StaticSecret
|
||||
@ -458,10 +459,17 @@ class LiveStatusAppConversationService(GitAppConversationService):
|
||||
model=user.llm_model,
|
||||
base_url=user.llm_base_url,
|
||||
api_key=user.llm_api_key,
|
||||
service_id='agent',
|
||||
usage_id='agent',
|
||||
)
|
||||
agent = get_default_agent(llm=llm)
|
||||
|
||||
conversation_id = uuid4()
|
||||
agent = ExperimentManagerImpl.run_agent_variant_tests__v1(
|
||||
user.id, conversation_id, agent
|
||||
)
|
||||
|
||||
start_conversation_request = StartConversationRequest(
|
||||
conversation_id=conversation_id,
|
||||
agent=agent,
|
||||
workspace=workspace,
|
||||
confirmation_policy=AlwaysConfirm()
|
||||
|
||||
@ -1,9 +1,11 @@
|
||||
import os
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from openhands.core.config.openhands_config import OpenHandsConfig
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.sdk import Agent
|
||||
from openhands.server.session.conversation_init_data import ConversationInitData
|
||||
from openhands.server.shared import file_store
|
||||
from openhands.storage.locations import get_experiment_config_filename
|
||||
@ -29,6 +31,12 @@ def load_experiment_config(conversation_id: str) -> ExperimentConfig | None:
|
||||
|
||||
|
||||
class ExperimentManager:
|
||||
@staticmethod
|
||||
def run_agent_variant_tests__v1(
|
||||
user_id: str | None, conversation_id: UUID, agent: Agent
|
||||
) -> Agent:
|
||||
return agent
|
||||
|
||||
@staticmethod
|
||||
def run_conversation_variant_test(
|
||||
user_id: str | None,
|
||||
|
||||
23
poetry.lock
generated
23
poetry.lock
generated
@ -5711,8 +5711,11 @@ files = [
|
||||
{file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"},
|
||||
{file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"},
|
||||
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"},
|
||||
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f7f991a68d20c75cb13c5c9142b2a3f9eb161f1f12a9489c82172d1f133c0"},
|
||||
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"},
|
||||
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:ac7ba71f9561cd7d7b55e1ea5511543c0282e2b6450f122672a2694621d63b7e"},
|
||||
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"},
|
||||
{file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:ce31158630a6ac85bddd6b830cffd46085ff90498b397bd0a259f59d27a12188"},
|
||||
{file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"},
|
||||
{file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"},
|
||||
{file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"},
|
||||
@ -7272,7 +7275,7 @@ llama = ["llama-index (>=0.12.29,<0.13.0)", "llama-index-core (>=0.12.29,<0.13.0
|
||||
|
||||
[[package]]
|
||||
name = "openhands-agent-server"
|
||||
version = "1.0.0a2"
|
||||
version = "1.0.0a3"
|
||||
description = "OpenHands Agent Server - REST/WebSocket interface for OpenHands AI Agent"
|
||||
optional = false
|
||||
python-versions = ">=3.12"
|
||||
@ -7294,13 +7297,13 @@ wsproto = ">=1.2.0"
|
||||
[package.source]
|
||||
type = "git"
|
||||
url = "https://github.com/All-Hands-AI/agent-sdk.git"
|
||||
reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
||||
resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
||||
reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||
resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||
subdirectory = "openhands-agent-server"
|
||||
|
||||
[[package]]
|
||||
name = "openhands-sdk"
|
||||
version = "1.0.0a2"
|
||||
version = "1.0.0a3"
|
||||
description = "OpenHands SDK - Core functionality for building AI agents"
|
||||
optional = false
|
||||
python-versions = ">=3.12"
|
||||
@ -7324,13 +7327,13 @@ boto3 = ["boto3 (>=1.35.0)"]
|
||||
[package.source]
|
||||
type = "git"
|
||||
url = "https://github.com/All-Hands-AI/agent-sdk.git"
|
||||
reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
||||
resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
||||
reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||
resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||
subdirectory = "openhands-sdk"
|
||||
|
||||
[[package]]
|
||||
name = "openhands-tools"
|
||||
version = "1.0.0a2"
|
||||
version = "1.0.0a3"
|
||||
description = "OpenHands Tools - Runtime tools for AI agents"
|
||||
optional = false
|
||||
python-versions = ">=3.12"
|
||||
@ -7351,8 +7354,8 @@ pydantic = ">=2.11.7"
|
||||
[package.source]
|
||||
type = "git"
|
||||
url = "https://github.com/All-Hands-AI/agent-sdk.git"
|
||||
reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
||||
resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
||||
reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||
resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||
subdirectory = "openhands-tools"
|
||||
|
||||
[[package]]
|
||||
@ -16521,4 +16524,4 @@ third-party-runtimes = ["daytona", "e2b-code-interpreter", "modal", "runloop-api
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = "^3.12,<3.14"
|
||||
content-hash = "03639ad9782d05163b25c507e7232d797572902ee57408bf999b72c21e3adf5e"
|
||||
content-hash = "fd68ed845befeb646ee910db46f1ef9c5a1fd2e6d1ac6189c04864e0665f66ed"
|
||||
|
||||
@ -113,10 +113,10 @@ e2b-code-interpreter = { version = "^2.0.0", optional = true }
|
||||
pybase62 = "^1.0.0"
|
||||
|
||||
# V1 dependencies
|
||||
openhands-agent-server = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-agent-server", rev = "512399d896521aee3131eea4bb59087fb9dfa243" }
|
||||
openhands-sdk = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-sdk", rev = "512399d896521aee3131eea4bb59087fb9dfa243" }
|
||||
openhands-agent-server = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-agent-server", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" }
|
||||
openhands-sdk = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-sdk", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" }
|
||||
# This refuses to install
|
||||
openhands-tools = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-tools", rev = "512399d896521aee3131eea4bb59087fb9dfa243" }
|
||||
openhands-tools = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-tools", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" }
|
||||
python-jose = { version = ">=3.3", extras = [ "cryptography" ] }
|
||||
sqlalchemy = { extras = [ "asyncio" ], version = "^2.0.40" }
|
||||
pg8000 = "^1.31.5"
|
||||
|
||||
0
tests/unit/experiments/__init__.py
Normal file
0
tests/unit/experiments/__init__.py
Normal file
215
tests/unit/experiments/test_experiment_manager.py
Normal file
215
tests/unit/experiments/test_experiment_manager.py
Normal file
@ -0,0 +1,215 @@
|
||||
"""Unit tests for ExperimentManager class, focusing on the v1 agent method."""
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import Mock, patch
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from openhands.app_server.app_conversation.live_status_app_conversation_service import (
|
||||
LiveStatusAppConversationService,
|
||||
)
|
||||
from openhands.experiments.experiment_manager import ExperimentManager
|
||||
from openhands.sdk import Agent
|
||||
from openhands.sdk.llm import LLM
|
||||
|
||||
|
||||
class TestExperimentManager:
|
||||
"""Test cases for ExperimentManager class."""
|
||||
|
||||
def setup_method(self):
|
||||
"""Set up test fixtures."""
|
||||
self.user_id = 'test_user_123'
|
||||
self.conversation_id = uuid4()
|
||||
|
||||
# Create a mock LLM
|
||||
self.mock_llm = Mock(spec=LLM)
|
||||
self.mock_llm.model = 'gpt-4'
|
||||
self.mock_llm.usage_id = 'agent'
|
||||
|
||||
# Create a mock Agent
|
||||
self.mock_agent = Mock(spec=Agent)
|
||||
self.mock_agent.llm = self.mock_llm
|
||||
self.mock_agent.system_prompt_filename = 'default_system_prompt.j2'
|
||||
self.mock_agent.model_copy = Mock(return_value=self.mock_agent)
|
||||
|
||||
def test_run_agent_variant_tests__v1_returns_agent_unchanged(self):
|
||||
"""Test that the base ExperimentManager returns the agent unchanged."""
|
||||
result = ExperimentManager.run_agent_variant_tests__v1(
|
||||
self.user_id, self.conversation_id, self.mock_agent
|
||||
)
|
||||
|
||||
assert result is self.mock_agent
|
||||
assert result == self.mock_agent
|
||||
|
||||
def test_run_agent_variant_tests__v1_with_none_user_id(self):
|
||||
"""Test that the method works with None user_id."""
|
||||
# Act
|
||||
result = ExperimentManager.run_agent_variant_tests__v1(
|
||||
None, self.conversation_id, self.mock_agent
|
||||
)
|
||||
|
||||
# Assert
|
||||
assert result is self.mock_agent
|
||||
|
||||
def test_run_agent_variant_tests__v1_with_different_conversation_ids(self):
|
||||
"""Test that the method works with different conversation IDs."""
|
||||
conversation_id_1 = uuid4()
|
||||
conversation_id_2 = uuid4()
|
||||
|
||||
# Act
|
||||
result_1 = ExperimentManager.run_agent_variant_tests__v1(
|
||||
self.user_id, conversation_id_1, self.mock_agent
|
||||
)
|
||||
result_2 = ExperimentManager.run_agent_variant_tests__v1(
|
||||
self.user_id, conversation_id_2, self.mock_agent
|
||||
)
|
||||
|
||||
# Assert
|
||||
assert result_1 is self.mock_agent
|
||||
assert result_2 is self.mock_agent
|
||||
|
||||
|
||||
class TestExperimentManagerIntegration:
|
||||
"""Integration tests for ExperimentManager with start_app_conversation."""
|
||||
|
||||
def setup_method(self):
|
||||
"""Set up test fixtures."""
|
||||
self.user_id = 'test_user_123'
|
||||
self.conversation_id = uuid4()
|
||||
|
||||
# Create a mock LLM
|
||||
self.mock_llm = Mock(spec=LLM)
|
||||
self.mock_llm.model = 'gpt-4'
|
||||
self.mock_llm.usage_id = 'agent'
|
||||
|
||||
# Create a mock Agent
|
||||
self.mock_agent = Mock(spec=Agent)
|
||||
self.mock_agent.llm = self.mock_llm
|
||||
self.mock_agent.system_prompt_filename = 'default_system_prompt.j2'
|
||||
self.mock_agent.model_copy = Mock(return_value=self.mock_agent)
|
||||
|
||||
@patch('openhands.experiments.experiment_manager.ExperimentManagerImpl')
|
||||
def test_start_app_conversation_calls_experiment_manager_v1(
|
||||
self, mock_experiment_manager_impl
|
||||
):
|
||||
"""Test that start_app_conversation calls the experiment manager v1 method with correct parameters."""
|
||||
# Arrange
|
||||
mock_experiment_manager_impl.run_agent_variant_tests__v1.return_value = (
|
||||
self.mock_agent
|
||||
)
|
||||
|
||||
# Create a mock service instance
|
||||
mock_service = Mock(spec=LiveStatusAppConversationService)
|
||||
|
||||
# Mock the _build_start_conversation_request_for_user method to simulate the call
|
||||
with patch.object(mock_service, '_build_start_conversation_request_for_user'):
|
||||
# Simulate the part of the code that calls the experiment manager
|
||||
from uuid import uuid4
|
||||
|
||||
conversation_id = uuid4()
|
||||
|
||||
# This simulates the call that happens in the actual service
|
||||
result_agent = mock_experiment_manager_impl.run_agent_variant_tests__v1(
|
||||
self.user_id, conversation_id, self.mock_agent
|
||||
)
|
||||
|
||||
# Assert
|
||||
mock_experiment_manager_impl.run_agent_variant_tests__v1.assert_called_once_with(
|
||||
self.user_id, conversation_id, self.mock_agent
|
||||
)
|
||||
assert result_agent == self.mock_agent
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_experiment_manager_called_with_correct_parameters_in_context__noop_pass_through(
|
||||
self,
|
||||
):
|
||||
"""
|
||||
Use the real LiveStatusAppConversationService to build a StartConversationRequest,
|
||||
and verify ExperimentManagerImpl.run_agent_variant_tests__v1:
|
||||
- is called exactly once with the (user_id, generated conversation_id, agent)
|
||||
- returns the *same* agent instance (no copy/mutation)
|
||||
- does not tweak agent fields (LLM, system prompt, etc.)
|
||||
"""
|
||||
# --- Arrange: fixed UUID to assert call parameters deterministically
|
||||
fixed_conversation_id = UUID('00000000-0000-0000-0000-000000000001')
|
||||
|
||||
# Create a stable Agent (and LLM) we can identity-check later
|
||||
mock_llm = Mock(spec=LLM)
|
||||
mock_llm.model = 'gpt-4'
|
||||
mock_llm.usage_id = 'agent'
|
||||
|
||||
mock_agent = Mock(spec=Agent)
|
||||
mock_agent.llm = mock_llm
|
||||
mock_agent.system_prompt_filename = 'default_system_prompt.j2'
|
||||
|
||||
# Minimal, real-ish user context used by the service
|
||||
class DummyUserContext:
|
||||
async def get_user_info(self):
|
||||
# confirmation_mode=False -> NeverConfirm()
|
||||
return SimpleNamespace(
|
||||
id='test_user_123',
|
||||
llm_model='gpt-4',
|
||||
llm_base_url=None,
|
||||
llm_api_key=None,
|
||||
confirmation_mode=False,
|
||||
)
|
||||
|
||||
async def get_secrets(self):
|
||||
return {}
|
||||
|
||||
async def get_latest_token(self, provider):
|
||||
return None
|
||||
|
||||
async def get_user_id(self):
|
||||
return 'test_user_123'
|
||||
|
||||
user_context = DummyUserContext()
|
||||
|
||||
# The service requires a lot of deps, but for this test we won't exercise them.
|
||||
app_conversation_info_service = Mock()
|
||||
app_conversation_start_task_service = Mock()
|
||||
sandbox_service = Mock()
|
||||
sandbox_spec_service = Mock()
|
||||
jwt_service = Mock()
|
||||
httpx_client = Mock()
|
||||
|
||||
service = LiveStatusAppConversationService(
|
||||
init_git_in_empty_workspace=False,
|
||||
user_context=user_context,
|
||||
app_conversation_info_service=app_conversation_info_service,
|
||||
app_conversation_start_task_service=app_conversation_start_task_service,
|
||||
sandbox_service=sandbox_service,
|
||||
sandbox_spec_service=sandbox_spec_service,
|
||||
jwt_service=jwt_service,
|
||||
sandbox_startup_timeout=30,
|
||||
sandbox_startup_poll_frequency=1,
|
||||
httpx_client=httpx_client,
|
||||
web_url=None,
|
||||
access_token_hard_timeout=None,
|
||||
)
|
||||
|
||||
# Patch the pieces invoked by the service
|
||||
with (
|
||||
patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.get_default_agent',
|
||||
return_value=mock_agent,
|
||||
),
|
||||
patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.uuid4',
|
||||
return_value=fixed_conversation_id,
|
||||
),
|
||||
):
|
||||
# --- Act: build the start request
|
||||
start_req = await service._build_start_conversation_request_for_user(
|
||||
initial_message=None,
|
||||
git_provider=None, # Keep secrets path simple
|
||||
working_dir='/tmp/project', # Arbitrary path
|
||||
)
|
||||
|
||||
# The agent in the StartConversationRequest is the *same* object we provided
|
||||
assert start_req.agent is mock_agent
|
||||
|
||||
# No tweaks to agent fields by the experiment manager (noop)
|
||||
assert start_req.agent.llm is mock_llm
|
||||
assert start_req.agent.system_prompt_filename == 'default_system_prompt.j2'
|
||||
Loading…
x
Reference in New Issue
Block a user