V1: Experiment manager (#11388)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Rohit Malhotra 2025-10-21 12:04:48 -04:00 committed by GitHub
parent fc9a87550d
commit 9d19292619
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 468 additions and 27 deletions

View File

@ -1,18 +1,47 @@
from uuid import UUID
from experiments.constants import (
ENABLE_EXPERIMENT_MANAGER,
EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
)
from experiments.experiment_versions import (
handle_condenser_max_step_experiment,
handle_system_prompt_experiment,
)
from experiments.experiment_versions._004_condenser_max_step_experiment import (
handle_condenser_max_step_experiment__v1,
)
from openhands.core.config.openhands_config import OpenHandsConfig
from openhands.core.logger import openhands_logger as logger
from openhands.experiments.experiment_manager import ExperimentManager
from openhands.sdk import Agent
from openhands.server.session.conversation_init_data import ConversationInitData
class SaaSExperimentManager(ExperimentManager):
@staticmethod
def run_agent_variant_tests__v1(
    user_id: str | None, conversation_id: UUID, agent: Agent
) -> Agent:
    """Apply the enabled V1 experiments to ``agent`` and return the result.

    Args:
        user_id: Identifier forwarded to the condenser experiment handler;
            may be ``None``.
        conversation_id: Conversation identifier forwarded to the condenser
            experiment handler.
        agent: The agent to adjust.

    Returns:
        The very same ``agent`` object when ``ENABLE_EXPERIMENT_MANAGER`` is
        falsy. Otherwise the agent returned by
        ``handle_condenser_max_step_experiment__v1``, additionally copied with
        ``system_prompt_filename`` set to the long-horizon template when
        ``EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT`` is truthy.
    """
    if not ENABLE_EXPERIMENT_MANAGER:
        # The event name identifies *this* method; it previously reused the
        # name of run_conversation_variant_test, which made the two skip
        # paths indistinguishable in the logs.
        logger.info(
            'experiment_manager:run_agent_variant_tests__v1:skipped',
            extra={'reason': 'experiment_manager_disabled'},
        )
        return agent

    agent = handle_condenser_max_step_experiment__v1(
        user_id, conversation_id, agent
    )

    if EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT:
        # model_copy returns a new agent, so the condenser installed by the
        # handler above is carried over to the copy.
        agent = agent.model_copy(
            update={'system_prompt_filename': 'system_prompt_long_horizon.j2'}
        )

    return agent
@staticmethod
def run_conversation_variant_test(
user_id, conversation_id, conversation_settings

View File

@ -5,12 +5,18 @@ This module contains the handler for the condenser max step experiment that test
different max_size values for the condenser configuration.
"""
from uuid import UUID
import posthog
from experiments.constants import EXPERIMENT_CONDENSER_MAX_STEP
from server.constants import IS_FEATURE_ENV
from storage.experiment_assignment_store import ExperimentAssignmentStore
from openhands.core.logger import openhands_logger as logger
from openhands.sdk import Agent
from openhands.sdk.context.condenser import (
LLMSummarizingCondenser,
)
from openhands.server.session.conversation_init_data import ConversationInitData
@ -190,3 +196,37 @@ def handle_condenser_max_step_experiment(
return conversation_settings
return conversation_settings
def handle_condenser_max_step_experiment__v1(
    user_id: str | None,
    conversation_id: UUID,
    agent: Agent,
) -> Agent:
    """Install the condenser chosen by the max-step experiment on ``agent``.

    Args:
        user_id: Identifier passed to ``_get_condenser_max_step_variant``;
            may be ``None``.
        conversation_id: Conversation identifier; passed to the variant
            lookup as a string.
        agent: The agent whose condenser should be configured.

    Returns:
        ``agent`` itself (same object) when no variant is assigned or the
        variant is not recognised. Otherwise a copy of ``agent`` whose
        ``condenser`` is an ``LLMSummarizingCondenser`` with ``keep_first=4``
        and ``max_size`` 120 for ``'control'`` or 80 for ``'treatment'``.
    """
    enabled_variant = _get_condenser_max_step_variant(user_id, str(conversation_id))
    if enabled_variant is None:
        return agent

    max_size_by_variant = {'control': 120, 'treatment': 80}
    condenser_max_size = max_size_by_variant.get(enabled_variant)
    if condenser_max_size is None:
        logger.error(
            'condenser_max_step_experiment:unknown_variant',
            extra={
                'user_id': user_id,
                'convo_id': conversation_id,
                'variant': enabled_variant,
                # This handler returns the agent, not conversation settings.
                'reason': 'unknown variant; returning original agent',
            },
        )
        return agent

    # The condenser gets its own copy of the agent's LLM with a distinct
    # usage_id; the agent's own LLM object is left untouched.
    condenser_llm = agent.llm.model_copy(update={'usage_id': 'condenser'})
    condenser = LLMSummarizingCondenser(
        llm=condenser_llm, max_size=condenser_max_size, keep_first=4
    )
    return agent.model_copy(update={'condenser': condenser})

24
enterprise/poetry.lock generated
View File

@ -5737,7 +5737,7 @@ llama = ["llama-index (>=0.12.29,<0.13.0)", "llama-index-core (>=0.12.29,<0.13.0
[[package]]
name = "openhands-agent-server"
version = "1.0.0a2"
version = "1.0.0a3"
description = "OpenHands Agent Server - REST/WebSocket interface for OpenHands AI Agent"
optional = false
python-versions = ">=3.12"
@ -5759,8 +5759,8 @@ wsproto = ">=1.2.0"
[package.source]
type = "git"
url = "https://github.com/All-Hands-AI/agent-sdk.git"
reference = "512399d896521aee3131eea4bb59087fb9dfa243"
resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243"
reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
subdirectory = "openhands-agent-server"
[[package]]
@ -5805,9 +5805,9 @@ memory-profiler = "^0.61.0"
numpy = "*"
openai = "1.99.9"
openhands-aci = "0.3.2"
openhands-agent-server = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "512399d896521aee3131eea4bb59087fb9dfa243", subdirectory = "openhands-agent-server"}
openhands-sdk = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "512399d896521aee3131eea4bb59087fb9dfa243", subdirectory = "openhands-sdk"}
openhands-tools = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "512399d896521aee3131eea4bb59087fb9dfa243", subdirectory = "openhands-tools"}
openhands-agent-server = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e", subdirectory = "openhands-agent-server"}
openhands-sdk = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e", subdirectory = "openhands-sdk"}
openhands-tools = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e", subdirectory = "openhands-tools"}
opentelemetry-api = "^1.33.1"
opentelemetry-exporter-otlp-proto-grpc = "^1.33.1"
pathspec = "^0.12.1"
@ -5863,7 +5863,7 @@ url = ".."
[[package]]
name = "openhands-sdk"
version = "1.0.0a2"
version = "1.0.0a3"
description = "OpenHands SDK - Core functionality for building AI agents"
optional = false
python-versions = ">=3.12"
@ -5887,13 +5887,13 @@ boto3 = ["boto3 (>=1.35.0)"]
[package.source]
type = "git"
url = "https://github.com/All-Hands-AI/agent-sdk.git"
reference = "512399d896521aee3131eea4bb59087fb9dfa243"
resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243"
reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
subdirectory = "openhands-sdk"
[[package]]
name = "openhands-tools"
version = "1.0.0a2"
version = "1.0.0a3"
description = "OpenHands Tools - Runtime tools for AI agents"
optional = false
python-versions = ">=3.12"
@ -5914,8 +5914,8 @@ pydantic = ">=2.11.7"
[package.source]
type = "git"
url = "https://github.com/All-Hands-AI/agent-sdk.git"
reference = "512399d896521aee3131eea4bb59087fb9dfa243"
resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243"
reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
subdirectory = "openhands-tools"
[[package]]

View File

@ -0,0 +1 @@
"""Unit tests for experiments module."""

View File

@ -0,0 +1,137 @@
# tests/test_condenser_max_step_experiment_v1.py
from unittest.mock import patch
from uuid import uuid4
from experiments.experiment_manager import SaaSExperimentManager
# SUT imports (update the module path if needed)
from experiments.experiment_versions._004_condenser_max_step_experiment import (
handle_condenser_max_step_experiment__v1,
)
from pydantic import SecretStr
from openhands.sdk import LLM, Agent
from openhands.sdk.context.condenser import LLMSummarizingCondenser
def make_agent() -> Agent:
    """Return an Agent wrapping a throwaway LLM configuration for tests."""
    return Agent(
        llm=LLM(
            usage_id='primary-llm',
            model='provider/model',
            api_key=SecretStr('sk-test'),
        )
    )
def _patch_variant(monkeypatch, return_value):
"""Patch the internal variant getter to return a specific value."""
monkeypatch.setattr(
'experiments.experiment_versions._004_condenser_max_step_experiment._get_condenser_max_step_variant',
lambda user_id, conv_id: return_value,
raising=True,
)
def test_control_variant_sets_condenser_with_max_size_120(monkeypatch):
    """The 'control' variant yields a copied agent whose condenser has max_size 120."""
    _patch_variant(monkeypatch, 'control')
    original_agent = make_agent()

    updated_agent = handle_condenser_max_step_experiment__v1(
        'user-1', uuid4(), original_agent
    )

    # A new Agent instance is returned, carrying a summarizing condenser.
    assert updated_agent is not original_agent
    condenser = updated_agent.condenser
    assert isinstance(condenser, LLMSummarizingCondenser)

    # The condenser's LLM is tagged separately from the agent's LLM ...
    assert condenser.llm.usage_id == 'condenser'
    # ... and the agent passed in keeps its original usage_id.
    assert original_agent.llm.usage_id == 'primary-llm'

    # Control sizing.
    assert condenser.max_size == 120
    assert condenser.keep_first == 4
def test_treatment_variant_sets_condenser_with_max_size_80(monkeypatch):
    """The 'treatment' variant yields a copied agent whose condenser has max_size 80."""
    _patch_variant(monkeypatch, 'treatment')
    original_agent = make_agent()

    updated_agent = handle_condenser_max_step_experiment__v1(
        'user-2', uuid4(), original_agent
    )

    assert updated_agent is not original_agent
    condenser = updated_agent.condenser
    assert isinstance(condenser, LLMSummarizingCondenser)
    assert condenser.llm.usage_id == 'condenser'
    # Treatment sizing.
    assert condenser.max_size == 80
    assert condenser.keep_first == 4
def test_none_variant_returns_original_agent_without_changes(monkeypatch):
    """With no assigned variant the handler hands back the very same agent."""
    _patch_variant(monkeypatch, None)
    original_agent = make_agent()

    returned_agent = handle_condenser_max_step_experiment__v1(
        'user-3', uuid4(), original_agent
    )

    # Identity is preserved and no condenser has been installed.
    assert returned_agent is original_agent
    assert getattr(returned_agent, 'condenser', None) is None
def test_unknown_variant_returns_original_agent_without_changes(monkeypatch):
    """An unrecognised variant name leaves the agent untouched."""
    _patch_variant(monkeypatch, 'weird-variant')
    original_agent = make_agent()

    returned_agent = handle_condenser_max_step_experiment__v1(
        'user-4', uuid4(), original_agent
    )

    assert returned_agent is original_agent
    assert getattr(returned_agent, 'condenser', None) is None
@patch('experiments.experiment_manager.handle_condenser_max_step_experiment__v1')
@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', False)
def test_run_agent_variant_tests_v1_noop_when_manager_disabled(
    mock_handle_condenser,
):
    """With ENABLE_EXPERIMENT_MANAGER patched to False, the agent is returned as-is and the condenser handler is never invoked."""
    original_agent = make_agent()

    returned_agent = SaaSExperimentManager.run_agent_variant_tests__v1(
        user_id='user-123',
        conversation_id=uuid4(),
        agent=original_agent,
    )

    # No copy is made on the disabled path.
    assert returned_agent is original_agent

    # The patched handler must remain uncalled.
    mock_handle_condenser.assert_not_called()
@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', True)
@patch('experiments.experiment_manager.EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', True)
def test_run_agent_variant_tests_v1_calls_handler_and_sets_system_prompt(monkeypatch):
    """With both flags patched to True, the result carries the treatment condenser and the long-horizon system prompt."""
    original_agent = make_agent()
    conversation_id = uuid4()
    _patch_variant(monkeypatch, 'treatment')

    updated_agent: Agent = SaaSExperimentManager.run_agent_variant_tests__v1(
        user_id='user-abc',
        conversation_id=conversation_id,
        agent=original_agent,
    )

    # The agent has been copied and its prompt template swapped.
    assert updated_agent is not original_agent
    assert updated_agent.system_prompt_filename == 'system_prompt_long_horizon.j2'

    # The condenser set by the experiment handler survives the later
    # system-prompt copy.
    condenser = updated_agent.condenser
    assert isinstance(condenser, LLMSummarizingCondenser)
    assert condenser.max_size == 80

View File

@ -5,7 +5,7 @@ from dataclasses import dataclass
from datetime import datetime, timedelta
from time import time
from typing import AsyncGenerator, Sequence
from uuid import UUID
from uuid import UUID, uuid4
import httpx
from fastapi import Request
@ -52,6 +52,7 @@ from openhands.app_server.services.injector import InjectorState
from openhands.app_server.services.jwt_service import JwtService
from openhands.app_server.user.user_context import UserContext
from openhands.app_server.utils.async_remote_workspace import AsyncRemoteWorkspace
from openhands.experiments.experiment_manager import ExperimentManagerImpl
from openhands.integrations.provider import ProviderType
from openhands.sdk import LocalWorkspace
from openhands.sdk.conversation.secret_source import LookupSecret, StaticSecret
@ -458,10 +459,17 @@ class LiveStatusAppConversationService(GitAppConversationService):
model=user.llm_model,
base_url=user.llm_base_url,
api_key=user.llm_api_key,
service_id='agent',
usage_id='agent',
)
agent = get_default_agent(llm=llm)
conversation_id = uuid4()
agent = ExperimentManagerImpl.run_agent_variant_tests__v1(
user.id, conversation_id, agent
)
start_conversation_request = StartConversationRequest(
conversation_id=conversation_id,
agent=agent,
workspace=workspace,
confirmation_policy=AlwaysConfirm()

View File

@ -1,9 +1,11 @@
import os
from uuid import UUID
from pydantic import BaseModel
from openhands.core.config.openhands_config import OpenHandsConfig
from openhands.core.logger import openhands_logger as logger
from openhands.sdk import Agent
from openhands.server.session.conversation_init_data import ConversationInitData
from openhands.server.shared import file_store
from openhands.storage.locations import get_experiment_config_filename
@ -29,6 +31,12 @@ def load_experiment_config(conversation_id: str) -> ExperimentConfig | None:
class ExperimentManager:
@staticmethod
def run_agent_variant_tests__v1(
user_id: str | None, conversation_id: UUID, agent: Agent
) -> Agent:
return agent
@staticmethod
def run_conversation_variant_test(
user_id: str | None,

23
poetry.lock generated
View File

@ -5711,8 +5711,11 @@ files = [
{file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"},
{file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"},
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"},
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f7f991a68d20c75cb13c5c9142b2a3f9eb161f1f12a9489c82172d1f133c0"},
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"},
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:ac7ba71f9561cd7d7b55e1ea5511543c0282e2b6450f122672a2694621d63b7e"},
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"},
{file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:ce31158630a6ac85bddd6b830cffd46085ff90498b397bd0a259f59d27a12188"},
{file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"},
{file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"},
{file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"},
@ -7272,7 +7275,7 @@ llama = ["llama-index (>=0.12.29,<0.13.0)", "llama-index-core (>=0.12.29,<0.13.0
[[package]]
name = "openhands-agent-server"
version = "1.0.0a2"
version = "1.0.0a3"
description = "OpenHands Agent Server - REST/WebSocket interface for OpenHands AI Agent"
optional = false
python-versions = ">=3.12"
@ -7294,13 +7297,13 @@ wsproto = ">=1.2.0"
[package.source]
type = "git"
url = "https://github.com/All-Hands-AI/agent-sdk.git"
reference = "512399d896521aee3131eea4bb59087fb9dfa243"
resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243"
reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
subdirectory = "openhands-agent-server"
[[package]]
name = "openhands-sdk"
version = "1.0.0a2"
version = "1.0.0a3"
description = "OpenHands SDK - Core functionality for building AI agents"
optional = false
python-versions = ">=3.12"
@ -7324,13 +7327,13 @@ boto3 = ["boto3 (>=1.35.0)"]
[package.source]
type = "git"
url = "https://github.com/All-Hands-AI/agent-sdk.git"
reference = "512399d896521aee3131eea4bb59087fb9dfa243"
resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243"
reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
subdirectory = "openhands-sdk"
[[package]]
name = "openhands-tools"
version = "1.0.0a2"
version = "1.0.0a3"
description = "OpenHands Tools - Runtime tools for AI agents"
optional = false
python-versions = ">=3.12"
@ -7351,8 +7354,8 @@ pydantic = ">=2.11.7"
[package.source]
type = "git"
url = "https://github.com/All-Hands-AI/agent-sdk.git"
reference = "512399d896521aee3131eea4bb59087fb9dfa243"
resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243"
reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
subdirectory = "openhands-tools"
[[package]]
@ -16521,4 +16524,4 @@ third-party-runtimes = ["daytona", "e2b-code-interpreter", "modal", "runloop-api
[metadata]
lock-version = "2.1"
python-versions = "^3.12,<3.14"
content-hash = "03639ad9782d05163b25c507e7232d797572902ee57408bf999b72c21e3adf5e"
content-hash = "fd68ed845befeb646ee910db46f1ef9c5a1fd2e6d1ac6189c04864e0665f66ed"

View File

@ -113,10 +113,10 @@ e2b-code-interpreter = { version = "^2.0.0", optional = true }
pybase62 = "^1.0.0"
# V1 dependencies
openhands-agent-server = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-agent-server", rev = "512399d896521aee3131eea4bb59087fb9dfa243" }
openhands-sdk = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-sdk", rev = "512399d896521aee3131eea4bb59087fb9dfa243" }
openhands-agent-server = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-agent-server", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" }
openhands-sdk = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-sdk", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" }
# This refuses to install
openhands-tools = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-tools", rev = "512399d896521aee3131eea4bb59087fb9dfa243" }
openhands-tools = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-tools", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" }
python-jose = { version = ">=3.3", extras = [ "cryptography" ] }
sqlalchemy = { extras = [ "asyncio" ], version = "^2.0.40" }
pg8000 = "^1.31.5"

View File

View File

@ -0,0 +1,215 @@
"""Unit tests for ExperimentManager class, focusing on the v1 agent method."""
from types import SimpleNamespace
from unittest.mock import Mock, patch
from uuid import UUID, uuid4
import pytest
from openhands.app_server.app_conversation.live_status_app_conversation_service import (
LiveStatusAppConversationService,
)
from openhands.experiments.experiment_manager import ExperimentManager
from openhands.sdk import Agent
from openhands.sdk.llm import LLM
class TestExperimentManager:
    """Checks that the base ExperimentManager v1 hook is a pass-through."""

    def setup_method(self):
        """Create the user id, conversation id and mocked agent shared by the tests."""
        self.user_id = 'test_user_123'
        self.conversation_id = uuid4()

        # LLM stand-in restricted to the LLM interface.
        llm = Mock(spec=LLM)
        llm.model = 'gpt-4'
        llm.usage_id = 'agent'
        self.mock_llm = llm

        # Agent stand-in; model_copy hands back the same mock.
        agent = Mock(spec=Agent)
        agent.llm = llm
        agent.system_prompt_filename = 'default_system_prompt.j2'
        agent.model_copy = Mock(return_value=agent)
        self.mock_agent = agent

    def test_run_agent_variant_tests__v1_returns_agent_unchanged(self):
        """The returned object is the agent that was passed in."""
        returned = ExperimentManager.run_agent_variant_tests__v1(
            self.user_id, self.conversation_id, self.mock_agent
        )

        assert returned is self.mock_agent
        assert returned == self.mock_agent

    def test_run_agent_variant_tests__v1_with_none_user_id(self):
        """A ``None`` user id is accepted and the agent is still passed through."""
        returned = ExperimentManager.run_agent_variant_tests__v1(
            None, self.conversation_id, self.mock_agent
        )

        assert returned is self.mock_agent

    def test_run_agent_variant_tests__v1_with_different_conversation_ids(self):
        """Two distinct conversation ids both yield the same agent object."""
        first_id = uuid4()
        second_id = uuid4()

        first_result = ExperimentManager.run_agent_variant_tests__v1(
            self.user_id, first_id, self.mock_agent
        )
        second_result = ExperimentManager.run_agent_variant_tests__v1(
            self.user_id, second_id, self.mock_agent
        )

        assert first_result is self.mock_agent
        assert second_result is self.mock_agent
class TestExperimentManagerIntegration:
    """Tests around the experiment-manager hook used when building a start request."""

    def setup_method(self):
        """Create the user id, conversation id and mocked agent shared by the tests."""
        self.user_id = 'test_user_123'
        self.conversation_id = uuid4()

        # LLM stand-in restricted to the LLM interface.
        llm = Mock(spec=LLM)
        llm.model = 'gpt-4'
        llm.usage_id = 'agent'
        self.mock_llm = llm

        # Agent stand-in; model_copy hands back the same mock.
        agent = Mock(spec=Agent)
        agent.llm = llm
        agent.system_prompt_filename = 'default_system_prompt.j2'
        agent.model_copy = Mock(return_value=agent)
        self.mock_agent = agent

    @patch('openhands.experiments.experiment_manager.ExperimentManagerImpl')
    def test_start_app_conversation_calls_experiment_manager_v1(
        self, mock_experiment_manager_impl
    ):
        """Invoke the patched ExperimentManagerImpl hook and check the recorded call.

        NOTE(review): the hook is called directly on the mock here rather than
        through the service, so this only verifies the mock's own bookkeeping.
        """
        hook = mock_experiment_manager_impl.run_agent_variant_tests__v1
        hook.return_value = self.mock_agent

        # Service stand-in restricted to the real service's interface.
        mock_service = Mock(spec=LiveStatusAppConversationService)

        with patch.object(mock_service, '_build_start_conversation_request_for_user'):
            conversation_id = uuid4()

            # Stand-in for the call the real service makes.
            result_agent = mock_experiment_manager_impl.run_agent_variant_tests__v1(
                self.user_id, conversation_id, self.mock_agent
            )

        mock_experiment_manager_impl.run_agent_variant_tests__v1.assert_called_once_with(
            self.user_id, conversation_id, self.mock_agent
        )
        assert result_agent == self.mock_agent

    @pytest.mark.asyncio
    async def test_experiment_manager_called_with_correct_parameters_in_context__noop_pass_through(
        self,
    ):
        """Build a start request with a real LiveStatusAppConversationService.

        ``get_default_agent`` and ``uuid4`` are patched in the service module.
        The test then asserts that the request carries the very same agent
        object, with its ``llm`` and ``system_prompt_filename`` untouched.

        NOTE(review): ExperimentManagerImpl is not patched or inspected here,
        so the call count and arguments of run_agent_variant_tests__v1 are not
        verified; the fixed conversation id is likewise never asserted on.
        """
        # Deterministic id returned by the patched uuid4.
        fixed_conversation_id = UUID('00000000-0000-0000-0000-000000000001')

        # Stable LLM / Agent stand-ins for the identity checks below.
        llm_stub = Mock(spec=LLM)
        llm_stub.model = 'gpt-4'
        llm_stub.usage_id = 'agent'

        agent_stub = Mock(spec=Agent)
        agent_stub.llm = llm_stub
        agent_stub.system_prompt_filename = 'default_system_prompt.j2'

        class DummyUserContext:
            """Minimal async user context with canned answers."""

            async def get_user_info(self):
                # confirmation_mode=False presumably selects the
                # non-confirming policy in the service — verify against
                # the service implementation.
                return SimpleNamespace(
                    id='test_user_123',
                    llm_model='gpt-4',
                    llm_base_url=None,
                    llm_api_key=None,
                    confirmation_mode=False,
                )

            async def get_secrets(self):
                return {}

            async def get_latest_token(self, provider):
                return None

            async def get_user_id(self):
                return 'test_user_123'

        # Every collaborator other than the user context is an unconfigured Mock.
        service = LiveStatusAppConversationService(
            init_git_in_empty_workspace=False,
            user_context=DummyUserContext(),
            app_conversation_info_service=Mock(),
            app_conversation_start_task_service=Mock(),
            sandbox_service=Mock(),
            sandbox_spec_service=Mock(),
            jwt_service=Mock(),
            sandbox_startup_timeout=30,
            sandbox_startup_poll_frequency=1,
            httpx_client=Mock(),
            web_url=None,
            access_token_hard_timeout=None,
        )

        default_agent_patch = patch(
            'openhands.app_server.app_conversation.live_status_app_conversation_service.get_default_agent',
            return_value=agent_stub,
        )
        uuid4_patch = patch(
            'openhands.app_server.app_conversation.live_status_app_conversation_service.uuid4',
            return_value=fixed_conversation_id,
        )
        with default_agent_patch, uuid4_patch:
            start_req = await service._build_start_conversation_request_for_user(
                initial_message=None,
                git_provider=None,
                working_dir='/tmp/project',
            )

        # The request carries the exact agent object that was supplied ...
        assert start_req.agent is agent_stub

        # ... and its fields are as they were configured above.
        assert start_req.agent.llm is llm_stub
        assert start_req.agent.system_prompt_filename == 'default_system_prompt.j2'