diff --git a/enterprise/experiments/experiment_manager.py b/enterprise/experiments/experiment_manager.py index b618bbed31..7c53f27414 100644 --- a/enterprise/experiments/experiment_manager.py +++ b/enterprise/experiments/experiment_manager.py @@ -1,18 +1,47 @@ +from uuid import UUID + from experiments.constants import ( ENABLE_EXPERIMENT_MANAGER, + EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT, ) from experiments.experiment_versions import ( handle_condenser_max_step_experiment, handle_system_prompt_experiment, ) +from experiments.experiment_versions._004_condenser_max_step_experiment import ( + handle_condenser_max_step_experiment__v1, +) from openhands.core.config.openhands_config import OpenHandsConfig from openhands.core.logger import openhands_logger as logger from openhands.experiments.experiment_manager import ExperimentManager +from openhands.sdk import Agent from openhands.server.session.conversation_init_data import ConversationInitData class SaaSExperimentManager(ExperimentManager): + @staticmethod + def run_agent_variant_tests__v1( + user_id: str | None, conversation_id: UUID, agent: Agent + ) -> Agent: + if not ENABLE_EXPERIMENT_MANAGER: + logger.info( + 'experiment_manager:run_conversation_variant_test:skipped', + extra={'reason': 'experiment_manager_disabled'}, + ) + return agent + + agent = handle_condenser_max_step_experiment__v1( + user_id, conversation_id, agent + ) + + if EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT: + agent = agent.model_copy( + update={'system_prompt_filename': 'system_prompt_long_horizon.j2'} + ) + + return agent + @staticmethod def run_conversation_variant_test( user_id, conversation_id, conversation_settings diff --git a/enterprise/experiments/experiment_versions/_004_condenser_max_step_experiment.py b/enterprise/experiments/experiment_versions/_004_condenser_max_step_experiment.py index 35eb14646e..5b5818cb1d 100644 --- a/enterprise/experiments/experiment_versions/_004_condenser_max_step_experiment.py +++ b/enterprise/experiments/experiment_versions/_004_condenser_max_step_experiment.py @@ -5,12 +5,18 @@ This module contains the handler for the condenser max step experiment that test different max_size values for the condenser configuration. """ +from uuid import UUID + import posthog from experiments.constants import EXPERIMENT_CONDENSER_MAX_STEP from server.constants import IS_FEATURE_ENV from storage.experiment_assignment_store import ExperimentAssignmentStore from openhands.core.logger import openhands_logger as logger +from openhands.sdk import Agent +from openhands.sdk.context.condenser import ( + LLMSummarizingCondenser, +) from openhands.server.session.conversation_init_data import ConversationInitData @@ -190,3 +196,37 @@ def handle_condenser_max_step_experiment( return conversation_settings return conversation_settings + + +def handle_condenser_max_step_experiment__v1( + user_id: str | None, + conversation_id: UUID, + agent: Agent, +) -> Agent: + enabled_variant = _get_condenser_max_step_variant(user_id, str(conversation_id)) + + if enabled_variant is None: + return agent + + if enabled_variant == 'control': + condenser_max_size = 120 + elif enabled_variant == 'treatment': + condenser_max_size = 80 + else: + logger.error( + 'condenser_max_step_experiment:unknown_variant', + extra={ + 'user_id': user_id, + 'convo_id': conversation_id, + 'variant': enabled_variant, + 'reason': 'unknown variant; returning original conversation settings', + }, + ) + return agent + + condenser_llm = agent.llm.model_copy(update={'usage_id': 'condenser'}) + condenser = LLMSummarizingCondenser( + llm=condenser_llm, max_size=condenser_max_size, keep_first=4 + ) + + return agent.model_copy(update={'condenser': condenser}) diff --git a/enterprise/poetry.lock b/enterprise/poetry.lock index 5172f87fed..d94cba5ae3 100644 --- a/enterprise/poetry.lock +++ b/enterprise/poetry.lock @@ -5737,7 +5737,7 @@ llama = ["llama-index (>=0.12.29,<0.13.0)", "llama-index-core (>=0.12.29,<0.13.0 [[package]] name = "openhands-agent-server" -version = "1.0.0a2" +version = "1.0.0a3" description = "OpenHands Agent Server - REST/WebSocket interface for OpenHands AI Agent" optional = false python-versions = ">=3.12" @@ -5759,8 +5759,8 @@ wsproto = ">=1.2.0" [package.source] type = "git" url = "https://github.com/All-Hands-AI/agent-sdk.git" -reference = "512399d896521aee3131eea4bb59087fb9dfa243" -resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243" +reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" +resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" subdirectory = "openhands-agent-server" [[package]] @@ -5805,9 +5805,9 @@ memory-profiler = "^0.61.0" numpy = "*" openai = "1.99.9" openhands-aci = "0.3.2" -openhands-agent-server = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "512399d896521aee3131eea4bb59087fb9dfa243", subdirectory = "openhands-agent-server"} -openhands-sdk = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "512399d896521aee3131eea4bb59087fb9dfa243", subdirectory = "openhands-sdk"} -openhands-tools = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "512399d896521aee3131eea4bb59087fb9dfa243", subdirectory = "openhands-tools"} +openhands-agent-server = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e", subdirectory = "openhands-agent-server"} +openhands-sdk = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e", subdirectory = "openhands-sdk"} +openhands-tools = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e", subdirectory = "openhands-tools"} opentelemetry-api = "^1.33.1" opentelemetry-exporter-otlp-proto-grpc = "^1.33.1" pathspec = "^0.12.1" @@ -5863,7 +5863,7 @@ url = ".." [[package]] name = "openhands-sdk" -version = "1.0.0a2" +version = "1.0.0a3" description = "OpenHands SDK - Core functionality for building AI agents" optional = false python-versions = ">=3.12" @@ -5887,13 +5887,13 @@ boto3 = ["boto3 (>=1.35.0)"] [package.source] type = "git" url = "https://github.com/All-Hands-AI/agent-sdk.git" -reference = "512399d896521aee3131eea4bb59087fb9dfa243" -resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243" +reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" +resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" subdirectory = "openhands-sdk" [[package]] name = "openhands-tools" -version = "1.0.0a2" +version = "1.0.0a3" description = "OpenHands Tools - Runtime tools for AI agents" optional = false python-versions = ">=3.12" @@ -5914,8 +5914,8 @@ pydantic = ">=2.11.7" [package.source] type = "git" url = "https://github.com/All-Hands-AI/agent-sdk.git" -reference = "512399d896521aee3131eea4bb59087fb9dfa243" -resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243" +reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" +resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" subdirectory = "openhands-tools" [[package]] diff --git a/enterprise/tests/unit/experiments/__init__.py b/enterprise/tests/unit/experiments/__init__.py new file mode 100644 index 0000000000..50b9db5067 --- /dev/null +++ b/enterprise/tests/unit/experiments/__init__.py @@ -0,0 +1 @@ +"""Unit tests for experiments module.""" diff --git a/enterprise/tests/unit/experiments/test_saas_experiment_manager.py b/enterprise/tests/unit/experiments/test_saas_experiment_manager.py new file mode 100644 index 0000000000..ec67c7479f --- /dev/null +++ b/enterprise/tests/unit/experiments/test_saas_experiment_manager.py @@ -0,0 +1,137 @@ +# tests/test_condenser_max_step_experiment_v1.py + +from unittest.mock import patch +from uuid import uuid4 + +from experiments.experiment_manager import SaaSExperimentManager + +# SUT imports (update the module path if needed) +from experiments.experiment_versions._004_condenser_max_step_experiment import ( + handle_condenser_max_step_experiment__v1, +) +from pydantic import SecretStr + +from openhands.sdk import LLM, Agent +from openhands.sdk.context.condenser import LLMSummarizingCondenser + + +def make_agent() -> Agent: + """Build a minimal valid Agent.""" + llm = LLM( + usage_id='primary-llm', + model='provider/model', + api_key=SecretStr('sk-test'), + ) + return Agent(llm=llm) + + +def _patch_variant(monkeypatch, return_value): + """Patch the internal variant getter to return a specific value.""" + monkeypatch.setattr( + 'experiments.experiment_versions._004_condenser_max_step_experiment._get_condenser_max_step_variant', + lambda user_id, conv_id: return_value, + raising=True, + ) + + +def test_control_variant_sets_condenser_with_max_size_120(monkeypatch): + _patch_variant(monkeypatch, 'control') + agent = make_agent() + conv_id = uuid4() + + result = handle_condenser_max_step_experiment__v1('user-1', conv_id, agent) + + # Should be a new Agent instance with a condenser installed + assert result is not agent + assert isinstance(result.condenser, LLMSummarizingCondenser) + + # The condenser should have its own LLM (usage_id overridden to "condenser") + assert result.condenser.llm.usage_id == 'condenser' + # The original agent LLM remains unchanged + assert agent.llm.usage_id == 'primary-llm' + + # Control: max_size = 120, keep_first = 4 + assert result.condenser.max_size == 120 + assert result.condenser.keep_first == 4 + + +def test_treatment_variant_sets_condenser_with_max_size_80(monkeypatch): + _patch_variant(monkeypatch, 'treatment') + agent = make_agent() + conv_id = uuid4() + + result = handle_condenser_max_step_experiment__v1('user-2', conv_id, agent) + + assert result is not agent + assert isinstance(result.condenser, LLMSummarizingCondenser) + assert result.condenser.llm.usage_id == 'condenser' + assert result.condenser.max_size == 80 + assert result.condenser.keep_first == 4 + + +def test_none_variant_returns_original_agent_without_changes(monkeypatch): + _patch_variant(monkeypatch, None) + agent = make_agent() + conv_id = uuid4() + + result = handle_condenser_max_step_experiment__v1('user-3', conv_id, agent) + + # No changes—same instance and no condenser attribute added + assert result is agent + assert getattr(result, 'condenser', None) is None + + +def test_unknown_variant_returns_original_agent_without_changes(monkeypatch): + _patch_variant(monkeypatch, 'weird-variant') + agent = make_agent() + conv_id = uuid4() + + result = handle_condenser_max_step_experiment__v1('user-4', conv_id, agent) + + assert result is agent + assert getattr(result, 'condenser', None) is None + + +@patch('experiments.experiment_manager.handle_condenser_max_step_experiment__v1') +@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', False) +def test_run_agent_variant_tests_v1_noop_when_manager_disabled( + mock_handle_condenser, +): + """If ENABLE_EXPERIMENT_MANAGER is False, the method returns the exact same agent and does not call the handler.""" + agent = make_agent() + conv_id = uuid4() + + result = SaaSExperimentManager.run_agent_variant_tests__v1( + user_id='user-123', + conversation_id=conv_id, + agent=agent, + ) + + # Same object returned (no copy) + assert result is agent + # Handler should not have been called + mock_handle_condenser.assert_not_called() + + +@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', True) +@patch('experiments.experiment_manager.EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', True) +def test_run_agent_variant_tests_v1_calls_handler_and_sets_system_prompt(monkeypatch): + """When enabled, it should call the condenser experiment handler and set the long-horizon system prompt.""" + agent = make_agent() + conv_id = uuid4() + + _patch_variant(monkeypatch, 'treatment') + + result: Agent = SaaSExperimentManager.run_agent_variant_tests__v1( + user_id='user-abc', + conversation_id=conv_id, + agent=agent, + ) + + # Should be a different instance than the original (copied after handler runs) + assert result is not agent + assert result.system_prompt_filename == 'system_prompt_long_horizon.j2' + + # The condenser returned by the handler must be preserved after the system-prompt override copy + assert isinstance(result.condenser, LLMSummarizingCondenser) + assert result.condenser.max_size == 80 diff --git a/openhands/app_server/app_conversation/live_status_app_conversation_service.py b/openhands/app_server/app_conversation/live_status_app_conversation_service.py index d7a9c30023..6d2c9c2794 100644 --- a/openhands/app_server/app_conversation/live_status_app_conversation_service.py +++ b/openhands/app_server/app_conversation/live_status_app_conversation_service.py @@ -5,7 +5,7 @@ from dataclasses import dataclass from datetime import datetime, timedelta from time import time from typing import AsyncGenerator, Sequence -from uuid import UUID +from uuid import UUID, uuid4 import httpx from fastapi import Request @@ -52,6 +52,7 @@ from openhands.app_server.services.injector import InjectorState from openhands.app_server.services.jwt_service import JwtService from openhands.app_server.user.user_context import UserContext from openhands.app_server.utils.async_remote_workspace import AsyncRemoteWorkspace +from openhands.experiments.experiment_manager import ExperimentManagerImpl from openhands.integrations.provider import ProviderType from openhands.sdk import LocalWorkspace from openhands.sdk.conversation.secret_source import LookupSecret, StaticSecret @@ -458,10 +459,17 @@ class LiveStatusAppConversationService(GitAppConversationService): model=user.llm_model, base_url=user.llm_base_url, api_key=user.llm_api_key, - service_id='agent', + usage_id='agent', ) agent = get_default_agent(llm=llm) + + conversation_id = uuid4() + agent = ExperimentManagerImpl.run_agent_variant_tests__v1( + user.id, conversation_id, agent + ) + start_conversation_request = StartConversationRequest( + conversation_id=conversation_id, agent=agent, workspace=workspace, confirmation_policy=AlwaysConfirm() diff --git a/openhands/experiments/experiment_manager.py b/openhands/experiments/experiment_manager.py index 4a5857d95b..013aa16bef 100644 --- a/openhands/experiments/experiment_manager.py +++ b/openhands/experiments/experiment_manager.py @@ -1,9 +1,11 @@ import os +from uuid import UUID from pydantic import BaseModel from openhands.core.config.openhands_config import OpenHandsConfig from openhands.core.logger import openhands_logger as logger +from openhands.sdk import Agent from openhands.server.session.conversation_init_data import ConversationInitData from openhands.server.shared import file_store from openhands.storage.locations import get_experiment_config_filename @@ -29,6 +31,12 @@ def load_experiment_config(conversation_id: str) -> ExperimentConfig | None: class ExperimentManager: + @staticmethod + def run_agent_variant_tests__v1( + user_id: str | None, conversation_id: UUID, agent: Agent + ) -> Agent: + return agent + @staticmethod def run_conversation_variant_test( user_id: str | None, diff --git a/poetry.lock b/poetry.lock index 8f98d0368e..49d7046fd2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -5711,8 +5711,11 @@ files = [ {file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"}, {file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"}, + {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f7f991a68d20c75cb13c5c9142b2a3f9eb161f1f12a9489c82172d1f133c0"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"}, + {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:ac7ba71f9561cd7d7b55e1ea5511543c0282e2b6450f122672a2694621d63b7e"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"}, + {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:ce31158630a6ac85bddd6b830cffd46085ff90498b397bd0a259f59d27a12188"}, {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"}, {file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"}, {file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"}, @@ -7272,7 +7275,7 @@ llama = ["llama-index (>=0.12.29,<0.13.0)", "llama-index-core (>=0.12.29,<0.13.0 [[package]] name = "openhands-agent-server" -version = "1.0.0a2" +version = "1.0.0a3" description = "OpenHands Agent Server - REST/WebSocket interface for OpenHands AI Agent" optional = false python-versions = ">=3.12" @@ -7294,13 +7297,13 @@ wsproto = ">=1.2.0" [package.source] type = "git" url = "https://github.com/All-Hands-AI/agent-sdk.git" -reference = "512399d896521aee3131eea4bb59087fb9dfa243" -resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243" +reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" +resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" subdirectory = "openhands-agent-server" [[package]] name = "openhands-sdk" -version = "1.0.0a2" +version = "1.0.0a3" description = "OpenHands SDK - Core functionality for building AI agents" optional = false python-versions = ">=3.12" @@ -7324,13 +7327,13 @@ boto3 = ["boto3 (>=1.35.0)"] [package.source] type = "git" url = "https://github.com/All-Hands-AI/agent-sdk.git" -reference = "512399d896521aee3131eea4bb59087fb9dfa243" -resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243" +reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" +resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" subdirectory = "openhands-sdk" [[package]] name = "openhands-tools" -version = "1.0.0a2" +version = "1.0.0a3" description = "OpenHands Tools - Runtime tools for AI agents" optional = false python-versions = ">=3.12" @@ -7351,8 +7354,8 @@ pydantic = ">=2.11.7" [package.source] type = "git" url = "https://github.com/All-Hands-AI/agent-sdk.git" -reference = "512399d896521aee3131eea4bb59087fb9dfa243" -resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243" +reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" +resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" subdirectory = "openhands-tools" [[package]] @@ -16521,4 +16524,4 @@ third-party-runtimes = ["daytona", "e2b-code-interpreter", "modal", "runloop-api [metadata] lock-version = "2.1" python-versions = "^3.12,<3.14" -content-hash = "03639ad9782d05163b25c507e7232d797572902ee57408bf999b72c21e3adf5e" +content-hash = "fd68ed845befeb646ee910db46f1ef9c5a1fd2e6d1ac6189c04864e0665f66ed" diff --git a/pyproject.toml b/pyproject.toml index 25b5172dd9..0f73bb0e7b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -113,10 +113,10 @@ e2b-code-interpreter = { version = "^2.0.0", optional = true } pybase62 = "^1.0.0" # V1 dependencies -openhands-agent-server = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-agent-server", rev = "512399d896521aee3131eea4bb59087fb9dfa243" } -openhands-sdk = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-sdk", rev = "512399d896521aee3131eea4bb59087fb9dfa243" } +openhands-agent-server = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-agent-server", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" } +openhands-sdk = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-sdk", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" } # This refuses to install -openhands-tools = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-tools", rev = "512399d896521aee3131eea4bb59087fb9dfa243" } +openhands-tools = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-tools", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" } python-jose = { version = ">=3.3", extras = [ "cryptography" ] } sqlalchemy = { extras = [ "asyncio" ], version = "^2.0.40" } pg8000 = "^1.31.5" diff --git a/tests/unit/experiments/__init__.py b/tests/unit/experiments/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/experiments/test_experiment_manager.py b/tests/unit/experiments/test_experiment_manager.py new file mode 100644 index 0000000000..7a23cf9079 --- /dev/null +++ b/tests/unit/experiments/test_experiment_manager.py @@ -0,0 +1,215 @@ +"""Unit tests for ExperimentManager class, focusing on the v1 agent method.""" + +from types import SimpleNamespace +from unittest.mock import Mock, patch +from uuid import UUID, uuid4 + +import pytest + +from openhands.app_server.app_conversation.live_status_app_conversation_service import ( + LiveStatusAppConversationService, +) +from openhands.experiments.experiment_manager import ExperimentManager +from openhands.sdk import Agent +from openhands.sdk.llm import LLM + + +class TestExperimentManager: + """Test cases for ExperimentManager class.""" + + def setup_method(self): + """Set up test fixtures.""" + self.user_id = 'test_user_123' + self.conversation_id = uuid4() + + # Create a mock LLM + self.mock_llm = Mock(spec=LLM) + self.mock_llm.model = 'gpt-4' + self.mock_llm.usage_id = 'agent' + + # Create a mock Agent + self.mock_agent = Mock(spec=Agent) + self.mock_agent.llm = self.mock_llm + self.mock_agent.system_prompt_filename = 'default_system_prompt.j2' + self.mock_agent.model_copy = Mock(return_value=self.mock_agent) + + def test_run_agent_variant_tests__v1_returns_agent_unchanged(self): + """Test that the base ExperimentManager returns the agent unchanged.""" + result = ExperimentManager.run_agent_variant_tests__v1( + self.user_id, self.conversation_id, self.mock_agent + ) + + assert result is self.mock_agent + assert result == self.mock_agent + + def test_run_agent_variant_tests__v1_with_none_user_id(self): + """Test that the method works with None user_id.""" + # Act + result = ExperimentManager.run_agent_variant_tests__v1( + None, self.conversation_id, self.mock_agent + ) + + # Assert + assert result is self.mock_agent + + def test_run_agent_variant_tests__v1_with_different_conversation_ids(self): + """Test that the method works with different conversation IDs.""" + conversation_id_1 = uuid4() + conversation_id_2 = uuid4() + + # Act + result_1 = ExperimentManager.run_agent_variant_tests__v1( + self.user_id, conversation_id_1, self.mock_agent + ) + result_2 = ExperimentManager.run_agent_variant_tests__v1( + self.user_id, conversation_id_2, self.mock_agent + ) + + # Assert + assert result_1 is self.mock_agent + assert result_2 is self.mock_agent + + +class TestExperimentManagerIntegration: + """Integration tests for ExperimentManager with start_app_conversation.""" + + def setup_method(self): + """Set up test fixtures.""" + self.user_id = 'test_user_123' + self.conversation_id = uuid4() + + # Create a mock LLM + self.mock_llm = Mock(spec=LLM) + self.mock_llm.model = 'gpt-4' + self.mock_llm.usage_id = 'agent' + + # Create a mock Agent + self.mock_agent = Mock(spec=Agent) + self.mock_agent.llm = self.mock_llm + self.mock_agent.system_prompt_filename = 'default_system_prompt.j2' + self.mock_agent.model_copy = Mock(return_value=self.mock_agent) + + @patch('openhands.experiments.experiment_manager.ExperimentManagerImpl') + def test_start_app_conversation_calls_experiment_manager_v1( + self, mock_experiment_manager_impl + ): + """Test that start_app_conversation calls the experiment manager v1 method with correct parameters.""" + # Arrange + mock_experiment_manager_impl.run_agent_variant_tests__v1.return_value = ( + self.mock_agent + ) + + # Create a mock service instance + mock_service = Mock(spec=LiveStatusAppConversationService) + + # Mock the _build_start_conversation_request_for_user method to simulate the call + with patch.object(mock_service, '_build_start_conversation_request_for_user'): + # Simulate the part of the code that calls the experiment manager + from uuid import uuid4 + + conversation_id = uuid4() + + # This simulates the call that happens in the actual service + result_agent = mock_experiment_manager_impl.run_agent_variant_tests__v1( + self.user_id, conversation_id, self.mock_agent + ) + + # Assert + mock_experiment_manager_impl.run_agent_variant_tests__v1.assert_called_once_with( + self.user_id, conversation_id, self.mock_agent + ) + assert result_agent == self.mock_agent + + @pytest.mark.asyncio + async def test_experiment_manager_called_with_correct_parameters_in_context__noop_pass_through( + self, + ): + """ + Use the real LiveStatusAppConversationService to build a StartConversationRequest, + and verify ExperimentManagerImpl.run_agent_variant_tests__v1: + - is called exactly once with the (user_id, generated conversation_id, agent) + - returns the *same* agent instance (no copy/mutation) + - does not tweak agent fields (LLM, system prompt, etc.) + """ + # --- Arrange: fixed UUID to assert call parameters deterministically + fixed_conversation_id = UUID('00000000-0000-0000-0000-000000000001') + + # Create a stable Agent (and LLM) we can identity-check later + mock_llm = Mock(spec=LLM) + mock_llm.model = 'gpt-4' + mock_llm.usage_id = 'agent' + + mock_agent = Mock(spec=Agent) + mock_agent.llm = mock_llm + mock_agent.system_prompt_filename = 'default_system_prompt.j2' + + # Minimal, real-ish user context used by the service + class DummyUserContext: + async def get_user_info(self): + # confirmation_mode=False -> NeverConfirm() + return SimpleNamespace( + id='test_user_123', + llm_model='gpt-4', + llm_base_url=None, + llm_api_key=None, + confirmation_mode=False, + ) + + async def get_secrets(self): + return {} + + async def get_latest_token(self, provider): + return None + + async def get_user_id(self): + return 'test_user_123' + + user_context = DummyUserContext() + + # The service requires a lot of deps, but for this test we won't exercise them. + app_conversation_info_service = Mock() + app_conversation_start_task_service = Mock() + sandbox_service = Mock() + sandbox_spec_service = Mock() + jwt_service = Mock() + httpx_client = Mock() + + service = LiveStatusAppConversationService( + init_git_in_empty_workspace=False, + user_context=user_context, + app_conversation_info_service=app_conversation_info_service, + app_conversation_start_task_service=app_conversation_start_task_service, + sandbox_service=sandbox_service, + sandbox_spec_service=sandbox_spec_service, + jwt_service=jwt_service, + sandbox_startup_timeout=30, + sandbox_startup_poll_frequency=1, + httpx_client=httpx_client, + web_url=None, + access_token_hard_timeout=None, + ) + + # Patch the pieces invoked by the service + with ( + patch( + 'openhands.app_server.app_conversation.live_status_app_conversation_service.get_default_agent', + return_value=mock_agent, + ), + patch( + 'openhands.app_server.app_conversation.live_status_app_conversation_service.uuid4', + return_value=fixed_conversation_id, + ), + ): + # --- Act: build the start request + start_req = await service._build_start_conversation_request_for_user( + initial_message=None, + git_provider=None, # Keep secrets path simple + working_dir='/tmp/project', # Arbitrary path + ) + + # The agent in the StartConversationRequest is the *same* object we provided + assert start_req.agent is mock_agent + + # No tweaks to agent fields by the experiment manager (noop) + assert start_req.agent.llm is mock_llm + assert start_req.agent.system_prompt_filename == 'default_system_prompt.j2'