Remove ExperimentManager concept from codebase (#13215)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Tim O'Farrell
2026-03-04 13:41:18 -07:00
committed by GitHub
parent 3e15b849a3
commit 15e9435b35
28 changed files with 42 additions and 1618 deletions

View File

@@ -53,7 +53,7 @@ repos:
# Use -p (package) to avoid dual module name conflict when using MYPYPATH
# MYPYPATH=enterprise allows resolving bare imports like "from integrations.xxx"
# Note: tests package excluded to avoid conflict with core openhands tests
entry: bash -c 'MYPYPATH=enterprise mypy --config-file enterprise/dev_config/python/mypy.ini -p integrations -p server -p storage -p sync -p experiments'
entry: bash -c 'MYPYPATH=enterprise mypy --config-file enterprise/dev_config/python/mypy.ini -p integrations -p server -p storage -p sync'
always_run: true
pass_filenames: false
files: ^enterprise/

View File

@@ -1,47 +0,0 @@
"""PostHog wiring and experiment feature toggles, resolved at import time.

Every toggle is read once from the environment when this module is first
imported, and the resulting configuration is logged (with the PostHog API
key masked).
"""

import os

import posthog

from openhands.core.logger import openhands_logger as logger

# Initialize PostHog
posthog.api_key = os.environ.get('POSTHOG_CLIENT_KEY', 'phc_placeholder')
posthog.host = os.environ.get('POSTHOG_HOST', 'https://us.i.posthog.com')

# Log PostHog configuration with masked API key for security
api_key = posthog.api_key
if api_key and len(api_key) > 8:
    # Show only the first and last four characters of the key.
    masked_key = f'{api_key[:4]}...{api_key[-4:]}'
else:
    masked_key = 'not_set_or_too_short'
logger.info('posthog_configuration', extra={'posthog_api_key_masked': masked_key})

# Global toggle for the experiment manager
ENABLE_EXPERIMENT_MANAGER = (
    os.environ.get('ENABLE_EXPERIMENT_MANAGER', 'false').lower() == 'true'
)

# Get the current experiment type from environment variable
# If None, no experiment is running
EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT = os.environ.get(
    'EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT', ''
)

# System prompt experiment toggle
EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT = os.environ.get(
    'EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', ''
)

# Model A/B experiment toggle (empty string disables it).
EXPERIMENT_CLAUDE4_VS_GPT5 = os.environ.get('EXPERIMENT_CLAUDE4_VS_GPT5', '')
# Condenser max-step experiment toggle (empty string disables it).
EXPERIMENT_CONDENSER_MAX_STEP = os.environ.get('EXPERIMENT_CONDENSER_MAX_STEP', '')

# Emit the resolved experiment configuration once at import time.
logger.info(
    'experiment_manager:run_conversation_variant_test:experiment_config',
    extra={
        'enable_experiment_manager': ENABLE_EXPERIMENT_MANAGER,
        'experiment_litellm_default_model_experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
        'experiment_system_prompt_experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
        'experiment_claude4_vs_gpt5_experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
        'experiment_condenser_max_step': EXPERIMENT_CONDENSER_MAX_STEP,
    },
)

View File

@@ -1,99 +0,0 @@
from uuid import UUID
from experiments.constants import (
ENABLE_EXPERIMENT_MANAGER,
EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
)
from experiments.experiment_versions import (
handle_system_prompt_experiment,
)
from openhands.core.config.openhands_config import OpenHandsConfig
from openhands.core.logger import openhands_logger as logger
from openhands.experiments.experiment_manager import ExperimentManager
from openhands.sdk import Agent
from openhands.server.session.conversation_init_data import ConversationInitData
class SaaSExperimentManager(ExperimentManager):
    """SaaS implementation of the ExperimentManager hooks.

    Applies PostHog-driven experiment variants to agents, conversation
    settings, and OpenHands configs. All hooks are no-ops when
    ENABLE_EXPERIMENT_MANAGER is false.
    """

    @staticmethod
    def run_agent_variant_tests__v1(
        user_id: str | None, conversation_id: UUID, agent: Agent
    ) -> Agent:
        """Apply agent-level experiment variants and return the agent.

        Returns the original agent untouched when the experiment manager is
        disabled. When the system prompt experiment is active, switches the
        agent's system prompt to the long-horizon prompt — except for
        planning agents, which keep their specialized prompt.
        """
        if not ENABLE_EXPERIMENT_MANAGER:
            logger.info(
                'experiment_manager:run_conversation_variant_test:skipped',
                extra={'reason': 'experiment_manager_disabled'},
            )
            return agent
        if EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT:
            # Skip experiment for planning agents which require their specialized prompt
            if agent.system_prompt_filename != 'system_prompt_planning.j2':
                # model_copy produces a new Agent; the caller's agent is not mutated.
                agent = agent.model_copy(
                    update={'system_prompt_filename': 'system_prompt_long_horizon.j2'}
                )
        return agent

    @staticmethod
    def run_conversation_variant_test(
        user_id, conversation_id, conversation_settings
    ) -> ConversationInitData:
        """
        Run conversation variant test and potentially modify the conversation settings
        based on the PostHog feature flags.

        Args:
            user_id: The user ID
            conversation_id: The conversation ID
            conversation_settings: The conversation settings that may include convo_id and llm_model

        Returns:
            The modified conversation settings
        """
        logger.debug(
            'experiment_manager:run_conversation_variant_test:started',
            extra={'user_id': user_id, 'conversation_id': conversation_id},
        )
        # Currently a no-op: settings are returned unmodified.
        return conversation_settings

    @staticmethod
    def run_config_variant_test(
        user_id: str | None, conversation_id: str, config: OpenHandsConfig
    ) -> OpenHandsConfig:
        """
        Run agent config variant test and potentially modify the OpenHands config
        based on the current experiment type and PostHog feature flags.

        Args:
            user_id: The user ID
            conversation_id: The conversation ID
            config: The OpenHands configuration

        Returns:
            The modified OpenHands configuration
        """
        logger.info(
            'experiment_manager:run_config_variant_test:started',
            extra={'user_id': user_id},
        )
        # Skip all experiment processing if the experiment manager is disabled
        if not ENABLE_EXPERIMENT_MANAGER:
            logger.info(
                'experiment_manager:run_config_variant_test:skipped',
                extra={'reason': 'experiment_manager_disabled'},
            )
            return config
        # Pass the entire OpenHands config to the system prompt experiment
        # Let the experiment handler directly modify the config as needed
        modified_config = handle_system_prompt_experiment(
            user_id, conversation_id, config
        )
        # Condenser max step experiment is applied via conversation variant test,
        # not config variant test. Return modified config from system prompt only.
        return modified_config

View File

@@ -1,107 +0,0 @@
"""
LiteLLM model experiment handler.
This module contains the handler for the LiteLLM model experiment.
"""
import posthog
from experiments.constants import EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT
from server.constants import (
IS_FEATURE_ENV,
build_litellm_proxy_model_path,
get_default_litellm_model,
)
from openhands.core.logger import openhands_logger as logger
def handle_litellm_default_model_experiment(
    user_id, conversation_id, conversation_settings
):
    """
    Handle the LiteLLM model experiment.

    Fetches the PostHog variant for this conversation, records the
    assignment event, and updates ``conversation_settings.llm_model``
    accordingly. Any PostHog failure leaves the settings unmodified.

    Args:
        user_id: The user ID
        conversation_id: The conversation ID
        conversation_settings: The conversation settings

    Returns:
        Modified conversation settings
    """
    # No-op if the specific experiment is not enabled
    if not EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT:
        logger.info(
            'experiment_manager:ab_testing:skipped',
            extra={
                'convo_id': conversation_id,
                'reason': 'experiment_not_enabled',
                'experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
            },
        )
        return conversation_settings
    # Use experiment name as the flag key
    try:
        enabled_variant = posthog.get_feature_flag(
            EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT, conversation_id
        )
    except Exception as e:
        # A flag-service failure must not break conversation creation.
        logger.error(
            'experiment_manager:get_feature_flag:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
                'error': str(e),
            },
        )
        return conversation_settings
    # Log the experiment event
    # If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog
    posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id
    try:
        posthog.capture(
            distinct_id=posthog_user_id,
            event='model_set',
            properties={
                'conversation_id': conversation_id,
                'variant': enabled_variant,
                'original_user_id': user_id,
                'is_feature_env': IS_FEATURE_ENV,
            },
        )
    except Exception as e:
        logger.error(
            'experiment_manager:posthog_capture:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
                'error': str(e),
            },
        )
        # Continue execution as this is not critical
    logger.info(
        'posthog_capture',
        extra={
            'event': 'model_set',
            'posthog_user_id': posthog_user_id,
            'is_feature_env': IS_FEATURE_ENV,
            'conversation_id': conversation_id,
            'variant': enabled_variant,
        },
    )
    # Set the model based on the feature flag variant
    if enabled_variant == 'claude37':
        # Use the shared utility to construct the LiteLLM proxy model path
        model = build_litellm_proxy_model_path('claude-3-7-sonnet-20250219')
        # Update the conversation settings with the selected model
        conversation_settings.llm_model = model
    else:
        # Update the conversation settings with the default model for the current version
        conversation_settings.llm_model = get_default_litellm_model()
    return conversation_settings

View File

@@ -1,181 +0,0 @@
"""
System prompt experiment handler.
This module contains the handler for the system prompt experiment that uses
the PostHog variant as the system prompt filename.
"""
import copy
import posthog
from experiments.constants import EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT
from server.constants import IS_FEATURE_ENV
from storage.experiment_assignment_store import ExperimentAssignmentStore
from openhands.core.config.openhands_config import OpenHandsConfig
from openhands.core.logger import openhands_logger as logger
def _get_system_prompt_variant(user_id, conversation_id):
    """
    Get the system prompt variant for the experiment.

    Pipeline: fetch the PostHog flag, persist the assignment to the DB,
    then capture the analytics event. A DB-store failure aborts the
    experiment (returns None) so the split remains explainable; a capture
    failure is tolerated.

    Args:
        user_id: The user ID
        conversation_id: The conversation ID

    Returns:
        str or None: The PostHog variant name or None if experiment is not enabled or error occurs
    """
    # No-op if the specific experiment is not enabled
    if not EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT:
        logger.info(
            'experiment_manager_002:ab_testing:skipped',
            extra={
                'convo_id': conversation_id,
                'reason': 'experiment_not_enabled',
                'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
            },
        )
        return None
    # Use experiment name as the flag key
    try:
        enabled_variant = posthog.get_feature_flag(
            EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT, conversation_id
        )
    except Exception as e:
        logger.error(
            'experiment_manager:get_feature_flag:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
                'error': str(e),
            },
        )
        return None
    # Store the experiment assignment in the database
    try:
        experiment_store = ExperimentAssignmentStore()
        experiment_store.update_experiment_variant(
            conversation_id=conversation_id,
            experiment_name='system_prompt_experiment',
            variant=enabled_variant,
        )
    except Exception as e:
        logger.error(
            'experiment_manager:store_assignment:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
                'variant': enabled_variant,
                'error': str(e),
            },
        )
        # Fail the experiment if we cannot track the splits - results would not be explainable
        return None
    # Log the experiment event
    # If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog
    posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id
    try:
        posthog.capture(
            distinct_id=posthog_user_id,
            event='system_prompt_set',
            properties={
                'conversation_id': conversation_id,
                'variant': enabled_variant,
                'original_user_id': user_id,
                'is_feature_env': IS_FEATURE_ENV,
            },
        )
    except Exception as e:
        logger.error(
            'experiment_manager:posthog_capture:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
                'error': str(e),
            },
        )
        # Continue execution as this is not critical
    logger.info(
        'posthog_capture',
        extra={
            'event': 'system_prompt_set',
            'posthog_user_id': posthog_user_id,
            'is_feature_env': IS_FEATURE_ENV,
            'conversation_id': conversation_id,
            'variant': enabled_variant,
        },
    )
    return enabled_variant
def handle_system_prompt_experiment(
    user_id, conversation_id, config: OpenHandsConfig
) -> OpenHandsConfig:
    """
    Handle the system prompt experiment for OpenHands config.

    Args:
        user_id: The user ID
        conversation_id: The conversation ID
        config: The OpenHands configuration

    Returns:
        Modified OpenHands configuration
    """
    variant = _get_system_prompt_variant(user_id, conversation_id)
    # A None variant means the experiment is off or variant lookup failed.
    if variant is None:
        return config

    # Known variants and the system prompt filename each one selects.
    prompt_by_variant = {
        'control': 'system_prompt_long_horizon.j2',
        'interactive': 'system_prompt_interactive.j2',
        'no_tools': 'system_prompt.j2',
    }
    prompt_filename = prompt_by_variant.get(variant)
    if prompt_filename is None:
        logger.error(
            'system_prompt_experiment:unknown_variant',
            extra={
                'user_id': user_id,
                'convo_id': conversation_id,
                'variant': variant,
                'reason': 'no explicit mapping; returning original config',
            },
        )
        return config

    # Work on a deep copy so the caller's config is never mutated.
    updated_config = copy.deepcopy(config)
    agent_config = updated_config.get_agent_config(updated_config.default_agent)
    agent_config.system_prompt_filename = prompt_filename
    if variant == 'control':
        # The control variant additionally turns on plan mode.
        agent_config.enable_plan_mode = True

    # Log which prompt is being used for this conversation.
    logger.info(
        'system_prompt_experiment:prompt_selected',
        extra={
            'user_id': user_id,
            'convo_id': conversation_id,
            'system_prompt_filename': agent_config.system_prompt_filename,
            'variant': variant,
        },
    )
    return updated_config

View File

@@ -1,137 +0,0 @@
"""
LiteLLM model experiment handler.
This module contains the handler for the LiteLLM model experiment.
"""
import posthog
from experiments.constants import EXPERIMENT_CLAUDE4_VS_GPT5
from server.constants import (
IS_FEATURE_ENV,
build_litellm_proxy_model_path,
get_default_litellm_model,
)
from storage.experiment_assignment_store import ExperimentAssignmentStore
from openhands.core.logger import openhands_logger as logger
from openhands.server.session.conversation_init_data import ConversationInitData
def _get_model_variant(user_id: str | None, conversation_id: str) -> str | None:
    """Return the PostHog variant for the Claude 4 vs GPT-5 experiment.

    Returns None when the experiment is disabled, the flag lookup fails,
    or the assignment cannot be persisted. A failed analytics capture is
    tolerated and does not abort the experiment.
    """
    if not EXPERIMENT_CLAUDE4_VS_GPT5:
        logger.info(
            'experiment_manager:ab_testing:skipped',
            extra={
                'convo_id': conversation_id,
                'reason': 'experiment_not_enabled',
                'experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
            },
        )
        return None
    # The experiment name doubles as the PostHog flag key.
    try:
        enabled_variant = posthog.get_feature_flag(
            EXPERIMENT_CLAUDE4_VS_GPT5, conversation_id
        )
    except Exception as e:
        logger.error(
            'experiment_manager:get_feature_flag:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
                'error': str(e),
            },
        )
        return None
    # Store the experiment assignment in the database
    try:
        experiment_store = ExperimentAssignmentStore()
        experiment_store.update_experiment_variant(
            conversation_id=conversation_id,
            experiment_name='claude4_vs_gpt5_experiment',
            variant=enabled_variant,
        )
    except Exception as e:
        logger.error(
            'experiment_manager:store_assignment:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
                'variant': enabled_variant,
                'error': str(e),
            },
        )
        # Fail the experiment if we cannot track the splits - results would not be explainable
        return None
    # Log the experiment event
    # If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog
    posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id
    try:
        posthog.capture(
            distinct_id=posthog_user_id,
            event='claude4_or_gpt5_set',
            properties={
                'conversation_id': conversation_id,
                'variant': enabled_variant,
                'original_user_id': user_id,
                'is_feature_env': IS_FEATURE_ENV,
            },
        )
    except Exception as e:
        logger.error(
            'experiment_manager:posthog_capture:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
                'error': str(e),
            },
        )
        # Continue execution as this is not critical
    logger.info(
        'posthog_capture',
        extra={
            'event': 'claude4_or_gpt5_set',
            'posthog_user_id': posthog_user_id,
            'is_feature_env': IS_FEATURE_ENV,
            'conversation_id': conversation_id,
            'variant': enabled_variant,
        },
    )
    return enabled_variant
def handle_claude4_vs_gpt5_experiment(
    user_id: str | None,
    conversation_id: str,
    conversation_settings: ConversationInitData,
) -> ConversationInitData:
    """
    Handle the LiteLLM model experiment.

    Args:
        user_id: The user ID
        conversation_id: The conversation ID
        conversation_settings: The conversation settings

    Returns:
        Modified conversation settings
    """
    variant = _get_model_variant(user_id, conversation_id)
    if not variant:
        # Experiment disabled or assignment failed: leave settings untouched.
        return conversation_settings

    # The gpt5 variant gets the pinned GPT-5 snapshot via the LiteLLM proxy;
    # every other variant falls back to the current default model.
    if variant == 'gpt5':
        conversation_settings.llm_model = build_litellm_proxy_model_path(
            'gpt-5-2025-08-07'
        )
    else:
        conversation_settings.llm_model = get_default_litellm_model()
    return conversation_settings

View File

@@ -1,232 +0,0 @@
"""
Condenser max step experiment handler.
This module contains the handler for the condenser max step experiment that tests
different max_size values for the condenser configuration.
"""
from uuid import UUID
import posthog
from experiments.constants import EXPERIMENT_CONDENSER_MAX_STEP
from server.constants import IS_FEATURE_ENV
from storage.experiment_assignment_store import ExperimentAssignmentStore
from openhands.core.logger import openhands_logger as logger
from openhands.sdk import Agent
from openhands.sdk.context.condenser import (
LLMSummarizingCondenser,
)
from openhands.server.session.conversation_init_data import ConversationInitData
def _get_condenser_max_step_variant(user_id, conversation_id):
    """
    Get the condenser max step variant for the experiment.

    Pipeline: fetch the PostHog flag, persist the assignment, capture the
    analytics event. Persistence failure aborts (returns None) so splits
    stay explainable; capture failure is tolerated.

    Args:
        user_id: The user ID
        conversation_id: The conversation ID

    Returns:
        str or None: The PostHog variant name or None if experiment is not enabled or error occurs
    """
    # No-op if the specific experiment is not enabled
    if not EXPERIMENT_CONDENSER_MAX_STEP:
        logger.info(
            'experiment_manager_004:ab_testing:skipped',
            extra={
                'convo_id': conversation_id,
                'reason': 'experiment_not_enabled',
                'experiment': EXPERIMENT_CONDENSER_MAX_STEP,
            },
        )
        return None
    # Use experiment name as the flag key
    try:
        enabled_variant = posthog.get_feature_flag(
            EXPERIMENT_CONDENSER_MAX_STEP, conversation_id
        )
    except Exception as e:
        logger.error(
            'experiment_manager:get_feature_flag:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_CONDENSER_MAX_STEP,
                'error': str(e),
            },
        )
        return None
    # Store the experiment assignment in the database
    try:
        experiment_store = ExperimentAssignmentStore()
        experiment_store.update_experiment_variant(
            conversation_id=conversation_id,
            experiment_name='condenser_max_step_experiment',
            variant=enabled_variant,
        )
    except Exception as e:
        logger.error(
            'experiment_manager:store_assignment:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_CONDENSER_MAX_STEP,
                'variant': enabled_variant,
                'error': str(e),
            },
        )
        # Fail the experiment if we cannot track the splits - results would not be explainable
        return None
    # Log the experiment event
    # If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog
    posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id
    try:
        posthog.capture(
            distinct_id=posthog_user_id,
            event='condenser_max_step_set',
            properties={
                'conversation_id': conversation_id,
                'variant': enabled_variant,
                'original_user_id': user_id,
                'is_feature_env': IS_FEATURE_ENV,
            },
        )
    except Exception as e:
        logger.error(
            'experiment_manager:posthog_capture:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_CONDENSER_MAX_STEP,
                'error': str(e),
            },
        )
        # Continue execution as this is not critical
    logger.info(
        'posthog_capture',
        extra={
            'event': 'condenser_max_step_set',
            'posthog_user_id': posthog_user_id,
            'is_feature_env': IS_FEATURE_ENV,
            'conversation_id': conversation_id,
            'variant': enabled_variant,
        },
    )
    return enabled_variant
def handle_condenser_max_step_experiment(
    user_id: str | None,
    conversation_id: str,
    conversation_settings: ConversationInitData,
) -> ConversationInitData:
    """
    Handle the condenser max step experiment for conversation settings.

    We should not modify persistent user settings. Instead, apply the experiment
    variant to the conversation's in-memory settings object for this session only.

    Variants:
        - control -> condenser_max_size = 120
        - treatment -> condenser_max_size = 80

    Returns the (potentially) modified conversation_settings.
    """
    enabled_variant = _get_condenser_max_step_variant(user_id, conversation_id)
    if enabled_variant is None:
        # Experiment off or assignment failed; settings unchanged.
        return conversation_settings
    if enabled_variant == 'control':
        condenser_max_size = 120
    elif enabled_variant == 'treatment':
        condenser_max_size = 80
    else:
        logger.error(
            'condenser_max_step_experiment:unknown_variant',
            extra={
                'user_id': user_id,
                'convo_id': conversation_id,
                'variant': enabled_variant,
                'reason': 'unknown variant; returning original conversation settings',
            },
        )
        return conversation_settings
    try:
        # Apply the variant to this conversation only; do not persist to DB.
        # Not all OpenHands versions expose `condenser_max_size` on settings.
        if hasattr(conversation_settings, 'condenser_max_size'):
            conversation_settings.condenser_max_size = condenser_max_size
            logger.info(
                'condenser_max_step_experiment:conversation_settings_applied',
                extra={
                    'user_id': user_id,
                    'convo_id': conversation_id,
                    'variant': enabled_variant,
                    'condenser_max_size': condenser_max_size,
                },
            )
        else:
            logger.warning(
                'condenser_max_step_experiment:field_missing_on_settings',
                extra={
                    'user_id': user_id,
                    'convo_id': conversation_id,
                    'variant': enabled_variant,
                    'reason': 'condenser_max_size not present on ConversationInitData',
                },
            )
    except Exception as e:
        # Best-effort: a failure to apply never blocks conversation startup.
        logger.error(
            'condenser_max_step_experiment:apply_failed',
            extra={
                'user_id': user_id,
                'convo_id': conversation_id,
                'variant': enabled_variant,
                'error': str(e),
            },
        )
        return conversation_settings
    return conversation_settings
def handle_condenser_max_step_experiment__v1(
    user_id: str | None,
    conversation_id: UUID,
    agent: Agent,
) -> Agent:
    """Return a copy of *agent* with an experiment-sized condenser attached.

    The condenser's max_size is chosen by the PostHog variant
    (control -> 120, treatment -> 80). The original agent is returned
    unchanged when the experiment is off or the variant is unrecognized.
    """
    variant = _get_condenser_max_step_variant(user_id, str(conversation_id))
    if variant is None:
        return agent

    size_by_variant = {'control': 120, 'treatment': 80}
    condenser_max_size = size_by_variant.get(variant)
    if condenser_max_size is None:
        logger.error(
            'condenser_max_step_experiment:unknown_variant',
            extra={
                'user_id': user_id,
                'convo_id': conversation_id,
                'variant': variant,
                'reason': 'unknown variant; returning original conversation settings',
            },
        )
        return agent

    # Give the condenser its own LLM handle (separate usage tracking id).
    condenser = LLMSummarizingCondenser(
        llm=agent.llm.model_copy(update={'usage_id': 'condenser'}),
        max_size=condenser_max_size,
        keep_first=4,
    )
    return agent.model_copy(update={'condenser': condenser})

View File

@@ -1,25 +0,0 @@
"""
Experiment versions package.
This package contains handlers for different experiment versions.
"""
from experiments.experiment_versions._001_litellm_default_model_experiment import (
handle_litellm_default_model_experiment,
)
from experiments.experiment_versions._002_system_prompt_experiment import (
handle_system_prompt_experiment,
)
from experiments.experiment_versions._003_llm_claude4_vs_gpt5_experiment import (
handle_claude4_vs_gpt5_experiment,
)
from experiments.experiment_versions._004_condenser_max_step_experiment import (
handle_condenser_max_step_experiment,
)
__all__ = [
'handle_litellm_default_model_experiment',
'handle_system_prompt_experiment',
'handle_claude4_vs_gpt5_experiment',
'handle_condenser_max_step_experiment',
]

View File

@@ -17,7 +17,6 @@ packages = [
{ include = "storage" },
{ include = "sync" },
{ include = "integrations" },
{ include = "experiments" },
]
[tool.poetry.dependencies]

View File

@@ -129,10 +129,6 @@ async def _process_batch_operations_background(
# No action required
continue
if subpath == 'exp_config.json':
# No action required
continue
# Log unhandled paths for future implementation
logger.warning(
'unknown_path_in_batch_webhook',

View File

@@ -391,39 +391,11 @@ class SaasNestedConversationManager(ConversationManager):
await self._setup_nested_settings(client, api_url, settings)
await self._setup_provider_tokens(client, api_url, settings)
await self._setup_custom_secrets(client, api_url, settings.custom_secrets) # type: ignore
await self._setup_experiment_config(client, api_url, sid, user_id)
await self._create_nested_conversation(
client, api_url, sid, user_id, settings, initial_user_msg, replay_json
)
await self._wait_for_conversation_ready(client, api_url, sid)
    async def _setup_experiment_config(
        self, client: httpx.AsyncClient, api_url: str, sid: str, user_id: str
    ):
        """Push the experiment-derived config to the nested conversation.

        Runs the config variant test for this user/conversation and POSTs
        the resulting system prompt filename to the nested server's
        exp-config endpoint. Raises httpx.HTTPStatusError on a non-2xx
        response.
        """
        # Prevent circular import
        from openhands.experiments.experiment_manager import (
            ExperimentConfig,
            ExperimentManagerImpl,
        )

        config: OpenHandsConfig = ExperimentManagerImpl.run_config_variant_test(
            user_id, sid, self.config
        )
        # Only the system prompt filename is forwarded to the nested server.
        experiment_config = ExperimentConfig(
            config={
                'system_prompt_filename': config.get_agent_config(
                    config.default_agent
                ).system_prompt_filename
            }
        )
        response = await client.post(
            f'{api_url}/api/conversations/{sid}/exp-config',
            json=experiment_config.model_dump(),
        )
        response.raise_for_status()
async def _setup_nested_settings(
self, client: httpx.AsyncClient, api_url: str, settings: Settings
) -> None:

View File

@@ -4,7 +4,6 @@ from storage.billing_session import BillingSession
from storage.billing_session_type import BillingSessionType
from storage.conversation_callback import CallbackStatus, ConversationCallback
from storage.conversation_work import ConversationWork
from storage.experiment_assignment import ExperimentAssignment
from storage.feedback import ConversationFeedback, Feedback
from storage.github_app_installation import GithubAppInstallation
from storage.gitlab_webhook import GitlabWebhook, WebhookStatus
@@ -50,7 +49,6 @@ __all__ = [
'ConversationFeedback',
'StoredConversationMetadataSaas',
'ConversationWork',
'ExperimentAssignment',
'Feedback',
'GithubAppInstallation',
'GitlabWebhook',

View File

@@ -1,41 +0,0 @@
"""
Database model for experiment assignments.
This model tracks which experiments a conversation is assigned to and what variant
they received from PostHog feature flags.
"""
import uuid
from datetime import UTC, datetime
from sqlalchemy import Column, DateTime, String, UniqueConstraint
from storage.base import Base
class ExperimentAssignment(Base):  # type: ignore
    """One row per (conversation, experiment) recording the assigned variant."""

    __tablename__ = 'experiment_assignments'

    # Surrogate primary key: a random UUID string generated per row.
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    # Conversation this assignment belongs to; indexed for lookups.
    conversation_id = Column(String, nullable=True, index=True)
    # Logical experiment name, e.g. 'system_prompt_experiment'.
    experiment_name = Column(String, nullable=False)
    # Variant string returned by the feature-flag provider.
    variant = Column(String, nullable=False)
    # Timezone-aware creation timestamp (UTC).
    created_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(UTC),  # type: ignore[attr-defined]
        nullable=False,
    )
    # Timezone-aware last-update timestamp (UTC); refreshed on every update.
    updated_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(UTC),  # type: ignore[attr-defined]
        onupdate=lambda: datetime.now(UTC),  # type: ignore[attr-defined]
        nullable=False,
    )
    # Each conversation may hold at most one assignment per experiment.
    __table_args__ = (
        UniqueConstraint(
            'conversation_id',
            'experiment_name',
            name='uq_experiment_assignments_conversation_experiment',
        ),
    )

View File

@@ -1,52 +0,0 @@
"""
Store for managing experiment assignments.
This store handles creating and updating experiment assignments for conversations.
"""
from sqlalchemy.dialects.postgresql import insert
from storage.database import session_maker
from storage.experiment_assignment import ExperimentAssignment
from openhands.core.logger import openhands_logger as logger
class ExperimentAssignmentStore:
    """Store for managing experiment assignments."""

    def update_experiment_variant(
        self,
        conversation_id: str,
        experiment_name: str,
        variant: str,
    ) -> None:
        """
        Update the variant for a specific experiment.

        The write is idempotent: a second call for the same
        (conversation_id, experiment_name) pair is silently ignored.

        Args:
            conversation_id: The conversation ID
            experiment_name: The name of the experiment
            variant: The variant assigned
        """
        with session_maker() as session:
            # Use PostgreSQL's INSERT ... ON CONFLICT DO NOTHING to handle unique constraint
            stmt = insert(ExperimentAssignment).values(
                conversation_id=conversation_id,
                experiment_name=experiment_name,
                variant=variant,
            )
            stmt = stmt.on_conflict_do_nothing(
                constraint='uq_experiment_assignments_conversation_experiment'
            )
            session.execute(stmt)
            session.commit()
            # NOTE(review): log fires even when the insert was a no-op conflict.
            logger.info(
                'experiment_assignment_store:upserted_variant',
                extra={
                    'conversation_id': conversation_id,
                    'experiment_name': experiment_name,
                    'variant': variant,
                },
            )

View File

@@ -1 +0,0 @@
"""Unit tests for experiments module."""

View File

@@ -1,149 +0,0 @@
# tests/test_condenser_max_step_experiment_v1.py
from unittest.mock import patch
from uuid import uuid4
from experiments.experiment_manager import SaaSExperimentManager
# SUT imports (update the module path if needed)
from experiments.experiment_versions._004_condenser_max_step_experiment import (
handle_condenser_max_step_experiment__v1,
)
from pydantic import SecretStr
from openhands.sdk import LLM, Agent
from openhands.sdk.context.condenser import LLMSummarizingCondenser
def make_agent() -> Agent:
    """Construct the smallest Agent that passes validation."""
    return Agent(
        llm=LLM(
            usage_id='primary-llm',
            model='provider/model',
            api_key=SecretStr('sk-test'),
        )
    )
def _patch_variant(monkeypatch, return_value):
    """Force the condenser variant getter to yield *return_value*."""
    target = (
        'experiments.experiment_versions._004_condenser_max_step_experiment'
        '._get_condenser_max_step_variant'
    )
    monkeypatch.setattr(
        target,
        lambda user_id, conv_id: return_value,
        raising=True,
    )
def test_control_variant_sets_condenser_with_max_size_120(monkeypatch):
    """Control variant installs an LLMSummarizingCondenser with max_size=120."""
    _patch_variant(monkeypatch, 'control')
    agent = make_agent()
    conv_id = uuid4()

    result = handle_condenser_max_step_experiment__v1('user-1', conv_id, agent)

    # Should be a new Agent instance with a condenser installed
    assert result is not agent
    assert isinstance(result.condenser, LLMSummarizingCondenser)
    # The condenser should have its own LLM (usage_id overridden to "condenser")
    assert result.condenser.llm.usage_id == 'condenser'
    # The original agent LLM remains unchanged
    assert agent.llm.usage_id == 'primary-llm'
    # Control: max_size = 120, keep_first = 4
    assert result.condenser.max_size == 120
    assert result.condenser.keep_first == 4


def test_treatment_variant_sets_condenser_with_max_size_80(monkeypatch):
    """Treatment variant installs an LLMSummarizingCondenser with max_size=80."""
    _patch_variant(monkeypatch, 'treatment')
    agent = make_agent()
    conv_id = uuid4()

    result = handle_condenser_max_step_experiment__v1('user-2', conv_id, agent)

    assert result is not agent
    assert isinstance(result.condenser, LLMSummarizingCondenser)
    assert result.condenser.llm.usage_id == 'condenser'
    assert result.condenser.max_size == 80
    assert result.condenser.keep_first == 4


def test_none_variant_returns_original_agent_without_changes(monkeypatch):
    """A None variant (experiment off or lookup failure) is a strict no-op."""
    _patch_variant(monkeypatch, None)
    agent = make_agent()
    conv_id = uuid4()

    result = handle_condenser_max_step_experiment__v1('user-3', conv_id, agent)

    # No changes—same instance and no condenser attribute added
    assert result is agent
    assert getattr(result, 'condenser', None) is None


def test_unknown_variant_returns_original_agent_without_changes(monkeypatch):
    """Unrecognized variants are logged and ignored, never applied."""
    _patch_variant(monkeypatch, 'weird-variant')
    agent = make_agent()
    conv_id = uuid4()

    result = handle_condenser_max_step_experiment__v1('user-4', conv_id, agent)

    assert result is agent
    assert getattr(result, 'condenser', None) is None
@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', False)
def test_run_agent_variant_tests_v1_noop_when_manager_disabled():
    """If ENABLE_EXPERIMENT_MANAGER is False, the method returns the exact same agent and does not call the handler."""
    agent = make_agent()
    conv_id = uuid4()

    result = SaaSExperimentManager.run_agent_variant_tests__v1(
        user_id='user-123',
        conversation_id=conv_id,
        agent=agent,
    )

    # Same object returned (no copy)
    assert result is agent


@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', True)
@patch('experiments.experiment_manager.EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', True)
def test_run_agent_variant_tests_v1_calls_handler_and_sets_system_prompt(monkeypatch):
    """When enabled, it should call the condenser experiment handler and set the long-horizon system prompt."""
    agent = make_agent()
    conv_id = uuid4()
    _patch_variant(monkeypatch, 'treatment')

    result: Agent = SaaSExperimentManager.run_agent_variant_tests__v1(
        user_id='user-abc',
        conversation_id=conv_id,
        agent=agent,
    )

    # Should be a different instance than the original (copied after handler runs)
    assert result is not agent
    assert result.system_prompt_filename == 'system_prompt_long_horizon.j2'


@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', True)
@patch('experiments.experiment_manager.EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', True)
def test_run_agent_variant_tests_v1_preserves_planning_agent_system_prompt():
    """Planning agents should retain their specialized system prompt and not be overwritten by the experiment."""
    # Arrange
    planning_agent = make_agent().model_copy(
        update={'system_prompt_filename': 'system_prompt_planning.j2'}
    )
    conv_id = uuid4()

    # Act
    result: Agent = SaaSExperimentManager.run_agent_variant_tests__v1(
        user_id='user-planning',
        conversation_id=conv_id,
        agent=planning_agent,
    )

    # Assert
    assert result.system_prompt_filename == 'system_prompt_planning.j2'

View File

@@ -77,7 +77,6 @@ from openhands.app_server.utils.llm_metadata import (
get_llm_metadata,
should_set_litellm_extra_body,
)
from openhands.experiments.experiment_manager import ExperimentManagerImpl
from openhands.integrations.provider import ProviderType
from openhands.integrations.service_types import SuggestedTask
from openhands.sdk import Agent, AgentContext, LocalWorkspace
@@ -1140,7 +1139,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
working_dir: str,
plugins: list[PluginSpec] | None = None,
) -> StartConversationRequest:
"""Finalize the conversation request with experiment variants and skills.
"""Finalize the conversation request with skills and metadata.
Args:
agent: The configured agent
@@ -1161,13 +1160,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
# Generate conversation ID if not provided
conversation_id = conversation_id or uuid4()
# Apply experiment variants
agent = ExperimentManagerImpl.run_agent_variant_tests__v1(
user.id, conversation_id, agent
)
# Update agent's LLM with litellm_extra_body metadata for tracing
# This is done after experiment variants to ensure the final LLM config is used
agent = self._update_agent_with_llm_metadata(agent, conversation_id, user.id)
# Load and merge skills if remote workspace is available
@@ -1230,7 +1223,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
1. Setting up git provider secrets
2. Configuring LLM and MCP settings
3. Creating an agent with appropriate context
4. Finalizing the request with skills and experiment variants
4. Finalizing the request with skills and metadata
5. Passing plugins to the agent server for remote plugin loading
"""
user = await self.user_context.get_user_info()

View File

@@ -1,72 +0,0 @@
import os
from uuid import UUID
from pydantic import BaseModel
from openhands.core.config.openhands_config import OpenHandsConfig
from openhands.core.logger import openhands_logger as logger
from openhands.sdk import Agent
from openhands.server.session.conversation_init_data import ConversationInitData
from openhands.server.shared import file_store
from openhands.storage.locations import get_experiment_config_filename
from openhands.utils.import_utils import get_impl
class ExperimentConfig(BaseModel):
    """Per-conversation experiment overrides persisted in the file store."""

    # Maps agent-config attribute names to string override values;
    # None means no overrides were recorded for the conversation.
    config: dict[str, str] | None = None
def load_experiment_config(conversation_id: str) -> ExperimentConfig | None:
    """Read the stored experiment config for a conversation, if one exists.

    Returns None when no config file is present, or when reading/parsing
    fails (the failure is logged, never raised to the caller).
    """
    try:
        payload = file_store.read(get_experiment_config_filename(conversation_id))
        return ExperimentConfig.model_validate_json(payload)
    except FileNotFoundError:
        # A missing file simply means no experiment config was recorded.
        return None
    except Exception as e:
        logger.warning(f'Failed to load experiment config: {e}')
        return None
class ExperimentManager:
    """Default, no-op experiment hooks.

    Deployments can point OPENHANDS_EXPERIMENT_MANAGER_CLS at a subclass to
    inject real variant-assignment behavior. This base class returns every
    input unchanged, except that run_config_variant_test overlays any stored
    per-conversation config overrides onto the default agent config.
    """

    @staticmethod
    def run_agent_variant_tests__v1(
        user_id: str | None, conversation_id: UUID, agent: Agent
    ) -> Agent:
        # Base behavior: no agent-level experiments.
        return agent

    @staticmethod
    def run_conversation_variant_test(
        user_id: str | None,
        conversation_id: str,
        conversation_settings: ConversationInitData,
    ) -> ConversationInitData:
        # Base behavior: no conversation-level experiments.
        return conversation_settings

    @staticmethod
    def run_config_variant_test(
        user_id: str | None, conversation_id: str, config: OpenHandsConfig
    ) -> OpenHandsConfig:
        # Apply any persisted per-conversation overrides; failures are
        # logged and the (possibly partially updated) config is returned.
        stored = load_experiment_config(conversation_id)
        if not (stored and stored.config):
            return config
        agent_cfg = config.get_agent_config(config.default_agent)
        try:
            for attr, value in stored.config.items():
                if hasattr(agent_cfg, attr):
                    logger.info(
                        f'Set attrib {attr} to {value} for {conversation_id}'
                    )
                    setattr(agent_cfg, attr, value)
        except Exception as e:
            logger.warning(f'Error processing exp config: {e}')
        return config
# Allow deployments to substitute a custom manager implementation via an
# environment variable; defaults to the no-op base class defined above.
experiment_manager_cls = os.environ.get(
    'OPENHANDS_EXPERIMENT_MANAGER_CLS',
    'openhands.experiments.experiment_manager.ExperimentManager',
)
# Resolved class used throughout the server wherever experiment hooks run.
ExperimentManagerImpl = get_impl(ExperimentManager, experiment_manager_cls)

View File

@@ -28,7 +28,6 @@ from openhands.core.logger import openhands_logger as logger
from openhands.events.action import MessageAction
from openhands.events.nested_event_store import NestedEventStore
from openhands.events.stream import EventStream
from openhands.experiments.experiment_manager import ExperimentManagerImpl
from openhands.integrations.provider import PROVIDER_TOKEN_TYPE, ProviderHandler
from openhands.runtime import get_runtime_cls
from openhands.runtime.impl.docker.docker_runtime import DockerRuntime
@@ -551,12 +550,8 @@ class DockerNestedConversationManager(ConversationManager):
# This session is created here only because it is the easiest way to get a runtime, which
# is the easiest way to create the needed docker container
config: OpenHandsConfig = ExperimentManagerImpl.run_config_variant_test(
user_id, sid, self.config
)
llm_registry, conversation_stats, config = (
create_registry_and_conversation_stats(config, sid, user_id, settings)
create_registry_and_conversation_stats(self.config, sid, user_id, settings)
)
session = Session(

View File

@@ -60,7 +60,6 @@ from openhands.events.observation import (
AgentStateChangedObservation,
NullObservation,
)
from openhands.experiments.experiment_manager import ExperimentConfig
from openhands.integrations.provider import (
PROVIDER_TOKEN_TYPE,
ProviderHandler,
@@ -109,7 +108,6 @@ from openhands.storage.data_models.conversation_metadata import (
from openhands.storage.data_models.conversation_status import ConversationStatus
from openhands.storage.data_models.secrets import Secrets
from openhands.storage.data_models.settings import Settings
from openhands.storage.locations import get_experiment_config_filename
from openhands.storage.settings.settings_store import SettingsStore
from openhands.utils.async_utils import wait_all
from openhands.utils.conversation_summary import get_default_conversation_title
@@ -1240,32 +1238,6 @@ async def update_conversation(
)
@app.post('/conversations/{conversation_id}/exp-config')
def add_experiment_config_for_conversation(
    exp_config: ExperimentConfig,
    conversation_id: str = Depends(validate_conversation_id),
) -> bool:
    """Persist an experiment config for a conversation, at most once.

    Returns:
        True when the config was newly written; False when a config already
        exists (existing configs are never overwritten) or the write fails.

    Note: the original ordering of the trailing `return True` / `return False`
    made one of them unreachable (or reported success on a failed write);
    this version returns True only after a successful write.
    """
    exp_config_filepath = get_experiment_config_filename(conversation_id)
    # Don't modify again if it already exists.
    try:
        file_store.read(exp_config_filepath)
        return False
    except FileNotFoundError:
        pass
    try:
        file_store.write(exp_config_filepath, exp_config.model_dump_json())
    except Exception as e:
        logger.info(f'Failed to write experiment config for {conversation_id}: {e}')
        return False
    return True
def _parse_combined_page_id(page_id: str | None) -> tuple[str | None, str | None]:
"""Parse combined page_id to extract separate V0 and V1 page_ids.

View File

@@ -13,7 +13,6 @@ from typing import Any
from openhands.core.config.mcp_config import MCPConfig
from openhands.core.logger import openhands_logger as logger
from openhands.events.action.message import MessageAction
from openhands.experiments.experiment_manager import ExperimentManagerImpl
from openhands.integrations.provider import (
CUSTOM_SECRETS_TYPE,
PROVIDER_TOKEN_TYPE,
@@ -142,10 +141,6 @@ async def start_conversation(
conversation_init_data = ConversationInitData(**session_init_args)
conversation_init_data = ExperimentManagerImpl.run_conversation_variant_test(
user_id, conversation_id, conversation_init_data
)
logger.info(
f'Starting agent loop for conversation {conversation_id}',
extra={'user_id': user_id, 'session_id': conversation_id},
@@ -281,8 +276,4 @@ async def setup_init_conversation_settings(
if user_secrets:
session_init_args['custom_secrets'] = user_secrets.custom_secrets
conversation_init_data = ConversationInitData(**session_init_args)
# We should recreate the same experiment conditions when restarting a conversation
return ExperimentManagerImpl.run_conversation_variant_test(
user_id, conversation_id, conversation_init_data
)
return ConversationInitData(**session_init_args)

View File

@@ -108,13 +108,6 @@ class WebSession:
EventStreamSubscriber.SERVER, self.on_event, self.sid
)
self.config = config
# Lazy import to avoid circular dependency
from openhands.experiments.experiment_manager import ExperimentManagerImpl
self.config = ExperimentManagerImpl.run_config_variant_test(
user_id, sid, self.config
)
self.loop = asyncio.get_event_loop()
self.user_id = user_id

View File

@@ -36,7 +36,3 @@ def get_conversation_llm_registry_filename(sid: str, user_id: str | None = None)
def get_conversation_stats_filename(sid: str, user_id: str | None = None) -> str:
    """Path of the pickled conversation stats inside the conversation dir."""
    directory = get_conversation_dir(sid, user_id)
    return f'{directory}conversation_stats.pkl'
def get_experiment_config_filename(sid: str, user_id: str | None = None) -> str:
    """Path of the stored experiment config inside the conversation dir."""
    directory = get_conversation_dir(sid, user_id)
    return f'{directory}exp_config.json'

View File

@@ -129,9 +129,8 @@ def generate_openapi_spec():
"""Generate the OpenAPI specification from the FastAPI app."""
spec = app.openapi()
# Explicitly exclude certain endpoints that are operational, experimental, or UI-only convenience
# Explicitly exclude certain endpoints that are operational or UI-only convenience
excluded_endpoints = [
'/api/conversations/{conversation_id}/exp-config', # Internal experimentation endpoint
'/server_info', # Operational/system diagnostics
'/api/conversations/{conversation_id}/vscode-url', # UI/runtime convenience
'/api/conversations/{conversation_id}/web-hosts', # UI/runtime convenience

View File

@@ -1032,27 +1032,17 @@ class TestLiveStatusAppConversationService:
assert agent_context.system_message_suffix is None
@pytest.mark.asyncio
@patch(
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
)
async def test_finalize_conversation_request_with_skills(
self, mock_experiment_manager
):
async def test_finalize_conversation_request_with_skills(self):
"""Test _finalize_conversation_request with skills loading."""
# Arrange
mock_agent = Mock(spec=Agent)
# Create mock LLM with required attributes for _update_agent_with_llm_metadata
mock_llm = Mock(spec=LLM)
mock_llm.model = 'gpt-4' # Non-openhands model, so no metadata update
mock_llm.usage_id = 'agent'
mock_updated_agent = Mock(spec=Agent)
mock_updated_agent.llm = mock_llm
mock_updated_agent.condenser = None # No condenser
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
mock_updated_agent
)
# Arrange
mock_agent = Mock(spec=Agent)
mock_agent.llm = mock_llm
mock_agent.condenser = None # No condenser
conversation_id = uuid4()
workspace = LocalWorkspace(working_dir='/test')
@@ -1061,9 +1051,7 @@ class TestLiveStatusAppConversationService:
remote_workspace = Mock(spec=AsyncRemoteWorkspace)
# Mock the skills loading method
self.service._load_skills_and_update_agent = AsyncMock(
return_value=mock_updated_agent
)
self.service._load_skills_and_update_agent = AsyncMock(return_value=mock_agent)
# Act
result = await self.service._finalize_conversation_request(
@@ -1082,44 +1070,24 @@ class TestLiveStatusAppConversationService:
# Assert
assert isinstance(result, StartConversationRequest)
assert result.conversation_id == conversation_id
assert result.agent == mock_updated_agent
assert result.workspace == workspace
assert result.initial_message == initial_message
assert result.secrets == secrets
mock_experiment_manager.run_agent_variant_tests__v1.assert_called_once_with(
self.mock_user.id, conversation_id, mock_agent
)
self.service._load_skills_and_update_agent.assert_called_once_with(
self.mock_sandbox,
mock_updated_agent,
remote_workspace,
'test_repo',
'/test/dir',
)
self.service._load_skills_and_update_agent.assert_called_once()
@pytest.mark.asyncio
@patch(
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
)
async def test_finalize_conversation_request_without_skills(
self, mock_experiment_manager
):
async def test_finalize_conversation_request_without_skills(self):
"""Test _finalize_conversation_request without remote workspace (no skills)."""
# Arrange
mock_agent = Mock(spec=Agent)
# Create mock LLM with required attributes for _update_agent_with_llm_metadata
mock_llm = Mock(spec=LLM)
mock_llm.model = 'gpt-4' # Non-openhands model, so no metadata update
mock_llm.usage_id = 'agent'
mock_updated_agent = Mock(spec=Agent)
mock_updated_agent.llm = mock_llm
mock_updated_agent.condenser = None # No condenser
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
mock_updated_agent
)
# Arrange
mock_agent = Mock(spec=Agent)
mock_agent.llm = mock_llm
mock_agent.condenser = None # No condenser
workspace = LocalWorkspace(working_dir='/test')
secrets = {'test': StaticSecret(value='secret')}
@@ -1141,31 +1109,18 @@ class TestLiveStatusAppConversationService:
# Assert
assert isinstance(result, StartConversationRequest)
assert isinstance(result.conversation_id, UUID)
assert result.agent == mock_updated_agent
mock_experiment_manager.run_agent_variant_tests__v1.assert_called_once()
@pytest.mark.asyncio
@patch(
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
)
async def test_finalize_conversation_request_skills_loading_fails(
self, mock_experiment_manager
):
async def test_finalize_conversation_request_skills_loading_fails(self):
"""Test _finalize_conversation_request when skills loading fails."""
# Arrange
mock_agent = Mock(spec=Agent)
# Create mock LLM with required attributes for _update_agent_with_llm_metadata
mock_llm = Mock(spec=LLM)
mock_llm.model = 'gpt-4' # Non-openhands model, so no metadata update
mock_llm.usage_id = 'agent'
mock_updated_agent = Mock(spec=Agent)
mock_updated_agent.llm = mock_llm
mock_updated_agent.condenser = None # No condenser
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
mock_updated_agent
)
mock_agent = Mock(spec=Agent)
mock_agent.llm = mock_llm
mock_agent.condenser = None # No condenser
workspace = LocalWorkspace(working_dir='/test')
secrets = {'test': StaticSecret(value='secret')}
@@ -1195,9 +1150,6 @@ class TestLiveStatusAppConversationService:
# Assert
assert isinstance(result, StartConversationRequest)
assert (
result.agent == mock_updated_agent
) # Should still use the experiment-modified agent
mock_logger.warning.assert_called_once()
@pytest.mark.asyncio
@@ -2266,12 +2218,7 @@ class TestPluginHandling:
assert 'key2: value2' in text
@pytest.mark.asyncio
@patch(
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
)
async def test_finalize_conversation_request_with_plugins(
self, mock_experiment_manager
):
async def test_finalize_conversation_request_with_plugins(self):
"""Test _finalize_conversation_request passes plugins list to StartConversationRequest."""
from openhands.app_server.app_conversation.app_conversation_models import (
PluginSpec,
@@ -2282,13 +2229,13 @@ class TestPluginHandling:
mock_llm = Mock(spec=LLM)
mock_llm.model = 'gpt-4'
mock_llm.usage_id = 'agent'
mock_agent.llm = mock_llm
mock_agent.condenser = None
mock_updated_agent = Mock(spec=Agent)
mock_updated_agent.llm = mock_llm
mock_updated_agent.condenser = None
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
mock_updated_agent
)
mock_agent.model_copy = Mock(return_value=mock_updated_agent)
workspace = LocalWorkspace(working_dir='/test')
secrets = {'test': StaticSecret(value='secret')}
@@ -2330,25 +2277,20 @@ class TestPluginHandling:
assert '- api_key: test123' in result.initial_message.content[0].text
@pytest.mark.asyncio
@patch(
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
)
async def test_finalize_conversation_request_without_plugins(
self, mock_experiment_manager
):
async def test_finalize_conversation_request_without_plugins(self):
"""Test _finalize_conversation_request without plugins sets plugins to None."""
# Arrange
mock_agent = Mock(spec=Agent)
mock_llm = Mock(spec=LLM)
mock_llm.model = 'gpt-4'
mock_llm.usage_id = 'agent'
mock_agent.llm = mock_llm
mock_agent.condenser = None
mock_updated_agent = Mock(spec=Agent)
mock_updated_agent.llm = mock_llm
mock_updated_agent.condenser = None
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
mock_updated_agent
)
mock_agent.model_copy = Mock(return_value=mock_updated_agent)
workspace = LocalWorkspace(working_dir='/test')
secrets = {}
@@ -2373,12 +2315,7 @@ class TestPluginHandling:
assert result.plugins is None
@pytest.mark.asyncio
@patch(
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
)
async def test_finalize_conversation_request_plugin_without_ref(
self, mock_experiment_manager
):
async def test_finalize_conversation_request_plugin_without_ref(self):
"""Test _finalize_conversation_request with plugin that has no ref."""
from openhands.app_server.app_conversation.app_conversation_models import (
PluginSpec,
@@ -2389,13 +2326,13 @@ class TestPluginHandling:
mock_llm = Mock(spec=LLM)
mock_llm.model = 'gpt-4'
mock_llm.usage_id = 'agent'
mock_agent.llm = mock_llm
mock_agent.condenser = None
mock_updated_agent = Mock(spec=Agent)
mock_updated_agent.llm = mock_llm
mock_updated_agent.condenser = None
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
mock_updated_agent
)
mock_agent.model_copy = Mock(return_value=mock_updated_agent)
workspace = LocalWorkspace(working_dir='/test')
secrets = {}
@@ -2428,12 +2365,7 @@ class TestPluginHandling:
assert result.initial_message is None
@pytest.mark.asyncio
@patch(
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
)
async def test_finalize_conversation_request_plugin_with_repo_path(
self, mock_experiment_manager
):
async def test_finalize_conversation_request_plugin_with_repo_path(self):
"""Test _finalize_conversation_request passes repo_path to PluginSource."""
from openhands.app_server.app_conversation.app_conversation_models import (
PluginSpec,
@@ -2444,13 +2376,13 @@ class TestPluginHandling:
mock_llm = Mock(spec=LLM)
mock_llm.model = 'gpt-4'
mock_llm.usage_id = 'agent'
mock_agent.llm = mock_llm
mock_agent.condenser = None
mock_updated_agent = Mock(spec=Agent)
mock_updated_agent.llm = mock_llm
mock_updated_agent.condenser = None
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
mock_updated_agent
)
mock_agent.model_copy = Mock(return_value=mock_updated_agent)
workspace = LocalWorkspace(working_dir='/test')
secrets = {}
@@ -2488,12 +2420,7 @@ class TestPluginHandling:
assert result.plugins[0].repo_path == 'plugins/city-weather'
@pytest.mark.asyncio
@patch(
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
)
async def test_finalize_conversation_request_multiple_plugins(
self, mock_experiment_manager
):
async def test_finalize_conversation_request_multiple_plugins(self):
"""Test _finalize_conversation_request with multiple plugins."""
from openhands.app_server.app_conversation.app_conversation_models import (
PluginSpec,
@@ -2504,13 +2431,13 @@ class TestPluginHandling:
mock_llm = Mock(spec=LLM)
mock_llm.model = 'gpt-4'
mock_llm.usage_id = 'agent'
mock_agent.llm = mock_llm
mock_agent.condenser = None
mock_updated_agent = Mock(spec=Agent)
mock_updated_agent.llm = mock_llm
mock_updated_agent.condenser = None
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
mock_updated_agent
)
mock_agent.model_copy = Mock(return_value=mock_updated_agent)
workspace = LocalWorkspace(working_dir='/test')
secrets = {}

View File

@@ -1,264 +0,0 @@
"""Unit tests for ExperimentManager class, focusing on the v1 agent method."""
from types import SimpleNamespace
from unittest.mock import Mock, patch
from uuid import UUID, uuid4
import pytest
from openhands.app_server.app_conversation.live_status_app_conversation_service import (
LiveStatusAppConversationService,
)
from openhands.app_server.sandbox.sandbox_models import SandboxInfo, SandboxStatus
from openhands.experiments.experiment_manager import ExperimentManager
from openhands.sdk import Agent
from openhands.sdk.llm import LLM
class TestExperimentManager:
    """Test cases for the base (no-op) ExperimentManager class."""

    def setup_method(self):
        """Build a reusable mock LLM/Agent pair for each test."""
        self.user_id = 'test_user_123'
        self.conversation_id = uuid4()

        # Mock LLM carrying the attributes the service inspects.
        self.mock_llm = Mock(spec=LLM)
        self.mock_llm.model = 'gpt-4'
        self.mock_llm.usage_id = 'agent'

        # Mock Agent wired to the mock LLM; model_copy returns itself so
        # identity checks stay meaningful.
        self.mock_agent = Mock(spec=Agent)
        self.mock_agent.llm = self.mock_llm
        self.mock_agent.system_prompt_filename = 'default_system_prompt.j2'
        self.mock_agent.model_copy = Mock(return_value=self.mock_agent)

    def test_run_agent_variant_tests__v1_returns_agent_unchanged(self):
        """Test that the base ExperimentManager returns the agent unchanged."""
        outcome = ExperimentManager.run_agent_variant_tests__v1(
            self.user_id, self.conversation_id, self.mock_agent
        )
        assert outcome is self.mock_agent
        assert outcome == self.mock_agent

    def test_run_agent_variant_tests__v1_with_none_user_id(self):
        """Test that the method works with None user_id."""
        outcome = ExperimentManager.run_agent_variant_tests__v1(
            None, self.conversation_id, self.mock_agent
        )
        assert outcome is self.mock_agent

    def test_run_agent_variant_tests__v1_with_different_conversation_ids(self):
        """Test that the method works with different conversation IDs."""
        first_id, second_id = uuid4(), uuid4()
        first = ExperimentManager.run_agent_variant_tests__v1(
            self.user_id, first_id, self.mock_agent
        )
        second = ExperimentManager.run_agent_variant_tests__v1(
            self.user_id, second_id, self.mock_agent
        )
        # The no-op manager hands back the same instance regardless of ID.
        assert first is self.mock_agent
        assert second is self.mock_agent
class TestExperimentManagerIntegration:
"""Integration tests for ExperimentManager with start_app_conversation."""
def setup_method(self):
"""Set up test fixtures."""
self.user_id = 'test_user_123'
self.conversation_id = uuid4()
# Create a mock LLM
self.mock_llm = Mock(spec=LLM)
self.mock_llm.model = 'gpt-4'
self.mock_llm.usage_id = 'agent'
# Create a mock Agent
self.mock_agent = Mock(spec=Agent)
self.mock_agent.llm = self.mock_llm
self.mock_agent.system_prompt_filename = 'default_system_prompt.j2'
self.mock_agent.model_copy = Mock(return_value=self.mock_agent)
@patch('openhands.experiments.experiment_manager.ExperimentManagerImpl')
def test_start_app_conversation_calls_experiment_manager_v1(
self, mock_experiment_manager_impl
):
"""Test that start_app_conversation calls the experiment manager v1 method with correct parameters."""
# Arrange
mock_experiment_manager_impl.run_agent_variant_tests__v1.return_value = (
self.mock_agent
)
# Create a mock service instance
mock_service = Mock(spec=LiveStatusAppConversationService)
# Mock the _build_start_conversation_request_for_user method to simulate the call
with patch.object(mock_service, '_build_start_conversation_request_for_user'):
# Simulate the part of the code that calls the experiment manager
from uuid import uuid4
conversation_id = uuid4()
# This simulates the call that happens in the actual service
result_agent = mock_experiment_manager_impl.run_agent_variant_tests__v1(
self.user_id, conversation_id, self.mock_agent
)
# Assert
mock_experiment_manager_impl.run_agent_variant_tests__v1.assert_called_once_with(
self.user_id, conversation_id, self.mock_agent
)
assert result_agent == self.mock_agent
@pytest.mark.asyncio
async def test_experiment_manager_called_with_correct_parameters_in_context__noop_pass_through(
self,
):
"""
Test that ExperimentManagerImpl.run_agent_variant_tests__v1 is called with correct parameters
and returns the same agent instance (no copy/mutation) when building a StartConversationRequest.
"""
# --- Arrange: fixed UUID to assert call parameters deterministically
fixed_conversation_id = UUID('00000000-0000-0000-0000-000000000001')
# Create a stable Agent (and LLM) we can identity-check later
mock_llm = Mock(spec=LLM)
mock_llm.model = 'gpt-4'
mock_llm.usage_id = 'agent'
mock_agent = Mock(spec=Agent)
mock_agent.llm = mock_llm
mock_agent.condenser = None # No condenser for this test
mock_agent.system_prompt_filename = 'default_system_prompt.j2'
mock_agent.model_copy = Mock(return_value=mock_agent)
# Minimal, real-ish user context used by the service
class DummyUserContext:
async def get_user_info(self):
# confirmation_mode=False -> NeverConfirm()
return SimpleNamespace(
id='test_user_123',
llm_model='gpt-4',
llm_base_url=None,
llm_api_key=None,
confirmation_mode=False,
condenser_max_size=None,
security_analyzer=None,
)
async def get_secrets(self):
return {}
async def get_latest_token(self, provider):
return None
async def get_user_id(self):
return 'test_user_123'
user_context = DummyUserContext()
# The service requires a lot of deps, but for this test we won't exercise them.
app_conversation_info_service = Mock()
app_conversation_start_task_service = Mock()
event_callback_service = Mock()
sandbox_service = Mock()
sandbox_spec_service = Mock()
jwt_service = Mock()
httpx_client = Mock()
event_service = Mock()
service = LiveStatusAppConversationService(
init_git_in_empty_workspace=False,
user_context=user_context,
app_conversation_info_service=app_conversation_info_service,
app_conversation_start_task_service=app_conversation_start_task_service,
event_callback_service=event_callback_service,
event_service=event_service,
sandbox_service=sandbox_service,
sandbox_spec_service=sandbox_spec_service,
jwt_service=jwt_service,
sandbox_startup_timeout=30,
sandbox_startup_poll_frequency=1,
httpx_client=httpx_client,
web_url=None,
openhands_provider_base_url=None,
access_token_hard_timeout=None,
)
sandbox = SandboxInfo(
id='mock-sandbox-id',
created_by_user_id='mock-user-id',
sandbox_spec_id='mock-sandbox-spec-id',
status=SandboxStatus.RUNNING,
session_api_key='mock-session-api-key',
)
# Patch the pieces invoked by the service
with (
patch.object(
service,
'_setup_secrets_for_git_providers',
return_value={},
),
patch.object(
service,
'_configure_llm_and_mcp',
return_value=(mock_llm, {}),
),
patch.object(
service,
'_create_agent_with_context',
return_value=mock_agent,
),
patch.object(
service,
'_load_skills_and_update_agent',
return_value=mock_agent,
),
patch(
'openhands.app_server.app_conversation.live_status_app_conversation_service.uuid4',
return_value=fixed_conversation_id,
),
patch(
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
) as mock_experiment_manager,
):
# Configure the experiment manager mock to return the same agent
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
mock_agent
)
# --- Act: build the start request
start_req = await service._build_start_conversation_request_for_user(
sandbox=sandbox,
initial_message=None,
system_message_suffix=None, # No additional system message suffix
git_provider=None, # Keep secrets path simple
working_dir='/tmp/project', # Arbitrary path
)
# --- Assert: verify experiment manager was called with correct parameters
mock_experiment_manager.run_agent_variant_tests__v1.assert_called_once_with(
'test_user_123', # user_id
fixed_conversation_id, # conversation_id
mock_agent, # agent (after model_copy with agent_context)
)
# The agent in the StartConversationRequest is the *same* object returned by experiment manager
assert start_req.agent is mock_agent
# No tweaks to agent fields by the experiment manager (noop)
assert start_req.agent.llm is mock_llm
assert start_req.agent.system_prompt_filename == 'default_system_prompt.j2'