diff --git a/enterprise/dev_config/python/.pre-commit-config.yaml b/enterprise/dev_config/python/.pre-commit-config.yaml index c0925363a4..231832386d 100644 --- a/enterprise/dev_config/python/.pre-commit-config.yaml +++ b/enterprise/dev_config/python/.pre-commit-config.yaml @@ -53,7 +53,7 @@ repos: # Use -p (package) to avoid dual module name conflict when using MYPYPATH # MYPYPATH=enterprise allows resolving bare imports like "from integrations.xxx" # Note: tests package excluded to avoid conflict with core openhands tests - entry: bash -c 'MYPYPATH=enterprise mypy --config-file enterprise/dev_config/python/mypy.ini -p integrations -p server -p storage -p sync -p experiments' + entry: bash -c 'MYPYPATH=enterprise mypy --config-file enterprise/dev_config/python/mypy.ini -p integrations -p server -p storage -p sync' always_run: true pass_filenames: false files: ^enterprise/ diff --git a/enterprise/experiments/__init__.py b/enterprise/experiments/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/enterprise/experiments/constants.py b/enterprise/experiments/constants.py deleted file mode 100644 index b9eba97be7..0000000000 --- a/enterprise/experiments/constants.py +++ /dev/null @@ -1,47 +0,0 @@ -import os - -import posthog - -from openhands.core.logger import openhands_logger as logger - -# Initialize PostHog -posthog.api_key = os.environ.get('POSTHOG_CLIENT_KEY', 'phc_placeholder') -posthog.host = os.environ.get('POSTHOG_HOST', 'https://us.i.posthog.com') - -# Log PostHog configuration with masked API key for security -api_key = posthog.api_key -if api_key and len(api_key) > 8: - masked_key = f'{api_key[:4]}...{api_key[-4:]}' -else: - masked_key = 'not_set_or_too_short' -logger.info('posthog_configuration', extra={'posthog_api_key_masked': masked_key}) - -# Global toggle for the experiment manager -ENABLE_EXPERIMENT_MANAGER = ( - os.environ.get('ENABLE_EXPERIMENT_MANAGER', 'false').lower() == 'true' -) - -# Get the current experiment type from environment variable -# If None, no experiment is running -EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT = os.environ.get( - 'EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT', '' -) -# System prompt experiment toggle -EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT = os.environ.get( - 'EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', '' -) - -EXPERIMENT_CLAUDE4_VS_GPT5 = os.environ.get('EXPERIMENT_CLAUDE4_VS_GPT5', '') - -EXPERIMENT_CONDENSER_MAX_STEP = os.environ.get('EXPERIMENT_CONDENSER_MAX_STEP', '') - -logger.info( - 'experiment_manager:run_conversation_variant_test:experiment_config', - extra={ - 'enable_experiment_manager': ENABLE_EXPERIMENT_MANAGER, - 'experiment_litellm_default_model_experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT, - 'experiment_system_prompt_experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT, - 'experiment_claude4_vs_gpt5_experiment': EXPERIMENT_CLAUDE4_VS_GPT5, - 'experiment_condenser_max_step': EXPERIMENT_CONDENSER_MAX_STEP, - }, -) diff --git a/enterprise/experiments/experiment_manager.py b/enterprise/experiments/experiment_manager.py deleted file mode 100644 index 2a37e7449e..0000000000 --- a/enterprise/experiments/experiment_manager.py +++ /dev/null @@ -1,99 +0,0 @@ -from uuid import UUID - -from experiments.constants import ( - ENABLE_EXPERIMENT_MANAGER, - EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT, -) -from experiments.experiment_versions import ( - handle_system_prompt_experiment, -) - -from openhands.core.config.openhands_config import OpenHandsConfig -from openhands.core.logger import openhands_logger as logger -from openhands.experiments.experiment_manager import ExperimentManager -from openhands.sdk import Agent -from openhands.server.session.conversation_init_data import ConversationInitData - - -class SaaSExperimentManager(ExperimentManager): - @staticmethod - def run_agent_variant_tests__v1( - user_id: str | None, conversation_id: UUID, agent: Agent - ) -> Agent: - if not ENABLE_EXPERIMENT_MANAGER: - logger.info( - 'experiment_manager:run_conversation_variant_test:skipped', - extra={'reason': 'experiment_manager_disabled'}, - ) - return agent - - if EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT: - # Skip experiment for planning agents which require their specialized prompt - if agent.system_prompt_filename != 'system_prompt_planning.j2': - agent = agent.model_copy( - update={'system_prompt_filename': 'system_prompt_long_horizon.j2'} - ) - - return agent - - @staticmethod - def run_conversation_variant_test( - user_id, conversation_id, conversation_settings - ) -> ConversationInitData: - """ - Run conversation variant test and potentially modify the conversation settings - based on the PostHog feature flags. - - Args: - user_id: The user ID - conversation_id: The conversation ID - conversation_settings: The conversation settings that may include convo_id and llm_model - - Returns: - The modified conversation settings - """ - logger.debug( - 'experiment_manager:run_conversation_variant_test:started', - extra={'user_id': user_id, 'conversation_id': conversation_id}, - ) - - return conversation_settings - - @staticmethod - def run_config_variant_test( - user_id: str | None, conversation_id: str, config: OpenHandsConfig - ) -> OpenHandsConfig: - """ - Run agent config variant test and potentially modify the OpenHands config - based on the current experiment type and PostHog feature flags. - - Args: - user_id: The user ID - conversation_id: The conversation ID - config: The OpenHands configuration - - Returns: - The modified OpenHands configuration - """ - logger.info( - 'experiment_manager:run_config_variant_test:started', - extra={'user_id': user_id}, - ) - - # Skip all experiment processing if the experiment manager is disabled - if not ENABLE_EXPERIMENT_MANAGER: - logger.info( - 'experiment_manager:run_config_variant_test:skipped', - extra={'reason': 'experiment_manager_disabled'}, - ) - return config - - # Pass the entire OpenHands config to the system prompt experiment - # Let the experiment handler directly modify the config as needed - modified_config = handle_system_prompt_experiment( - user_id, conversation_id, config - ) - - # Condenser max step experiment is applied via conversation variant test, - # not config variant test. Return modified config from system prompt only. - return modified_config diff --git a/enterprise/experiments/experiment_versions/_001_litellm_default_model_experiment.py b/enterprise/experiments/experiment_versions/_001_litellm_default_model_experiment.py deleted file mode 100644 index 7524df1e76..0000000000 --- a/enterprise/experiments/experiment_versions/_001_litellm_default_model_experiment.py +++ /dev/null @@ -1,107 +0,0 @@ -""" -LiteLLM model experiment handler. - -This module contains the handler for the LiteLLM model experiment. -""" - -import posthog -from experiments.constants import EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT -from server.constants import ( - IS_FEATURE_ENV, - build_litellm_proxy_model_path, - get_default_litellm_model, -) - -from openhands.core.logger import openhands_logger as logger - - -def handle_litellm_default_model_experiment( - user_id, conversation_id, conversation_settings -): - """ - Handle the LiteLLM model experiment. - - Args: - user_id: The user ID - conversation_id: The conversation ID - conversation_settings: The conversation settings - - Returns: - Modified conversation settings - """ - # No-op if the specific experiment is not enabled - if not EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT: - logger.info( - 'experiment_manager:ab_testing:skipped', - extra={ - 'convo_id': conversation_id, - 'reason': 'experiment_not_enabled', - 'experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT, - }, - ) - return conversation_settings - - # Use experiment name as the flag key - try: - enabled_variant = posthog.get_feature_flag( - EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT, conversation_id - ) - except Exception as e: - logger.error( - 'experiment_manager:get_feature_flag:failed', - extra={ - 'convo_id': conversation_id, - 'experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT, - 'error': str(e), - }, - ) - return conversation_settings - - # Log the experiment event - # If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog - posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id - - try: - posthog.capture( - distinct_id=posthog_user_id, - event='model_set', - properties={ - 'conversation_id': conversation_id, - 'variant': enabled_variant, - 'original_user_id': user_id, - 'is_feature_env': IS_FEATURE_ENV, - }, - ) - except Exception as e: - logger.error( - 'experiment_manager:posthog_capture:failed', - extra={ - 'convo_id': conversation_id, - 'experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT, - 'error': str(e), - }, - ) - # Continue execution as this is not critical - - logger.info( - 'posthog_capture', - extra={ - 'event': 'model_set', - 'posthog_user_id': posthog_user_id, - 'is_feature_env': IS_FEATURE_ENV, - 'conversation_id': conversation_id, - 'variant': enabled_variant, - }, - ) - - # Set the model based on the feature flag variant - if enabled_variant == 'claude37': - # Use the shared utility to construct the LiteLLM proxy model path - model = build_litellm_proxy_model_path('claude-3-7-sonnet-20250219') - # Update the conversation settings with the selected model - conversation_settings.llm_model = model - else: - # Update the conversation settings with the default model for the current version - conversation_settings.llm_model = get_default_litellm_model() - - return conversation_settings diff --git a/enterprise/experiments/experiment_versions/_002_system_prompt_experiment.py b/enterprise/experiments/experiment_versions/_002_system_prompt_experiment.py deleted file mode 100644 index ef489c4ee4..0000000000 --- a/enterprise/experiments/experiment_versions/_002_system_prompt_experiment.py +++ /dev/null @@ -1,181 +0,0 @@ -""" -System prompt experiment handler. - -This module contains the handler for the system prompt experiment that uses -the PostHog variant as the system prompt filename. -""" - -import copy - -import posthog -from experiments.constants import EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT -from server.constants import IS_FEATURE_ENV -from storage.experiment_assignment_store import ExperimentAssignmentStore - -from openhands.core.config.openhands_config import OpenHandsConfig -from openhands.core.logger import openhands_logger as logger - - -def _get_system_prompt_variant(user_id, conversation_id): - """ - Get the system prompt variant for the experiment. - - Args: - user_id: The user ID - conversation_id: The conversation ID - - Returns: - str or None: The PostHog variant name or None if experiment is not enabled or error occurs - """ - # No-op if the specific experiment is not enabled - if not EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT: - logger.info( - 'experiment_manager_002:ab_testing:skipped', - extra={ - 'convo_id': conversation_id, - 'reason': 'experiment_not_enabled', - 'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT, - }, - ) - return None - - # Use experiment name as the flag key - try: - enabled_variant = posthog.get_feature_flag( - EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT, conversation_id - ) - except Exception as e: - logger.error( - 'experiment_manager:get_feature_flag:failed', - extra={ - 'convo_id': conversation_id, - 'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT, - 'error': str(e), - }, - ) - return None - - # Store the experiment assignment in the database - try: - experiment_store = ExperimentAssignmentStore() - experiment_store.update_experiment_variant( - conversation_id=conversation_id, - experiment_name='system_prompt_experiment', - variant=enabled_variant, - ) - except Exception as e: - logger.error( - 'experiment_manager:store_assignment:failed', - extra={ - 'convo_id': conversation_id, - 'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT, - 'variant': enabled_variant, - 'error': str(e), - }, - ) - # Fail the experiment if we cannot track the splits - results would not be explainable - return None - - # Log the experiment event - # If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog - posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id - - try: - posthog.capture( - distinct_id=posthog_user_id, - event='system_prompt_set', - properties={ - 'conversation_id': conversation_id, - 'variant': enabled_variant, - 'original_user_id': user_id, - 'is_feature_env': IS_FEATURE_ENV, - }, - ) - except Exception as e: - logger.error( - 'experiment_manager:posthog_capture:failed', - extra={ - 'convo_id': conversation_id, - 'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT, - 'error': str(e), - }, - ) - # Continue execution as this is not critical - - logger.info( - 'posthog_capture', - extra={ - 'event': 'system_prompt_set', - 'posthog_user_id': posthog_user_id, - 'is_feature_env': IS_FEATURE_ENV, - 'conversation_id': conversation_id, - 'variant': enabled_variant, - }, - ) - - return enabled_variant - - -def handle_system_prompt_experiment( - user_id, conversation_id, config: OpenHandsConfig -) -> OpenHandsConfig: - """ - Handle the system prompt experiment for OpenHands config. - - Args: - user_id: The user ID - conversation_id: The conversation ID - config: The OpenHands configuration - - Returns: - Modified OpenHands configuration - """ - enabled_variant = _get_system_prompt_variant(user_id, conversation_id) - - # If variant is None, experiment is not enabled or there was an error - if enabled_variant is None: - return config - - # Deep copy the config to avoid modifying the original - modified_config = copy.deepcopy(config) - - # Set the system prompt filename based on the variant - if enabled_variant == 'control': - # Use the long-horizon system prompt for the control variant - agent_config = modified_config.get_agent_config(modified_config.default_agent) - agent_config.system_prompt_filename = 'system_prompt_long_horizon.j2' - agent_config.enable_plan_mode = True - elif enabled_variant == 'interactive': - modified_config.get_agent_config( - modified_config.default_agent - ).system_prompt_filename = 'system_prompt_interactive.j2' - elif enabled_variant == 'no_tools': - modified_config.get_agent_config( - modified_config.default_agent - ).system_prompt_filename = 'system_prompt.j2' - else: - logger.error( - 'system_prompt_experiment:unknown_variant', - extra={ - 'user_id': user_id, - 'convo_id': conversation_id, - 'variant': enabled_variant, - 'reason': 'no explicit mapping; returning original config', - }, - ) - return config - - # Log which prompt is being used - logger.info( - 'system_prompt_experiment:prompt_selected', - extra={ - 'user_id': user_id, - 'convo_id': conversation_id, - 'system_prompt_filename': modified_config.get_agent_config( - modified_config.default_agent - ).system_prompt_filename, - 'variant': enabled_variant, - }, - ) - - return modified_config diff --git a/enterprise/experiments/experiment_versions/_003_llm_claude4_vs_gpt5_experiment.py b/enterprise/experiments/experiment_versions/_003_llm_claude4_vs_gpt5_experiment.py deleted file mode 100644 index 8eb41ff042..0000000000 --- a/enterprise/experiments/experiment_versions/_003_llm_claude4_vs_gpt5_experiment.py +++ /dev/null @@ -1,137 +0,0 @@ -""" -LiteLLM model experiment handler. - -This module contains the handler for the LiteLLM model experiment. -""" - -import posthog -from experiments.constants import EXPERIMENT_CLAUDE4_VS_GPT5 -from server.constants import ( - IS_FEATURE_ENV, - build_litellm_proxy_model_path, - get_default_litellm_model, -) -from storage.experiment_assignment_store import ExperimentAssignmentStore - -from openhands.core.logger import openhands_logger as logger -from openhands.server.session.conversation_init_data import ConversationInitData - - -def _get_model_variant(user_id: str | None, conversation_id: str) -> str | None: - if not EXPERIMENT_CLAUDE4_VS_GPT5: - logger.info( - 'experiment_manager:ab_testing:skipped', - extra={ - 'convo_id': conversation_id, - 'reason': 'experiment_not_enabled', - 'experiment': EXPERIMENT_CLAUDE4_VS_GPT5, - }, - ) - return None - - try: - enabled_variant = posthog.get_feature_flag( - EXPERIMENT_CLAUDE4_VS_GPT5, conversation_id - ) - except Exception as e: - logger.error( - 'experiment_manager:get_feature_flag:failed', - extra={ - 'convo_id': conversation_id, - 'experiment': EXPERIMENT_CLAUDE4_VS_GPT5, - 'error': str(e), - }, - ) - return None - - # Store the experiment assignment in the database - try: - experiment_store = ExperimentAssignmentStore() - experiment_store.update_experiment_variant( - conversation_id=conversation_id, - experiment_name='claude4_vs_gpt5_experiment', - variant=enabled_variant, - ) - except Exception as e: - logger.error( - 'experiment_manager:store_assignment:failed', - extra={ - 'convo_id': conversation_id, - 'experiment': EXPERIMENT_CLAUDE4_VS_GPT5, - 'variant': enabled_variant, - 'error': str(e), - }, - ) - # Fail the experiment if we cannot track the splits - results would not be explainable - return None - - # Log the experiment event - # If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog - posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id - - try: - posthog.capture( - distinct_id=posthog_user_id, - event='claude4_or_gpt5_set', - properties={ - 'conversation_id': conversation_id, - 'variant': enabled_variant, - 'original_user_id': user_id, - 'is_feature_env': IS_FEATURE_ENV, - }, - ) - except Exception as e: - logger.error( - 'experiment_manager:posthog_capture:failed', - extra={ - 'convo_id': conversation_id, - 'experiment': EXPERIMENT_CLAUDE4_VS_GPT5, - 'error': str(e), - }, - ) - # Continue execution as this is not critical - - logger.info( - 'posthog_capture', - extra={ - 'event': 'claude4_or_gpt5_set', - 'posthog_user_id': posthog_user_id, - 'is_feature_env': IS_FEATURE_ENV, - 'conversation_id': conversation_id, - 'variant': enabled_variant, - }, - ) - - return enabled_variant - - -def handle_claude4_vs_gpt5_experiment( - user_id: str | None, - conversation_id: str, - conversation_settings: ConversationInitData, -) -> ConversationInitData: - """ - Handle the LiteLLM model experiment. - - Args: - user_id: The user ID - conversation_id: The conversation ID - conversation_settings: The conversation settings - - Returns: - Modified conversation settings - """ - - enabled_variant = _get_model_variant(user_id, conversation_id) - - if not enabled_variant: - return conversation_settings - - # Set the model based on the feature flag variant - if enabled_variant == 'gpt5': - model = build_litellm_proxy_model_path('gpt-5-2025-08-07') - conversation_settings.llm_model = model - else: - conversation_settings.llm_model = get_default_litellm_model() - - return conversation_settings diff --git a/enterprise/experiments/experiment_versions/_004_condenser_max_step_experiment.py b/enterprise/experiments/experiment_versions/_004_condenser_max_step_experiment.py deleted file mode 100644 index 5b5818cb1d..0000000000 --- a/enterprise/experiments/experiment_versions/_004_condenser_max_step_experiment.py +++ /dev/null @@ -1,232 +0,0 @@ -""" -Condenser max step experiment handler. - -This module contains the handler for the condenser max step experiment that tests -different max_size values for the condenser configuration. -""" - -from uuid import UUID - -import posthog -from experiments.constants import EXPERIMENT_CONDENSER_MAX_STEP -from server.constants import IS_FEATURE_ENV -from storage.experiment_assignment_store import ExperimentAssignmentStore - -from openhands.core.logger import openhands_logger as logger -from openhands.sdk import Agent -from openhands.sdk.context.condenser import ( - LLMSummarizingCondenser, -) -from openhands.server.session.conversation_init_data import ConversationInitData - - -def _get_condenser_max_step_variant(user_id, conversation_id): - """ - Get the condenser max step variant for the experiment. - - Args: - user_id: The user ID - conversation_id: The conversation ID - - Returns: - str or None: The PostHog variant name or None if experiment is not enabled or error occurs - """ - # No-op if the specific experiment is not enabled - if not EXPERIMENT_CONDENSER_MAX_STEP: - logger.info( - 'experiment_manager_004:ab_testing:skipped', - extra={ - 'convo_id': conversation_id, - 'reason': 'experiment_not_enabled', - 'experiment': EXPERIMENT_CONDENSER_MAX_STEP, - }, - ) - return None - - # Use experiment name as the flag key - try: - enabled_variant = posthog.get_feature_flag( - EXPERIMENT_CONDENSER_MAX_STEP, conversation_id - ) - except Exception as e: - logger.error( - 'experiment_manager:get_feature_flag:failed', - extra={ - 'convo_id': conversation_id, - 'experiment': EXPERIMENT_CONDENSER_MAX_STEP, - 'error': str(e), - }, - ) - return None - - # Store the experiment assignment in the database - try: - experiment_store = ExperimentAssignmentStore() - experiment_store.update_experiment_variant( - conversation_id=conversation_id, - experiment_name='condenser_max_step_experiment', - variant=enabled_variant, - ) - except Exception as e: - logger.error( - 'experiment_manager:store_assignment:failed', - extra={ - 'convo_id': conversation_id, - 'experiment': EXPERIMENT_CONDENSER_MAX_STEP, - 'variant': enabled_variant, - 'error': str(e), - }, - ) - # Fail the experiment if we cannot track the splits - results would not be explainable - return None - - # Log the experiment event - # If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog - posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id - - try: - posthog.capture( - distinct_id=posthog_user_id, - event='condenser_max_step_set', - properties={ - 'conversation_id': conversation_id, - 'variant': enabled_variant, - 'original_user_id': user_id, - 'is_feature_env': IS_FEATURE_ENV, - }, - ) - except Exception as e: - logger.error( - 'experiment_manager:posthog_capture:failed', - extra={ - 'convo_id': conversation_id, - 'experiment': EXPERIMENT_CONDENSER_MAX_STEP, - 'error': str(e), - }, - ) - # Continue execution as this is not critical - - logger.info( - 'posthog_capture', - extra={ - 'event': 'condenser_max_step_set', - 'posthog_user_id': posthog_user_id, - 'is_feature_env': IS_FEATURE_ENV, - 'conversation_id': conversation_id, - 'variant': enabled_variant, - }, - ) - - return enabled_variant - - -def handle_condenser_max_step_experiment( - user_id: str | None, - conversation_id: str, - conversation_settings: ConversationInitData, -) -> ConversationInitData: - """ - Handle the condenser max step experiment for conversation settings. - - We should not modify persistent user settings. Instead, apply the experiment - variant to the conversation's in-memory settings object for this session only. - - Variants: - - control -> condenser_max_size = 120 - - treatment -> condenser_max_size = 80 - - Returns the (potentially) modified conversation_settings. - """ - - enabled_variant = _get_condenser_max_step_variant(user_id, conversation_id) - - if enabled_variant is None: - return conversation_settings - - if enabled_variant == 'control': - condenser_max_size = 120 - elif enabled_variant == 'treatment': - condenser_max_size = 80 - else: - logger.error( - 'condenser_max_step_experiment:unknown_variant', - extra={ - 'user_id': user_id, - 'convo_id': conversation_id, - 'variant': enabled_variant, - 'reason': 'unknown variant; returning original conversation settings', - }, - ) - return conversation_settings - - try: - # Apply the variant to this conversation only; do not persist to DB. - # Not all OpenHands versions expose `condenser_max_size` on settings. - if hasattr(conversation_settings, 'condenser_max_size'): - conversation_settings.condenser_max_size = condenser_max_size - logger.info( - 'condenser_max_step_experiment:conversation_settings_applied', - extra={ - 'user_id': user_id, - 'convo_id': conversation_id, - 'variant': enabled_variant, - 'condenser_max_size': condenser_max_size, - }, - ) - else: - logger.warning( - 'condenser_max_step_experiment:field_missing_on_settings', - extra={ - 'user_id': user_id, - 'convo_id': conversation_id, - 'variant': enabled_variant, - 'reason': 'condenser_max_size not present on ConversationInitData', - }, - ) - except Exception as e: - logger.error( - 'condenser_max_step_experiment:apply_failed', - extra={ - 'user_id': user_id, - 'convo_id': conversation_id, - 'variant': enabled_variant, - 'error': str(e), - }, - ) - return conversation_settings - - return conversation_settings - - -def handle_condenser_max_step_experiment__v1( - user_id: str | None, - conversation_id: UUID, - agent: Agent, -) -> Agent: - enabled_variant = _get_condenser_max_step_variant(user_id, str(conversation_id)) - - if enabled_variant is None: - return agent - - if enabled_variant == 'control': - condenser_max_size = 120 - elif enabled_variant == 'treatment': - condenser_max_size = 80 - else: - logger.error( - 'condenser_max_step_experiment:unknown_variant', - extra={ - 'user_id': user_id, - 'convo_id': conversation_id, - 'variant': enabled_variant, - 'reason': 'unknown variant; returning original conversation settings', - }, - ) - return agent - - condenser_llm = agent.llm.model_copy(update={'usage_id': 'condenser'}) - condenser = LLMSummarizingCondenser( - llm=condenser_llm, max_size=condenser_max_size, keep_first=4 - ) - - return agent.model_copy(update={'condenser': condenser}) diff --git a/enterprise/experiments/experiment_versions/__init__.py b/enterprise/experiments/experiment_versions/__init__.py deleted file mode 100644 index 76da1fbd3b..0000000000 --- a/enterprise/experiments/experiment_versions/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -Experiment versions package. - -This package contains handlers for different experiment versions. -""" - -from experiments.experiment_versions._001_litellm_default_model_experiment import ( - handle_litellm_default_model_experiment, -) -from experiments.experiment_versions._002_system_prompt_experiment import ( - handle_system_prompt_experiment, -) -from experiments.experiment_versions._003_llm_claude4_vs_gpt5_experiment import ( - handle_claude4_vs_gpt5_experiment, -) -from experiments.experiment_versions._004_condenser_max_step_experiment import ( - handle_condenser_max_step_experiment, -) - -__all__ = [ - 'handle_litellm_default_model_experiment', - 'handle_system_prompt_experiment', - 'handle_claude4_vs_gpt5_experiment', - 'handle_condenser_max_step_experiment', -] diff --git a/enterprise/pyproject.toml b/enterprise/pyproject.toml index 25607ba315..ef1840a8e4 100644 --- a/enterprise/pyproject.toml +++ b/enterprise/pyproject.toml @@ -17,7 +17,6 @@ packages = [ { include = "storage" }, { include = "sync" }, { include = "integrations" }, - { include = "experiments" }, ] [tool.poetry.dependencies] diff --git a/enterprise/server/routes/event_webhook.py b/enterprise/server/routes/event_webhook.py index c308358f67..8c48d0a69f 100644 --- a/enterprise/server/routes/event_webhook.py +++ b/enterprise/server/routes/event_webhook.py @@ -129,10 +129,6 @@ async def _process_batch_operations_background( # No action required continue - if subpath == 'exp_config.json': - # No action required - continue - # Log unhandled paths for future implementation logger.warning( 'unknown_path_in_batch_webhook', diff --git a/enterprise/server/saas_nested_conversation_manager.py b/enterprise/server/saas_nested_conversation_manager.py index be5f787b10..5c8f8aa235 100644 --- a/enterprise/server/saas_nested_conversation_manager.py +++ b/enterprise/server/saas_nested_conversation_manager.py @@ -391,39 +391,11 @@ class SaasNestedConversationManager(ConversationManager): await self._setup_nested_settings(client, api_url, settings) await self._setup_provider_tokens(client, api_url, settings) await self._setup_custom_secrets(client, api_url, settings.custom_secrets) # type: ignore - await self._setup_experiment_config(client, api_url, sid, user_id) await self._create_nested_conversation( client, api_url, sid, user_id, settings, initial_user_msg, replay_json ) await self._wait_for_conversation_ready(client, api_url, sid) - async def _setup_experiment_config( - self, client: httpx.AsyncClient, api_url: str, sid: str, user_id: str - ): - # Prevent circular import - from openhands.experiments.experiment_manager import ( - ExperimentConfig, - ExperimentManagerImpl, - ) - - config: OpenHandsConfig = ExperimentManagerImpl.run_config_variant_test( - user_id, sid, self.config - ) - - experiment_config = ExperimentConfig( - config={ - 'system_prompt_filename': config.get_agent_config( - config.default_agent - ).system_prompt_filename - } - ) - - response = await client.post( - f'{api_url}/api/conversations/{sid}/exp-config', - json=experiment_config.model_dump(), - ) - response.raise_for_status() - async def _setup_nested_settings( self, client: httpx.AsyncClient, api_url: str, settings: Settings ) -> None: diff --git a/enterprise/storage/__init__.py b/enterprise/storage/__init__.py index 5a9ef9838c..c427ec9222 100644 --- a/enterprise/storage/__init__.py +++ b/enterprise/storage/__init__.py @@ -4,7 +4,6 @@ from storage.billing_session import BillingSession from storage.billing_session_type import BillingSessionType from storage.conversation_callback import CallbackStatus, ConversationCallback from storage.conversation_work import ConversationWork -from storage.experiment_assignment import ExperimentAssignment from storage.feedback import ConversationFeedback, Feedback from storage.github_app_installation import GithubAppInstallation from storage.gitlab_webhook import GitlabWebhook, WebhookStatus @@ -50,7 +49,6 @@ __all__ = [ 'ConversationFeedback', 'StoredConversationMetadataSaas', 'ConversationWork', - 'ExperimentAssignment', 'Feedback', 'GithubAppInstallation', 'GitlabWebhook', diff --git a/enterprise/storage/experiment_assignment.py b/enterprise/storage/experiment_assignment.py deleted file mode 100644 index f648fa8a03..0000000000 --- a/enterprise/storage/experiment_assignment.py +++ /dev/null @@ -1,41 +0,0 @@ -""" -Database model for experiment assignments. - -This model tracks which experiments a conversation is assigned to and what variant -they received from PostHog feature flags. -""" - -import uuid -from datetime import UTC, datetime - -from sqlalchemy import Column, DateTime, String, UniqueConstraint -from storage.base import Base - - -class ExperimentAssignment(Base): # type: ignore - __tablename__ = 'experiment_assignments' - - id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - conversation_id = Column(String, nullable=True, index=True) - experiment_name = Column(String, nullable=False) - variant = Column(String, nullable=False) - - created_at = Column( - DateTime(timezone=True), - default=lambda: datetime.now(UTC), # type: ignore[attr-defined] - nullable=False, - ) - updated_at = Column( - DateTime(timezone=True), - default=lambda: datetime.now(UTC), # type: ignore[attr-defined] - onupdate=lambda: datetime.now(UTC), # type: ignore[attr-defined] - nullable=False, - ) - - __table_args__ = ( - UniqueConstraint( - 'conversation_id', - 'experiment_name', - name='uq_experiment_assignments_conversation_experiment', - ), - ) diff --git a/enterprise/storage/experiment_assignment_store.py b/enterprise/storage/experiment_assignment_store.py deleted file mode 100644 index 283315e13f..0000000000 --- a/enterprise/storage/experiment_assignment_store.py +++ /dev/null @@ -1,52 +0,0 @@ -""" -Store for managing experiment assignments. - -This store handles creating and updating experiment assignments for conversations. -""" - -from sqlalchemy.dialects.postgresql import insert -from storage.database import session_maker -from storage.experiment_assignment import ExperimentAssignment - -from openhands.core.logger import openhands_logger as logger - - -class ExperimentAssignmentStore: - """Store for managing experiment assignments.""" - - def update_experiment_variant( - self, - conversation_id: str, - experiment_name: str, - variant: str, - ) -> None: - """ - Update the variant for a specific experiment. - - Args: - conversation_id: The conversation ID - experiment_name: The name of the experiment - variant: The variant assigned - """ - with session_maker() as session: - # Use PostgreSQL's INSERT ... ON CONFLICT DO NOTHING to handle unique constraint - stmt = insert(ExperimentAssignment).values( - conversation_id=conversation_id, - experiment_name=experiment_name, - variant=variant, - ) - stmt = stmt.on_conflict_do_nothing( - constraint='uq_experiment_assignments_conversation_experiment' - ) - - session.execute(stmt) - session.commit() - - logger.info( - 'experiment_assignment_store:upserted_variant', - extra={ - 'conversation_id': conversation_id, - 'experiment_name': experiment_name, - 'variant': variant, - }, - ) diff --git a/enterprise/tests/unit/experiments/__init__.py b/enterprise/tests/unit/experiments/__init__.py deleted file mode 100644 index 50b9db5067..0000000000 --- a/enterprise/tests/unit/experiments/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Unit tests for experiments module.""" diff --git a/enterprise/tests/unit/experiments/test_saas_experiment_manager.py b/enterprise/tests/unit/experiments/test_saas_experiment_manager.py deleted file mode 100644 index 25883fd31c..0000000000 --- a/enterprise/tests/unit/experiments/test_saas_experiment_manager.py +++ /dev/null @@ -1,149 +0,0 @@ -# tests/test_condenser_max_step_experiment_v1.py - -from unittest.mock import patch -from uuid import uuid4 - -from experiments.experiment_manager import SaaSExperimentManager - -# SUT imports (update the module path if needed) -from experiments.experiment_versions._004_condenser_max_step_experiment import ( - handle_condenser_max_step_experiment__v1, -) -from pydantic import SecretStr - -from openhands.sdk import LLM, Agent -from openhands.sdk.context.condenser import LLMSummarizingCondenser - - -def make_agent() -> Agent: - """Build a minimal valid Agent.""" - llm = LLM( - usage_id='primary-llm', - model='provider/model', - api_key=SecretStr('sk-test'), - ) - return Agent(llm=llm) - - -def _patch_variant(monkeypatch, return_value): - """Patch the internal variant getter to return a specific value.""" - monkeypatch.setattr( - 'experiments.experiment_versions._004_condenser_max_step_experiment._get_condenser_max_step_variant', - lambda user_id, conv_id: return_value, - raising=True, - ) - - -def test_control_variant_sets_condenser_with_max_size_120(monkeypatch): - _patch_variant(monkeypatch, 'control') - agent = make_agent() - conv_id = uuid4() - - result = handle_condenser_max_step_experiment__v1('user-1', conv_id, agent) - - # Should be a new Agent instance with a condenser installed - assert result is not agent - assert isinstance(result.condenser, LLMSummarizingCondenser) - - # The condenser should have its own LLM (usage_id overridden to "condenser") - assert result.condenser.llm.usage_id == 'condenser' - # The original agent LLM remains unchanged - assert agent.llm.usage_id == 'primary-llm' - - # Control: max_size = 120, keep_first = 4 - assert result.condenser.max_size == 120 - assert result.condenser.keep_first == 4 - - -def test_treatment_variant_sets_condenser_with_max_size_80(monkeypatch): - _patch_variant(monkeypatch, 'treatment') - agent = make_agent() - conv_id = uuid4() - - result = handle_condenser_max_step_experiment__v1('user-2', conv_id, agent) - - assert result is not agent - assert isinstance(result.condenser, LLMSummarizingCondenser) - assert result.condenser.llm.usage_id == 'condenser' - assert result.condenser.max_size == 80 - assert result.condenser.keep_first == 4 - - -def test_none_variant_returns_original_agent_without_changes(monkeypatch): - _patch_variant(monkeypatch, None) - agent = make_agent() - conv_id = uuid4() - - result = handle_condenser_max_step_experiment__v1('user-3', conv_id, agent) - - # No changes—same instance and no condenser attribute added - assert result is agent - assert getattr(result, 'condenser', None) is None - - -def test_unknown_variant_returns_original_agent_without_changes(monkeypatch): - _patch_variant(monkeypatch, 'weird-variant') - agent = make_agent() - conv_id = uuid4() - - result = handle_condenser_max_step_experiment__v1('user-4', conv_id, agent) - - assert result is agent - assert getattr(result, 'condenser', None) is None - - -@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', False) -def test_run_agent_variant_tests_v1_noop_when_manager_disabled(): - """If ENABLE_EXPERIMENT_MANAGER is False, the method returns the exact same agent and does not call the handler.""" - agent = make_agent() - conv_id = uuid4() - - result = SaaSExperimentManager.run_agent_variant_tests__v1( - user_id='user-123', - conversation_id=conv_id, - agent=agent, - ) - - # Same object returned (no copy) - assert result is agent - - -@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', True) -@patch('experiments.experiment_manager.EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', True) -def test_run_agent_variant_tests_v1_calls_handler_and_sets_system_prompt(monkeypatch): - """When enabled, it should call the condenser experiment handler and set the long-horizon system prompt.""" - agent = make_agent() - conv_id = uuid4() - - _patch_variant(monkeypatch, 'treatment') - - result: Agent = SaaSExperimentManager.run_agent_variant_tests__v1( - user_id='user-abc', - conversation_id=conv_id, - agent=agent, - ) - - # Should be a different instance than the original (copied after handler runs) - assert result is not agent - assert result.system_prompt_filename == 'system_prompt_long_horizon.j2' - - -@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', True) -@patch('experiments.experiment_manager.EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', True) -def test_run_agent_variant_tests_v1_preserves_planning_agent_system_prompt(): - """Planning agents should retain their specialized system prompt and not be overwritten by the experiment.""" - # Arrange - planning_agent = make_agent().model_copy( - update={'system_prompt_filename': 'system_prompt_planning.j2'} - ) - conv_id = uuid4() - - # Act - result: Agent = SaaSExperimentManager.run_agent_variant_tests__v1( - user_id='user-planning', - conversation_id=conv_id, - agent=planning_agent, - ) - - # Assert - assert result.system_prompt_filename == 'system_prompt_planning.j2' diff --git a/openhands/app_server/app_conversation/live_status_app_conversation_service.py b/openhands/app_server/app_conversation/live_status_app_conversation_service.py index 32cb6b607b..b4ca372c16 100644 --- a/openhands/app_server/app_conversation/live_status_app_conversation_service.py +++ b/openhands/app_server/app_conversation/live_status_app_conversation_service.py @@ -77,7 +77,6 @@ from openhands.app_server.utils.llm_metadata import ( get_llm_metadata, should_set_litellm_extra_body, ) -from openhands.experiments.experiment_manager import ExperimentManagerImpl from openhands.integrations.provider import ProviderType from openhands.integrations.service_types import SuggestedTask from openhands.sdk import Agent, AgentContext, LocalWorkspace @@ -1140,7 +1139,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase): working_dir: str, plugins: list[PluginSpec] | None = None, ) -> StartConversationRequest: - """Finalize the conversation request with experiment variants and skills. + """Finalize the conversation request with skills and metadata. Args: agent: The configured agent @@ -1161,13 +1160,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase): # Generate conversation ID if not provided conversation_id = conversation_id or uuid4() - # Apply experiment variants - agent = ExperimentManagerImpl.run_agent_variant_tests__v1( - user.id, conversation_id, agent - ) - # Update agent's LLM with litellm_extra_body metadata for tracing - # This is done after experiment variants to ensure the final LLM config is used agent = self._update_agent_with_llm_metadata(agent, conversation_id, user.id) # Load and merge skills if remote workspace is available @@ -1230,7 +1223,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase): 1. Setting up git provider secrets 2. Configuring LLM and MCP settings 3. Creating an agent with appropriate context - 4. Finalizing the request with skills and experiment variants + 4. Finalizing the request with skills and metadata 5. Passing plugins to the agent server for remote plugin loading """ user = await self.user_context.get_user_info() diff --git a/openhands/experiments/experiment_manager.py b/openhands/experiments/experiment_manager.py deleted file mode 100644 index 013aa16bef..0000000000 --- a/openhands/experiments/experiment_manager.py +++ /dev/null @@ -1,72 +0,0 @@ -import os -from uuid import UUID - -from pydantic import BaseModel - -from openhands.core.config.openhands_config import OpenHandsConfig -from openhands.core.logger import openhands_logger as logger -from openhands.sdk import Agent -from openhands.server.session.conversation_init_data import ConversationInitData -from openhands.server.shared import file_store -from openhands.storage.locations import get_experiment_config_filename -from openhands.utils.import_utils import get_impl - - -class ExperimentConfig(BaseModel): - config: dict[str, str] | None = None - - -def load_experiment_config(conversation_id: str) -> ExperimentConfig | None: - try: - file_path = get_experiment_config_filename(conversation_id) - exp_config = file_store.read(file_path) - return ExperimentConfig.model_validate_json(exp_config) - - except FileNotFoundError: - pass - except Exception as e: - logger.warning(f'Failed to load experiment config: {e}') - - return None - - -class ExperimentManager: - @staticmethod - def run_agent_variant_tests__v1( - user_id: str | None, conversation_id: UUID, agent: Agent - ) -> Agent: - return agent - - @staticmethod - def run_conversation_variant_test( - user_id: str | None, - conversation_id: str, - conversation_settings: ConversationInitData, - ) -> ConversationInitData: - return conversation_settings - - @staticmethod - def run_config_variant_test( - user_id: str | None, conversation_id: str, config: OpenHandsConfig - ) -> OpenHandsConfig: - exp_config = load_experiment_config(conversation_id) - if exp_config and exp_config.config: - agent_cfg = config.get_agent_config(config.default_agent) - try: - for attr, value in exp_config.config.items(): - if hasattr(agent_cfg, attr): - logger.info( - f'Set attrib {attr} to {value} for {conversation_id}' - ) - setattr(agent_cfg, attr, value) - except Exception as e: - logger.warning(f'Error processing exp config: {e}') - - return config - - -experiment_manager_cls = os.environ.get( - 'OPENHANDS_EXPERIMENT_MANAGER_CLS', - 'openhands.experiments.experiment_manager.ExperimentManager', -) -ExperimentManagerImpl = get_impl(ExperimentManager, experiment_manager_cls) diff --git a/openhands/server/conversation_manager/docker_nested_conversation_manager.py b/openhands/server/conversation_manager/docker_nested_conversation_manager.py index 425c9cb8a5..aa8feb477c 100644 --- a/openhands/server/conversation_manager/docker_nested_conversation_manager.py +++ b/openhands/server/conversation_manager/docker_nested_conversation_manager.py @@ -28,7 +28,6 @@ from openhands.core.logger import openhands_logger as logger from openhands.events.action import MessageAction from openhands.events.nested_event_store import NestedEventStore from openhands.events.stream import EventStream -from openhands.experiments.experiment_manager import ExperimentManagerImpl from openhands.integrations.provider import PROVIDER_TOKEN_TYPE, ProviderHandler from openhands.runtime import get_runtime_cls from openhands.runtime.impl.docker.docker_runtime import DockerRuntime @@ -551,12 +550,8 @@ class DockerNestedConversationManager(ConversationManager): # This session is created here only because it is the easiest way to get a runtime, which # is the easiest way to create the needed docker container - config: OpenHandsConfig = ExperimentManagerImpl.run_config_variant_test( - user_id, sid, self.config - ) - llm_registry, conversation_stats, config = ( - create_registry_and_conversation_stats(config, sid, user_id, settings) + create_registry_and_conversation_stats(self.config, sid, user_id, settings) ) session = Session( diff --git a/openhands/server/routes/manage_conversations.py b/openhands/server/routes/manage_conversations.py index eb9d359885..547ca6e252 100644 --- a/openhands/server/routes/manage_conversations.py +++ b/openhands/server/routes/manage_conversations.py @@ -60,7 +60,6 @@ from openhands.events.observation import ( AgentStateChangedObservation, NullObservation, ) -from openhands.experiments.experiment_manager import ExperimentConfig from openhands.integrations.provider import ( PROVIDER_TOKEN_TYPE, ProviderHandler, @@ -109,7 +108,6 @@ from openhands.storage.data_models.conversation_metadata import ( from openhands.storage.data_models.conversation_status import ConversationStatus from openhands.storage.data_models.secrets import Secrets from openhands.storage.data_models.settings import Settings -from openhands.storage.locations import get_experiment_config_filename from openhands.storage.settings.settings_store import SettingsStore from openhands.utils.async_utils import wait_all from openhands.utils.conversation_summary import get_default_conversation_title @@ -1240,32 +1238,6 @@ async def update_conversation( ) -@app.post('/conversations/{conversation_id}/exp-config') -def add_experiment_config_for_conversation( - exp_config: ExperimentConfig, - conversation_id: str = Depends(validate_conversation_id), -) -> bool: - exp_config_filepath = get_experiment_config_filename(conversation_id) - exists = False - try: - file_store.read(exp_config_filepath) - exists = True - except FileNotFoundError: - pass - - # Don't modify again if it already exists - if exists: - return False - - try: - file_store.write(exp_config_filepath, exp_config.model_dump_json()) - except Exception as e: - logger.info(f'Failed to write experiment config for {conversation_id}: {e}') - return True - - return False - - def _parse_combined_page_id(page_id: str | None) -> tuple[str | None, str | None]: """Parse combined page_id to extract separate V0 and V1 page_ids. diff --git a/openhands/server/services/conversation_service.py b/openhands/server/services/conversation_service.py index d37d26fbd8..bddf160c1f 100644 --- a/openhands/server/services/conversation_service.py +++ b/openhands/server/services/conversation_service.py @@ -13,7 +13,6 @@ from typing import Any from openhands.core.config.mcp_config import MCPConfig from openhands.core.logger import openhands_logger as logger from openhands.events.action.message import MessageAction -from openhands.experiments.experiment_manager import ExperimentManagerImpl from openhands.integrations.provider import ( CUSTOM_SECRETS_TYPE, PROVIDER_TOKEN_TYPE, @@ -142,10 +141,6 @@ async def start_conversation( conversation_init_data = ConversationInitData(**session_init_args) - conversation_init_data = ExperimentManagerImpl.run_conversation_variant_test( - user_id, conversation_id, conversation_init_data - ) - logger.info( f'Starting agent loop for conversation {conversation_id}', extra={'user_id': user_id, 'session_id': conversation_id}, @@ -281,8 +276,4 @@ async def setup_init_conversation_settings( if user_secrets: session_init_args['custom_secrets'] = user_secrets.custom_secrets - conversation_init_data = ConversationInitData(**session_init_args) - # We should recreate the same experiment conditions when restarting a conversation - return ExperimentManagerImpl.run_conversation_variant_test( - user_id, conversation_id, conversation_init_data - ) + return ConversationInitData(**session_init_args) diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py index e55618993e..c8cb92bd74 100644 --- a/openhands/server/session/session.py +++ b/openhands/server/session/session.py @@ -108,13 +108,6 @@ class WebSession: EventStreamSubscriber.SERVER, self.on_event, self.sid ) self.config = config - - # Lazy import to avoid circular dependency - from openhands.experiments.experiment_manager import ExperimentManagerImpl - - self.config = ExperimentManagerImpl.run_config_variant_test( - user_id, sid, self.config - ) self.loop = asyncio.get_event_loop() self.user_id = user_id diff --git a/openhands/storage/locations.py b/openhands/storage/locations.py index 58883fccb2..0b1ec72c1d 100644 --- a/openhands/storage/locations.py +++ b/openhands/storage/locations.py @@ -36,7 +36,3 @@ def get_conversation_llm_registry_filename(sid: str, user_id: str | None = None) def get_conversation_stats_filename(sid: str, user_id: str | None = None) -> str: return f'{get_conversation_dir(sid, user_id)}conversation_stats.pkl' - - -def get_experiment_config_filename(sid: str, user_id: str | None = None) -> str: - return f'{get_conversation_dir(sid, user_id)}exp_config.json' diff --git a/scripts/update_openapi.py b/scripts/update_openapi.py index 0336924f11..5f58b35ece 100755 --- a/scripts/update_openapi.py +++ b/scripts/update_openapi.py @@ -129,9 +129,8 @@ def generate_openapi_spec(): """Generate the OpenAPI specification from the FastAPI app.""" spec = app.openapi() - # Explicitly exclude certain endpoints that are operational, experimental, or UI-only convenience + # Explicitly exclude certain endpoints that are operational or UI-only convenience excluded_endpoints = [ - '/api/conversations/{conversation_id}/exp-config', # Internal experimentation endpoint '/server_info', # Operational/system diagnostics '/api/conversations/{conversation_id}/vscode-url', # UI/runtime convenience '/api/conversations/{conversation_id}/web-hosts', # UI/runtime convenience diff --git a/tests/unit/app_server/test_live_status_app_conversation_service.py b/tests/unit/app_server/test_live_status_app_conversation_service.py index e374d45146..b203c240f9 100644 --- a/tests/unit/app_server/test_live_status_app_conversation_service.py +++ b/tests/unit/app_server/test_live_status_app_conversation_service.py @@ -1032,27 +1032,17 @@ class TestLiveStatusAppConversationService: assert agent_context.system_message_suffix is None @pytest.mark.asyncio - @patch( - 'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl' - ) - async def test_finalize_conversation_request_with_skills( - self, mock_experiment_manager - ): + async def test_finalize_conversation_request_with_skills(self): """Test _finalize_conversation_request with skills loading.""" - # Arrange - mock_agent = Mock(spec=Agent) - # Create mock LLM with required attributes for _update_agent_with_llm_metadata mock_llm = Mock(spec=LLM) mock_llm.model = 'gpt-4' # Non-openhands model, so no metadata update mock_llm.usage_id = 'agent' - mock_updated_agent = Mock(spec=Agent) - mock_updated_agent.llm = mock_llm - mock_updated_agent.condenser = None # No condenser - mock_experiment_manager.run_agent_variant_tests__v1.return_value = ( - mock_updated_agent - ) + # Arrange + mock_agent = Mock(spec=Agent) + mock_agent.llm = mock_llm + mock_agent.condenser = None # No condenser conversation_id = uuid4() workspace = LocalWorkspace(working_dir='/test') @@ -1061,9 +1051,7 @@ class TestLiveStatusAppConversationService: remote_workspace = Mock(spec=AsyncRemoteWorkspace) # Mock the skills loading method - self.service._load_skills_and_update_agent = AsyncMock( - return_value=mock_updated_agent - ) + self.service._load_skills_and_update_agent = AsyncMock(return_value=mock_agent) # Act result = await self.service._finalize_conversation_request( @@ -1082,44 +1070,24 @@ class TestLiveStatusAppConversationService: # Assert assert isinstance(result, StartConversationRequest) assert result.conversation_id == conversation_id - assert result.agent == mock_updated_agent assert result.workspace == workspace assert result.initial_message == initial_message assert result.secrets == secrets - mock_experiment_manager.run_agent_variant_tests__v1.assert_called_once_with( - self.mock_user.id, conversation_id, mock_agent - ) - self.service._load_skills_and_update_agent.assert_called_once_with( - self.mock_sandbox, - mock_updated_agent, - remote_workspace, - 'test_repo', - '/test/dir', - ) + self.service._load_skills_and_update_agent.assert_called_once() @pytest.mark.asyncio - @patch( - 'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl' - ) - async def test_finalize_conversation_request_without_skills( - self, mock_experiment_manager - ): + async def test_finalize_conversation_request_without_skills(self): """Test _finalize_conversation_request without remote workspace (no skills).""" - # Arrange - mock_agent = Mock(spec=Agent) - # Create mock LLM with required attributes for _update_agent_with_llm_metadata mock_llm = Mock(spec=LLM) mock_llm.model = 'gpt-4' # Non-openhands model, so no metadata update mock_llm.usage_id = 'agent' - mock_updated_agent = Mock(spec=Agent) - mock_updated_agent.llm = mock_llm - mock_updated_agent.condenser = None # No condenser - mock_experiment_manager.run_agent_variant_tests__v1.return_value = ( - mock_updated_agent - ) + # Arrange + mock_agent = Mock(spec=Agent) + mock_agent.llm = mock_llm + mock_agent.condenser = None # No condenser workspace = LocalWorkspace(working_dir='/test') secrets = {'test': StaticSecret(value='secret')} @@ -1141,31 +1109,18 @@ class TestLiveStatusAppConversationService: # Assert assert isinstance(result, StartConversationRequest) assert isinstance(result.conversation_id, UUID) - assert result.agent == mock_updated_agent - mock_experiment_manager.run_agent_variant_tests__v1.assert_called_once() @pytest.mark.asyncio - @patch( - 'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl' - ) - async def test_finalize_conversation_request_skills_loading_fails( - self, mock_experiment_manager - ): + async def test_finalize_conversation_request_skills_loading_fails(self): """Test _finalize_conversation_request when skills loading fails.""" - # Arrange - mock_agent = Mock(spec=Agent) - # Create mock LLM with required attributes for _update_agent_with_llm_metadata mock_llm = Mock(spec=LLM) mock_llm.model = 'gpt-4' # Non-openhands model, so no metadata update mock_llm.usage_id = 'agent' - mock_updated_agent = Mock(spec=Agent) - mock_updated_agent.llm = mock_llm - mock_updated_agent.condenser = None # No condenser - mock_experiment_manager.run_agent_variant_tests__v1.return_value = ( - mock_updated_agent - ) + mock_agent = Mock(spec=Agent) + mock_agent.llm = mock_llm + mock_agent.condenser = None # No condenser workspace = LocalWorkspace(working_dir='/test') secrets = {'test': StaticSecret(value='secret')} @@ -1195,9 +1150,6 @@ class TestLiveStatusAppConversationService: # Assert assert isinstance(result, StartConversationRequest) - assert ( - result.agent == mock_updated_agent - ) # Should still use the experiment-modified agent mock_logger.warning.assert_called_once() @pytest.mark.asyncio @@ -2266,12 +2218,7 @@ class TestPluginHandling: assert 'key2: value2' in text @pytest.mark.asyncio - @patch( - 'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl' - ) - async def test_finalize_conversation_request_with_plugins( - self, mock_experiment_manager - ): + async def test_finalize_conversation_request_with_plugins(self): """Test _finalize_conversation_request passes plugins list to StartConversationRequest.""" from openhands.app_server.app_conversation.app_conversation_models import ( PluginSpec, @@ -2282,13 +2229,13 @@ class TestPluginHandling: mock_llm = Mock(spec=LLM) mock_llm.model = 'gpt-4' mock_llm.usage_id = 'agent' + mock_agent.llm = mock_llm + mock_agent.condenser = None mock_updated_agent = Mock(spec=Agent) mock_updated_agent.llm = mock_llm mock_updated_agent.condenser = None - mock_experiment_manager.run_agent_variant_tests__v1.return_value = ( - mock_updated_agent - ) + mock_agent.model_copy = Mock(return_value=mock_updated_agent) workspace = LocalWorkspace(working_dir='/test') secrets = {'test': StaticSecret(value='secret')} @@ -2330,25 +2277,20 @@ class TestPluginHandling: assert '- api_key: test123' in result.initial_message.content[0].text @pytest.mark.asyncio - @patch( - 'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl' - ) - async def test_finalize_conversation_request_without_plugins( - self, mock_experiment_manager - ): + async def test_finalize_conversation_request_without_plugins(self): """Test _finalize_conversation_request without plugins sets plugins to None.""" # Arrange mock_agent = Mock(spec=Agent) mock_llm = Mock(spec=LLM) mock_llm.model = 'gpt-4' mock_llm.usage_id = 'agent' + mock_agent.llm = mock_llm + mock_agent.condenser = None mock_updated_agent = Mock(spec=Agent) mock_updated_agent.llm = mock_llm mock_updated_agent.condenser = None - mock_experiment_manager.run_agent_variant_tests__v1.return_value = ( - mock_updated_agent - ) + mock_agent.model_copy = Mock(return_value=mock_updated_agent) workspace = LocalWorkspace(working_dir='/test') secrets = {} @@ -2373,12 +2315,7 @@ class TestPluginHandling: assert result.plugins is None @pytest.mark.asyncio - @patch( - 'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl' - ) - async def test_finalize_conversation_request_plugin_without_ref( - self, mock_experiment_manager - ): + async def test_finalize_conversation_request_plugin_without_ref(self): """Test _finalize_conversation_request with plugin that has no ref.""" from openhands.app_server.app_conversation.app_conversation_models import ( PluginSpec, @@ -2389,13 +2326,13 @@ class TestPluginHandling: mock_llm = Mock(spec=LLM) mock_llm.model = 'gpt-4' mock_llm.usage_id = 'agent' + mock_agent.llm = mock_llm + mock_agent.condenser = None mock_updated_agent = Mock(spec=Agent) mock_updated_agent.llm = mock_llm mock_updated_agent.condenser = None - mock_experiment_manager.run_agent_variant_tests__v1.return_value = ( - mock_updated_agent - ) + mock_agent.model_copy = Mock(return_value=mock_updated_agent) workspace = LocalWorkspace(working_dir='/test') secrets = {} @@ -2428,12 +2365,7 @@ class TestPluginHandling: assert result.initial_message is None @pytest.mark.asyncio - @patch( - 'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl' - ) - async def test_finalize_conversation_request_plugin_with_repo_path( - self, mock_experiment_manager - ): + async def test_finalize_conversation_request_plugin_with_repo_path(self): """Test _finalize_conversation_request passes repo_path to PluginSource.""" from openhands.app_server.app_conversation.app_conversation_models import ( PluginSpec, @@ -2444,13 +2376,13 @@ class TestPluginHandling: mock_llm = Mock(spec=LLM) mock_llm.model = 'gpt-4' mock_llm.usage_id = 'agent' + mock_agent.llm = mock_llm + mock_agent.condenser = None mock_updated_agent = Mock(spec=Agent) mock_updated_agent.llm = mock_llm mock_updated_agent.condenser = None - mock_experiment_manager.run_agent_variant_tests__v1.return_value = ( - mock_updated_agent - ) + mock_agent.model_copy = Mock(return_value=mock_updated_agent) workspace = LocalWorkspace(working_dir='/test') secrets = {} @@ -2488,12 +2420,7 @@ class TestPluginHandling: assert result.plugins[0].repo_path == 'plugins/city-weather' @pytest.mark.asyncio - @patch( - 'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl' - ) - async def test_finalize_conversation_request_multiple_plugins( - self, mock_experiment_manager - ): + async def test_finalize_conversation_request_multiple_plugins(self): """Test _finalize_conversation_request with multiple plugins.""" from openhands.app_server.app_conversation.app_conversation_models import ( PluginSpec, @@ -2504,13 +2431,13 @@ class TestPluginHandling: mock_llm = Mock(spec=LLM) mock_llm.model = 'gpt-4' mock_llm.usage_id = 'agent' + mock_agent.llm = mock_llm + mock_agent.condenser = None mock_updated_agent = Mock(spec=Agent) mock_updated_agent.llm = mock_llm mock_updated_agent.condenser = None - mock_experiment_manager.run_agent_variant_tests__v1.return_value = ( - mock_updated_agent - ) + mock_agent.model_copy = Mock(return_value=mock_updated_agent) workspace = LocalWorkspace(working_dir='/test') secrets = {} diff --git a/tests/unit/experiments/__init__.py b/tests/unit/experiments/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/unit/experiments/test_experiment_manager.py b/tests/unit/experiments/test_experiment_manager.py deleted file mode 100644 index 6a9d5e4a32..0000000000 --- a/tests/unit/experiments/test_experiment_manager.py +++ /dev/null @@ -1,264 +0,0 @@ -"""Unit tests for ExperimentManager class, focusing on the v1 agent method.""" - -from types import SimpleNamespace -from unittest.mock import Mock, patch -from uuid import UUID, uuid4 - -import pytest - -from openhands.app_server.app_conversation.live_status_app_conversation_service import ( - LiveStatusAppConversationService, -) -from openhands.app_server.sandbox.sandbox_models import SandboxInfo, SandboxStatus -from openhands.experiments.experiment_manager import ExperimentManager -from openhands.sdk import Agent -from openhands.sdk.llm import LLM - - -class TestExperimentManager: - """Test cases for ExperimentManager class.""" - - def setup_method(self): - """Set up test fixtures.""" - self.user_id = 'test_user_123' - self.conversation_id = uuid4() - - # Create a mock LLM - self.mock_llm = Mock(spec=LLM) - self.mock_llm.model = 'gpt-4' - self.mock_llm.usage_id = 'agent' - - # Create a mock Agent - self.mock_agent = Mock(spec=Agent) - self.mock_agent.llm = self.mock_llm - self.mock_agent.system_prompt_filename = 'default_system_prompt.j2' - self.mock_agent.model_copy = Mock(return_value=self.mock_agent) - - def test_run_agent_variant_tests__v1_returns_agent_unchanged(self): - """Test that the base ExperimentManager returns the agent unchanged.""" - result = ExperimentManager.run_agent_variant_tests__v1( - self.user_id, self.conversation_id, self.mock_agent - ) - - assert result is self.mock_agent - assert result == self.mock_agent - - def test_run_agent_variant_tests__v1_with_none_user_id(self): - """Test that the method works with None user_id.""" - # Act - result = ExperimentManager.run_agent_variant_tests__v1( - None, self.conversation_id, self.mock_agent - ) - - # Assert - assert result is self.mock_agent - - def test_run_agent_variant_tests__v1_with_different_conversation_ids(self): - """Test that the method works with different conversation IDs.""" - conversation_id_1 = uuid4() - conversation_id_2 = uuid4() - - # Act - result_1 = ExperimentManager.run_agent_variant_tests__v1( - self.user_id, conversation_id_1, self.mock_agent - ) - result_2 = ExperimentManager.run_agent_variant_tests__v1( - self.user_id, conversation_id_2, self.mock_agent - ) - - # Assert - assert result_1 is self.mock_agent - assert result_2 is self.mock_agent - - -class TestExperimentManagerIntegration: - """Integration tests for ExperimentManager with start_app_conversation.""" - - def setup_method(self): - """Set up test fixtures.""" - self.user_id = 'test_user_123' - self.conversation_id = uuid4() - - # Create a mock LLM - self.mock_llm = Mock(spec=LLM) - self.mock_llm.model = 'gpt-4' - self.mock_llm.usage_id = 'agent' - - # Create a mock Agent - self.mock_agent = Mock(spec=Agent) - self.mock_agent.llm = self.mock_llm - self.mock_agent.system_prompt_filename = 'default_system_prompt.j2' - self.mock_agent.model_copy = Mock(return_value=self.mock_agent) - - @patch('openhands.experiments.experiment_manager.ExperimentManagerImpl') - def test_start_app_conversation_calls_experiment_manager_v1( - self, mock_experiment_manager_impl - ): - """Test that start_app_conversation calls the experiment manager v1 method with correct parameters.""" - # Arrange - mock_experiment_manager_impl.run_agent_variant_tests__v1.return_value = ( - self.mock_agent - ) - - # Create a mock service instance - mock_service = Mock(spec=LiveStatusAppConversationService) - - # Mock the _build_start_conversation_request_for_user method to simulate the call - with patch.object(mock_service, '_build_start_conversation_request_for_user'): - # Simulate the part of the code that calls the experiment manager - from uuid import uuid4 - - conversation_id = uuid4() - - # This simulates the call that happens in the actual service - result_agent = mock_experiment_manager_impl.run_agent_variant_tests__v1( - self.user_id, conversation_id, self.mock_agent - ) - - # Assert - mock_experiment_manager_impl.run_agent_variant_tests__v1.assert_called_once_with( - self.user_id, conversation_id, self.mock_agent - ) - assert result_agent == self.mock_agent - - @pytest.mark.asyncio - async def test_experiment_manager_called_with_correct_parameters_in_context__noop_pass_through( - self, - ): - """ - Test that ExperimentManagerImpl.run_agent_variant_tests__v1 is called with correct parameters - and returns the same agent instance (no copy/mutation) when building a StartConversationRequest. - """ - # --- Arrange: fixed UUID to assert call parameters deterministically - fixed_conversation_id = UUID('00000000-0000-0000-0000-000000000001') - - # Create a stable Agent (and LLM) we can identity-check later - mock_llm = Mock(spec=LLM) - mock_llm.model = 'gpt-4' - mock_llm.usage_id = 'agent' - - mock_agent = Mock(spec=Agent) - mock_agent.llm = mock_llm - mock_agent.condenser = None # No condenser for this test - mock_agent.system_prompt_filename = 'default_system_prompt.j2' - mock_agent.model_copy = Mock(return_value=mock_agent) - - # Minimal, real-ish user context used by the service - class DummyUserContext: - async def get_user_info(self): - # confirmation_mode=False -> NeverConfirm() - return SimpleNamespace( - id='test_user_123', - llm_model='gpt-4', - llm_base_url=None, - llm_api_key=None, - confirmation_mode=False, - condenser_max_size=None, - security_analyzer=None, - ) - - async def get_secrets(self): - return {} - - async def get_latest_token(self, provider): - return None - - async def get_user_id(self): - return 'test_user_123' - - user_context = DummyUserContext() - - # The service requires a lot of deps, but for this test we won't exercise them. - app_conversation_info_service = Mock() - app_conversation_start_task_service = Mock() - event_callback_service = Mock() - sandbox_service = Mock() - sandbox_spec_service = Mock() - jwt_service = Mock() - httpx_client = Mock() - - event_service = Mock() - - service = LiveStatusAppConversationService( - init_git_in_empty_workspace=False, - user_context=user_context, - app_conversation_info_service=app_conversation_info_service, - app_conversation_start_task_service=app_conversation_start_task_service, - event_callback_service=event_callback_service, - event_service=event_service, - sandbox_service=sandbox_service, - sandbox_spec_service=sandbox_spec_service, - jwt_service=jwt_service, - sandbox_startup_timeout=30, - sandbox_startup_poll_frequency=1, - httpx_client=httpx_client, - web_url=None, - openhands_provider_base_url=None, - access_token_hard_timeout=None, - ) - - sandbox = SandboxInfo( - id='mock-sandbox-id', - created_by_user_id='mock-user-id', - sandbox_spec_id='mock-sandbox-spec-id', - status=SandboxStatus.RUNNING, - session_api_key='mock-session-api-key', - ) - - # Patch the pieces invoked by the service - with ( - patch.object( - service, - '_setup_secrets_for_git_providers', - return_value={}, - ), - patch.object( - service, - '_configure_llm_and_mcp', - return_value=(mock_llm, {}), - ), - patch.object( - service, - '_create_agent_with_context', - return_value=mock_agent, - ), - patch.object( - service, - '_load_skills_and_update_agent', - return_value=mock_agent, - ), - patch( - 'openhands.app_server.app_conversation.live_status_app_conversation_service.uuid4', - return_value=fixed_conversation_id, - ), - patch( - 'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl' - ) as mock_experiment_manager, - ): - # Configure the experiment manager mock to return the same agent - mock_experiment_manager.run_agent_variant_tests__v1.return_value = ( - mock_agent - ) - - # --- Act: build the start request - start_req = await service._build_start_conversation_request_for_user( - sandbox=sandbox, - initial_message=None, - system_message_suffix=None, # No additional system message suffix - git_provider=None, # Keep secrets path simple - working_dir='/tmp/project', # Arbitrary path - ) - - # --- Assert: verify experiment manager was called with correct parameters - mock_experiment_manager.run_agent_variant_tests__v1.assert_called_once_with( - 'test_user_123', # user_id - fixed_conversation_id, # conversation_id - mock_agent, # agent (after model_copy with agent_context) - ) - - # The agent in the StartConversationRequest is the *same* object returned by experiment manager - assert start_req.agent is mock_agent - - # No tweaks to agent fields by the experiment manager (noop) - assert start_req.agent.llm is mock_llm - assert start_req.agent.system_prompt_filename == 'default_system_prompt.j2'