Remove ExperimentManager concept from codebase (#13215)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Tim O'Farrell
2026-03-04 13:41:18 -07:00
committed by GitHub
parent 3e15b849a3
commit 15e9435b35
28 changed files with 42 additions and 1618 deletions

View File

@@ -53,7 +53,7 @@ repos:
# Use -p (package) to avoid dual module name conflict when using MYPYPATH
# MYPYPATH=enterprise allows resolving bare imports like "from integrations.xxx"
# Note: tests package excluded to avoid conflict with core openhands tests
entry: bash -c 'MYPYPATH=enterprise mypy --config-file enterprise/dev_config/python/mypy.ini -p integrations -p server -p storage -p sync -p experiments'
entry: bash -c 'MYPYPATH=enterprise mypy --config-file enterprise/dev_config/python/mypy.ini -p integrations -p server -p storage -p sync'
always_run: true
pass_filenames: false
files: ^enterprise/

View File

@@ -1,47 +0,0 @@
"""PostHog wiring and experiment feature toggles, resolved at import time.

Every toggle is read once from the environment when this module is first
imported, and the resulting configuration is logged (with the PostHog API
key masked).
"""

import os

import posthog

from openhands.core.logger import openhands_logger as logger

# Initialize PostHog
posthog.api_key = os.environ.get('POSTHOG_CLIENT_KEY', 'phc_placeholder')
posthog.host = os.environ.get('POSTHOG_HOST', 'https://us.i.posthog.com')

# Log PostHog configuration with masked API key for security
api_key = posthog.api_key
if api_key and len(api_key) > 8:
    # Show only the first and last four characters of the key.
    masked_key = f'{api_key[:4]}...{api_key[-4:]}'
else:
    masked_key = 'not_set_or_too_short'
logger.info('posthog_configuration', extra={'posthog_api_key_masked': masked_key})

# Global toggle for the experiment manager
ENABLE_EXPERIMENT_MANAGER = (
    os.environ.get('ENABLE_EXPERIMENT_MANAGER', 'false').lower() == 'true'
)

# Get the current experiment type from environment variable
# If None, no experiment is running
EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT = os.environ.get(
    'EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT', ''
)

# System prompt experiment toggle
EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT = os.environ.get(
    'EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', ''
)

# Model A/B experiment toggle (empty string disables it).
EXPERIMENT_CLAUDE4_VS_GPT5 = os.environ.get('EXPERIMENT_CLAUDE4_VS_GPT5', '')
# Condenser max-step experiment toggle (empty string disables it).
EXPERIMENT_CONDENSER_MAX_STEP = os.environ.get('EXPERIMENT_CONDENSER_MAX_STEP', '')

# Emit the resolved experiment configuration once at import time.
logger.info(
    'experiment_manager:run_conversation_variant_test:experiment_config',
    extra={
        'enable_experiment_manager': ENABLE_EXPERIMENT_MANAGER,
        'experiment_litellm_default_model_experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
        'experiment_system_prompt_experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
        'experiment_claude4_vs_gpt5_experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
        'experiment_condenser_max_step': EXPERIMENT_CONDENSER_MAX_STEP,
    },
)

View File

@@ -1,99 +0,0 @@
from uuid import UUID
from experiments.constants import (
ENABLE_EXPERIMENT_MANAGER,
EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
)
from experiments.experiment_versions import (
handle_system_prompt_experiment,
)
from openhands.core.config.openhands_config import OpenHandsConfig
from openhands.core.logger import openhands_logger as logger
from openhands.experiments.experiment_manager import ExperimentManager
from openhands.sdk import Agent
from openhands.server.session.conversation_init_data import ConversationInitData
class SaaSExperimentManager(ExperimentManager):
    """SaaS implementation of the ExperimentManager hooks.

    Applies PostHog-driven experiment variants to agents, conversation
    settings, and OpenHands configs. All hooks are no-ops when
    ENABLE_EXPERIMENT_MANAGER is false.
    """

    @staticmethod
    def run_agent_variant_tests__v1(
        user_id: str | None, conversation_id: UUID, agent: Agent
    ) -> Agent:
        """Apply agent-level experiment variants and return the agent.

        Returns the original agent untouched when the experiment manager is
        disabled. When the system prompt experiment is active, switches the
        agent's system prompt to the long-horizon prompt — except for
        planning agents, which keep their specialized prompt.
        """
        if not ENABLE_EXPERIMENT_MANAGER:
            logger.info(
                'experiment_manager:run_conversation_variant_test:skipped',
                extra={'reason': 'experiment_manager_disabled'},
            )
            return agent
        if EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT:
            # Skip experiment for planning agents which require their specialized prompt
            if agent.system_prompt_filename != 'system_prompt_planning.j2':
                # model_copy produces a new Agent; the caller's agent is not mutated.
                agent = agent.model_copy(
                    update={'system_prompt_filename': 'system_prompt_long_horizon.j2'}
                )
        return agent

    @staticmethod
    def run_conversation_variant_test(
        user_id, conversation_id, conversation_settings
    ) -> ConversationInitData:
        """
        Run conversation variant test and potentially modify the conversation settings
        based on the PostHog feature flags.

        Args:
            user_id: The user ID
            conversation_id: The conversation ID
            conversation_settings: The conversation settings that may include convo_id and llm_model

        Returns:
            The modified conversation settings
        """
        logger.debug(
            'experiment_manager:run_conversation_variant_test:started',
            extra={'user_id': user_id, 'conversation_id': conversation_id},
        )
        # Currently a no-op: settings are returned unmodified.
        return conversation_settings

    @staticmethod
    def run_config_variant_test(
        user_id: str | None, conversation_id: str, config: OpenHandsConfig
    ) -> OpenHandsConfig:
        """
        Run agent config variant test and potentially modify the OpenHands config
        based on the current experiment type and PostHog feature flags.

        Args:
            user_id: The user ID
            conversation_id: The conversation ID
            config: The OpenHands configuration

        Returns:
            The modified OpenHands configuration
        """
        logger.info(
            'experiment_manager:run_config_variant_test:started',
            extra={'user_id': user_id},
        )
        # Skip all experiment processing if the experiment manager is disabled
        if not ENABLE_EXPERIMENT_MANAGER:
            logger.info(
                'experiment_manager:run_config_variant_test:skipped',
                extra={'reason': 'experiment_manager_disabled'},
            )
            return config
        # Pass the entire OpenHands config to the system prompt experiment
        # Let the experiment handler directly modify the config as needed
        modified_config = handle_system_prompt_experiment(
            user_id, conversation_id, config
        )
        # Condenser max step experiment is applied via conversation variant test,
        # not config variant test. Return modified config from system prompt only.
        return modified_config

View File

@@ -1,107 +0,0 @@
"""
LiteLLM model experiment handler.
This module contains the handler for the LiteLLM model experiment.
"""
import posthog
from experiments.constants import EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT
from server.constants import (
IS_FEATURE_ENV,
build_litellm_proxy_model_path,
get_default_litellm_model,
)
from openhands.core.logger import openhands_logger as logger
def handle_litellm_default_model_experiment(
    user_id, conversation_id, conversation_settings
):
    """
    Handle the LiteLLM model experiment.

    Fetches the PostHog variant for this conversation, records the
    assignment event, and updates ``conversation_settings.llm_model``
    accordingly. Any PostHog failure leaves the settings unmodified.

    Args:
        user_id: The user ID
        conversation_id: The conversation ID
        conversation_settings: The conversation settings

    Returns:
        Modified conversation settings
    """
    # No-op if the specific experiment is not enabled
    if not EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT:
        logger.info(
            'experiment_manager:ab_testing:skipped',
            extra={
                'convo_id': conversation_id,
                'reason': 'experiment_not_enabled',
                'experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
            },
        )
        return conversation_settings
    # Use experiment name as the flag key
    try:
        enabled_variant = posthog.get_feature_flag(
            EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT, conversation_id
        )
    except Exception as e:
        # A flag-service failure must not break conversation creation.
        logger.error(
            'experiment_manager:get_feature_flag:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
                'error': str(e),
            },
        )
        return conversation_settings
    # Log the experiment event
    # If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog
    posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id
    try:
        posthog.capture(
            distinct_id=posthog_user_id,
            event='model_set',
            properties={
                'conversation_id': conversation_id,
                'variant': enabled_variant,
                'original_user_id': user_id,
                'is_feature_env': IS_FEATURE_ENV,
            },
        )
    except Exception as e:
        logger.error(
            'experiment_manager:posthog_capture:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
                'error': str(e),
            },
        )
        # Continue execution as this is not critical
    logger.info(
        'posthog_capture',
        extra={
            'event': 'model_set',
            'posthog_user_id': posthog_user_id,
            'is_feature_env': IS_FEATURE_ENV,
            'conversation_id': conversation_id,
            'variant': enabled_variant,
        },
    )
    # Set the model based on the feature flag variant
    if enabled_variant == 'claude37':
        # Use the shared utility to construct the LiteLLM proxy model path
        model = build_litellm_proxy_model_path('claude-3-7-sonnet-20250219')
        # Update the conversation settings with the selected model
        conversation_settings.llm_model = model
    else:
        # Update the conversation settings with the default model for the current version
        conversation_settings.llm_model = get_default_litellm_model()
    return conversation_settings

View File

@@ -1,181 +0,0 @@
"""
System prompt experiment handler.
This module contains the handler for the system prompt experiment that uses
the PostHog variant as the system prompt filename.
"""
import copy
import posthog
from experiments.constants import EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT
from server.constants import IS_FEATURE_ENV
from storage.experiment_assignment_store import ExperimentAssignmentStore
from openhands.core.config.openhands_config import OpenHandsConfig
from openhands.core.logger import openhands_logger as logger
def _get_system_prompt_variant(user_id, conversation_id):
    """
    Get the system prompt variant for the experiment.

    Pipeline: fetch the PostHog flag, persist the assignment to the DB,
    then capture the analytics event. A DB-store failure aborts the
    experiment (returns None) so the split remains explainable; a capture
    failure is tolerated.

    Args:
        user_id: The user ID
        conversation_id: The conversation ID

    Returns:
        str or None: The PostHog variant name or None if experiment is not enabled or error occurs
    """
    # No-op if the specific experiment is not enabled
    if not EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT:
        logger.info(
            'experiment_manager_002:ab_testing:skipped',
            extra={
                'convo_id': conversation_id,
                'reason': 'experiment_not_enabled',
                'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
            },
        )
        return None
    # Use experiment name as the flag key
    try:
        enabled_variant = posthog.get_feature_flag(
            EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT, conversation_id
        )
    except Exception as e:
        logger.error(
            'experiment_manager:get_feature_flag:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
                'error': str(e),
            },
        )
        return None
    # Store the experiment assignment in the database
    try:
        experiment_store = ExperimentAssignmentStore()
        experiment_store.update_experiment_variant(
            conversation_id=conversation_id,
            experiment_name='system_prompt_experiment',
            variant=enabled_variant,
        )
    except Exception as e:
        logger.error(
            'experiment_manager:store_assignment:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
                'variant': enabled_variant,
                'error': str(e),
            },
        )
        # Fail the experiment if we cannot track the splits - results would not be explainable
        return None
    # Log the experiment event
    # If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog
    posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id
    try:
        posthog.capture(
            distinct_id=posthog_user_id,
            event='system_prompt_set',
            properties={
                'conversation_id': conversation_id,
                'variant': enabled_variant,
                'original_user_id': user_id,
                'is_feature_env': IS_FEATURE_ENV,
            },
        )
    except Exception as e:
        logger.error(
            'experiment_manager:posthog_capture:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
                'error': str(e),
            },
        )
        # Continue execution as this is not critical
    logger.info(
        'posthog_capture',
        extra={
            'event': 'system_prompt_set',
            'posthog_user_id': posthog_user_id,
            'is_feature_env': IS_FEATURE_ENV,
            'conversation_id': conversation_id,
            'variant': enabled_variant,
        },
    )
    return enabled_variant
def handle_system_prompt_experiment(
    user_id, conversation_id, config: OpenHandsConfig
) -> OpenHandsConfig:
    """
    Handle the system prompt experiment for OpenHands config.

    Args:
        user_id: The user ID
        conversation_id: The conversation ID
        config: The OpenHands configuration

    Returns:
        Modified OpenHands configuration
    """
    variant = _get_system_prompt_variant(user_id, conversation_id)
    # A None variant means the experiment is off or variant lookup failed.
    if variant is None:
        return config

    # Known variants and the system prompt filename each one selects.
    prompt_by_variant = {
        'control': 'system_prompt_long_horizon.j2',
        'interactive': 'system_prompt_interactive.j2',
        'no_tools': 'system_prompt.j2',
    }
    prompt_filename = prompt_by_variant.get(variant)
    if prompt_filename is None:
        logger.error(
            'system_prompt_experiment:unknown_variant',
            extra={
                'user_id': user_id,
                'convo_id': conversation_id,
                'variant': variant,
                'reason': 'no explicit mapping; returning original config',
            },
        )
        return config

    # Work on a deep copy so the caller's config is never mutated.
    updated_config = copy.deepcopy(config)
    agent_config = updated_config.get_agent_config(updated_config.default_agent)
    agent_config.system_prompt_filename = prompt_filename
    if variant == 'control':
        # The control variant additionally turns on plan mode.
        agent_config.enable_plan_mode = True

    # Log which prompt is being used for this conversation.
    logger.info(
        'system_prompt_experiment:prompt_selected',
        extra={
            'user_id': user_id,
            'convo_id': conversation_id,
            'system_prompt_filename': agent_config.system_prompt_filename,
            'variant': variant,
        },
    )
    return updated_config

View File

@@ -1,137 +0,0 @@
"""
LiteLLM model experiment handler.
This module contains the handler for the LiteLLM model experiment.
"""
import posthog
from experiments.constants import EXPERIMENT_CLAUDE4_VS_GPT5
from server.constants import (
IS_FEATURE_ENV,
build_litellm_proxy_model_path,
get_default_litellm_model,
)
from storage.experiment_assignment_store import ExperimentAssignmentStore
from openhands.core.logger import openhands_logger as logger
from openhands.server.session.conversation_init_data import ConversationInitData
def _get_model_variant(user_id: str | None, conversation_id: str) -> str | None:
    """Return the PostHog variant for the Claude 4 vs GPT-5 experiment.

    Returns None when the experiment is disabled, the flag lookup fails,
    or the assignment cannot be persisted. A failed analytics capture is
    tolerated and does not abort the experiment.
    """
    if not EXPERIMENT_CLAUDE4_VS_GPT5:
        logger.info(
            'experiment_manager:ab_testing:skipped',
            extra={
                'convo_id': conversation_id,
                'reason': 'experiment_not_enabled',
                'experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
            },
        )
        return None
    # The experiment name doubles as the PostHog flag key.
    try:
        enabled_variant = posthog.get_feature_flag(
            EXPERIMENT_CLAUDE4_VS_GPT5, conversation_id
        )
    except Exception as e:
        logger.error(
            'experiment_manager:get_feature_flag:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
                'error': str(e),
            },
        )
        return None
    # Store the experiment assignment in the database
    try:
        experiment_store = ExperimentAssignmentStore()
        experiment_store.update_experiment_variant(
            conversation_id=conversation_id,
            experiment_name='claude4_vs_gpt5_experiment',
            variant=enabled_variant,
        )
    except Exception as e:
        logger.error(
            'experiment_manager:store_assignment:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
                'variant': enabled_variant,
                'error': str(e),
            },
        )
        # Fail the experiment if we cannot track the splits - results would not be explainable
        return None
    # Log the experiment event
    # If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog
    posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id
    try:
        posthog.capture(
            distinct_id=posthog_user_id,
            event='claude4_or_gpt5_set',
            properties={
                'conversation_id': conversation_id,
                'variant': enabled_variant,
                'original_user_id': user_id,
                'is_feature_env': IS_FEATURE_ENV,
            },
        )
    except Exception as e:
        logger.error(
            'experiment_manager:posthog_capture:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
                'error': str(e),
            },
        )
        # Continue execution as this is not critical
    logger.info(
        'posthog_capture',
        extra={
            'event': 'claude4_or_gpt5_set',
            'posthog_user_id': posthog_user_id,
            'is_feature_env': IS_FEATURE_ENV,
            'conversation_id': conversation_id,
            'variant': enabled_variant,
        },
    )
    return enabled_variant
def handle_claude4_vs_gpt5_experiment(
    user_id: str | None,
    conversation_id: str,
    conversation_settings: ConversationInitData,
) -> ConversationInitData:
    """
    Handle the LiteLLM model experiment.

    Args:
        user_id: The user ID
        conversation_id: The conversation ID
        conversation_settings: The conversation settings

    Returns:
        Modified conversation settings
    """
    variant = _get_model_variant(user_id, conversation_id)
    if not variant:
        # Experiment disabled or assignment failed: leave settings untouched.
        return conversation_settings

    # The gpt5 variant gets the pinned GPT-5 snapshot via the LiteLLM proxy;
    # every other variant falls back to the current default model.
    if variant == 'gpt5':
        conversation_settings.llm_model = build_litellm_proxy_model_path(
            'gpt-5-2025-08-07'
        )
    else:
        conversation_settings.llm_model = get_default_litellm_model()
    return conversation_settings

View File

@@ -1,232 +0,0 @@
"""
Condenser max step experiment handler.
This module contains the handler for the condenser max step experiment that tests
different max_size values for the condenser configuration.
"""
from uuid import UUID
import posthog
from experiments.constants import EXPERIMENT_CONDENSER_MAX_STEP
from server.constants import IS_FEATURE_ENV
from storage.experiment_assignment_store import ExperimentAssignmentStore
from openhands.core.logger import openhands_logger as logger
from openhands.sdk import Agent
from openhands.sdk.context.condenser import (
LLMSummarizingCondenser,
)
from openhands.server.session.conversation_init_data import ConversationInitData
def _get_condenser_max_step_variant(user_id, conversation_id):
    """
    Get the condenser max step variant for the experiment.

    Pipeline: fetch the PostHog flag, persist the assignment, capture the
    analytics event. Persistence failure aborts (returns None) so splits
    stay explainable; capture failure is tolerated.

    Args:
        user_id: The user ID
        conversation_id: The conversation ID

    Returns:
        str or None: The PostHog variant name or None if experiment is not enabled or error occurs
    """
    # No-op if the specific experiment is not enabled
    if not EXPERIMENT_CONDENSER_MAX_STEP:
        logger.info(
            'experiment_manager_004:ab_testing:skipped',
            extra={
                'convo_id': conversation_id,
                'reason': 'experiment_not_enabled',
                'experiment': EXPERIMENT_CONDENSER_MAX_STEP,
            },
        )
        return None
    # Use experiment name as the flag key
    try:
        enabled_variant = posthog.get_feature_flag(
            EXPERIMENT_CONDENSER_MAX_STEP, conversation_id
        )
    except Exception as e:
        logger.error(
            'experiment_manager:get_feature_flag:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_CONDENSER_MAX_STEP,
                'error': str(e),
            },
        )
        return None
    # Store the experiment assignment in the database
    try:
        experiment_store = ExperimentAssignmentStore()
        experiment_store.update_experiment_variant(
            conversation_id=conversation_id,
            experiment_name='condenser_max_step_experiment',
            variant=enabled_variant,
        )
    except Exception as e:
        logger.error(
            'experiment_manager:store_assignment:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_CONDENSER_MAX_STEP,
                'variant': enabled_variant,
                'error': str(e),
            },
        )
        # Fail the experiment if we cannot track the splits - results would not be explainable
        return None
    # Log the experiment event
    # If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog
    posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id
    try:
        posthog.capture(
            distinct_id=posthog_user_id,
            event='condenser_max_step_set',
            properties={
                'conversation_id': conversation_id,
                'variant': enabled_variant,
                'original_user_id': user_id,
                'is_feature_env': IS_FEATURE_ENV,
            },
        )
    except Exception as e:
        logger.error(
            'experiment_manager:posthog_capture:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': EXPERIMENT_CONDENSER_MAX_STEP,
                'error': str(e),
            },
        )
        # Continue execution as this is not critical
    logger.info(
        'posthog_capture',
        extra={
            'event': 'condenser_max_step_set',
            'posthog_user_id': posthog_user_id,
            'is_feature_env': IS_FEATURE_ENV,
            'conversation_id': conversation_id,
            'variant': enabled_variant,
        },
    )
    return enabled_variant
def handle_condenser_max_step_experiment(
    user_id: str | None,
    conversation_id: str,
    conversation_settings: ConversationInitData,
) -> ConversationInitData:
    """
    Handle the condenser max step experiment for conversation settings.

    We should not modify persistent user settings. Instead, apply the experiment
    variant to the conversation's in-memory settings object for this session only.

    Variants:
        - control -> condenser_max_size = 120
        - treatment -> condenser_max_size = 80

    Returns the (potentially) modified conversation_settings.
    """
    enabled_variant = _get_condenser_max_step_variant(user_id, conversation_id)
    if enabled_variant is None:
        # Experiment off or assignment failed; settings unchanged.
        return conversation_settings
    if enabled_variant == 'control':
        condenser_max_size = 120
    elif enabled_variant == 'treatment':
        condenser_max_size = 80
    else:
        logger.error(
            'condenser_max_step_experiment:unknown_variant',
            extra={
                'user_id': user_id,
                'convo_id': conversation_id,
                'variant': enabled_variant,
                'reason': 'unknown variant; returning original conversation settings',
            },
        )
        return conversation_settings
    try:
        # Apply the variant to this conversation only; do not persist to DB.
        # Not all OpenHands versions expose `condenser_max_size` on settings.
        if hasattr(conversation_settings, 'condenser_max_size'):
            conversation_settings.condenser_max_size = condenser_max_size
            logger.info(
                'condenser_max_step_experiment:conversation_settings_applied',
                extra={
                    'user_id': user_id,
                    'convo_id': conversation_id,
                    'variant': enabled_variant,
                    'condenser_max_size': condenser_max_size,
                },
            )
        else:
            logger.warning(
                'condenser_max_step_experiment:field_missing_on_settings',
                extra={
                    'user_id': user_id,
                    'convo_id': conversation_id,
                    'variant': enabled_variant,
                    'reason': 'condenser_max_size not present on ConversationInitData',
                },
            )
    except Exception as e:
        # Best-effort: a failure to apply never blocks conversation startup.
        logger.error(
            'condenser_max_step_experiment:apply_failed',
            extra={
                'user_id': user_id,
                'convo_id': conversation_id,
                'variant': enabled_variant,
                'error': str(e),
            },
        )
        return conversation_settings
    return conversation_settings
def handle_condenser_max_step_experiment__v1(
    user_id: str | None,
    conversation_id: UUID,
    agent: Agent,
) -> Agent:
    """Return a copy of *agent* with an experiment-sized condenser attached.

    The condenser's max_size is chosen by the PostHog variant
    (control -> 120, treatment -> 80). The original agent is returned
    unchanged when the experiment is off or the variant is unrecognized.
    """
    variant = _get_condenser_max_step_variant(user_id, str(conversation_id))
    if variant is None:
        return agent

    size_by_variant = {'control': 120, 'treatment': 80}
    condenser_max_size = size_by_variant.get(variant)
    if condenser_max_size is None:
        logger.error(
            'condenser_max_step_experiment:unknown_variant',
            extra={
                'user_id': user_id,
                'convo_id': conversation_id,
                'variant': variant,
                'reason': 'unknown variant; returning original conversation settings',
            },
        )
        return agent

    # Give the condenser its own LLM handle (separate usage tracking id).
    condenser = LLMSummarizingCondenser(
        llm=agent.llm.model_copy(update={'usage_id': 'condenser'}),
        max_size=condenser_max_size,
        keep_first=4,
    )
    return agent.model_copy(update={'condenser': condenser})

View File

@@ -1,25 +0,0 @@
"""
Experiment versions package.
This package contains handlers for different experiment versions.
"""
from experiments.experiment_versions._001_litellm_default_model_experiment import (
handle_litellm_default_model_experiment,
)
from experiments.experiment_versions._002_system_prompt_experiment import (
handle_system_prompt_experiment,
)
from experiments.experiment_versions._003_llm_claude4_vs_gpt5_experiment import (
handle_claude4_vs_gpt5_experiment,
)
from experiments.experiment_versions._004_condenser_max_step_experiment import (
handle_condenser_max_step_experiment,
)
__all__ = [
'handle_litellm_default_model_experiment',
'handle_system_prompt_experiment',
'handle_claude4_vs_gpt5_experiment',
'handle_condenser_max_step_experiment',
]

View File

@@ -17,7 +17,6 @@ packages = [
{ include = "storage" },
{ include = "sync" },
{ include = "integrations" },
{ include = "experiments" },
]
[tool.poetry.dependencies]

View File

@@ -129,10 +129,6 @@ async def _process_batch_operations_background(
# No action required
continue
if subpath == 'exp_config.json':
# No action required
continue
# Log unhandled paths for future implementation
logger.warning(
'unknown_path_in_batch_webhook',

View File

@@ -391,39 +391,11 @@ class SaasNestedConversationManager(ConversationManager):
await self._setup_nested_settings(client, api_url, settings)
await self._setup_provider_tokens(client, api_url, settings)
await self._setup_custom_secrets(client, api_url, settings.custom_secrets) # type: ignore
await self._setup_experiment_config(client, api_url, sid, user_id)
await self._create_nested_conversation(
client, api_url, sid, user_id, settings, initial_user_msg, replay_json
)
await self._wait_for_conversation_ready(client, api_url, sid)
    async def _setup_experiment_config(
        self, client: httpx.AsyncClient, api_url: str, sid: str, user_id: str
    ):
        """Push the experiment-derived config to the nested conversation.

        Runs the config variant test for this user/conversation and POSTs
        the resulting system prompt filename to the nested server's
        exp-config endpoint. Raises httpx.HTTPStatusError on a non-2xx
        response.
        """
        # Prevent circular import
        from openhands.experiments.experiment_manager import (
            ExperimentConfig,
            ExperimentManagerImpl,
        )

        config: OpenHandsConfig = ExperimentManagerImpl.run_config_variant_test(
            user_id, sid, self.config
        )
        # Only the system prompt filename is forwarded to the nested server.
        experiment_config = ExperimentConfig(
            config={
                'system_prompt_filename': config.get_agent_config(
                    config.default_agent
                ).system_prompt_filename
            }
        )
        response = await client.post(
            f'{api_url}/api/conversations/{sid}/exp-config',
            json=experiment_config.model_dump(),
        )
        response.raise_for_status()
async def _setup_nested_settings(
self, client: httpx.AsyncClient, api_url: str, settings: Settings
) -> None:

View File

@@ -4,7 +4,6 @@ from storage.billing_session import BillingSession
from storage.billing_session_type import BillingSessionType
from storage.conversation_callback import CallbackStatus, ConversationCallback
from storage.conversation_work import ConversationWork
from storage.experiment_assignment import ExperimentAssignment
from storage.feedback import ConversationFeedback, Feedback
from storage.github_app_installation import GithubAppInstallation
from storage.gitlab_webhook import GitlabWebhook, WebhookStatus
@@ -50,7 +49,6 @@ __all__ = [
'ConversationFeedback',
'StoredConversationMetadataSaas',
'ConversationWork',
'ExperimentAssignment',
'Feedback',
'GithubAppInstallation',
'GitlabWebhook',

View File

@@ -1,41 +0,0 @@
"""
Database model for experiment assignments.
This model tracks which experiments a conversation is assigned to and what variant
they received from PostHog feature flags.
"""
import uuid
from datetime import UTC, datetime
from sqlalchemy import Column, DateTime, String, UniqueConstraint
from storage.base import Base
class ExperimentAssignment(Base):  # type: ignore
    """One row per (conversation, experiment) recording the assigned variant."""

    __tablename__ = 'experiment_assignments'

    # Surrogate primary key: a random UUID string generated per row.
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    # Conversation this assignment belongs to; indexed for lookups.
    conversation_id = Column(String, nullable=True, index=True)
    # Logical experiment name, e.g. 'system_prompt_experiment'.
    experiment_name = Column(String, nullable=False)
    # Variant string returned by the feature-flag provider.
    variant = Column(String, nullable=False)
    # Timezone-aware creation timestamp (UTC).
    created_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(UTC),  # type: ignore[attr-defined]
        nullable=False,
    )
    # Timezone-aware last-update timestamp (UTC); refreshed on every update.
    updated_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(UTC),  # type: ignore[attr-defined]
        onupdate=lambda: datetime.now(UTC),  # type: ignore[attr-defined]
        nullable=False,
    )
    # Each conversation may hold at most one assignment per experiment.
    __table_args__ = (
        UniqueConstraint(
            'conversation_id',
            'experiment_name',
            name='uq_experiment_assignments_conversation_experiment',
        ),
    )

View File

@@ -1,52 +0,0 @@
"""
Store for managing experiment assignments.
This store handles creating and updating experiment assignments for conversations.
"""
from sqlalchemy.dialects.postgresql import insert
from storage.database import session_maker
from storage.experiment_assignment import ExperimentAssignment
from openhands.core.logger import openhands_logger as logger
class ExperimentAssignmentStore:
    """Store for managing experiment assignments."""

    def update_experiment_variant(
        self,
        conversation_id: str,
        experiment_name: str,
        variant: str,
    ) -> None:
        """
        Update the variant for a specific experiment.

        The write is idempotent: a second call for the same
        (conversation_id, experiment_name) pair is silently ignored.

        Args:
            conversation_id: The conversation ID
            experiment_name: The name of the experiment
            variant: The variant assigned
        """
        with session_maker() as session:
            # Use PostgreSQL's INSERT ... ON CONFLICT DO NOTHING to handle unique constraint
            stmt = insert(ExperimentAssignment).values(
                conversation_id=conversation_id,
                experiment_name=experiment_name,
                variant=variant,
            )
            stmt = stmt.on_conflict_do_nothing(
                constraint='uq_experiment_assignments_conversation_experiment'
            )
            session.execute(stmt)
            session.commit()
            # NOTE(review): log fires even when the insert was a no-op conflict.
            logger.info(
                'experiment_assignment_store:upserted_variant',
                extra={
                    'conversation_id': conversation_id,
                    'experiment_name': experiment_name,
                    'variant': variant,
                },
            )

View File

@@ -1 +0,0 @@
"""Unit tests for experiments module."""

View File

@@ -1,149 +0,0 @@
# tests/test_condenser_max_step_experiment_v1.py
from unittest.mock import patch
from uuid import uuid4
from experiments.experiment_manager import SaaSExperimentManager
# SUT imports (update the module path if needed)
from experiments.experiment_versions._004_condenser_max_step_experiment import (
handle_condenser_max_step_experiment__v1,
)
from pydantic import SecretStr
from openhands.sdk import LLM, Agent
from openhands.sdk.context.condenser import LLMSummarizingCondenser
def make_agent() -> Agent:
    """Construct the smallest Agent that passes validation."""
    return Agent(
        llm=LLM(
            usage_id='primary-llm',
            model='provider/model',
            api_key=SecretStr('sk-test'),
        )
    )
def _patch_variant(monkeypatch, return_value):
    """Force the condenser variant getter to yield *return_value*."""
    target = (
        'experiments.experiment_versions._004_condenser_max_step_experiment'
        '._get_condenser_max_step_variant'
    )
    monkeypatch.setattr(
        target,
        lambda user_id, conv_id: return_value,
        raising=True,
    )
def test_control_variant_sets_condenser_with_max_size_120(monkeypatch):
    """Control variant installs an LLMSummarizingCondenser with max_size=120."""
    _patch_variant(monkeypatch, 'control')
    agent = make_agent()
    conv_id = uuid4()

    result = handle_condenser_max_step_experiment__v1('user-1', conv_id, agent)

    # Should be a new Agent instance with a condenser installed
    assert result is not agent
    assert isinstance(result.condenser, LLMSummarizingCondenser)
    # The condenser should have its own LLM (usage_id overridden to "condenser")
    assert result.condenser.llm.usage_id == 'condenser'
    # The original agent LLM remains unchanged
    assert agent.llm.usage_id == 'primary-llm'
    # Control: max_size = 120, keep_first = 4
    assert result.condenser.max_size == 120
    assert result.condenser.keep_first == 4


def test_treatment_variant_sets_condenser_with_max_size_80(monkeypatch):
    """Treatment variant installs an LLMSummarizingCondenser with max_size=80."""
    _patch_variant(monkeypatch, 'treatment')
    agent = make_agent()
    conv_id = uuid4()

    result = handle_condenser_max_step_experiment__v1('user-2', conv_id, agent)

    assert result is not agent
    assert isinstance(result.condenser, LLMSummarizingCondenser)
    assert result.condenser.llm.usage_id == 'condenser'
    assert result.condenser.max_size == 80
    assert result.condenser.keep_first == 4


def test_none_variant_returns_original_agent_without_changes(monkeypatch):
    """A None variant (experiment off or lookup failure) is a strict no-op."""
    _patch_variant(monkeypatch, None)
    agent = make_agent()
    conv_id = uuid4()

    result = handle_condenser_max_step_experiment__v1('user-3', conv_id, agent)

    # No changes—same instance and no condenser attribute added
    assert result is agent
    assert getattr(result, 'condenser', None) is None


def test_unknown_variant_returns_original_agent_without_changes(monkeypatch):
    """Unrecognized variants are logged and ignored, never applied."""
    _patch_variant(monkeypatch, 'weird-variant')
    agent = make_agent()
    conv_id = uuid4()

    result = handle_condenser_max_step_experiment__v1('user-4', conv_id, agent)

    assert result is agent
    assert getattr(result, 'condenser', None) is None
@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', False)
def test_run_agent_variant_tests_v1_noop_when_manager_disabled():
    """If ENABLE_EXPERIMENT_MANAGER is False, the method returns the exact same agent and does not call the handler."""
    agent = make_agent()
    conv_id = uuid4()

    result = SaaSExperimentManager.run_agent_variant_tests__v1(
        user_id='user-123',
        conversation_id=conv_id,
        agent=agent,
    )

    # Same object returned (no copy)
    assert result is agent


@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', True)
@patch('experiments.experiment_manager.EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', True)
def test_run_agent_variant_tests_v1_calls_handler_and_sets_system_prompt(monkeypatch):
    """When enabled, it should call the condenser experiment handler and set the long-horizon system prompt."""
    agent = make_agent()
    conv_id = uuid4()
    _patch_variant(monkeypatch, 'treatment')

    result: Agent = SaaSExperimentManager.run_agent_variant_tests__v1(
        user_id='user-abc',
        conversation_id=conv_id,
        agent=agent,
    )

    # Should be a different instance than the original (copied after handler runs)
    assert result is not agent
    assert result.system_prompt_filename == 'system_prompt_long_horizon.j2'


@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', True)
@patch('experiments.experiment_manager.EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', True)
def test_run_agent_variant_tests_v1_preserves_planning_agent_system_prompt():
    """Planning agents should retain their specialized system prompt and not be overwritten by the experiment."""
    # Arrange
    planning_agent = make_agent().model_copy(
        update={'system_prompt_filename': 'system_prompt_planning.j2'}
    )
    conv_id = uuid4()

    # Act
    result: Agent = SaaSExperimentManager.run_agent_variant_tests__v1(
        user_id='user-planning',
        conversation_id=conv_id,
        agent=planning_agent,
    )

    # Assert
    assert result.system_prompt_filename == 'system_prompt_planning.j2'

View File

@@ -77,7 +77,6 @@ from openhands.app_server.utils.llm_metadata import (
get_llm_metadata,
should_set_litellm_extra_body,
)
from openhands.experiments.experiment_manager import ExperimentManagerImpl
from openhands.integrations.provider import ProviderType
from openhands.integrations.service_types import SuggestedTask
from openhands.sdk import Agent, AgentContext, LocalWorkspace
@@ -1140,7 +1139,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
working_dir: str,
plugins: list[PluginSpec] | None = None,
) -> StartConversationRequest:
"""Finalize the conversation request with experiment variants and skills.
"""Finalize the conversation request with skills and metadata.
Args:
agent: The configured agent
@@ -1161,13 +1160,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
# Generate conversation ID if not provided
conversation_id = conversation_id or uuid4()
# Apply experiment variants
agent = ExperimentManagerImpl.run_agent_variant_tests__v1(
user.id, conversation_id, agent
)
# Update agent's LLM with litellm_extra_body metadata for tracing
# This is done after experiment variants to ensure the final LLM config is used
agent = self._update_agent_with_llm_metadata(agent, conversation_id, user.id)
# Load and merge skills if remote workspace is available
@@ -1230,7 +1223,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
1. Setting up git provider secrets
2. Configuring LLM and MCP settings
3. Creating an agent with appropriate context
4. Finalizing the request with skills and experiment variants
4. Finalizing the request with skills and metadata
5. Passing plugins to the agent server for remote plugin loading
"""
user = await self.user_context.get_user_info()

View File

@@ -1,72 +0,0 @@
import os
from uuid import UUID
from pydantic import BaseModel
from openhands.core.config.openhands_config import OpenHandsConfig
from openhands.core.logger import openhands_logger as logger
from openhands.sdk import Agent
from openhands.server.session.conversation_init_data import ConversationInitData
from openhands.server.shared import file_store
from openhands.storage.locations import get_experiment_config_filename
from openhands.utils.import_utils import get_impl
class ExperimentConfig(BaseModel):
    """Per-conversation experiment overrides persisted in the file store."""

    # Maps agent-config attribute names to string override values;
    # None means no overrides were recorded for the conversation.
    config: dict[str, str] | None = None
def load_experiment_config(conversation_id: str) -> ExperimentConfig | None:
    """Read the stored experiment config for a conversation, if one exists.

    Returns None when no config file is present, or when reading/parsing
    fails (the failure is logged, never raised to the caller).
    """
    try:
        payload = file_store.read(get_experiment_config_filename(conversation_id))
        return ExperimentConfig.model_validate_json(payload)
    except FileNotFoundError:
        # A missing file simply means no experiment config was recorded.
        return None
    except Exception as e:
        logger.warning(f'Failed to load experiment config: {e}')
        return None
class ExperimentManager:
    """Default, no-op experiment hooks.

    Deployments can point OPENHANDS_EXPERIMENT_MANAGER_CLS at a subclass to
    inject real variant-assignment behavior. This base class returns every
    input unchanged, except that run_config_variant_test overlays any stored
    per-conversation config overrides onto the default agent config.
    """

    @staticmethod
    def run_agent_variant_tests__v1(
        user_id: str | None, conversation_id: UUID, agent: Agent
    ) -> Agent:
        # Base behavior: no agent-level experiments.
        return agent

    @staticmethod
    def run_conversation_variant_test(
        user_id: str | None,
        conversation_id: str,
        conversation_settings: ConversationInitData,
    ) -> ConversationInitData:
        # Base behavior: no conversation-level experiments.
        return conversation_settings

    @staticmethod
    def run_config_variant_test(
        user_id: str | None, conversation_id: str, config: OpenHandsConfig
    ) -> OpenHandsConfig:
        # Apply any persisted per-conversation overrides; failures are
        # logged and the (possibly partially updated) config is returned.
        stored = load_experiment_config(conversation_id)
        if not (stored and stored.config):
            return config
        agent_cfg = config.get_agent_config(config.default_agent)
        try:
            for attr, value in stored.config.items():
                if hasattr(agent_cfg, attr):
                    logger.info(
                        f'Set attrib {attr} to {value} for {conversation_id}'
                    )
                    setattr(agent_cfg, attr, value)
        except Exception as e:
            logger.warning(f'Error processing exp config: {e}')
        return config
# Allow deployments to substitute a custom manager implementation via an
# environment variable; defaults to the no-op base class defined above.
experiment_manager_cls = os.environ.get(
    'OPENHANDS_EXPERIMENT_MANAGER_CLS',
    'openhands.experiments.experiment_manager.ExperimentManager',
)
# Resolved class used throughout the server wherever experiment hooks run.
ExperimentManagerImpl = get_impl(ExperimentManager, experiment_manager_cls)

View File

@@ -28,7 +28,6 @@ from openhands.core.logger import openhands_logger as logger
from openhands.events.action import MessageAction
from openhands.events.nested_event_store import NestedEventStore
from openhands.events.stream import EventStream
from openhands.experiments.experiment_manager import ExperimentManagerImpl
from openhands.integrations.provider import PROVIDER_TOKEN_TYPE, ProviderHandler
from openhands.runtime import get_runtime_cls
from openhands.runtime.impl.docker.docker_runtime import DockerRuntime
@@ -551,12 +550,8 @@ class DockerNestedConversationManager(ConversationManager):
# This session is created here only because it is the easiest way to get a runtime, which
# is the easiest way to create the needed docker container
config: OpenHandsConfig = ExperimentManagerImpl.run_config_variant_test(
user_id, sid, self.config
)
llm_registry, conversation_stats, config = (
create_registry_and_conversation_stats(config, sid, user_id, settings)
create_registry_and_conversation_stats(self.config, sid, user_id, settings)
)
session = Session(

View File

@@ -60,7 +60,6 @@ from openhands.events.observation import (
AgentStateChangedObservation,
NullObservation,
)
from openhands.experiments.experiment_manager import ExperimentConfig
from openhands.integrations.provider import (
PROVIDER_TOKEN_TYPE,
ProviderHandler,
@@ -109,7 +108,6 @@ from openhands.storage.data_models.conversation_metadata import (
from openhands.storage.data_models.conversation_status import ConversationStatus
from openhands.storage.data_models.secrets import Secrets
from openhands.storage.data_models.settings import Settings
from openhands.storage.locations import get_experiment_config_filename
from openhands.storage.settings.settings_store import SettingsStore
from openhands.utils.async_utils import wait_all
from openhands.utils.conversation_summary import get_default_conversation_title
@@ -1240,32 +1238,6 @@ async def update_conversation(
)
@app.post('/conversations/{conversation_id}/exp-config')
def add_experiment_config_for_conversation(
    exp_config: ExperimentConfig,
    conversation_id: str = Depends(validate_conversation_id),
) -> bool:
    """Persist an experiment config for a conversation, at most once.

    Returns:
        True when the config was newly written; False when a config already
        exists (existing configs are never overwritten) or the write fails.

    Note: the original ordering of the trailing `return True` / `return False`
    made one of them unreachable (or reported success on a failed write);
    this version returns True only after a successful write.
    """
    exp_config_filepath = get_experiment_config_filename(conversation_id)
    # Don't modify again if it already exists.
    try:
        file_store.read(exp_config_filepath)
        return False
    except FileNotFoundError:
        pass
    try:
        file_store.write(exp_config_filepath, exp_config.model_dump_json())
    except Exception as e:
        logger.info(f'Failed to write experiment config for {conversation_id}: {e}')
        return False
    return True
def _parse_combined_page_id(page_id: str | None) -> tuple[str | None, str | None]:
"""Parse combined page_id to extract separate V0 and V1 page_ids.

View File

@@ -13,7 +13,6 @@ from typing import Any
from openhands.core.config.mcp_config import MCPConfig
from openhands.core.logger import openhands_logger as logger
from openhands.events.action.message import MessageAction
from openhands.experiments.experiment_manager import ExperimentManagerImpl
from openhands.integrations.provider import (
CUSTOM_SECRETS_TYPE,
PROVIDER_TOKEN_TYPE,
@@ -142,10 +141,6 @@ async def start_conversation(
conversation_init_data = ConversationInitData(**session_init_args)
conversation_init_data = ExperimentManagerImpl.run_conversation_variant_test(
user_id, conversation_id, conversation_init_data
)
logger.info(
f'Starting agent loop for conversation {conversation_id}',
extra={'user_id': user_id, 'session_id': conversation_id},
@@ -281,8 +276,4 @@ async def setup_init_conversation_settings(
if user_secrets:
session_init_args['custom_secrets'] = user_secrets.custom_secrets
conversation_init_data = ConversationInitData(**session_init_args)
# We should recreate the same experiment conditions when restarting a conversation
return ExperimentManagerImpl.run_conversation_variant_test(
user_id, conversation_id, conversation_init_data
)
return ConversationInitData(**session_init_args)

View File

@@ -108,13 +108,6 @@ class WebSession:
EventStreamSubscriber.SERVER, self.on_event, self.sid
)
self.config = config
# Lazy import to avoid circular dependency
from openhands.experiments.experiment_manager import ExperimentManagerImpl
self.config = ExperimentManagerImpl.run_config_variant_test(
user_id, sid, self.config
)
self.loop = asyncio.get_event_loop()
self.user_id = user_id

View File

@@ -36,7 +36,3 @@ def get_conversation_llm_registry_filename(sid: str, user_id: str | None = None)
def get_conversation_stats_filename(sid: str, user_id: str | None = None) -> str:
    """Path of the pickled conversation stats inside the conversation dir."""
    directory = get_conversation_dir(sid, user_id)
    return f'{directory}conversation_stats.pkl'
def get_experiment_config_filename(sid: str, user_id: str | None = None) -> str:
    """Path of the stored experiment config inside the conversation dir."""
    directory = get_conversation_dir(sid, user_id)
    return f'{directory}exp_config.json'

View File

@@ -129,9 +129,8 @@ def generate_openapi_spec():
"""Generate the OpenAPI specification from the FastAPI app."""
spec = app.openapi()
# Explicitly exclude certain endpoints that are operational, experimental, or UI-only convenience
# Explicitly exclude certain endpoints that are operational or UI-only convenience
excluded_endpoints = [
'/api/conversations/{conversation_id}/exp-config', # Internal experimentation endpoint
'/server_info', # Operational/system diagnostics
'/api/conversations/{conversation_id}/vscode-url', # UI/runtime convenience
'/api/conversations/{conversation_id}/web-hosts', # UI/runtime convenience

View File

@@ -1032,27 +1032,17 @@ class TestLiveStatusAppConversationService:
assert agent_context.system_message_suffix is None
@pytest.mark.asyncio
@patch(
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
)
async def test_finalize_conversation_request_with_skills(
self, mock_experiment_manager
):
async def test_finalize_conversation_request_with_skills(self):
"""Test _finalize_conversation_request with skills loading."""
# Arrange
mock_agent = Mock(spec=Agent)
# Create mock LLM with required attributes for _update_agent_with_llm_metadata
mock_llm = Mock(spec=LLM)
mock_llm.model = 'gpt-4' # Non-openhands model, so no metadata update
mock_llm.usage_id = 'agent'
mock_updated_agent = Mock(spec=Agent)
mock_updated_agent.llm = mock_llm
mock_updated_agent.condenser = None # No condenser
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
mock_updated_agent
)
# Arrange
mock_agent = Mock(spec=Agent)
mock_agent.llm = mock_llm
mock_agent.condenser = None # No condenser
conversation_id = uuid4()
workspace = LocalWorkspace(working_dir='/test')
@@ -1061,9 +1051,7 @@ class TestLiveStatusAppConversationService:
remote_workspace = Mock(spec=AsyncRemoteWorkspace)
# Mock the skills loading method
self.service._load_skills_and_update_agent = AsyncMock(
return_value=mock_updated_agent
)
self.service._load_skills_and_update_agent = AsyncMock(return_value=mock_agent)
# Act
result = await self.service._finalize_conversation_request(
@@ -1082,44 +1070,24 @@ class TestLiveStatusAppConversationService:
# Assert
assert isinstance(result, StartConversationRequest)
assert result.conversation_id == conversation_id
assert result.agent == mock_updated_agent
assert result.workspace == workspace
assert result.initial_message == initial_message
assert result.secrets == secrets
mock_experiment_manager.run_agent_variant_tests__v1.assert_called_once_with(
self.mock_user.id, conversation_id, mock_agent
)
self.service._load_skills_and_update_agent.assert_called_once_with(
self.mock_sandbox,
mock_updated_agent,
remote_workspace,
'test_repo',
'/test/dir',
)
self.service._load_skills_and_update_agent.assert_called_once()
@pytest.mark.asyncio
@patch(
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
)
async def test_finalize_conversation_request_without_skills(
self, mock_experiment_manager
):
async def test_finalize_conversation_request_without_skills(self):
"""Test _finalize_conversation_request without remote workspace (no skills)."""
# Arrange
mock_agent = Mock(spec=Agent)
# Create mock LLM with required attributes for _update_agent_with_llm_metadata
mock_llm = Mock(spec=LLM)
mock_llm.model = 'gpt-4' # Non-openhands model, so no metadata update
mock_llm.usage_id = 'agent'
mock_updated_agent = Mock(spec=Agent)
mock_updated_agent.llm = mock_llm
mock_updated_agent.condenser = None # No condenser
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
mock_updated_agent
)
# Arrange
mock_agent = Mock(spec=Agent)
mock_agent.llm = mock_llm
mock_agent.condenser = None # No condenser
workspace = LocalWorkspace(working_dir='/test')
secrets = {'test': StaticSecret(value='secret')}
@@ -1141,31 +1109,18 @@ class TestLiveStatusAppConversationService:
# Assert
assert isinstance(result, StartConversationRequest)
assert isinstance(result.conversation_id, UUID)
assert result.agent == mock_updated_agent
mock_experiment_manager.run_agent_variant_tests__v1.assert_called_once()
@pytest.mark.asyncio
@patch(
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
)
async def test_finalize_conversation_request_skills_loading_fails(
self, mock_experiment_manager
):
async def test_finalize_conversation_request_skills_loading_fails(self):
"""Test _finalize_conversation_request when skills loading fails."""
# Arrange
mock_agent = Mock(spec=Agent)
# Create mock LLM with required attributes for _update_agent_with_llm_metadata
mock_llm = Mock(spec=LLM)
mock_llm.model = 'gpt-4' # Non-openhands model, so no metadata update
mock_llm.usage_id = 'agent'
mock_updated_agent = Mock(spec=Agent)
mock_updated_agent.llm = mock_llm
mock_updated_agent.condenser = None # No condenser
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
mock_updated_agent
)
mock_agent = Mock(spec=Agent)
mock_agent.llm = mock_llm
mock_agent.condenser = None # No condenser
workspace = LocalWorkspace(working_dir='/test')
secrets = {'test': StaticSecret(value='secret')}
@@ -1195,9 +1150,6 @@ class TestLiveStatusAppConversationService:
# Assert
assert isinstance(result, StartConversationRequest)
assert (
result.agent == mock_updated_agent
) # Should still use the experiment-modified agent
mock_logger.warning.assert_called_once()
@pytest.mark.asyncio
@@ -2266,12 +2218,7 @@ class TestPluginHandling:
assert 'key2: value2' in text
@pytest.mark.asyncio
@patch(
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
)
async def test_finalize_conversation_request_with_plugins(
self, mock_experiment_manager
):
async def test_finalize_conversation_request_with_plugins(self):
"""Test _finalize_conversation_request passes plugins list to StartConversationRequest."""
from openhands.app_server.app_conversation.app_conversation_models import (
PluginSpec,
@@ -2282,13 +2229,13 @@ class TestPluginHandling:
mock_llm = Mock(spec=LLM)
mock_llm.model = 'gpt-4'
mock_llm.usage_id = 'agent'
mock_agent.llm = mock_llm
mock_agent.condenser = None
mock_updated_agent = Mock(spec=Agent)
mock_updated_agent.llm = mock_llm
mock_updated_agent.condenser = None
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
mock_updated_agent
)
mock_agent.model_copy = Mock(return_value=mock_updated_agent)
workspace = LocalWorkspace(working_dir='/test')
secrets = {'test': StaticSecret(value='secret')}
@@ -2330,25 +2277,20 @@ class TestPluginHandling:
assert '- api_key: test123' in result.initial_message.content[0].text
@pytest.mark.asyncio
@patch(
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
)
async def test_finalize_conversation_request_without_plugins(
self, mock_experiment_manager
):
async def test_finalize_conversation_request_without_plugins(self):
"""Test _finalize_conversation_request without plugins sets plugins to None."""
# Arrange
mock_agent = Mock(spec=Agent)
mock_llm = Mock(spec=LLM)
mock_llm.model = 'gpt-4'
mock_llm.usage_id = 'agent'
mock_agent.llm = mock_llm
mock_agent.condenser = None
mock_updated_agent = Mock(spec=Agent)
mock_updated_agent.llm = mock_llm
mock_updated_agent.condenser = None
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
mock_updated_agent
)
mock_agent.model_copy = Mock(return_value=mock_updated_agent)
workspace = LocalWorkspace(working_dir='/test')
secrets = {}
@@ -2373,12 +2315,7 @@ class TestPluginHandling:
assert result.plugins is None
@pytest.mark.asyncio
@patch(
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
)
async def test_finalize_conversation_request_plugin_without_ref(
self, mock_experiment_manager
):
async def test_finalize_conversation_request_plugin_without_ref(self):
"""Test _finalize_conversation_request with plugin that has no ref."""
from openhands.app_server.app_conversation.app_conversation_models import (
PluginSpec,
@@ -2389,13 +2326,13 @@ class TestPluginHandling:
mock_llm = Mock(spec=LLM)
mock_llm.model = 'gpt-4'
mock_llm.usage_id = 'agent'
mock_agent.llm = mock_llm
mock_agent.condenser = None
mock_updated_agent = Mock(spec=Agent)
mock_updated_agent.llm = mock_llm
mock_updated_agent.condenser = None
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
mock_updated_agent
)
mock_agent.model_copy = Mock(return_value=mock_updated_agent)
workspace = LocalWorkspace(working_dir='/test')
secrets = {}
@@ -2428,12 +2365,7 @@ class TestPluginHandling:
assert result.initial_message is None
@pytest.mark.asyncio
@patch(
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
)
async def test_finalize_conversation_request_plugin_with_repo_path(
self, mock_experiment_manager
):
async def test_finalize_conversation_request_plugin_with_repo_path(self):
"""Test _finalize_conversation_request passes repo_path to PluginSource."""
from openhands.app_server.app_conversation.app_conversation_models import (
PluginSpec,
@@ -2444,13 +2376,13 @@ class TestPluginHandling:
mock_llm = Mock(spec=LLM)
mock_llm.model = 'gpt-4'
mock_llm.usage_id = 'agent'
mock_agent.llm = mock_llm
mock_agent.condenser = None
mock_updated_agent = Mock(spec=Agent)
mock_updated_agent.llm = mock_llm
mock_updated_agent.condenser = None
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
mock_updated_agent
)
mock_agent.model_copy = Mock(return_value=mock_updated_agent)
workspace = LocalWorkspace(working_dir='/test')
secrets = {}
@@ -2488,12 +2420,7 @@ class TestPluginHandling:
assert result.plugins[0].repo_path == 'plugins/city-weather'
@pytest.mark.asyncio
@patch(
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
)
async def test_finalize_conversation_request_multiple_plugins(
self, mock_experiment_manager
):
async def test_finalize_conversation_request_multiple_plugins(self):
"""Test _finalize_conversation_request with multiple plugins."""
from openhands.app_server.app_conversation.app_conversation_models import (
PluginSpec,
@@ -2504,13 +2431,13 @@ class TestPluginHandling:
mock_llm = Mock(spec=LLM)
mock_llm.model = 'gpt-4'
mock_llm.usage_id = 'agent'
mock_agent.llm = mock_llm
mock_agent.condenser = None
mock_updated_agent = Mock(spec=Agent)
mock_updated_agent.llm = mock_llm
mock_updated_agent.condenser = None
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
mock_updated_agent
)
mock_agent.model_copy = Mock(return_value=mock_updated_agent)
workspace = LocalWorkspace(working_dir='/test')
secrets = {}

View File

@@ -1,264 +0,0 @@
"""Unit tests for ExperimentManager class, focusing on the v1 agent method."""
from types import SimpleNamespace
from unittest.mock import Mock, patch
from uuid import UUID, uuid4
import pytest
from openhands.app_server.app_conversation.live_status_app_conversation_service import (
LiveStatusAppConversationService,
)
from openhands.app_server.sandbox.sandbox_models import SandboxInfo, SandboxStatus
from openhands.experiments.experiment_manager import ExperimentManager
from openhands.sdk import Agent
from openhands.sdk.llm import LLM
class TestExperimentManager:
    """Test cases for the base (no-op) ExperimentManager class."""

    def setup_method(self):
        """Build a reusable mock LLM/Agent pair for each test."""
        self.user_id = 'test_user_123'
        self.conversation_id = uuid4()

        # Mock LLM carrying the attributes the service inspects.
        self.mock_llm = Mock(spec=LLM)
        self.mock_llm.model = 'gpt-4'
        self.mock_llm.usage_id = 'agent'

        # Mock Agent wired to the mock LLM; model_copy returns itself so
        # identity checks stay meaningful.
        self.mock_agent = Mock(spec=Agent)
        self.mock_agent.llm = self.mock_llm
        self.mock_agent.system_prompt_filename = 'default_system_prompt.j2'
        self.mock_agent.model_copy = Mock(return_value=self.mock_agent)

    def test_run_agent_variant_tests__v1_returns_agent_unchanged(self):
        """Test that the base ExperimentManager returns the agent unchanged."""
        outcome = ExperimentManager.run_agent_variant_tests__v1(
            self.user_id, self.conversation_id, self.mock_agent
        )
        assert outcome is self.mock_agent
        assert outcome == self.mock_agent

    def test_run_agent_variant_tests__v1_with_none_user_id(self):
        """Test that the method works with None user_id."""
        outcome = ExperimentManager.run_agent_variant_tests__v1(
            None, self.conversation_id, self.mock_agent
        )
        assert outcome is self.mock_agent

    def test_run_agent_variant_tests__v1_with_different_conversation_ids(self):
        """Test that the method works with different conversation IDs."""
        first_id, second_id = uuid4(), uuid4()
        first = ExperimentManager.run_agent_variant_tests__v1(
            self.user_id, first_id, self.mock_agent
        )
        second = ExperimentManager.run_agent_variant_tests__v1(
            self.user_id, second_id, self.mock_agent
        )
        # The no-op manager hands back the same instance regardless of ID.
        assert first is self.mock_agent
        assert second is self.mock_agent
class TestExperimentManagerIntegration:
"""Integration tests for ExperimentManager with start_app_conversation."""
def setup_method(self):
"""Set up test fixtures."""
self.user_id = 'test_user_123'
self.conversation_id = uuid4()
# Create a mock LLM
self.mock_llm = Mock(spec=LLM)
self.mock_llm.model = 'gpt-4'
self.mock_llm.usage_id = 'agent'
# Create a mock Agent
self.mock_agent = Mock(spec=Agent)
self.mock_agent.llm = self.mock_llm
self.mock_agent.system_prompt_filename = 'default_system_prompt.j2'
self.mock_agent.model_copy = Mock(return_value=self.mock_agent)
@patch('openhands.experiments.experiment_manager.ExperimentManagerImpl')
def test_start_app_conversation_calls_experiment_manager_v1(
self, mock_experiment_manager_impl
):
"""Test that start_app_conversation calls the experiment manager v1 method with correct parameters."""
# Arrange
mock_experiment_manager_impl.run_agent_variant_tests__v1.return_value = (
self.mock_agent
)
# Create a mock service instance
mock_service = Mock(spec=LiveStatusAppConversationService)
# Mock the _build_start_conversation_request_for_user method to simulate the call
with patch.object(mock_service, '_build_start_conversation_request_for_user'):
# Simulate the part of the code that calls the experiment manager
from uuid import uuid4
conversation_id = uuid4()
# This simulates the call that happens in the actual service
result_agent = mock_experiment_manager_impl.run_agent_variant_tests__v1(
self.user_id, conversation_id, self.mock_agent
)
# Assert
mock_experiment_manager_impl.run_agent_variant_tests__v1.assert_called_once_with(
self.user_id, conversation_id, self.mock_agent
)
assert result_agent == self.mock_agent
@pytest.mark.asyncio
async def test_experiment_manager_called_with_correct_parameters_in_context__noop_pass_through(
self,
):
"""
Test that ExperimentManagerImpl.run_agent_variant_tests__v1 is called with correct parameters
and returns the same agent instance (no copy/mutation) when building a StartConversationRequest.
"""
# --- Arrange: fixed UUID to assert call parameters deterministically
fixed_conversation_id = UUID('00000000-0000-0000-0000-000000000001')
# Create a stable Agent (and LLM) we can identity-check later
mock_llm = Mock(spec=LLM)
mock_llm.model = 'gpt-4'
mock_llm.usage_id = 'agent'
mock_agent = Mock(spec=Agent)
mock_agent.llm = mock_llm
mock_agent.condenser = None # No condenser for this test
mock_agent.system_prompt_filename = 'default_system_prompt.j2'
mock_agent.model_copy = Mock(return_value=mock_agent)
# Minimal, real-ish user context used by the service
class DummyUserContext:
async def get_user_info(self):
# confirmation_mode=False -> NeverConfirm()
return SimpleNamespace(
id='test_user_123',
llm_model='gpt-4',
llm_base_url=None,
llm_api_key=None,
confirmation_mode=False,
condenser_max_size=None,
security_analyzer=None,
)
async def get_secrets(self):
return {}
async def get_latest_token(self, provider):
return None
async def get_user_id(self):
return 'test_user_123'
user_context = DummyUserContext()
# The service requires a lot of deps, but for this test we won't exercise them.
app_conversation_info_service = Mock()
app_conversation_start_task_service = Mock()
event_callback_service = Mock()
sandbox_service = Mock()
sandbox_spec_service = Mock()
jwt_service = Mock()
httpx_client = Mock()
event_service = Mock()
service = LiveStatusAppConversationService(
init_git_in_empty_workspace=False,
user_context=user_context,
app_conversation_info_service=app_conversation_info_service,
app_conversation_start_task_service=app_conversation_start_task_service,
event_callback_service=event_callback_service,
event_service=event_service,
sandbox_service=sandbox_service,
sandbox_spec_service=sandbox_spec_service,
jwt_service=jwt_service,
sandbox_startup_timeout=30,
sandbox_startup_poll_frequency=1,
httpx_client=httpx_client,
web_url=None,
openhands_provider_base_url=None,
access_token_hard_timeout=None,
)
sandbox = SandboxInfo(
id='mock-sandbox-id',
created_by_user_id='mock-user-id',
sandbox_spec_id='mock-sandbox-spec-id',
status=SandboxStatus.RUNNING,
session_api_key='mock-session-api-key',
)
# Patch the pieces invoked by the service
with (
patch.object(
service,
'_setup_secrets_for_git_providers',
return_value={},
),
patch.object(
service,
'_configure_llm_and_mcp',
return_value=(mock_llm, {}),
),
patch.object(
service,
'_create_agent_with_context',
return_value=mock_agent,
),
patch.object(
service,
'_load_skills_and_update_agent',
return_value=mock_agent,
),
patch(
'openhands.app_server.app_conversation.live_status_app_conversation_service.uuid4',
return_value=fixed_conversation_id,
),
patch(
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
) as mock_experiment_manager,
):
# Configure the experiment manager mock to return the same agent
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
mock_agent
)
# --- Act: build the start request
start_req = await service._build_start_conversation_request_for_user(
sandbox=sandbox,
initial_message=None,
system_message_suffix=None, # No additional system message suffix
git_provider=None, # Keep secrets path simple
working_dir='/tmp/project', # Arbitrary path
)
# --- Assert: verify experiment manager was called with correct parameters
mock_experiment_manager.run_agent_variant_tests__v1.assert_called_once_with(
'test_user_123', # user_id
fixed_conversation_id, # conversation_id
mock_agent, # agent (after model_copy with agent_context)
)
# The agent in the StartConversationRequest is the *same* object returned by experiment manager
assert start_req.agent is mock_agent
# No tweaks to agent fields by the experiment manager (noop)
assert start_req.agent.llm is mock_llm
assert start_req.agent.system_prompt_filename == 'default_system_prompt.j2'