mirror of
https://github.com/OpenHands/OpenHands.git
synced 2026-03-22 05:37:20 +08:00
Remove ExperimentManager concept from codebase (#13215)
Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
@@ -53,7 +53,7 @@ repos:
|
||||
# Use -p (package) to avoid dual module name conflict when using MYPYPATH
|
||||
# MYPYPATH=enterprise allows resolving bare imports like "from integrations.xxx"
|
||||
# Note: tests package excluded to avoid conflict with core openhands tests
|
||||
entry: bash -c 'MYPYPATH=enterprise mypy --config-file enterprise/dev_config/python/mypy.ini -p integrations -p server -p storage -p sync -p experiments'
|
||||
entry: bash -c 'MYPYPATH=enterprise mypy --config-file enterprise/dev_config/python/mypy.ini -p integrations -p server -p storage -p sync'
|
||||
always_run: true
|
||||
pass_filenames: false
|
||||
files: ^enterprise/
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
import os
|
||||
|
||||
import posthog
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
# Initialize PostHog
|
||||
posthog.api_key = os.environ.get('POSTHOG_CLIENT_KEY', 'phc_placeholder')
|
||||
posthog.host = os.environ.get('POSTHOG_HOST', 'https://us.i.posthog.com')
|
||||
|
||||
# Log PostHog configuration with masked API key for security
|
||||
api_key = posthog.api_key
|
||||
if api_key and len(api_key) > 8:
|
||||
masked_key = f'{api_key[:4]}...{api_key[-4:]}'
|
||||
else:
|
||||
masked_key = 'not_set_or_too_short'
|
||||
logger.info('posthog_configuration', extra={'posthog_api_key_masked': masked_key})
|
||||
|
||||
# Global toggle for the experiment manager
|
||||
ENABLE_EXPERIMENT_MANAGER = (
|
||||
os.environ.get('ENABLE_EXPERIMENT_MANAGER', 'false').lower() == 'true'
|
||||
)
|
||||
|
||||
# Get the current experiment type from environment variable
|
||||
# If None, no experiment is running
|
||||
EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT = os.environ.get(
|
||||
'EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT', ''
|
||||
)
|
||||
# System prompt experiment toggle
|
||||
EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT = os.environ.get(
|
||||
'EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', ''
|
||||
)
|
||||
|
||||
EXPERIMENT_CLAUDE4_VS_GPT5 = os.environ.get('EXPERIMENT_CLAUDE4_VS_GPT5', '')
|
||||
|
||||
EXPERIMENT_CONDENSER_MAX_STEP = os.environ.get('EXPERIMENT_CONDENSER_MAX_STEP', '')
|
||||
|
||||
logger.info(
|
||||
'experiment_manager:run_conversation_variant_test:experiment_config',
|
||||
extra={
|
||||
'enable_experiment_manager': ENABLE_EXPERIMENT_MANAGER,
|
||||
'experiment_litellm_default_model_experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
|
||||
'experiment_system_prompt_experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
|
||||
'experiment_claude4_vs_gpt5_experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
|
||||
'experiment_condenser_max_step': EXPERIMENT_CONDENSER_MAX_STEP,
|
||||
},
|
||||
)
|
||||
@@ -1,99 +0,0 @@
|
||||
from uuid import UUID
|
||||
|
||||
from experiments.constants import (
|
||||
ENABLE_EXPERIMENT_MANAGER,
|
||||
EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
|
||||
)
|
||||
from experiments.experiment_versions import (
|
||||
handle_system_prompt_experiment,
|
||||
)
|
||||
|
||||
from openhands.core.config.openhands_config import OpenHandsConfig
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.experiments.experiment_manager import ExperimentManager
|
||||
from openhands.sdk import Agent
|
||||
from openhands.server.session.conversation_init_data import ConversationInitData
|
||||
|
||||
|
||||
class SaaSExperimentManager(ExperimentManager):
|
||||
@staticmethod
|
||||
def run_agent_variant_tests__v1(
|
||||
user_id: str | None, conversation_id: UUID, agent: Agent
|
||||
) -> Agent:
|
||||
if not ENABLE_EXPERIMENT_MANAGER:
|
||||
logger.info(
|
||||
'experiment_manager:run_conversation_variant_test:skipped',
|
||||
extra={'reason': 'experiment_manager_disabled'},
|
||||
)
|
||||
return agent
|
||||
|
||||
if EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT:
|
||||
# Skip experiment for planning agents which require their specialized prompt
|
||||
if agent.system_prompt_filename != 'system_prompt_planning.j2':
|
||||
agent = agent.model_copy(
|
||||
update={'system_prompt_filename': 'system_prompt_long_horizon.j2'}
|
||||
)
|
||||
|
||||
return agent
|
||||
|
||||
@staticmethod
|
||||
def run_conversation_variant_test(
|
||||
user_id, conversation_id, conversation_settings
|
||||
) -> ConversationInitData:
|
||||
"""
|
||||
Run conversation variant test and potentially modify the conversation settings
|
||||
based on the PostHog feature flags.
|
||||
|
||||
Args:
|
||||
user_id: The user ID
|
||||
conversation_id: The conversation ID
|
||||
conversation_settings: The conversation settings that may include convo_id and llm_model
|
||||
|
||||
Returns:
|
||||
The modified conversation settings
|
||||
"""
|
||||
logger.debug(
|
||||
'experiment_manager:run_conversation_variant_test:started',
|
||||
extra={'user_id': user_id, 'conversation_id': conversation_id},
|
||||
)
|
||||
|
||||
return conversation_settings
|
||||
|
||||
@staticmethod
|
||||
def run_config_variant_test(
|
||||
user_id: str | None, conversation_id: str, config: OpenHandsConfig
|
||||
) -> OpenHandsConfig:
|
||||
"""
|
||||
Run agent config variant test and potentially modify the OpenHands config
|
||||
based on the current experiment type and PostHog feature flags.
|
||||
|
||||
Args:
|
||||
user_id: The user ID
|
||||
conversation_id: The conversation ID
|
||||
config: The OpenHands configuration
|
||||
|
||||
Returns:
|
||||
The modified OpenHands configuration
|
||||
"""
|
||||
logger.info(
|
||||
'experiment_manager:run_config_variant_test:started',
|
||||
extra={'user_id': user_id},
|
||||
)
|
||||
|
||||
# Skip all experiment processing if the experiment manager is disabled
|
||||
if not ENABLE_EXPERIMENT_MANAGER:
|
||||
logger.info(
|
||||
'experiment_manager:run_config_variant_test:skipped',
|
||||
extra={'reason': 'experiment_manager_disabled'},
|
||||
)
|
||||
return config
|
||||
|
||||
# Pass the entire OpenHands config to the system prompt experiment
|
||||
# Let the experiment handler directly modify the config as needed
|
||||
modified_config = handle_system_prompt_experiment(
|
||||
user_id, conversation_id, config
|
||||
)
|
||||
|
||||
# Condenser max step experiment is applied via conversation variant test,
|
||||
# not config variant test. Return modified config from system prompt only.
|
||||
return modified_config
|
||||
@@ -1,107 +0,0 @@
|
||||
"""
|
||||
LiteLLM model experiment handler.
|
||||
|
||||
This module contains the handler for the LiteLLM model experiment.
|
||||
"""
|
||||
|
||||
import posthog
|
||||
from experiments.constants import EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT
|
||||
from server.constants import (
|
||||
IS_FEATURE_ENV,
|
||||
build_litellm_proxy_model_path,
|
||||
get_default_litellm_model,
|
||||
)
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
|
||||
def handle_litellm_default_model_experiment(
|
||||
user_id, conversation_id, conversation_settings
|
||||
):
|
||||
"""
|
||||
Handle the LiteLLM model experiment.
|
||||
|
||||
Args:
|
||||
user_id: The user ID
|
||||
conversation_id: The conversation ID
|
||||
conversation_settings: The conversation settings
|
||||
|
||||
Returns:
|
||||
Modified conversation settings
|
||||
"""
|
||||
# No-op if the specific experiment is not enabled
|
||||
if not EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT:
|
||||
logger.info(
|
||||
'experiment_manager:ab_testing:skipped',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'reason': 'experiment_not_enabled',
|
||||
'experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
|
||||
},
|
||||
)
|
||||
return conversation_settings
|
||||
|
||||
# Use experiment name as the flag key
|
||||
try:
|
||||
enabled_variant = posthog.get_feature_flag(
|
||||
EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT, conversation_id
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:get_feature_flag:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
return conversation_settings
|
||||
|
||||
# Log the experiment event
|
||||
# If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog
|
||||
posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id
|
||||
|
||||
try:
|
||||
posthog.capture(
|
||||
distinct_id=posthog_user_id,
|
||||
event='model_set',
|
||||
properties={
|
||||
'conversation_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'original_user_id': user_id,
|
||||
'is_feature_env': IS_FEATURE_ENV,
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:posthog_capture:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
# Continue execution as this is not critical
|
||||
|
||||
logger.info(
|
||||
'posthog_capture',
|
||||
extra={
|
||||
'event': 'model_set',
|
||||
'posthog_user_id': posthog_user_id,
|
||||
'is_feature_env': IS_FEATURE_ENV,
|
||||
'conversation_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
},
|
||||
)
|
||||
|
||||
# Set the model based on the feature flag variant
|
||||
if enabled_variant == 'claude37':
|
||||
# Use the shared utility to construct the LiteLLM proxy model path
|
||||
model = build_litellm_proxy_model_path('claude-3-7-sonnet-20250219')
|
||||
# Update the conversation settings with the selected model
|
||||
conversation_settings.llm_model = model
|
||||
else:
|
||||
# Update the conversation settings with the default model for the current version
|
||||
conversation_settings.llm_model = get_default_litellm_model()
|
||||
|
||||
return conversation_settings
|
||||
@@ -1,181 +0,0 @@
|
||||
"""
|
||||
System prompt experiment handler.
|
||||
|
||||
This module contains the handler for the system prompt experiment that uses
|
||||
the PostHog variant as the system prompt filename.
|
||||
"""
|
||||
|
||||
import copy
|
||||
|
||||
import posthog
|
||||
from experiments.constants import EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT
|
||||
from server.constants import IS_FEATURE_ENV
|
||||
from storage.experiment_assignment_store import ExperimentAssignmentStore
|
||||
|
||||
from openhands.core.config.openhands_config import OpenHandsConfig
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
|
||||
def _get_system_prompt_variant(user_id, conversation_id):
|
||||
"""
|
||||
Get the system prompt variant for the experiment.
|
||||
|
||||
Args:
|
||||
user_id: The user ID
|
||||
conversation_id: The conversation ID
|
||||
|
||||
Returns:
|
||||
str or None: The PostHog variant name or None if experiment is not enabled or error occurs
|
||||
"""
|
||||
# No-op if the specific experiment is not enabled
|
||||
if not EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT:
|
||||
logger.info(
|
||||
'experiment_manager_002:ab_testing:skipped',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'reason': 'experiment_not_enabled',
|
||||
'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
|
||||
},
|
||||
)
|
||||
return None
|
||||
|
||||
# Use experiment name as the flag key
|
||||
try:
|
||||
enabled_variant = posthog.get_feature_flag(
|
||||
EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT, conversation_id
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:get_feature_flag:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
return None
|
||||
|
||||
# Store the experiment assignment in the database
|
||||
try:
|
||||
experiment_store = ExperimentAssignmentStore()
|
||||
experiment_store.update_experiment_variant(
|
||||
conversation_id=conversation_id,
|
||||
experiment_name='system_prompt_experiment',
|
||||
variant=enabled_variant,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:store_assignment:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
|
||||
'variant': enabled_variant,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
# Fail the experiment if we cannot track the splits - results would not be explainable
|
||||
return None
|
||||
|
||||
# Log the experiment event
|
||||
# If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog
|
||||
posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id
|
||||
|
||||
try:
|
||||
posthog.capture(
|
||||
distinct_id=posthog_user_id,
|
||||
event='system_prompt_set',
|
||||
properties={
|
||||
'conversation_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'original_user_id': user_id,
|
||||
'is_feature_env': IS_FEATURE_ENV,
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:posthog_capture:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
# Continue execution as this is not critical
|
||||
|
||||
logger.info(
|
||||
'posthog_capture',
|
||||
extra={
|
||||
'event': 'system_prompt_set',
|
||||
'posthog_user_id': posthog_user_id,
|
||||
'is_feature_env': IS_FEATURE_ENV,
|
||||
'conversation_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
},
|
||||
)
|
||||
|
||||
return enabled_variant
|
||||
|
||||
|
||||
def handle_system_prompt_experiment(
|
||||
user_id, conversation_id, config: OpenHandsConfig
|
||||
) -> OpenHandsConfig:
|
||||
"""
|
||||
Handle the system prompt experiment for OpenHands config.
|
||||
|
||||
Args:
|
||||
user_id: The user ID
|
||||
conversation_id: The conversation ID
|
||||
config: The OpenHands configuration
|
||||
|
||||
Returns:
|
||||
Modified OpenHands configuration
|
||||
"""
|
||||
enabled_variant = _get_system_prompt_variant(user_id, conversation_id)
|
||||
|
||||
# If variant is None, experiment is not enabled or there was an error
|
||||
if enabled_variant is None:
|
||||
return config
|
||||
|
||||
# Deep copy the config to avoid modifying the original
|
||||
modified_config = copy.deepcopy(config)
|
||||
|
||||
# Set the system prompt filename based on the variant
|
||||
if enabled_variant == 'control':
|
||||
# Use the long-horizon system prompt for the control variant
|
||||
agent_config = modified_config.get_agent_config(modified_config.default_agent)
|
||||
agent_config.system_prompt_filename = 'system_prompt_long_horizon.j2'
|
||||
agent_config.enable_plan_mode = True
|
||||
elif enabled_variant == 'interactive':
|
||||
modified_config.get_agent_config(
|
||||
modified_config.default_agent
|
||||
).system_prompt_filename = 'system_prompt_interactive.j2'
|
||||
elif enabled_variant == 'no_tools':
|
||||
modified_config.get_agent_config(
|
||||
modified_config.default_agent
|
||||
).system_prompt_filename = 'system_prompt.j2'
|
||||
else:
|
||||
logger.error(
|
||||
'system_prompt_experiment:unknown_variant',
|
||||
extra={
|
||||
'user_id': user_id,
|
||||
'convo_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'reason': 'no explicit mapping; returning original config',
|
||||
},
|
||||
)
|
||||
return config
|
||||
|
||||
# Log which prompt is being used
|
||||
logger.info(
|
||||
'system_prompt_experiment:prompt_selected',
|
||||
extra={
|
||||
'user_id': user_id,
|
||||
'convo_id': conversation_id,
|
||||
'system_prompt_filename': modified_config.get_agent_config(
|
||||
modified_config.default_agent
|
||||
).system_prompt_filename,
|
||||
'variant': enabled_variant,
|
||||
},
|
||||
)
|
||||
|
||||
return modified_config
|
||||
@@ -1,137 +0,0 @@
|
||||
"""
|
||||
LiteLLM model experiment handler.
|
||||
|
||||
This module contains the handler for the LiteLLM model experiment.
|
||||
"""
|
||||
|
||||
import posthog
|
||||
from experiments.constants import EXPERIMENT_CLAUDE4_VS_GPT5
|
||||
from server.constants import (
|
||||
IS_FEATURE_ENV,
|
||||
build_litellm_proxy_model_path,
|
||||
get_default_litellm_model,
|
||||
)
|
||||
from storage.experiment_assignment_store import ExperimentAssignmentStore
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.server.session.conversation_init_data import ConversationInitData
|
||||
|
||||
|
||||
def _get_model_variant(user_id: str | None, conversation_id: str) -> str | None:
|
||||
if not EXPERIMENT_CLAUDE4_VS_GPT5:
|
||||
logger.info(
|
||||
'experiment_manager:ab_testing:skipped',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'reason': 'experiment_not_enabled',
|
||||
'experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
|
||||
},
|
||||
)
|
||||
return None
|
||||
|
||||
try:
|
||||
enabled_variant = posthog.get_feature_flag(
|
||||
EXPERIMENT_CLAUDE4_VS_GPT5, conversation_id
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:get_feature_flag:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
return None
|
||||
|
||||
# Store the experiment assignment in the database
|
||||
try:
|
||||
experiment_store = ExperimentAssignmentStore()
|
||||
experiment_store.update_experiment_variant(
|
||||
conversation_id=conversation_id,
|
||||
experiment_name='claude4_vs_gpt5_experiment',
|
||||
variant=enabled_variant,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:store_assignment:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
|
||||
'variant': enabled_variant,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
# Fail the experiment if we cannot track the splits - results would not be explainable
|
||||
return None
|
||||
|
||||
# Log the experiment event
|
||||
# If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog
|
||||
posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id
|
||||
|
||||
try:
|
||||
posthog.capture(
|
||||
distinct_id=posthog_user_id,
|
||||
event='claude4_or_gpt5_set',
|
||||
properties={
|
||||
'conversation_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'original_user_id': user_id,
|
||||
'is_feature_env': IS_FEATURE_ENV,
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:posthog_capture:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
# Continue execution as this is not critical
|
||||
|
||||
logger.info(
|
||||
'posthog_capture',
|
||||
extra={
|
||||
'event': 'claude4_or_gpt5_set',
|
||||
'posthog_user_id': posthog_user_id,
|
||||
'is_feature_env': IS_FEATURE_ENV,
|
||||
'conversation_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
},
|
||||
)
|
||||
|
||||
return enabled_variant
|
||||
|
||||
|
||||
def handle_claude4_vs_gpt5_experiment(
|
||||
user_id: str | None,
|
||||
conversation_id: str,
|
||||
conversation_settings: ConversationInitData,
|
||||
) -> ConversationInitData:
|
||||
"""
|
||||
Handle the LiteLLM model experiment.
|
||||
|
||||
Args:
|
||||
user_id: The user ID
|
||||
conversation_id: The conversation ID
|
||||
conversation_settings: The conversation settings
|
||||
|
||||
Returns:
|
||||
Modified conversation settings
|
||||
"""
|
||||
|
||||
enabled_variant = _get_model_variant(user_id, conversation_id)
|
||||
|
||||
if not enabled_variant:
|
||||
return conversation_settings
|
||||
|
||||
# Set the model based on the feature flag variant
|
||||
if enabled_variant == 'gpt5':
|
||||
model = build_litellm_proxy_model_path('gpt-5-2025-08-07')
|
||||
conversation_settings.llm_model = model
|
||||
else:
|
||||
conversation_settings.llm_model = get_default_litellm_model()
|
||||
|
||||
return conversation_settings
|
||||
@@ -1,232 +0,0 @@
|
||||
"""
|
||||
Condenser max step experiment handler.
|
||||
|
||||
This module contains the handler for the condenser max step experiment that tests
|
||||
different max_size values for the condenser configuration.
|
||||
"""
|
||||
|
||||
from uuid import UUID
|
||||
|
||||
import posthog
|
||||
from experiments.constants import EXPERIMENT_CONDENSER_MAX_STEP
|
||||
from server.constants import IS_FEATURE_ENV
|
||||
from storage.experiment_assignment_store import ExperimentAssignmentStore
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.sdk import Agent
|
||||
from openhands.sdk.context.condenser import (
|
||||
LLMSummarizingCondenser,
|
||||
)
|
||||
from openhands.server.session.conversation_init_data import ConversationInitData
|
||||
|
||||
|
||||
def _get_condenser_max_step_variant(user_id, conversation_id):
|
||||
"""
|
||||
Get the condenser max step variant for the experiment.
|
||||
|
||||
Args:
|
||||
user_id: The user ID
|
||||
conversation_id: The conversation ID
|
||||
|
||||
Returns:
|
||||
str or None: The PostHog variant name or None if experiment is not enabled or error occurs
|
||||
"""
|
||||
# No-op if the specific experiment is not enabled
|
||||
if not EXPERIMENT_CONDENSER_MAX_STEP:
|
||||
logger.info(
|
||||
'experiment_manager_004:ab_testing:skipped',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'reason': 'experiment_not_enabled',
|
||||
'experiment': EXPERIMENT_CONDENSER_MAX_STEP,
|
||||
},
|
||||
)
|
||||
return None
|
||||
|
||||
# Use experiment name as the flag key
|
||||
try:
|
||||
enabled_variant = posthog.get_feature_flag(
|
||||
EXPERIMENT_CONDENSER_MAX_STEP, conversation_id
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:get_feature_flag:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_CONDENSER_MAX_STEP,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
return None
|
||||
|
||||
# Store the experiment assignment in the database
|
||||
try:
|
||||
experiment_store = ExperimentAssignmentStore()
|
||||
experiment_store.update_experiment_variant(
|
||||
conversation_id=conversation_id,
|
||||
experiment_name='condenser_max_step_experiment',
|
||||
variant=enabled_variant,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:store_assignment:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_CONDENSER_MAX_STEP,
|
||||
'variant': enabled_variant,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
# Fail the experiment if we cannot track the splits - results would not be explainable
|
||||
return None
|
||||
|
||||
# Log the experiment event
|
||||
# If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog
|
||||
posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id
|
||||
|
||||
try:
|
||||
posthog.capture(
|
||||
distinct_id=posthog_user_id,
|
||||
event='condenser_max_step_set',
|
||||
properties={
|
||||
'conversation_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'original_user_id': user_id,
|
||||
'is_feature_env': IS_FEATURE_ENV,
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:posthog_capture:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_CONDENSER_MAX_STEP,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
# Continue execution as this is not critical
|
||||
|
||||
logger.info(
|
||||
'posthog_capture',
|
||||
extra={
|
||||
'event': 'condenser_max_step_set',
|
||||
'posthog_user_id': posthog_user_id,
|
||||
'is_feature_env': IS_FEATURE_ENV,
|
||||
'conversation_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
},
|
||||
)
|
||||
|
||||
return enabled_variant
|
||||
|
||||
|
||||
def handle_condenser_max_step_experiment(
|
||||
user_id: str | None,
|
||||
conversation_id: str,
|
||||
conversation_settings: ConversationInitData,
|
||||
) -> ConversationInitData:
|
||||
"""
|
||||
Handle the condenser max step experiment for conversation settings.
|
||||
|
||||
We should not modify persistent user settings. Instead, apply the experiment
|
||||
variant to the conversation's in-memory settings object for this session only.
|
||||
|
||||
Variants:
|
||||
- control -> condenser_max_size = 120
|
||||
- treatment -> condenser_max_size = 80
|
||||
|
||||
Returns the (potentially) modified conversation_settings.
|
||||
"""
|
||||
|
||||
enabled_variant = _get_condenser_max_step_variant(user_id, conversation_id)
|
||||
|
||||
if enabled_variant is None:
|
||||
return conversation_settings
|
||||
|
||||
if enabled_variant == 'control':
|
||||
condenser_max_size = 120
|
||||
elif enabled_variant == 'treatment':
|
||||
condenser_max_size = 80
|
||||
else:
|
||||
logger.error(
|
||||
'condenser_max_step_experiment:unknown_variant',
|
||||
extra={
|
||||
'user_id': user_id,
|
||||
'convo_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'reason': 'unknown variant; returning original conversation settings',
|
||||
},
|
||||
)
|
||||
return conversation_settings
|
||||
|
||||
try:
|
||||
# Apply the variant to this conversation only; do not persist to DB.
|
||||
# Not all OpenHands versions expose `condenser_max_size` on settings.
|
||||
if hasattr(conversation_settings, 'condenser_max_size'):
|
||||
conversation_settings.condenser_max_size = condenser_max_size
|
||||
logger.info(
|
||||
'condenser_max_step_experiment:conversation_settings_applied',
|
||||
extra={
|
||||
'user_id': user_id,
|
||||
'convo_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'condenser_max_size': condenser_max_size,
|
||||
},
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
'condenser_max_step_experiment:field_missing_on_settings',
|
||||
extra={
|
||||
'user_id': user_id,
|
||||
'convo_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'reason': 'condenser_max_size not present on ConversationInitData',
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'condenser_max_step_experiment:apply_failed',
|
||||
extra={
|
||||
'user_id': user_id,
|
||||
'convo_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
return conversation_settings
|
||||
|
||||
return conversation_settings
|
||||
|
||||
|
||||
def handle_condenser_max_step_experiment__v1(
|
||||
user_id: str | None,
|
||||
conversation_id: UUID,
|
||||
agent: Agent,
|
||||
) -> Agent:
|
||||
enabled_variant = _get_condenser_max_step_variant(user_id, str(conversation_id))
|
||||
|
||||
if enabled_variant is None:
|
||||
return agent
|
||||
|
||||
if enabled_variant == 'control':
|
||||
condenser_max_size = 120
|
||||
elif enabled_variant == 'treatment':
|
||||
condenser_max_size = 80
|
||||
else:
|
||||
logger.error(
|
||||
'condenser_max_step_experiment:unknown_variant',
|
||||
extra={
|
||||
'user_id': user_id,
|
||||
'convo_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'reason': 'unknown variant; returning original conversation settings',
|
||||
},
|
||||
)
|
||||
return agent
|
||||
|
||||
condenser_llm = agent.llm.model_copy(update={'usage_id': 'condenser'})
|
||||
condenser = LLMSummarizingCondenser(
|
||||
llm=condenser_llm, max_size=condenser_max_size, keep_first=4
|
||||
)
|
||||
|
||||
return agent.model_copy(update={'condenser': condenser})
|
||||
@@ -1,25 +0,0 @@
|
||||
"""
|
||||
Experiment versions package.
|
||||
|
||||
This package contains handlers for different experiment versions.
|
||||
"""
|
||||
|
||||
from experiments.experiment_versions._001_litellm_default_model_experiment import (
|
||||
handle_litellm_default_model_experiment,
|
||||
)
|
||||
from experiments.experiment_versions._002_system_prompt_experiment import (
|
||||
handle_system_prompt_experiment,
|
||||
)
|
||||
from experiments.experiment_versions._003_llm_claude4_vs_gpt5_experiment import (
|
||||
handle_claude4_vs_gpt5_experiment,
|
||||
)
|
||||
from experiments.experiment_versions._004_condenser_max_step_experiment import (
|
||||
handle_condenser_max_step_experiment,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'handle_litellm_default_model_experiment',
|
||||
'handle_system_prompt_experiment',
|
||||
'handle_claude4_vs_gpt5_experiment',
|
||||
'handle_condenser_max_step_experiment',
|
||||
]
|
||||
@@ -17,7 +17,6 @@ packages = [
|
||||
{ include = "storage" },
|
||||
{ include = "sync" },
|
||||
{ include = "integrations" },
|
||||
{ include = "experiments" },
|
||||
]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
|
||||
@@ -129,10 +129,6 @@ async def _process_batch_operations_background(
|
||||
# No action required
|
||||
continue
|
||||
|
||||
if subpath == 'exp_config.json':
|
||||
# No action required
|
||||
continue
|
||||
|
||||
# Log unhandled paths for future implementation
|
||||
logger.warning(
|
||||
'unknown_path_in_batch_webhook',
|
||||
|
||||
@@ -391,39 +391,11 @@ class SaasNestedConversationManager(ConversationManager):
|
||||
await self._setup_nested_settings(client, api_url, settings)
|
||||
await self._setup_provider_tokens(client, api_url, settings)
|
||||
await self._setup_custom_secrets(client, api_url, settings.custom_secrets) # type: ignore
|
||||
await self._setup_experiment_config(client, api_url, sid, user_id)
|
||||
await self._create_nested_conversation(
|
||||
client, api_url, sid, user_id, settings, initial_user_msg, replay_json
|
||||
)
|
||||
await self._wait_for_conversation_ready(client, api_url, sid)
|
||||
|
||||
async def _setup_experiment_config(
|
||||
self, client: httpx.AsyncClient, api_url: str, sid: str, user_id: str
|
||||
):
|
||||
# Prevent circular import
|
||||
from openhands.experiments.experiment_manager import (
|
||||
ExperimentConfig,
|
||||
ExperimentManagerImpl,
|
||||
)
|
||||
|
||||
config: OpenHandsConfig = ExperimentManagerImpl.run_config_variant_test(
|
||||
user_id, sid, self.config
|
||||
)
|
||||
|
||||
experiment_config = ExperimentConfig(
|
||||
config={
|
||||
'system_prompt_filename': config.get_agent_config(
|
||||
config.default_agent
|
||||
).system_prompt_filename
|
||||
}
|
||||
)
|
||||
|
||||
response = await client.post(
|
||||
f'{api_url}/api/conversations/{sid}/exp-config',
|
||||
json=experiment_config.model_dump(),
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
async def _setup_nested_settings(
|
||||
self, client: httpx.AsyncClient, api_url: str, settings: Settings
|
||||
) -> None:
|
||||
|
||||
@@ -4,7 +4,6 @@ from storage.billing_session import BillingSession
|
||||
from storage.billing_session_type import BillingSessionType
|
||||
from storage.conversation_callback import CallbackStatus, ConversationCallback
|
||||
from storage.conversation_work import ConversationWork
|
||||
from storage.experiment_assignment import ExperimentAssignment
|
||||
from storage.feedback import ConversationFeedback, Feedback
|
||||
from storage.github_app_installation import GithubAppInstallation
|
||||
from storage.gitlab_webhook import GitlabWebhook, WebhookStatus
|
||||
@@ -50,7 +49,6 @@ __all__ = [
|
||||
'ConversationFeedback',
|
||||
'StoredConversationMetadataSaas',
|
||||
'ConversationWork',
|
||||
'ExperimentAssignment',
|
||||
'Feedback',
|
||||
'GithubAppInstallation',
|
||||
'GitlabWebhook',
|
||||
|
||||
@@ -1,41 +0,0 @@
|
||||
"""
|
||||
Database model for experiment assignments.
|
||||
|
||||
This model tracks which experiments a conversation is assigned to and what variant
|
||||
they received from PostHog feature flags.
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from sqlalchemy import Column, DateTime, String, UniqueConstraint
|
||||
from storage.base import Base
|
||||
|
||||
|
||||
class ExperimentAssignment(Base): # type: ignore
|
||||
__tablename__ = 'experiment_assignments'
|
||||
|
||||
id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
conversation_id = Column(String, nullable=True, index=True)
|
||||
experiment_name = Column(String, nullable=False)
|
||||
variant = Column(String, nullable=False)
|
||||
|
||||
created_at = Column(
|
||||
DateTime(timezone=True),
|
||||
default=lambda: datetime.now(UTC), # type: ignore[attr-defined]
|
||||
nullable=False,
|
||||
)
|
||||
updated_at = Column(
|
||||
DateTime(timezone=True),
|
||||
default=lambda: datetime.now(UTC), # type: ignore[attr-defined]
|
||||
onupdate=lambda: datetime.now(UTC), # type: ignore[attr-defined]
|
||||
nullable=False,
|
||||
)
|
||||
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
'conversation_id',
|
||||
'experiment_name',
|
||||
name='uq_experiment_assignments_conversation_experiment',
|
||||
),
|
||||
)
|
||||
@@ -1,52 +0,0 @@
|
||||
"""
|
||||
Store for managing experiment assignments.
|
||||
|
||||
This store handles creating and updating experiment assignments for conversations.
|
||||
"""
|
||||
|
||||
from sqlalchemy.dialects.postgresql import insert
|
||||
from storage.database import session_maker
|
||||
from storage.experiment_assignment import ExperimentAssignment
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
|
||||
class ExperimentAssignmentStore:
|
||||
"""Store for managing experiment assignments."""
|
||||
|
||||
def update_experiment_variant(
|
||||
self,
|
||||
conversation_id: str,
|
||||
experiment_name: str,
|
||||
variant: str,
|
||||
) -> None:
|
||||
"""
|
||||
Update the variant for a specific experiment.
|
||||
|
||||
Args:
|
||||
conversation_id: The conversation ID
|
||||
experiment_name: The name of the experiment
|
||||
variant: The variant assigned
|
||||
"""
|
||||
with session_maker() as session:
|
||||
# Use PostgreSQL's INSERT ... ON CONFLICT DO NOTHING to handle unique constraint
|
||||
stmt = insert(ExperimentAssignment).values(
|
||||
conversation_id=conversation_id,
|
||||
experiment_name=experiment_name,
|
||||
variant=variant,
|
||||
)
|
||||
stmt = stmt.on_conflict_do_nothing(
|
||||
constraint='uq_experiment_assignments_conversation_experiment'
|
||||
)
|
||||
|
||||
session.execute(stmt)
|
||||
session.commit()
|
||||
|
||||
logger.info(
|
||||
'experiment_assignment_store:upserted_variant',
|
||||
extra={
|
||||
'conversation_id': conversation_id,
|
||||
'experiment_name': experiment_name,
|
||||
'variant': variant,
|
||||
},
|
||||
)
|
||||
@@ -1 +0,0 @@
|
||||
"""Unit tests for experiments module."""
|
||||
@@ -1,149 +0,0 @@
|
||||
# tests/test_condenser_max_step_experiment_v1.py
|
||||
|
||||
from unittest.mock import patch
|
||||
from uuid import uuid4
|
||||
|
||||
from experiments.experiment_manager import SaaSExperimentManager
|
||||
|
||||
# SUT imports (update the module path if needed)
|
||||
from experiments.experiment_versions._004_condenser_max_step_experiment import (
|
||||
handle_condenser_max_step_experiment__v1,
|
||||
)
|
||||
from pydantic import SecretStr
|
||||
|
||||
from openhands.sdk import LLM, Agent
|
||||
from openhands.sdk.context.condenser import LLMSummarizingCondenser
|
||||
|
||||
|
||||
def make_agent() -> Agent:
|
||||
"""Build a minimal valid Agent."""
|
||||
llm = LLM(
|
||||
usage_id='primary-llm',
|
||||
model='provider/model',
|
||||
api_key=SecretStr('sk-test'),
|
||||
)
|
||||
return Agent(llm=llm)
|
||||
|
||||
|
||||
def _patch_variant(monkeypatch, return_value):
|
||||
"""Patch the internal variant getter to return a specific value."""
|
||||
monkeypatch.setattr(
|
||||
'experiments.experiment_versions._004_condenser_max_step_experiment._get_condenser_max_step_variant',
|
||||
lambda user_id, conv_id: return_value,
|
||||
raising=True,
|
||||
)
|
||||
|
||||
|
||||
def test_control_variant_sets_condenser_with_max_size_120(monkeypatch):
|
||||
_patch_variant(monkeypatch, 'control')
|
||||
agent = make_agent()
|
||||
conv_id = uuid4()
|
||||
|
||||
result = handle_condenser_max_step_experiment__v1('user-1', conv_id, agent)
|
||||
|
||||
# Should be a new Agent instance with a condenser installed
|
||||
assert result is not agent
|
||||
assert isinstance(result.condenser, LLMSummarizingCondenser)
|
||||
|
||||
# The condenser should have its own LLM (usage_id overridden to "condenser")
|
||||
assert result.condenser.llm.usage_id == 'condenser'
|
||||
# The original agent LLM remains unchanged
|
||||
assert agent.llm.usage_id == 'primary-llm'
|
||||
|
||||
# Control: max_size = 120, keep_first = 4
|
||||
assert result.condenser.max_size == 120
|
||||
assert result.condenser.keep_first == 4
|
||||
|
||||
|
||||
def test_treatment_variant_sets_condenser_with_max_size_80(monkeypatch):
|
||||
_patch_variant(monkeypatch, 'treatment')
|
||||
agent = make_agent()
|
||||
conv_id = uuid4()
|
||||
|
||||
result = handle_condenser_max_step_experiment__v1('user-2', conv_id, agent)
|
||||
|
||||
assert result is not agent
|
||||
assert isinstance(result.condenser, LLMSummarizingCondenser)
|
||||
assert result.condenser.llm.usage_id == 'condenser'
|
||||
assert result.condenser.max_size == 80
|
||||
assert result.condenser.keep_first == 4
|
||||
|
||||
|
||||
def test_none_variant_returns_original_agent_without_changes(monkeypatch):
|
||||
_patch_variant(monkeypatch, None)
|
||||
agent = make_agent()
|
||||
conv_id = uuid4()
|
||||
|
||||
result = handle_condenser_max_step_experiment__v1('user-3', conv_id, agent)
|
||||
|
||||
# No changes—same instance and no condenser attribute added
|
||||
assert result is agent
|
||||
assert getattr(result, 'condenser', None) is None
|
||||
|
||||
|
||||
def test_unknown_variant_returns_original_agent_without_changes(monkeypatch):
|
||||
_patch_variant(monkeypatch, 'weird-variant')
|
||||
agent = make_agent()
|
||||
conv_id = uuid4()
|
||||
|
||||
result = handle_condenser_max_step_experiment__v1('user-4', conv_id, agent)
|
||||
|
||||
assert result is agent
|
||||
assert getattr(result, 'condenser', None) is None
|
||||
|
||||
|
||||
@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', False)
|
||||
def test_run_agent_variant_tests_v1_noop_when_manager_disabled():
|
||||
"""If ENABLE_EXPERIMENT_MANAGER is False, the method returns the exact same agent and does not call the handler."""
|
||||
agent = make_agent()
|
||||
conv_id = uuid4()
|
||||
|
||||
result = SaaSExperimentManager.run_agent_variant_tests__v1(
|
||||
user_id='user-123',
|
||||
conversation_id=conv_id,
|
||||
agent=agent,
|
||||
)
|
||||
|
||||
# Same object returned (no copy)
|
||||
assert result is agent
|
||||
|
||||
|
||||
@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', True)
|
||||
@patch('experiments.experiment_manager.EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', True)
|
||||
def test_run_agent_variant_tests_v1_calls_handler_and_sets_system_prompt(monkeypatch):
|
||||
"""When enabled, it should call the condenser experiment handler and set the long-horizon system prompt."""
|
||||
agent = make_agent()
|
||||
conv_id = uuid4()
|
||||
|
||||
_patch_variant(monkeypatch, 'treatment')
|
||||
|
||||
result: Agent = SaaSExperimentManager.run_agent_variant_tests__v1(
|
||||
user_id='user-abc',
|
||||
conversation_id=conv_id,
|
||||
agent=agent,
|
||||
)
|
||||
|
||||
# Should be a different instance than the original (copied after handler runs)
|
||||
assert result is not agent
|
||||
assert result.system_prompt_filename == 'system_prompt_long_horizon.j2'
|
||||
|
||||
|
||||
@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', True)
|
||||
@patch('experiments.experiment_manager.EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', True)
|
||||
def test_run_agent_variant_tests_v1_preserves_planning_agent_system_prompt():
|
||||
"""Planning agents should retain their specialized system prompt and not be overwritten by the experiment."""
|
||||
# Arrange
|
||||
planning_agent = make_agent().model_copy(
|
||||
update={'system_prompt_filename': 'system_prompt_planning.j2'}
|
||||
)
|
||||
conv_id = uuid4()
|
||||
|
||||
# Act
|
||||
result: Agent = SaaSExperimentManager.run_agent_variant_tests__v1(
|
||||
user_id='user-planning',
|
||||
conversation_id=conv_id,
|
||||
agent=planning_agent,
|
||||
)
|
||||
|
||||
# Assert
|
||||
assert result.system_prompt_filename == 'system_prompt_planning.j2'
|
||||
Reference in New Issue
Block a user