Remove ExperimentManager concept from codebase (#13215)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Tim O'Farrell
2026-03-04 13:41:18 -07:00
committed by GitHub
parent 3e15b849a3
commit 15e9435b35
28 changed files with 42 additions and 1618 deletions

View File

@@ -53,7 +53,7 @@ repos:
# Use -p (package) to avoid dual module name conflict when using MYPYPATH
# MYPYPATH=enterprise allows resolving bare imports like "from integrations.xxx"
# Note: tests package excluded to avoid conflict with core openhands tests
entry: bash -c 'MYPYPATH=enterprise mypy --config-file enterprise/dev_config/python/mypy.ini -p integrations -p server -p storage -p sync -p experiments'
entry: bash -c 'MYPYPATH=enterprise mypy --config-file enterprise/dev_config/python/mypy.ini -p integrations -p server -p storage -p sync'
always_run: true
pass_filenames: false
files: ^enterprise/

View File

@@ -1,47 +0,0 @@
import os
import posthog
from openhands.core.logger import openhands_logger as logger
# Initialize PostHog
posthog.api_key = os.environ.get('POSTHOG_CLIENT_KEY', 'phc_placeholder')
posthog.host = os.environ.get('POSTHOG_HOST', 'https://us.i.posthog.com')
# Log PostHog configuration with masked API key for security
api_key = posthog.api_key
if api_key and len(api_key) > 8:
masked_key = f'{api_key[:4]}...{api_key[-4:]}'
else:
masked_key = 'not_set_or_too_short'
logger.info('posthog_configuration', extra={'posthog_api_key_masked': masked_key})
# Global toggle for the experiment manager
ENABLE_EXPERIMENT_MANAGER = (
os.environ.get('ENABLE_EXPERIMENT_MANAGER', 'false').lower() == 'true'
)
# Get the current experiment type from environment variable
# If None, no experiment is running
EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT = os.environ.get(
'EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT', ''
)
# System prompt experiment toggle
EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT = os.environ.get(
'EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', ''
)
EXPERIMENT_CLAUDE4_VS_GPT5 = os.environ.get('EXPERIMENT_CLAUDE4_VS_GPT5', '')
EXPERIMENT_CONDENSER_MAX_STEP = os.environ.get('EXPERIMENT_CONDENSER_MAX_STEP', '')
logger.info(
'experiment_manager:run_conversation_variant_test:experiment_config',
extra={
'enable_experiment_manager': ENABLE_EXPERIMENT_MANAGER,
'experiment_litellm_default_model_experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
'experiment_system_prompt_experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
'experiment_claude4_vs_gpt5_experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
'experiment_condenser_max_step': EXPERIMENT_CONDENSER_MAX_STEP,
},
)

View File

@@ -1,99 +0,0 @@
from uuid import UUID
from experiments.constants import (
ENABLE_EXPERIMENT_MANAGER,
EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
)
from experiments.experiment_versions import (
handle_system_prompt_experiment,
)
from openhands.core.config.openhands_config import OpenHandsConfig
from openhands.core.logger import openhands_logger as logger
from openhands.experiments.experiment_manager import ExperimentManager
from openhands.sdk import Agent
from openhands.server.session.conversation_init_data import ConversationInitData
class SaaSExperimentManager(ExperimentManager):
@staticmethod
def run_agent_variant_tests__v1(
user_id: str | None, conversation_id: UUID, agent: Agent
) -> Agent:
if not ENABLE_EXPERIMENT_MANAGER:
logger.info(
'experiment_manager:run_conversation_variant_test:skipped',
extra={'reason': 'experiment_manager_disabled'},
)
return agent
if EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT:
# Skip experiment for planning agents which require their specialized prompt
if agent.system_prompt_filename != 'system_prompt_planning.j2':
agent = agent.model_copy(
update={'system_prompt_filename': 'system_prompt_long_horizon.j2'}
)
return agent
@staticmethod
def run_conversation_variant_test(
user_id, conversation_id, conversation_settings
) -> ConversationInitData:
"""
Run conversation variant test and potentially modify the conversation settings
based on the PostHog feature flags.
Args:
user_id: The user ID
conversation_id: The conversation ID
conversation_settings: The conversation settings that may include convo_id and llm_model
Returns:
The modified conversation settings
"""
logger.debug(
'experiment_manager:run_conversation_variant_test:started',
extra={'user_id': user_id, 'conversation_id': conversation_id},
)
return conversation_settings
@staticmethod
def run_config_variant_test(
user_id: str | None, conversation_id: str, config: OpenHandsConfig
) -> OpenHandsConfig:
"""
Run agent config variant test and potentially modify the OpenHands config
based on the current experiment type and PostHog feature flags.
Args:
user_id: The user ID
conversation_id: The conversation ID
config: The OpenHands configuration
Returns:
The modified OpenHands configuration
"""
logger.info(
'experiment_manager:run_config_variant_test:started',
extra={'user_id': user_id},
)
# Skip all experiment processing if the experiment manager is disabled
if not ENABLE_EXPERIMENT_MANAGER:
logger.info(
'experiment_manager:run_config_variant_test:skipped',
extra={'reason': 'experiment_manager_disabled'},
)
return config
# Pass the entire OpenHands config to the system prompt experiment
# Let the experiment handler directly modify the config as needed
modified_config = handle_system_prompt_experiment(
user_id, conversation_id, config
)
# Condenser max step experiment is applied via conversation variant test,
# not config variant test. Return modified config from system prompt only.
return modified_config

View File

@@ -1,107 +0,0 @@
"""
LiteLLM model experiment handler.
This module contains the handler for the LiteLLM model experiment.
"""
import posthog
from experiments.constants import EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT
from server.constants import (
IS_FEATURE_ENV,
build_litellm_proxy_model_path,
get_default_litellm_model,
)
from openhands.core.logger import openhands_logger as logger
def handle_litellm_default_model_experiment(
user_id, conversation_id, conversation_settings
):
"""
Handle the LiteLLM model experiment.
Args:
user_id: The user ID
conversation_id: The conversation ID
conversation_settings: The conversation settings
Returns:
Modified conversation settings
"""
# No-op if the specific experiment is not enabled
if not EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT:
logger.info(
'experiment_manager:ab_testing:skipped',
extra={
'convo_id': conversation_id,
'reason': 'experiment_not_enabled',
'experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
},
)
return conversation_settings
# Use experiment name as the flag key
try:
enabled_variant = posthog.get_feature_flag(
EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT, conversation_id
)
except Exception as e:
logger.error(
'experiment_manager:get_feature_flag:failed',
extra={
'convo_id': conversation_id,
'experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
'error': str(e),
},
)
return conversation_settings
# Log the experiment event
# If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog
posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id
try:
posthog.capture(
distinct_id=posthog_user_id,
event='model_set',
properties={
'conversation_id': conversation_id,
'variant': enabled_variant,
'original_user_id': user_id,
'is_feature_env': IS_FEATURE_ENV,
},
)
except Exception as e:
logger.error(
'experiment_manager:posthog_capture:failed',
extra={
'convo_id': conversation_id,
'experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
'error': str(e),
},
)
# Continue execution as this is not critical
logger.info(
'posthog_capture',
extra={
'event': 'model_set',
'posthog_user_id': posthog_user_id,
'is_feature_env': IS_FEATURE_ENV,
'conversation_id': conversation_id,
'variant': enabled_variant,
},
)
# Set the model based on the feature flag variant
if enabled_variant == 'claude37':
# Use the shared utility to construct the LiteLLM proxy model path
model = build_litellm_proxy_model_path('claude-3-7-sonnet-20250219')
# Update the conversation settings with the selected model
conversation_settings.llm_model = model
else:
# Update the conversation settings with the default model for the current version
conversation_settings.llm_model = get_default_litellm_model()
return conversation_settings

View File

@@ -1,181 +0,0 @@
"""
System prompt experiment handler.
This module contains the handler for the system prompt experiment that uses
the PostHog variant as the system prompt filename.
"""
import copy
import posthog
from experiments.constants import EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT
from server.constants import IS_FEATURE_ENV
from storage.experiment_assignment_store import ExperimentAssignmentStore
from openhands.core.config.openhands_config import OpenHandsConfig
from openhands.core.logger import openhands_logger as logger
def _get_system_prompt_variant(user_id, conversation_id):
"""
Get the system prompt variant for the experiment.
Args:
user_id: The user ID
conversation_id: The conversation ID
Returns:
str or None: The PostHog variant name or None if experiment is not enabled or error occurs
"""
# No-op if the specific experiment is not enabled
if not EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT:
logger.info(
'experiment_manager_002:ab_testing:skipped',
extra={
'convo_id': conversation_id,
'reason': 'experiment_not_enabled',
'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
},
)
return None
# Use experiment name as the flag key
try:
enabled_variant = posthog.get_feature_flag(
EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT, conversation_id
)
except Exception as e:
logger.error(
'experiment_manager:get_feature_flag:failed',
extra={
'convo_id': conversation_id,
'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
'error': str(e),
},
)
return None
# Store the experiment assignment in the database
try:
experiment_store = ExperimentAssignmentStore()
experiment_store.update_experiment_variant(
conversation_id=conversation_id,
experiment_name='system_prompt_experiment',
variant=enabled_variant,
)
except Exception as e:
logger.error(
'experiment_manager:store_assignment:failed',
extra={
'convo_id': conversation_id,
'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
'variant': enabled_variant,
'error': str(e),
},
)
# Fail the experiment if we cannot track the splits - results would not be explainable
return None
# Log the experiment event
# If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog
posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id
try:
posthog.capture(
distinct_id=posthog_user_id,
event='system_prompt_set',
properties={
'conversation_id': conversation_id,
'variant': enabled_variant,
'original_user_id': user_id,
'is_feature_env': IS_FEATURE_ENV,
},
)
except Exception as e:
logger.error(
'experiment_manager:posthog_capture:failed',
extra={
'convo_id': conversation_id,
'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
'error': str(e),
},
)
# Continue execution as this is not critical
logger.info(
'posthog_capture',
extra={
'event': 'system_prompt_set',
'posthog_user_id': posthog_user_id,
'is_feature_env': IS_FEATURE_ENV,
'conversation_id': conversation_id,
'variant': enabled_variant,
},
)
return enabled_variant
def handle_system_prompt_experiment(
user_id, conversation_id, config: OpenHandsConfig
) -> OpenHandsConfig:
"""
Handle the system prompt experiment for OpenHands config.
Args:
user_id: The user ID
conversation_id: The conversation ID
config: The OpenHands configuration
Returns:
Modified OpenHands configuration
"""
enabled_variant = _get_system_prompt_variant(user_id, conversation_id)
# If variant is None, experiment is not enabled or there was an error
if enabled_variant is None:
return config
# Deep copy the config to avoid modifying the original
modified_config = copy.deepcopy(config)
# Set the system prompt filename based on the variant
if enabled_variant == 'control':
# Use the long-horizon system prompt for the control variant
agent_config = modified_config.get_agent_config(modified_config.default_agent)
agent_config.system_prompt_filename = 'system_prompt_long_horizon.j2'
agent_config.enable_plan_mode = True
elif enabled_variant == 'interactive':
modified_config.get_agent_config(
modified_config.default_agent
).system_prompt_filename = 'system_prompt_interactive.j2'
elif enabled_variant == 'no_tools':
modified_config.get_agent_config(
modified_config.default_agent
).system_prompt_filename = 'system_prompt.j2'
else:
logger.error(
'system_prompt_experiment:unknown_variant',
extra={
'user_id': user_id,
'convo_id': conversation_id,
'variant': enabled_variant,
'reason': 'no explicit mapping; returning original config',
},
)
return config
# Log which prompt is being used
logger.info(
'system_prompt_experiment:prompt_selected',
extra={
'user_id': user_id,
'convo_id': conversation_id,
'system_prompt_filename': modified_config.get_agent_config(
modified_config.default_agent
).system_prompt_filename,
'variant': enabled_variant,
},
)
return modified_config

View File

@@ -1,137 +0,0 @@
"""
LiteLLM model experiment handler.
This module contains the handler for the LiteLLM model experiment.
"""
import posthog
from experiments.constants import EXPERIMENT_CLAUDE4_VS_GPT5
from server.constants import (
IS_FEATURE_ENV,
build_litellm_proxy_model_path,
get_default_litellm_model,
)
from storage.experiment_assignment_store import ExperimentAssignmentStore
from openhands.core.logger import openhands_logger as logger
from openhands.server.session.conversation_init_data import ConversationInitData
def _get_model_variant(user_id: str | None, conversation_id: str) -> str | None:
if not EXPERIMENT_CLAUDE4_VS_GPT5:
logger.info(
'experiment_manager:ab_testing:skipped',
extra={
'convo_id': conversation_id,
'reason': 'experiment_not_enabled',
'experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
},
)
return None
try:
enabled_variant = posthog.get_feature_flag(
EXPERIMENT_CLAUDE4_VS_GPT5, conversation_id
)
except Exception as e:
logger.error(
'experiment_manager:get_feature_flag:failed',
extra={
'convo_id': conversation_id,
'experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
'error': str(e),
},
)
return None
# Store the experiment assignment in the database
try:
experiment_store = ExperimentAssignmentStore()
experiment_store.update_experiment_variant(
conversation_id=conversation_id,
experiment_name='claude4_vs_gpt5_experiment',
variant=enabled_variant,
)
except Exception as e:
logger.error(
'experiment_manager:store_assignment:failed',
extra={
'convo_id': conversation_id,
'experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
'variant': enabled_variant,
'error': str(e),
},
)
# Fail the experiment if we cannot track the splits - results would not be explainable
return None
# Log the experiment event
# If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog
posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id
try:
posthog.capture(
distinct_id=posthog_user_id,
event='claude4_or_gpt5_set',
properties={
'conversation_id': conversation_id,
'variant': enabled_variant,
'original_user_id': user_id,
'is_feature_env': IS_FEATURE_ENV,
},
)
except Exception as e:
logger.error(
'experiment_manager:posthog_capture:failed',
extra={
'convo_id': conversation_id,
'experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
'error': str(e),
},
)
# Continue execution as this is not critical
logger.info(
'posthog_capture',
extra={
'event': 'claude4_or_gpt5_set',
'posthog_user_id': posthog_user_id,
'is_feature_env': IS_FEATURE_ENV,
'conversation_id': conversation_id,
'variant': enabled_variant,
},
)
return enabled_variant
def handle_claude4_vs_gpt5_experiment(
user_id: str | None,
conversation_id: str,
conversation_settings: ConversationInitData,
) -> ConversationInitData:
"""
Handle the LiteLLM model experiment.
Args:
user_id: The user ID
conversation_id: The conversation ID
conversation_settings: The conversation settings
Returns:
Modified conversation settings
"""
enabled_variant = _get_model_variant(user_id, conversation_id)
if not enabled_variant:
return conversation_settings
# Set the model based on the feature flag variant
if enabled_variant == 'gpt5':
model = build_litellm_proxy_model_path('gpt-5-2025-08-07')
conversation_settings.llm_model = model
else:
conversation_settings.llm_model = get_default_litellm_model()
return conversation_settings

View File

@@ -1,232 +0,0 @@
"""
Condenser max step experiment handler.
This module contains the handler for the condenser max step experiment that tests
different max_size values for the condenser configuration.
"""
from uuid import UUID
import posthog
from experiments.constants import EXPERIMENT_CONDENSER_MAX_STEP
from server.constants import IS_FEATURE_ENV
from storage.experiment_assignment_store import ExperimentAssignmentStore
from openhands.core.logger import openhands_logger as logger
from openhands.sdk import Agent
from openhands.sdk.context.condenser import (
LLMSummarizingCondenser,
)
from openhands.server.session.conversation_init_data import ConversationInitData
def _get_condenser_max_step_variant(user_id, conversation_id):
"""
Get the condenser max step variant for the experiment.
Args:
user_id: The user ID
conversation_id: The conversation ID
Returns:
str or None: The PostHog variant name or None if experiment is not enabled or error occurs
"""
# No-op if the specific experiment is not enabled
if not EXPERIMENT_CONDENSER_MAX_STEP:
logger.info(
'experiment_manager_004:ab_testing:skipped',
extra={
'convo_id': conversation_id,
'reason': 'experiment_not_enabled',
'experiment': EXPERIMENT_CONDENSER_MAX_STEP,
},
)
return None
# Use experiment name as the flag key
try:
enabled_variant = posthog.get_feature_flag(
EXPERIMENT_CONDENSER_MAX_STEP, conversation_id
)
except Exception as e:
logger.error(
'experiment_manager:get_feature_flag:failed',
extra={
'convo_id': conversation_id,
'experiment': EXPERIMENT_CONDENSER_MAX_STEP,
'error': str(e),
},
)
return None
# Store the experiment assignment in the database
try:
experiment_store = ExperimentAssignmentStore()
experiment_store.update_experiment_variant(
conversation_id=conversation_id,
experiment_name='condenser_max_step_experiment',
variant=enabled_variant,
)
except Exception as e:
logger.error(
'experiment_manager:store_assignment:failed',
extra={
'convo_id': conversation_id,
'experiment': EXPERIMENT_CONDENSER_MAX_STEP,
'variant': enabled_variant,
'error': str(e),
},
)
# Fail the experiment if we cannot track the splits - results would not be explainable
return None
# Log the experiment event
# If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog
posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id
try:
posthog.capture(
distinct_id=posthog_user_id,
event='condenser_max_step_set',
properties={
'conversation_id': conversation_id,
'variant': enabled_variant,
'original_user_id': user_id,
'is_feature_env': IS_FEATURE_ENV,
},
)
except Exception as e:
logger.error(
'experiment_manager:posthog_capture:failed',
extra={
'convo_id': conversation_id,
'experiment': EXPERIMENT_CONDENSER_MAX_STEP,
'error': str(e),
},
)
# Continue execution as this is not critical
logger.info(
'posthog_capture',
extra={
'event': 'condenser_max_step_set',
'posthog_user_id': posthog_user_id,
'is_feature_env': IS_FEATURE_ENV,
'conversation_id': conversation_id,
'variant': enabled_variant,
},
)
return enabled_variant
def handle_condenser_max_step_experiment(
user_id: str | None,
conversation_id: str,
conversation_settings: ConversationInitData,
) -> ConversationInitData:
"""
Handle the condenser max step experiment for conversation settings.
We should not modify persistent user settings. Instead, apply the experiment
variant to the conversation's in-memory settings object for this session only.
Variants:
- control -> condenser_max_size = 120
- treatment -> condenser_max_size = 80
Returns the (potentially) modified conversation_settings.
"""
enabled_variant = _get_condenser_max_step_variant(user_id, conversation_id)
if enabled_variant is None:
return conversation_settings
if enabled_variant == 'control':
condenser_max_size = 120
elif enabled_variant == 'treatment':
condenser_max_size = 80
else:
logger.error(
'condenser_max_step_experiment:unknown_variant',
extra={
'user_id': user_id,
'convo_id': conversation_id,
'variant': enabled_variant,
'reason': 'unknown variant; returning original conversation settings',
},
)
return conversation_settings
try:
# Apply the variant to this conversation only; do not persist to DB.
# Not all OpenHands versions expose `condenser_max_size` on settings.
if hasattr(conversation_settings, 'condenser_max_size'):
conversation_settings.condenser_max_size = condenser_max_size
logger.info(
'condenser_max_step_experiment:conversation_settings_applied',
extra={
'user_id': user_id,
'convo_id': conversation_id,
'variant': enabled_variant,
'condenser_max_size': condenser_max_size,
},
)
else:
logger.warning(
'condenser_max_step_experiment:field_missing_on_settings',
extra={
'user_id': user_id,
'convo_id': conversation_id,
'variant': enabled_variant,
'reason': 'condenser_max_size not present on ConversationInitData',
},
)
except Exception as e:
logger.error(
'condenser_max_step_experiment:apply_failed',
extra={
'user_id': user_id,
'convo_id': conversation_id,
'variant': enabled_variant,
'error': str(e),
},
)
return conversation_settings
return conversation_settings
def handle_condenser_max_step_experiment__v1(
user_id: str | None,
conversation_id: UUID,
agent: Agent,
) -> Agent:
enabled_variant = _get_condenser_max_step_variant(user_id, str(conversation_id))
if enabled_variant is None:
return agent
if enabled_variant == 'control':
condenser_max_size = 120
elif enabled_variant == 'treatment':
condenser_max_size = 80
else:
logger.error(
'condenser_max_step_experiment:unknown_variant',
extra={
'user_id': user_id,
'convo_id': conversation_id,
'variant': enabled_variant,
'reason': 'unknown variant; returning original conversation settings',
},
)
return agent
condenser_llm = agent.llm.model_copy(update={'usage_id': 'condenser'})
condenser = LLMSummarizingCondenser(
llm=condenser_llm, max_size=condenser_max_size, keep_first=4
)
return agent.model_copy(update={'condenser': condenser})

View File

@@ -1,25 +0,0 @@
"""
Experiment versions package.
This package contains handlers for different experiment versions.
"""
from experiments.experiment_versions._001_litellm_default_model_experiment import (
handle_litellm_default_model_experiment,
)
from experiments.experiment_versions._002_system_prompt_experiment import (
handle_system_prompt_experiment,
)
from experiments.experiment_versions._003_llm_claude4_vs_gpt5_experiment import (
handle_claude4_vs_gpt5_experiment,
)
from experiments.experiment_versions._004_condenser_max_step_experiment import (
handle_condenser_max_step_experiment,
)
__all__ = [
'handle_litellm_default_model_experiment',
'handle_system_prompt_experiment',
'handle_claude4_vs_gpt5_experiment',
'handle_condenser_max_step_experiment',
]

View File

@@ -17,7 +17,6 @@ packages = [
{ include = "storage" },
{ include = "sync" },
{ include = "integrations" },
{ include = "experiments" },
]
[tool.poetry.dependencies]

View File

@@ -129,10 +129,6 @@ async def _process_batch_operations_background(
# No action required
continue
if subpath == 'exp_config.json':
# No action required
continue
# Log unhandled paths for future implementation
logger.warning(
'unknown_path_in_batch_webhook',

View File

@@ -391,39 +391,11 @@ class SaasNestedConversationManager(ConversationManager):
await self._setup_nested_settings(client, api_url, settings)
await self._setup_provider_tokens(client, api_url, settings)
await self._setup_custom_secrets(client, api_url, settings.custom_secrets) # type: ignore
await self._setup_experiment_config(client, api_url, sid, user_id)
await self._create_nested_conversation(
client, api_url, sid, user_id, settings, initial_user_msg, replay_json
)
await self._wait_for_conversation_ready(client, api_url, sid)
async def _setup_experiment_config(
self, client: httpx.AsyncClient, api_url: str, sid: str, user_id: str
):
# Prevent circular import
from openhands.experiments.experiment_manager import (
ExperimentConfig,
ExperimentManagerImpl,
)
config: OpenHandsConfig = ExperimentManagerImpl.run_config_variant_test(
user_id, sid, self.config
)
experiment_config = ExperimentConfig(
config={
'system_prompt_filename': config.get_agent_config(
config.default_agent
).system_prompt_filename
}
)
response = await client.post(
f'{api_url}/api/conversations/{sid}/exp-config',
json=experiment_config.model_dump(),
)
response.raise_for_status()
async def _setup_nested_settings(
self, client: httpx.AsyncClient, api_url: str, settings: Settings
) -> None:

View File

@@ -4,7 +4,6 @@ from storage.billing_session import BillingSession
from storage.billing_session_type import BillingSessionType
from storage.conversation_callback import CallbackStatus, ConversationCallback
from storage.conversation_work import ConversationWork
from storage.experiment_assignment import ExperimentAssignment
from storage.feedback import ConversationFeedback, Feedback
from storage.github_app_installation import GithubAppInstallation
from storage.gitlab_webhook import GitlabWebhook, WebhookStatus
@@ -50,7 +49,6 @@ __all__ = [
'ConversationFeedback',
'StoredConversationMetadataSaas',
'ConversationWork',
'ExperimentAssignment',
'Feedback',
'GithubAppInstallation',
'GitlabWebhook',

View File

@@ -1,41 +0,0 @@
"""
Database model for experiment assignments.
This model tracks which experiments a conversation is assigned to and what variant
they received from PostHog feature flags.
"""
import uuid
from datetime import UTC, datetime
from sqlalchemy import Column, DateTime, String, UniqueConstraint
from storage.base import Base
class ExperimentAssignment(Base): # type: ignore
__tablename__ = 'experiment_assignments'
id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
conversation_id = Column(String, nullable=True, index=True)
experiment_name = Column(String, nullable=False)
variant = Column(String, nullable=False)
created_at = Column(
DateTime(timezone=True),
default=lambda: datetime.now(UTC), # type: ignore[attr-defined]
nullable=False,
)
updated_at = Column(
DateTime(timezone=True),
default=lambda: datetime.now(UTC), # type: ignore[attr-defined]
onupdate=lambda: datetime.now(UTC), # type: ignore[attr-defined]
nullable=False,
)
__table_args__ = (
UniqueConstraint(
'conversation_id',
'experiment_name',
name='uq_experiment_assignments_conversation_experiment',
),
)

View File

@@ -1,52 +0,0 @@
"""
Store for managing experiment assignments.
This store handles creating and updating experiment assignments for conversations.
"""
from sqlalchemy.dialects.postgresql import insert
from storage.database import session_maker
from storage.experiment_assignment import ExperimentAssignment
from openhands.core.logger import openhands_logger as logger
class ExperimentAssignmentStore:
"""Store for managing experiment assignments."""
def update_experiment_variant(
self,
conversation_id: str,
experiment_name: str,
variant: str,
) -> None:
"""
Update the variant for a specific experiment.
Args:
conversation_id: The conversation ID
experiment_name: The name of the experiment
variant: The variant assigned
"""
with session_maker() as session:
# Use PostgreSQL's INSERT ... ON CONFLICT DO NOTHING to handle unique constraint
stmt = insert(ExperimentAssignment).values(
conversation_id=conversation_id,
experiment_name=experiment_name,
variant=variant,
)
stmt = stmt.on_conflict_do_nothing(
constraint='uq_experiment_assignments_conversation_experiment'
)
session.execute(stmt)
session.commit()
logger.info(
'experiment_assignment_store:upserted_variant',
extra={
'conversation_id': conversation_id,
'experiment_name': experiment_name,
'variant': variant,
},
)

View File

@@ -1 +0,0 @@
"""Unit tests for experiments module."""

View File

@@ -1,149 +0,0 @@
# tests/test_condenser_max_step_experiment_v1.py
from unittest.mock import patch
from uuid import uuid4
from experiments.experiment_manager import SaaSExperimentManager
# SUT imports (update the module path if needed)
from experiments.experiment_versions._004_condenser_max_step_experiment import (
handle_condenser_max_step_experiment__v1,
)
from pydantic import SecretStr
from openhands.sdk import LLM, Agent
from openhands.sdk.context.condenser import LLMSummarizingCondenser
def make_agent() -> Agent:
"""Build a minimal valid Agent."""
llm = LLM(
usage_id='primary-llm',
model='provider/model',
api_key=SecretStr('sk-test'),
)
return Agent(llm=llm)
def _patch_variant(monkeypatch, return_value):
"""Patch the internal variant getter to return a specific value."""
monkeypatch.setattr(
'experiments.experiment_versions._004_condenser_max_step_experiment._get_condenser_max_step_variant',
lambda user_id, conv_id: return_value,
raising=True,
)
def test_control_variant_sets_condenser_with_max_size_120(monkeypatch):
_patch_variant(monkeypatch, 'control')
agent = make_agent()
conv_id = uuid4()
result = handle_condenser_max_step_experiment__v1('user-1', conv_id, agent)
# Should be a new Agent instance with a condenser installed
assert result is not agent
assert isinstance(result.condenser, LLMSummarizingCondenser)
# The condenser should have its own LLM (usage_id overridden to "condenser")
assert result.condenser.llm.usage_id == 'condenser'
# The original agent LLM remains unchanged
assert agent.llm.usage_id == 'primary-llm'
# Control: max_size = 120, keep_first = 4
assert result.condenser.max_size == 120
assert result.condenser.keep_first == 4
def test_treatment_variant_sets_condenser_with_max_size_80(monkeypatch):
_patch_variant(monkeypatch, 'treatment')
agent = make_agent()
conv_id = uuid4()
result = handle_condenser_max_step_experiment__v1('user-2', conv_id, agent)
assert result is not agent
assert isinstance(result.condenser, LLMSummarizingCondenser)
assert result.condenser.llm.usage_id == 'condenser'
assert result.condenser.max_size == 80
assert result.condenser.keep_first == 4
def test_none_variant_returns_original_agent_without_changes(monkeypatch):
_patch_variant(monkeypatch, None)
agent = make_agent()
conv_id = uuid4()
result = handle_condenser_max_step_experiment__v1('user-3', conv_id, agent)
# No changes—same instance and no condenser attribute added
assert result is agent
assert getattr(result, 'condenser', None) is None
def test_unknown_variant_returns_original_agent_without_changes(monkeypatch):
_patch_variant(monkeypatch, 'weird-variant')
agent = make_agent()
conv_id = uuid4()
result = handle_condenser_max_step_experiment__v1('user-4', conv_id, agent)
assert result is agent
assert getattr(result, 'condenser', None) is None
@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', False)
def test_run_agent_variant_tests_v1_noop_when_manager_disabled():
"""If ENABLE_EXPERIMENT_MANAGER is False, the method returns the exact same agent and does not call the handler."""
agent = make_agent()
conv_id = uuid4()
result = SaaSExperimentManager.run_agent_variant_tests__v1(
user_id='user-123',
conversation_id=conv_id,
agent=agent,
)
# Same object returned (no copy)
assert result is agent
@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', True)
@patch('experiments.experiment_manager.EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', True)
def test_run_agent_variant_tests_v1_calls_handler_and_sets_system_prompt(monkeypatch):
"""When enabled, it should call the condenser experiment handler and set the long-horizon system prompt."""
agent = make_agent()
conv_id = uuid4()
_patch_variant(monkeypatch, 'treatment')
result: Agent = SaaSExperimentManager.run_agent_variant_tests__v1(
user_id='user-abc',
conversation_id=conv_id,
agent=agent,
)
# Should be a different instance than the original (copied after handler runs)
assert result is not agent
assert result.system_prompt_filename == 'system_prompt_long_horizon.j2'
@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', True)
@patch('experiments.experiment_manager.EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', True)
def test_run_agent_variant_tests_v1_preserves_planning_agent_system_prompt():
"""Planning agents should retain their specialized system prompt and not be overwritten by the experiment."""
# Arrange
planning_agent = make_agent().model_copy(
update={'system_prompt_filename': 'system_prompt_planning.j2'}
)
conv_id = uuid4()
# Act
result: Agent = SaaSExperimentManager.run_agent_variant_tests__v1(
user_id='user-planning',
conversation_id=conv_id,
agent=planning_agent,
)
# Assert
assert result.system_prompt_filename == 'system_prompt_planning.j2'