mirror of
https://github.com/OpenHands/OpenHands.git
synced 2026-03-22 05:37:20 +08:00
Remove ExperimentManager concept from codebase (#13215)
Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
@@ -53,7 +53,7 @@ repos:
|
||||
# Use -p (package) to avoid dual module name conflict when using MYPYPATH
|
||||
# MYPYPATH=enterprise allows resolving bare imports like "from integrations.xxx"
|
||||
# Note: tests package excluded to avoid conflict with core openhands tests
|
||||
entry: bash -c 'MYPYPATH=enterprise mypy --config-file enterprise/dev_config/python/mypy.ini -p integrations -p server -p storage -p sync -p experiments'
|
||||
entry: bash -c 'MYPYPATH=enterprise mypy --config-file enterprise/dev_config/python/mypy.ini -p integrations -p server -p storage -p sync'
|
||||
always_run: true
|
||||
pass_filenames: false
|
||||
files: ^enterprise/
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
import os
|
||||
|
||||
import posthog
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
# Initialize PostHog
|
||||
posthog.api_key = os.environ.get('POSTHOG_CLIENT_KEY', 'phc_placeholder')
|
||||
posthog.host = os.environ.get('POSTHOG_HOST', 'https://us.i.posthog.com')
|
||||
|
||||
# Log PostHog configuration with masked API key for security
|
||||
api_key = posthog.api_key
|
||||
if api_key and len(api_key) > 8:
|
||||
masked_key = f'{api_key[:4]}...{api_key[-4:]}'
|
||||
else:
|
||||
masked_key = 'not_set_or_too_short'
|
||||
logger.info('posthog_configuration', extra={'posthog_api_key_masked': masked_key})
|
||||
|
||||
# Global toggle for the experiment manager
|
||||
ENABLE_EXPERIMENT_MANAGER = (
|
||||
os.environ.get('ENABLE_EXPERIMENT_MANAGER', 'false').lower() == 'true'
|
||||
)
|
||||
|
||||
# Get the current experiment type from environment variable
|
||||
# If None, no experiment is running
|
||||
EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT = os.environ.get(
|
||||
'EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT', ''
|
||||
)
|
||||
# System prompt experiment toggle
|
||||
EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT = os.environ.get(
|
||||
'EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', ''
|
||||
)
|
||||
|
||||
EXPERIMENT_CLAUDE4_VS_GPT5 = os.environ.get('EXPERIMENT_CLAUDE4_VS_GPT5', '')
|
||||
|
||||
EXPERIMENT_CONDENSER_MAX_STEP = os.environ.get('EXPERIMENT_CONDENSER_MAX_STEP', '')
|
||||
|
||||
logger.info(
|
||||
'experiment_manager:run_conversation_variant_test:experiment_config',
|
||||
extra={
|
||||
'enable_experiment_manager': ENABLE_EXPERIMENT_MANAGER,
|
||||
'experiment_litellm_default_model_experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
|
||||
'experiment_system_prompt_experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
|
||||
'experiment_claude4_vs_gpt5_experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
|
||||
'experiment_condenser_max_step': EXPERIMENT_CONDENSER_MAX_STEP,
|
||||
},
|
||||
)
|
||||
@@ -1,99 +0,0 @@
|
||||
from uuid import UUID
|
||||
|
||||
from experiments.constants import (
|
||||
ENABLE_EXPERIMENT_MANAGER,
|
||||
EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
|
||||
)
|
||||
from experiments.experiment_versions import (
|
||||
handle_system_prompt_experiment,
|
||||
)
|
||||
|
||||
from openhands.core.config.openhands_config import OpenHandsConfig
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.experiments.experiment_manager import ExperimentManager
|
||||
from openhands.sdk import Agent
|
||||
from openhands.server.session.conversation_init_data import ConversationInitData
|
||||
|
||||
|
||||
class SaaSExperimentManager(ExperimentManager):
|
||||
@staticmethod
|
||||
def run_agent_variant_tests__v1(
|
||||
user_id: str | None, conversation_id: UUID, agent: Agent
|
||||
) -> Agent:
|
||||
if not ENABLE_EXPERIMENT_MANAGER:
|
||||
logger.info(
|
||||
'experiment_manager:run_conversation_variant_test:skipped',
|
||||
extra={'reason': 'experiment_manager_disabled'},
|
||||
)
|
||||
return agent
|
||||
|
||||
if EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT:
|
||||
# Skip experiment for planning agents which require their specialized prompt
|
||||
if agent.system_prompt_filename != 'system_prompt_planning.j2':
|
||||
agent = agent.model_copy(
|
||||
update={'system_prompt_filename': 'system_prompt_long_horizon.j2'}
|
||||
)
|
||||
|
||||
return agent
|
||||
|
||||
@staticmethod
|
||||
def run_conversation_variant_test(
|
||||
user_id, conversation_id, conversation_settings
|
||||
) -> ConversationInitData:
|
||||
"""
|
||||
Run conversation variant test and potentially modify the conversation settings
|
||||
based on the PostHog feature flags.
|
||||
|
||||
Args:
|
||||
user_id: The user ID
|
||||
conversation_id: The conversation ID
|
||||
conversation_settings: The conversation settings that may include convo_id and llm_model
|
||||
|
||||
Returns:
|
||||
The modified conversation settings
|
||||
"""
|
||||
logger.debug(
|
||||
'experiment_manager:run_conversation_variant_test:started',
|
||||
extra={'user_id': user_id, 'conversation_id': conversation_id},
|
||||
)
|
||||
|
||||
return conversation_settings
|
||||
|
||||
@staticmethod
|
||||
def run_config_variant_test(
|
||||
user_id: str | None, conversation_id: str, config: OpenHandsConfig
|
||||
) -> OpenHandsConfig:
|
||||
"""
|
||||
Run agent config variant test and potentially modify the OpenHands config
|
||||
based on the current experiment type and PostHog feature flags.
|
||||
|
||||
Args:
|
||||
user_id: The user ID
|
||||
conversation_id: The conversation ID
|
||||
config: The OpenHands configuration
|
||||
|
||||
Returns:
|
||||
The modified OpenHands configuration
|
||||
"""
|
||||
logger.info(
|
||||
'experiment_manager:run_config_variant_test:started',
|
||||
extra={'user_id': user_id},
|
||||
)
|
||||
|
||||
# Skip all experiment processing if the experiment manager is disabled
|
||||
if not ENABLE_EXPERIMENT_MANAGER:
|
||||
logger.info(
|
||||
'experiment_manager:run_config_variant_test:skipped',
|
||||
extra={'reason': 'experiment_manager_disabled'},
|
||||
)
|
||||
return config
|
||||
|
||||
# Pass the entire OpenHands config to the system prompt experiment
|
||||
# Let the experiment handler directly modify the config as needed
|
||||
modified_config = handle_system_prompt_experiment(
|
||||
user_id, conversation_id, config
|
||||
)
|
||||
|
||||
# Condenser max step experiment is applied via conversation variant test,
|
||||
# not config variant test. Return modified config from system prompt only.
|
||||
return modified_config
|
||||
@@ -1,107 +0,0 @@
|
||||
"""
|
||||
LiteLLM model experiment handler.
|
||||
|
||||
This module contains the handler for the LiteLLM model experiment.
|
||||
"""
|
||||
|
||||
import posthog
|
||||
from experiments.constants import EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT
|
||||
from server.constants import (
|
||||
IS_FEATURE_ENV,
|
||||
build_litellm_proxy_model_path,
|
||||
get_default_litellm_model,
|
||||
)
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
|
||||
def handle_litellm_default_model_experiment(
|
||||
user_id, conversation_id, conversation_settings
|
||||
):
|
||||
"""
|
||||
Handle the LiteLLM model experiment.
|
||||
|
||||
Args:
|
||||
user_id: The user ID
|
||||
conversation_id: The conversation ID
|
||||
conversation_settings: The conversation settings
|
||||
|
||||
Returns:
|
||||
Modified conversation settings
|
||||
"""
|
||||
# No-op if the specific experiment is not enabled
|
||||
if not EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT:
|
||||
logger.info(
|
||||
'experiment_manager:ab_testing:skipped',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'reason': 'experiment_not_enabled',
|
||||
'experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
|
||||
},
|
||||
)
|
||||
return conversation_settings
|
||||
|
||||
# Use experiment name as the flag key
|
||||
try:
|
||||
enabled_variant = posthog.get_feature_flag(
|
||||
EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT, conversation_id
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:get_feature_flag:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
return conversation_settings
|
||||
|
||||
# Log the experiment event
|
||||
# If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog
|
||||
posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id
|
||||
|
||||
try:
|
||||
posthog.capture(
|
||||
distinct_id=posthog_user_id,
|
||||
event='model_set',
|
||||
properties={
|
||||
'conversation_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'original_user_id': user_id,
|
||||
'is_feature_env': IS_FEATURE_ENV,
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:posthog_capture:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
# Continue execution as this is not critical
|
||||
|
||||
logger.info(
|
||||
'posthog_capture',
|
||||
extra={
|
||||
'event': 'model_set',
|
||||
'posthog_user_id': posthog_user_id,
|
||||
'is_feature_env': IS_FEATURE_ENV,
|
||||
'conversation_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
},
|
||||
)
|
||||
|
||||
# Set the model based on the feature flag variant
|
||||
if enabled_variant == 'claude37':
|
||||
# Use the shared utility to construct the LiteLLM proxy model path
|
||||
model = build_litellm_proxy_model_path('claude-3-7-sonnet-20250219')
|
||||
# Update the conversation settings with the selected model
|
||||
conversation_settings.llm_model = model
|
||||
else:
|
||||
# Update the conversation settings with the default model for the current version
|
||||
conversation_settings.llm_model = get_default_litellm_model()
|
||||
|
||||
return conversation_settings
|
||||
@@ -1,181 +0,0 @@
|
||||
"""
|
||||
System prompt experiment handler.
|
||||
|
||||
This module contains the handler for the system prompt experiment that uses
|
||||
the PostHog variant as the system prompt filename.
|
||||
"""
|
||||
|
||||
import copy
|
||||
|
||||
import posthog
|
||||
from experiments.constants import EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT
|
||||
from server.constants import IS_FEATURE_ENV
|
||||
from storage.experiment_assignment_store import ExperimentAssignmentStore
|
||||
|
||||
from openhands.core.config.openhands_config import OpenHandsConfig
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
|
||||
def _get_system_prompt_variant(user_id, conversation_id):
|
||||
"""
|
||||
Get the system prompt variant for the experiment.
|
||||
|
||||
Args:
|
||||
user_id: The user ID
|
||||
conversation_id: The conversation ID
|
||||
|
||||
Returns:
|
||||
str or None: The PostHog variant name or None if experiment is not enabled or error occurs
|
||||
"""
|
||||
# No-op if the specific experiment is not enabled
|
||||
if not EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT:
|
||||
logger.info(
|
||||
'experiment_manager_002:ab_testing:skipped',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'reason': 'experiment_not_enabled',
|
||||
'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
|
||||
},
|
||||
)
|
||||
return None
|
||||
|
||||
# Use experiment name as the flag key
|
||||
try:
|
||||
enabled_variant = posthog.get_feature_flag(
|
||||
EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT, conversation_id
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:get_feature_flag:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
return None
|
||||
|
||||
# Store the experiment assignment in the database
|
||||
try:
|
||||
experiment_store = ExperimentAssignmentStore()
|
||||
experiment_store.update_experiment_variant(
|
||||
conversation_id=conversation_id,
|
||||
experiment_name='system_prompt_experiment',
|
||||
variant=enabled_variant,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:store_assignment:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
|
||||
'variant': enabled_variant,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
# Fail the experiment if we cannot track the splits - results would not be explainable
|
||||
return None
|
||||
|
||||
# Log the experiment event
|
||||
# If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog
|
||||
posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id
|
||||
|
||||
try:
|
||||
posthog.capture(
|
||||
distinct_id=posthog_user_id,
|
||||
event='system_prompt_set',
|
||||
properties={
|
||||
'conversation_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'original_user_id': user_id,
|
||||
'is_feature_env': IS_FEATURE_ENV,
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:posthog_capture:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
# Continue execution as this is not critical
|
||||
|
||||
logger.info(
|
||||
'posthog_capture',
|
||||
extra={
|
||||
'event': 'system_prompt_set',
|
||||
'posthog_user_id': posthog_user_id,
|
||||
'is_feature_env': IS_FEATURE_ENV,
|
||||
'conversation_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
},
|
||||
)
|
||||
|
||||
return enabled_variant
|
||||
|
||||
|
||||
def handle_system_prompt_experiment(
|
||||
user_id, conversation_id, config: OpenHandsConfig
|
||||
) -> OpenHandsConfig:
|
||||
"""
|
||||
Handle the system prompt experiment for OpenHands config.
|
||||
|
||||
Args:
|
||||
user_id: The user ID
|
||||
conversation_id: The conversation ID
|
||||
config: The OpenHands configuration
|
||||
|
||||
Returns:
|
||||
Modified OpenHands configuration
|
||||
"""
|
||||
enabled_variant = _get_system_prompt_variant(user_id, conversation_id)
|
||||
|
||||
# If variant is None, experiment is not enabled or there was an error
|
||||
if enabled_variant is None:
|
||||
return config
|
||||
|
||||
# Deep copy the config to avoid modifying the original
|
||||
modified_config = copy.deepcopy(config)
|
||||
|
||||
# Set the system prompt filename based on the variant
|
||||
if enabled_variant == 'control':
|
||||
# Use the long-horizon system prompt for the control variant
|
||||
agent_config = modified_config.get_agent_config(modified_config.default_agent)
|
||||
agent_config.system_prompt_filename = 'system_prompt_long_horizon.j2'
|
||||
agent_config.enable_plan_mode = True
|
||||
elif enabled_variant == 'interactive':
|
||||
modified_config.get_agent_config(
|
||||
modified_config.default_agent
|
||||
).system_prompt_filename = 'system_prompt_interactive.j2'
|
||||
elif enabled_variant == 'no_tools':
|
||||
modified_config.get_agent_config(
|
||||
modified_config.default_agent
|
||||
).system_prompt_filename = 'system_prompt.j2'
|
||||
else:
|
||||
logger.error(
|
||||
'system_prompt_experiment:unknown_variant',
|
||||
extra={
|
||||
'user_id': user_id,
|
||||
'convo_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'reason': 'no explicit mapping; returning original config',
|
||||
},
|
||||
)
|
||||
return config
|
||||
|
||||
# Log which prompt is being used
|
||||
logger.info(
|
||||
'system_prompt_experiment:prompt_selected',
|
||||
extra={
|
||||
'user_id': user_id,
|
||||
'convo_id': conversation_id,
|
||||
'system_prompt_filename': modified_config.get_agent_config(
|
||||
modified_config.default_agent
|
||||
).system_prompt_filename,
|
||||
'variant': enabled_variant,
|
||||
},
|
||||
)
|
||||
|
||||
return modified_config
|
||||
@@ -1,137 +0,0 @@
|
||||
"""
|
||||
LiteLLM model experiment handler.
|
||||
|
||||
This module contains the handler for the LiteLLM model experiment.
|
||||
"""
|
||||
|
||||
import posthog
|
||||
from experiments.constants import EXPERIMENT_CLAUDE4_VS_GPT5
|
||||
from server.constants import (
|
||||
IS_FEATURE_ENV,
|
||||
build_litellm_proxy_model_path,
|
||||
get_default_litellm_model,
|
||||
)
|
||||
from storage.experiment_assignment_store import ExperimentAssignmentStore
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.server.session.conversation_init_data import ConversationInitData
|
||||
|
||||
|
||||
def _get_model_variant(user_id: str | None, conversation_id: str) -> str | None:
|
||||
if not EXPERIMENT_CLAUDE4_VS_GPT5:
|
||||
logger.info(
|
||||
'experiment_manager:ab_testing:skipped',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'reason': 'experiment_not_enabled',
|
||||
'experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
|
||||
},
|
||||
)
|
||||
return None
|
||||
|
||||
try:
|
||||
enabled_variant = posthog.get_feature_flag(
|
||||
EXPERIMENT_CLAUDE4_VS_GPT5, conversation_id
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:get_feature_flag:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
return None
|
||||
|
||||
# Store the experiment assignment in the database
|
||||
try:
|
||||
experiment_store = ExperimentAssignmentStore()
|
||||
experiment_store.update_experiment_variant(
|
||||
conversation_id=conversation_id,
|
||||
experiment_name='claude4_vs_gpt5_experiment',
|
||||
variant=enabled_variant,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:store_assignment:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
|
||||
'variant': enabled_variant,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
# Fail the experiment if we cannot track the splits - results would not be explainable
|
||||
return None
|
||||
|
||||
# Log the experiment event
|
||||
# If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog
|
||||
posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id
|
||||
|
||||
try:
|
||||
posthog.capture(
|
||||
distinct_id=posthog_user_id,
|
||||
event='claude4_or_gpt5_set',
|
||||
properties={
|
||||
'conversation_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'original_user_id': user_id,
|
||||
'is_feature_env': IS_FEATURE_ENV,
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:posthog_capture:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
# Continue execution as this is not critical
|
||||
|
||||
logger.info(
|
||||
'posthog_capture',
|
||||
extra={
|
||||
'event': 'claude4_or_gpt5_set',
|
||||
'posthog_user_id': posthog_user_id,
|
||||
'is_feature_env': IS_FEATURE_ENV,
|
||||
'conversation_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
},
|
||||
)
|
||||
|
||||
return enabled_variant
|
||||
|
||||
|
||||
def handle_claude4_vs_gpt5_experiment(
|
||||
user_id: str | None,
|
||||
conversation_id: str,
|
||||
conversation_settings: ConversationInitData,
|
||||
) -> ConversationInitData:
|
||||
"""
|
||||
Handle the LiteLLM model experiment.
|
||||
|
||||
Args:
|
||||
user_id: The user ID
|
||||
conversation_id: The conversation ID
|
||||
conversation_settings: The conversation settings
|
||||
|
||||
Returns:
|
||||
Modified conversation settings
|
||||
"""
|
||||
|
||||
enabled_variant = _get_model_variant(user_id, conversation_id)
|
||||
|
||||
if not enabled_variant:
|
||||
return conversation_settings
|
||||
|
||||
# Set the model based on the feature flag variant
|
||||
if enabled_variant == 'gpt5':
|
||||
model = build_litellm_proxy_model_path('gpt-5-2025-08-07')
|
||||
conversation_settings.llm_model = model
|
||||
else:
|
||||
conversation_settings.llm_model = get_default_litellm_model()
|
||||
|
||||
return conversation_settings
|
||||
@@ -1,232 +0,0 @@
|
||||
"""
|
||||
Condenser max step experiment handler.
|
||||
|
||||
This module contains the handler for the condenser max step experiment that tests
|
||||
different max_size values for the condenser configuration.
|
||||
"""
|
||||
|
||||
from uuid import UUID
|
||||
|
||||
import posthog
|
||||
from experiments.constants import EXPERIMENT_CONDENSER_MAX_STEP
|
||||
from server.constants import IS_FEATURE_ENV
|
||||
from storage.experiment_assignment_store import ExperimentAssignmentStore
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.sdk import Agent
|
||||
from openhands.sdk.context.condenser import (
|
||||
LLMSummarizingCondenser,
|
||||
)
|
||||
from openhands.server.session.conversation_init_data import ConversationInitData
|
||||
|
||||
|
||||
def _get_condenser_max_step_variant(user_id, conversation_id):
|
||||
"""
|
||||
Get the condenser max step variant for the experiment.
|
||||
|
||||
Args:
|
||||
user_id: The user ID
|
||||
conversation_id: The conversation ID
|
||||
|
||||
Returns:
|
||||
str or None: The PostHog variant name or None if experiment is not enabled or error occurs
|
||||
"""
|
||||
# No-op if the specific experiment is not enabled
|
||||
if not EXPERIMENT_CONDENSER_MAX_STEP:
|
||||
logger.info(
|
||||
'experiment_manager_004:ab_testing:skipped',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'reason': 'experiment_not_enabled',
|
||||
'experiment': EXPERIMENT_CONDENSER_MAX_STEP,
|
||||
},
|
||||
)
|
||||
return None
|
||||
|
||||
# Use experiment name as the flag key
|
||||
try:
|
||||
enabled_variant = posthog.get_feature_flag(
|
||||
EXPERIMENT_CONDENSER_MAX_STEP, conversation_id
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:get_feature_flag:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_CONDENSER_MAX_STEP,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
return None
|
||||
|
||||
# Store the experiment assignment in the database
|
||||
try:
|
||||
experiment_store = ExperimentAssignmentStore()
|
||||
experiment_store.update_experiment_variant(
|
||||
conversation_id=conversation_id,
|
||||
experiment_name='condenser_max_step_experiment',
|
||||
variant=enabled_variant,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:store_assignment:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_CONDENSER_MAX_STEP,
|
||||
'variant': enabled_variant,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
# Fail the experiment if we cannot track the splits - results would not be explainable
|
||||
return None
|
||||
|
||||
# Log the experiment event
|
||||
# If this is a feature environment, add "FEATURE_" prefix to user_id for PostHog
|
||||
posthog_user_id = f'FEATURE_{user_id}' if IS_FEATURE_ENV else user_id
|
||||
|
||||
try:
|
||||
posthog.capture(
|
||||
distinct_id=posthog_user_id,
|
||||
event='condenser_max_step_set',
|
||||
properties={
|
||||
'conversation_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'original_user_id': user_id,
|
||||
'is_feature_env': IS_FEATURE_ENV,
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'experiment_manager:posthog_capture:failed',
|
||||
extra={
|
||||
'convo_id': conversation_id,
|
||||
'experiment': EXPERIMENT_CONDENSER_MAX_STEP,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
# Continue execution as this is not critical
|
||||
|
||||
logger.info(
|
||||
'posthog_capture',
|
||||
extra={
|
||||
'event': 'condenser_max_step_set',
|
||||
'posthog_user_id': posthog_user_id,
|
||||
'is_feature_env': IS_FEATURE_ENV,
|
||||
'conversation_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
},
|
||||
)
|
||||
|
||||
return enabled_variant
|
||||
|
||||
|
||||
def handle_condenser_max_step_experiment(
|
||||
user_id: str | None,
|
||||
conversation_id: str,
|
||||
conversation_settings: ConversationInitData,
|
||||
) -> ConversationInitData:
|
||||
"""
|
||||
Handle the condenser max step experiment for conversation settings.
|
||||
|
||||
We should not modify persistent user settings. Instead, apply the experiment
|
||||
variant to the conversation's in-memory settings object for this session only.
|
||||
|
||||
Variants:
|
||||
- control -> condenser_max_size = 120
|
||||
- treatment -> condenser_max_size = 80
|
||||
|
||||
Returns the (potentially) modified conversation_settings.
|
||||
"""
|
||||
|
||||
enabled_variant = _get_condenser_max_step_variant(user_id, conversation_id)
|
||||
|
||||
if enabled_variant is None:
|
||||
return conversation_settings
|
||||
|
||||
if enabled_variant == 'control':
|
||||
condenser_max_size = 120
|
||||
elif enabled_variant == 'treatment':
|
||||
condenser_max_size = 80
|
||||
else:
|
||||
logger.error(
|
||||
'condenser_max_step_experiment:unknown_variant',
|
||||
extra={
|
||||
'user_id': user_id,
|
||||
'convo_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'reason': 'unknown variant; returning original conversation settings',
|
||||
},
|
||||
)
|
||||
return conversation_settings
|
||||
|
||||
try:
|
||||
# Apply the variant to this conversation only; do not persist to DB.
|
||||
# Not all OpenHands versions expose `condenser_max_size` on settings.
|
||||
if hasattr(conversation_settings, 'condenser_max_size'):
|
||||
conversation_settings.condenser_max_size = condenser_max_size
|
||||
logger.info(
|
||||
'condenser_max_step_experiment:conversation_settings_applied',
|
||||
extra={
|
||||
'user_id': user_id,
|
||||
'convo_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'condenser_max_size': condenser_max_size,
|
||||
},
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
'condenser_max_step_experiment:field_missing_on_settings',
|
||||
extra={
|
||||
'user_id': user_id,
|
||||
'convo_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'reason': 'condenser_max_size not present on ConversationInitData',
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
'condenser_max_step_experiment:apply_failed',
|
||||
extra={
|
||||
'user_id': user_id,
|
||||
'convo_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'error': str(e),
|
||||
},
|
||||
)
|
||||
return conversation_settings
|
||||
|
||||
return conversation_settings
|
||||
|
||||
|
||||
def handle_condenser_max_step_experiment__v1(
|
||||
user_id: str | None,
|
||||
conversation_id: UUID,
|
||||
agent: Agent,
|
||||
) -> Agent:
|
||||
enabled_variant = _get_condenser_max_step_variant(user_id, str(conversation_id))
|
||||
|
||||
if enabled_variant is None:
|
||||
return agent
|
||||
|
||||
if enabled_variant == 'control':
|
||||
condenser_max_size = 120
|
||||
elif enabled_variant == 'treatment':
|
||||
condenser_max_size = 80
|
||||
else:
|
||||
logger.error(
|
||||
'condenser_max_step_experiment:unknown_variant',
|
||||
extra={
|
||||
'user_id': user_id,
|
||||
'convo_id': conversation_id,
|
||||
'variant': enabled_variant,
|
||||
'reason': 'unknown variant; returning original conversation settings',
|
||||
},
|
||||
)
|
||||
return agent
|
||||
|
||||
condenser_llm = agent.llm.model_copy(update={'usage_id': 'condenser'})
|
||||
condenser = LLMSummarizingCondenser(
|
||||
llm=condenser_llm, max_size=condenser_max_size, keep_first=4
|
||||
)
|
||||
|
||||
return agent.model_copy(update={'condenser': condenser})
|
||||
@@ -1,25 +0,0 @@
|
||||
"""
|
||||
Experiment versions package.
|
||||
|
||||
This package contains handlers for different experiment versions.
|
||||
"""
|
||||
|
||||
from experiments.experiment_versions._001_litellm_default_model_experiment import (
|
||||
handle_litellm_default_model_experiment,
|
||||
)
|
||||
from experiments.experiment_versions._002_system_prompt_experiment import (
|
||||
handle_system_prompt_experiment,
|
||||
)
|
||||
from experiments.experiment_versions._003_llm_claude4_vs_gpt5_experiment import (
|
||||
handle_claude4_vs_gpt5_experiment,
|
||||
)
|
||||
from experiments.experiment_versions._004_condenser_max_step_experiment import (
|
||||
handle_condenser_max_step_experiment,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'handle_litellm_default_model_experiment',
|
||||
'handle_system_prompt_experiment',
|
||||
'handle_claude4_vs_gpt5_experiment',
|
||||
'handle_condenser_max_step_experiment',
|
||||
]
|
||||
@@ -17,7 +17,6 @@ packages = [
|
||||
{ include = "storage" },
|
||||
{ include = "sync" },
|
||||
{ include = "integrations" },
|
||||
{ include = "experiments" },
|
||||
]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
|
||||
@@ -129,10 +129,6 @@ async def _process_batch_operations_background(
|
||||
# No action required
|
||||
continue
|
||||
|
||||
if subpath == 'exp_config.json':
|
||||
# No action required
|
||||
continue
|
||||
|
||||
# Log unhandled paths for future implementation
|
||||
logger.warning(
|
||||
'unknown_path_in_batch_webhook',
|
||||
|
||||
@@ -391,39 +391,11 @@ class SaasNestedConversationManager(ConversationManager):
|
||||
await self._setup_nested_settings(client, api_url, settings)
|
||||
await self._setup_provider_tokens(client, api_url, settings)
|
||||
await self._setup_custom_secrets(client, api_url, settings.custom_secrets) # type: ignore
|
||||
await self._setup_experiment_config(client, api_url, sid, user_id)
|
||||
await self._create_nested_conversation(
|
||||
client, api_url, sid, user_id, settings, initial_user_msg, replay_json
|
||||
)
|
||||
await self._wait_for_conversation_ready(client, api_url, sid)
|
||||
|
||||
async def _setup_experiment_config(
|
||||
self, client: httpx.AsyncClient, api_url: str, sid: str, user_id: str
|
||||
):
|
||||
# Prevent circular import
|
||||
from openhands.experiments.experiment_manager import (
|
||||
ExperimentConfig,
|
||||
ExperimentManagerImpl,
|
||||
)
|
||||
|
||||
config: OpenHandsConfig = ExperimentManagerImpl.run_config_variant_test(
|
||||
user_id, sid, self.config
|
||||
)
|
||||
|
||||
experiment_config = ExperimentConfig(
|
||||
config={
|
||||
'system_prompt_filename': config.get_agent_config(
|
||||
config.default_agent
|
||||
).system_prompt_filename
|
||||
}
|
||||
)
|
||||
|
||||
response = await client.post(
|
||||
f'{api_url}/api/conversations/{sid}/exp-config',
|
||||
json=experiment_config.model_dump(),
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
async def _setup_nested_settings(
|
||||
self, client: httpx.AsyncClient, api_url: str, settings: Settings
|
||||
) -> None:
|
||||
|
||||
@@ -4,7 +4,6 @@ from storage.billing_session import BillingSession
|
||||
from storage.billing_session_type import BillingSessionType
|
||||
from storage.conversation_callback import CallbackStatus, ConversationCallback
|
||||
from storage.conversation_work import ConversationWork
|
||||
from storage.experiment_assignment import ExperimentAssignment
|
||||
from storage.feedback import ConversationFeedback, Feedback
|
||||
from storage.github_app_installation import GithubAppInstallation
|
||||
from storage.gitlab_webhook import GitlabWebhook, WebhookStatus
|
||||
@@ -50,7 +49,6 @@ __all__ = [
|
||||
'ConversationFeedback',
|
||||
'StoredConversationMetadataSaas',
|
||||
'ConversationWork',
|
||||
'ExperimentAssignment',
|
||||
'Feedback',
|
||||
'GithubAppInstallation',
|
||||
'GitlabWebhook',
|
||||
|
||||
@@ -1,41 +0,0 @@
|
||||
"""
|
||||
Database model for experiment assignments.
|
||||
|
||||
This model tracks which experiments a conversation is assigned to and what variant
|
||||
they received from PostHog feature flags.
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from sqlalchemy import Column, DateTime, String, UniqueConstraint
|
||||
from storage.base import Base
|
||||
|
||||
|
||||
class ExperimentAssignment(Base): # type: ignore
|
||||
__tablename__ = 'experiment_assignments'
|
||||
|
||||
id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
conversation_id = Column(String, nullable=True, index=True)
|
||||
experiment_name = Column(String, nullable=False)
|
||||
variant = Column(String, nullable=False)
|
||||
|
||||
created_at = Column(
|
||||
DateTime(timezone=True),
|
||||
default=lambda: datetime.now(UTC), # type: ignore[attr-defined]
|
||||
nullable=False,
|
||||
)
|
||||
updated_at = Column(
|
||||
DateTime(timezone=True),
|
||||
default=lambda: datetime.now(UTC), # type: ignore[attr-defined]
|
||||
onupdate=lambda: datetime.now(UTC), # type: ignore[attr-defined]
|
||||
nullable=False,
|
||||
)
|
||||
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
'conversation_id',
|
||||
'experiment_name',
|
||||
name='uq_experiment_assignments_conversation_experiment',
|
||||
),
|
||||
)
|
||||
@@ -1,52 +0,0 @@
|
||||
"""
|
||||
Store for managing experiment assignments.
|
||||
|
||||
This store handles creating and updating experiment assignments for conversations.
|
||||
"""
|
||||
|
||||
from sqlalchemy.dialects.postgresql import insert
|
||||
from storage.database import session_maker
|
||||
from storage.experiment_assignment import ExperimentAssignment
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
|
||||
class ExperimentAssignmentStore:
|
||||
"""Store for managing experiment assignments."""
|
||||
|
||||
def update_experiment_variant(
|
||||
self,
|
||||
conversation_id: str,
|
||||
experiment_name: str,
|
||||
variant: str,
|
||||
) -> None:
|
||||
"""
|
||||
Update the variant for a specific experiment.
|
||||
|
||||
Args:
|
||||
conversation_id: The conversation ID
|
||||
experiment_name: The name of the experiment
|
||||
variant: The variant assigned
|
||||
"""
|
||||
with session_maker() as session:
|
||||
# Use PostgreSQL's INSERT ... ON CONFLICT DO NOTHING to handle unique constraint
|
||||
stmt = insert(ExperimentAssignment).values(
|
||||
conversation_id=conversation_id,
|
||||
experiment_name=experiment_name,
|
||||
variant=variant,
|
||||
)
|
||||
stmt = stmt.on_conflict_do_nothing(
|
||||
constraint='uq_experiment_assignments_conversation_experiment'
|
||||
)
|
||||
|
||||
session.execute(stmt)
|
||||
session.commit()
|
||||
|
||||
logger.info(
|
||||
'experiment_assignment_store:upserted_variant',
|
||||
extra={
|
||||
'conversation_id': conversation_id,
|
||||
'experiment_name': experiment_name,
|
||||
'variant': variant,
|
||||
},
|
||||
)
|
||||
@@ -1 +0,0 @@
|
||||
"""Unit tests for experiments module."""
|
||||
@@ -1,149 +0,0 @@
|
||||
# tests/test_condenser_max_step_experiment_v1.py
|
||||
|
||||
from unittest.mock import patch
|
||||
from uuid import uuid4
|
||||
|
||||
from experiments.experiment_manager import SaaSExperimentManager
|
||||
|
||||
# SUT imports (update the module path if needed)
|
||||
from experiments.experiment_versions._004_condenser_max_step_experiment import (
|
||||
handle_condenser_max_step_experiment__v1,
|
||||
)
|
||||
from pydantic import SecretStr
|
||||
|
||||
from openhands.sdk import LLM, Agent
|
||||
from openhands.sdk.context.condenser import LLMSummarizingCondenser
|
||||
|
||||
|
||||
def make_agent() -> Agent:
|
||||
"""Build a minimal valid Agent."""
|
||||
llm = LLM(
|
||||
usage_id='primary-llm',
|
||||
model='provider/model',
|
||||
api_key=SecretStr('sk-test'),
|
||||
)
|
||||
return Agent(llm=llm)
|
||||
|
||||
|
||||
def _patch_variant(monkeypatch, return_value):
|
||||
"""Patch the internal variant getter to return a specific value."""
|
||||
monkeypatch.setattr(
|
||||
'experiments.experiment_versions._004_condenser_max_step_experiment._get_condenser_max_step_variant',
|
||||
lambda user_id, conv_id: return_value,
|
||||
raising=True,
|
||||
)
|
||||
|
||||
|
||||
def test_control_variant_sets_condenser_with_max_size_120(monkeypatch):
|
||||
_patch_variant(monkeypatch, 'control')
|
||||
agent = make_agent()
|
||||
conv_id = uuid4()
|
||||
|
||||
result = handle_condenser_max_step_experiment__v1('user-1', conv_id, agent)
|
||||
|
||||
# Should be a new Agent instance with a condenser installed
|
||||
assert result is not agent
|
||||
assert isinstance(result.condenser, LLMSummarizingCondenser)
|
||||
|
||||
# The condenser should have its own LLM (usage_id overridden to "condenser")
|
||||
assert result.condenser.llm.usage_id == 'condenser'
|
||||
# The original agent LLM remains unchanged
|
||||
assert agent.llm.usage_id == 'primary-llm'
|
||||
|
||||
# Control: max_size = 120, keep_first = 4
|
||||
assert result.condenser.max_size == 120
|
||||
assert result.condenser.keep_first == 4
|
||||
|
||||
|
||||
def test_treatment_variant_sets_condenser_with_max_size_80(monkeypatch):
|
||||
_patch_variant(monkeypatch, 'treatment')
|
||||
agent = make_agent()
|
||||
conv_id = uuid4()
|
||||
|
||||
result = handle_condenser_max_step_experiment__v1('user-2', conv_id, agent)
|
||||
|
||||
assert result is not agent
|
||||
assert isinstance(result.condenser, LLMSummarizingCondenser)
|
||||
assert result.condenser.llm.usage_id == 'condenser'
|
||||
assert result.condenser.max_size == 80
|
||||
assert result.condenser.keep_first == 4
|
||||
|
||||
|
||||
def test_none_variant_returns_original_agent_without_changes(monkeypatch):
|
||||
_patch_variant(monkeypatch, None)
|
||||
agent = make_agent()
|
||||
conv_id = uuid4()
|
||||
|
||||
result = handle_condenser_max_step_experiment__v1('user-3', conv_id, agent)
|
||||
|
||||
# No changes—same instance and no condenser attribute added
|
||||
assert result is agent
|
||||
assert getattr(result, 'condenser', None) is None
|
||||
|
||||
|
||||
def test_unknown_variant_returns_original_agent_without_changes(monkeypatch):
|
||||
_patch_variant(monkeypatch, 'weird-variant')
|
||||
agent = make_agent()
|
||||
conv_id = uuid4()
|
||||
|
||||
result = handle_condenser_max_step_experiment__v1('user-4', conv_id, agent)
|
||||
|
||||
assert result is agent
|
||||
assert getattr(result, 'condenser', None) is None
|
||||
|
||||
|
||||
@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', False)
|
||||
def test_run_agent_variant_tests_v1_noop_when_manager_disabled():
|
||||
"""If ENABLE_EXPERIMENT_MANAGER is False, the method returns the exact same agent and does not call the handler."""
|
||||
agent = make_agent()
|
||||
conv_id = uuid4()
|
||||
|
||||
result = SaaSExperimentManager.run_agent_variant_tests__v1(
|
||||
user_id='user-123',
|
||||
conversation_id=conv_id,
|
||||
agent=agent,
|
||||
)
|
||||
|
||||
# Same object returned (no copy)
|
||||
assert result is agent
|
||||
|
||||
|
||||
@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', True)
|
||||
@patch('experiments.experiment_manager.EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', True)
|
||||
def test_run_agent_variant_tests_v1_calls_handler_and_sets_system_prompt(monkeypatch):
|
||||
"""When enabled, it should call the condenser experiment handler and set the long-horizon system prompt."""
|
||||
agent = make_agent()
|
||||
conv_id = uuid4()
|
||||
|
||||
_patch_variant(monkeypatch, 'treatment')
|
||||
|
||||
result: Agent = SaaSExperimentManager.run_agent_variant_tests__v1(
|
||||
user_id='user-abc',
|
||||
conversation_id=conv_id,
|
||||
agent=agent,
|
||||
)
|
||||
|
||||
# Should be a different instance than the original (copied after handler runs)
|
||||
assert result is not agent
|
||||
assert result.system_prompt_filename == 'system_prompt_long_horizon.j2'
|
||||
|
||||
|
||||
@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', True)
|
||||
@patch('experiments.experiment_manager.EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', True)
|
||||
def test_run_agent_variant_tests_v1_preserves_planning_agent_system_prompt():
|
||||
"""Planning agents should retain their specialized system prompt and not be overwritten by the experiment."""
|
||||
# Arrange
|
||||
planning_agent = make_agent().model_copy(
|
||||
update={'system_prompt_filename': 'system_prompt_planning.j2'}
|
||||
)
|
||||
conv_id = uuid4()
|
||||
|
||||
# Act
|
||||
result: Agent = SaaSExperimentManager.run_agent_variant_tests__v1(
|
||||
user_id='user-planning',
|
||||
conversation_id=conv_id,
|
||||
agent=planning_agent,
|
||||
)
|
||||
|
||||
# Assert
|
||||
assert result.system_prompt_filename == 'system_prompt_planning.j2'
|
||||
@@ -77,7 +77,6 @@ from openhands.app_server.utils.llm_metadata import (
|
||||
get_llm_metadata,
|
||||
should_set_litellm_extra_body,
|
||||
)
|
||||
from openhands.experiments.experiment_manager import ExperimentManagerImpl
|
||||
from openhands.integrations.provider import ProviderType
|
||||
from openhands.integrations.service_types import SuggestedTask
|
||||
from openhands.sdk import Agent, AgentContext, LocalWorkspace
|
||||
@@ -1140,7 +1139,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
|
||||
working_dir: str,
|
||||
plugins: list[PluginSpec] | None = None,
|
||||
) -> StartConversationRequest:
|
||||
"""Finalize the conversation request with experiment variants and skills.
|
||||
"""Finalize the conversation request with skills and metadata.
|
||||
|
||||
Args:
|
||||
agent: The configured agent
|
||||
@@ -1161,13 +1160,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
|
||||
# Generate conversation ID if not provided
|
||||
conversation_id = conversation_id or uuid4()
|
||||
|
||||
# Apply experiment variants
|
||||
agent = ExperimentManagerImpl.run_agent_variant_tests__v1(
|
||||
user.id, conversation_id, agent
|
||||
)
|
||||
|
||||
# Update agent's LLM with litellm_extra_body metadata for tracing
|
||||
# This is done after experiment variants to ensure the final LLM config is used
|
||||
agent = self._update_agent_with_llm_metadata(agent, conversation_id, user.id)
|
||||
|
||||
# Load and merge skills if remote workspace is available
|
||||
@@ -1230,7 +1223,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
|
||||
1. Setting up git provider secrets
|
||||
2. Configuring LLM and MCP settings
|
||||
3. Creating an agent with appropriate context
|
||||
4. Finalizing the request with skills and experiment variants
|
||||
4. Finalizing the request with skills and metadata
|
||||
5. Passing plugins to the agent server for remote plugin loading
|
||||
"""
|
||||
user = await self.user_context.get_user_info()
|
||||
|
||||
@@ -1,72 +0,0 @@
|
||||
import os
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from openhands.core.config.openhands_config import OpenHandsConfig
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.sdk import Agent
|
||||
from openhands.server.session.conversation_init_data import ConversationInitData
|
||||
from openhands.server.shared import file_store
|
||||
from openhands.storage.locations import get_experiment_config_filename
|
||||
from openhands.utils.import_utils import get_impl
|
||||
|
||||
|
||||
class ExperimentConfig(BaseModel):
|
||||
config: dict[str, str] | None = None
|
||||
|
||||
|
||||
def load_experiment_config(conversation_id: str) -> ExperimentConfig | None:
|
||||
try:
|
||||
file_path = get_experiment_config_filename(conversation_id)
|
||||
exp_config = file_store.read(file_path)
|
||||
return ExperimentConfig.model_validate_json(exp_config)
|
||||
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.warning(f'Failed to load experiment config: {e}')
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class ExperimentManager:
|
||||
@staticmethod
|
||||
def run_agent_variant_tests__v1(
|
||||
user_id: str | None, conversation_id: UUID, agent: Agent
|
||||
) -> Agent:
|
||||
return agent
|
||||
|
||||
@staticmethod
|
||||
def run_conversation_variant_test(
|
||||
user_id: str | None,
|
||||
conversation_id: str,
|
||||
conversation_settings: ConversationInitData,
|
||||
) -> ConversationInitData:
|
||||
return conversation_settings
|
||||
|
||||
@staticmethod
|
||||
def run_config_variant_test(
|
||||
user_id: str | None, conversation_id: str, config: OpenHandsConfig
|
||||
) -> OpenHandsConfig:
|
||||
exp_config = load_experiment_config(conversation_id)
|
||||
if exp_config and exp_config.config:
|
||||
agent_cfg = config.get_agent_config(config.default_agent)
|
||||
try:
|
||||
for attr, value in exp_config.config.items():
|
||||
if hasattr(agent_cfg, attr):
|
||||
logger.info(
|
||||
f'Set attrib {attr} to {value} for {conversation_id}'
|
||||
)
|
||||
setattr(agent_cfg, attr, value)
|
||||
except Exception as e:
|
||||
logger.warning(f'Error processing exp config: {e}')
|
||||
|
||||
return config
|
||||
|
||||
|
||||
experiment_manager_cls = os.environ.get(
|
||||
'OPENHANDS_EXPERIMENT_MANAGER_CLS',
|
||||
'openhands.experiments.experiment_manager.ExperimentManager',
|
||||
)
|
||||
ExperimentManagerImpl = get_impl(ExperimentManager, experiment_manager_cls)
|
||||
@@ -28,7 +28,6 @@ from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.action import MessageAction
|
||||
from openhands.events.nested_event_store import NestedEventStore
|
||||
from openhands.events.stream import EventStream
|
||||
from openhands.experiments.experiment_manager import ExperimentManagerImpl
|
||||
from openhands.integrations.provider import PROVIDER_TOKEN_TYPE, ProviderHandler
|
||||
from openhands.runtime import get_runtime_cls
|
||||
from openhands.runtime.impl.docker.docker_runtime import DockerRuntime
|
||||
@@ -551,12 +550,8 @@ class DockerNestedConversationManager(ConversationManager):
|
||||
# This session is created here only because it is the easiest way to get a runtime, which
|
||||
# is the easiest way to create the needed docker container
|
||||
|
||||
config: OpenHandsConfig = ExperimentManagerImpl.run_config_variant_test(
|
||||
user_id, sid, self.config
|
||||
)
|
||||
|
||||
llm_registry, conversation_stats, config = (
|
||||
create_registry_and_conversation_stats(config, sid, user_id, settings)
|
||||
create_registry_and_conversation_stats(self.config, sid, user_id, settings)
|
||||
)
|
||||
|
||||
session = Session(
|
||||
|
||||
@@ -60,7 +60,6 @@ from openhands.events.observation import (
|
||||
AgentStateChangedObservation,
|
||||
NullObservation,
|
||||
)
|
||||
from openhands.experiments.experiment_manager import ExperimentConfig
|
||||
from openhands.integrations.provider import (
|
||||
PROVIDER_TOKEN_TYPE,
|
||||
ProviderHandler,
|
||||
@@ -109,7 +108,6 @@ from openhands.storage.data_models.conversation_metadata import (
|
||||
from openhands.storage.data_models.conversation_status import ConversationStatus
|
||||
from openhands.storage.data_models.secrets import Secrets
|
||||
from openhands.storage.data_models.settings import Settings
|
||||
from openhands.storage.locations import get_experiment_config_filename
|
||||
from openhands.storage.settings.settings_store import SettingsStore
|
||||
from openhands.utils.async_utils import wait_all
|
||||
from openhands.utils.conversation_summary import get_default_conversation_title
|
||||
@@ -1240,32 +1238,6 @@ async def update_conversation(
|
||||
)
|
||||
|
||||
|
||||
@app.post('/conversations/{conversation_id}/exp-config')
|
||||
def add_experiment_config_for_conversation(
|
||||
exp_config: ExperimentConfig,
|
||||
conversation_id: str = Depends(validate_conversation_id),
|
||||
) -> bool:
|
||||
exp_config_filepath = get_experiment_config_filename(conversation_id)
|
||||
exists = False
|
||||
try:
|
||||
file_store.read(exp_config_filepath)
|
||||
exists = True
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
|
||||
# Don't modify again if it already exists
|
||||
if exists:
|
||||
return False
|
||||
|
||||
try:
|
||||
file_store.write(exp_config_filepath, exp_config.model_dump_json())
|
||||
except Exception as e:
|
||||
logger.info(f'Failed to write experiment config for {conversation_id}: {e}')
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _parse_combined_page_id(page_id: str | None) -> tuple[str | None, str | None]:
|
||||
"""Parse combined page_id to extract separate V0 and V1 page_ids.
|
||||
|
||||
|
||||
@@ -13,7 +13,6 @@ from typing import Any
|
||||
from openhands.core.config.mcp_config import MCPConfig
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.action.message import MessageAction
|
||||
from openhands.experiments.experiment_manager import ExperimentManagerImpl
|
||||
from openhands.integrations.provider import (
|
||||
CUSTOM_SECRETS_TYPE,
|
||||
PROVIDER_TOKEN_TYPE,
|
||||
@@ -142,10 +141,6 @@ async def start_conversation(
|
||||
|
||||
conversation_init_data = ConversationInitData(**session_init_args)
|
||||
|
||||
conversation_init_data = ExperimentManagerImpl.run_conversation_variant_test(
|
||||
user_id, conversation_id, conversation_init_data
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f'Starting agent loop for conversation {conversation_id}',
|
||||
extra={'user_id': user_id, 'session_id': conversation_id},
|
||||
@@ -281,8 +276,4 @@ async def setup_init_conversation_settings(
|
||||
if user_secrets:
|
||||
session_init_args['custom_secrets'] = user_secrets.custom_secrets
|
||||
|
||||
conversation_init_data = ConversationInitData(**session_init_args)
|
||||
# We should recreate the same experiment conditions when restarting a conversation
|
||||
return ExperimentManagerImpl.run_conversation_variant_test(
|
||||
user_id, conversation_id, conversation_init_data
|
||||
)
|
||||
return ConversationInitData(**session_init_args)
|
||||
|
||||
@@ -108,13 +108,6 @@ class WebSession:
|
||||
EventStreamSubscriber.SERVER, self.on_event, self.sid
|
||||
)
|
||||
self.config = config
|
||||
|
||||
# Lazy import to avoid circular dependency
|
||||
from openhands.experiments.experiment_manager import ExperimentManagerImpl
|
||||
|
||||
self.config = ExperimentManagerImpl.run_config_variant_test(
|
||||
user_id, sid, self.config
|
||||
)
|
||||
self.loop = asyncio.get_event_loop()
|
||||
self.user_id = user_id
|
||||
|
||||
|
||||
@@ -36,7 +36,3 @@ def get_conversation_llm_registry_filename(sid: str, user_id: str | None = None)
|
||||
|
||||
def get_conversation_stats_filename(sid: str, user_id: str | None = None) -> str:
|
||||
return f'{get_conversation_dir(sid, user_id)}conversation_stats.pkl'
|
||||
|
||||
|
||||
def get_experiment_config_filename(sid: str, user_id: str | None = None) -> str:
|
||||
return f'{get_conversation_dir(sid, user_id)}exp_config.json'
|
||||
|
||||
@@ -129,9 +129,8 @@ def generate_openapi_spec():
|
||||
"""Generate the OpenAPI specification from the FastAPI app."""
|
||||
spec = app.openapi()
|
||||
|
||||
# Explicitly exclude certain endpoints that are operational, experimental, or UI-only convenience
|
||||
# Explicitly exclude certain endpoints that are operational or UI-only convenience
|
||||
excluded_endpoints = [
|
||||
'/api/conversations/{conversation_id}/exp-config', # Internal experimentation endpoint
|
||||
'/server_info', # Operational/system diagnostics
|
||||
'/api/conversations/{conversation_id}/vscode-url', # UI/runtime convenience
|
||||
'/api/conversations/{conversation_id}/web-hosts', # UI/runtime convenience
|
||||
|
||||
@@ -1032,27 +1032,17 @@ class TestLiveStatusAppConversationService:
|
||||
assert agent_context.system_message_suffix is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
|
||||
)
|
||||
async def test_finalize_conversation_request_with_skills(
|
||||
self, mock_experiment_manager
|
||||
):
|
||||
async def test_finalize_conversation_request_with_skills(self):
|
||||
"""Test _finalize_conversation_request with skills loading."""
|
||||
# Arrange
|
||||
mock_agent = Mock(spec=Agent)
|
||||
|
||||
# Create mock LLM with required attributes for _update_agent_with_llm_metadata
|
||||
mock_llm = Mock(spec=LLM)
|
||||
mock_llm.model = 'gpt-4' # Non-openhands model, so no metadata update
|
||||
mock_llm.usage_id = 'agent'
|
||||
|
||||
mock_updated_agent = Mock(spec=Agent)
|
||||
mock_updated_agent.llm = mock_llm
|
||||
mock_updated_agent.condenser = None # No condenser
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
|
||||
mock_updated_agent
|
||||
)
|
||||
# Arrange
|
||||
mock_agent = Mock(spec=Agent)
|
||||
mock_agent.llm = mock_llm
|
||||
mock_agent.condenser = None # No condenser
|
||||
|
||||
conversation_id = uuid4()
|
||||
workspace = LocalWorkspace(working_dir='/test')
|
||||
@@ -1061,9 +1051,7 @@ class TestLiveStatusAppConversationService:
|
||||
remote_workspace = Mock(spec=AsyncRemoteWorkspace)
|
||||
|
||||
# Mock the skills loading method
|
||||
self.service._load_skills_and_update_agent = AsyncMock(
|
||||
return_value=mock_updated_agent
|
||||
)
|
||||
self.service._load_skills_and_update_agent = AsyncMock(return_value=mock_agent)
|
||||
|
||||
# Act
|
||||
result = await self.service._finalize_conversation_request(
|
||||
@@ -1082,44 +1070,24 @@ class TestLiveStatusAppConversationService:
|
||||
# Assert
|
||||
assert isinstance(result, StartConversationRequest)
|
||||
assert result.conversation_id == conversation_id
|
||||
assert result.agent == mock_updated_agent
|
||||
assert result.workspace == workspace
|
||||
assert result.initial_message == initial_message
|
||||
assert result.secrets == secrets
|
||||
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.assert_called_once_with(
|
||||
self.mock_user.id, conversation_id, mock_agent
|
||||
)
|
||||
self.service._load_skills_and_update_agent.assert_called_once_with(
|
||||
self.mock_sandbox,
|
||||
mock_updated_agent,
|
||||
remote_workspace,
|
||||
'test_repo',
|
||||
'/test/dir',
|
||||
)
|
||||
self.service._load_skills_and_update_agent.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
|
||||
)
|
||||
async def test_finalize_conversation_request_without_skills(
|
||||
self, mock_experiment_manager
|
||||
):
|
||||
async def test_finalize_conversation_request_without_skills(self):
|
||||
"""Test _finalize_conversation_request without remote workspace (no skills)."""
|
||||
# Arrange
|
||||
mock_agent = Mock(spec=Agent)
|
||||
|
||||
# Create mock LLM with required attributes for _update_agent_with_llm_metadata
|
||||
mock_llm = Mock(spec=LLM)
|
||||
mock_llm.model = 'gpt-4' # Non-openhands model, so no metadata update
|
||||
mock_llm.usage_id = 'agent'
|
||||
|
||||
mock_updated_agent = Mock(spec=Agent)
|
||||
mock_updated_agent.llm = mock_llm
|
||||
mock_updated_agent.condenser = None # No condenser
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
|
||||
mock_updated_agent
|
||||
)
|
||||
# Arrange
|
||||
mock_agent = Mock(spec=Agent)
|
||||
mock_agent.llm = mock_llm
|
||||
mock_agent.condenser = None # No condenser
|
||||
|
||||
workspace = LocalWorkspace(working_dir='/test')
|
||||
secrets = {'test': StaticSecret(value='secret')}
|
||||
@@ -1141,31 +1109,18 @@ class TestLiveStatusAppConversationService:
|
||||
# Assert
|
||||
assert isinstance(result, StartConversationRequest)
|
||||
assert isinstance(result.conversation_id, UUID)
|
||||
assert result.agent == mock_updated_agent
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
|
||||
)
|
||||
async def test_finalize_conversation_request_skills_loading_fails(
|
||||
self, mock_experiment_manager
|
||||
):
|
||||
async def test_finalize_conversation_request_skills_loading_fails(self):
|
||||
"""Test _finalize_conversation_request when skills loading fails."""
|
||||
# Arrange
|
||||
mock_agent = Mock(spec=Agent)
|
||||
|
||||
# Create mock LLM with required attributes for _update_agent_with_llm_metadata
|
||||
mock_llm = Mock(spec=LLM)
|
||||
mock_llm.model = 'gpt-4' # Non-openhands model, so no metadata update
|
||||
mock_llm.usage_id = 'agent'
|
||||
|
||||
mock_updated_agent = Mock(spec=Agent)
|
||||
mock_updated_agent.llm = mock_llm
|
||||
mock_updated_agent.condenser = None # No condenser
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
|
||||
mock_updated_agent
|
||||
)
|
||||
mock_agent = Mock(spec=Agent)
|
||||
mock_agent.llm = mock_llm
|
||||
mock_agent.condenser = None # No condenser
|
||||
|
||||
workspace = LocalWorkspace(working_dir='/test')
|
||||
secrets = {'test': StaticSecret(value='secret')}
|
||||
@@ -1195,9 +1150,6 @@ class TestLiveStatusAppConversationService:
|
||||
|
||||
# Assert
|
||||
assert isinstance(result, StartConversationRequest)
|
||||
assert (
|
||||
result.agent == mock_updated_agent
|
||||
) # Should still use the experiment-modified agent
|
||||
mock_logger.warning.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -2266,12 +2218,7 @@ class TestPluginHandling:
|
||||
assert 'key2: value2' in text
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
|
||||
)
|
||||
async def test_finalize_conversation_request_with_plugins(
|
||||
self, mock_experiment_manager
|
||||
):
|
||||
async def test_finalize_conversation_request_with_plugins(self):
|
||||
"""Test _finalize_conversation_request passes plugins list to StartConversationRequest."""
|
||||
from openhands.app_server.app_conversation.app_conversation_models import (
|
||||
PluginSpec,
|
||||
@@ -2282,13 +2229,13 @@ class TestPluginHandling:
|
||||
mock_llm = Mock(spec=LLM)
|
||||
mock_llm.model = 'gpt-4'
|
||||
mock_llm.usage_id = 'agent'
|
||||
mock_agent.llm = mock_llm
|
||||
mock_agent.condenser = None
|
||||
|
||||
mock_updated_agent = Mock(spec=Agent)
|
||||
mock_updated_agent.llm = mock_llm
|
||||
mock_updated_agent.condenser = None
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
|
||||
mock_updated_agent
|
||||
)
|
||||
mock_agent.model_copy = Mock(return_value=mock_updated_agent)
|
||||
|
||||
workspace = LocalWorkspace(working_dir='/test')
|
||||
secrets = {'test': StaticSecret(value='secret')}
|
||||
@@ -2330,25 +2277,20 @@ class TestPluginHandling:
|
||||
assert '- api_key: test123' in result.initial_message.content[0].text
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
|
||||
)
|
||||
async def test_finalize_conversation_request_without_plugins(
|
||||
self, mock_experiment_manager
|
||||
):
|
||||
async def test_finalize_conversation_request_without_plugins(self):
|
||||
"""Test _finalize_conversation_request without plugins sets plugins to None."""
|
||||
# Arrange
|
||||
mock_agent = Mock(spec=Agent)
|
||||
mock_llm = Mock(spec=LLM)
|
||||
mock_llm.model = 'gpt-4'
|
||||
mock_llm.usage_id = 'agent'
|
||||
mock_agent.llm = mock_llm
|
||||
mock_agent.condenser = None
|
||||
|
||||
mock_updated_agent = Mock(spec=Agent)
|
||||
mock_updated_agent.llm = mock_llm
|
||||
mock_updated_agent.condenser = None
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
|
||||
mock_updated_agent
|
||||
)
|
||||
mock_agent.model_copy = Mock(return_value=mock_updated_agent)
|
||||
|
||||
workspace = LocalWorkspace(working_dir='/test')
|
||||
secrets = {}
|
||||
@@ -2373,12 +2315,7 @@ class TestPluginHandling:
|
||||
assert result.plugins is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
|
||||
)
|
||||
async def test_finalize_conversation_request_plugin_without_ref(
|
||||
self, mock_experiment_manager
|
||||
):
|
||||
async def test_finalize_conversation_request_plugin_without_ref(self):
|
||||
"""Test _finalize_conversation_request with plugin that has no ref."""
|
||||
from openhands.app_server.app_conversation.app_conversation_models import (
|
||||
PluginSpec,
|
||||
@@ -2389,13 +2326,13 @@ class TestPluginHandling:
|
||||
mock_llm = Mock(spec=LLM)
|
||||
mock_llm.model = 'gpt-4'
|
||||
mock_llm.usage_id = 'agent'
|
||||
mock_agent.llm = mock_llm
|
||||
mock_agent.condenser = None
|
||||
|
||||
mock_updated_agent = Mock(spec=Agent)
|
||||
mock_updated_agent.llm = mock_llm
|
||||
mock_updated_agent.condenser = None
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
|
||||
mock_updated_agent
|
||||
)
|
||||
mock_agent.model_copy = Mock(return_value=mock_updated_agent)
|
||||
|
||||
workspace = LocalWorkspace(working_dir='/test')
|
||||
secrets = {}
|
||||
@@ -2428,12 +2365,7 @@ class TestPluginHandling:
|
||||
assert result.initial_message is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
|
||||
)
|
||||
async def test_finalize_conversation_request_plugin_with_repo_path(
|
||||
self, mock_experiment_manager
|
||||
):
|
||||
async def test_finalize_conversation_request_plugin_with_repo_path(self):
|
||||
"""Test _finalize_conversation_request passes repo_path to PluginSource."""
|
||||
from openhands.app_server.app_conversation.app_conversation_models import (
|
||||
PluginSpec,
|
||||
@@ -2444,13 +2376,13 @@ class TestPluginHandling:
|
||||
mock_llm = Mock(spec=LLM)
|
||||
mock_llm.model = 'gpt-4'
|
||||
mock_llm.usage_id = 'agent'
|
||||
mock_agent.llm = mock_llm
|
||||
mock_agent.condenser = None
|
||||
|
||||
mock_updated_agent = Mock(spec=Agent)
|
||||
mock_updated_agent.llm = mock_llm
|
||||
mock_updated_agent.condenser = None
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
|
||||
mock_updated_agent
|
||||
)
|
||||
mock_agent.model_copy = Mock(return_value=mock_updated_agent)
|
||||
|
||||
workspace = LocalWorkspace(working_dir='/test')
|
||||
secrets = {}
|
||||
@@ -2488,12 +2420,7 @@ class TestPluginHandling:
|
||||
assert result.plugins[0].repo_path == 'plugins/city-weather'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
|
||||
)
|
||||
async def test_finalize_conversation_request_multiple_plugins(
|
||||
self, mock_experiment_manager
|
||||
):
|
||||
async def test_finalize_conversation_request_multiple_plugins(self):
|
||||
"""Test _finalize_conversation_request with multiple plugins."""
|
||||
from openhands.app_server.app_conversation.app_conversation_models import (
|
||||
PluginSpec,
|
||||
@@ -2504,13 +2431,13 @@ class TestPluginHandling:
|
||||
mock_llm = Mock(spec=LLM)
|
||||
mock_llm.model = 'gpt-4'
|
||||
mock_llm.usage_id = 'agent'
|
||||
mock_agent.llm = mock_llm
|
||||
mock_agent.condenser = None
|
||||
|
||||
mock_updated_agent = Mock(spec=Agent)
|
||||
mock_updated_agent.llm = mock_llm
|
||||
mock_updated_agent.condenser = None
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
|
||||
mock_updated_agent
|
||||
)
|
||||
mock_agent.model_copy = Mock(return_value=mock_updated_agent)
|
||||
|
||||
workspace = LocalWorkspace(working_dir='/test')
|
||||
secrets = {}
|
||||
|
||||
@@ -1,264 +0,0 @@
|
||||
"""Unit tests for ExperimentManager class, focusing on the v1 agent method."""
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import Mock, patch
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from openhands.app_server.app_conversation.live_status_app_conversation_service import (
|
||||
LiveStatusAppConversationService,
|
||||
)
|
||||
from openhands.app_server.sandbox.sandbox_models import SandboxInfo, SandboxStatus
|
||||
from openhands.experiments.experiment_manager import ExperimentManager
|
||||
from openhands.sdk import Agent
|
||||
from openhands.sdk.llm import LLM
|
||||
|
||||
|
||||
class TestExperimentManager:
|
||||
"""Test cases for ExperimentManager class."""
|
||||
|
||||
def setup_method(self):
|
||||
"""Set up test fixtures."""
|
||||
self.user_id = 'test_user_123'
|
||||
self.conversation_id = uuid4()
|
||||
|
||||
# Create a mock LLM
|
||||
self.mock_llm = Mock(spec=LLM)
|
||||
self.mock_llm.model = 'gpt-4'
|
||||
self.mock_llm.usage_id = 'agent'
|
||||
|
||||
# Create a mock Agent
|
||||
self.mock_agent = Mock(spec=Agent)
|
||||
self.mock_agent.llm = self.mock_llm
|
||||
self.mock_agent.system_prompt_filename = 'default_system_prompt.j2'
|
||||
self.mock_agent.model_copy = Mock(return_value=self.mock_agent)
|
||||
|
||||
def test_run_agent_variant_tests__v1_returns_agent_unchanged(self):
|
||||
"""Test that the base ExperimentManager returns the agent unchanged."""
|
||||
result = ExperimentManager.run_agent_variant_tests__v1(
|
||||
self.user_id, self.conversation_id, self.mock_agent
|
||||
)
|
||||
|
||||
assert result is self.mock_agent
|
||||
assert result == self.mock_agent
|
||||
|
||||
def test_run_agent_variant_tests__v1_with_none_user_id(self):
|
||||
"""Test that the method works with None user_id."""
|
||||
# Act
|
||||
result = ExperimentManager.run_agent_variant_tests__v1(
|
||||
None, self.conversation_id, self.mock_agent
|
||||
)
|
||||
|
||||
# Assert
|
||||
assert result is self.mock_agent
|
||||
|
||||
def test_run_agent_variant_tests__v1_with_different_conversation_ids(self):
|
||||
"""Test that the method works with different conversation IDs."""
|
||||
conversation_id_1 = uuid4()
|
||||
conversation_id_2 = uuid4()
|
||||
|
||||
# Act
|
||||
result_1 = ExperimentManager.run_agent_variant_tests__v1(
|
||||
self.user_id, conversation_id_1, self.mock_agent
|
||||
)
|
||||
result_2 = ExperimentManager.run_agent_variant_tests__v1(
|
||||
self.user_id, conversation_id_2, self.mock_agent
|
||||
)
|
||||
|
||||
# Assert
|
||||
assert result_1 is self.mock_agent
|
||||
assert result_2 is self.mock_agent
|
||||
|
||||
|
||||
class TestExperimentManagerIntegration:
|
||||
"""Integration tests for ExperimentManager with start_app_conversation."""
|
||||
|
||||
def setup_method(self):
|
||||
"""Set up test fixtures."""
|
||||
self.user_id = 'test_user_123'
|
||||
self.conversation_id = uuid4()
|
||||
|
||||
# Create a mock LLM
|
||||
self.mock_llm = Mock(spec=LLM)
|
||||
self.mock_llm.model = 'gpt-4'
|
||||
self.mock_llm.usage_id = 'agent'
|
||||
|
||||
# Create a mock Agent
|
||||
self.mock_agent = Mock(spec=Agent)
|
||||
self.mock_agent.llm = self.mock_llm
|
||||
self.mock_agent.system_prompt_filename = 'default_system_prompt.j2'
|
||||
self.mock_agent.model_copy = Mock(return_value=self.mock_agent)
|
||||
|
||||
@patch('openhands.experiments.experiment_manager.ExperimentManagerImpl')
|
||||
def test_start_app_conversation_calls_experiment_manager_v1(
|
||||
self, mock_experiment_manager_impl
|
||||
):
|
||||
"""Test that start_app_conversation calls the experiment manager v1 method with correct parameters."""
|
||||
# Arrange
|
||||
mock_experiment_manager_impl.run_agent_variant_tests__v1.return_value = (
|
||||
self.mock_agent
|
||||
)
|
||||
|
||||
# Create a mock service instance
|
||||
mock_service = Mock(spec=LiveStatusAppConversationService)
|
||||
|
||||
# Mock the _build_start_conversation_request_for_user method to simulate the call
|
||||
with patch.object(mock_service, '_build_start_conversation_request_for_user'):
|
||||
# Simulate the part of the code that calls the experiment manager
|
||||
from uuid import uuid4
|
||||
|
||||
conversation_id = uuid4()
|
||||
|
||||
# This simulates the call that happens in the actual service
|
||||
result_agent = mock_experiment_manager_impl.run_agent_variant_tests__v1(
|
||||
self.user_id, conversation_id, self.mock_agent
|
||||
)
|
||||
|
||||
# Assert
|
||||
mock_experiment_manager_impl.run_agent_variant_tests__v1.assert_called_once_with(
|
||||
self.user_id, conversation_id, self.mock_agent
|
||||
)
|
||||
assert result_agent == self.mock_agent
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_experiment_manager_called_with_correct_parameters_in_context__noop_pass_through(
|
||||
self,
|
||||
):
|
||||
"""
|
||||
Test that ExperimentManagerImpl.run_agent_variant_tests__v1 is called with correct parameters
|
||||
and returns the same agent instance (no copy/mutation) when building a StartConversationRequest.
|
||||
"""
|
||||
# --- Arrange: fixed UUID to assert call parameters deterministically
|
||||
fixed_conversation_id = UUID('00000000-0000-0000-0000-000000000001')
|
||||
|
||||
# Create a stable Agent (and LLM) we can identity-check later
|
||||
mock_llm = Mock(spec=LLM)
|
||||
mock_llm.model = 'gpt-4'
|
||||
mock_llm.usage_id = 'agent'
|
||||
|
||||
mock_agent = Mock(spec=Agent)
|
||||
mock_agent.llm = mock_llm
|
||||
mock_agent.condenser = None # No condenser for this test
|
||||
mock_agent.system_prompt_filename = 'default_system_prompt.j2'
|
||||
mock_agent.model_copy = Mock(return_value=mock_agent)
|
||||
|
||||
# Minimal, real-ish user context used by the service
|
||||
class DummyUserContext:
|
||||
async def get_user_info(self):
|
||||
# confirmation_mode=False -> NeverConfirm()
|
||||
return SimpleNamespace(
|
||||
id='test_user_123',
|
||||
llm_model='gpt-4',
|
||||
llm_base_url=None,
|
||||
llm_api_key=None,
|
||||
confirmation_mode=False,
|
||||
condenser_max_size=None,
|
||||
security_analyzer=None,
|
||||
)
|
||||
|
||||
async def get_secrets(self):
|
||||
return {}
|
||||
|
||||
async def get_latest_token(self, provider):
|
||||
return None
|
||||
|
||||
async def get_user_id(self):
|
||||
return 'test_user_123'
|
||||
|
||||
user_context = DummyUserContext()
|
||||
|
||||
# The service requires a lot of deps, but for this test we won't exercise them.
|
||||
app_conversation_info_service = Mock()
|
||||
app_conversation_start_task_service = Mock()
|
||||
event_callback_service = Mock()
|
||||
sandbox_service = Mock()
|
||||
sandbox_spec_service = Mock()
|
||||
jwt_service = Mock()
|
||||
httpx_client = Mock()
|
||||
|
||||
event_service = Mock()
|
||||
|
||||
service = LiveStatusAppConversationService(
|
||||
init_git_in_empty_workspace=False,
|
||||
user_context=user_context,
|
||||
app_conversation_info_service=app_conversation_info_service,
|
||||
app_conversation_start_task_service=app_conversation_start_task_service,
|
||||
event_callback_service=event_callback_service,
|
||||
event_service=event_service,
|
||||
sandbox_service=sandbox_service,
|
||||
sandbox_spec_service=sandbox_spec_service,
|
||||
jwt_service=jwt_service,
|
||||
sandbox_startup_timeout=30,
|
||||
sandbox_startup_poll_frequency=1,
|
||||
httpx_client=httpx_client,
|
||||
web_url=None,
|
||||
openhands_provider_base_url=None,
|
||||
access_token_hard_timeout=None,
|
||||
)
|
||||
|
||||
sandbox = SandboxInfo(
|
||||
id='mock-sandbox-id',
|
||||
created_by_user_id='mock-user-id',
|
||||
sandbox_spec_id='mock-sandbox-spec-id',
|
||||
status=SandboxStatus.RUNNING,
|
||||
session_api_key='mock-session-api-key',
|
||||
)
|
||||
|
||||
# Patch the pieces invoked by the service
|
||||
with (
|
||||
patch.object(
|
||||
service,
|
||||
'_setup_secrets_for_git_providers',
|
||||
return_value={},
|
||||
),
|
||||
patch.object(
|
||||
service,
|
||||
'_configure_llm_and_mcp',
|
||||
return_value=(mock_llm, {}),
|
||||
),
|
||||
patch.object(
|
||||
service,
|
||||
'_create_agent_with_context',
|
||||
return_value=mock_agent,
|
||||
),
|
||||
patch.object(
|
||||
service,
|
||||
'_load_skills_and_update_agent',
|
||||
return_value=mock_agent,
|
||||
),
|
||||
patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.uuid4',
|
||||
return_value=fixed_conversation_id,
|
||||
),
|
||||
patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
|
||||
) as mock_experiment_manager,
|
||||
):
|
||||
# Configure the experiment manager mock to return the same agent
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
|
||||
mock_agent
|
||||
)
|
||||
|
||||
# --- Act: build the start request
|
||||
start_req = await service._build_start_conversation_request_for_user(
|
||||
sandbox=sandbox,
|
||||
initial_message=None,
|
||||
system_message_suffix=None, # No additional system message suffix
|
||||
git_provider=None, # Keep secrets path simple
|
||||
working_dir='/tmp/project', # Arbitrary path
|
||||
)
|
||||
|
||||
# --- Assert: verify experiment manager was called with correct parameters
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.assert_called_once_with(
|
||||
'test_user_123', # user_id
|
||||
fixed_conversation_id, # conversation_id
|
||||
mock_agent, # agent (after model_copy with agent_context)
|
||||
)
|
||||
|
||||
# The agent in the StartConversationRequest is the *same* object returned by experiment manager
|
||||
assert start_req.agent is mock_agent
|
||||
|
||||
# No tweaks to agent fields by the experiment manager (noop)
|
||||
assert start_req.agent.llm is mock_llm
|
||||
assert start_req.agent.system_prompt_filename == 'default_system_prompt.j2'
|
||||
Reference in New Issue
Block a user