mirror of
https://github.com/OpenHands/OpenHands.git
synced 2026-03-22 05:37:20 +08:00
V1: Experiment manager (#11388)
Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
@@ -1,18 +1,47 @@
|
|||||||
|
from uuid import UUID
|
||||||
|
|
||||||
from experiments.constants import (
|
from experiments.constants import (
|
||||||
ENABLE_EXPERIMENT_MANAGER,
|
ENABLE_EXPERIMENT_MANAGER,
|
||||||
|
EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
|
||||||
)
|
)
|
||||||
from experiments.experiment_versions import (
|
from experiments.experiment_versions import (
|
||||||
handle_condenser_max_step_experiment,
|
handle_condenser_max_step_experiment,
|
||||||
handle_system_prompt_experiment,
|
handle_system_prompt_experiment,
|
||||||
)
|
)
|
||||||
|
from experiments.experiment_versions._004_condenser_max_step_experiment import (
|
||||||
|
handle_condenser_max_step_experiment__v1,
|
||||||
|
)
|
||||||
|
|
||||||
from openhands.core.config.openhands_config import OpenHandsConfig
|
from openhands.core.config.openhands_config import OpenHandsConfig
|
||||||
from openhands.core.logger import openhands_logger as logger
|
from openhands.core.logger import openhands_logger as logger
|
||||||
from openhands.experiments.experiment_manager import ExperimentManager
|
from openhands.experiments.experiment_manager import ExperimentManager
|
||||||
|
from openhands.sdk import Agent
|
||||||
from openhands.server.session.conversation_init_data import ConversationInitData
|
from openhands.server.session.conversation_init_data import ConversationInitData
|
||||||
|
|
||||||
|
|
||||||
class SaaSExperimentManager(ExperimentManager):
|
class SaaSExperimentManager(ExperimentManager):
|
||||||
|
@staticmethod
def run_agent_variant_tests__v1(
    user_id: str | None, conversation_id: UUID, agent: Agent
) -> Agent:
    """Run the V1 agent-level experiments and return the resulting agent.

    When ``ENABLE_EXPERIMENT_MANAGER`` is falsy, a skip event is logged and
    ``agent`` is handed back untouched. Otherwise the condenser max-step
    handler runs first, and if ``EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT`` is
    truthy the handler's result is copied with the long-horizon system
    prompt filename applied.

    Args:
        user_id: User identifier (may be ``None``), forwarded to the handler.
        conversation_id: Conversation UUID, forwarded to the handler.
        agent: The agent to run experiments against.

    Returns:
        The original ``agent`` when experiments are disabled, otherwise the
        agent produced by the enabled experiments.
    """
    if ENABLE_EXPERIMENT_MANAGER:
        experiment_agent = handle_condenser_max_step_experiment__v1(
            user_id, conversation_id, agent
        )
        if not EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT:
            return experiment_agent

        # Apply the prompt override on top of whatever the condenser handler returned.
        prompt_override = {'system_prompt_filename': 'system_prompt_long_horizon.j2'}
        return experiment_agent.model_copy(update=prompt_override)

    skip_details = {'reason': 'experiment_manager_disabled'}
    logger.info(
        'experiment_manager:run_conversation_variant_test:skipped',
        extra=skip_details,
    )
    return agent
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def run_conversation_variant_test(
|
def run_conversation_variant_test(
|
||||||
user_id, conversation_id, conversation_settings
|
user_id, conversation_id, conversation_settings
|
||||||
|
|||||||
@@ -5,12 +5,18 @@ This module contains the handler for the condenser max step experiment that test
|
|||||||
different max_size values for the condenser configuration.
|
different max_size values for the condenser configuration.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
import posthog
|
import posthog
|
||||||
from experiments.constants import EXPERIMENT_CONDENSER_MAX_STEP
|
from experiments.constants import EXPERIMENT_CONDENSER_MAX_STEP
|
||||||
from server.constants import IS_FEATURE_ENV
|
from server.constants import IS_FEATURE_ENV
|
||||||
from storage.experiment_assignment_store import ExperimentAssignmentStore
|
from storage.experiment_assignment_store import ExperimentAssignmentStore
|
||||||
|
|
||||||
from openhands.core.logger import openhands_logger as logger
|
from openhands.core.logger import openhands_logger as logger
|
||||||
|
from openhands.sdk import Agent
|
||||||
|
from openhands.sdk.context.condenser import (
|
||||||
|
LLMSummarizingCondenser,
|
||||||
|
)
|
||||||
from openhands.server.session.conversation_init_data import ConversationInitData
|
from openhands.server.session.conversation_init_data import ConversationInitData
|
||||||
|
|
||||||
|
|
||||||
@@ -190,3 +196,37 @@ def handle_condenser_max_step_experiment(
|
|||||||
return conversation_settings
|
return conversation_settings
|
||||||
|
|
||||||
return conversation_settings
|
return conversation_settings
|
||||||
|
|
||||||
|
|
||||||
|
def handle_condenser_max_step_experiment__v1(
    user_id: str | None,
    conversation_id: UUID,
    agent: Agent,
) -> Agent:
    """Apply the condenser max-step experiment to a V1 SDK agent.

    The variant is obtained from ``_get_condenser_max_step_variant``. For a
    recognised variant, a copy of ``agent`` is returned that carries an
    ``LLMSummarizingCondenser`` whose ``max_size`` depends on the variant.

    Args:
        user_id: User identifier (may be ``None``), forwarded to the variant lookup.
        conversation_id: Conversation UUID; its string form is used for the
            variant lookup and for logging.
        agent: Agent to derive the experiment copy from.

    Returns:
        ``agent`` itself when the lookup yields ``None`` or an unrecognised
        variant; otherwise ``agent.model_copy(...)`` with the condenser set.
    """
    conversation_key = str(conversation_id)
    enabled_variant = _get_condenser_max_step_variant(user_id, conversation_key)

    if enabled_variant is None:
        return agent

    # Condenser max_size per experiment arm.
    max_size_by_variant = {'control': 120, 'treatment': 80}
    condenser_max_size = max_size_by_variant.get(enabled_variant)

    if condenser_max_size is None:
        logger.error(
            'condenser_max_step_experiment:unknown_variant',
            extra={
                'user_id': user_id,
                # Log the string form, matching what the variant lookup received.
                'convo_id': conversation_key,
                'variant': enabled_variant,
                # This handler returns the agent, not conversation settings.
                'reason': 'unknown variant; returning original agent',
            },
        )
        return agent

    # The condenser gets its own LLM copy, tagged with a separate usage id.
    condenser_llm = agent.llm.model_copy(update={'usage_id': 'condenser'})
    condenser = LLMSummarizingCondenser(
        llm=condenser_llm, max_size=condenser_max_size, keep_first=4
    )

    return agent.model_copy(update={'condenser': condenser})
|
||||||
|
|||||||
24
enterprise/poetry.lock
generated
24
enterprise/poetry.lock
generated
@@ -5737,7 +5737,7 @@ llama = ["llama-index (>=0.12.29,<0.13.0)", "llama-index-core (>=0.12.29,<0.13.0
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openhands-agent-server"
|
name = "openhands-agent-server"
|
||||||
version = "1.0.0a2"
|
version = "1.0.0a3"
|
||||||
description = "OpenHands Agent Server - REST/WebSocket interface for OpenHands AI Agent"
|
description = "OpenHands Agent Server - REST/WebSocket interface for OpenHands AI Agent"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.12"
|
python-versions = ">=3.12"
|
||||||
@@ -5759,8 +5759,8 @@ wsproto = ">=1.2.0"
|
|||||||
[package.source]
|
[package.source]
|
||||||
type = "git"
|
type = "git"
|
||||||
url = "https://github.com/All-Hands-AI/agent-sdk.git"
|
url = "https://github.com/All-Hands-AI/agent-sdk.git"
|
||||||
reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||||
resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||||
subdirectory = "openhands-agent-server"
|
subdirectory = "openhands-agent-server"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -5805,9 +5805,9 @@ memory-profiler = "^0.61.0"
|
|||||||
numpy = "*"
|
numpy = "*"
|
||||||
openai = "1.99.9"
|
openai = "1.99.9"
|
||||||
openhands-aci = "0.3.2"
|
openhands-aci = "0.3.2"
|
||||||
openhands-agent-server = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "512399d896521aee3131eea4bb59087fb9dfa243", subdirectory = "openhands-agent-server"}
|
openhands-agent-server = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e", subdirectory = "openhands-agent-server"}
|
||||||
openhands-sdk = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "512399d896521aee3131eea4bb59087fb9dfa243", subdirectory = "openhands-sdk"}
|
openhands-sdk = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e", subdirectory = "openhands-sdk"}
|
||||||
openhands-tools = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "512399d896521aee3131eea4bb59087fb9dfa243", subdirectory = "openhands-tools"}
|
openhands-tools = {git = "https://github.com/All-Hands-AI/agent-sdk.git", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e", subdirectory = "openhands-tools"}
|
||||||
opentelemetry-api = "^1.33.1"
|
opentelemetry-api = "^1.33.1"
|
||||||
opentelemetry-exporter-otlp-proto-grpc = "^1.33.1"
|
opentelemetry-exporter-otlp-proto-grpc = "^1.33.1"
|
||||||
pathspec = "^0.12.1"
|
pathspec = "^0.12.1"
|
||||||
@@ -5863,7 +5863,7 @@ url = ".."
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openhands-sdk"
|
name = "openhands-sdk"
|
||||||
version = "1.0.0a2"
|
version = "1.0.0a3"
|
||||||
description = "OpenHands SDK - Core functionality for building AI agents"
|
description = "OpenHands SDK - Core functionality for building AI agents"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.12"
|
python-versions = ">=3.12"
|
||||||
@@ -5887,13 +5887,13 @@ boto3 = ["boto3 (>=1.35.0)"]
|
|||||||
[package.source]
|
[package.source]
|
||||||
type = "git"
|
type = "git"
|
||||||
url = "https://github.com/All-Hands-AI/agent-sdk.git"
|
url = "https://github.com/All-Hands-AI/agent-sdk.git"
|
||||||
reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||||
resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||||
subdirectory = "openhands-sdk"
|
subdirectory = "openhands-sdk"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openhands-tools"
|
name = "openhands-tools"
|
||||||
version = "1.0.0a2"
|
version = "1.0.0a3"
|
||||||
description = "OpenHands Tools - Runtime tools for AI agents"
|
description = "OpenHands Tools - Runtime tools for AI agents"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.12"
|
python-versions = ">=3.12"
|
||||||
@@ -5914,8 +5914,8 @@ pydantic = ">=2.11.7"
|
|||||||
[package.source]
|
[package.source]
|
||||||
type = "git"
|
type = "git"
|
||||||
url = "https://github.com/All-Hands-AI/agent-sdk.git"
|
url = "https://github.com/All-Hands-AI/agent-sdk.git"
|
||||||
reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||||
resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||||
subdirectory = "openhands-tools"
|
subdirectory = "openhands-tools"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|||||||
1
enterprise/tests/unit/experiments/__init__.py
Normal file
1
enterprise/tests/unit/experiments/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Unit tests for experiments module."""
|
||||||
@@ -0,0 +1,137 @@
|
|||||||
|
# tests/test_condenser_max_step_experiment_v1.py
|
||||||
|
|
||||||
|
from unittest.mock import patch
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
from experiments.experiment_manager import SaaSExperimentManager
|
||||||
|
|
||||||
|
# SUT imports (update the module path if needed)
|
||||||
|
from experiments.experiment_versions._004_condenser_max_step_experiment import (
|
||||||
|
handle_condenser_max_step_experiment__v1,
|
||||||
|
)
|
||||||
|
from pydantic import SecretStr
|
||||||
|
|
||||||
|
from openhands.sdk import LLM, Agent
|
||||||
|
from openhands.sdk.context.condenser import LLMSummarizingCondenser
|
||||||
|
|
||||||
|
|
||||||
|
def make_agent() -> Agent:
    """Return a minimal ``Agent`` whose LLM carries the usage id 'primary-llm'."""
    llm_settings = {
        'usage_id': 'primary-llm',
        'model': 'provider/model',
        'api_key': SecretStr('sk-test'),
    }
    return Agent(llm=LLM(**llm_settings))
|
||||||
|
|
||||||
|
|
||||||
|
def _patch_variant(monkeypatch, return_value):
    """Stub the module's variant lookup so it always yields ``return_value``."""

    def _fixed_variant(user_id, conv_id):
        return return_value

    target = 'experiments.experiment_versions._004_condenser_max_step_experiment._get_condenser_max_step_variant'
    monkeypatch.setattr(target, _fixed_variant, raising=True)
|
||||||
|
|
||||||
|
|
||||||
|
def test_control_variant_sets_condenser_with_max_size_120(monkeypatch):
    """The 'control' variant yields a new agent whose condenser has max_size 120."""
    _patch_variant(monkeypatch, 'control')
    original = make_agent()

    updated = handle_condenser_max_step_experiment__v1('user-1', uuid4(), original)

    # A fresh Agent instance must come back with a condenser attached.
    assert updated is not original
    condenser = updated.condenser
    assert isinstance(condenser, LLMSummarizingCondenser)

    # The condenser owns a separate LLM copy; the source agent's LLM is untouched.
    assert condenser.llm.usage_id == 'condenser'
    assert original.llm.usage_id == 'primary-llm'

    # Control arm: max_size = 120, keep_first = 4.
    assert (condenser.max_size, condenser.keep_first) == (120, 4)
|
||||||
|
|
||||||
|
|
||||||
|
def test_treatment_variant_sets_condenser_with_max_size_80(monkeypatch):
    """The 'treatment' variant yields a new agent whose condenser has max_size 80."""
    _patch_variant(monkeypatch, 'treatment')
    original = make_agent()

    updated = handle_condenser_max_step_experiment__v1('user-2', uuid4(), original)

    assert updated is not original
    condenser = updated.condenser
    assert isinstance(condenser, LLMSummarizingCondenser)
    assert condenser.llm.usage_id == 'condenser'
    # Treatment arm: max_size = 80, keep_first = 4.
    assert (condenser.max_size, condenser.keep_first) == (80, 4)
|
||||||
|
|
||||||
|
|
||||||
|
def test_none_variant_returns_original_agent_without_changes(monkeypatch):
    """A ``None`` variant leaves the agent alone: same instance, no condenser."""
    _patch_variant(monkeypatch, None)
    original = make_agent()

    returned = handle_condenser_max_step_experiment__v1('user-3', uuid4(), original)

    assert returned is original
    assert getattr(returned, 'condenser', None) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_unknown_variant_returns_original_agent_without_changes(monkeypatch):
    """An unrecognised variant leaves the agent alone: same instance, no condenser."""
    _patch_variant(monkeypatch, 'weird-variant')
    original = make_agent()

    returned = handle_condenser_max_step_experiment__v1('user-4', uuid4(), original)

    assert returned is original
    assert getattr(returned, 'condenser', None) is None
|
||||||
|
|
||||||
|
|
||||||
|
@patch('experiments.experiment_manager.handle_condenser_max_step_experiment__v1')
@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', False)
def test_run_agent_variant_tests_v1_noop_when_manager_disabled(
    mock_handle_condenser,
):
    """With ENABLE_EXPERIMENT_MANAGER off, the same agent comes back and the handler is never invoked."""
    original = make_agent()
    call_kwargs = {
        'user_id': 'user-123',
        'conversation_id': uuid4(),
        'agent': original,
    }

    returned = SaaSExperimentManager.run_agent_variant_tests__v1(**call_kwargs)

    # Identity check: no copy was made.
    assert returned is original
    # The condenser handler must stay untouched.
    mock_handle_condenser.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', True)
@patch('experiments.experiment_manager.EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', True)
def test_run_agent_variant_tests_v1_calls_handler_and_sets_system_prompt(monkeypatch):
    """With both flags on, the condenser handler runs and the long-horizon system prompt is applied."""
    original = make_agent()
    conversation_id = uuid4()
    _patch_variant(monkeypatch, 'treatment')

    updated: Agent = SaaSExperimentManager.run_agent_variant_tests__v1(
        user_id='user-abc',
        conversation_id=conversation_id,
        agent=original,
    )

    # The result is a copy, not the input instance, and carries the prompt override.
    assert updated is not original
    assert updated.system_prompt_filename == 'system_prompt_long_horizon.j2'

    # The handler's condenser must survive the system-prompt override copy.
    condenser = updated.condenser
    assert isinstance(condenser, LLMSummarizingCondenser)
    assert condenser.max_size == 80
|
||||||
@@ -5,7 +5,7 @@ from dataclasses import dataclass
|
|||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from time import time
|
from time import time
|
||||||
from typing import AsyncGenerator, Sequence
|
from typing import AsyncGenerator, Sequence
|
||||||
from uuid import UUID
|
from uuid import UUID, uuid4
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from fastapi import Request
|
from fastapi import Request
|
||||||
@@ -52,6 +52,7 @@ from openhands.app_server.services.injector import InjectorState
|
|||||||
from openhands.app_server.services.jwt_service import JwtService
|
from openhands.app_server.services.jwt_service import JwtService
|
||||||
from openhands.app_server.user.user_context import UserContext
|
from openhands.app_server.user.user_context import UserContext
|
||||||
from openhands.app_server.utils.async_remote_workspace import AsyncRemoteWorkspace
|
from openhands.app_server.utils.async_remote_workspace import AsyncRemoteWorkspace
|
||||||
|
from openhands.experiments.experiment_manager import ExperimentManagerImpl
|
||||||
from openhands.integrations.provider import ProviderType
|
from openhands.integrations.provider import ProviderType
|
||||||
from openhands.sdk import LocalWorkspace
|
from openhands.sdk import LocalWorkspace
|
||||||
from openhands.sdk.conversation.secret_source import LookupSecret, StaticSecret
|
from openhands.sdk.conversation.secret_source import LookupSecret, StaticSecret
|
||||||
@@ -458,10 +459,17 @@ class LiveStatusAppConversationService(GitAppConversationService):
|
|||||||
model=user.llm_model,
|
model=user.llm_model,
|
||||||
base_url=user.llm_base_url,
|
base_url=user.llm_base_url,
|
||||||
api_key=user.llm_api_key,
|
api_key=user.llm_api_key,
|
||||||
service_id='agent',
|
usage_id='agent',
|
||||||
)
|
)
|
||||||
agent = get_default_agent(llm=llm)
|
agent = get_default_agent(llm=llm)
|
||||||
|
|
||||||
|
conversation_id = uuid4()
|
||||||
|
agent = ExperimentManagerImpl.run_agent_variant_tests__v1(
|
||||||
|
user.id, conversation_id, agent
|
||||||
|
)
|
||||||
|
|
||||||
start_conversation_request = StartConversationRequest(
|
start_conversation_request = StartConversationRequest(
|
||||||
|
conversation_id=conversation_id,
|
||||||
agent=agent,
|
agent=agent,
|
||||||
workspace=workspace,
|
workspace=workspace,
|
||||||
confirmation_policy=AlwaysConfirm()
|
confirmation_policy=AlwaysConfirm()
|
||||||
|
|||||||
@@ -1,9 +1,11 @@
|
|||||||
import os
|
import os
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from openhands.core.config.openhands_config import OpenHandsConfig
|
from openhands.core.config.openhands_config import OpenHandsConfig
|
||||||
from openhands.core.logger import openhands_logger as logger
|
from openhands.core.logger import openhands_logger as logger
|
||||||
|
from openhands.sdk import Agent
|
||||||
from openhands.server.session.conversation_init_data import ConversationInitData
|
from openhands.server.session.conversation_init_data import ConversationInitData
|
||||||
from openhands.server.shared import file_store
|
from openhands.server.shared import file_store
|
||||||
from openhands.storage.locations import get_experiment_config_filename
|
from openhands.storage.locations import get_experiment_config_filename
|
||||||
@@ -29,6 +31,12 @@ def load_experiment_config(conversation_id: str) -> ExperimentConfig | None:
|
|||||||
|
|
||||||
|
|
||||||
class ExperimentManager:
|
class ExperimentManager:
|
||||||
|
@staticmethod
def run_agent_variant_tests__v1(
    user_id: str | None,
    conversation_id: UUID,
    agent: Agent,
) -> Agent:
    """Default V1 agent-experiment hook: return ``agent`` exactly as received.

    ``user_id`` and ``conversation_id`` are accepted but not used by this
    base implementation.
    """
    return agent
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def run_conversation_variant_test(
|
def run_conversation_variant_test(
|
||||||
user_id: str | None,
|
user_id: str | None,
|
||||||
|
|||||||
23
poetry.lock
generated
23
poetry.lock
generated
@@ -5711,8 +5711,11 @@ files = [
|
|||||||
{file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"},
|
{file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"},
|
||||||
{file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"},
|
{file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"},
|
||||||
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"},
|
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"},
|
||||||
|
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f7f991a68d20c75cb13c5c9142b2a3f9eb161f1f12a9489c82172d1f133c0"},
|
||||||
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"},
|
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"},
|
||||||
|
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:ac7ba71f9561cd7d7b55e1ea5511543c0282e2b6450f122672a2694621d63b7e"},
|
||||||
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"},
|
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"},
|
||||||
|
{file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:ce31158630a6ac85bddd6b830cffd46085ff90498b397bd0a259f59d27a12188"},
|
||||||
{file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"},
|
{file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"},
|
||||||
{file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"},
|
{file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"},
|
||||||
{file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"},
|
{file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"},
|
||||||
@@ -7272,7 +7275,7 @@ llama = ["llama-index (>=0.12.29,<0.13.0)", "llama-index-core (>=0.12.29,<0.13.0
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openhands-agent-server"
|
name = "openhands-agent-server"
|
||||||
version = "1.0.0a2"
|
version = "1.0.0a3"
|
||||||
description = "OpenHands Agent Server - REST/WebSocket interface for OpenHands AI Agent"
|
description = "OpenHands Agent Server - REST/WebSocket interface for OpenHands AI Agent"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.12"
|
python-versions = ">=3.12"
|
||||||
@@ -7294,13 +7297,13 @@ wsproto = ">=1.2.0"
|
|||||||
[package.source]
|
[package.source]
|
||||||
type = "git"
|
type = "git"
|
||||||
url = "https://github.com/All-Hands-AI/agent-sdk.git"
|
url = "https://github.com/All-Hands-AI/agent-sdk.git"
|
||||||
reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||||
resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||||
subdirectory = "openhands-agent-server"
|
subdirectory = "openhands-agent-server"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openhands-sdk"
|
name = "openhands-sdk"
|
||||||
version = "1.0.0a2"
|
version = "1.0.0a3"
|
||||||
description = "OpenHands SDK - Core functionality for building AI agents"
|
description = "OpenHands SDK - Core functionality for building AI agents"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.12"
|
python-versions = ">=3.12"
|
||||||
@@ -7324,13 +7327,13 @@ boto3 = ["boto3 (>=1.35.0)"]
|
|||||||
[package.source]
|
[package.source]
|
||||||
type = "git"
|
type = "git"
|
||||||
url = "https://github.com/All-Hands-AI/agent-sdk.git"
|
url = "https://github.com/All-Hands-AI/agent-sdk.git"
|
||||||
reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||||
resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||||
subdirectory = "openhands-sdk"
|
subdirectory = "openhands-sdk"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openhands-tools"
|
name = "openhands-tools"
|
||||||
version = "1.0.0a2"
|
version = "1.0.0a3"
|
||||||
description = "OpenHands Tools - Runtime tools for AI agents"
|
description = "OpenHands Tools - Runtime tools for AI agents"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.12"
|
python-versions = ">=3.12"
|
||||||
@@ -7351,8 +7354,8 @@ pydantic = ">=2.11.7"
|
|||||||
[package.source]
|
[package.source]
|
||||||
type = "git"
|
type = "git"
|
||||||
url = "https://github.com/All-Hands-AI/agent-sdk.git"
|
url = "https://github.com/All-Hands-AI/agent-sdk.git"
|
||||||
reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||||
resolved_reference = "512399d896521aee3131eea4bb59087fb9dfa243"
|
resolved_reference = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e"
|
||||||
subdirectory = "openhands-tools"
|
subdirectory = "openhands-tools"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -16521,4 +16524,4 @@ third-party-runtimes = ["daytona", "e2b-code-interpreter", "modal", "runloop-api
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.1"
|
lock-version = "2.1"
|
||||||
python-versions = "^3.12,<3.14"
|
python-versions = "^3.12,<3.14"
|
||||||
content-hash = "03639ad9782d05163b25c507e7232d797572902ee57408bf999b72c21e3adf5e"
|
content-hash = "fd68ed845befeb646ee910db46f1ef9c5a1fd2e6d1ac6189c04864e0665f66ed"
|
||||||
|
|||||||
@@ -113,10 +113,10 @@ e2b-code-interpreter = { version = "^2.0.0", optional = true }
|
|||||||
pybase62 = "^1.0.0"
|
pybase62 = "^1.0.0"
|
||||||
|
|
||||||
# V1 dependencies
|
# V1 dependencies
|
||||||
openhands-agent-server = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-agent-server", rev = "512399d896521aee3131eea4bb59087fb9dfa243" }
|
openhands-agent-server = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-agent-server", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" }
|
||||||
openhands-sdk = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-sdk", rev = "512399d896521aee3131eea4bb59087fb9dfa243" }
|
openhands-sdk = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-sdk", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" }
|
||||||
# This refuses to install
|
# This refuses to install
|
||||||
openhands-tools = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-tools", rev = "512399d896521aee3131eea4bb59087fb9dfa243" }
|
openhands-tools = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands-tools", rev = "8d8134ca5a87cc3e90e3ff968327a7f4c961e22e" }
|
||||||
python-jose = { version = ">=3.3", extras = [ "cryptography" ] }
|
python-jose = { version = ">=3.3", extras = [ "cryptography" ] }
|
||||||
sqlalchemy = { extras = [ "asyncio" ], version = "^2.0.40" }
|
sqlalchemy = { extras = [ "asyncio" ], version = "^2.0.40" }
|
||||||
pg8000 = "^1.31.5"
|
pg8000 = "^1.31.5"
|
||||||
|
|||||||
0
tests/unit/experiments/__init__.py
Normal file
0
tests/unit/experiments/__init__.py
Normal file
215
tests/unit/experiments/test_experiment_manager.py
Normal file
215
tests/unit/experiments/test_experiment_manager.py
Normal file
@@ -0,0 +1,215 @@
|
|||||||
|
"""Unit tests for ExperimentManager class, focusing on the v1 agent method."""
|
||||||
|
|
||||||
|
from types import SimpleNamespace
|
||||||
|
from unittest.mock import Mock, patch
|
||||||
|
from uuid import UUID, uuid4
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from openhands.app_server.app_conversation.live_status_app_conversation_service import (
|
||||||
|
LiveStatusAppConversationService,
|
||||||
|
)
|
||||||
|
from openhands.experiments.experiment_manager import ExperimentManager
|
||||||
|
from openhands.sdk import Agent
|
||||||
|
from openhands.sdk.llm import LLM
|
||||||
|
|
||||||
|
|
||||||
|
class TestExperimentManager:
    """Checks that the base ExperimentManager v1 hook hands the agent back as-is."""

    def setup_method(self):
        """Build the user id, conversation id, and mock LLM/Agent shared by the tests."""
        self.user_id = 'test_user_123'
        self.conversation_id = uuid4()

        # Mock LLM
        self.mock_llm = Mock(spec=LLM)
        self.mock_llm.configure_mock(model='gpt-4', usage_id='agent')

        # Mock Agent whose model_copy returns the same mock
        self.mock_agent = Mock(spec=Agent)
        self.mock_agent.configure_mock(
            llm=self.mock_llm,
            system_prompt_filename='default_system_prompt.j2',
        )
        self.mock_agent.model_copy = Mock(return_value=self.mock_agent)

    def test_run_agent_variant_tests__v1_returns_agent_unchanged(self):
        """The base hook returns the very agent it was given."""
        returned = ExperimentManager.run_agent_variant_tests__v1(
            self.user_id, self.conversation_id, self.mock_agent
        )

        assert returned is self.mock_agent
        assert returned == self.mock_agent

    def test_run_agent_variant_tests__v1_with_none_user_id(self):
        """A ``None`` user id is accepted and the agent is still returned."""
        returned = ExperimentManager.run_agent_variant_tests__v1(
            None, self.conversation_id, self.mock_agent
        )

        assert returned is self.mock_agent

    def test_run_agent_variant_tests__v1_with_different_conversation_ids(self):
        """Distinct conversation ids all yield the same agent back."""
        returned_agents = [
            ExperimentManager.run_agent_variant_tests__v1(
                self.user_id, conversation_id, self.mock_agent
            )
            for conversation_id in (uuid4(), uuid4())
        ]

        for returned in returned_agents:
            assert returned is self.mock_agent
|
||||||
|
|
||||||
|
|
||||||
|
class TestExperimentManagerIntegration:
    """Integration tests for ExperimentManager with start_app_conversation."""

    def setup_method(self):
        """Set up test fixtures: a user id, a conversation id, a mock LLM and a mock Agent."""
        self.user_id = 'test_user_123'
        self.conversation_id = uuid4()

        # Create a mock LLM
        self.mock_llm = Mock(spec=LLM)
        self.mock_llm.model = 'gpt-4'
        self.mock_llm.usage_id = 'agent'

        # Create a mock Agent
        self.mock_agent = Mock(spec=Agent)
        self.mock_agent.llm = self.mock_llm
        self.mock_agent.system_prompt_filename = 'default_system_prompt.j2'
        # model_copy returns the same mock, so a "copy" stays identity-equal.
        self.mock_agent.model_copy = Mock(return_value=self.mock_agent)

    @patch('openhands.experiments.experiment_manager.ExperimentManagerImpl')
    def test_start_app_conversation_calls_experiment_manager_v1(
        self, mock_experiment_manager_impl
    ):
        """Check the call contract of the (mocked) experiment manager v1 hook.

        The mocked ``run_agent_variant_tests__v1`` is invoked directly with
        ``(user_id, conversation_id, agent)``; the test then asserts that the
        mock recorded exactly that call and returned the configured agent.
        The real service code path is not executed here.
        """
        # Arrange
        mock_experiment_manager_impl.run_agent_variant_tests__v1.return_value = (
            self.mock_agent
        )

        # Create a mock service instance
        mock_service = Mock(spec=LiveStatusAppConversationService)

        # Mock the _build_start_conversation_request_for_user method to simulate the call
        with patch.object(mock_service, '_build_start_conversation_request_for_user'):
            # Simulate the part of the code that calls the experiment manager.
            # uuid4 is the module-level import already used by setup_method.
            conversation_id = uuid4()

            # This simulates the call that happens in the actual service
            result_agent = mock_experiment_manager_impl.run_agent_variant_tests__v1(
                self.user_id, conversation_id, self.mock_agent
            )

            # Assert
            mock_experiment_manager_impl.run_agent_variant_tests__v1.assert_called_once_with(
                self.user_id, conversation_id, self.mock_agent
            )
            assert result_agent == self.mock_agent

    @pytest.mark.asyncio
    async def test_experiment_manager_called_with_correct_parameters_in_context__noop_pass_through(
        self,
    ):
        """
        Use the real LiveStatusAppConversationService to build a StartConversationRequest,
        and verify that the agent survives the build untouched:
        - the request carries the *same* agent instance (no copy/mutation)
        - agent fields (LLM, system prompt filename) are not tweaked

        The experiment manager call itself is not spied on here, so its call
        count and arguments are not asserted by this test.
        """
        # --- Arrange: fixed UUID so the patched uuid4 is deterministic
        fixed_conversation_id = UUID('00000000-0000-0000-0000-000000000001')
        user_id = self.user_id

        # Create a stable Agent (and LLM) we can identity-check later.
        # Deliberately not self.mock_agent: its model_copy returns itself,
        # which would hide a copy made along the way.
        mock_llm = Mock(spec=LLM)
        mock_llm.model = 'gpt-4'
        mock_llm.usage_id = 'agent'

        mock_agent = Mock(spec=Agent)
        mock_agent.llm = mock_llm
        mock_agent.system_prompt_filename = 'default_system_prompt.j2'

        # Minimal, real-ish user context used by the service
        class DummyUserContext:
            async def get_user_info(self):
                # confirmation_mode=False -> NeverConfirm()
                return SimpleNamespace(
                    id=user_id,
                    llm_model='gpt-4',
                    llm_base_url=None,
                    llm_api_key=None,
                    confirmation_mode=False,
                )

            async def get_secrets(self):
                return {}

            async def get_latest_token(self, provider):
                return None

            async def get_user_id(self):
                return user_id

        user_context = DummyUserContext()

        # The service requires a lot of deps, but for this test we won't exercise them.
        app_conversation_info_service = Mock()
        app_conversation_start_task_service = Mock()
        sandbox_service = Mock()
        sandbox_spec_service = Mock()
        jwt_service = Mock()
        httpx_client = Mock()

        service = LiveStatusAppConversationService(
            init_git_in_empty_workspace=False,
            user_context=user_context,
            app_conversation_info_service=app_conversation_info_service,
            app_conversation_start_task_service=app_conversation_start_task_service,
            sandbox_service=sandbox_service,
            sandbox_spec_service=sandbox_spec_service,
            jwt_service=jwt_service,
            sandbox_startup_timeout=30,
            sandbox_startup_poll_frequency=1,
            httpx_client=httpx_client,
            web_url=None,
            access_token_hard_timeout=None,
        )

        # Patch the pieces invoked by the service
        with (
            patch(
                'openhands.app_server.app_conversation.live_status_app_conversation_service.get_default_agent',
                return_value=mock_agent,
            ),
            patch(
                'openhands.app_server.app_conversation.live_status_app_conversation_service.uuid4',
                return_value=fixed_conversation_id,
            ),
        ):
            # --- Act: build the start request
            start_req = await service._build_start_conversation_request_for_user(
                initial_message=None,
                git_provider=None,  # Keep secrets path simple
                working_dir='/tmp/project',  # Arbitrary path
            )

            # The agent in the StartConversationRequest is the *same* object we provided
            assert start_req.agent is mock_agent

            # No tweaks to agent fields by the experiment manager (noop)
            assert start_req.agent.llm is mock_llm
            assert start_req.agent.system_prompt_filename == 'default_system_prompt.j2'
|
||||||
Reference in New Issue
Block a user