mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
156 lines
5.3 KiB
Python
156 lines
5.3 KiB
Python
from typing import Any, Self
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
from openhands_cli.runner import ConversationRunner
|
|
from openhands_cli.user_actions.types import UserConfirmation
|
|
from pydantic import ConfigDict, SecretStr, model_validator
|
|
|
|
from openhands.sdk import Conversation, ConversationCallbackType, LocalConversation
|
|
from openhands.sdk.agent.base import AgentBase
|
|
from openhands.sdk.conversation import ConversationState
|
|
from openhands.sdk.conversation.state import ConversationExecutionStatus
|
|
from openhands.sdk.llm import LLM
|
|
from openhands.sdk.security.confirmation_policy import AlwaysConfirm, NeverConfirm
|
|
from unittest.mock import MagicMock
|
|
|
|
class FakeLLM(LLM):
|
|
@model_validator(mode='after')
|
|
def _set_env_side_effects(self) -> Self:
|
|
return self
|
|
|
|
|
|
def default_config() -> dict[str, Any]:
|
|
return {
|
|
'model': 'gpt-4o',
|
|
'api_key': SecretStr('test_key'),
|
|
'num_retries': 2,
|
|
'retry_min_wait': 1,
|
|
'retry_max_wait': 2,
|
|
}
|
|
|
|
|
|
class FakeAgent(AgentBase):
|
|
model_config = ConfigDict(frozen=False)
|
|
step_count: int = 0
|
|
finish_on_step: int | None = None
|
|
|
|
def init_state(
|
|
self, state: ConversationState, on_event: ConversationCallbackType
|
|
) -> None:
|
|
pass
|
|
|
|
def step(
|
|
self, conversation: LocalConversation, on_event: ConversationCallbackType
|
|
) -> None:
|
|
self.step_count += 1
|
|
if self.step_count == self.finish_on_step:
|
|
conversation.state.execution_status = ConversationExecutionStatus.FINISHED
|
|
|
|
|
|
@pytest.fixture()
|
|
def agent() -> FakeAgent:
|
|
llm = LLM(**default_config(), usage_id='test-service')
|
|
return FakeAgent(llm=llm, tools=[])
|
|
|
|
|
|
class TestConversationRunner:
|
|
@pytest.mark.parametrize(
|
|
'agent_status', [ConversationExecutionStatus.RUNNING, ConversationExecutionStatus.PAUSED]
|
|
)
|
|
def test_non_confirmation_mode_runs_once(
|
|
self, agent: FakeAgent, agent_status: ConversationExecutionStatus
|
|
) -> None:
|
|
"""
|
|
1. Confirmation mode is not on
|
|
2. Process message resumes paused conversation or continues running conversation
|
|
"""
|
|
|
|
convo = Conversation(agent)
|
|
convo.max_iteration_per_run = 1
|
|
convo.state.execution_status = agent_status
|
|
cr = ConversationRunner(convo)
|
|
cr.set_confirmation_policy(NeverConfirm())
|
|
cr.process_message(message=None)
|
|
|
|
assert agent.step_count == 1
|
|
assert (
|
|
convo.state.execution_status != ConversationExecutionStatus.PAUSED
|
|
)
|
|
|
|
@pytest.mark.parametrize(
|
|
'confirmation, final_status, expected_run_calls',
|
|
[
|
|
# Case 1: Agent waiting for confirmation; user DEFERS -> early return, no run()
|
|
(
|
|
UserConfirmation.DEFER,
|
|
ConversationExecutionStatus.WAITING_FOR_CONFIRMATION,
|
|
0,
|
|
),
|
|
# Case 2: Agent waiting for confirmation; user ACCEPTS -> run() once, break (finished=True)
|
|
(
|
|
UserConfirmation.ACCEPT,
|
|
ConversationExecutionStatus.FINISHED,
|
|
1,
|
|
),
|
|
],
|
|
)
|
|
def test_confirmation_mode_waiting_and_user_decision_controls_run(
|
|
self,
|
|
agent: FakeAgent,
|
|
confirmation: UserConfirmation,
|
|
final_status: ConversationExecutionStatus,
|
|
expected_run_calls: int,
|
|
) -> None:
|
|
"""
|
|
1. Agent may be paused but is waiting for consent on actions
|
|
2. If paused, we should have asked for confirmation on action
|
|
3. If not paused, we should still ask for confirmation on actions
|
|
4. If deferred no run call to agent should be made
|
|
5. If accepted, run call to agent should be made
|
|
"""
|
|
if final_status == ConversationExecutionStatus.FINISHED:
|
|
agent.finish_on_step = 1
|
|
|
|
convo = Conversation(agent)
|
|
|
|
# Set security analyzer using the new API to enable confirmation mode
|
|
convo.set_security_analyzer(MagicMock())
|
|
|
|
convo.state.execution_status = (
|
|
ConversationExecutionStatus.WAITING_FOR_CONFIRMATION
|
|
)
|
|
cr = ConversationRunner(convo)
|
|
cr.set_confirmation_policy(AlwaysConfirm())
|
|
|
|
with patch.object(
|
|
cr, '_handle_confirmation_request', return_value=confirmation
|
|
) as mock_confirmation_request:
|
|
cr.process_message(message=None)
|
|
|
|
mock_confirmation_request.assert_called_once()
|
|
assert agent.step_count == expected_run_calls
|
|
assert convo.state.execution_status == final_status
|
|
|
|
def test_confirmation_mode_not_waiting__runs_once_when_finished_true(
|
|
self, agent: FakeAgent
|
|
) -> None:
|
|
"""
|
|
1. Agent was not waiting
|
|
2. Agent finished without any actions
|
|
3. Conversation should finished without asking user for instructions
|
|
"""
|
|
agent.finish_on_step = 1
|
|
convo = Conversation(agent)
|
|
convo.state.execution_status = ConversationExecutionStatus.PAUSED
|
|
|
|
cr = ConversationRunner(convo)
|
|
cr.set_confirmation_policy(AlwaysConfirm())
|
|
|
|
with patch.object(cr, '_handle_confirmation_request') as _mock_h:
|
|
cr.process_message(message=None)
|
|
|
|
# No confirmation was needed up front; we still expect exactly one run.
|
|
assert agent.step_count == 1
|
|
_mock_h.assert_not_called()
|