OpenHands/openhands-cli/tests/test_conversation_runner.py

156 lines
5.3 KiB
Python

from typing import Any, Self
from unittest.mock import patch
import pytest
from openhands_cli.runner import ConversationRunner
from openhands_cli.user_actions.types import UserConfirmation
from pydantic import ConfigDict, SecretStr, model_validator
from openhands.sdk import Conversation, ConversationCallbackType, LocalConversation
from openhands.sdk.agent.base import AgentBase
from openhands.sdk.conversation import ConversationState
from openhands.sdk.conversation.state import ConversationExecutionStatus
from openhands.sdk.llm import LLM
from openhands.sdk.security.confirmation_policy import AlwaysConfirm, NeverConfirm
from unittest.mock import MagicMock
class FakeLLM(LLM):
    """``LLM`` subclass whose ``_set_env_side_effects`` validator does nothing."""

    @model_validator(mode='after')
    def _set_env_side_effects(self) -> Self:
        """Return the validated model untouched.

        NOTE(review): presumably this shadows a same-named validator on
        ``LLM`` so that tests do not trigger its side effects -- confirm
        against the SDK.
        """
        return self
def default_config() -> dict[str, Any]:
    """Build the keyword arguments used to construct the LLM in these tests.

    Returns:
        A freshly created dict on every call, holding the model name, a
        dummy API key wrapped in ``SecretStr``, and the retry settings.
    """
    retry_settings: dict[str, Any] = {
        'num_retries': 2,
        'retry_min_wait': 1,
        'retry_max_wait': 2,
    }
    return {
        'model': 'gpt-4o',
        'api_key': SecretStr('test_key'),
        **retry_settings,
    }
class FakeAgent(AgentBase):
    """Test double that counts ``step`` calls and can finish on a chosen step."""

    # Not frozen: ``step`` mutates ``step_count`` and tests assign
    # ``finish_on_step`` after construction.
    model_config = ConfigDict(frozen=False)

    # Number of times ``step`` has been called on this agent.
    step_count: int = 0
    # Step number on which the conversation is marked FINISHED; ``None``
    # means ``step`` never changes the execution status.
    finish_on_step: int | None = None

    def init_state(
        self, state: ConversationState, on_event: ConversationCallbackType
    ) -> None:
        """Do nothing; this fake performs no state initialisation."""

    def step(
        self, conversation: LocalConversation, on_event: ConversationCallbackType
    ) -> None:
        """Record one step and finish the conversation on the configured step.

        Args:
            conversation: Conversation whose ``state.execution_status`` is set
                to ``FINISHED`` when ``step_count`` reaches ``finish_on_step``.
            on_event: Unused by this fake.
        """
        self.step_count += 1
        if self.finish_on_step != self.step_count:
            return
        conversation.state.execution_status = ConversationExecutionStatus.FINISHED
@pytest.fixture()
def agent() -> FakeAgent:
    """Provide a tool-less ``FakeAgent`` backed by an LLM built from ``default_config``."""
    # NOTE(review): ``FakeLLM`` is defined in this module, yet a plain ``LLM``
    # is constructed here -- confirm whether that is intentional.
    return FakeAgent(
        llm=LLM(**default_config(), usage_id='test-service'),
        tools=[],
    )
class TestConversationRunner:
    """Tests for ``ConversationRunner.process_message`` with and without confirmation."""

    @pytest.mark.parametrize(
        'agent_status',
        [ConversationExecutionStatus.RUNNING, ConversationExecutionStatus.PAUSED],
    )
    def test_non_confirmation_mode_runs_once(
        self, agent: FakeAgent, agent_status: ConversationExecutionStatus
    ) -> None:
        """With confirmation mode off, processing a message steps the agent once.

        1. Confirmation mode is not on
        2. Processing a message resumes a paused conversation or continues
           a running one, and never leaves it paused
        """
        # Arrange: a conversation limited to one iteration per run.
        conversation = Conversation(agent)
        conversation.max_iteration_per_run = 1
        conversation.state.execution_status = agent_status

        runner = ConversationRunner(conversation)
        runner.set_confirmation_policy(NeverConfirm())

        # Act
        runner.process_message(message=None)

        # Assert
        assert agent.step_count == 1
        status_after_run = conversation.state.execution_status
        assert status_after_run != ConversationExecutionStatus.PAUSED

    @pytest.mark.parametrize(
        'confirmation, final_status, expected_run_calls',
        [
            # Case 1: agent is waiting for confirmation and the user DEFERS
            # -> early return, the agent is never stepped.
            (
                UserConfirmation.DEFER,
                ConversationExecutionStatus.WAITING_FOR_CONFIRMATION,
                0,
            ),
            # Case 2: agent is waiting for confirmation and the user ACCEPTS
            # -> the agent is stepped once and the conversation finishes.
            (
                UserConfirmation.ACCEPT,
                ConversationExecutionStatus.FINISHED,
                1,
            ),
        ],
    )
    def test_confirmation_mode_waiting_and_user_decision_controls_run(
        self,
        agent: FakeAgent,
        confirmation: UserConfirmation,
        final_status: ConversationExecutionStatus,
        expected_run_calls: int,
    ) -> None:
        """The user's answer to a pending confirmation decides whether the agent runs.

        1. The agent may be paused but is waiting for consent on actions
        2. If paused, confirmation on the action should have been requested
        3. If not paused, confirmation on actions should still be requested
        4. If deferred, no run call to the agent should be made
        5. If accepted, a run call to the agent should be made
        """
        # Only the case that expects FINISHED lets the fake agent finish.
        if final_status == ConversationExecutionStatus.FINISHED:
            agent.finish_on_step = 1

        conversation = Conversation(agent)
        # Set a security analyzer through the new API to enable confirmation mode.
        conversation.set_security_analyzer(MagicMock())
        conversation.state.execution_status = (
            ConversationExecutionStatus.WAITING_FOR_CONFIRMATION
        )

        runner = ConversationRunner(conversation)
        runner.set_confirmation_policy(AlwaysConfirm())

        # Stub the user prompt so it answers with the parametrized decision.
        with patch.object(
            runner, '_handle_confirmation_request', return_value=confirmation
        ) as confirmation_handler:
            runner.process_message(message=None)

        confirmation_handler.assert_called_once()
        assert agent.step_count == expected_run_calls
        assert conversation.state.execution_status == final_status

    def test_confirmation_mode_not_waiting__runs_once_when_finished_true(
        self, agent: FakeAgent
    ) -> None:
        """A conversation that was not waiting finishes without prompting the user.

        1. The agent was not waiting for confirmation
        2. The agent finished on its first step without any actions
        3. The conversation should finish without asking the user for
           instructions
        """
        agent.finish_on_step = 1

        conversation = Conversation(agent)
        conversation.state.execution_status = ConversationExecutionStatus.PAUSED

        runner = ConversationRunner(conversation)
        runner.set_confirmation_policy(AlwaysConfirm())

        with patch.object(runner, '_handle_confirmation_request') as confirmation_handler:
            runner.process_message(message=None)

        # No confirmation was needed up front; exactly one run is still expected.
        assert agent.step_count == 1
        confirmation_handler.assert_not_called()