diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index d411df2bae..f16abc307b 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -67,7 +67,6 @@ from openhands.events.observation import ( ) from openhands.events.serialization.event import event_to_trajectory, truncate_content from openhands.llm.llm import LLM -from openhands.llm.metrics import Metrics # note: RESUME is only available on web GUI TRAFFIC_CONTROL_REMINDER = ( @@ -1142,15 +1141,21 @@ class AgentController: - accumulated_cost: The current total cost - accumulated_token_usage: Accumulated token statistics across all API calls + This includes metrics from both the agent's LLM and the condenser's LLM if it exists. + Args: action: The action to attach metrics to """ + metrics = self.agent.llm.metrics.copy() + + # Include condenser metrics if they exist + if hasattr(self.agent, 'condenser') and hasattr(self.agent.condenser, 'llm'): + metrics.merge(self.agent.condenser.llm.metrics) + # Create a minimal metrics object with just what the frontend needs - metrics = Metrics(model_name=self.agent.llm.metrics.model_name) - metrics.accumulated_cost = self.agent.llm.metrics.accumulated_cost - metrics._accumulated_token_usage = ( - self.agent.llm.metrics.accumulated_token_usage - ) + metrics._token_usages = [] + metrics._response_latencies = [] + metrics._costs = [] action.llm_metrics = metrics diff --git a/openhands/llm/metrics.py b/openhands/llm/metrics.py index 3dfa963bf1..6230c05c28 100644 --- a/openhands/llm/metrics.py +++ b/openhands/llm/metrics.py @@ -1,3 +1,4 @@ +import copy import time from pydantic import BaseModel, Field @@ -200,5 +201,9 @@ class Metrics: logs += f'{key}: {value}\n' return logs + def copy(self) -> 'Metrics': + """Create a deep copy of the Metrics object.""" + return copy.deepcopy(self) + def __repr__(self) -> str: return f'Metrics({self.get()}' diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py index 0722becd3c..67c870c7bd 100644 --- a/tests/unit/test_agent_controller.py +++ b/tests/unit/test_agent_controller.py @@ -1195,6 +1195,102 @@ async def test_action_metrics_copy(mock_agent): mock_agent.llm.metrics.accumulated_cost = 0.1 assert last_action.llm_metrics.accumulated_cost == 0.07 + +@pytest.mark.asyncio +async def test_condenser_metrics_included(): + """Test that metrics from the condenser's LLM are included in the action metrics.""" + # Setup + file_store = InMemoryFileStore({}) + event_stream = EventStream(sid='test', file_store=file_store) + + # Create agent with metrics + agent = MagicMock(spec=Agent) + agent.llm = MagicMock(spec=LLM) + agent_metrics = Metrics(model_name='agent-model') + agent_metrics.accumulated_cost = 0.05 + agent_metrics._accumulated_token_usage = TokenUsage( + model='agent-model', + prompt_tokens=100, + completion_tokens=50, + cache_read_tokens=10, + cache_write_tokens=10, + response_id='agent-accumulated', + ) + agent.llm.metrics = agent_metrics + + # Create condenser with its own metrics + condenser = MagicMock() + condenser.llm = MagicMock(spec=LLM) + condenser_metrics = Metrics(model_name='condenser-model') + condenser_metrics.accumulated_cost = 0.03 + condenser_metrics._accumulated_token_usage = TokenUsage( + model='condenser-model', + prompt_tokens=200, + completion_tokens=100, + cache_read_tokens=20, + cache_write_tokens=5000, # High cache_write value that should be preserved + response_id='condenser-accumulated', + ) + condenser.llm.metrics = condenser_metrics + + # Attach the condenser to the agent + agent.condenser = condenser + + # Mock agent step to return a CondensationAction + action = CondensationAction( + forgotten_events_start_id=1, + forgotten_events_end_id=5, + summary='Test summary', + summary_offset=1, + ) + + def agent_step_fn(state): + return action + + agent.step = agent_step_fn + + # Create controller with correct parameters + controller = AgentController( + agent=agent, + event_stream=event_stream, + max_iterations=10, + sid='test', + confirmation_mode=False, + headless_mode=True, + ) + + # Execute one step + controller.state.agent_state = AgentState.RUNNING + await controller._step() + + # Get the last event from event stream + events = list(event_stream.get_events()) + assert len(events) > 0 + last_action = events[-1] + + # Verify metrics were copied correctly + assert last_action.llm_metrics is not None + + # With the current implementation, only agent.llm.metrics are included + # This test will fail until we fix the implementation + assert ( + last_action.llm_metrics.accumulated_cost == 0.08 + ) # 0.05 from agent + 0.03 from condenser + + # The accumulated token usage should include both agent and condenser metrics + assert ( + last_action.llm_metrics.accumulated_token_usage.prompt_tokens == 300 + ) # 100 + 200 + assert ( + last_action.llm_metrics.accumulated_token_usage.completion_tokens == 150 + ) # 50 + 100 + assert ( + last_action.llm_metrics.accumulated_token_usage.cache_read_tokens == 30 + ) # 10 + 20 + assert ( + last_action.llm_metrics.accumulated_token_usage.cache_write_tokens == 5010 + ) # 10 + 5000 + await controller.close()