fix(metrics) Merge metrics of agent LLM and condenser LLM (#7890)

Co-authored-by: Carlos Freund <carlosfreund@gmail.com>
Authored by Carlos Freund on 2025-04-17 19:15:14 +02:00; committed by GitHub.
parent 78e3f82de1
commit cc8b677f3e
3 changed files with 112 additions and 6 deletions

View File

@@ -67,7 +67,6 @@ from openhands.events.observation import (
 )
 from openhands.events.serialization.event import event_to_trajectory, truncate_content
 from openhands.llm.llm import LLM
-from openhands.llm.metrics import Metrics
 
 # note: RESUME is only available on web GUI
 TRAFFIC_CONTROL_REMINDER = (
@@ -1142,15 +1141,21 @@ class AgentController:
         - accumulated_cost: The current total cost
         - accumulated_token_usage: Accumulated token statistics across all API calls
 
+        This includes metrics from both the agent's LLM and the condenser's LLM if it exists.
+
         Args:
             action: The action to attach metrics to
         """
-        # Create a minimal metrics object with just what the frontend needs
-        metrics = Metrics(model_name=self.agent.llm.metrics.model_name)
-        metrics.accumulated_cost = self.agent.llm.metrics.accumulated_cost
-        metrics._accumulated_token_usage = (
-            self.agent.llm.metrics.accumulated_token_usage
-        )
-        metrics._token_usages = []
-        metrics._response_latencies = []
-        metrics._costs = []
+        metrics = self.agent.llm.metrics.copy()
+
+        # Include condenser metrics if they exist
+        if hasattr(self.agent, 'condenser') and hasattr(self.agent.condenser, 'llm'):
+            metrics.merge(self.agent.condenser.llm.metrics)
+
         action.llm_metrics = metrics
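
The merge semantics the new code relies on are not shown in this diff. As a rough standalone sketch, assuming Metrics.merge adds the other object's accumulated cost and token usage onto the receiver (which is what the new unit test below asserts):

    from openhands.llm.metrics import Metrics

    # Two metrics objects with known costs, mirroring the values used in the test.
    agent_metrics = Metrics(model_name='agent-model')
    agent_metrics.accumulated_cost = 0.05

    condenser_metrics = Metrics(model_name='condenser-model')
    condenser_metrics.accumulated_cost = 0.03

    # Take a snapshot of the agent metrics, then fold the condenser metrics into it.
    snapshot = agent_metrics.copy()    # deep copy added in this commit
    snapshot.merge(condenser_metrics)  # assumed to sum costs and token usage
    assert snapshot.accumulated_cost == 0.08       # 0.05 + 0.03
    assert agent_metrics.accumulated_cost == 0.05  # the live agent metrics are untouched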

View File

@@ -1,3 +1,4 @@
+import copy
 import time
 
 from pydantic import BaseModel, Field

@@ -200,5 +201,9 @@ class Metrics:
             logs += f'{key}: {value}\n'
         return logs
 
+    def copy(self) -> 'Metrics':
+        """Create a deep copy of the Metrics object."""
+        return copy.deepcopy(self)
+
     def __repr__(self) -> str:
         return f'Metrics({self.get()}'
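
The new copy() gives callers a snapshot of the metrics rather than a live reference, so metrics already attached to an emitted action are not changed by later LLM calls. A minimal usage sketch (values mirror test_action_metrics_copy in the test file below):

    from openhands.llm.metrics import Metrics

    live = Metrics(model_name='agent-model')
    live.accumulated_cost = 0.07

    snapshot = live.copy()       # deep copy via copy.deepcopy
    live.accumulated_cost = 0.1  # later updates to the live metrics...
    assert snapshot.accumulated_cost == 0.07  # ...do not leak into the snapshot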

View File

@@ -1195,6 +1195,102 @@ async def test_action_metrics_copy(mock_agent):
     mock_agent.llm.metrics.accumulated_cost = 0.1
     assert last_action.llm_metrics.accumulated_cost == 0.07
 
+
+@pytest.mark.asyncio
+async def test_condenser_metrics_included():
+    """Test that metrics from the condenser's LLM are included in the action metrics."""
+    # Setup
+    file_store = InMemoryFileStore({})
+    event_stream = EventStream(sid='test', file_store=file_store)
+
+    # Create agent with metrics
+    agent = MagicMock(spec=Agent)
+    agent.llm = MagicMock(spec=LLM)
+    agent_metrics = Metrics(model_name='agent-model')
+    agent_metrics.accumulated_cost = 0.05
+    agent_metrics._accumulated_token_usage = TokenUsage(
+        model='agent-model',
+        prompt_tokens=100,
+        completion_tokens=50,
+        cache_read_tokens=10,
+        cache_write_tokens=10,
+        response_id='agent-accumulated',
+    )
+    agent.llm.metrics = agent_metrics
+
+    # Create condenser with its own metrics
+    condenser = MagicMock()
+    condenser.llm = MagicMock(spec=LLM)
+    condenser_metrics = Metrics(model_name='condenser-model')
+    condenser_metrics.accumulated_cost = 0.03
+    condenser_metrics._accumulated_token_usage = TokenUsage(
+        model='condenser-model',
+        prompt_tokens=200,
+        completion_tokens=100,
+        cache_read_tokens=20,
+        cache_write_tokens=5000,  # High cache_write value that should be preserved
+        response_id='condenser-accumulated',
+    )
+    condenser.llm.metrics = condenser_metrics
+
+    # Attach the condenser to the agent
+    agent.condenser = condenser
+
+    # Mock agent step to return a CondensationAction
+    action = CondensationAction(
+        forgotten_events_start_id=1,
+        forgotten_events_end_id=5,
+        summary='Test summary',
+        summary_offset=1,
+    )
+
+    def agent_step_fn(state):
+        return action
+
+    agent.step = agent_step_fn
+
+    # Create controller with correct parameters
+    controller = AgentController(
+        agent=agent,
+        event_stream=event_stream,
+        max_iterations=10,
+        sid='test',
+        confirmation_mode=False,
+        headless_mode=True,
+    )
+
+    # Execute one step
+    controller.state.agent_state = AgentState.RUNNING
+    await controller._step()
+
+    # Get the last event from the event stream
+    events = list(event_stream.get_events())
+    assert len(events) > 0
+    last_action = events[-1]
+
+    # Verify metrics were copied correctly
+    assert last_action.llm_metrics is not None
+    # The accumulated cost should include both the agent's and the condenser's costs
+    assert (
+        last_action.llm_metrics.accumulated_cost == 0.08
+    )  # 0.05 from agent + 0.03 from condenser
+
+    # The accumulated token usage should include both agent and condenser metrics
+    assert (
+        last_action.llm_metrics.accumulated_token_usage.prompt_tokens == 300
+    )  # 100 + 200
+    assert (
+        last_action.llm_metrics.accumulated_token_usage.completion_tokens == 150
+    )  # 50 + 100
+    assert (
+        last_action.llm_metrics.accumulated_token_usage.cache_read_tokens == 30
+    )  # 10 + 20
+    assert (
+        last_action.llm_metrics.accumulated_token_usage.cache_write_tokens == 5010
+    )  # 10 + 5000
+
+    await controller.close()
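
The token assertions encode a simple field-wise accumulation. As an illustrative sketch only (the real combination happens inside Metrics.merge, which is not part of this diff; the import location of TokenUsage and the helper name are assumptions):

    from openhands.llm.metrics import TokenUsage  # assumed to live alongside Metrics

    def combine_usage(a: TokenUsage, b: TokenUsage) -> TokenUsage:
        # Hypothetical helper: the field-wise sums the assertions above expect.
        return TokenUsage(
            model=a.model,
            prompt_tokens=a.prompt_tokens + b.prompt_tokens,                 # 100 + 200 = 300
            completion_tokens=a.completion_tokens + b.completion_tokens,     # 50 + 100 = 150
            cache_read_tokens=a.cache_read_tokens + b.cache_read_tokens,     # 10 + 20 = 30
            cache_write_tokens=a.cache_write_tokens + b.cache_write_tokens,  # 10 + 5000 = 5010
            response_id=a.response_id,
        )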