From 6171395ef9194c0d3757ca6ad0c65997c95b47eb Mon Sep 17 00:00:00 2001
From: Xingyao Wang <xingyao@all-hands.dev>
Date: Thu, 17 Apr 2025 19:08:35 -0400
Subject: [PATCH] Refactor metrics handling to include condenser metrics
 (#7907)

Co-authored-by: openhands <openhands@all-hands.dev>
---
 .../components/features/auth-modal.test.tsx   |  4 +-
 frontend/src/assets/branding/gitlab-logo.svg  |  2 +-
 openhands/controller/agent_controller.py      | 31 +++++++++++----
 tests/unit/test_agent_controller.py           | 38 ++++++++-----------
 4 files changed, 42 insertions(+), 33 deletions(-)
diff --git a/frontend/__tests__/components/features/auth-modal.test.tsx b/frontend/__tests__/components/features/auth-modal.test.tsx
index 9a4d092ae8..3a2ff65ecb 100644
--- a/frontend/__tests__/components/features/auth-modal.test.tsx
+++ b/frontend/__tests__/components/features/auth-modal.test.tsx
@@ -31,7 +31,7 @@ describe("AuthModal", () => {
   it("should only enable the identity provider buttons if the tos checkbox is checked", async () => {
     const user = userEvent.setup();
     render(<AuthModal githubAuthUrl={null} appMode="saas" />);
-    
+
     const checkbox = screen.getByRole("checkbox");
     const githubButton = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });
     const gitlabButton = screen.getByRole("button", { name: "GITLAB$CONNECT_TO_GITLAB" });
@@ -62,4 +62,4 @@ describe("AuthModal", () => {
 
     expect(handleCaptureConsentSpy).toHaveBeenCalledWith(true);
   });
-});
\ No newline at end of file
+});
diff --git a/frontend/src/assets/branding/gitlab-logo.svg b/frontend/src/assets/branding/gitlab-logo.svg
index bfe9f3a56f..ef9600c980 100644
--- a/frontend/src/assets/branding/gitlab-logo.svg
+++ b/frontend/src/assets/branding/gitlab-logo.svg
@@ -3,4 +3,4 @@
   <path d="M1 8L3.5 15.5L11 21L18.5 15.5L21 8" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"/>
   <path d="M1 8L5.5 8L8.25 1" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"/>
   <path d="M21 8L16.5 8L13.75 1" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"/>
-</svg>
\ No newline at end of file
+</svg>
diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 784c5b0da3..362e6d0c92 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -68,6 +68,7 @@ from openhands.events.observation import (
 )
 from openhands.events.serialization.event import event_to_trajectory, truncate_content
 from openhands.llm.llm import LLM
+from openhands.llm.metrics import Metrics, TokenUsage
 
 # note: RESUME is only available on web GUI
 TRAFFIC_CONTROL_REMINDER = (
@@ -1229,16 +1230,32 @@ class AgentController:
         Args:
             action: The action to attach metrics to
         """
-        metrics = self.agent.llm.metrics.copy()
+        # Get metrics from agent LLM
+        agent_metrics = self.agent.llm.metrics
 
-        # Include condenser metrics if they exist
+        # Get metrics from condenser LLM if it exists
+        condenser_metrics: TokenUsage | None = None
         if hasattr(self.agent, 'condenser') and hasattr(self.agent.condenser, 'llm'):
-            metrics.merge(self.agent.condenser.llm.metrics)
+            condenser_metrics = self.agent.condenser.llm.metrics
 
-        # Create a minimal metrics object with just what the frontend needs
-        metrics._token_usages = []
-        metrics._response_latencies = []
-        metrics._costs = []
+        # Create a new minimal metrics object with just what the frontend needs
+        metrics = Metrics(model_name=agent_metrics.model_name)
+
+        # Set accumulated cost (sum of agent and condenser costs)
+        metrics.accumulated_cost = agent_metrics.accumulated_cost
+        if condenser_metrics:
+            metrics.accumulated_cost += condenser_metrics.accumulated_cost
+
+        # Set accumulated token usage (sum of agent and condenser token usage)
+        # Use a deep copy to ensure we don't modify the original object
+        metrics._accumulated_token_usage = (
+            agent_metrics.accumulated_token_usage.model_copy(deep=True)
+        )
+        if condenser_metrics:
+            metrics._accumulated_token_usage = (
+                metrics._accumulated_token_usage
+                + condenser_metrics.accumulated_token_usage
+            )
 
         action.llm_metrics = metrics
 
diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py
index 23c8792738..c44ee05108 100644
--- a/tests/unit/test_agent_controller.py
+++ b/tests/unit/test_agent_controller.py
@@ -1195,17 +1195,14 @@ async def test_action_metrics_copy(mock_agent):
     mock_agent.llm.metrics.accumulated_cost = 0.1
     assert last_action.llm_metrics.accumulated_cost == 0.07
 
+    await controller.close()
+
 
 @pytest.mark.asyncio
-async def test_condenser_metrics_included():
+async def test_condenser_metrics_included(mock_agent, test_event_stream):
     """Test that metrics from the condenser's LLM are included in the action metrics."""
-    # Setup
-    file_store = InMemoryFileStore({})
-    event_stream = EventStream(sid='test', file_store=file_store)
 
-    # Create agent with metrics
-    agent = MagicMock(spec=Agent)
-    agent.llm = MagicMock(spec=LLM)
+    # Set up agent metrics
     agent_metrics = Metrics(model_name='agent-model')
     agent_metrics.accumulated_cost = 0.05
     agent_metrics._accumulated_token_usage = TokenUsage(
@@ -1216,7 +1213,8 @@ async def test_condenser_metrics_included():
         cache_write_tokens=10,
         response_id='agent-accumulated',
     )
-    agent.llm.metrics = agent_metrics
+    mock_agent.llm.metrics = agent_metrics
+    mock_agent.name = 'TestAgent'
 
     # Create condenser with its own metrics
     condenser = MagicMock()
@@ -1233,32 +1231,27 @@ async def test_condenser_metrics_included():
     )
     condenser.llm.metrics = condenser_metrics
 
-    # Attach the condenser to the agent
-    agent.condenser = condenser
+    # Attach the condenser to the mock_agent
+    mock_agent.condenser = condenser
 
-    # Mock the system message to avoid ID issues
-    system_message = SystemMessageAction(content='Test system message')
-    system_message._source = EventSource.AGENT
-    system_message._id = -1
-    agent.get_system_message.return_value = system_message
-
-    # Mock agent step to return a CondensationAction
+    # Create a real CondensationAction
     action = CondensationAction(
         forgotten_events_start_id=1,
         forgotten_events_end_id=5,
         summary='Test summary',
         summary_offset=1,
     )
+    action._source = EventSource.AGENT  # Required for event_stream.add_event
 
     def agent_step_fn(state):
         return action
 
-    agent.step = agent_step_fn
+    mock_agent.step = agent_step_fn
 
     # Create controller with correct parameters
     controller = AgentController(
-        agent=agent,
-        event_stream=event_stream,
+        agent=mock_agent,
+        event_stream=test_event_stream,
         max_iterations=10,
         sid='test',
         confirmation_mode=False,
@@ -1270,15 +1263,14 @@ async def test_condenser_metrics_included():
     await controller._step()
 
     # Get the last event from event stream
-    events = list(event_stream.get_events())
+    events = list(test_event_stream.get_events())
     assert len(events) > 0
     last_action = events[-1]
 
     # Verify metrics were copied correctly
     assert last_action.llm_metrics is not None
 
-    # With the current implementation, only agent.llm.metrics are included
-    # This test will fail until we fix the implementation
+    # Verify that both agent and condenser metrics are included
     assert (
         last_action.llm_metrics.accumulated_cost == 0.08
     )  # 0.05 from agent + 0.03 from condenser