Refactor metrics handling to include condenser metrics (#7907)

Co-authored-by: openhands <openhands@all-hands.dev>
2025-12-26 05:48:36 +08:00 · 2025-04-17 19:08:35 -04:00 · 2025-04-17 19:08:35 -04:00 · 6171395ef9
commit 6171395ef9
parent d270476d6c
4 changed files with 42 additions and 33 deletions
--- a/frontend/tests/components/features/auth-modal.test.tsx
+++ b/frontend/tests/components/features/auth-modal.test.tsx
@ -31,7 +31,7 @@ describe("AuthModal", () => {
  it("should only enable the identity provider buttons if the tos checkbox is checked", async () => {
    const user = userEvent.setup();
    render(<AuthModal githubAuthUrl={null} appMode="saas" />);
-    
+
    const checkbox = screen.getByRole("checkbox");
    const githubButton = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });
    const gitlabButton = screen.getByRole("button", { name: "GITLAB$CONNECT_TO_GITLAB" });
@ -62,4 +62,4 @@ describe("AuthModal", () => {

    expect(handleCaptureConsentSpy).toHaveBeenCalledWith(true);
  });
-});
+});
--- a/frontend/src/assets/branding/gitlab-logo.svg
+++ b/frontend/src/assets/branding/gitlab-logo.svg
@ -3,4 +3,4 @@
  <path d="M1 8L3.5 15.5L11 21L18.5 15.5L21 8" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"/>
  <path d="M1 8L5.5 8L8.25 1" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"/>
  <path d="M21 8L16.5 8L13.75 1" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"/>
-</svg>
+</svg>
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@ -68,6 +68,7 @@ from openhands.events.observation import (
 )
 from openhands.events.serialization.event import event_to_trajectory, truncate_content
 from openhands.llm.llm import LLM
+from openhands.llm.metrics import Metrics, TokenUsage

 # note: RESUME is only available on web GUI
 TRAFFIC_CONTROL_REMINDER = (
@ -1229,16 +1230,32 @@ class AgentController:
        Args:
            action: The action to attach metrics to
        """
-        metrics = self.agent.llm.metrics.copy()
+        # Get metrics from agent LLM
+        agent_metrics = self.agent.llm.metrics

-        # Include condenser metrics if they exist
+        # Get metrics from condenser LLM if it exists
+        condenser_metrics: TokenUsage | None = None
        if hasattr(self.agent, 'condenser') and hasattr(self.agent.condenser, 'llm'):
-            metrics.merge(self.agent.condenser.llm.metrics)
+            condenser_metrics = self.agent.condenser.llm.metrics

-        # Create a minimal metrics object with just what the frontend needs
-        metrics._token_usages = []
-        metrics._response_latencies = []
-        metrics._costs = []
+        # Create a new minimal metrics object with just what the frontend needs
+        metrics = Metrics(model_name=agent_metrics.model_name)
+
+        # Set accumulated cost (sum of agent and condenser costs)
+        metrics.accumulated_cost = agent_metrics.accumulated_cost
+        if condenser_metrics:
+            metrics.accumulated_cost += condenser_metrics.accumulated_cost
+
+        # Set accumulated token usage (sum of agent and condenser token usage)
+        # Use a deep copy to ensure we don't modify the original object
+        metrics._accumulated_token_usage = (
+            agent_metrics.accumulated_token_usage.model_copy(deep=True)
+        )
+        if condenser_metrics:
+            metrics._accumulated_token_usage = (
+                metrics._accumulated_token_usage
+                + condenser_metrics.accumulated_token_usage
+            )

        action.llm_metrics = metrics

--- a/tests/unit/test_agent_controller.py
+++ b/tests/unit/test_agent_controller.py
@ -1195,17 +1195,14 @@ async def test_action_metrics_copy(mock_agent):
    mock_agent.llm.metrics.accumulated_cost = 0.1
    assert last_action.llm_metrics.accumulated_cost == 0.07

+    await controller.close()
+

@pytest.mark.asyncio
-async def test_condenser_metrics_included():
+async def test_condenser_metrics_included(mock_agent, test_event_stream):
    """Test that metrics from the condenser's LLM are included in the action metrics."""
-    # Setup
-    file_store = InMemoryFileStore({})
-    event_stream = EventStream(sid='test', file_store=file_store)

-    # Create agent with metrics
-    agent = MagicMock(spec=Agent)
-    agent.llm = MagicMock(spec=LLM)
+    # Set up agent metrics
    agent_metrics = Metrics(model_name='agent-model')
    agent_metrics.accumulated_cost = 0.05
    agent_metrics._accumulated_token_usage = TokenUsage(
@ -1216,7 +1213,8 @@ async def test_condenser_metrics_included():
        cache_write_tokens=10,
        response_id='agent-accumulated',
    )
-    agent.llm.metrics = agent_metrics
+    mock_agent.llm.metrics = agent_metrics
+    mock_agent.name = 'TestAgent'

    # Create condenser with its own metrics
    condenser = MagicMock()
@ -1233,32 +1231,27 @@ async def test_condenser_metrics_included():
    )
    condenser.llm.metrics = condenser_metrics

-    # Attach the condenser to the agent
-    agent.condenser = condenser
+    # Attach the condenser to the mock_agent
+    mock_agent.condenser = condenser

-    # Mock the system message to avoid ID issues
-    system_message = SystemMessageAction(content='Test system message')
-    system_message._source = EventSource.AGENT
-    system_message._id = -1
-    agent.get_system_message.return_value = system_message
-
-    # Mock agent step to return a CondensationAction
+    # Create a real CondensationAction
    action = CondensationAction(
        forgotten_events_start_id=1,
        forgotten_events_end_id=5,
        summary='Test summary',
        summary_offset=1,
    )
+    action._source = EventSource.AGENT  # Required for event_stream.add_event

    def agent_step_fn(state):
        return action

-    agent.step = agent_step_fn
+    mock_agent.step = agent_step_fn

    # Create controller with correct parameters
    controller = AgentController(
-        agent=agent,
-        event_stream=event_stream,
+        agent=mock_agent,
+        event_stream=test_event_stream,
        max_iterations=10,
        sid='test',
        confirmation_mode=False,
@ -1270,15 +1263,14 @@ async def test_condenser_metrics_included():
    await controller._step()

    # Get the last event from event stream
-    events = list(event_stream.get_events())
+    events = list(test_event_stream.get_events())
    assert len(events) > 0
    last_action = events[-1]

    # Verify metrics were copied correctly
    assert last_action.llm_metrics is not None

-    # With the current implementation, only agent.llm.metrics are included
-    # This test will fail until we fix the implementation
+    # Verify that both agent and condenser metrics are included
    assert (
        last_action.llm_metrics.accumulated_cost == 0.08
    )  # 0.05 from agent + 0.03 from condenser