From 6171395ef9194c0d3757ca6ad0c65997c95b47eb Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Thu, 17 Apr 2025 19:08:35 -0400 Subject: [PATCH] Refactor metrics handling to include condenser metrics (#7907) Co-authored-by: openhands --- .../components/features/auth-modal.test.tsx | 4 +- frontend/src/assets/branding/gitlab-logo.svg | 2 +- openhands/controller/agent_controller.py | 31 +++++++++++---- tests/unit/test_agent_controller.py | 38 ++++++++----------- 4 files changed, 42 insertions(+), 33 deletions(-) diff --git a/frontend/__tests__/components/features/auth-modal.test.tsx b/frontend/__tests__/components/features/auth-modal.test.tsx index 9a4d092ae8..3a2ff65ecb 100644 --- a/frontend/__tests__/components/features/auth-modal.test.tsx +++ b/frontend/__tests__/components/features/auth-modal.test.tsx @@ -31,7 +31,7 @@ describe("AuthModal", () => { it("should only enable the identity provider buttons if the tos checkbox is checked", async () => { const user = userEvent.setup(); render(); - + const checkbox = screen.getByRole("checkbox"); const githubButton = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" }); const gitlabButton = screen.getByRole("button", { name: "GITLAB$CONNECT_TO_GITLAB" }); @@ -62,4 +62,4 @@ describe("AuthModal", () => { expect(handleCaptureConsentSpy).toHaveBeenCalledWith(true); }); -}); \ No newline at end of file +}); diff --git a/frontend/src/assets/branding/gitlab-logo.svg b/frontend/src/assets/branding/gitlab-logo.svg index bfe9f3a56f..ef9600c980 100644 --- a/frontend/src/assets/branding/gitlab-logo.svg +++ b/frontend/src/assets/branding/gitlab-logo.svg @@ -3,4 +3,4 @@ - \ No newline at end of file + diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 784c5b0da3..362e6d0c92 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -68,6 +68,7 @@ from openhands.events.observation import ( ) from openhands.events.serialization.event import event_to_trajectory, truncate_content from openhands.llm.llm import LLM +from openhands.llm.metrics import Metrics, TokenUsage # note: RESUME is only available on web GUI TRAFFIC_CONTROL_REMINDER = ( @@ -1229,16 +1230,32 @@ class AgentController: Args: action: The action to attach metrics to """ - metrics = self.agent.llm.metrics.copy() + # Get metrics from agent LLM + agent_metrics = self.agent.llm.metrics - # Include condenser metrics if they exist + # Get metrics from condenser LLM if it exists + condenser_metrics: TokenUsage | None = None if hasattr(self.agent, 'condenser') and hasattr(self.agent.condenser, 'llm'): - metrics.merge(self.agent.condenser.llm.metrics) + condenser_metrics = self.agent.condenser.llm.metrics - # Create a minimal metrics object with just what the frontend needs - metrics._token_usages = [] - metrics._response_latencies = [] - metrics._costs = [] + # Create a new minimal metrics object with just what the frontend needs + metrics = Metrics(model_name=agent_metrics.model_name) + + # Set accumulated cost (sum of agent and condenser costs) + metrics.accumulated_cost = agent_metrics.accumulated_cost + if condenser_metrics: + metrics.accumulated_cost += condenser_metrics.accumulated_cost + + # Set accumulated token usage (sum of agent and condenser token usage) + # Use a deep copy to ensure we don't modify the original object + metrics._accumulated_token_usage = ( + agent_metrics.accumulated_token_usage.model_copy(deep=True) + ) + if condenser_metrics: + metrics._accumulated_token_usage = ( + metrics._accumulated_token_usage + + condenser_metrics.accumulated_token_usage + ) action.llm_metrics = metrics diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py index 23c8792738..c44ee05108 100644 --- a/tests/unit/test_agent_controller.py +++ b/tests/unit/test_agent_controller.py @@ -1195,17 +1195,14 @@ async def test_action_metrics_copy(mock_agent): mock_agent.llm.metrics.accumulated_cost = 0.1 assert last_action.llm_metrics.accumulated_cost == 0.07 + await controller.close() + @pytest.mark.asyncio -async def test_condenser_metrics_included(): +async def test_condenser_metrics_included(mock_agent, test_event_stream): """Test that metrics from the condenser's LLM are included in the action metrics.""" - # Setup - file_store = InMemoryFileStore({}) - event_stream = EventStream(sid='test', file_store=file_store) - # Create agent with metrics - agent = MagicMock(spec=Agent) - agent.llm = MagicMock(spec=LLM) + # Set up agent metrics agent_metrics = Metrics(model_name='agent-model') agent_metrics.accumulated_cost = 0.05 agent_metrics._accumulated_token_usage = TokenUsage( @@ -1216,7 +1213,8 @@ async def test_condenser_metrics_included(): cache_write_tokens=10, response_id='agent-accumulated', ) - agent.llm.metrics = agent_metrics + mock_agent.llm.metrics = agent_metrics + mock_agent.name = 'TestAgent' # Create condenser with its own metrics condenser = MagicMock() @@ -1233,32 +1231,27 @@ async def test_condenser_metrics_included(): ) condenser.llm.metrics = condenser_metrics - # Attach the condenser to the agent - agent.condenser = condenser + # Attach the condenser to the mock_agent + mock_agent.condenser = condenser - # Mock the system message to avoid ID issues - system_message = SystemMessageAction(content='Test system message') - system_message._source = EventSource.AGENT - system_message._id = -1 - agent.get_system_message.return_value = system_message - - # Mock agent step to return a CondensationAction + # Create a real CondensationAction action = CondensationAction( forgotten_events_start_id=1, forgotten_events_end_id=5, summary='Test summary', summary_offset=1, ) + action._source = EventSource.AGENT # Required for event_stream.add_event def agent_step_fn(state): return action - agent.step = agent_step_fn + mock_agent.step = agent_step_fn # Create controller with correct parameters controller = AgentController( - agent=agent, - event_stream=event_stream, + agent=mock_agent, + event_stream=test_event_stream, max_iterations=10, sid='test', confirmation_mode=False, @@ -1270,15 +1263,14 @@ async def test_condenser_metrics_included(): await controller._step() # Get the last event from event stream - events = list(event_stream.get_events()) + events = list(test_event_stream.get_events()) assert len(events) > 0 last_action = events[-1] # Verify metrics were copied correctly assert last_action.llm_metrics is not None - # With the current implementation, only agent.llm.metrics are included - # This test will fail until we fix the implementation + # Verify that both agent and condenser metrics are included assert ( last_action.llm_metrics.accumulated_cost == 0.08 ) # 0.05 from agent + 0.03 from condenser