fix(metrics) Merge metrics of agent LLM and condenser LLM (#7890)

Co-authored-by: Carlos Freund <carlosfreund@gmail.com>
Authored by Carlos Freund on 2025-04-17 19:15:14 +02:00; committed by GitHub.
parent 78e3f82de1
commit cc8b677f3e
3 changed files with 112 additions and 6 deletions

View File

@@ -67,7 +67,6 @@ from openhands.events.observation import (
 )
 from openhands.events.serialization.event import event_to_trajectory, truncate_content
 from openhands.llm.llm import LLM
-from openhands.llm.metrics import Metrics
 
 # note: RESUME is only available on web GUI
 TRAFFIC_CONTROL_REMINDER = (
@@ -1142,15 +1141,21 @@ class AgentController:
         - accumulated_cost: The current total cost
         - accumulated_token_usage: Accumulated token statistics across all API calls
 
+        This includes metrics from both the agent's LLM and the condenser's LLM if it exists.
+
         Args:
             action: The action to attach metrics to
         """
-        # Create a minimal metrics object with just what the frontend needs
-        metrics = Metrics(model_name=self.agent.llm.metrics.model_name)
-        metrics.accumulated_cost = self.agent.llm.metrics.accumulated_cost
-        metrics._accumulated_token_usage = (
-            self.agent.llm.metrics.accumulated_token_usage
-        )
-        metrics._token_usages = []
-        metrics._response_latencies = []
-        metrics._costs = []
+        metrics = self.agent.llm.metrics.copy()
+
+        # Include condenser metrics if they exist
+        if hasattr(self.agent, 'condenser') and hasattr(self.agent.condenser, 'llm'):
+            metrics.merge(self.agent.condenser.llm.metrics)
+
         action.llm_metrics = metrics
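
The merge semantics the new code relies on are not shown in this diff. As a rough standalone sketch, assuming Metrics.merge adds the other object's accumulated cost and token usage onto the receiver (which is what the new unit test below asserts):

    from openhands.llm.metrics import Metrics

    # Two metrics objects with known costs, mirroring the values used in the test.
    agent_metrics = Metrics(model_name='agent-model')
    agent_metrics.accumulated_cost = 0.05

    condenser_metrics = Metrics(model_name='condenser-model')
    condenser_metrics.accumulated_cost = 0.03

    # Take a snapshot of the agent metrics, then fold the condenser metrics into it.
    snapshot = agent_metrics.copy()    # deep copy added in this commit
    snapshot.merge(condenser_metrics)  # assumed to sum costs and token usage
    assert snapshot.accumulated_cost == 0.08       # 0.05 + 0.03
    assert agent_metrics.accumulated_cost == 0.05  # the live agent metrics are untouched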

View File

@@ -1,3 +1,4 @@
+import copy
 import time
 
 from pydantic import BaseModel, Field

@@ -200,5 +201,9 @@ class Metrics:
             logs += f'{key}: {value}\n'
         return logs
 
+    def copy(self) -> 'Metrics':
+        """Create a deep copy of the Metrics object."""
+        return copy.deepcopy(self)
+
     def __repr__(self) -> str:
         return f'Metrics({self.get()}'
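
The new copy() gives callers a snapshot of the metrics rather than a live reference, so metrics already attached to an emitted action are not changed by later LLM calls. A minimal usage sketch (values mirror test_action_metrics_copy in the test file below):

    from openhands.llm.metrics import Metrics

    live = Metrics(model_name='agent-model')
    live.accumulated_cost = 0.07

    snapshot = live.copy()       # deep copy via copy.deepcopy
    live.accumulated_cost = 0.1  # later updates to the live metrics...
    assert snapshot.accumulated_cost == 0.07  # ...do not leak into the snapshot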

View File

@@ -1195,6 +1195,102 @@ async def test_action_metrics_copy(mock_agent):
     mock_agent.llm.metrics.accumulated_cost = 0.1
     assert last_action.llm_metrics.accumulated_cost == 0.07
 
+
+@pytest.mark.asyncio
+async def test_condenser_metrics_included():
+    """Test that metrics from the condenser's LLM are included in the action metrics."""
+    # Setup
+    file_store = InMemoryFileStore({})
+    event_stream = EventStream(sid='test', file_store=file_store)
+
+    # Create agent with metrics
+    agent = MagicMock(spec=Agent)
+    agent.llm = MagicMock(spec=LLM)
+    agent_metrics = Metrics(model_name='agent-model')
+    agent_metrics.accumulated_cost = 0.05
+    agent_metrics._accumulated_token_usage = TokenUsage(
+        model='agent-model',
+        prompt_tokens=100,
+        completion_tokens=50,
+        cache_read_tokens=10,
+        cache_write_tokens=10,
+        response_id='agent-accumulated',
+    )
+    agent.llm.metrics = agent_metrics
+
+    # Create condenser with its own metrics
+    condenser = MagicMock()
+    condenser.llm = MagicMock(spec=LLM)
+    condenser_metrics = Metrics(model_name='condenser-model')
+    condenser_metrics.accumulated_cost = 0.03
+    condenser_metrics._accumulated_token_usage = TokenUsage(
+        model='condenser-model',
+        prompt_tokens=200,
+        completion_tokens=100,
+        cache_read_tokens=20,
+        cache_write_tokens=5000,  # High cache_write value that should be preserved
+        response_id='condenser-accumulated',
+    )
+    condenser.llm.metrics = condenser_metrics
+
+    # Attach the condenser to the agent
+    agent.condenser = condenser
+
+    # Mock agent step to return a CondensationAction
+    action = CondensationAction(
+        forgotten_events_start_id=1,
+        forgotten_events_end_id=5,
+        summary='Test summary',
+        summary_offset=1,
+    )
+
+    def agent_step_fn(state):
+        return action
+
+    agent.step = agent_step_fn
+
+    # Create controller with correct parameters
+    controller = AgentController(
+        agent=agent,
+        event_stream=event_stream,
+        max_iterations=10,
+        sid='test',
+        confirmation_mode=False,
+        headless_mode=True,
+    )
+
+    # Execute one step
+    controller.state.agent_state = AgentState.RUNNING
+    await controller._step()
+
+    # Get the last event from the event stream
+    events = list(event_stream.get_events())
+    assert len(events) > 0
+    last_action = events[-1]
+
+    # Verify metrics were copied correctly
+    assert last_action.llm_metrics is not None
+    # The accumulated cost should include both the agent's and the condenser's costs
+    assert (
+        last_action.llm_metrics.accumulated_cost == 0.08
+    )  # 0.05 from agent + 0.03 from condenser
+
+    # The accumulated token usage should include both agent and condenser metrics
+    assert (
+        last_action.llm_metrics.accumulated_token_usage.prompt_tokens == 300
+    )  # 100 + 200
+    assert (
+        last_action.llm_metrics.accumulated_token_usage.completion_tokens == 150
+    )  # 50 + 100
+    assert (
+        last_action.llm_metrics.accumulated_token_usage.cache_read_tokens == 30
+    )  # 10 + 20
+    assert (
+        last_action.llm_metrics.accumulated_token_usage.cache_write_tokens == 5010
+    )  # 10 + 5000
+
+    await controller.close()
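
The token assertions encode a simple field-wise accumulation. As an illustrative sketch only (the real combination happens inside Metrics.merge, which is not part of this diff; the import location of TokenUsage and the helper name are assumptions):

    from openhands.llm.metrics import TokenUsage  # assumed to live alongside Metrics

    def combine_usage(a: TokenUsage, b: TokenUsage) -> TokenUsage:
        # Hypothetical helper: the field-wise sums the assertions above expect.
        return TokenUsage(
            model=a.model,
            prompt_tokens=a.prompt_tokens + b.prompt_tokens,                 # 100 + 200 = 300
            completion_tokens=a.completion_tokens + b.completion_tokens,     # 50 + 100 = 150
            cache_read_tokens=a.cache_read_tokens + b.cache_read_tokens,     # 10 + 20 = 30
            cache_write_tokens=a.cache_write_tokens + b.cache_write_tokens,  # 10 + 5000 = 5010
            response_id=a.response_id,
        )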