Mirror of https://github.com/OpenHands/OpenHands.git (synced 2025-12-26 05:48:36 +08:00)
fix(metrics) Merge metrics of agent LLM and condenser LLM (#7890)
Co-authored-by: Carlos Freund <carlosfreund@gmail.com>
parent 78e3f82de1
commit cc8b677f3e
@@ -67,7 +67,6 @@ from openhands.events.observation import (
)
from openhands.events.serialization.event import event_to_trajectory, truncate_content
from openhands.llm.llm import LLM
from openhands.llm.metrics import Metrics

# note: RESUME is only available on web GUI
TRAFFIC_CONTROL_REMINDER = (
@@ -1142,15 +1141,21 @@ class AgentController:
        - accumulated_cost: The current total cost
        - accumulated_token_usage: Accumulated token statistics across all API calls

        This includes metrics from both the agent's LLM and the condenser's LLM if it exists.

        Args:
            action: The action to attach metrics to
        """
        metrics = self.agent.llm.metrics.copy()

        # Include condenser metrics if they exist
        if hasattr(self.agent, 'condenser') and hasattr(self.agent.condenser, 'llm'):
            metrics.merge(self.agent.condenser.llm.metrics)

        # Create a minimal metrics object with just what the frontend needs
        metrics = Metrics(model_name=self.agent.llm.metrics.model_name)
        metrics.accumulated_cost = self.agent.llm.metrics.accumulated_cost
        metrics._accumulated_token_usage = (
            self.agent.llm.metrics.accumulated_token_usage
        )
        metrics._token_usages = []
        metrics._response_latencies = []
        metrics._costs = []

        action.llm_metrics = metrics
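For context, the Metrics.merge call relied on above is not shown in this diff. The following is a minimal sketch of the behavior the controller now depends on, assuming (as the new test further down expects) that accumulated costs are additive and per-category token counts are summed. SketchMetrics and SketchTokenUsage are illustrative stand-ins, not the actual openhands.llm.metrics implementation:

from dataclasses import dataclass, field


@dataclass
class SketchTokenUsage:
    # Illustrative stand-in for openhands.llm.metrics.TokenUsage.
    prompt_tokens: int = 0
    completion_tokens: int = 0
    cache_read_tokens: int = 0
    cache_write_tokens: int = 0


@dataclass
class SketchMetrics:
    # Illustrative stand-in for openhands.llm.metrics.Metrics.
    accumulated_cost: float = 0.0
    accumulated_token_usage: SketchTokenUsage = field(default_factory=SketchTokenUsage)

    def merge(self, other: 'SketchMetrics') -> None:
        # Costs are additive: e.g. 0.05 (agent) + 0.03 (condenser) = 0.08.
        self.accumulated_cost += other.accumulated_cost
        # Token counters are summed per category: e.g. prompt 100 + 200 = 300.
        mine, theirs = self.accumulated_token_usage, other.accumulated_token_usage
        mine.prompt_tokens += theirs.prompt_tokens
        mine.completion_tokens += theirs.completion_tokens
        mine.cache_read_tokens += theirs.cache_read_tokens
        mine.cache_write_tokens += theirs.cache_write_tokens

Under this sketch, merging the condenser metrics (cost 0.03, 200 prompt tokens) into a copy of the agent metrics (cost 0.05, 100 prompt tokens) yields exactly the totals asserted by the new test below.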
@@ -1,3 +1,4 @@
import copy
import time

from pydantic import BaseModel, Field
@@ -200,5 +201,9 @@ class Metrics:
            logs += f'{key}: {value}\n'
        return logs

    def copy(self) -> 'Metrics':
        """Create a deep copy of the Metrics object."""
        return copy.deepcopy(self)

    def __repr__(self) -> str:
        return f'Metrics({self.get()}'
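The deep copy matters because the metrics attached to an action are meant to be a snapshot: the agent's LLM keeps mutating its own Metrics afterwards, and a shared reference would silently update actions that were already emitted. A small self-contained illustration of that behavior, using a stand-in class rather than the real Metrics, with the 0.07/0.1 values taken from the existing test_action_metrics_copy test:

import copy


class SnapshotMetrics:
    # Stand-in for openhands.llm.metrics.Metrics, for illustration only.
    def __init__(self, accumulated_cost: float = 0.0) -> None:
        self.accumulated_cost = accumulated_cost

    def copy(self) -> 'SnapshotMetrics':
        # Same approach as the new Metrics.copy(): a deep copy, so nested
        # structures (token usages, cost lists) would be duplicated as well.
        return copy.deepcopy(self)


live = SnapshotMetrics(accumulated_cost=0.07)
snapshot = live.copy()       # conceptually, what gets attached to the action
live.accumulated_cost = 0.1  # the agent keeps accruing cost afterwards
assert snapshot.accumulated_cost == 0.07  # the attached snapshot is unaffected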
@@ -1195,6 +1195,102 @@ async def test_action_metrics_copy(mock_agent):
    mock_agent.llm.metrics.accumulated_cost = 0.1
    assert last_action.llm_metrics.accumulated_cost == 0.07


@pytest.mark.asyncio
async def test_condenser_metrics_included():
    """Test that metrics from the condenser's LLM are included in the action metrics."""
    # Setup
    file_store = InMemoryFileStore({})
    event_stream = EventStream(sid='test', file_store=file_store)

    # Create agent with metrics
    agent = MagicMock(spec=Agent)
    agent.llm = MagicMock(spec=LLM)
    agent_metrics = Metrics(model_name='agent-model')
    agent_metrics.accumulated_cost = 0.05
    agent_metrics._accumulated_token_usage = TokenUsage(
        model='agent-model',
        prompt_tokens=100,
        completion_tokens=50,
        cache_read_tokens=10,
        cache_write_tokens=10,
        response_id='agent-accumulated',
    )
    agent.llm.metrics = agent_metrics

    # Create condenser with its own metrics
    condenser = MagicMock()
    condenser.llm = MagicMock(spec=LLM)
    condenser_metrics = Metrics(model_name='condenser-model')
    condenser_metrics.accumulated_cost = 0.03
    condenser_metrics._accumulated_token_usage = TokenUsage(
        model='condenser-model',
        prompt_tokens=200,
        completion_tokens=100,
        cache_read_tokens=20,
        cache_write_tokens=5000,  # High cache_write value that should be preserved
        response_id='condenser-accumulated',
    )
    condenser.llm.metrics = condenser_metrics

    # Attach the condenser to the agent
    agent.condenser = condenser

    # Mock agent step to return a CondensationAction
    action = CondensationAction(
        forgotten_events_start_id=1,
        forgotten_events_end_id=5,
        summary='Test summary',
        summary_offset=1,
    )

    def agent_step_fn(state):
        return action

    agent.step = agent_step_fn

    # Create controller with correct parameters
    controller = AgentController(
        agent=agent,
        event_stream=event_stream,
        max_iterations=10,
        sid='test',
        confirmation_mode=False,
        headless_mode=True,
    )

    # Execute one step
    controller.state.agent_state = AgentState.RUNNING
    await controller._step()

    # Get the last event from event stream
    events = list(event_stream.get_events())
    assert len(events) > 0
    last_action = events[-1]

    # Verify metrics were copied correctly
    assert last_action.llm_metrics is not None

    # Both the agent's and the condenser's metrics should be merged into the action metrics
    assert (
        last_action.llm_metrics.accumulated_cost == 0.08
    )  # 0.05 from agent + 0.03 from condenser

    # The accumulated token usage should include both agent and condenser metrics
    assert (
        last_action.llm_metrics.accumulated_token_usage.prompt_tokens == 300
    )  # 100 + 200
    assert (
        last_action.llm_metrics.accumulated_token_usage.completion_tokens == 150
    )  # 50 + 100
    assert (
        last_action.llm_metrics.accumulated_token_usage.cache_read_tokens == 30
    )  # 10 + 20
    assert (
        last_action.llm_metrics.accumulated_token_usage.cache_write_tokens == 5010
    )  # 10 + 5000

    await controller.close()
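The new test covers the case where a condenser with its own LLM is attached to the agent. Because the controller change guards the merge with hasattr checks, an agent without a condenser, or with a condenser that has no llm attribute, simply reports its own metrics unchanged. A small illustration of that guard, using plain stand-in objects rather than the real Agent and condenser classes:

class _NoLLMCondenser:
    # Stand-in for a condenser that never calls an LLM.
    pass


class _LLMCondenser:
    # Anything exposing an 'llm' attribute would trigger the merge.
    llm = object()


class _AgentStub:
    # Bare-bones stand-in for an agent; not the real Agent class.
    def __init__(self, condenser=None):
        if condenser is not None:
            self.condenser = condenser


def condenser_metrics_would_merge(agent) -> bool:
    # Mirrors the guard in AgentController before metrics.merge(...) is called.
    return hasattr(agent, 'condenser') and hasattr(agent.condenser, 'llm')


assert not condenser_metrics_would_merge(_AgentStub())                   # no condenser at all
assert not condenser_metrics_would_merge(_AgentStub(_NoLLMCondenser()))  # condenser without an LLM
assert condenser_metrics_would_merge(_AgentStub(_LLMCondenser()))        # condenser with an LLM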