[Observability] add metadata to track llm request for sessions (#7381)

Co-authored-by: Robert Brennan <accounts@rbren.io>
This commit is contained in:
Xingyao Wang 2025-03-22 16:20:38 -04:00 committed by GitHub
parent e255aa95fe
commit 6f9ced1c23
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 25 additions and 0 deletions

View File

@ -119,6 +119,8 @@ class CodeActAgent(Agent):
'messages': self.llm.format_messages_for_llm(messages),
}
params['tools'] = self.tools
# log to litellm proxy if possible
params['extra_body'] = {'metadata': state.to_llm_metadata(agent_name=self.name)}
response = self.llm.completion(**params)
actions = codeact_function_calling.response_to_actions(response)
for action in actions:

View File

@ -605,6 +605,7 @@ class AgentController:
llm = LLM(config=llm_config, retry_listener=self._notify_on_llm_retry)
delegate_agent = agent_cls(llm=llm, config=agent_config)
state = State(
session_id=self.id.removesuffix('-delegate'),
inputs=action.inputs or {},
local_iteration=0,
iteration=self.state.iteration,
@ -873,6 +874,7 @@ class AgentController:
# If state is None, we create a brand new state and still load the event stream so we can restore the history
if state is None:
self.state = State(
session_id=self.id.removesuffix('-delegate'),
inputs={},
max_iterations=max_iterations,
confirmation_mode=confirmation_mode,

View File

@ -1,9 +1,11 @@
import base64
import os
import pickle
from dataclasses import dataclass, field
from enum import Enum
from typing import Any
import openhands
from openhands.controller.state.task import RootTask
from openhands.core.logger import openhands_logger as logger
from openhands.core.schema import AgentState
@ -71,6 +73,7 @@ class State:
"""
root_task: RootTask = field(default_factory=RootTask)
session_id: str = ''
# global iteration for the current task
iteration: int = 0
# local iteration for the current subtask
@ -201,3 +204,14 @@ class State:
if isinstance(event, MessageAction) and event.source == EventSource.USER:
return event
return None
def to_llm_metadata(self, agent_name: str) -> dict:
    """Build the metadata payload attached to LLM requests for tracing.

    Args:
        agent_name: Name of the agent issuing the request; recorded as a tag.

    Returns:
        A dict with the session id, the OpenHands version used as the trace
        version, and a list of observability tags (agent, web host, version).
    """
    version = openhands.__version__
    # WEB_HOST identifies the deployment host; fall back when not set.
    web_host = os.environ.get('WEB_HOST', 'unspecified')
    tags = [
        f'agent:{agent_name}',
        f'web_host:{web_host}',
        f'openhands_version:{version}',
    ]
    return {
        'session_id': self.session_id,
        'trace_version': version,
        'tags': tags,
    }

View File

@ -235,9 +235,16 @@ class LLM(RetryMixin, DebugMixin):
# NOTE: this setting is global; unlike drop_params, it cannot be overridden in the litellm completion partial
litellm.modify_params = self.config.modify_params
# if we're not using litellm proxy, remove the extra_body
if 'litellm_proxy' not in self.config.model:
kwargs.pop('extra_body', None)
# Record start time for latency measurement
start_time = time.time()
# we don't support streaming here, thus we get a ModelResponse
logger.debug(
f'LLM: calling litellm completion with model: {self.config.model}, base_url: {self.config.base_url}, args: {args}, kwargs: {kwargs}'
)
resp: ModelResponse = self._completion_unwrapped(*args, **kwargs)
# Calculate and record latency