From 3d4cb89441a73740df48c5217d8dd7414077d58e Mon Sep 17 00:00:00 2001 From: Tim O'Farrell Date: Thu, 29 Jan 2026 15:03:06 -0700 Subject: [PATCH] fix(frontend): Support V1 conversations in MetricsModal (#12678) Co-authored-by: openhands --- .../v1-conversation-service.api.ts | 27 +++++++ .../v1-conversation-service.types.ts | 55 +++++++++++++- .../metrics-modal/metrics-modal.tsx | 50 ++++++++++++- .../src/hooks/query/use-sandbox-metrics.ts | 53 ++++++++++++++ frontend/src/utils/conversation-metrics.ts | 71 +++++++++++++++++++ .../sandbox/remote_sandbox_service.py | 12 +++- 6 files changed, 265 insertions(+), 3 deletions(-) create mode 100644 frontend/src/hooks/query/use-sandbox-metrics.ts create mode 100644 frontend/src/utils/conversation-metrics.ts diff --git a/frontend/src/api/conversation-service/v1-conversation-service.api.ts b/frontend/src/api/conversation-service/v1-conversation-service.api.ts index e9e4315397..8f40424275 100644 --- a/frontend/src/api/conversation-service/v1-conversation-service.api.ts +++ b/frontend/src/api/conversation-service/v1-conversation-service.api.ts @@ -12,6 +12,7 @@ import type { V1AppConversationStartTaskPage, V1AppConversation, GetSkillsResponse, + V1RuntimeConversationInfo, } from "./v1-conversation-service.types"; class V1ConversationService { @@ -360,6 +361,32 @@ class V1ConversationService { ); return data; } + + /** + * Get conversation info directly from the runtime for a V1 conversation + * Uses the custom runtime URL from the conversation + * + * @param conversationId The conversation ID + * @param conversationUrl The conversation URL (e.g., "http://localhost:54928/api/conversations/...") + * @param sessionApiKey Session API key for authentication (required for V1) + * @returns Conversation info from the runtime + */ + static async getRuntimeConversation( + conversationId: string, + conversationUrl: string | null | undefined, + sessionApiKey?: string | null, + ): Promise { + const url = this.buildRuntimeUrl( + 
conversationUrl, + `/api/conversations/${conversationId}`, + ); + const headers = buildSessionHeaders(sessionApiKey); + + const { data } = await axios.get(url, { + headers, + }); + return data; + } } export default V1ConversationService; diff --git a/frontend/src/api/conversation-service/v1-conversation-service.types.ts b/frontend/src/api/conversation-service/v1-conversation-service.types.ts index 35684efcc0..bf37b8ef2d 100644 --- a/frontend/src/api/conversation-service/v1-conversation-service.types.ts +++ b/frontend/src/api/conversation-service/v1-conversation-service.types.ts @@ -2,6 +2,22 @@ import { ConversationTrigger } from "../open-hands.types"; import { Provider } from "#/types/settings"; import { V1SandboxStatus } from "../sandbox-service/sandbox-service.types"; +// V1 Metrics Types +export interface V1TokenUsage { + prompt_tokens: number; + completion_tokens: number; + cache_read_tokens: number; + cache_write_tokens: number; + context_window: number; + per_turn_token: number; +} + +export interface V1MetricsSnapshot { + accumulated_cost: number | null; + max_budget_per_task: number | null; + accumulated_token_usage: V1TokenUsage | null; +} + // V1 API Types for requests // These types match the SDK's TextContent and ImageContent formats export interface V1TextContent { @@ -91,7 +107,7 @@ export interface V1AppConversation { trigger: ConversationTrigger | null; pr_number: number[]; llm_model: string | null; - metrics: unknown | null; + metrics: V1MetricsSnapshot | null; created_at: string; updated_at: string; sandbox_status: V1SandboxStatus; @@ -111,3 +127,40 @@ export interface Skill { export interface GetSkillsResponse { skills: Skill[]; } + +// Runtime conversation types (from agent server) +export interface V1RuntimeConversationStats { + usage_to_metrics: Record; +} + +export interface V1RuntimeMetrics { + model_name: string; + accumulated_cost: number; + max_budget_per_task: number | null; + accumulated_token_usage: V1TokenUsage | null; + costs: 
V1Cost[]; + response_latencies: V1ResponseLatency[]; + token_usages: V1TokenUsage[]; +} + +export interface V1Cost { + model: string; + cost: number; + timestamp: number; +} + +export interface V1ResponseLatency { + model: string; + latency: number; + response_id: string; +} + +export interface V1RuntimeConversationInfo { + id: string; + title: string | null; + metrics: V1MetricsSnapshot | null; + created_at: string; + updated_at: string; + status: V1ConversationExecutionStatus; + stats: V1RuntimeConversationStats; +} diff --git a/frontend/src/components/features/conversation/metrics-modal/metrics-modal.tsx b/frontend/src/components/features/conversation/metrics-modal/metrics-modal.tsx index df794be537..42523a7c5c 100644 --- a/frontend/src/components/features/conversation/metrics-modal/metrics-modal.tsx +++ b/frontend/src/components/features/conversation/metrics-modal/metrics-modal.tsx @@ -1,3 +1,4 @@ +import { useMemo } from "react"; import { useTranslation } from "react-i18next"; import { BaseModalTitle } from "#/components/shared/modals/confirmation-modals/base-modal"; import { ModalBackdrop } from "#/components/shared/modals/modal-backdrop"; @@ -8,6 +9,8 @@ import { UsageSection } from "./usage-section"; import { ContextWindowSection } from "./context-window-section"; import { EmptyState } from "./empty-state"; import useMetricsStore from "#/stores/metrics-store"; +import { useActiveConversation } from "#/hooks/query/use-active-conversation"; +import { useSandboxMetrics } from "#/hooks/query/use-sandbox-metrics"; interface MetricsModalProps { isOpen: boolean; @@ -16,7 +19,52 @@ interface MetricsModalProps { export function MetricsModal({ isOpen, onOpenChange }: MetricsModalProps) { const { t } = useTranslation(); - const metrics = useMetricsStore(); + const storeMetrics = useMetricsStore(); + const { data: conversation } = useActiveConversation(); + + const isV1 = conversation?.conversation_version === "V1"; + const conversationId = 
conversation?.conversation_id; + const conversationUrl = conversation?.url; + const sessionApiKey = conversation?.session_api_key; + + // For V1 conversations, fetch metrics directly from the sandbox + // Only fetch when the modal is open to avoid unnecessary requests + const { data: sandboxMetrics } = useSandboxMetrics( + conversationId, + conversationUrl, + sessionApiKey, + isV1 && isOpen, // Only enable when modal is open + ); + + // Compute the metrics based on conversation version + const metrics = useMemo(() => { + if (isV1 && sandboxMetrics) { + return { + cost: sandboxMetrics.accumulated_cost, + max_budget_per_task: sandboxMetrics.max_budget_per_task, + usage: sandboxMetrics.accumulated_token_usage + ? { + prompt_tokens: + sandboxMetrics.accumulated_token_usage.prompt_tokens ?? 0, + completion_tokens: + sandboxMetrics.accumulated_token_usage.completion_tokens ?? 0, + cache_read_tokens: + sandboxMetrics.accumulated_token_usage.cache_read_tokens ?? 0, + cache_write_tokens: + sandboxMetrics.accumulated_token_usage.cache_write_tokens ?? 0, + context_window: + sandboxMetrics.accumulated_token_usage.context_window ?? 0, + per_turn_token: + sandboxMetrics.accumulated_token_usage.per_turn_token ?? 
0, + } + : null, + }; + } + + // For non-V1 conversations, use the store metrics + return storeMetrics; + }, [isV1, sandboxMetrics, storeMetrics]); + if (!isOpen) return null; return ( diff --git a/frontend/src/hooks/query/use-sandbox-metrics.ts b/frontend/src/hooks/query/use-sandbox-metrics.ts new file mode 100644 index 0000000000..56053f0c33 --- /dev/null +++ b/frontend/src/hooks/query/use-sandbox-metrics.ts @@ -0,0 +1,53 @@ +import { useQuery } from "@tanstack/react-query"; +import V1ConversationService from "#/api/conversation-service/v1-conversation-service.api"; +import { getCombinedMetrics } from "#/utils/conversation-metrics"; +import type { V1MetricsSnapshot } from "#/api/conversation-service/v1-conversation-service.types"; + +/** + * Hook to fetch metrics directly from the sandbox for V1 conversations + * @param conversationId The conversation ID + * @param conversationUrl The conversation URL from the active conversation + * @param sessionApiKey The session API key from the active conversation + * @param enabled Whether the query should be enabled (typically when modal is open and conversation is V1) + */ +export const useSandboxMetrics = ( + conversationId: string | null | undefined, + conversationUrl: string | null | undefined, + sessionApiKey: string | null | undefined, + enabled: boolean = true, +): { + data: V1MetricsSnapshot | undefined; + isLoading: boolean; + error: unknown; +} => { + const query = useQuery({ + queryKey: [ + "sandbox-metrics", + conversationId, + conversationUrl, + sessionApiKey, + ], + queryFn: async () => { + if (!conversationId) throw new Error("Conversation ID is required"); + const conversationInfo = + await V1ConversationService.getRuntimeConversation( + conversationId, + conversationUrl, + sessionApiKey, + ); + return getCombinedMetrics(conversationInfo); + }, + enabled: + enabled && !!conversationId && !!conversationUrl && !!sessionApiKey, + staleTime: 1000 * 30, // 30 seconds + gcTime: 1000 * 60 * 5, // 5 minutes + 
refetchInterval: 1000 * 30, // Refetch every 30 seconds + retry: false, // Don't retry on failure since this is a new endpoint + }); + + return { + data: query.data, + isLoading: query.isLoading, + error: query.error, + }; +}; diff --git a/frontend/src/utils/conversation-metrics.ts b/frontend/src/utils/conversation-metrics.ts new file mode 100644 index 0000000000..69ef293174 --- /dev/null +++ b/frontend/src/utils/conversation-metrics.ts @@ -0,0 +1,71 @@ +import type { + V1MetricsSnapshot, + V1RuntimeConversationInfo, + V1TokenUsage, +} from "#/api/conversation-service/v1-conversation-service.types"; + +/** + * TypeScript equivalent of the get_combined_metrics method from the Python SDK + * Combines metrics from all LLM usage IDs in the conversation stats + */ +export function getCombinedMetrics( + conversationInfo: V1RuntimeConversationInfo, +): V1MetricsSnapshot { + const { stats } = conversationInfo; + + if (!stats?.usage_to_metrics) { + return { + accumulated_cost: 0, + max_budget_per_task: null, + accumulated_token_usage: null, + }; + } + + let totalCost = 0; + let maxBudgetPerTask: number | null = null; + let combinedTokenUsage: V1TokenUsage | null = null; + + // Iterate through all metrics and combine them + for (const metrics of Object.values(stats.usage_to_metrics)) { + // Add up costs + totalCost += metrics.accumulated_cost; + + // Keep the max budget per task if any is set + if (maxBudgetPerTask === null && metrics.max_budget_per_task !== null) { + maxBudgetPerTask = metrics.max_budget_per_task; + } + + // Combine token usage + if (metrics.accumulated_token_usage) { + if (combinedTokenUsage === null) { + combinedTokenUsage = { ...metrics.accumulated_token_usage }; + } else { + combinedTokenUsage = { + prompt_tokens: + combinedTokenUsage.prompt_tokens + + metrics.accumulated_token_usage.prompt_tokens, + completion_tokens: + combinedTokenUsage.completion_tokens + + metrics.accumulated_token_usage.completion_tokens, + cache_read_tokens: + 
combinedTokenUsage.cache_read_tokens +
+            metrics.accumulated_token_usage.cache_read_tokens,
+          cache_write_tokens:
+            combinedTokenUsage.cache_write_tokens +
+            metrics.accumulated_token_usage.cache_write_tokens,
+          context_window: Math.max(
+            combinedTokenUsage.context_window,
+            metrics.accumulated_token_usage.context_window,
+          ),
+          per_turn_token: metrics.accumulated_token_usage.per_turn_token, // Use the latest per_turn_token
+        };
+      }
+    }
+  }
+
+  return {
+    accumulated_cost: totalCost,
+    max_budget_per_task: maxBudgetPerTask,
+    accumulated_token_usage: combinedTokenUsage,
+  };
+}
diff --git a/openhands/app_server/sandbox/remote_sandbox_service.py b/openhands/app_server/sandbox/remote_sandbox_service.py
index 7249d2e38e..12284bf115 100644
--- a/openhands/app_server/sandbox/remote_sandbox_service.py
+++ b/openhands/app_server/sandbox/remote_sandbox_service.py
@@ -711,8 +711,18 @@ async def refresh_conversation(
         updated_conversation_info = ConversationInfo.model_validate(response.json())
 
-        # TODO: As of writing, ConversationInfo from AgentServer does not have a title to update...
         app_conversation_info.updated_at = updated_conversation_info.updated_at
+
+        # TODO: This is a temp fix - the agent server is storing metrics in a new format
+        # We should probably update the data structures to store / display the more
+        # explicit metrics
+        try:
+            app_conversation_info.metrics = (
+                updated_conversation_info.stats.get_combined_metrics()
+            )
+        except Exception:
+            _logger.exception('error_updating_conversation_metrics', stack_info=True)
+
         # TODO: Update other appropriate attributes...
 
         await app_conversation_info_service.save_app_conversation_info(