From b7a61901338cf8e1c9e40dd77da5ee3d5a5c9b29 Mon Sep 17 00:00:00 2001 From: Rohit Malhotra Date: Wed, 18 Jun 2025 20:25:01 -0400 Subject: [PATCH] Add max_budget_per_task to settings (#8812) Co-authored-by: openhands Co-authored-by: sp.wack <83104063+amanape@users.noreply.github.com> --- .../conversation-panel.test.tsx | 2 + .../conversation-panel/budget-display.tsx | 34 ++++++++ .../budget-progress-bar.tsx | 27 +++++++ .../conversation-panel/budget-usage-text.tsx | 25 ++++++ .../conversation-panel/conversation-card.tsx | 7 +- .../src/hooks/mutation/use-save-settings.ts | 1 + frontend/src/hooks/query/use-settings.ts | 1 + frontend/src/i18n/declaration.ts | 5 ++ frontend/src/i18n/translation.json | 80 +++++++++++++++++++ frontend/src/mocks/handlers.ts | 1 + frontend/src/routes/app-settings.tsx | 33 +++++++- frontend/src/services/actions.ts | 1 + frontend/src/services/settings.ts | 1 + frontend/src/state/metrics-slice.ts | 3 + frontend/src/types/message.tsx | 1 + frontend/src/types/settings.ts | 2 + .../utils/__tests__/settings-utils.test.ts | 49 ++++++++++++ frontend/src/utils/settings-utils.ts | 18 +++++ openhands/controller/agent_controller.py | 7 +- openhands/events/serialization/event.py | 2 + openhands/llm/metrics.py | 16 ++++ openhands/server/session/session.py | 9 ++- openhands/storage/data_models/settings.py | 2 + 23 files changed, 323 insertions(+), 4 deletions(-) create mode 100644 frontend/src/components/features/conversation-panel/budget-display.tsx create mode 100644 frontend/src/components/features/conversation-panel/budget-progress-bar.tsx create mode 100644 frontend/src/components/features/conversation-panel/budget-usage-text.tsx create mode 100644 frontend/src/utils/__tests__/settings-utils.test.ts diff --git a/frontend/__tests__/components/features/conversation-panel/conversation-panel.test.tsx b/frontend/__tests__/components/features/conversation-panel/conversation-panel.test.tsx index 8219d66d50..bf5ba79e79 100644 --- a/frontend/__tests__/components/features/conversation-panel/conversation-panel.test.tsx +++ b/frontend/__tests__/components/features/conversation-panel/conversation-panel.test.tsx @@ -23,6 +23,7 @@ describe("ConversationPanel", () => { preloadedState: { metrics: { cost: null, + max_budget_per_task: null, usage: null, }, }, @@ -273,6 +274,7 @@ describe("ConversationPanel", () => { preloadedState: { metrics: { cost: null, + max_budget_per_task: null, usage: null, }, }, diff --git a/frontend/src/components/features/conversation-panel/budget-display.tsx b/frontend/src/components/features/conversation-panel/budget-display.tsx new file mode 100644 index 0000000000..249aeacc51 --- /dev/null +++ b/frontend/src/components/features/conversation-panel/budget-display.tsx @@ -0,0 +1,34 @@ +import React from "react"; +import { useTranslation } from "react-i18next"; +import { I18nKey } from "#/i18n/declaration"; +import { BudgetProgressBar } from "./budget-progress-bar"; +import { BudgetUsageText } from "./budget-usage-text"; + +interface BudgetDisplayProps { + cost: number | null; + maxBudgetPerTask: number | null; +} + +export function BudgetDisplay({ cost, maxBudgetPerTask }: BudgetDisplayProps) { + const { t } = useTranslation(); + + // Don't render anything if cost is not available + if (cost === null) { + return null; + } + + return ( +
+ {maxBudgetPerTask !== null && maxBudgetPerTask > 0 ? ( + <> + + + + ) : ( + + {t(I18nKey.CONVERSATION$NO_BUDGET_LIMIT)} + + )} +
+ ); +} diff --git a/frontend/src/components/features/conversation-panel/budget-progress-bar.tsx b/frontend/src/components/features/conversation-panel/budget-progress-bar.tsx new file mode 100644 index 0000000000..17f3dac6e2 --- /dev/null +++ b/frontend/src/components/features/conversation-panel/budget-progress-bar.tsx @@ -0,0 +1,27 @@ +import React from "react"; + +interface BudgetProgressBarProps { + currentCost: number; + maxBudget: number; +} + +export function BudgetProgressBar({ + currentCost, + maxBudget, +}: BudgetProgressBarProps) { + const usagePercentage = (currentCost / maxBudget) * 100; + const isNearLimit = usagePercentage > 80; + + return ( +
+
+
+ ); +} diff --git a/frontend/src/components/features/conversation-panel/budget-usage-text.tsx b/frontend/src/components/features/conversation-panel/budget-usage-text.tsx new file mode 100644 index 0000000000..57cd79e0a5 --- /dev/null +++ b/frontend/src/components/features/conversation-panel/budget-usage-text.tsx @@ -0,0 +1,25 @@ +import React from "react"; +import { useTranslation } from "react-i18next"; +import { I18nKey } from "#/i18n/declaration"; + +interface BudgetUsageTextProps { + currentCost: number; + maxBudget: number; +} + +export function BudgetUsageText({ + currentCost, + maxBudget, +}: BudgetUsageTextProps) { + const { t } = useTranslation(); + const usagePercentage = (currentCost / maxBudget) * 100; + + return ( +
+ + ${currentCost.toFixed(4)} / ${maxBudget.toFixed(4)} ( + {usagePercentage.toFixed(2)}% {t(I18nKey.CONVERSATION$USED)}) + +
+ ); +} diff --git a/frontend/src/components/features/conversation-panel/conversation-card.tsx b/frontend/src/components/features/conversation-panel/conversation-card.tsx index e9c275d16d..38f790f548 100644 --- a/frontend/src/components/features/conversation-panel/conversation-card.tsx +++ b/frontend/src/components/features/conversation-panel/conversation-card.tsx @@ -9,6 +9,7 @@ import { EllipsisButton } from "./ellipsis-button"; import { ConversationCardContextMenu } from "./conversation-card-context-menu"; import { SystemMessageModal } from "./system-message-modal"; import { MicroagentsModal } from "./microagents-modal"; +import { BudgetDisplay } from "./budget-display"; import { cn } from "#/utils/utils"; import { BaseModal } from "../../shared/modals/base-modal/base-modal"; import { RootState } from "#/store"; @@ -285,7 +286,7 @@ export function ConversationCard({
{metrics?.cost !== null && ( -
+
{t(I18nKey.CONVERSATION$TOTAL_COST)} @@ -294,6 +295,10 @@ export function ConversationCard({
)} + {metrics?.usage !== null && ( <> diff --git a/frontend/src/hooks/mutation/use-save-settings.ts b/frontend/src/hooks/mutation/use-save-settings.ts index 6a86ace9f6..8c6a1f6b46 100644 --- a/frontend/src/hooks/mutation/use-save-settings.ts +++ b/frontend/src/hooks/mutation/use-save-settings.ts @@ -26,6 +26,7 @@ const saveSettingsMutationFn = async (settings: Partial) => { enable_proactive_conversation_starters: settings.ENABLE_PROACTIVE_CONVERSATION_STARTERS, search_api_key: settings.SEARCH_API_KEY?.trim() || "", + max_budget_per_task: settings.MAX_BUDGET_PER_TASK, }; await OpenHands.saveSettings(apiSettings); diff --git a/frontend/src/hooks/query/use-settings.ts b/frontend/src/hooks/query/use-settings.ts index e47877b077..9ac38e04d8 100644 --- a/frontend/src/hooks/query/use-settings.ts +++ b/frontend/src/hooks/query/use-settings.ts @@ -27,6 +27,7 @@ const getSettingsQueryFn = async (): Promise => { apiSettings.enable_proactive_conversation_starters, USER_CONSENTS_TO_ANALYTICS: apiSettings.user_consents_to_analytics, SEARCH_API_KEY: apiSettings.search_api_key || "", + MAX_BUDGET_PER_TASK: apiSettings.max_budget_per_task, EMAIL: apiSettings.email || "", EMAIL_VERIFIED: apiSettings.email_verified, MCP_CONFIG: apiSettings.mcp_config, diff --git a/frontend/src/i18n/declaration.ts b/frontend/src/i18n/declaration.ts index 58d64aacb1..61c6071faf 100644 --- a/frontend/src/i18n/declaration.ts +++ b/frontend/src/i18n/declaration.ts @@ -121,6 +121,8 @@ export enum I18nKey { SETTINGS$LLM_SETTINGS = "SETTINGS$LLM_SETTINGS", SETTINGS$GIT_SETTINGS = "SETTINGS$GIT_SETTINGS", SETTINGS$SOUND_NOTIFICATIONS = "SETTINGS$SOUND_NOTIFICATIONS", + SETTINGS$MAX_BUDGET_PER_TASK = "SETTINGS$MAX_BUDGET_PER_TASK", + SETTINGS$MAX_BUDGET_PER_CONVERSATION = "SETTINGS$MAX_BUDGET_PER_CONVERSATION", SETTINGS$PROACTIVE_CONVERSATION_STARTERS = "SETTINGS$PROACTIVE_CONVERSATION_STARTERS", SETTINGS$SEARCH_API_KEY = "SETTINGS$SEARCH_API_KEY", SETTINGS$SEARCH_API_KEY_OPTIONAL = "SETTINGS$SEARCH_API_KEY_OPTIONAL", @@ -494,6 +496,9 @@ export enum I18nKey { CONVERSATION$DOWNLOAD_ERROR = "CONVERSATION$DOWNLOAD_ERROR", CONVERSATION$UPDATED = "CONVERSATION$UPDATED", CONVERSATION$TOTAL_COST = "CONVERSATION$TOTAL_COST", + CONVERSATION$BUDGET = "CONVERSATION$BUDGET", + CONVERSATION$BUDGET_USAGE = "CONVERSATION$BUDGET_USAGE", + CONVERSATION$NO_BUDGET_LIMIT = "CONVERSATION$NO_BUDGET_LIMIT", CONVERSATION$INPUT = "CONVERSATION$INPUT", CONVERSATION$OUTPUT = "CONVERSATION$OUTPUT", CONVERSATION$TOTAL = "CONVERSATION$TOTAL", diff --git a/frontend/src/i18n/translation.json b/frontend/src/i18n/translation.json index 3dc2c32f3c..470ef7736f 100644 --- a/frontend/src/i18n/translation.json +++ b/frontend/src/i18n/translation.json @@ -1935,6 +1935,38 @@ "tr": "Ses Bildirimleri", "uk": "Звукові сповіщення" }, + "SETTINGS$MAX_BUDGET_PER_TASK": { + "en": "Maximum Budget Per Task", + "ja": "タスクごとの最大予算", + "zh-CN": "每个任务的最大预算", + "zh-TW": "每個任務的最大預算", + "ko-KR": "작업당 최대 예산", + "de": "Maximales Budget pro Aufgabe", + "no": "Maksimalt budsjett per oppgave", + "it": "Budget massimo per attività", + "pt": "Orçamento máximo por tarefa", + "es": "Presupuesto máximo por tarea", + "ar": "الميزانية القصوى لكل مهمة", + "fr": "Budget maximum par tâche", + "tr": "Görev Başına Maksimum Bütçe", + "uk": "Максимальний бюджет на завдання" + }, + "SETTINGS$MAX_BUDGET_PER_CONVERSATION": { + "en": "Maximum Budget Per Conversation", + "ja": "会話ごとの最大予算", + "zh-CN": "每次对话的最大预算", + "zh-TW": "每次對話的最大預算", + "ko-KR": "대화당 최대 예산", + "de": "Maximales Budget pro Konversation", + "no": "Maksimalt budsjett per samtale", + "it": "Budget massimo per conversazione", + "pt": "Orçamento máximo por conversa", + "es": "Presupuesto máximo por conversación", + "ar": "الميزانية القصوى لكل محادثة", + "fr": "Budget maximum par conversation", + "tr": "Konuşma Başına Maksimum Bütçe", + "uk": "Максимальний бюджет на розмову" + }, "SETTINGS$PROACTIVE_CONVERSATION_STARTERS": { "en": "Suggest Tasks on GitHub", "ja": "GitHubでタスクを提案", @@ -7903,6 +7935,54 @@ "tr": "Toplam Maliyet", "uk": "Загальна вартість" }, + "CONVERSATION$BUDGET": { + "en": "Budget", + "ja": "予算", + "zh-CN": "预算", + "zh-TW": "預算", + "ko-KR": "예산", + "de": "Budget", + "no": "Budsjett", + "it": "Budget", + "pt": "Orçamento", + "es": "Presupuesto", + "ar": "الميزانية", + "fr": "Budget", + "tr": "Bütçe", + "uk": "Бюджет" + }, + "CONVERSATION$BUDGET_USAGE": { + "en": "% used", + "ja": "% 使用済み", + "zh-CN": "% 已使用", + "zh-TW": "% 已使用", + "ko-KR": "% 사용됨", + "de": "% verwendet", + "no": "% brukt", + "it": "% utilizzato", + "pt": "% utilizado", + "es": "% utilizado", + "ar": "% مستخدم", + "fr": "% utilisé", + "tr": "% kullanıldı", + "uk": "% використано" + }, + "CONVERSATION$NO_BUDGET_LIMIT": { + "en": "No budget limit", + "ja": "予算制限なし", + "zh-CN": "无预算限制", + "zh-TW": "無預算限制", + "ko-KR": "예산 제한 없음", + "de": "Kein Budgetlimit", + "no": "Ingen budsjettgrense", + "it": "Nessun limite di budget", + "pt": "Sem limite de orçamento", + "es": "Sin límite de presupuesto", + "ar": "لا حد للميزانية", + "fr": "Pas de limite de budget", + "tr": "Bütçe limiti yok", + "uk": "Без обмеження бюджету" + }, "CONVERSATION$INPUT": { "en": "- Input:", "ja": "- 入力:", diff --git a/frontend/src/mocks/handlers.ts b/frontend/src/mocks/handlers.ts index c123319dc0..928f19c754 100644 --- a/frontend/src/mocks/handlers.ts +++ b/frontend/src/mocks/handlers.ts @@ -30,6 +30,7 @@ export const MOCK_DEFAULT_USER_SETTINGS: ApiSettings | PostApiSettings = { enable_proactive_conversation_starters: DEFAULT_SETTINGS.ENABLE_PROACTIVE_CONVERSATION_STARTERS, user_consents_to_analytics: DEFAULT_SETTINGS.USER_CONSENTS_TO_ANALYTICS, + max_budget_per_task: DEFAULT_SETTINGS.MAX_BUDGET_PER_TASK, }; const MOCK_USER_PREFERENCES: { diff --git a/frontend/src/routes/app-settings.tsx b/frontend/src/routes/app-settings.tsx index fe435d552a..c0021d0efb 100644 --- a/frontend/src/routes/app-settings.tsx +++ b/frontend/src/routes/app-settings.tsx @@ -6,6 +6,7 @@ import { AvailableLanguages } from "#/i18n"; import { DEFAULT_SETTINGS } from "#/services/settings"; import { BrandButton } from "#/components/features/settings/brand-button"; import { SettingsSwitch } from "#/components/features/settings/settings-switch"; +import { SettingsInput } from "#/components/features/settings/settings-input"; import { I18nKey } from "#/i18n/declaration"; import { LanguageInput } from "#/components/features/settings/app-settings/language-input"; import { handleCaptureConsent } from "#/utils/handle-capture-consent"; @@ -16,6 +17,7 @@ import { import { retrieveAxiosErrorMessage } from "#/utils/retrieve-axios-error-message"; import { AppSettingsInputsSkeleton } from "#/components/features/settings/app-settings/app-settings-inputs-skeleton"; import { useConfig } from "#/hooks/query/use-config"; +import { parseMaxBudgetPerTask } from "#/utils/settings-utils"; function AppSettingsScreen() { const { t } = useTranslation(); @@ -36,6 +38,8 @@ function AppSettingsScreen() { proactiveConversationsSwitchHasChanged, setProactiveConversationsSwitchHasChanged, ] = React.useState(false); + const [maxBudgetPerTaskHasChanged, setMaxBudgetPerTaskHasChanged] = + React.useState(false); const formAction = (formData: FormData) => { const languageLabel = formData.get("language-input")?.toString(); @@ -53,12 +57,18 @@ function AppSettingsScreen() { formData.get("enable-proactive-conversations-switch")?.toString() === "on"; + const maxBudgetPerTaskValue = formData + .get("max-budget-per-task-input") + ?.toString(); + const maxBudgetPerTask = parseMaxBudgetPerTask(maxBudgetPerTaskValue || ""); + saveSettings( { LANGUAGE: language, user_consents_to_analytics: enableAnalytics, ENABLE_SOUND_NOTIFICATIONS: enableSoundNotifications, ENABLE_PROACTIVE_CONVERSATION_STARTERS: enableProactiveConversations, + MAX_BUDGET_PER_TASK: maxBudgetPerTask, }, { onSuccess: () => { @@ -74,6 +84,7 @@ function AppSettingsScreen() { setAnalyticsSwitchHasChanged(false); setSoundNotificationsSwitchHasChanged(false); setProactiveConversationsSwitchHasChanged(false); + setMaxBudgetPerTaskHasChanged(false); }, }, ); @@ -110,11 +121,18 @@ function AppSettingsScreen() { ); }; + const checkIfMaxBudgetPerTaskHasChanged = (value: string) => { + const newValue = parseMaxBudgetPerTask(value); + const currentValue = settings?.MAX_BUDGET_PER_TASK; + setMaxBudgetPerTaskHasChanged(newValue !== currentValue); + }; + const formIsClean = !languageInputHasChanged && !analyticsSwitchHasChanged && !soundNotificationsSwitchHasChanged && - !proactiveConversationsSwitchHasChanged; + !proactiveConversationsSwitchHasChanged && + !maxBudgetPerTaskHasChanged; const shouldBeLoading = !settings || isLoading || isPending; @@ -163,6 +181,19 @@ function AppSettingsScreen() { {t(I18nKey.SETTINGS$PROACTIVE_CONVERSATION_STARTERS)} )} + +
)} diff --git a/frontend/src/services/actions.ts b/frontend/src/services/actions.ts index 126b4dac40..f0c02086d1 100644 --- a/frontend/src/services/actions.ts +++ b/frontend/src/services/actions.ts @@ -22,6 +22,7 @@ export function handleActionMessage(message: ActionMessage) { if (message.llm_metrics) { const metrics = { cost: message.llm_metrics?.accumulated_cost ?? null, + max_budget_per_task: message.llm_metrics?.max_budget_per_task ?? null, usage: message.llm_metrics?.accumulated_token_usage ?? null, }; store.dispatch(setMetrics(metrics)); diff --git a/frontend/src/services/settings.ts b/frontend/src/services/settings.ts index 5890decf26..c1b46b46f1 100644 --- a/frontend/src/services/settings.ts +++ b/frontend/src/services/settings.ts @@ -19,6 +19,7 @@ export const DEFAULT_SETTINGS: Settings = { ENABLE_PROACTIVE_CONVERSATION_STARTERS: false, SEARCH_API_KEY: "", IS_NEW_USER: true, + MAX_BUDGET_PER_TASK: null, EMAIL: "", EMAIL_VERIFIED: true, // Default to true to avoid restricting access unnecessarily MCP_CONFIG: { diff --git a/frontend/src/state/metrics-slice.ts b/frontend/src/state/metrics-slice.ts index 551430756e..77154340b1 100644 --- a/frontend/src/state/metrics-slice.ts +++ b/frontend/src/state/metrics-slice.ts @@ -2,6 +2,7 @@ import { createSlice, PayloadAction } from "@reduxjs/toolkit"; interface MetricsState { cost: number | null; + max_budget_per_task: number | null; usage: { prompt_tokens: number; completion_tokens: number; @@ -14,6 +15,7 @@ interface MetricsState { const initialState: MetricsState = { cost: null, + max_budget_per_task: null, usage: null, }; @@ -23,6 +25,7 @@ const metricsSlice = createSlice({ reducers: { setMetrics: (state, action: PayloadAction) => { state.cost = action.payload.cost; + state.max_budget_per_task = action.payload.max_budget_per_task; state.usage = action.payload.usage; }, }, diff --git a/frontend/src/types/message.tsx b/frontend/src/types/message.tsx index 752b8c5c19..0ae9e708ce 100644 --- a/frontend/src/types/message.tsx +++ b/frontend/src/types/message.tsx @@ -23,6 +23,7 @@ export interface ActionMessage { // LLM metrics information llm_metrics?: { accumulated_cost: number; + max_budget_per_task: number | null; accumulated_token_usage: { prompt_tokens: number; completion_tokens: number; diff --git a/frontend/src/types/settings.ts b/frontend/src/types/settings.ts index 8390d66ff9..a083429b38 100644 --- a/frontend/src/types/settings.ts +++ b/frontend/src/types/settings.ts @@ -46,6 +46,7 @@ export type Settings = { SEARCH_API_KEY?: string; IS_NEW_USER?: boolean; MCP_CONFIG?: MCPConfig; + MAX_BUDGET_PER_TASK: number | null; EMAIL?: string; EMAIL_VERIFIED?: boolean; }; @@ -67,6 +68,7 @@ export type ApiSettings = { user_consents_to_analytics: boolean | null; search_api_key?: string; provider_tokens_set: Partial>; + max_budget_per_task: number | null; mcp_config?: { sse_servers: (string | MCPSSEServer)[]; stdio_servers: MCPStdioServer[]; diff --git a/frontend/src/utils/__tests__/settings-utils.test.ts b/frontend/src/utils/__tests__/settings-utils.test.ts new file mode 100644 index 0000000000..130cbbe555 --- /dev/null +++ b/frontend/src/utils/__tests__/settings-utils.test.ts @@ -0,0 +1,49 @@ +import { describe, it, expect } from "vitest"; +import { parseMaxBudgetPerTask } from "../settings-utils"; + +describe("parseMaxBudgetPerTask", () => { + it("should return null for empty string", () => { + expect(parseMaxBudgetPerTask("")).toBeNull(); + }); + + it("should return null for whitespace-only string", () => { + expect(parseMaxBudgetPerTask(" ")).toBeNull(); + }); + + it("should return null for non-numeric string", () => { + expect(parseMaxBudgetPerTask("abc")).toBeNull(); + }); + + it("should return null for values less than 1", () => { + expect(parseMaxBudgetPerTask("0")).toBeNull(); + expect(parseMaxBudgetPerTask("0.5")).toBeNull(); + expect(parseMaxBudgetPerTask("-1")).toBeNull(); + expect(parseMaxBudgetPerTask("-10.5")).toBeNull(); + }); + + it("should return the parsed value for valid numbers >= 1", () => { + expect(parseMaxBudgetPerTask("1")).toBe(1); + expect(parseMaxBudgetPerTask("1.0")).toBe(1); + expect(parseMaxBudgetPerTask("1.5")).toBe(1.5); + expect(parseMaxBudgetPerTask("10")).toBe(10); + expect(parseMaxBudgetPerTask("100.99")).toBe(100.99); + }); + + it("should handle string numbers with leading/trailing whitespace", () => { + expect(parseMaxBudgetPerTask(" 1 ")).toBe(1); + expect(parseMaxBudgetPerTask(" 10.5 ")).toBe(10.5); + }); + + it("should return null for edge cases", () => { + expect(parseMaxBudgetPerTask("0.999")).toBeNull(); + expect(parseMaxBudgetPerTask("NaN")).toBeNull(); + expect(parseMaxBudgetPerTask("Infinity")).toBeNull(); + expect(parseMaxBudgetPerTask("-Infinity")).toBeNull(); + }); + + it("should handle scientific notation", () => { + expect(parseMaxBudgetPerTask("1e0")).toBe(1); + expect(parseMaxBudgetPerTask("1.5e1")).toBe(15); + expect(parseMaxBudgetPerTask("5e-1")).toBeNull(); // 0.5, which is < 1 + }); +}); diff --git a/frontend/src/utils/settings-utils.ts b/frontend/src/utils/settings-utils.ts index 979e4bb1b4..93c5a5d4b1 100644 --- a/frontend/src/utils/settings-utils.ts +++ b/frontend/src/utils/settings-utils.ts @@ -47,6 +47,24 @@ const extractAdvancedFormData = (formData: FormData) => { }; }; +/** + * Parses and validates a max budget per task value. + * Ensures the value is at least 1 dollar. + * @param value - The string value to parse + * @returns The parsed number if valid (>= 1), null otherwise + */ +export const parseMaxBudgetPerTask = (value: string): number | null => { + if (!value) { + return null; + } + + const parsedValue = parseFloat(value); + // Ensure the value is at least 1 dollar and is a finite number + return parsedValue && parsedValue >= 1 && Number.isFinite(parsedValue) + ? parsedValue + : null; +}; + export const extractSettings = ( formData: FormData, ): Partial & { llm_api_key?: string | null } => { diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 32afcedbe7..4b06a53218 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -1136,6 +1136,7 @@ class AgentController: To avoid performance issues with long conversations, we only keep: - accumulated_cost: The current total cost - accumulated_token_usage: Accumulated token statistics across all API calls + - max_budget_per_task: The maximum budget allowed for the task This includes metrics from both the agent's LLM and the condenser's LLM if it exists. @@ -1158,6 +1159,10 @@ class AgentController: if condenser_metrics: metrics.accumulated_cost += condenser_metrics.accumulated_cost + # Add max_budget_per_task to metrics + if self.state.budget_flag: + metrics.max_budget_per_task = self.state.budget_flag.max_value + # Set accumulated token usage (sum of agent and condenser token usage) # Use a deep copy to ensure we don't modify the original object metrics._accumulated_token_usage = ( @@ -1180,7 +1185,7 @@ class AgentController: accumulated_usage = self.state.metrics.accumulated_token_usage self.log( 'debug', - f'Action metrics - accumulated_cost: {metrics.accumulated_cost}, ' + f'Action metrics - accumulated_cost: {metrics.accumulated_cost}, max_budget: {metrics.max_budget_per_task}, ' f'latest tokens (prompt/completion/cache_read/cache_write): ' f'{latest_usage.prompt_tokens if latest_usage else 0}/' f'{latest_usage.completion_tokens if latest_usage else 0}/' diff --git a/openhands/events/serialization/event.py b/openhands/events/serialization/event.py index 546bc7155f..1bd669c7c3 100644 --- a/openhands/events/serialization/event.py +++ b/openhands/events/serialization/event.py @@ -70,6 +70,8 @@ def event_from_dict(data: dict[str, Any]) -> 'Event': metrics = Metrics() if isinstance(value, dict): metrics.accumulated_cost = value.get('accumulated_cost', 0.0) + # Set max_budget_per_task if available + metrics.max_budget_per_task = value.get('max_budget_per_task') for cost in value.get('costs', []): metrics._costs.append(Cost(**cost)) metrics.response_latencies = [ diff --git a/openhands/llm/metrics.py b/openhands/llm/metrics.py index 6142091882..2dbe90f824 100644 --- a/openhands/llm/metrics.py +++ b/openhands/llm/metrics.py @@ -48,12 +48,14 @@ class Metrics: """Metrics class can record various metrics during running and evaluation. We track: - accumulated_cost and costs + - max_budget_per_task (budget limit) - A list of ResponseLatency - A list of TokenUsage (one per call). """ def __init__(self, model_name: str = 'default') -> None: self._accumulated_cost: float = 0.0 + self._max_budget_per_task: float | None = None self._costs: list[Cost] = [] self._response_latencies: list[ResponseLatency] = [] self.model_name = model_name @@ -78,6 +80,14 @@ class Metrics: raise ValueError('Total cost cannot be negative.') self._accumulated_cost = value + @property + def max_budget_per_task(self) -> float | None: + return self._max_budget_per_task + + @max_budget_per_task.setter + def max_budget_per_task(self, value: float | None) -> None: + self._max_budget_per_task = value + @property def costs(self) -> list[Cost]: return self._costs @@ -171,6 +181,11 @@ class Metrics: def merge(self, other: 'Metrics') -> None: """Merge 'other' metrics into this one.""" self._accumulated_cost += other.accumulated_cost + + # Keep the max_budget_per_task from other if it's set and this one isn't + if self._max_budget_per_task is None and other.max_budget_per_task is not None: + self._max_budget_per_task = other.max_budget_per_task + self._costs += other._costs # use the property so older picked objects that lack the field won't crash self.token_usages += other.token_usages @@ -185,6 +200,7 @@ class Metrics: """Return the metrics in a dictionary.""" return { 'accumulated_cost': self._accumulated_cost, + 'max_budget_per_task': self._max_budget_per_task, 'accumulated_token_usage': self.accumulated_token_usage.model_dump(), 'costs': [cost.model_dump() for cost in self._costs], 'response_latencies': [ diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py index c04fcf10d6..28060c2af9 100644 --- a/openhands/server/session/session.py +++ b/openhands/server/session/session.py @@ -118,6 +118,13 @@ class Session: ) max_iterations = settings.max_iterations or self.config.max_iterations + # Prioritize settings over config for max_budget_per_task + max_budget_per_task = ( + settings.max_budget_per_task + if settings.max_budget_per_task is not None + else self.config.max_budget_per_task + ) + # This is a shallow copy of the default LLM config, so changes here will # persist if we retrieve the default LLM config again when constructing # the agent @@ -189,7 +196,7 @@ class Session: config=self.config, agent=agent, max_iterations=max_iterations, - max_budget_per_task=self.config.max_budget_per_task, + max_budget_per_task=max_budget_per_task, agent_to_llm_config=self.config.get_agent_to_llm_config_map(), agent_configs=self.config.get_agent_configs(), git_provider_tokens=git_provider_tokens, diff --git a/openhands/storage/data_models/settings.py b/openhands/storage/data_models/settings.py index 027b7d9047..af0abdda62 100644 --- a/openhands/storage/data_models/settings.py +++ b/openhands/storage/data_models/settings.py @@ -40,6 +40,7 @@ class Settings(BaseModel): sandbox_runtime_container_image: str | None = None mcp_config: MCPConfig | None = None search_api_key: SecretStr | None = None + max_budget_per_task: float | None = None email: str | None = None email_verified: bool | None = None @@ -131,5 +132,6 @@ class Settings(BaseModel): remote_runtime_resource_factor=app_config.sandbox.remote_runtime_resource_factor, mcp_config=mcp_config, search_api_key=app_config.search_api_key, + max_budget_per_task=app_config.max_budget_per_task, ) return settings