From 3d68711ca3b71161fa2ff687e2f7c2356613f2ef Mon Sep 17 00:00:00 2001
From: AutoLTX
Date: Fri, 9 May 2025 11:39:14 +0800
Subject: [PATCH] Display context window usage status in UI (#8267)

---
 .../conversation-panel/conversation-card.tsx  | 30 ++++++++++++++++++-
 frontend/src/i18n/declaration.ts              |  2 ++
 frontend/src/i18n/translation.json            | 30 +++++++++++++++++++
 frontend/src/state/metrics-slice.ts           |  2 ++
 frontend/src/types/message.tsx                |  2 ++
 openhands/llm/llm.py                          |  8 +++++
 openhands/llm/metrics.py                      | 16 ++++++++++
 tests/unit/test_llm.py                        |  6 ++--
 tests/unit/test_message_utils.py              |  2 ++
 9 files changed, 94 insertions(+), 4 deletions(-)

diff --git a/frontend/src/components/features/conversation-panel/conversation-card.tsx b/frontend/src/components/features/conversation-panel/conversation-card.tsx
index 56a21e5f97..a8c02c969d 100644
--- a/frontend/src/components/features/conversation-panel/conversation-card.tsx
+++ b/frontend/src/components/features/conversation-panel/conversation-card.tsx
@@ -307,7 +307,7 @@ export function ConversationCard({
                 ).toLocaleString()}
               </span>
             </div>
-            <div className="flex justify-between">
+            <div className="flex justify-between border-b border-neutral-700 pb-2">
               <span>
                 {t(I18nKey.CONVERSATION$TOTAL)}:
               </span>
@@ -318,6 +318,34 @@ export function ConversationCard({
                 ).toLocaleString()}
               </span>
             </div>
+            <div className="flex flex-col gap-1 pt-2">
+              <span>
+                {t(I18nKey.CONVERSATION$CONTEXT_WINDOW)}
+              </span>
+              <div className="w-full h-1.5 rounded-full bg-neutral-700">
+                <div
+                  className="h-1.5 rounded-full bg-primary"
+                  style={{
+                    width: `${
+                      (metrics.usage.per_turn_token /
+                        metrics.usage.context_window) *
+                      100
+                    }%`,
+                  }}
+                />
+              </div>
+              <span className="self-end">
+                {metrics.usage.per_turn_token.toLocaleString()} /{" "}
+                {metrics.usage.context_window.toLocaleString()} (
+                {(
+                  (metrics.usage.per_turn_token /
+                    metrics.usage.context_window) *
+                  100
+                ).toFixed(2)}
+                % {t(I18nKey.CONVERSATION$USED)})
+              </span>
+            </div>
+
           )}
         </div>
       </div>
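
Note on the percentage math in the hunk above: the backend defaults
context_window to 0 when model info is unavailable, so the division can
render NaN. A minimal guard sketch in TypeScript (the helper name
formatContextUsage is hypothetical, not part of this patch):

// Hypothetical helper, not in this patch: formats the context-window
// usage line and avoids NaN when the context window is unknown (0).
function formatContextUsage(
  perTurnToken: number,
  contextWindow: number,
): string {
  if (contextWindow <= 0) return "N/A"; // model info unavailable
  const pct = ((perTurnToken / contextWindow) * 100).toFixed(2);
  return `${perTurnToken.toLocaleString()} / ${contextWindow.toLocaleString()} (${pct}% used)`;
}
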
diff --git a/frontend/src/i18n/declaration.ts b/frontend/src/i18n/declaration.ts index 64d40c1ace..e98a103b37 100644 --- a/frontend/src/i18n/declaration.ts +++ b/frontend/src/i18n/declaration.ts @@ -463,6 +463,8 @@ export enum I18nKey { CONVERSATION$INPUT = "CONVERSATION$INPUT", CONVERSATION$OUTPUT = "CONVERSATION$OUTPUT", CONVERSATION$TOTAL = "CONVERSATION$TOTAL", + CONVERSATION$CONTEXT_WINDOW = "CONVERSATION$CONTEXT_WINDOW", + CONVERSATION$USED = "CONVERSATION$USED", SETTINGS$RUNTIME_SETTINGS = "SETTINGS$RUNTIME_SETTINGS", SETTINGS$RESET_CONFIRMATION = "SETTINGS$RESET_CONFIRMATION", ERROR$GENERIC_OOPS = "ERROR$GENERIC_OOPS", diff --git a/frontend/src/i18n/translation.json b/frontend/src/i18n/translation.json index 0dd790aecc..41267f248b 100644 --- a/frontend/src/i18n/translation.json +++ b/frontend/src/i18n/translation.json @@ -7111,6 +7111,36 @@ "tr": "- Toplam:", "uk": "- Всього:" }, + "CONVERSATION$CONTEXT_WINDOW": { + "en": "Context Window", + "ja": "コンテキストウィンドウ", + "zh-CN": "上下文窗口", + "zh-TW": "上下文視窗", + "ko-KR": "컨텍스트 윈도우", + "de": "Kontextfenster", + "no": "Kontekstvindu", + "it": "Finestra di contesto", + "pt": "Janela de contexto", + "es": "Ventana de contexto", + "ar": "نافذة السياق", + "fr": "Fenêtre de contexte", + "tr": "Bağlam Penceresi" + }, + "CONVERSATION$USED": { + "en": "used", + "ja": "使用済み", + "zh-CN": "已使用", + "zh-TW": "已使用", + "ko-KR": "사용됨", + "de": "verwendet", + "no": "brukt", + "it": "usato", + "pt": "usado", + "es": "usado", + "ar": "مستخدم", + "fr": "utilisé", + "tr": "kullanıldı" + }, "SETTINGS$RUNTIME_SETTINGS": { "en": "Runtime Settings (", "ja": "ランタイム設定 (", diff --git a/frontend/src/state/metrics-slice.ts b/frontend/src/state/metrics-slice.ts index 52803072d0..551430756e 100644 --- a/frontend/src/state/metrics-slice.ts +++ b/frontend/src/state/metrics-slice.ts @@ -7,6 +7,8 @@ interface MetricsState { completion_tokens: number; cache_read_tokens: number; cache_write_tokens: number; + context_window: number; + per_turn_token: number; } | null; } diff --git a/frontend/src/types/message.tsx b/frontend/src/types/message.tsx index cc92660bf0..664d51e797 100644 --- a/frontend/src/types/message.tsx +++ b/frontend/src/types/message.tsx @@ -24,6 +24,8 @@ export interface ActionMessage { completion_tokens: number; cache_read_tokens: number; cache_write_tokens: number; + context_window: number; + per_turn_token: number; }; }; diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py index 504eb6c8b2..429a102fb9 100644 --- a/openhands/llm/llm.py +++ b/openhands/llm/llm.py @@ -414,6 +414,7 @@ class LLM(RetryMixin, DebugMixin): ) if current_model_info: self.model_info = current_model_info['model_info'] + logger.debug(f'Got model info from litellm proxy: {self.model_info}') # Last two attempts to get model info from NAME if not self.model_info: @@ -600,6 +601,12 @@ class LLM(RetryMixin, DebugMixin): if cache_write_tokens: stats += 'Input tokens (cache write): ' + str(cache_write_tokens) + '\n' + # Get context window from model info + context_window = 0 + if self.model_info and 'max_input_tokens' in self.model_info: + context_window = self.model_info['max_input_tokens'] + logger.debug(f'Using context window: {context_window}') + # Record in metrics # We'll treat cache_hit_tokens as "cache read" and cache_write_tokens as "cache write" self.metrics.add_token_usage( @@ -607,6 +614,7 @@ class LLM(RetryMixin, DebugMixin): completion_tokens=completion_tokens, cache_read_tokens=cache_hit_tokens, cache_write_tokens=cache_write_tokens, + context_window=context_window, 
             response_id=response_id,
         )
 
diff --git a/openhands/llm/metrics.py b/openhands/llm/metrics.py
index 6230c05c28..b29d9acc48 100644
--- a/openhands/llm/metrics.py
+++ b/openhands/llm/metrics.py
@@ -26,6 +26,8 @@ class TokenUsage(BaseModel):
     completion_tokens: int = Field(default=0)
     cache_read_tokens: int = Field(default=0)
     cache_write_tokens: int = Field(default=0)
+    context_window: int = Field(default=0)
+    per_turn_token: int = Field(default=0)
     response_id: str = Field(default='')
 
     def __add__(self, other: 'TokenUsage') -> 'TokenUsage':
@@ -36,6 +38,8 @@ class TokenUsage(BaseModel):
             completion_tokens=self.completion_tokens + other.completion_tokens,
             cache_read_tokens=self.cache_read_tokens + other.cache_read_tokens,
             cache_write_tokens=self.cache_write_tokens + other.cache_write_tokens,
+            context_window=max(self.context_window, other.context_window),
+            per_turn_token=other.per_turn_token,
             response_id=self.response_id,
         )
 
@@ -60,6 +64,7 @@ class Metrics:
             completion_tokens=0,
             cache_read_tokens=0,
             cache_write_tokens=0,
+            context_window=0,
             response_id='',
         )
 
@@ -107,6 +112,7 @@
             completion_tokens=0,
             cache_read_tokens=0,
             cache_write_tokens=0,
+            context_window=0,
             response_id='',
         )
         return self._accumulated_token_usage
@@ -130,15 +136,22 @@
         completion_tokens: int,
         cache_read_tokens: int,
         cache_write_tokens: int,
+        context_window: int,
         response_id: str,
     ) -> None:
         """Add a single usage record."""
+
+        # Tokens used this turn, for computing context window usage.
+        per_turn_token = prompt_tokens + completion_tokens
+
         usage = TokenUsage(
             model=self.model_name,
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
             cache_read_tokens=cache_read_tokens,
             cache_write_tokens=cache_write_tokens,
+            context_window=context_window,
+            per_turn_token=per_turn_token,
             response_id=response_id,
         )
         self._token_usages.append(usage)
 
@@ -150,6 +163,8 @@
             completion_tokens=completion_tokens,
             cache_read_tokens=cache_read_tokens,
             cache_write_tokens=cache_write_tokens,
+            context_window=context_window,
+            per_turn_token=per_turn_token,
             response_id='',
         )
 
@@ -190,6 +205,7 @@
             completion_tokens=0,
             cache_read_tokens=0,
             cache_write_tokens=0,
+            context_window=0,
             response_id='',
         )
 
diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py
index 4f074af255..cb7d48accc 100644
--- a/tests/unit/test_llm.py
+++ b/tests/unit/test_llm.py
@@ -87,8 +87,8 @@ def test_metrics_merge_accumulated_token_usage():
     metrics2 = Metrics(model_name='model2')
 
     # Add token usage to each
-    metrics1.add_token_usage(10, 5, 3, 2, 'response-1')
-    metrics2.add_token_usage(8, 6, 2, 4, 'response-2')
+    metrics1.add_token_usage(10, 5, 3, 2, 1000, 'response-1')
+    metrics2.add_token_usage(8, 6, 2, 4, 1000, 'response-2')
 
     # Verify initial accumulated token usage
     metrics1_data = metrics1.get()
@@ -218,7 +218,7 @@ def test_llm_reset():
     initial_metrics = copy.deepcopy(llm.metrics)
     initial_metrics.add_cost(1.0)
     initial_metrics.add_response_latency(0.5, 'test-id')
-    initial_metrics.add_token_usage(10, 5, 3, 2, 'test-id')
+    initial_metrics.add_token_usage(10, 5, 3, 2, 1000, 'test-id')
     llm.reset()
     assert llm.metrics.accumulated_cost != initial_metrics.accumulated_cost
     assert llm.metrics.costs != initial_metrics.costs
diff --git a/tests/unit/test_message_utils.py b/tests/unit/test_message_utils.py
index 98b49e6751..2cc5ad92f1 100644
--- a/tests/unit/test_message_utils.py
+++ b/tests/unit/test_message_utils.py
@@ -23,6 +23,7 @@ def test_get_token_usage_for_event():
         completion_tokens=usage_record.completion_tokens,
cache_read_tokens=usage_record.cache_read_tokens, cache_write_tokens=usage_record.cache_write_tokens, + context_window=1000, response_id=usage_record.response_id, ) @@ -136,6 +137,7 @@ def test_get_token_usage_for_event_fallback(): completion_tokens=usage_record.completion_tokens, cache_read_tokens=usage_record.cache_read_tokens, cache_write_tokens=usage_record.cache_write_tokens, + context_window=1000, response_id=usage_record.response_id, )
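
For reviewers, a minimal sketch of how the two new TokenUsage fields merge,
following the __add__ implementation in this patch (values are illustrative):

from openhands.llm.metrics import TokenUsage

# Two turns against a model with a 1000-token context window.
turn1 = TokenUsage(model='m', prompt_tokens=10, completion_tokens=5,
                   context_window=1000, per_turn_token=15, response_id='r1')
turn2 = TokenUsage(model='m', prompt_tokens=80, completion_tokens=20,
                   context_window=1000, per_turn_token=100, response_id='r2')

merged = turn1 + turn2
assert merged.context_window == 1000  # context_window merges via max()
assert merged.per_turn_token == 100   # keeps only the latest turn's tokens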