mirror of https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00

Display context window usage status in UI (#8267)

This commit is contained in:
parent 7d356cad47
commit 3d68711ca3
@@ -307,7 +307,7 @@ export function ConversationCard({
           </span>
         </div>

-        <div className="flex justify-between items-center pt-1">
+        <div className="flex justify-between items-center border-b border-neutral-700 pb-2">
           <span className="font-semibold">
             {t(I18nKey.CONVERSATION$TOTAL)}:
           </span>
@@ -318,6 +318,34 @@ export function ConversationCard({
             ).toLocaleString()}
           </span>
         </div>
+
+        <div className="flex flex-col gap-2">
+          <div className="flex items-center justify-between">
+            <span className="font-semibold">
+              {t(I18nKey.CONVERSATION$CONTEXT_WINDOW)}
+            </span>
+          </div>
+          <div className="w-full h-1.5 bg-neutral-700 rounded-full overflow-hidden">
+            <div
+              className="h-full bg-blue-500 transition-all duration-300"
+              style={{
+                width: `${Math.min(100, (metrics.usage.per_turn_token / metrics.usage.context_window) * 100)}%`,
+              }}
+            />
+          </div>
+          <div className="flex justify-end">
+            <span className="text-xs text-neutral-400">
+              {metrics.usage.per_turn_token.toLocaleString()} /{" "}
+              {metrics.usage.context_window.toLocaleString()} (
+              {(
+                (metrics.usage.per_turn_token /
+                  metrics.usage.context_window) *
+                100
+              ).toFixed(2)}
+              % {t(I18nKey.CONVERSATION$USED)})
+            </span>
+          </div>
+        </div>
       </>
     )}
   </div>
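Note: the bar width clamps the ratio of per-turn tokens to the context window at 100%, while the text label prints the unclamped percentage to two decimals. A minimal Python sketch of the same arithmetic (the numbers are hypothetical, not taken from the diff):

    def context_usage(per_turn_token: int, context_window: int) -> tuple[float, float]:
        """Return (bar_width_percent, label_percent) as the UI computes them."""
        if context_window <= 0:
            return 0.0, 0.0  # guard: model info may not report a window
        ratio = per_turn_token / context_window * 100
        return min(100.0, ratio), round(ratio, 2)

    # e.g. 150_000 tokens used of a 200_000-token window -> (75.0, 75.0)
    print(context_usage(150_000, 200_000))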
@@ -463,6 +463,8 @@ export enum I18nKey {
   CONVERSATION$INPUT = "CONVERSATION$INPUT",
   CONVERSATION$OUTPUT = "CONVERSATION$OUTPUT",
   CONVERSATION$TOTAL = "CONVERSATION$TOTAL",
+  CONVERSATION$CONTEXT_WINDOW = "CONVERSATION$CONTEXT_WINDOW",
+  CONVERSATION$USED = "CONVERSATION$USED",
   SETTINGS$RUNTIME_SETTINGS = "SETTINGS$RUNTIME_SETTINGS",
   SETTINGS$RESET_CONFIRMATION = "SETTINGS$RESET_CONFIRMATION",
   ERROR$GENERIC_OOPS = "ERROR$GENERIC_OOPS",
@@ -7111,6 +7111,36 @@
     "tr": "- Toplam:",
     "uk": "- Всього:"
   },
+  "CONVERSATION$CONTEXT_WINDOW": {
+    "en": "Context Window",
+    "ja": "コンテキストウィンドウ",
+    "zh-CN": "上下文窗口",
+    "zh-TW": "上下文視窗",
+    "ko-KR": "컨텍스트 윈도우",
+    "de": "Kontextfenster",
+    "no": "Kontekstvindu",
+    "it": "Finestra di contesto",
+    "pt": "Janela de contexto",
+    "es": "Ventana de contexto",
+    "ar": "نافذة السياق",
+    "fr": "Fenêtre de contexte",
+    "tr": "Bağlam Penceresi"
+  },
+  "CONVERSATION$USED": {
+    "en": "used",
+    "ja": "使用済み",
+    "zh-CN": "已使用",
+    "zh-TW": "已使用",
+    "ko-KR": "사용됨",
+    "de": "verwendet",
+    "no": "brukt",
+    "it": "usato",
+    "pt": "usado",
+    "es": "usado",
+    "ar": "مستخدم",
+    "fr": "utilisé",
+    "tr": "kullanıldı"
+  },
   "SETTINGS$RUNTIME_SETTINGS": {
     "en": "Runtime Settings (",
     "ja": "ランタイム設定 (",
@@ -7,6 +7,8 @@ interface MetricsState {
     completion_tokens: number;
     cache_read_tokens: number;
     cache_write_tokens: number;
+    context_window: number;
+    per_turn_token: number;
   } | null;
}
@@ -24,6 +24,8 @@ export interface ActionMessage {
     completion_tokens: number;
     cache_read_tokens: number;
     cache_write_tokens: number;
+    context_window: number;
+    per_turn_token: number;
   };
};
@@ -414,6 +414,7 @@ class LLM(RetryMixin, DebugMixin):
            )
            if current_model_info:
                self.model_info = current_model_info['model_info']
+               logger.debug(f'Got model info from litellm proxy: {self.model_info}')

        # Last two attempts to get model info from NAME
        if not self.model_info:
@@ -600,6 +601,12 @@ class LLM(RetryMixin, DebugMixin):
        if cache_write_tokens:
            stats += 'Input tokens (cache write): ' + str(cache_write_tokens) + '\n'

+       # Get context window from model info
+       context_window = 0
+       if self.model_info and 'max_input_tokens' in self.model_info:
+           context_window = self.model_info['max_input_tokens']
+           logger.debug(f'Using context window: {context_window}')
+
        # Record in metrics
        # We'll treat cache_hit_tokens as "cache read" and cache_write_tokens as "cache write"
        self.metrics.add_token_usage(
@@ -607,6 +614,7 @@ class LLM(RetryMixin, DebugMixin):
            completion_tokens=completion_tokens,
            cache_read_tokens=cache_hit_tokens,
            cache_write_tokens=cache_write_tokens,
+           context_window=context_window,
            response_id=response_id,
        )
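Note: the context window is read from the model metadata's max_input_tokens field and defaults to 0 when the model is unknown, so callers can detect "window unknown" and skip the usage display. A standalone sketch of that fallback (model_info here is a hypothetical dict standing in for whatever litellm returns):

    def resolve_context_window(model_info: dict | None) -> int:
        # Default to 0 so callers can tell "no known window" apart from a real value.
        if model_info and 'max_input_tokens' in model_info:
            return model_info['max_input_tokens']
        return 0

    print(resolve_context_window({'max_input_tokens': 200_000}))  # 200000
    print(resolve_context_window(None))                           # 0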
@@ -26,6 +26,8 @@ class TokenUsage(BaseModel):
    completion_tokens: int = Field(default=0)
    cache_read_tokens: int = Field(default=0)
    cache_write_tokens: int = Field(default=0)
+   context_window: int = Field(default=0)
+   per_turn_token: int = Field(default=0)
    response_id: str = Field(default='')

    def __add__(self, other: 'TokenUsage') -> 'TokenUsage':
@@ -36,6 +38,8 @@ class TokenUsage(BaseModel):
            completion_tokens=self.completion_tokens + other.completion_tokens,
            cache_read_tokens=self.cache_read_tokens + other.cache_read_tokens,
            cache_write_tokens=self.cache_write_tokens + other.cache_write_tokens,
+           context_window=max(self.context_window, other.context_window),
+           per_turn_token=other.per_turn_token,
            response_id=self.response_id,
        )
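Note the asymmetry in __add__: token counts sum, context_window keeps the larger of the two operands (windows don't accumulate), and per_turn_token takes the right-hand operand's value, so the accumulated record always reflects the most recent turn. A minimal pydantic sketch of just those semantics, with hypothetical values:

    from pydantic import BaseModel, Field

    class Usage(BaseModel):
        prompt_tokens: int = Field(default=0)
        context_window: int = Field(default=0)
        per_turn_token: int = Field(default=0)

        def __add__(self, other: 'Usage') -> 'Usage':
            return Usage(
                prompt_tokens=self.prompt_tokens + other.prompt_tokens,   # counts accumulate
                context_window=max(self.context_window, other.context_window),  # windows don't
                per_turn_token=other.per_turn_token,                      # latest turn wins
            )

    total = Usage(prompt_tokens=10, context_window=1000, per_turn_token=15)
    total = total + Usage(prompt_tokens=8, context_window=1000, per_turn_token=14)
    print(total.per_turn_token)  # 14 -- the most recent turn, not a sum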
@@ -60,6 +64,7 @@ class Metrics:
            completion_tokens=0,
            cache_read_tokens=0,
            cache_write_tokens=0,
+           context_window=0,
            response_id='',
        )
@@ -107,6 +112,7 @@ class Metrics:
                completion_tokens=0,
                cache_read_tokens=0,
                cache_write_tokens=0,
+               context_window=0,
                response_id='',
            )
        return self._accumulated_token_usage
@@ -130,15 +136,22 @@ class Metrics:
        completion_tokens: int,
        cache_read_tokens: int,
        cache_write_tokens: int,
+       context_window: int,
        response_id: str,
    ) -> None:
        """Add a single usage record."""
+
+       # Tokens used this turn, for calculating context usage.
+       per_turn_token = prompt_tokens + completion_tokens
+
        usage = TokenUsage(
            model=self.model_name,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            cache_read_tokens=cache_read_tokens,
            cache_write_tokens=cache_write_tokens,
+           context_window=context_window,
+           per_turn_token=per_turn_token,
            response_id=response_id,
        )
        self._token_usages.append(usage)
@@ -150,6 +163,8 @@ class Metrics:
            completion_tokens=completion_tokens,
            cache_read_tokens=cache_read_tokens,
            cache_write_tokens=cache_write_tokens,
+           context_window=context_window,
+           per_turn_token=per_turn_token,
            response_id='',
        )
@@ -190,6 +205,7 @@ class Metrics:
                completion_tokens=0,
                cache_read_tokens=0,
                cache_write_tokens=0,
+               context_window=0,
                response_id='',
            )
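Note: per_turn_token is simply prompt plus completion tokens for a single call, so what the UI's bar tracks is how much of the window the latest request consumed, not a lifetime total. Since the whole prompt (including conversation history) counts against the window, per-turn usage grows as the conversation does. With hypothetical numbers:

    # One LLM call: history-laden prompt plus the model's reply.
    prompt_tokens, completion_tokens = 95_000, 2_000
    context_window = 200_000

    per_turn_token = prompt_tokens + completion_tokens
    print(f'{per_turn_token / context_window:.2%} of the window used this turn')  # 48.50%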
@@ -87,8 +87,8 @@ def test_metrics_merge_accumulated_token_usage():
    metrics2 = Metrics(model_name='model2')

    # Add token usage to each
-   metrics1.add_token_usage(10, 5, 3, 2, 'response-1')
-   metrics2.add_token_usage(8, 6, 2, 4, 'response-2')
+   metrics1.add_token_usage(10, 5, 3, 2, 1000, 'response-1')
+   metrics2.add_token_usage(8, 6, 2, 4, 1000, 'response-2')

    # Verify initial accumulated token usage
    metrics1_data = metrics1.get()
@@ -218,7 +218,7 @@ def test_llm_reset():
    initial_metrics = copy.deepcopy(llm.metrics)
    initial_metrics.add_cost(1.0)
    initial_metrics.add_response_latency(0.5, 'test-id')
-   initial_metrics.add_token_usage(10, 5, 3, 2, 'test-id')
+   initial_metrics.add_token_usage(10, 5, 3, 2, 1000, 'test-id')
    llm.reset()
    assert llm.metrics.accumulated_cost != initial_metrics.accumulated_cost
    assert llm.metrics.costs != initial_metrics.costs
@@ -23,6 +23,7 @@ def test_get_token_usage_for_event():
        completion_tokens=usage_record.completion_tokens,
        cache_read_tokens=usage_record.cache_read_tokens,
        cache_write_tokens=usage_record.cache_write_tokens,
+       context_window=1000,
        response_id=usage_record.response_id,
    )
@@ -136,6 +137,7 @@ def test_get_token_usage_for_event_fallback():
        completion_tokens=usage_record.completion_tokens,
        cache_read_tokens=usage_record.cache_read_tokens,
        cache_write_tokens=usage_record.cache_write_tokens,
+       context_window=1000,
        response_id=usage_record.response_id,
    )