Display context window usage status in UI (#8267)

This commit is contained in:
AutoLTX 2025-05-09 11:39:14 +08:00 committed by GitHub
parent 7d356cad47
commit 3d68711ca3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 94 additions and 4 deletions

View File

@@ -307,7 +307,7 @@ export function ConversationCard({
              </span>
            </div>
            <div className="flex justify-between items-center pt-1">
            <div className="flex justify-between items-center border-b border-neutral-700 pb-2">
              <span className="font-semibold">
                {t(I18nKey.CONVERSATION$TOTAL)}:
              </span>
@@ -318,6 +318,34 @@ export function ConversationCard({
                ).toLocaleString()}
              </span>
            </div>
            <div className="flex flex-col gap-2">
              <div className="flex items-center justify-between">
                <span className="font-semibold">
                  {t(I18nKey.CONVERSATION$CONTEXT_WINDOW)}
                </span>
              </div>
              <div className="w-full h-1.5 bg-neutral-700 rounded-full overflow-hidden">
                <div
                  className="h-full bg-blue-500 transition-all duration-300"
                  style={{
                    width: `${Math.min(100, (metrics.usage.per_turn_token / metrics.usage.context_window) * 100)}%`,
                  }}
                />
              </div>
              <div className="flex justify-end">
                <span className="text-xs text-neutral-400">
                  {metrics.usage.per_turn_token.toLocaleString()} /{" "}
                  {metrics.usage.context_window.toLocaleString()} (
                  {(
                    (metrics.usage.per_turn_token /
                      metrics.usage.context_window) *
                    100
                  ).toFixed(2)}
                  % {t(I18nKey.CONVERSATION$USED)})
                </span>
              </div>
            </div>
          </>
        )}
      </div>
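
The progress bar width and the percentage label above are driven by the same ratio: `per_turn_token` over `context_window`, clamped to 100%. A minimal Python sketch of that calculation, with a zero guard added here for illustration (an assumption; the component itself relies on the backend reporting a positive `context_window`):

```python
# Sketch of the usage math rendered by ConversationCard. The explicit guard
# for context_window <= 0 is an addition for safety in this example only.
def context_usage_percent(per_turn_token: int, context_window: int) -> float:
    """Return per-turn token usage as a percentage of the context window."""
    if context_window <= 0:
        return 0.0  # avoid division by zero when no model info was available
    return min(100.0, per_turn_token / context_window * 100)


print(f'{context_usage_percent(12_345, 128_000):.2f}% used')  # 9.64% used
```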

View File

@@ -463,6 +463,8 @@ export enum I18nKey {
  CONVERSATION$INPUT = "CONVERSATION$INPUT",
  CONVERSATION$OUTPUT = "CONVERSATION$OUTPUT",
  CONVERSATION$TOTAL = "CONVERSATION$TOTAL",
  CONVERSATION$CONTEXT_WINDOW = "CONVERSATION$CONTEXT_WINDOW",
  CONVERSATION$USED = "CONVERSATION$USED",
  SETTINGS$RUNTIME_SETTINGS = "SETTINGS$RUNTIME_SETTINGS",
  SETTINGS$RESET_CONFIRMATION = "SETTINGS$RESET_CONFIRMATION",
  ERROR$GENERIC_OOPS = "ERROR$GENERIC_OOPS",

View File

@@ -7111,6 +7111,36 @@
    "tr": "- Toplam:",
    "uk": "- Всього:"
  },
  "CONVERSATION$CONTEXT_WINDOW": {
    "en": "Context Window",
    "ja": "コンテキストウィンドウ",
    "zh-CN": "上下文窗口",
    "zh-TW": "上下文視窗",
    "ko-KR": "컨텍스트 윈도우",
    "de": "Kontextfenster",
    "no": "Kontekstvindu",
    "it": "Finestra di contesto",
    "pt": "Janela de contexto",
    "es": "Ventana de contexto",
    "ar": "نافذة السياق",
    "fr": "Fenêtre de contexte",
    "tr": "Bağlam Penceresi"
  },
  "CONVERSATION$USED": {
    "en": "used",
    "ja": "使用済み",
    "zh-CN": "已使用",
    "zh-TW": "已使用",
    "ko-KR": "사용됨",
    "de": "verwendet",
    "no": "brukt",
    "it": "usato",
    "pt": "usado",
    "es": "usado",
    "ar": "مستخدم",
    "fr": "utilisé",
    "tr": "kullanıldı"
  },
  "SETTINGS$RUNTIME_SETTINGS": {
    "en": "Runtime Settings (",
    "ja": "ランタイム設定 (",

View File

@@ -7,6 +7,8 @@ interface MetricsState {
    completion_tokens: number;
    cache_read_tokens: number;
    cache_write_tokens: number;
    context_window: number;
    per_turn_token: number;
  } | null;
}

View File

@@ -24,6 +24,8 @@ export interface ActionMessage {
      completion_tokens: number;
      cache_read_tokens: number;
      cache_write_tokens: number;
      context_window: number;
      per_turn_token: number;
    };
  };

View File

@@ -414,6 +414,7 @@ class LLM(RetryMixin, DebugMixin):
            )
            if current_model_info:
                self.model_info = current_model_info['model_info']
                logger.debug(f'Got model info from litellm proxy: {self.model_info}')

        # Last two attempts to get model info from NAME
        if not self.model_info:
@@ -600,6 +601,12 @@ class LLM(RetryMixin, DebugMixin):
        if cache_write_tokens:
            stats += 'Input tokens (cache write): ' + str(cache_write_tokens) + '\n'

        # Get context window from model info
        context_window = 0
        if self.model_info and 'max_input_tokens' in self.model_info:
            context_window = self.model_info['max_input_tokens']
            logger.debug(f'Using context window: {context_window}')

        # Record in metrics
        # We'll treat cache_hit_tokens as "cache read" and cache_write_tokens as "cache write"
        self.metrics.add_token_usage(
@@ -607,6 +614,7 @@ class LLM(RetryMixin, DebugMixin):
            completion_tokens=completion_tokens,
            cache_read_tokens=cache_hit_tokens,
            cache_write_tokens=cache_write_tokens,
            context_window=context_window,
            response_id=response_id,
        )
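
On the backend, the context window is a dictionary lookup: litellm's model info exposes `max_input_tokens`, and the code falls back to 0 when the key is absent. A hedged sketch of that lookup in isolation, assuming `model_info` has the dict shape the code above relies on:

```python
# Minimal sketch of the context-window lookup above. Assumption: model_info
# is the litellm model-info dict; only 'max_input_tokens' is consulted here.
def get_context_window(model_info: dict | None) -> int:
    if model_info and 'max_input_tokens' in model_info:
        return model_info['max_input_tokens']
    return 0  # unknown model: the UI will show a 0-token context window


assert get_context_window({'max_input_tokens': 200_000}) == 200_000
assert get_context_window(None) == 0
```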

View File

@@ -26,6 +26,8 @@ class TokenUsage(BaseModel):
    completion_tokens: int = Field(default=0)
    cache_read_tokens: int = Field(default=0)
    cache_write_tokens: int = Field(default=0)
    context_window: int = Field(default=0)
    per_turn_token: int = Field(default=0)
    response_id: str = Field(default='')

    def __add__(self, other: 'TokenUsage') -> 'TokenUsage':
@@ -36,6 +38,8 @@ class TokenUsage(BaseModel):
            completion_tokens=self.completion_tokens + other.completion_tokens,
            cache_read_tokens=self.cache_read_tokens + other.cache_read_tokens,
            cache_write_tokens=self.cache_write_tokens + other.cache_write_tokens,
            context_window=max(self.context_window, other.context_window),
            per_turn_token=other.per_turn_token,
            response_id=self.response_id,
        )
@@ -60,6 +64,7 @@ class Metrics:
            completion_tokens=0,
            cache_read_tokens=0,
            cache_write_tokens=0,
            context_window=0,
            response_id='',
        )
@@ -107,6 +112,7 @@ class Metrics:
                completion_tokens=0,
                cache_read_tokens=0,
                cache_write_tokens=0,
                context_window=0,
                response_id='',
            )
        return self._accumulated_token_usage
@@ -130,15 +136,22 @@ class Metrics:
        completion_tokens: int,
        cache_read_tokens: int,
        cache_write_tokens: int,
        context_window: int,
        response_id: str,
    ) -> None:
        """Add a single usage record."""
        # Tokens used this turn, for computing context window usage.
        per_turn_token = prompt_tokens + completion_tokens

        usage = TokenUsage(
            model=self.model_name,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            cache_read_tokens=cache_read_tokens,
            cache_write_tokens=cache_write_tokens,
            context_window=context_window,
            per_turn_token=per_turn_token,
            response_id=response_id,
        )
        self._token_usages.append(usage)
@@ -150,6 +163,8 @@ class Metrics:
            completion_tokens=completion_tokens,
            cache_read_tokens=cache_read_tokens,
            cache_write_tokens=cache_write_tokens,
            context_window=context_window,
            per_turn_token=per_turn_token,
            response_id='',
        )
@@ -190,6 +205,7 @@ class Metrics:
            completion_tokens=0,
            cache_read_tokens=0,
            cache_write_tokens=0,
            context_window=0,
            response_id='',
        )
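
Note the asymmetry in `__add__`: the token counts accumulate, `context_window` takes the max of the two operands, and `per_turn_token` keeps only the right-hand (latest) value, so the accumulated record always reflects the most recent turn's context usage. A small sketch of those merge semantics, with a plain dataclass standing in for the Pydantic model:

```python
from dataclasses import dataclass


# Dataclass stand-in for TokenUsage, mirroring the merge rules in the diff:
# counts add up, context_window is max(), per_turn_token follows the newest turn.
@dataclass
class Usage:
    prompt_tokens: int = 0
    completion_tokens: int = 0
    context_window: int = 0
    per_turn_token: int = 0

    def __add__(self, other: 'Usage') -> 'Usage':
        return Usage(
            prompt_tokens=self.prompt_tokens + other.prompt_tokens,
            completion_tokens=self.completion_tokens + other.completion_tokens,
            context_window=max(self.context_window, other.context_window),
            per_turn_token=other.per_turn_token,  # latest turn only
        )


turn1 = Usage(prompt_tokens=100, completion_tokens=20, context_window=128_000, per_turn_token=120)
turn2 = Usage(prompt_tokens=150, completion_tokens=30, context_window=128_000, per_turn_token=180)
total = turn1 + turn2
assert total.prompt_tokens == 250 and total.per_turn_token == 180
```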

View File

@@ -87,8 +87,8 @@ def test_metrics_merge_accumulated_token_usage():
    metrics2 = Metrics(model_name='model2')

    # Add token usage to each
    metrics1.add_token_usage(10, 5, 3, 2, 'response-1')
    metrics2.add_token_usage(8, 6, 2, 4, 'response-2')
    metrics1.add_token_usage(10, 5, 3, 2, 1000, 'response-1')
    metrics2.add_token_usage(8, 6, 2, 4, 1000, 'response-2')

    # Verify initial accumulated token usage
    metrics1_data = metrics1.get()
@@ -218,7 +218,7 @@ def test_llm_reset():
    initial_metrics = copy.deepcopy(llm.metrics)
    initial_metrics.add_cost(1.0)
    initial_metrics.add_response_latency(0.5, 'test-id')
    initial_metrics.add_token_usage(10, 5, 3, 2, 'test-id')
    initial_metrics.add_token_usage(10, 5, 3, 2, 1000, 'test-id')
    llm.reset()
    assert llm.metrics.accumulated_cost != initial_metrics.accumulated_cost
    assert llm.metrics.costs != initial_metrics.costs

View File

@@ -23,6 +23,7 @@ def test_get_token_usage_for_event():
        completion_tokens=usage_record.completion_tokens,
        cache_read_tokens=usage_record.cache_read_tokens,
        cache_write_tokens=usage_record.cache_write_tokens,
        context_window=1000,
        response_id=usage_record.response_id,
    )
@@ -136,6 +137,7 @@ def test_get_token_usage_for_event_fallback():
        completion_tokens=usage_record.completion_tokens,
        cache_read_tokens=usage_record.cache_read_tokens,
        cache_write_tokens=usage_record.cache_write_tokens,
        context_window=1000,
        response_id=usage_record.response_id,
    )