feat(frontend): add user feedback Likert scale for agent performance rating (only on OH Cloud) (#8992)

Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: sp.wack <83104063+amanape@users.noreply.github.com>
This commit is contained in:
Xingyao Wang
2025-06-16 15:26:24 -04:00
committed by GitHub
parent 2fd1fdcd7e
commit 2c4ecd02f7
14 changed files with 710 additions and 100 deletions

View File

@@ -1,5 +1,8 @@
from openhands.controller.agent import Agent
from openhands.controller.state.control_flags import (
    BudgetControlFlag,
    IterationControlFlag,
)
from openhands.controller.state.state import State
from openhands.core.logger import openhands_logger as logger
from openhands.events.action.agent import AgentDelegateAction, ChangeAgentStateAction
@@ -71,9 +74,19 @@ class StateTracker:
self.state = State(
session_id=id.removesuffix('-delegate'),
inputs={},
iteration_flag=IterationControlFlag(
    limit_increase_amount=max_iterations,
    current_value=0,
    max_value=max_iterations,
),
budget_flag=None
if not max_budget_per_task
else BudgetControlFlag(
    limit_increase_amount=max_budget_per_task,
    current_value=0,
    max_value=max_budget_per_task,
),
confirmation_mode=confirmation_mode,
)
self.state.start_id = 0
@@ -89,7 +102,6 @@ class StateTracker:
f'AgentController {id} initializing history from event {self.state.start_id}',
)
# Share the state metrics with the agent's LLM metrics
# This ensures that all accumulated metrics are always in sync between controller and llm
agent.llm.metrics = self.state.metrics
@@ -221,9 +233,7 @@ class StateTracker:
for event in self.state.history
]
def maybe_increase_control_flags_limits(self, headless_mode: bool):
# Iteration and budget extensions are independent of each other
# An error will be thrown if any one of the control flags have reached or exceeded its limit
self.state.iteration_flag.increase_limit(headless_mode)
@@ -247,7 +257,6 @@ class StateTracker:
if self.sid and self.file_store:
self.state.save_to_session(self.sid, self.file_store, self.user_id)
def run_control_flags(self):
"""
Performs one step of the control flags
@@ -256,25 +265,24 @@ class StateTracker:
if self.state.budget_flag:
self.state.budget_flag.step()
def sync_budget_flag_with_metrics(self):
"""
Ensures that budget flag is up to date with accumulated costs from llm completions
Budget flag will monitor for when budget is exceeded
"""
if self.state.budget_flag:
self.state.budget_flag.current_value = self.state.metrics.accumulated_cost
def merge_metrics(self, metrics: Metrics):
"""
Merges metrics with the state metrics
NOTE: this should be refactored in the future. We should have services (draft llm, title autocomplete, condenser, etc)
use their own LLMs, but the metrics object should be shared. This way we have one source of truth for accumulated costs from
all services
This would prevent having fragmented stores for metrics, and we don't have the burden of deciding where and how to store them
if we decide introduce more specialized services that require llm completions
"""
self.state.metrics.merge(metrics)