[agent] improve finish tool for sonnet 3.7 (#7002)

2025-12-26 05:48:36 +08:00 · 2025-02-28 10:43:13 -05:00 · 2025-02-28 10:43:13 -05:00 · 7810d8c4a0
commit 7810d8c4a0
parent a8f1feee5d
8 changed files with 95 additions and 7 deletions
--- a/frontend/src/services/actions.ts
+++ b/frontend/src/services/actions.ts
@ -57,6 +57,28 @@ const messageActions = {
      store.dispatch(appendJupyterInput(message.args.code));
    }
  },
+  [ActionType.FINISH]: (message: ActionMessage) => {
+    // Echo the agent's closing message first, then (when one was supplied)
+    // its own assessment of how the task went.
+    const { final_thought: finalThought, task_completed: taskCompleted } =
+      message.args;
+    store.dispatch(addAssistantMessage(finalThought));
+
+    // A Map lookup matches keys without type coercion, so only the three
+    // known string values produce a verdict; anything else yields none.
+    const verdict = new Map<unknown, string>([
+      ["partial", "The agent thinks that the task was **completed partially**."],
+      ["false", "The agent thinks that the task was **not completed**."],
+      ["true", "The agent thinks that the task was **completed successfully**."],
+    ]).get(taskCompleted);
+    if (!verdict) {
+      return;
+    }
+
+    // When a final thought was shown, start the verdict with a newline.
+    const separator = finalThought ? "\n" : "";
+    store.dispatch(addAssistantMessage(`${separator}${verdict}`));
+  },
 };

 export function handleActionMessage(message: ActionMessage) {
--- a/frontend/src/types/core/actions.ts
+++ b/frontend/src/types/core/actions.ts
@ -51,6 +51,8 @@ export interface ThinkAction extends OpenHandsActionEvent<"think"> {
 export interface FinishAction extends OpenHandsActionEvent<"finish"> {
  source: "agent";
  args: {
+    final_thought: string;
+    task_completed: "true" | "false" | "partial" | null; // string tri-state checked by the FINISH message handler; null when the agent gave no verdict
    outputs: Record<string, unknown>;
    thought: string;
  };
--- a/openhands/agenthub/codeact_agent/function_calling.py
+++ b/openhands/agenthub/codeact_agent/function_calling.py
@ -108,7 +108,10 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
            # AgentFinishAction
            # ================================================
            elif tool_call.function.name == FinishTool['function']['name']:
-                action = AgentFinishAction()
+                action = AgentFinishAction(
+                    final_thought=arguments.get('message', ''),
+                    task_completed=arguments.get('task_completed', None),
+                )

            # ================================================
            # LLMBasedFileEditTool (LLM-based file editor, deprecated)
--- a/openhands/agenthub/codeact_agent/tools/finish.py
+++ b/openhands/agenthub/codeact_agent/tools/finish.py
@ -1,11 +1,39 @@
 from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk

-_FINISH_DESCRIPTION = """Finish the interaction when the task is complete OR if the assistant cannot proceed further with the task."""
+_FINISH_DESCRIPTION = """Signals the completion of the current task or conversation.
+
+Use this tool when:
+- You have successfully completed the user's requested task
+- You cannot proceed further due to technical limitations or missing information
+
+The message should include:
+- A clear summary of actions taken and their results
+- Any next steps for the user
+- Explanation if you're unable to complete the task
+- Any follow-up questions if more information is needed
+
+The task_completed field should be set to True if you believed you have completed the task, and False otherwise.
+"""

 FinishTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name='finish',
        description=_FINISH_DESCRIPTION,
+        parameters={  # argument schema for the `finish` call
+            'type': 'object',
+            'required': ['message', 'task_completed'],  # both arguments are mandatory
+            'properties': {
+                'message': {
+                    'type': 'string',
+                    'description': 'Final message to send to the user',
+                },
+                'task_completed': {
+                    'type': 'string',  # a string, not a boolean, so that 'partial' can be expressed
+                    'enum': ['true', 'false', 'partial'],
+                    'description': 'Whether you have completed the task.',
+                },
+            },
+        },
    ),
 )
--- a/openhands/core/logger.py
+++ b/openhands/core/logger.py
@ -77,7 +77,14 @@ class StackInfoFilter(logging.Filter):
    def filter(self, record: logging.LogRecord) -> bool:
        if record.levelno >= logging.ERROR:  # only ERROR and above get the extra context
            # LogRecord attributes are dynamically typed
-            setattr(record, 'stack_info', True)
+
+            # Capture the current stack trace as a string
+            stack = traceback.format_stack()
+            # Remove the last entries which are related to the logging machinery
+            stack = stack[:-3]  # Adjust this number if needed. NOTE(review): 3 is a hard-coded frame count; confirm it matches the real call depth
+            # Join the stack frames into a single string
+            stack_str = ''.join(stack)
+            setattr(record, 'stack_info', stack_str)
            setattr(record, 'exc_info', sys.exc_info())  # NOTE(review): outside an except block this is (None, None, None), which is truthy; confirm the formatters cope
        return True  # the record is always kept; this filter only annotates it

--- a/openhands/events/action/agent.py
+++ b/openhands/events/action/agent.py
@ -1,4 +1,5 @@
 from dataclasses import dataclass, field
+from enum import Enum
 from typing import Any

 from openhands.core.schema import ActionType
@ -33,16 +34,26 @@ class AgentSummarizeAction(Action):
        return ret


+class AgentFinishTaskCompleted(Enum):  # tri-state completion verdict with string values
+    FALSE = 'false'  # task not completed
+    PARTIAL = 'partial'  # task completed only in part
+    TRUE = 'true'  # task completed
+
+
@dataclass
 class AgentFinishAction(Action):
    """An action where the agent finishes the task.

    Attributes:
-        outputs (dict): The outputs of the agent, for instance "content".
+        final_thought (str): The message to send to the user.
+        task_completed (enum): Whether the agent believes the task has been completed.
+        outputs (dict): The other outputs of the agent, for instance "content".
        thought (str): The agent's explanation of its actions.
        action (str): The action type, namely ActionType.FINISH.
    """

+    final_thought: str = ''
+    task_completed: AgentFinishTaskCompleted | None = None
    outputs: dict[str, Any] = field(default_factory=dict)
    thought: str = ''
    action: str = ActionType.FINISH
--- a/tests/unit/test_action_serialization.py
+++ b/tests/unit/test_action_serialization.py
@ -84,12 +84,23 @@ def test_message_action_serialization_deserialization():


 def test_agent_finish_action_serialization_deserialization():
-    original_action_dict = {'action': 'finish', 'args': {'outputs': {}, 'thought': ''}}
+    original_action_dict = {
+        'action': 'finish',
+        'args': {
+            'outputs': {},
+            'thought': '',
+            'task_completed': None,  # no completion verdict supplied
+            'final_thought': '',  # empty final message
+        },
+    }
    serialization_deserialization(original_action_dict, AgentFinishAction)  # presumably round-trips the dict through AgentFinishAction; helper is defined outside this hunk


 def test_agent_reject_action_serialization_deserialization():
-    original_action_dict = {'action': 'reject', 'args': {'outputs': {}, 'thought': ''}}
+    original_action_dict = {
+        'action': 'reject',
+        'args': {'outputs': {}, 'thought': ''},  # same content as the removed one-line literal, only re-wrapped
+    }
    serialization_deserialization(original_action_dict, AgentRejectAction)  # presumably round-trips the dict through AgentRejectAction; helper is defined outside this hunk


--- a/tests/unit/test_security.py
+++ b/tests/unit/test_security.py
@ -347,7 +347,11 @@ async def test_unsafe_bash_command(temp_dir: str):
                    type='function',
                    function=Function(
                        name=ActionType.FINISH,
-                        arguments={'outputs': {'content': 'outputs content'}},
+                        arguments={
+                            'outputs': {'content': 'outputs content'},
+                            'task_completed': None,
+                            'final_thought': '',
+                        },
                    ),
                ),
            ],