[agent] improve finish tool for sonnet 3.7 (#7002)

This commit is contained in:
Xingyao Wang 2025-02-28 10:43:13 -05:00 committed by GitHub
parent a8f1feee5d
commit 7810d8c4a0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 95 additions and 7 deletions

View File

@ -57,6 +57,28 @@ const messageActions = {
store.dispatch(appendJupyterInput(message.args.code));
}
},
/**
 * Handles a FINISH action: shows the agent's final thought, then (when the
 * agent reported one) a second message stating its own completion verdict.
 */
[ActionType.FINISH]: (message: ActionMessage) => {
  const finalThought = message.args.final_thought;
  store.dispatch(addAssistantMessage(finalThought));

  // Translate the reported completion status into a user-facing sentence.
  // Unrecognised or missing values produce no verdict message at all.
  let verdict: string;
  switch (message.args.task_completed) {
    case "partial":
      verdict = "The agent thinks that the task was **completed partially**.";
      break;
    case "false":
      verdict = "The agent thinks that the task was **not completed**.";
      break;
    case "true":
      verdict =
        "The agent thinks that the task was **completed successfully**.";
      break;
    default:
      verdict = "";
  }
  if (!verdict) {
    return;
  }

  // A non-empty final thought gets a leading newline before the verdict.
  const separator = finalThought ? "\n" : "";
  store.dispatch(addAssistantMessage(`${separator}${verdict}`));
},
};
export function handleActionMessage(message: ActionMessage) {

View File

@ -51,6 +51,8 @@ export interface ThinkAction extends OpenHandsActionEvent<"think"> {
export interface FinishAction extends OpenHandsActionEvent<"finish"> {
source: "agent";
args: {
  /** Final message from the agent, rendered as an assistant message. */
  final_thought: string;
  /**
   * The agent's own verdict on whether the task was completed. The values
   * mirror the backend `AgentFinishTaskCompleted` enum ("true" | "false" |
   * "partial"); `null` is the backend default when no verdict was given.
   */
  task_completed: "true" | "false" | "partial" | null;
  outputs: Record<string, unknown>;
  thought: string;
};

View File

@ -108,7 +108,10 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
# AgentFinishAction
# ================================================
elif tool_call.function.name == FinishTool['function']['name']:
action = AgentFinishAction()
action = AgentFinishAction(
final_thought=arguments.get('message', ''),
task_completed=arguments.get('task_completed', None),
)
# ================================================
# LLMBasedFileEditTool (LLM-based file editor, deprecated)

View File

@ -1,11 +1,39 @@
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
_FINISH_DESCRIPTION = """Finish the interaction when the task is complete OR if the assistant cannot proceed further with the task."""
_FINISH_DESCRIPTION = """Signals the completion of the current task or conversation.
Use this tool when:
- You have successfully completed the user's requested task
- You cannot proceed further due to technical limitations or missing information
The message should include:
- A clear summary of actions taken and their results
- Any next steps for the user
- Explanation if you're unable to complete the task
- Any follow-up questions if more information is needed
The task_completed field should be set to True if you believed you have completed the task, and False otherwise.
"""
# JSON-schema describing the arguments of the `finish` tool call: a free-form
# final message plus the agent's own completion verdict.
_FINISH_PARAMETERS = {
    'type': 'object',
    'required': ['message', 'task_completed'],
    'properties': {
        'message': {
            'type': 'string',
            'description': 'Final message to send to the user',
        },
        'task_completed': {
            'type': 'string',
            'enum': ['true', 'false', 'partial'],
            'description': 'Whether you have completed the task.',
        },
    },
}

# Function chunk naming the tool and binding its description and schema.
_FINISH_FUNCTION = ChatCompletionToolParamFunctionChunk(
    name='finish',
    description=_FINISH_DESCRIPTION,
    parameters=_FINISH_PARAMETERS,
)

# Tool definition for `finish`, in litellm's ChatCompletionToolParam format.
FinishTool = ChatCompletionToolParam(type='function', function=_FINISH_FUNCTION)

View File

@ -77,7 +77,14 @@ class StackInfoFilter(logging.Filter):
def filter(self, record: logging.LogRecord) -> bool:
    """Attach a stack trace, and any active exception, to ERROR-or-higher records.

    Args:
        record: The log record being processed; it is mutated in place.

    Returns:
        Always True, so this filter never drops a record.
    """
    if record.levelno >= logging.ERROR:
        # Capture the current stack and drop the trailing entries, which are
        # related to the logging machinery rather than the caller.
        stack = traceback.format_stack()
        stack = stack[:-3]  # Adjust this number if needed
        # LogRecord attributes are dynamically typed
        setattr(record, 'stack_info', ''.join(stack))

        # Only attach exception details when the caller did not already supply
        # them and an exception is actually being handled. Outside an `except`
        # block sys.exc_info() is (None, None, None), a truthy tuple that
        # formatters would render as a spurious "NoneType: None".
        if not getattr(record, 'exc_info', None):
            active_exc = sys.exc_info()
            if active_exc[0] is not None:
                setattr(record, 'exc_info', active_exc)
    return True

View File

@ -1,4 +1,5 @@
from dataclasses import dataclass, field
from enum import Enum
from typing import Any
from openhands.core.schema import ActionType
@ -33,16 +34,26 @@ class AgentSummarizeAction(Action):
return ret
# The agent's own verdict on whether the task was completed.
# Members carry lowercase string values: 'false', 'partial' and 'true'.
class AgentFinishTaskCompleted(Enum):
FALSE = 'false'
PARTIAL = 'partial'
TRUE = 'true'
@dataclass
class AgentFinishAction(Action):
"""An action where the agent finishes the task.
Attributes:
outputs (dict): The outputs of the agent, for instance "content".
final_thought (str): The message to send to the user.
task_completed (enum): Whether the agent believes the task has been completed.
outputs (dict): The other outputs of the agent, for instance "content".
thought (str): The agent's explanation of its actions.
action (str): The action type, namely ActionType.FINISH.
"""
final_thought: str = ''
task_completed: AgentFinishTaskCompleted | None = None
outputs: dict[str, Any] = field(default_factory=dict)
thought: str = ''
action: str = ActionType.FINISH

View File

@ -84,12 +84,23 @@ def test_message_action_serialization_deserialization():
def test_agent_finish_action_serialization_deserialization():
    """Run a finish-action dict through the shared serialization helper.

    The args include `task_completed` and `final_thought` at their default
    values (None and an empty string) alongside `outputs` and `thought`.
    """
    original_action_dict = {
        'action': 'finish',
        'args': {
            'outputs': {},
            'thought': '',
            'task_completed': None,
            'final_thought': '',
        },
    }
    serialization_deserialization(original_action_dict, AgentFinishAction)
def test_agent_reject_action_serialization_deserialization():
    """Run a reject-action dict through the shared serialization helper."""
    original_action_dict = {
        'action': 'reject',
        'args': {'outputs': {}, 'thought': ''},
    }
    serialization_deserialization(original_action_dict, AgentRejectAction)

View File

@ -347,7 +347,11 @@ async def test_unsafe_bash_command(temp_dir: str):
type='function',
function=Function(
name=ActionType.FINISH,
arguments={'outputs': {'content': 'outputs content'}},
arguments={
'outputs': {'content': 'outputs content'},
'task_completed': None,
'final_thought': '',
},
),
),
],