mirror of
https://github.com/OpenHands/OpenHands.git
synced 2026-03-22 13:47:19 +08:00
[agent] Add "thinking" tool only (#6977)
This commit is contained in:
@@ -4178,6 +4178,21 @@
|
||||
"es": "Navegando en la web",
|
||||
"tr": "Web'de geziniyor"
|
||||
},
|
||||
"ACTION_MESSAGE$THINK": {
|
||||
"en": "Thinking",
|
||||
"zh-CN": "思考",
|
||||
"zh-TW": "思考",
|
||||
"ko-KR": "생각",
|
||||
"ja": "考える",
|
||||
"no": "Tenker",
|
||||
"ar": "يفكر",
|
||||
"de": "Denkt",
|
||||
"fr": "Réflexion",
|
||||
"it": "Pensando",
|
||||
"pt": "Pensando",
|
||||
"es": "Pensando",
|
||||
"tr": "Düşünüyor"
|
||||
},
|
||||
"OBSERVATION_MESSAGE$RUN": {
|
||||
"en": "Ran a bash command",
|
||||
"zh-CN": "运行",
|
||||
|
||||
@@ -48,6 +48,8 @@ export function handleObservationMessage(message: ObservationMessage) {
|
||||
break;
|
||||
case ObservationType.READ:
|
||||
case ObservationType.EDIT:
|
||||
case ObservationType.THINK:
|
||||
case ObservationType.NULL:
|
||||
break; // We don't display the default message for these observations
|
||||
default:
|
||||
store.dispatch(addAssistantMessage(message.message));
|
||||
|
||||
@@ -115,6 +115,8 @@ export const chatSlice = createSlice({
|
||||
) {
|
||||
text += `\n\n${getRiskText(action.payload.args.security_risk as unknown as ActionSecurityRisk)}`;
|
||||
}
|
||||
} else if (actionID === "think") {
|
||||
text = action.payload.args.thought;
|
||||
}
|
||||
const message: Message = {
|
||||
type: "action",
|
||||
|
||||
@@ -26,6 +26,9 @@ enum ActionType {
|
||||
// Delegate a (sub)task to another agent.
|
||||
DELEGATE = "delegate",
|
||||
|
||||
// Logs a thought.
|
||||
THINK = "think",
|
||||
|
||||
// If you're absolutely certain that you've completed your task and have tested your work,
|
||||
// use the finish action to stop working.
|
||||
FINISH = "finish",
|
||||
|
||||
@@ -41,6 +41,13 @@ export interface IPythonAction extends OpenHandsActionEvent<"run_ipython"> {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Action event of type `"think"`, issued by the agent to log a thought.
 *
 * The thought text travels in `args.thought`; the action carries no other
 * arguments.
 */
export interface ThinkAction extends OpenHandsActionEvent<"think"> {
  source: "agent";
  args: { thought: string };
}
|
||||
|
||||
export interface FinishAction extends OpenHandsActionEvent<"finish"> {
|
||||
source: "agent";
|
||||
args: {
|
||||
@@ -129,6 +136,7 @@ export type OpenHandsAction =
|
||||
| AssistantMessageAction
|
||||
| CommandAction
|
||||
| IPythonAction
|
||||
| ThinkAction
|
||||
| FinishAction
|
||||
| DelegateAction
|
||||
| BrowseAction
|
||||
|
||||
@@ -10,6 +10,7 @@ export type OpenHandsEventType =
|
||||
| "browse"
|
||||
| "browse_interactive"
|
||||
| "reject"
|
||||
| "think"
|
||||
| "finish"
|
||||
| "error";
|
||||
|
||||
|
||||
@@ -80,8 +80,17 @@ export interface ErrorObservation extends OpenHandsObservationEvent<"error"> {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Observation event of type `"think"`: the response to an agent's thought
 * (usually a static acknowledgement message).
 *
 * NOTE(review): the backend `AgentThinkObservation` dataclass declares no
 * `thought` field of its own, so `extras.thought` may not actually be
 * populated on the wire — confirm against the event serializer before
 * reading it.
 */
export interface AgentThinkObservation
  extends OpenHandsObservationEvent<"think"> {
  source: "agent";
  extras: { thought: string };
}
|
||||
|
||||
export type OpenHandsObservation =
|
||||
| AgentStateChangeObservation
|
||||
| AgentThinkObservation
|
||||
| CommandObservation
|
||||
| IPythonObservation
|
||||
| DelegateObservation
|
||||
|
||||
@@ -22,6 +22,12 @@ enum ObservationType {
|
||||
|
||||
// Delegate result
|
||||
DELEGATE = "delegate",
|
||||
|
||||
// A response to the agent's thought (usually a static message)
|
||||
THINK = "think",
|
||||
|
||||
// A no-op observation
|
||||
NULL = "null",
|
||||
}
|
||||
|
||||
export default ObservationType;
|
||||
|
||||
@@ -17,6 +17,7 @@ from openhands.agenthub.codeact_agent.tools import (
|
||||
IPythonTool,
|
||||
LLMBasedFileEditTool,
|
||||
StrReplaceEditorTool,
|
||||
ThinkTool,
|
||||
WebReadTool,
|
||||
)
|
||||
from openhands.core.exceptions import (
|
||||
@@ -27,6 +28,7 @@ from openhands.events.action import (
|
||||
Action,
|
||||
AgentDelegateAction,
|
||||
AgentFinishAction,
|
||||
AgentThinkAction,
|
||||
BrowseInteractiveAction,
|
||||
BrowseURLAction,
|
||||
CmdRunAction,
|
||||
@@ -42,7 +44,9 @@ from openhands.events.tool import ToolCallMetadata
|
||||
def combine_thought(action: Action, thought: str) -> Action:
    """Merge ``thought`` into ``action.thought`` and return the same action.

    Args:
        action: The action to annotate. Actions without a ``thought``
            attribute are returned untouched.
        thought: Text to attach. An empty string leaves the action unchanged.

    Returns:
        The ``action`` object that was passed in (mutated in place when a
        thought was attached).
    """
    if not hasattr(action, 'thought'):
        return action
    if thought:
        existing = action.thought
        # Keep any thought the action already carries: the incoming text is
        # prepended on its own line rather than overwriting it.
        action.thought = f'{thought}\n{existing}' if existing else thought
    return action
|
||||
|
||||
@@ -71,6 +75,11 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
|
||||
raise RuntimeError(
|
||||
f'Failed to parse tool call arguments: {tool_call.function.arguments}'
|
||||
) from e
|
||||
|
||||
# ================================================
|
||||
# CmdRunTool (Bash)
|
||||
# ================================================
|
||||
|
||||
if tool_call.function.name == CmdRunTool['function']['name']:
|
||||
if 'command' not in arguments:
|
||||
raise FunctionCallValidationError(
|
||||
@@ -79,6 +88,10 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
|
||||
# convert is_input to boolean
|
||||
is_input = arguments.get('is_input', 'false') == 'true'
|
||||
action = CmdRunAction(command=arguments['command'], is_input=is_input)
|
||||
|
||||
# ================================================
|
||||
# IPythonTool (Jupyter)
|
||||
# ================================================
|
||||
elif tool_call.function.name == IPythonTool['function']['name']:
|
||||
if 'code' not in arguments:
|
||||
raise FunctionCallValidationError(
|
||||
@@ -90,8 +103,16 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
|
||||
agent='BrowsingAgent',
|
||||
inputs=arguments,
|
||||
)
|
||||
|
||||
# ================================================
|
||||
# AgentFinishAction
|
||||
# ================================================
|
||||
elif tool_call.function.name == FinishTool['function']['name']:
|
||||
action = AgentFinishAction()
|
||||
|
||||
# ================================================
|
||||
# LLMBasedFileEditTool (LLM-based file editor, deprecated)
|
||||
# ================================================
|
||||
elif tool_call.function.name == LLMBasedFileEditTool['function']['name']:
|
||||
if 'path' not in arguments:
|
||||
raise FunctionCallValidationError(
|
||||
@@ -138,12 +159,25 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
|
||||
impl_source=FileEditSource.OH_ACI,
|
||||
**other_kwargs,
|
||||
)
|
||||
# ================================================
|
||||
# AgentThinkAction
|
||||
# ================================================
|
||||
elif tool_call.function.name == ThinkTool['function']['name']:
|
||||
action = AgentThinkAction(thought=arguments.get('thought', ''))
|
||||
|
||||
# ================================================
|
||||
# BrowserTool
|
||||
# ================================================
|
||||
elif tool_call.function.name == BrowserTool['function']['name']:
|
||||
if 'code' not in arguments:
|
||||
raise FunctionCallValidationError(
|
||||
f'Missing required argument "code" in tool call {tool_call.function.name}'
|
||||
)
|
||||
action = BrowseInteractiveAction(browser_actions=arguments['code'])
|
||||
|
||||
# ================================================
|
||||
# WebReadTool (simplified browsing)
|
||||
# ================================================
|
||||
elif tool_call.function.name == WebReadTool['function']['name']:
|
||||
if 'url' not in arguments:
|
||||
raise FunctionCallValidationError(
|
||||
@@ -183,7 +217,7 @@ def get_tools(
|
||||
codeact_enable_llm_editor: bool = False,
|
||||
codeact_enable_jupyter: bool = False,
|
||||
) -> list[ChatCompletionToolParam]:
|
||||
tools = [CmdRunTool, FinishTool]
|
||||
tools = [CmdRunTool, ThinkTool, FinishTool]
|
||||
if codeact_enable_browsing:
|
||||
tools.append(WebReadTool)
|
||||
tools.append(BrowserTool)
|
||||
|
||||
@@ -4,6 +4,7 @@ from .finish import FinishTool
|
||||
from .ipython import IPythonTool
|
||||
from .llm_based_edit import LLMBasedFileEditTool
|
||||
from .str_replace_editor import StrReplaceEditorTool
|
||||
from .think import ThinkTool
|
||||
from .web_read import WebReadTool
|
||||
|
||||
__all__ = [
|
||||
@@ -14,4 +15,5 @@ __all__ = [
|
||||
'LLMBasedFileEditTool',
|
||||
'StrReplaceEditorTool',
|
||||
'WebReadTool',
|
||||
'ThinkTool',
|
||||
]
|
||||
|
||||
27
openhands/agenthub/codeact_agent/tools/think.py
Normal file
27
openhands/agenthub/codeact_agent/tools/think.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
|
||||
|
||||
# Description sent to the LLM as part of the tool definition. This is runtime
# text: any edit changes what the model is told about the tool.
_THINK_DESCRIPTION = """Use the tool to think about something. It will not obtain new information or make any changes to the repository, but just log the thought. Use it when complex reasoning or brainstorming is needed.

Common use cases:
1. When exploring a repository and discovering the source of a bug, call this tool to brainstorm several unique ways of fixing the bug, and assess which change(s) are likely to be simplest and most effective.
2. After receiving test results, use this tool to brainstorm ways to fix failing tests.
3. When planning a complex refactoring, use this tool to outline different approaches and their tradeoffs.
4. When designing a new feature, use this tool to think through architecture decisions and implementation details.
5. When debugging a complex issue, use this tool to organize your thoughts and hypotheses.

The tool simply logs your thought process for better transparency and does not execute any code or make changes."""

# Function half of the tool definition: a single required string argument,
# `thought`, described by a JSON-schema object.
_think_function = ChatCompletionToolParamFunctionChunk(
    name='think',
    description=_THINK_DESCRIPTION,
    parameters={
        'type': 'object',
        'properties': {
            'thought': {'type': 'string', 'description': 'The thought to log.'},
        },
        'required': ['thought'],
    },
)

# Tool definition for the `think` function.
ThinkTool = ChatCompletionToolParam(type='function', function=_think_function)
|
||||
@@ -7,6 +7,7 @@ from openhands.events.action import (
|
||||
Action,
|
||||
AgentDelegateAction,
|
||||
AgentFinishAction,
|
||||
AgentThinkAction,
|
||||
BrowseInteractiveAction,
|
||||
BrowseURLAction,
|
||||
CmdRunAction,
|
||||
@@ -19,6 +20,7 @@ from openhands.events.event import Event
|
||||
from openhands.events.observation import (
|
||||
AgentCondensationObservation,
|
||||
AgentDelegateObservation,
|
||||
AgentThinkObservation,
|
||||
BrowserOutputObservation,
|
||||
CmdOutputObservation,
|
||||
FileEditObservation,
|
||||
@@ -151,6 +153,7 @@ def get_action_message(
|
||||
FileReadAction,
|
||||
BrowseInteractiveAction,
|
||||
BrowseURLAction,
|
||||
AgentThinkAction,
|
||||
),
|
||||
) or (isinstance(action, CmdRunAction) and action.source == 'agent'):
|
||||
tool_metadata = action.tool_call_metadata
|
||||
@@ -323,6 +326,9 @@ def get_observation_message(
|
||||
max_message_chars,
|
||||
)
|
||||
message = Message(role='user', content=[TextContent(text=text)])
|
||||
elif isinstance(obs, AgentThinkObservation):
|
||||
text = truncate_content(obs.content, max_message_chars)
|
||||
message = Message(role='user', content=[TextContent(text=text)])
|
||||
elif isinstance(obs, ErrorObservation):
|
||||
text = truncate_content(obs.content, max_message_chars)
|
||||
text += '\n[Error occurred in processing last action]'
|
||||
|
||||
@@ -44,6 +44,10 @@ class ActionTypeSchema(BaseModel):
|
||||
"""Delegates a task to another agent.
|
||||
"""
|
||||
|
||||
THINK: str = Field(default='think')
|
||||
"""Logs a thought.
|
||||
"""
|
||||
|
||||
FINISH: str = Field(default='finish')
|
||||
"""If you're absolutely certain that you've completed your task and have tested your work,
|
||||
use the finish action to stop working.
|
||||
|
||||
@@ -40,6 +40,8 @@ class ObservationTypeSchema(BaseModel):
|
||||
|
||||
NULL: str = Field(default='null')
|
||||
|
||||
THINK: str = Field(default='think')
|
||||
|
||||
AGENT_STATE_CHANGED: str = Field(default='agent_state_changed')
|
||||
|
||||
USER_REJECTED: str = Field(default='user_rejected')
|
||||
|
||||
@@ -4,6 +4,7 @@ from openhands.events.action.agent import (
|
||||
AgentFinishAction,
|
||||
AgentRejectAction,
|
||||
AgentSummarizeAction,
|
||||
AgentThinkAction,
|
||||
ChangeAgentStateAction,
|
||||
)
|
||||
from openhands.events.action.browse import BrowseInteractiveAction, BrowseURLAction
|
||||
@@ -33,4 +34,5 @@ __all__ = [
|
||||
'IPythonRunCellAction',
|
||||
'MessageAction',
|
||||
'ActionConfirmationStatus',
|
||||
'AgentThinkAction',
|
||||
]
|
||||
|
||||
@@ -54,6 +54,23 @@ class AgentFinishAction(Action):
|
||||
return "All done! What's next on the agenda?"
|
||||
|
||||
|
||||
@dataclass
class AgentThinkAction(Action):
    """Action through which the agent logs a thought.

    Attributes:
        thought (str): The thought text to log; defaults to an empty string.
        action (str): The action type, fixed to ActionType.THINK.
    """

    thought: str = ''
    action: str = ActionType.THINK

    @property
    def message(self) -> str:
        # Human-readable rendering: a fixed prefix followed by the thought.
        return 'I am thinking...: {}'.format(self.thought)
|
||||
|
||||
|
||||
@dataclass
|
||||
class AgentRejectAction(Action):
|
||||
outputs: dict = field(default_factory=dict)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from openhands.events.observation.agent import (
|
||||
AgentCondensationObservation,
|
||||
AgentStateChangedObservation,
|
||||
AgentThinkObservation,
|
||||
)
|
||||
from openhands.events.observation.browse import BrowserOutputObservation
|
||||
from openhands.events.observation.commands import (
|
||||
@@ -9,7 +10,9 @@ from openhands.events.observation.commands import (
|
||||
IPythonRunCellObservation,
|
||||
)
|
||||
from openhands.events.observation.delegate import AgentDelegateObservation
|
||||
from openhands.events.observation.empty import NullObservation
|
||||
from openhands.events.observation.empty import (
|
||||
NullObservation,
|
||||
)
|
||||
from openhands.events.observation.error import ErrorObservation
|
||||
from openhands.events.observation.files import (
|
||||
FileEditObservation,
|
||||
@@ -23,6 +26,7 @@ from openhands.events.observation.success import SuccessObservation
|
||||
__all__ = [
|
||||
'Observation',
|
||||
'NullObservation',
|
||||
'AgentThinkObservation',
|
||||
'CmdOutputObservation',
|
||||
'CmdOutputMetadata',
|
||||
'IPythonRunCellObservation',
|
||||
|
||||
@@ -25,3 +25,18 @@ class AgentCondensationObservation(Observation):
|
||||
@property
|
||||
def message(self) -> str:
|
||||
return self.content
|
||||
|
||||
|
||||
@dataclass
class AgentThinkObservation(Observation):
    """Observation returned in response to a think action.

    Effectively a no-op: it only carries a static reply acknowledging to the
    agent that its thought has been logged.
    """

    # Observation type tag for this class.
    observation: str = ObservationType.THINK

    @property
    def message(self) -> str:
        # The message is the observation content, unmodified.
        acknowledgement = self.content
        return acknowledgement
|
||||
|
||||
@@ -6,6 +6,7 @@ from openhands.events.action.agent import (
|
||||
AgentDelegateAction,
|
||||
AgentFinishAction,
|
||||
AgentRejectAction,
|
||||
AgentThinkAction,
|
||||
ChangeAgentStateAction,
|
||||
)
|
||||
from openhands.events.action.browse import BrowseInteractiveAction, BrowseURLAction
|
||||
@@ -30,6 +31,7 @@ actions = (
|
||||
FileReadAction,
|
||||
FileWriteAction,
|
||||
FileEditAction,
|
||||
AgentThinkAction,
|
||||
AgentFinishAction,
|
||||
AgentRejectAction,
|
||||
AgentDelegateAction,
|
||||
|
||||
@@ -3,6 +3,7 @@ import copy
|
||||
from openhands.events.observation.agent import (
|
||||
AgentCondensationObservation,
|
||||
AgentStateChangedObservation,
|
||||
AgentThinkObservation,
|
||||
)
|
||||
from openhands.events.observation.browse import BrowserOutputObservation
|
||||
from openhands.events.observation.commands import (
|
||||
@@ -11,7 +12,9 @@ from openhands.events.observation.commands import (
|
||||
IPythonRunCellObservation,
|
||||
)
|
||||
from openhands.events.observation.delegate import AgentDelegateObservation
|
||||
from openhands.events.observation.empty import NullObservation
|
||||
from openhands.events.observation.empty import (
|
||||
NullObservation,
|
||||
)
|
||||
from openhands.events.observation.error import ErrorObservation
|
||||
from openhands.events.observation.files import (
|
||||
FileEditObservation,
|
||||
@@ -36,6 +39,7 @@ observations = (
|
||||
AgentStateChangedObservation,
|
||||
UserRejectObservation,
|
||||
AgentCondensationObservation,
|
||||
AgentThinkObservation,
|
||||
)
|
||||
|
||||
OBSERVATION_TYPE_TO_CLASS = {
|
||||
|
||||
@@ -22,6 +22,7 @@ from openhands.events import EventSource, EventStream, EventStreamSubscriber
|
||||
from openhands.events.action import (
|
||||
Action,
|
||||
ActionConfirmationStatus,
|
||||
AgentThinkAction,
|
||||
BrowseInteractiveAction,
|
||||
BrowseURLAction,
|
||||
CmdRunAction,
|
||||
@@ -31,6 +32,7 @@ from openhands.events.action import (
|
||||
)
|
||||
from openhands.events.event import Event
|
||||
from openhands.events.observation import (
|
||||
AgentThinkObservation,
|
||||
CmdOutputObservation,
|
||||
ErrorObservation,
|
||||
FileReadObservation,
|
||||
@@ -381,6 +383,8 @@ class Runtime(FileEditRuntimeMixin):
|
||||
If the action is not supported by the current runtime, an ErrorObservation is returned.
|
||||
"""
|
||||
if not action.runnable:
|
||||
if isinstance(action, AgentThinkAction):
|
||||
return AgentThinkObservation('Your thought has been logged.')
|
||||
return NullObservation('')
|
||||
if (
|
||||
hasattr(action, 'confirmation_state')
|
||||
|
||||
@@ -16,6 +16,7 @@ from openhands.core.exceptions import (
|
||||
from openhands.events import EventStream
|
||||
from openhands.events.action import (
|
||||
ActionConfirmationStatus,
|
||||
AgentThinkAction,
|
||||
BrowseInteractiveAction,
|
||||
BrowseURLAction,
|
||||
CmdRunAction,
|
||||
@@ -27,6 +28,7 @@ from openhands.events.action import (
|
||||
from openhands.events.action.action import Action
|
||||
from openhands.events.action.files import FileEditSource
|
||||
from openhands.events.observation import (
|
||||
AgentThinkObservation,
|
||||
ErrorObservation,
|
||||
NullObservation,
|
||||
Observation,
|
||||
@@ -230,6 +232,8 @@ class ActionExecutionClient(Runtime):
|
||||
|
||||
with self.action_semaphore:
|
||||
if not action.runnable:
|
||||
if isinstance(action, AgentThinkAction):
|
||||
return AgentThinkObservation('Your thought has been logged.')
|
||||
return NullObservation('')
|
||||
if (
|
||||
hasattr(action, 'confirmation_state')
|
||||
|
||||
Reference in New Issue
Block a user