integrate LocAgent into OpenHands (#7371)

Co-authored-by: czlll <gangda@huaihe.usc.edu> Co-authored-by: Hoang Tran <descience.thh10@gmail.com>
2026-03-22 13:47:19 +08:00 · 2025-05-23 23:42:58 +08:00
parent fa5b52298e
commit efe287ce34
17 changed files with 2180 additions and 84 deletions
--- a/openhands/agenthub/loc_agent/function_calling.py
+++ b/openhands/agenthub/loc_agent/function_calling.py
@@ -0,0 +1,126 @@
+"""This file contains the function calling implementation for different actions.
+
+This is similar to the functionality of `CodeActResponseParser`.
+"""
+
+import json
+
+
+from litellm import (
+    ChatCompletionToolParam,
+    ModelResponse,
+)
+
+from openhands.agenthub.codeact_agent.tools import FinishTool
+from openhands.agenthub.codeact_agent.function_calling import combine_thought
+from openhands.agenthub.loc_agent.tools import (
+    SearchEntityTool,
+    SearchRepoTool,
+    create_explore_tree_structure_tool,
+)
+from openhands.core.exceptions import (
+    FunctionCallNotExistsError,
+)
+from openhands.core.logger import openhands_logger as logger
+from openhands.events.action import (
+    Action,
+    AgentFinishAction,
+    IPythonRunCellAction,
+    MessageAction,
+)
+from openhands.events.tool import ToolCallMetadata
+
+
+def response_to_actions(
+    response: ModelResponse, mcp_tool_names: list[str] | None = None,
+) -> list[Action]:
+    actions: list[Action] = []
+    assert len(response.choices) == 1, 'Only one choice is supported for now'
+    choice = response.choices[0]
+    assistant_msg = choice.message
+
+    if hasattr(assistant_msg, 'tool_calls') and assistant_msg.tool_calls:
+        # Check if there's assistant_msg.content. If so, add it to the thought
+        thought = ''
+        if isinstance(assistant_msg.content, str):
+            thought = assistant_msg.content
+        elif isinstance(assistant_msg.content, list):
+            for msg in assistant_msg.content:
+                if msg['type'] == 'text':
+                    thought += msg['text']
+
+        # Process each tool call to OpenHands action
+        for i, tool_call in enumerate(assistant_msg.tool_calls):
+            action: Action
+            logger.debug(f'Tool call in function_calling.py: {tool_call}')
+            try:
+                arguments = json.loads(tool_call.function.arguments)
+            except json.decoder.JSONDecodeError as e:
+                raise RuntimeError(
+                    f'Failed to parse tool call arguments: {tool_call.function.arguments}'
+                ) from e
+
+            # ================================================
+            # LocAgent's Tools
+            # ================================================
+            ALL_FUNCTIONS = [
+                'explore_tree_structure',
+                'search_code_snippets',
+                'get_entity_contents',
+            ]
+            if tool_call.function.name in ALL_FUNCTIONS:
+                # We implement this in agent_skills, which can be used via Jupyter
+                func_name = tool_call.function.name
+                code = f'print({func_name}(**{arguments}))'
+                logger.debug(f'TOOL CALL: {func_name} with code: {code}')
+                action = IPythonRunCellAction(code=code)
+
+            # ================================================
+            # AgentFinishAction
+            # ================================================
+            elif tool_call.function.name == FinishTool['function']['name']:
+                action = AgentFinishAction(
+                    final_thought=arguments.get('message', ''),
+                    task_completed=arguments.get('task_completed', None),
+                )
+            else:
+                raise FunctionCallNotExistsError(
+                    f'Tool {tool_call.function.name} is not registered. (arguments: {arguments}). Please check the tool name and retry with an existing tool.'
+                )
+        
+            # We only add thought to the first action
+            if i == 0:
+                action = combine_thought(action, thought)
+            # Add metadata for tool calling
+            action.tool_call_metadata = ToolCallMetadata(
+                tool_call_id=tool_call.id,
+                function_name=tool_call.function.name,
+                model_response=response,
+                total_calls_in_response=len(assistant_msg.tool_calls),
+            )
+            actions.append(action)
+    else:
+        actions.append(
+            MessageAction(
+                content=str(assistant_msg.content) if assistant_msg.content else '',
+                wait_for_response=True,
+            )
+        )
+    
+    # Add response id to actions
+    # This will ensure we can match both actions without tool calls (e.g. MessageAction)
+    # and actions with tool calls (e.g. CmdRunAction, IPythonRunCellAction, etc.)
+    # with the token usage data
+    for action in actions:
+        action.response_id = response.id
+
+    assert len(actions) >= 1
+    return actions
+        
+
+def get_tools() -> list[ChatCompletionToolParam]:
+    tools = [FinishTool]
+    tools.append(SearchRepoTool)
+    tools.append(SearchEntityTool)
+    tools.append(create_explore_tree_structure_tool(use_simplified_description=True))
+    return tools