[agent] Read-only Agent (#6947)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Xingyao Wang 2025-04-18 14:46:24 -04:00 committed by GitHub
parent c6092291ce
commit 91c691d526
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 1014 additions and 21 deletions

View File

@ -7,6 +7,7 @@ from openhands.agenthub import ( # noqa: E402
browsing_agent,
codeact_agent,
dummy_agent,
readonly_agent,
visualbrowsing_agent,
)
from openhands.controller.agent import Agent # noqa: E402
@ -17,4 +18,5 @@ __all__ = [
'dummy_agent',
'browsing_agent',
'visualbrowsing_agent',
'readonly_agent',
]

View File

@ -87,6 +87,8 @@ class CodeActAgent(Agent):
self.condenser = Condenser.from_config(self.config.condenser)
logger.debug(f'Using condenser: {type(self.condenser)}')
self.response_to_actions_fn = codeact_function_calling.response_to_actions
def reset(self) -> None:
"""Resets the CodeAct Agent."""
super().reset()
@ -152,7 +154,7 @@ class CodeActAgent(Agent):
params['extra_body'] = {'metadata': state.to_llm_metadata(agent_name=self.name)}
response = self.llm.completion(**params)
logger.debug(f'Response from LLM: {response}')
actions = codeact_function_calling.response_to_actions(response)
actions = self.response_to_actions_fn(response)
logger.debug(f'Actions after response_to_actions: {actions}')
for action in actions:
self.pending_actions.append(action)

View File

@ -0,0 +1,4 @@
from openhands.agenthub.readonly_agent.readonly_agent import ReadOnlyAgent
from openhands.controller.agent import Agent
# Module-level statement: runs when this package is imported and registers
# the ReadOnlyAgent class with Agent under the name 'ReadOnlyAgent'.
Agent.register('ReadOnlyAgent', ReadOnlyAgent)

View File

@ -0,0 +1,244 @@
"""This file contains the function calling implementation for different actions.
This is similar to the functionality of `CodeActResponseParser`.
"""
import json
import shlex
from litellm import (
ChatCompletionToolParam,
ModelResponse,
)
from openhands.agenthub.codeact_agent.function_calling import (
combine_thought,
)
from openhands.agenthub.codeact_agent.tools import (
FinishTool,
ThinkTool,
WebReadTool,
)
from openhands.agenthub.readonly_agent.tools import (
GlobTool,
GrepTool,
ViewTool,
)
from openhands.core.exceptions import (
FunctionCallNotExistsError,
FunctionCallValidationError,
)
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import (
Action,
AgentFinishAction,
AgentThinkAction,
BrowseURLAction,
CmdRunAction,
FileReadAction,
MessageAction,
)
from openhands.events.event import FileReadSource
from openhands.events.tool import ToolCallMetadata
def grep_to_cmdrun(
pattern: str, path: str | None = None, include: str | None = None
) -> str:
"""Convert grep tool arguments to a shell command string.
Args:
pattern: The regex pattern to search for in file contents
path: The directory to search in (optional)
include: Optional file pattern to filter which files to search (e.g., "*.js")
Returns:
A properly escaped shell command string for ripgrep
"""
# Use shlex.quote to properly escape all shell special characters
quoted_pattern = shlex.quote(pattern)
path_arg = shlex.quote(path) if path else '.'
# Build ripgrep command
rg_cmd = f'rg -li {quoted_pattern} --sortr=modified'
if include:
quoted_include = shlex.quote(include)
rg_cmd += f' --glob {quoted_include}'
# Build the complete command
complete_cmd = f'{rg_cmd} {path_arg} | head -n 100'
# Add a header to the output
echo_cmd = f'echo "Below are the execution results of the search command: {complete_cmd}\n"; '
return echo_cmd + complete_cmd
def glob_to_cmdrun(pattern: str, path: str = '.') -> str:
    """Convert glob tool arguments to a shell command string.

    The returned string consists of an ``echo`` that prints a header showing
    the glob command, followed by the ripgrep command itself, which lists the
    files under ``path`` matching ``pattern``, sorted by modification time and
    limited to the first 100 results.

    Args:
        pattern: The glob pattern to match files (e.g., "**/*.js")
        path: The directory to search in (defaults to current directory; an
            empty or ``None`` value is also treated as the current directory)

    Returns:
        A properly escaped shell command string for ripgrep implementing glob
    """
    # Use shlex.quote to properly escape all shell special characters.
    # Fall back to '.' for a missing path, mirroring grep_to_cmdrun, so an
    # explicit null/empty "path" argument does not crash or yield `rg --files ''`.
    quoted_path = shlex.quote(path or '.')
    quoted_pattern = shlex.quote(pattern)

    # Use ripgrep in a glob-only mode with -g flag and --files to list files
    # This most closely matches the behavior of the NodeJS glob implementation
    rg_cmd = f'rg --files {quoted_path} -g {quoted_pattern} --sortr=modified'

    # Limit to 100 entries (matching the Node.js implementation)
    complete_cmd = f'{rg_cmd} | head -n 100'

    # The header embeds the command inside a double-quoted shell string, where
    # backslash, double quote, dollar sign and backtick are still special.
    # Escape them (backslash first) so the header is printed verbatim instead
    # of being expanded or executed by the shell.
    header_cmd = complete_cmd
    for special in '\\"$`':
        header_cmd = header_cmd.replace(special, '\\' + special)

    # Add a header to the output
    echo_cmd = f'echo "Below are the execution results of the glob command: {header_cmd}\n"; '
    return echo_cmd + complete_cmd
def response_to_actions(response: ModelResponse) -> list[Action]:
    """Convert an LLM response into the actions of the read-only agent.

    When the assistant message carries tool calls, each call is mapped to one
    action (finish, view, think, grep, glob or web_read). Any text content of
    the message is attached as the thought of the first action only, and every
    action receives tool-call metadata. Without tool calls, the message content
    becomes a single ``MessageAction`` that waits for a response.

    Args:
        response: The model response; it must contain exactly one choice.

    Returns:
        A non-empty list of actions, each tagged with the response id.

    Raises:
        RuntimeError: If the arguments of a tool call are not valid JSON.
        FunctionCallValidationError: If a required tool argument is missing.
        FunctionCallNotExistsError: If the tool name is not one handled here.
    """
    assert len(response.choices) == 1, 'Only one choice is supported for now'
    assistant_msg = response.choices[0].message
    tool_calls = getattr(assistant_msg, 'tool_calls', None)
    actions: list[Action] = []

    if not tool_calls:
        # No tool call: surface the assistant text as a plain message.
        reply = str(assistant_msg.content) if assistant_msg.content else ''
        actions.append(MessageAction(content=reply, wait_for_response=True))
    else:
        # Gather the textual content of the message; it becomes the thought.
        content = assistant_msg.content
        if isinstance(content, str):
            thought = content
        elif isinstance(content, list):
            thought = ''.join(
                part['text'] for part in content if part['type'] == 'text'
            )
        else:
            thought = ''

        # Translate every tool call into its OpenHands action
        total_calls = len(tool_calls)
        for index, tool_call in enumerate(tool_calls):
            action: Action
            logger.debug(f'Tool call in function_calling.py: {tool_call}')
            fn_name = tool_call.function.name
            try:
                arguments = json.loads(tool_call.function.arguments)
            except json.decoder.JSONDecodeError as e:
                raise RuntimeError(
                    f'Failed to parse tool call arguments: {tool_call.function.arguments}'
                ) from e

            if fn_name == FinishTool['function']['name']:
                # AgentFinishAction
                action = AgentFinishAction(
                    final_thought=arguments.get('message', ''),
                    task_completed=arguments.get('task_completed'),
                )
            elif fn_name == ViewTool['function']['name']:
                # ViewTool (ACI-based file viewer, READ-ONLY)
                if 'path' not in arguments:
                    raise FunctionCallValidationError(
                        f'Missing required argument "path" in tool call {tool_call.function.name}'
                    )
                action = FileReadAction(
                    path=arguments['path'],
                    impl_source=FileReadSource.OH_ACI,
                    view_range=arguments.get('view_range'),
                )
            elif fn_name == ThinkTool['function']['name']:
                # AgentThinkAction
                action = AgentThinkAction(thought=arguments.get('thought', ''))
            elif fn_name == GrepTool['function']['name']:
                # GrepTool (file content search)
                if 'pattern' not in arguments:
                    raise FunctionCallValidationError(
                        f'Missing required argument "pattern" in tool call {tool_call.function.name}'
                    )
                search_cmd = grep_to_cmdrun(
                    arguments['pattern'],
                    arguments.get('path'),
                    arguments.get('include'),
                )
                action = CmdRunAction(command=search_cmd, is_input=False)
            elif fn_name == GlobTool['function']['name']:
                # GlobTool (file pattern matching)
                if 'pattern' not in arguments:
                    raise FunctionCallValidationError(
                        f'Missing required argument "pattern" in tool call {tool_call.function.name}'
                    )
                listing_cmd = glob_to_cmdrun(
                    arguments['pattern'], arguments.get('path', '.')
                )
                action = CmdRunAction(command=listing_cmd, is_input=False)
            elif fn_name == WebReadTool['function']['name']:
                # WebReadTool (simplified browsing)
                if 'url' not in arguments:
                    raise FunctionCallValidationError(
                        f'Missing required argument "url" in tool call {tool_call.function.name}'
                    )
                action = BrowseURLAction(url=arguments['url'])
            else:
                raise FunctionCallNotExistsError(
                    f'Tool {tool_call.function.name} is not registered. (arguments: {arguments}). Please check the tool name and retry with an existing tool.'
                )

            # Only the first action carries the thought
            if index == 0:
                action = combine_thought(action, thought)

            # Record which tool call produced this action
            action.tool_call_metadata = ToolCallMetadata(
                tool_call_id=tool_call.id,
                function_name=tool_call.function.name,
                model_response=response,
                total_calls_in_response=total_calls,
            )
            actions.append(action)

    # Tag every action (with or without a tool call) with the response id so
    # that it can be matched with the token usage data.
    for produced in actions:
        produced.response_id = response.id

    assert len(actions) >= 1
    return actions
def get_tools() -> list[ChatCompletionToolParam]:
    """Return a new list with the tool definitions of the read-only agent.

    The order is: think, finish, web read, grep, glob, view.
    """
    tools: list[ChatCompletionToolParam] = [ThinkTool, FinishTool, WebReadTool]
    tools.extend((GrepTool, GlobTool, ViewTool))
    return tools

View File

@ -0,0 +1,20 @@
{% if repository_info %}
<REPOSITORY_INFO>
At the user's request, repository {{ repository_info.repo_name }} has been cloned to the current working directory {{ repository_info.repo_directory }}.
</REPOSITORY_INFO>
{% endif %}
{% if repository_instructions -%}
<REPOSITORY_INSTRUCTIONS>
{{ repository_instructions }}
</REPOSITORY_INSTRUCTIONS>
{% endif %}
{% if runtime_info and (runtime_info.additional_agent_instructions or runtime_info.date) -%}
<RUNTIME_INFORMATION>
{% if runtime_info.additional_agent_instructions %}
{{ runtime_info.additional_agent_instructions }}
{% endif %}
{% if runtime_info.date %}
Today's date is {{ runtime_info.date }} (UTC).
{% endif %}
</RUNTIME_INFORMATION>
{% endif %}

View File

@ -0,0 +1,101 @@
Here's a running example of how to perform a task with the provided tools.
--------------------- START OF EXAMPLE ---------------------
USER: I need to find all TypeScript files in the project and then search for files that contain React components.
A: I'll help you find TypeScript files and React components. Let's break this down into steps:
1. First, let's use the glob tool to find all TypeScript files:
<function=glob>
<parameter=pattern>**/*.{ts,tsx}</parameter>
</function>
USER: EXECUTION RESULT of [glob]:
Below are the execution results of the glob command: rg --files . -g **/*.{ts,tsx} --sortr=modified | head -n 100
src/components/Header.tsx
src/components/Button.tsx
src/pages/Home.tsx
src/utils/types.ts
src/App.tsx
src/index.tsx
A: Great! Now let's search for files that likely contain React components by looking for patterns like "export" followed by "function" or "class":
<function=grep>
<parameter=pattern>export\s+(default\s+)?(function|class)</parameter>
<parameter=include>*.{ts,tsx}</parameter>
</function>
USER: EXECUTION RESULT of [grep]:
Below are the execution results of the search command: rg -li export\s+(default\s+)?(function|class) --sortr=modified --glob *.{ts,tsx} . | head -n 100
src/components/Header.tsx
src/components/Button.tsx
src/pages/Home.tsx
src/App.tsx
A: Let's examine one of these components to confirm it's a React component. I'll look at the Header component:
<function=view>
<parameter=path>src/components/Header.tsx</parameter>
</function>
USER: EXECUTION RESULT of [view]:
1 import React from 'react';
2
3 interface HeaderProps {
4 title: string;
5 subtitle?: string;
6 }
7
8 export function Header({ title, subtitle }: HeaderProps) {
9 return (
10 <header className="app-header">
11 <h1>{title}</h1>
12 {subtitle && <h2>{subtitle}</h2>}
13 </header>
14 );
15 }
A: Let me also check the Button component to see its implementation:
<function=view>
<parameter=path>src/components/Button.tsx</parameter>
</function>
USER: EXECUTION RESULT of [view]:
1 import React from 'react';
2
3 interface ButtonProps {
4 onClick: () => void;
5 children: React.ReactNode;
6 variant?: 'primary' | 'secondary';
7 }
8
9 export default function Button({ onClick, children, variant = 'primary' }: ButtonProps) {
10 return (
11 <button
12 className={`btn btn-${variant}`}
13 onClick={onClick}
14 >
15 {children}
16 </button>
17 );
18 }
A: I've found several React components in the project. Here's a summary:
1. Found TypeScript files using glob pattern `**/*.{ts,tsx}`
2. Identified potential React components using grep pattern `export\s+(default\s+)?(function|class)`
3. Confirmed two components:
- `Header.tsx`: A function component that renders a header with title and optional subtitle
- `Button.tsx`: A reusable button component with primary/secondary variants
Would you like me to examine any other components or search for specific features within these components?
<function=finish>
</function>
--------------------- END OF EXAMPLE ---------------------
Do NOT assume the environment is the same as in the example above.
--------------------- NEW TASK DESCRIPTION ---------------------

View File

@ -0,0 +1,3 @@
--------------------- END OF NEW TASK DESCRIPTION ---------------------
PLEASE follow the format strictly! PLEASE EMIT ONE AND ONLY ONE FUNCTION CALL PER MESSAGE.

View File

@ -0,0 +1,8 @@
{% for agent_info in triggered_agents %}
<EXTRA_INFO>
The following information has been included based on a keyword match for "{{ agent_info.trigger }}".
It may or may not be relevant to the user's request.
{{ agent_info.content }}
</EXTRA_INFO>
{% endfor %}

View File

@ -0,0 +1,34 @@
You are OpenHands ReadOnlyAgent, a helpful AI assistant focused on code analysis and exploration. You can:
- Explore and analyze codebases
- Browse the web for relevant information
- Plan potential changes
- Answer questions about code
<CAPABILITIES>
✓ READ-ONLY TOOLS:
- view: Read file contents
- grep: Search for patterns
- glob: List matching files
- think: Analyze information
- web_read: Access web resources
- finish: Complete current task
✗ RESTRICTIONS:
- Cannot modify any files
- Cannot execute state-changing commands
</CAPABILITIES>
<GUIDELINES>
1. When analyzing code or answering questions:
- Be thorough and methodical
- Prioritize accuracy over speed
- Provide detailed explanations
2. For file operations:
- Always verify file locations before accessing
- Don't assume paths are relative to current directory
3. If asked to make changes:
- Explain you are read-only
- Recommend using CodeActAgent instead
</GUIDELINES>

View File

@ -0,0 +1,70 @@
"""
ReadOnlyAgent - A specialized version of CodeActAgent that only uses read-only tools.
"""
import os
from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
from openhands.agenthub.readonly_agent import (
function_calling as readonly_function_calling,
)
from openhands.core.config import AgentConfig
from openhands.core.logger import openhands_logger as logger
from openhands.llm.llm import LLM
from openhands.utils.prompt import PromptManager
class ReadOnlyAgent(CodeActAgent):
    """
    The ReadOnlyAgent is a specialized version of CodeActAgent that only uses read-only tools.

    This agent is designed for safely exploring codebases without making any changes.
    It only has access to tools that don't modify the system: grep, glob, view, think, finish, web_read.

    Use this agent when you want to:
    1. Explore a codebase to understand its structure
    2. Search for specific patterns or code
    3. Research without making any changes

    When you're ready to make changes, switch to the regular CodeActAgent.
    """

    # NOTE: the docstring must be the first statement of the class body;
    # placed after this assignment it would not become ``__doc__``.
    VERSION = '1.0'

    def __init__(
        self,
        llm: LLM,
        config: AgentConfig,
    ) -> None:
        """Initializes a new instance of the ReadOnlyAgent class.

        Parameters:
        - llm (LLM): The llm to be used by this agent
        - config (AgentConfig): The configuration for this agent
        """
        # Initialize the CodeActAgent class but we'll override some of its behavior
        super().__init__(llm, config)

        # Override the tools to only include read-only tools
        # Get the read-only tools from our own function_calling module
        self.tools = readonly_function_calling.get_tools()

        # Set up our own prompt manager, reading the prompts stored in the
        # 'prompts' directory next to this file
        self.prompt_manager = PromptManager(
            prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
        )

        # Parse LLM responses with the read-only function-calling module
        self.response_to_actions_fn = readonly_function_calling.response_to_actions

        logger.debug(
            f"TOOLS loaded for ReadOnlyAgent: {', '.join([tool.get('function').get('name') for tool in self.tools])}"
        )

    def set_mcp_tools(self, mcp_tools: list[dict]) -> None:
        """Sets the list of MCP tools for the agent.

        This implementation only logs a warning; ``mcp_tools`` is not stored.

        Args:
        - mcp_tools (list[dict]): The list of MCP tools.
        """
        logger.warning(
            'ReadOnlyAgent does not support MCP tools. MCP tools will be ignored by the agent.'
        )

View File

@ -0,0 +1,21 @@
"""Tools for the ReadOnlyAgent.
This module defines the read-only tools for the ReadOnlyAgent.
"""
from .glob import GlobTool
from .grep import GrepTool
from .view import ViewTool
# Names exported by this package.
__all__ = ['ViewTool', 'GrepTool', 'GlobTool']

# The read-only tool definitions, listed in the same order as ``__all__``.
READ_ONLY_TOOLS = [ViewTool, GrepTool, GlobTool]

View File

@ -0,0 +1,31 @@
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
# Description shown to the model for the `glob` tool. It must only mention
# tools that the read-only agent actually provides; the previous text pointed
# the model to an "Agent tool" that is not part of this agent's tool set.
_GLOB_DESCRIPTION = """Fast file pattern matching tool.
* Supports glob patterns like "**/*.js" or "src/**/*.ts"
* Use this tool when you need to find files by name patterns
* Returns matching file paths sorted by modification time
* Only the first 100 results are returned. Consider narrowing your search with stricter glob patterns or provide path parameter if you need more results.
* When you are doing an open ended search that may require multiple rounds of globbing and grepping, alternate between this tool and the `grep` tool, narrowing the pattern or path each round
"""

# Tool definition for `glob`: `pattern` is required, `path` is optional.
GlobTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name='glob',
        description=_GLOB_DESCRIPTION,
        parameters={
            'type': 'object',
            'properties': {
                'pattern': {
                    'type': 'string',
                    'description': 'The glob pattern to match files (e.g., "**/*.js", "src/**/*.ts")',
                },
                'path': {
                    'type': 'string',
                    'description': 'The directory (absolute path) to search in. Defaults to the current working directory.',
                },
            },
            'required': ['pattern'],
        },
    ),
)

View File

@ -0,0 +1,37 @@
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
# Description shown to the model for the `grep` tool. It must only mention
# tools that the read-only agent actually provides; the previous text pointed
# the model to an "Agent tool" that is not part of this agent's tool set.
_GREP_DESCRIPTION = """Fast content search tool.
* Searches file contents using regular expressions
* Supports full regex syntax (eg. "log.*Error", "function\\s+\\w+", etc.)
* Filter files by pattern with the include parameter (eg. "*.js", "*.{ts,tsx}")
* Returns matching file paths sorted by modification time.
* Only the first 100 results are returned. Consider narrowing your search with stricter regex patterns or provide path parameter if you need more results.
* Use this tool when you need to find files containing specific patterns
* When you are doing an open ended search that may require multiple rounds of globbing and grepping, alternate between this tool and the `glob` tool, narrowing the pattern or path each round
"""

# Tool definition for `grep`: `pattern` is required, `path` and `include`
# are optional.
GrepTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name='grep',
        description=_GREP_DESCRIPTION,
        parameters={
            'type': 'object',
            'properties': {
                'pattern': {
                    'type': 'string',
                    'description': 'The regex pattern to search for in file contents',
                },
                'path': {
                    'type': 'string',
                    'description': 'The directory (absolute path) to search in. Defaults to the current working directory.',
                },
                'include': {
                    'type': 'string',
                    'description': 'Optional file pattern to filter which files to search (e.g., "*.js", "*.{ts,tsx}")',
                },
            },
            'required': ['pattern'],
        },
    ),
)

View File

@ -0,0 +1,34 @@
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
# Description shown to the model for the `view` tool.
_VIEW_DESCRIPTION = """Reads a file or list directories from the local filesystem.
* The path parameter must be an absolute path, not a relative path.
* If `path` is a file, `view` displays the result of applying `cat -n`; if `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep.
* You can optionally specify a line range to view (especially handy for long files), but it's recommended to read the whole file by not providing this parameter.
* For image files, the tool will display the image for you.
* For large files that exceed the display limit:
- The output will be truncated and marked with `<response clipped>`
- Use the `view_range` parameter to view specific sections after the truncation point
"""

# Schema of the optional `view_range` argument (an array of integers).
_VIEW_RANGE_SCHEMA = {
    'description': 'Optional parameter of `view` command when `path` points to a *file*. If none is given, the full file is shown. If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file.',
    'items': {'type': 'integer'},
    'type': 'array',
}

# Schema of the required `path` argument.
_VIEW_PATH_SCHEMA = {
    'type': 'string',
    'description': 'The absolute path to the file to read or directory to list',
}

# Tool definition for `view`: `path` is required, `view_range` is optional.
ViewTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name='view',
        description=_VIEW_DESCRIPTION,
        parameters={
            'type': 'object',
            'properties': {
                'path': _VIEW_PATH_SCHEMA,
                'view_range': _VIEW_RANGE_SCHEMA,
            },
            'required': ['path'],
        },
    ),
)

View File

@ -15,7 +15,7 @@ ENV POETRY_VIRTUALENVS_PATH=/openhands/poetry \
# Install base system dependencies
RUN apt-get update && \
apt-get install -y --no-install-recommends \
wget curl ca-certificates sudo apt-utils git jq tmux build-essential \
wget curl ca-certificates sudo apt-utils git jq tmux build-essential ripgrep \
{%- if 'ubuntu' in base_image and (base_image.endswith(':latest') or base_image.endswith(':24.04')) -%}
libgl1 \
{%- else %}

View File

@ -0,0 +1,291 @@
"""Tests for the command helper functions in function_calling.py."""
from conftest import (
_close_test_runtime,
_load_runtime,
)
from openhands.agenthub.readonly_agent.function_calling import (
glob_to_cmdrun,
grep_to_cmdrun,
)
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import CmdRunAction
from openhands.events.observation import CmdOutputObservation, ErrorObservation
def _run_cmd_action(runtime, custom_command: str):
    """Run ``custom_command`` in ``runtime`` and return the resulting observation.

    The action and the observation are both logged. The observation must be
    either a ``CmdOutputObservation`` or an ``ErrorObservation``.
    """
    cmd_action = CmdRunAction(command=custom_command)
    logger.info(cmd_action, extra={'msg_type': 'ACTION'})

    observation = runtime.run_action(cmd_action)
    expected_types = (CmdOutputObservation, ErrorObservation)
    assert isinstance(observation, expected_types)

    logger.info(observation, extra={'msg_type': 'OBSERVATION'})
    return observation
def test_grep_to_cmdrun_basic():
    """Check the generated grep command for patterns without special characters."""
    header = 'Below are the execution results'

    # Without the include parameter
    plain_cmd = grep_to_cmdrun('function', 'src')
    for fragment in ('rg -li function', header):
        assert fragment in plain_cmd

    # With the include parameter
    filtered_cmd = grep_to_cmdrun('error', 'src', '*.js')
    for fragment in ('rg -li error', "--glob '*.js'", header):
        assert fragment in filtered_cmd
def test_grep_to_cmdrun_quotes(temp_dir, runtime_cls, run_as_openhands):
    """Check that patterns containing double or single quotes find their file."""
    runtime, _config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)

    # (pattern, command creating a matching file, path expected in the output)
    cases = [
        # Double quotes in pattern
        (
            r'const message = "Hello"',
            'echo \'const message = "Hello";\' > /workspace/test_quotes.js',
            '/workspace/test_quotes.js',
        ),
        # Single quotes in pattern
        (
            "function\\('test'\\)",
            'echo "function(\'test\') {}" > /workspace/test_quotes2.js',
            '/workspace/test_quotes2.js',
        ),
    ]
    try:
        for pattern, setup_cmd, expected_path in cases:
            cmd = grep_to_cmdrun(pattern, '/workspace')
            assert 'rg -li' in cmd

            # Create the test file, then verify the command works on it
            setup_obs = _run_cmd_action(runtime, setup_cmd)
            assert setup_obs.exit_code == 0

            search_obs = _run_cmd_action(runtime, cmd)
            assert search_obs.exit_code == 0
            assert expected_path in search_obs.content
    finally:
        _close_test_runtime(runtime)
def test_grep_to_cmdrun_special_chars(runtime_cls, run_as_openhands, temp_dir):
    """Check that patterns with shell special characters run and find their file."""
    runtime, _config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)

    # Each pattern is paired with the file that is expected in the output.
    # Patterns for backticks.txt and bang.txt are not exercised.
    expectations = [
        (r'x && y \|\| z', 'logical.txt'),  # Shell logical operators (escaping pipe)
        (r'function\(\) \{ return x; \}', 'function.txt'),  # Escaped braces and parentheses
        (r'\$variable', 'dollar.txt'),  # Dollar sign
        (r'\\n newline', 'newline.txt'),  # Escaped characters
        (r'\*\.js', 'wildcard.txt'),  # Wildcards (escaped)
        (r'x > y', 'redirect.txt'),  # Redirection
        (r'a \| b', 'pipe.txt'),  # Pipe (escaped)
        (r'#comment', 'comment.txt'),  # Hash
    ]
    shell_error_markers = ('command not found', 'syntax error', 'unexpected')

    try:
        # Create test directory and files with special pattern content
        setup_cmd = """
mkdir -p /workspace/test_special_patterns && \
echo "testing x && y || z pattern" > /workspace/test_special_patterns/logical.txt && \
echo "function() { return x; }" > /workspace/test_special_patterns/function.txt && \
echo "using \\$variable here" > /workspace/test_special_patterns/dollar.txt && \
echo "using \\`backticks\\` here" > /workspace/test_special_patterns/backticks.txt && \
echo "line with \\n newline chars" > /workspace/test_special_patterns/newline.txt && \
echo "matching *.js wildcard" > /workspace/test_special_patterns/wildcard.txt && \
echo "testing x > y redirection" > /workspace/test_special_patterns/redirect.txt && \
echo "testing a | b pipe" > /workspace/test_special_patterns/pipe.txt && \
echo "line with #comment" > /workspace/test_special_patterns/comment.txt && \
echo "CSS \\!important rule" > /workspace/test_special_patterns/bang.txt
"""
        setup_obs = _run_cmd_action(runtime, setup_cmd)
        assert setup_obs.exit_code == 0, 'Failed to set up test files'

        for pattern, expected_file in expectations:
            # Generate the grep command using our helper function
            cmd = grep_to_cmdrun(pattern, '/workspace/test_special_patterns')
            assert 'rg -li' in cmd
            assert 'Below are the execution results of the search command:' in cmd

            # Execute the command and make sure the shell did not choke on it
            obs = _run_cmd_action(runtime, cmd)
            for marker in shell_error_markers:
                assert marker not in obs.content

            # Check that the pattern was found in the appropriate file
            assert expected_file in obs.content
    finally:
        _close_test_runtime(runtime)
def test_grep_to_cmdrun_paths_with_spaces(runtime_cls, run_as_openhands, temp_dir):
    """Check that grep commands work for search paths containing spaces."""
    runtime, _config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)

    # Search path -> file expected in the command output
    expected_by_path = {
        'src/my project': 'src/my project/test.js',
        'test files/unit tests': 'test files/unit tests/test.js',
    }
    try:
        # Create test files with content in paths with spaces
        setup_cmd = """
mkdir -p "src/my project" "test files/unit tests" "src/special$chars" "path with spaces and $pecial ch@rs" && \
echo "function searchablePattern() { return true; }" > "src/my project/test.js" && \
echo "function testFunction() { return 42; }" > "test files/unit tests/test.js" && \
echo "function specialFunction() { return null; }" > "src/special$chars/test.js" && \
echo "function weirdFunction() { return []; }" > "path with spaces and $pecial ch@rs/test.js"
"""
        setup_obs = _run_cmd_action(runtime, setup_cmd)
        assert setup_obs.exit_code == 0, 'Failed to set up test files'

        for path, expected_file in expected_by_path.items():
            # Generate grep command and execute it
            cmd = grep_to_cmdrun('function', path)
            assert 'rg -li' in cmd

            obs = _run_cmd_action(runtime, cmd)
            assert obs.exit_code == 0, f'Grep command failed for path: {path}'
            assert (
                'function' in obs.content
            ), f'Expected pattern not found in output for path: {path}'

            # Verify the actual file was found
            assert expected_file in obs.content
    finally:
        _close_test_runtime(runtime)
def test_glob_to_cmdrun_basic():
    """Check the generated glob command with an explicit and a default path."""
    common_fragments = (
        'head -n 100',
        'echo "Below are the execution results of the glob command:',
    )

    # Explicit path
    explicit_cmd = glob_to_cmdrun('*.js', 'src')
    assert "rg --files src -g '*.js'" in explicit_cmd
    for fragment in common_fragments:
        assert fragment in explicit_cmd

    # Default path
    default_cmd = glob_to_cmdrun('*.py')
    assert "rg --files . -g '*.py'" in default_cmd
    for fragment in common_fragments:
        assert fragment in default_cmd
def test_glob_to_cmdrun_special_patterns(runtime_cls, run_as_openhands, temp_dir):
    """Check that glob patterns with special characters list the expected files."""
    runtime, _config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)

    # Each glob pattern is paired with the file names expected in the output
    expectations = [
        ('**/*.js', ['file1.js', 'file2.js']),  # Double glob
        ('**/{*.jsx,*.tsx}', ['comp.jsx', 'comp.tsx']),  # Braces
        ('file[0-9].js', ['file1.js', 'file2.js', 'file9.js']),  # Character class
        ('temp?.js', ['temp1.js', 'temp2.js']),  # Single character wildcard
        ('file.{js,ts,jsx}', ['file.js', 'file.ts', 'file.jsx']),  # Multiple extensions
        ('file with spaces.js', ['file with spaces.js']),  # Spaces
    ]
    try:
        # Create test files matching the patterns we'll test
        setup_cmd = r"""
mkdir -p src/components src/utils && \
touch src/file1.js src/file2.js src/file9.js && \
touch src/components/comp.jsx src/components/comp.tsx && \
touch src/$special-file.js && \
touch src/temp1.js src/temp2.js && \
touch src/file.js src/file.ts src/file.jsx && \
touch "src/weird\`file\`.js" && \
touch "src/file with spaces.js"
"""
        setup_obs = _run_cmd_action(runtime, setup_cmd)
        assert setup_obs.exit_code == 0, 'Failed to set up test files'

        for pattern, expected_files in expectations:
            cmd = glob_to_cmdrun(pattern, 'src')
            logger.info(f'Command: {cmd}')

            # Execute the command
            obs = _run_cmd_action(runtime, cmd)
            assert obs.exit_code == 0, f'Glob command failed for pattern: {pattern}'

            # Verify expected files are found
            for file_name in expected_files:
                assert file_name in obs.content
    finally:
        _close_test_runtime(runtime)
def test_glob_to_cmdrun_paths_with_spaces(runtime_cls, run_as_openhands, temp_dir):
    """Check that glob commands work for search paths containing spaces."""
    runtime, _config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)

    # Search path -> file names expected in the command output
    expected_by_path = {
        'project files/src': ('file1.js', 'file2.js'),
        'test results/unit tests': ('test1.js', 'test2.js'),
    }
    try:
        # Create test directories with spaces and special characters
        setup_cmd = """
mkdir -p "project files/src" "test results/unit tests" "weird$path/code" "path with spaces and $pecial ch@rs" && \
touch "project files/src/file1.js" "project files/src/file2.js" && \
touch "test results/unit tests/test1.js" "test results/unit tests/test2.js" && \
touch "weird$path/code/weird1.js" "weird$path/code/weird2.js" && \
touch "path with spaces and $pecial ch@rs/special1.js" "path with spaces and $pecial ch@rs/special2.js"
"""
        setup_obs = _run_cmd_action(runtime, setup_cmd)
        assert setup_obs.exit_code == 0, 'Failed to set up test files'

        for path, expected_files in expected_by_path.items():
            cmd = glob_to_cmdrun('*.js', path)

            # Execute the command
            obs = _run_cmd_action(runtime, cmd)
            assert obs.exit_code == 0, f'Glob command failed for path: {path}'

            # Verify expected files are found in each path
            for file_name in expected_files:
                assert file_name in obs.content
    finally:
        _close_test_runtime(runtime)

View File

@ -1,3 +1,4 @@
from typing import Union
from unittest.mock import Mock
import pytest
@ -5,19 +6,35 @@ from litellm import ChatCompletionMessageToolCall
from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
from openhands.agenthub.codeact_agent.function_calling import (
get_tools as codeact_get_tools,
)
from openhands.agenthub.codeact_agent.function_calling import (
response_to_actions as codeact_response_to_actions,
)
from openhands.agenthub.codeact_agent.tools import (
BrowserTool,
IPythonTool,
LLMBasedFileEditTool,
ThinkTool,
WebReadTool,
create_cmd_run_tool,
create_str_replace_editor_tool,
get_tools,
response_to_actions,
)
from openhands.agenthub.codeact_agent.tools.browser import (
_BROWSER_DESCRIPTION,
_BROWSER_TOOL_DESCRIPTION,
)
from openhands.agenthub.readonly_agent.function_calling import (
get_tools as readonly_get_tools,
)
from openhands.agenthub.readonly_agent.function_calling import (
response_to_actions as readonly_response_to_actions,
)
from openhands.agenthub.readonly_agent.readonly_agent import ReadOnlyAgent
from openhands.agenthub.readonly_agent.tools import (
GlobTool,
GrepTool,
)
from openhands.controller.state.state import State
from openhands.core.config import AgentConfig, LLMConfig
from openhands.core.exceptions import FunctionCallNotExistsError
@ -35,10 +52,20 @@ from openhands.events.tool import ToolCallMetadata
from openhands.llm.llm import LLM
@pytest.fixture(params=['CodeActAgent', 'ReadOnlyAgent'])
def agent_class(request):
    """Return the agent class selected by the current fixture parameter.

    Tests that depend on this fixture run once per entry in `params`.
    Any parameter other than 'CodeActAgent' resolves to `ReadOnlyAgent`.
    """
    if request.param != 'CodeActAgent':
        # Imported at call time, only when the read-only variant is requested.
        from openhands.agenthub.readonly_agent.readonly_agent import ReadOnlyAgent

        return ReadOnlyAgent

    return CodeActAgent
@pytest.fixture
def agent() -> CodeActAgent:
def agent(agent_class) -> Union[CodeActAgent, ReadOnlyAgent]:
config = AgentConfig()
agent = CodeActAgent(llm=LLM(LLMConfig()), config=config)
agent = agent_class(llm=LLM(LLMConfig()), config=config)
agent.llm = Mock()
agent.llm.config = Mock()
agent.llm.config.max_message_chars = 1000
@ -54,7 +81,7 @@ def mock_state() -> State:
return state
def test_reset(agent: CodeActAgent):
def test_reset(agent):
# Add some state
action = MessageAction(content='test')
action._source = EventSource.AGENT
@ -67,7 +94,7 @@ def test_reset(agent: CodeActAgent):
assert len(agent.pending_actions) == 0
def test_step_with_pending_actions(agent: CodeActAgent):
def test_step_with_pending_actions(agent):
# Add a pending action
pending_action = MessageAction(content='test')
pending_action._source = EventSource.AGENT
@ -79,8 +106,8 @@ def test_step_with_pending_actions(agent: CodeActAgent):
assert len(agent.pending_actions) == 0
def test_get_tools_default():
tools = get_tools(
def test_codeact_get_tools_default():
tools = codeact_get_tools(
enable_jupyter=True,
enable_llm_editor=True,
enable_browsing=True,
@ -95,9 +122,24 @@ def test_get_tools_default():
assert 'web_read' in tool_names
def test_get_tools_with_options():
def test_readonly_get_tools_default():
    """Check the default read-only tool set: no execute/edit tools, search tools present."""
    tools = readonly_get_tools()
    assert len(tools) > 0

    tool_names = [tool['function']['name'] for tool in tools]

    # Tools that run commands or edit files must not be offered.
    for forbidden in ('execute_bash', 'execute_ipython_cell', 'edit_file'):
        assert forbidden not in tool_names

    # Tools for reading, searching and thinking must be offered.
    for required in ('web_read', 'grep', 'glob', 'think'):
        assert required in tool_names
def test_codeact_get_tools_with_options():
# Test with all options enabled
tools = get_tools(
tools = codeact_get_tools(
enable_browsing=True,
enable_jupyter=True,
enable_llm_editor=True,
@ -108,7 +150,7 @@ def test_get_tools_with_options():
assert 'edit_file' in tool_names
# Test with all options disabled
tools = get_tools(
tools = codeact_get_tools(
enable_browsing=False,
enable_jupyter=False,
enable_llm_editor=False,
@ -162,7 +204,6 @@ def test_str_replace_editor_tool():
assert 'old_str' in properties
assert 'new_str' in properties
assert 'insert_line' in properties
assert 'view_range' in properties
assert StrReplaceEditorTool['function']['parameters']['required'] == [
'command',
@ -226,7 +267,9 @@ def test_response_to_actions_invalid_tool():
mock_response.choices[0].message.tool_calls[0].function.arguments = '{}'
with pytest.raises(FunctionCallNotExistsError):
response_to_actions(mock_response)
codeact_response_to_actions(mock_response)
with pytest.raises(FunctionCallNotExistsError):
readonly_response_to_actions(mock_response)
def test_step_with_no_pending_actions(mock_state: State):
@ -267,7 +310,10 @@ def test_step_with_no_pending_actions(mock_state: State):
assert action.content == 'Task completed'
def test_correct_tool_description_loaded_based_on_model_name(mock_state: State):
@pytest.mark.parametrize('agent_type', ['CodeActAgent', 'ReadOnlyAgent'])
def test_correct_tool_description_loaded_based_on_model_name(
agent_type, mock_state: State
):
"""Tests that the simplified tool descriptions are loaded for specific models."""
o3_mock_config = Mock()
o3_mock_config.model = 'mock_o3_model'
@ -275,7 +321,16 @@ def test_correct_tool_description_loaded_based_on_model_name(mock_state: State):
llm = Mock()
llm.config = o3_mock_config
agent = CodeActAgent(llm=llm, config=AgentConfig())
if agent_type == 'CodeActAgent':
from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
agent_class = CodeActAgent
else:
from openhands.agenthub.readonly_agent.readonly_agent import ReadOnlyAgent
agent_class = ReadOnlyAgent
agent = agent_class(llm=llm, config=AgentConfig())
for tool in agent.tools:
# Assert all descriptions have less than 1024 characters
assert len(tool['function']['description']) < 1024
@ -284,17 +339,20 @@ def test_correct_tool_description_loaded_based_on_model_name(mock_state: State):
sonnet_mock_config.model = 'mock_sonnet_model'
llm.config = sonnet_mock_config
agent = CodeActAgent(llm=llm, config=AgentConfig())
agent = agent_class(llm=llm, config=AgentConfig())
# Assert existence of the detailed tool descriptions that are longer than 1024 characters
assert any(len(tool['function']['description']) > 1024 for tool in agent.tools)
if agent_type == 'CodeActAgent':
# This only holds for CodeActAgent
assert any(len(tool['function']['description']) > 1024 for tool in agent.tools)
def test_mismatched_tool_call_events_and_auto_add_system_message(mock_state: State):
def test_mismatched_tool_call_events_and_auto_add_system_message(
agent, mock_state: State
):
"""Tests that the agent can convert mismatched tool call events (i.e., an observation with no corresponding action) into messages.
This also tests that the system message is automatically added to the event stream if SystemMessageAction is not present.
"""
agent = CodeActAgent(llm=LLM(LLMConfig()), config=AgentConfig())
tool_call_metadata = Mock(
spec=ToolCallMetadata,
@ -355,6 +413,39 @@ def test_mismatched_tool_call_events_and_auto_add_system_message(mock_state: Sta
assert messages[0].role == 'system'
def test_grep_tool():
    """Check the `grep` tool definition: type, name, known properties and required args."""
    assert GrepTool['type'] == 'function'

    function_spec = GrepTool['function']
    assert function_spec['name'] == 'grep'

    properties = function_spec['parameters']['properties']
    for expected_property in ('pattern', 'path', 'include'):
        assert expected_property in properties

    # Only the search pattern is mandatory.
    assert function_spec['parameters']['required'] == ['pattern']
def test_glob_tool():
    """Check the `glob` tool definition: type, name, known properties and required args."""
    assert GlobTool['type'] == 'function'

    function_spec = GlobTool['function']
    assert function_spec['name'] == 'glob'

    properties = function_spec['parameters']['properties']
    for expected_property in ('pattern', 'path'):
        assert expected_property in properties

    # Only the glob pattern is mandatory.
    assert function_spec['parameters']['required'] == ['pattern']
def test_think_tool():
    """Check the `think` tool definition: type, name, and its single required property."""
    assert ThinkTool['type'] == 'function'

    function_spec = ThinkTool['function']
    assert function_spec['name'] == 'think'

    parameters = function_spec['parameters']
    assert 'thought' in parameters['properties']
    assert parameters['required'] == ['thought']
def test_enhance_messages_adds_newlines_between_consecutive_user_messages(
agent: CodeActAgent,
):