diff --git a/openhands/agenthub/__init__.py b/openhands/agenthub/__init__.py index b625e18f50..49918510d1 100644 --- a/openhands/agenthub/__init__.py +++ b/openhands/agenthub/__init__.py @@ -7,6 +7,7 @@ from openhands.agenthub import ( # noqa: E402 browsing_agent, codeact_agent, dummy_agent, + readonly_agent, visualbrowsing_agent, ) from openhands.controller.agent import Agent # noqa: E402 @@ -17,4 +18,5 @@ __all__ = [ 'dummy_agent', 'browsing_agent', 'visualbrowsing_agent', + 'readonly_agent', ] diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py index 3001209704..ab75be6e39 100644 --- a/openhands/agenthub/codeact_agent/codeact_agent.py +++ b/openhands/agenthub/codeact_agent/codeact_agent.py @@ -87,6 +87,8 @@ class CodeActAgent(Agent): self.condenser = Condenser.from_config(self.config.condenser) logger.debug(f'Using condenser: {type(self.condenser)}') + self.response_to_actions_fn = codeact_function_calling.response_to_actions + def reset(self) -> None: """Resets the CodeAct Agent.""" super().reset() @@ -152,7 +154,7 @@ class CodeActAgent(Agent): params['extra_body'] = {'metadata': state.to_llm_metadata(agent_name=self.name)} response = self.llm.completion(**params) logger.debug(f'Response from LLM: {response}') - actions = codeact_function_calling.response_to_actions(response) + actions = self.response_to_actions_fn(response) logger.debug(f'Actions after response_to_actions: {actions}') for action in actions: self.pending_actions.append(action) diff --git a/openhands/agenthub/readonly_agent/__init__.py b/openhands/agenthub/readonly_agent/__init__.py new file mode 100644 index 0000000000..0e91911887 --- /dev/null +++ b/openhands/agenthub/readonly_agent/__init__.py @@ -0,0 +1,4 @@ +from openhands.agenthub.readonly_agent.readonly_agent import ReadOnlyAgent +from openhands.controller.agent import Agent + +Agent.register('ReadOnlyAgent', ReadOnlyAgent) diff --git 
# Characters that a POSIX shell still interprets inside a double-quoted string.
# Each one is prefixed with a backslash so the echoed header is printed
# verbatim instead of being expanded or executed.
_DOUBLE_QUOTE_ESCAPES = str.maketrans(
    {
        '\\': '\\\\',
        '$': '\\$',
        '`': '\\`',
        '"': '\\"',
    }
)


def _with_echo_header(label: str, complete_cmd: str) -> str:
    """Prefix ``complete_cmd`` with an ``echo`` that announces what is being run.

    The header repeats the command text inside a double-quoted shell string.
    ``shlex.quote`` only protects the arguments where they are actually
    executed; inside the surrounding double quotes the single quotes it adds
    are plain characters, so ``$(...)``, backticks, ``$VAR`` and ``"`` coming
    from the tool arguments would otherwise be expanded or run by the shell.
    They are therefore backslash-escaped for the header only.

    Args:
        label: Word inserted into the header ("search" or "glob").
        complete_cmd: The fully quoted command line that will be executed.

    Returns:
        ``echo "<header>\\n"; <complete_cmd>`` as a single shell command string.
    """
    header = (
        f'Below are the execution results of the {label} command: {complete_cmd}'
    ).translate(_DOUBLE_QUOTE_ESCAPES)
    # The literal newline inside the quotes makes echo print a blank line
    # between the header and the command output.
    return f'echo "{header}\n"; {complete_cmd}'


def grep_to_cmdrun(
    pattern: str, path: str | None = None, include: str | None = None
) -> str:
    """Convert grep tool arguments to a shell command string.

    Args:
        pattern: The regex pattern to search for in file contents
        path: The directory to search in (optional, defaults to ".")
        include: Optional file pattern to filter which files to search (e.g., "*.js")

    Returns:
        A properly escaped shell command string for ripgrep, preceded by an
        ``echo`` header describing the command. Output is limited to the first
        100 matching files.
    """
    # Use shlex.quote to properly escape all shell special characters
    quoted_pattern = shlex.quote(pattern)
    path_arg = shlex.quote(path) if path else '.'

    # -l: print only file names, -i: case-insensitive, newest files first
    rg_cmd = f'rg -li {quoted_pattern} --sortr=modified'
    if include:
        rg_cmd += f' --glob {shlex.quote(include)}'

    complete_cmd = f'{rg_cmd} {path_arg} | head -n 100'
    return _with_echo_header('search', complete_cmd)


def glob_to_cmdrun(pattern: str, path: str = '.') -> str:
    """Convert glob tool arguments to a shell command string.

    Args:
        pattern: The glob pattern to match files (e.g., "**/*.js")
        path: The directory to search in (defaults to current directory). A
            missing/empty value (e.g. an explicit JSON ``null`` from the model)
            is treated as the current directory instead of raising.

    Returns:
        A properly escaped shell command string for ripgrep implementing glob,
        preceded by an ``echo`` header describing the command. Output is
        limited to the first 100 entries.
    """
    # Use shlex.quote to properly escape all shell special characters
    quoted_path = shlex.quote(path) if path else '.'
    quoted_pattern = shlex.quote(pattern)

    # Use ripgrep in a glob-only mode with -g flag and --files to list files
    rg_cmd = f'rg --files {quoted_path} -g {quoted_pattern} --sortr=modified'
    complete_cmd = f'{rg_cmd} | head -n 100'
    return _with_echo_header('glob', complete_cmd)


def _require_argument(arguments: dict, key: str, tool_name: str) -> None:
    """Raise FunctionCallValidationError when ``key`` is absent from ``arguments``."""
    if key not in arguments:
        raise FunctionCallValidationError(
            f'Missing required argument "{key}" in tool call {tool_name}'
        )


def response_to_actions(response: ModelResponse) -> list[Action]:
    """Translate an LLM response into the list of read-only OpenHands actions.

    Each tool call in the single response choice is mapped to one action
    (finish, view, think, grep, glob or web_read). A response without tool
    calls becomes a single ``MessageAction`` that waits for a user reply.

    Args:
        response: The model response; it must contain exactly one choice.

    Returns:
        A non-empty list of actions, each tagged with ``response.id``.

    Raises:
        AssertionError: If the response does not contain exactly one choice.
        RuntimeError: If a tool call's arguments are not valid JSON.
        FunctionCallValidationError: If a required tool argument is missing.
        FunctionCallNotExistsError: If the tool name is not one of the
            read-only tools handled here.
    """
    actions: list[Action] = []
    assert len(response.choices) == 1, 'Only one choice is supported for now'
    choice = response.choices[0]
    assistant_msg = choice.message
    if hasattr(assistant_msg, 'tool_calls') and assistant_msg.tool_calls:
        # Check if there's assistant_msg.content. If so, add it to the thought
        thought = ''
        if isinstance(assistant_msg.content, str):
            thought = assistant_msg.content
        elif isinstance(assistant_msg.content, list):
            for msg in assistant_msg.content:
                if msg['type'] == 'text':
                    thought += msg['text']

        # Process each tool call to OpenHands action
        for i, tool_call in enumerate(assistant_msg.tool_calls):
            action: Action
            logger.debug(f'Tool call in function_calling.py: {tool_call}')
            try:
                arguments = json.loads(tool_call.function.arguments)
            except json.decoder.JSONDecodeError as e:
                raise RuntimeError(
                    f'Failed to parse tool call arguments: {tool_call.function.arguments}'
                ) from e

            tool_name = tool_call.function.name

            # ================================================
            # AgentFinishAction
            # ================================================
            if tool_name == FinishTool['function']['name']:
                action = AgentFinishAction(
                    final_thought=arguments.get('message', ''),
                    task_completed=arguments.get('task_completed', None),
                )

            # ================================================
            # ViewTool (ACI-based file viewer, READ-ONLY)
            # ================================================
            elif tool_name == ViewTool['function']['name']:
                _require_argument(arguments, 'path', tool_name)
                action = FileReadAction(
                    path=arguments['path'],
                    impl_source=FileReadSource.OH_ACI,
                    view_range=arguments.get('view_range', None),
                )

            # ================================================
            # AgentThinkAction
            # ================================================
            elif tool_name == ThinkTool['function']['name']:
                action = AgentThinkAction(thought=arguments.get('thought', ''))

            # ================================================
            # GrepTool (file content search)
            # ================================================
            elif tool_name == GrepTool['function']['name']:
                _require_argument(arguments, 'pattern', tool_name)
                grep_cmd = grep_to_cmdrun(
                    arguments['pattern'],
                    arguments.get('path'),
                    arguments.get('include'),
                )
                action = CmdRunAction(command=grep_cmd, is_input=False)

            # ================================================
            # GlobTool (file pattern matching)
            # ================================================
            elif tool_name == GlobTool['function']['name']:
                _require_argument(arguments, 'pattern', tool_name)
                glob_cmd = glob_to_cmdrun(
                    arguments['pattern'], arguments.get('path', '.')
                )
                action = CmdRunAction(command=glob_cmd, is_input=False)

            # ================================================
            # WebReadTool (simplified browsing)
            # ================================================
            elif tool_name == WebReadTool['function']['name']:
                _require_argument(arguments, 'url', tool_name)
                action = BrowseURLAction(url=arguments['url'])

            else:
                raise FunctionCallNotExistsError(
                    f'Tool {tool_name} is not registered. (arguments: {arguments}). Please check the tool name and retry with an existing tool.'
                )

            # We only add thought to the first action
            if i == 0:
                action = combine_thought(action, thought)
            # Add metadata for tool calling
            action.tool_call_metadata = ToolCallMetadata(
                tool_call_id=tool_call.id,
                function_name=tool_name,
                model_response=response,
                total_calls_in_response=len(assistant_msg.tool_calls),
            )
            actions.append(action)
    else:
        # No tool call: surface the assistant text and wait for the user.
        actions.append(
            MessageAction(
                content=str(assistant_msg.content) if assistant_msg.content else '',
                wait_for_response=True,
            )
        )

    # Add response id to actions
    # This will ensure we can match both actions without tool calls (e.g. MessageAction)
    # and actions with tool calls (e.g. CmdRunAction, IPythonRunCellAction, etc.)
    # with the token usage data
    for action in actions:
        action.response_id = response.id

    assert len(actions) >= 1
    return actions


def get_tools() -> list[ChatCompletionToolParam]:
    """Return the tool definitions exposed to the model by the read-only agent."""
    return [
        ThinkTool,
        FinishTool,
        WebReadTool,
        GrepTool,
        GlobTool,
        ViewTool,
    ]
+{% endif %} + +{% endif %} diff --git a/openhands/agenthub/readonly_agent/prompts/in_context_learning_example.j2 b/openhands/agenthub/readonly_agent/prompts/in_context_learning_example.j2 new file mode 100644 index 0000000000..c400791c90 --- /dev/null +++ b/openhands/agenthub/readonly_agent/prompts/in_context_learning_example.j2 @@ -0,0 +1,101 @@ +Here's a running example of how to perform a task with the provided tools. + +--------------------- START OF EXAMPLE --------------------- + +USER: I need to find all TypeScript files in the project and then search for files that contain React components. + +A: I'll help you find TypeScript files and React components. Let's break this down into steps: + +1. First, let's use the glob tool to find all TypeScript files: + +**/*.{ts,tsx} + + +USER: EXECUTION RESULT of [glob]: +Below are the execution results of the glob command: rg --files . -g **/*.{ts,tsx} --sortr=modified | head -n 100 + +src/components/Header.tsx +src/components/Button.tsx +src/pages/Home.tsx +src/utils/types.ts +src/App.tsx +src/index.tsx + +A: Great! Now let's search for files that likely contain React components by looking for patterns like "export" followed by "function" or "class": + +export\s+(default\s+)?(function|class) +*.{ts,tsx} + + +USER: EXECUTION RESULT of [grep]: +Below are the execution results of the search command: rg -li export\s+(default\s+)?(function|class) --sortr=modified --glob *.{ts,tsx} . + +src/components/Header.tsx +src/components/Button.tsx +src/pages/Home.tsx +src/App.tsx + +A: Let's examine one of these components to confirm it's a React component. I'll look at the Header component: + +src/components/Header.tsx + + +USER: EXECUTION RESULT of [view]: + 1 import React from 'react'; + 2 + 3 interface HeaderProps { + 4 title: string; + 5 subtitle?: string; + 6 } + 7 + 8 export function Header({ title, subtitle }: HeaderProps) { + 9 return ( + 10
<header> + 11 <h1>{title}</h1> + 12 {subtitle && <h2>{subtitle}</h2>} + 13 </header>
+ 14 ); + 15 } + +A: Let me also check the Button component to see its implementation: + +src/components/Button.tsx + + +USER: EXECUTION RESULT of [view]: + 1 import React from 'react'; + 2 + 3 interface ButtonProps { + 4 onClick: () => void; + 5 children: React.ReactNode; + 6 variant?: 'primary' | 'secondary'; + 7 } + 8 + 9 export default function Button({ onClick, children, variant = 'primary' }: ButtonProps) { + 10 return ( + 11 + 17 ); + 18 } + +A: I've found several React components in the project. Here's a summary: + +1. Found TypeScript files using glob pattern `**/*.{ts,tsx}` +2. Identified potential React components using grep pattern `export\s+(default\s+)?(function|class)` +3. Confirmed two components: + - `Header.tsx`: A function component that renders a header with title and optional subtitle + - `Button.tsx`: A reusable button component with primary/secondary variants + +Would you like me to examine any other components or search for specific features within these components? + + + +--------------------- END OF EXAMPLE --------------------- + +Do NOT assume the environment is the same as in the example above. + +--------------------- NEW TASK DESCRIPTION --------------------- diff --git a/openhands/agenthub/readonly_agent/prompts/in_context_learning_example_suffix.j2 b/openhands/agenthub/readonly_agent/prompts/in_context_learning_example_suffix.j2 new file mode 100644 index 0000000000..f9868b7b75 --- /dev/null +++ b/openhands/agenthub/readonly_agent/prompts/in_context_learning_example_suffix.j2 @@ -0,0 +1,3 @@ +--------------------- END OF NEW TASK DESCRIPTION --------------------- + +PLEASE follow the format strictly! PLEASE EMIT ONE AND ONLY ONE FUNCTION CALL PER MESSAGE. 
diff --git a/openhands/agenthub/readonly_agent/prompts/microagent_info.j2 b/openhands/agenthub/readonly_agent/prompts/microagent_info.j2 new file mode 100644 index 0000000000..264828fbe2 --- /dev/null +++ b/openhands/agenthub/readonly_agent/prompts/microagent_info.j2 @@ -0,0 +1,8 @@ +{% for agent_info in triggered_agents %} + +The following information has been included based on a keyword match for "{{ agent_info.trigger }}". +It may or may not be relevant to the user's request. + +{{ agent_info.content }} + +{% endfor %} diff --git a/openhands/agenthub/readonly_agent/prompts/system_prompt.j2 b/openhands/agenthub/readonly_agent/prompts/system_prompt.j2 new file mode 100644 index 0000000000..7cb8d46f29 --- /dev/null +++ b/openhands/agenthub/readonly_agent/prompts/system_prompt.j2 @@ -0,0 +1,34 @@ +You are OpenHands ReadOnlyAgent, a helpful AI assistant focused on code analysis and exploration. You can: +- Explore and analyze codebases +- Browse the web for relevant information +- Plan potential changes +- Answer questions about code + + +✓ READ-ONLY TOOLS: +- view: Read file contents +- grep: Search for patterns +- glob: List matching files +- think: Analyze information +- web_read: Access web resources +- finish: Complete current task + +✗ RESTRICTIONS: +- Cannot modify any files +- Cannot execute state-changing commands + + + +1. When analyzing code or answering questions: + - Be thorough and methodical + - Prioritize accuracy over speed + - Provide detailed explanations + +2. For file operations: + - Always verify file locations before accessing + - Don't assume paths are relative to current directory + +3. 
class ReadOnlyAgent(CodeActAgent):
    """A specialized version of CodeActAgent that only uses read-only tools.

    This agent is designed for safely exploring codebases without making any changes.
    It only has access to tools that don't modify the system: grep, glob, view, think, finish, web_read.

    Use this agent when you want to:
    1. Explore a codebase to understand its structure
    2. Search for specific patterns or code
    3. Research without making any changes

    When you're ready to make changes, switch to the regular CodeActAgent.
    """

    # NOTE: the docstring must stay the first statement of the class body;
    # a string placed after this assignment is not picked up as __doc__.
    VERSION = '1.0'

    def __init__(
        self,
        llm: LLM,
        config: AgentConfig,
    ) -> None:
        """Initializes a new instance of the ReadOnlyAgent class.

        Parameters:
        - llm (LLM): The llm to be used by this agent
        - config (AgentConfig): The configuration for this agent
        """
        # Initialize the CodeActAgent class but we'll override some of its behavior
        super().__init__(llm, config)

        # Override the tools to only include read-only tools
        # from our own function_calling module
        self.tools = readonly_function_calling.get_tools()

        # Use the prompt templates that live next to this module
        self.prompt_manager = PromptManager(
            prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
        )

        # Parse LLM responses with the read-only tool mapping
        self.response_to_actions_fn = readonly_function_calling.response_to_actions

        tool_names = ', '.join(
            [tool.get('function').get('name') for tool in self.tools]
        )
        logger.debug(f'TOOLS loaded for ReadOnlyAgent: {tool_names}')

    def set_mcp_tools(self, mcp_tools: list[dict]) -> None:
        """Ignores the given MCP tools and logs a warning.

        The tool list of this agent is left untouched; ``mcp_tools`` is not
        stored or forwarded anywhere.

        Args:
        - mcp_tools (list[dict]): The list of MCP tools (unused).
        """
        logger.warning(
            'ReadOnlyAgent does not support MCP tools. MCP tools will be ignored by the agent.'
        )
# --- openhands/agenthub/readonly_agent/tools/glob.py ---

# Description shown to the model for the `glob` tool. It must only mention
# tools that the read-only agent actually registers (think, finish, web_read,
# grep, glob, view).
_GLOB_DESCRIPTION = """Fast file pattern matching tool.
* Supports glob patterns like "**/*.js" or "src/**/*.ts"
* Use this tool when you need to find files by name patterns
* Returns matching file paths sorted by modification time
* Only the first 100 results are returned. Consider narrowing your search with stricter glob patterns or provide path parameter if you need more results.
"""

GlobTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name='glob',
        description=_GLOB_DESCRIPTION,
        parameters={
            'type': 'object',
            'properties': {
                'pattern': {
                    'type': 'string',
                    'description': 'The glob pattern to match files (e.g., "**/*.js", "src/**/*.ts")',
                },
                'path': {
                    'type': 'string',
                    'description': 'The directory (absolute path) to search in. Defaults to the current working directory.',
                },
            },
            'required': ['pattern'],
        },
    ),
)


# --- openhands/agenthub/readonly_agent/tools/grep.py ---

# Description shown to the model for the `grep` tool.
_GREP_DESCRIPTION = """Fast content search tool.
* Searches file contents using regular expressions
* Supports full regex syntax (eg. "log.*Error", "function\\s+\\w+", etc.)
* Filter files by pattern with the include parameter (eg. "*.js", "*.{ts,tsx}")
* Returns matching file paths sorted by modification time.
* Only the first 100 results are returned. Consider narrowing your search with stricter regex patterns or provide path parameter if you need more results.
* Use this tool when you need to find files containing specific patterns
"""

GrepTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name='grep',
        description=_GREP_DESCRIPTION,
        parameters={
            'type': 'object',
            'properties': {
                'pattern': {
                    'type': 'string',
                    'description': 'The regex pattern to search for in file contents',
                },
                'path': {
                    'type': 'string',
                    'description': 'The directory (absolute path) to search in. Defaults to the current working directory.',
                },
                'include': {
                    'type': 'string',
                    'description': 'Optional file pattern to filter which files to search (e.g., "*.js", "*.{ts,tsx}")',
                },
            },
            'required': ['pattern'],
        },
    ),
)


# --- openhands/agenthub/readonly_agent/tools/view.py ---

# Description shown to the model for the `view` tool.
# NOTE(review): the truncation marker below is restored as `<response clipped>`
# on the assumption that it matches the marker emitted by the file viewer —
# confirm against the runtime's output.
_VIEW_DESCRIPTION = """Reads a file or list directories from the local filesystem.
* The path parameter must be an absolute path, not a relative path.
* If `path` is a file, `view` displays the result of applying `cat -n`; if `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep.
* You can optionally specify a line range to view (especially handy for long files), but it's recommended to read the whole file by not providing this parameter.
* For image files, the tool will display the image for you.
* For large files that exceed the display limit:
  - The output will be truncated and marked with `<response clipped>`
  - Use the `view_range` parameter to view specific sections after the truncation point
"""

ViewTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name='view',
        description=_VIEW_DESCRIPTION,
        parameters={
            'type': 'object',
            'properties': {
                'path': {
                    'type': 'string',
                    'description': 'The absolute path to the file to read or directory to list',
                },
                'view_range': {
                    'description': 'Optional parameter of `view` command when `path` points to a *file*. If none is given, the full file is shown. If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file.',
                    'items': {'type': 'integer'},
                    'type': 'array',
                },
            },
            'required': ['path'],
        },
    ),
)
grep_to_cmdrun('function', 'src') + assert 'rg -li function' in cmd + assert 'Below are the execution results' in cmd + + # With include parameter + cmd = grep_to_cmdrun('error', 'src', '*.js') + assert 'rg -li error' in cmd + assert "--glob '*.js'" in cmd + assert 'Below are the execution results' in cmd + + +def test_grep_to_cmdrun_quotes(temp_dir, runtime_cls, run_as_openhands): + """Test patterns with different types of quotes.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + # Double quotes in pattern + cmd = grep_to_cmdrun(r'const message = "Hello"', '/workspace') + assert 'rg -li' in cmd + + # Verify command works by executing it on a test file + setup_cmd = 'echo \'const message = "Hello";\' > /workspace/test_quotes.js' + obs = _run_cmd_action(runtime, setup_cmd) + assert obs.exit_code == 0 + + obs = _run_cmd_action(runtime, cmd) + assert obs.exit_code == 0 + assert '/workspace/test_quotes.js' in obs.content + + # Single quotes in pattern + cmd = grep_to_cmdrun("function\\('test'\\)", '/workspace') + assert 'rg -li' in cmd + + setup_cmd = 'echo "function(\'test\') {}" > /workspace/test_quotes2.js' + obs = _run_cmd_action(runtime, setup_cmd) + assert obs.exit_code == 0 + + obs = _run_cmd_action(runtime, cmd) + assert obs.exit_code == 0 + assert '/workspace/test_quotes2.js' in obs.content + finally: + _close_test_runtime(runtime) + + +def test_grep_to_cmdrun_special_chars(runtime_cls, run_as_openhands, temp_dir): + """Test patterns with special shell characters.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + # Create test directory and files with special pattern content + setup_cmd = """ + mkdir -p /workspace/test_special_patterns && \ + echo "testing x && y || z pattern" > /workspace/test_special_patterns/logical.txt && \ + echo "function() { return x; }" > /workspace/test_special_patterns/function.txt && \ + echo "using \\$variable here" > 
/workspace/test_special_patterns/dollar.txt && \ + echo "using \\`backticks\\` here" > /workspace/test_special_patterns/backticks.txt && \ + echo "line with \\n newline chars" > /workspace/test_special_patterns/newline.txt && \ + echo "matching *.js wildcard" > /workspace/test_special_patterns/wildcard.txt && \ + echo "testing x > y redirection" > /workspace/test_special_patterns/redirect.txt && \ + echo "testing a | b pipe" > /workspace/test_special_patterns/pipe.txt && \ + echo "line with #comment" > /workspace/test_special_patterns/comment.txt && \ + echo "CSS \\!important rule" > /workspace/test_special_patterns/bang.txt + """ + obs = _run_cmd_action(runtime, setup_cmd) + assert obs.exit_code == 0, 'Failed to set up test files' + + special_patterns = [ + r'x && y \|\| z', # Shell logical operators (escaping pipe) + r'function\(\) \{ return x; \}', # Properly escaped braces and parentheses + r'\$variable', # Dollar sign + # r"`backticks`", # Backticks + r'\\n newline', # Escaped characters + r'\*\.js', # Wildcards (escaped) + r'x > y', # Redirection + r'a \| b', # Pipe (escaped) + r'#comment', # Hash + # r"!important", # Bang + ] + + for pattern in special_patterns: + # Generate the grep command using our helper function + cmd = grep_to_cmdrun(pattern, '/workspace/test_special_patterns') + assert 'rg -li' in cmd + assert 'Below are the execution results of the search command:' in cmd + + # Execute the command + obs = _run_cmd_action(runtime, cmd) + + # Verify the command executed successfully + assert 'command not found' not in obs.content + assert 'syntax error' not in obs.content + assert 'unexpected' not in obs.content + + # Check that the pattern was found in the appropriate file + if '&&' in pattern: + assert 'logical.txt' in obs.content + elif 'function' in pattern: + assert 'function.txt' in obs.content + elif '$variable' in pattern: + assert 'dollar.txt' in obs.content + # elif "backticks" in pattern: + # assert "backticks.txt" in obs.content + elif '\\n 
newline' in pattern: + assert 'newline.txt' in obs.content + elif '*' in pattern: + assert 'wildcard.txt' in obs.content + elif '>' in pattern: + assert 'redirect.txt' in obs.content + elif '|' in pattern: + assert 'pipe.txt' in obs.content + elif '#comment' in pattern: + assert 'comment.txt' in obs.content + # elif "!important" in pattern: + # assert "bang.txt" in obs.content + finally: + _close_test_runtime(runtime) + + +def test_grep_to_cmdrun_paths_with_spaces(runtime_cls, run_as_openhands, temp_dir): + """Test paths with spaces and special characters.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + # Create test files with content in paths with spaces + setup_cmd = """ + mkdir -p "src/my project" "test files/unit tests" "src/special$chars" "path with spaces and $pecial ch@rs" && \ + echo "function searchablePattern() { return true; }" > "src/my project/test.js" && \ + echo "function testFunction() { return 42; }" > "test files/unit tests/test.js" && \ + echo "function specialFunction() { return null; }" > "src/special$chars/test.js" && \ + echo "function weirdFunction() { return []; }" > "path with spaces and $pecial ch@rs/test.js" + """ + obs = _run_cmd_action(runtime, setup_cmd) + assert obs.exit_code == 0, 'Failed to set up test files' + + special_paths = [ + 'src/my project', + 'test files/unit tests', + ] + + for path in special_paths: + # Generate grep command and execute it + cmd = grep_to_cmdrun('function', path) + assert 'rg -li' in cmd + + obs = _run_cmd_action(runtime, cmd) + assert obs.exit_code == 0, f'Grep command failed for path: {path}' + assert ( + 'function' in obs.content + ), f'Expected pattern not found in output for path: {path}' + + # Verify the actual file was found + if path == 'src/my project': + assert 'src/my project/test.js' in obs.content + elif path == 'test files/unit tests': + assert 'test files/unit tests/test.js' in obs.content + finally: + _close_test_runtime(runtime) + + +def 
test_glob_to_cmdrun_basic(): + """Test basic glob patterns.""" + cmd = glob_to_cmdrun('*.js', 'src') + assert "rg --files src -g '*.js'" in cmd + assert 'head -n 100' in cmd + assert 'echo "Below are the execution results of the glob command:' in cmd + + # Default path + cmd = glob_to_cmdrun('*.py') + assert "rg --files . -g '*.py'" in cmd + assert 'head -n 100' in cmd + assert 'echo "Below are the execution results of the glob command:' in cmd + + +def test_glob_to_cmdrun_special_patterns(runtime_cls, run_as_openhands, temp_dir): + """Test glob patterns with special characters.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + # Create test files matching the patterns we'll test + setup_cmd = r""" + mkdir -p src/components src/utils && \ + touch src/file1.js src/file2.js src/file9.js && \ + touch src/components/comp.jsx src/components/comp.tsx && \ + touch src/$special-file.js && \ + touch src/temp1.js src/temp2.js && \ + touch src/file.js src/file.ts src/file.jsx && \ + touch "src/weird\`file\`.js" && \ + touch "src/file with spaces.js" + """ + obs = _run_cmd_action(runtime, setup_cmd) + assert obs.exit_code == 0, 'Failed to set up test files' + + special_patterns = [ + '**/*.js', # Double glob + '**/{*.jsx,*.tsx}', # Braces + 'file[0-9].js', # Character class + 'temp?.js', # Single character wildcard + 'file.{js,ts,jsx}', # Multiple extensions + 'file with spaces.js', # Spaces + ] + + for pattern in special_patterns: + cmd = glob_to_cmdrun(pattern, 'src') + logger.info(f'Command: {cmd}') + # Execute the command + obs = _run_cmd_action(runtime, cmd) + assert obs.exit_code == 0, f'Glob command failed for pattern: {pattern}' + + # Verify expected files are found + if pattern == '**/*.js': + assert 'file1.js' in obs.content + assert 'file2.js' in obs.content + elif pattern == '**/{*.jsx,*.tsx}': + assert 'comp.jsx' in obs.content + assert 'comp.tsx' in obs.content + elif pattern == 'file[0-9].js': + assert 'file1.js' in 
obs.content + assert 'file2.js' in obs.content + assert 'file9.js' in obs.content + elif pattern == 'temp?.js': + assert 'temp1.js' in obs.content + assert 'temp2.js' in obs.content + elif pattern == 'file.{js,ts,jsx}': + assert 'file.js' in obs.content + assert 'file.ts' in obs.content + assert 'file.jsx' in obs.content + elif pattern == 'file with spaces.js': + assert 'file with spaces.js' in obs.content + finally: + _close_test_runtime(runtime) + + +def test_glob_to_cmdrun_paths_with_spaces(runtime_cls, run_as_openhands, temp_dir): + """Test paths with spaces and special characters for glob command.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + # Create test directories with spaces and special characters + setup_cmd = """ + mkdir -p "project files/src" "test results/unit tests" "weird$path/code" "path with spaces and $pecial ch@rs" && \ + touch "project files/src/file1.js" "project files/src/file2.js" && \ + touch "test results/unit tests/test1.js" "test results/unit tests/test2.js" && \ + touch "weird$path/code/weird1.js" "weird$path/code/weird2.js" && \ + touch "path with spaces and $pecial ch@rs/special1.js" "path with spaces and $pecial ch@rs/special2.js" + """ + obs = _run_cmd_action(runtime, setup_cmd) + assert obs.exit_code == 0, 'Failed to set up test files' + + special_paths = [ + 'project files/src', + 'test results/unit tests', + ] + + for path in special_paths: + cmd = glob_to_cmdrun('*.js', path) + + # Execute the command + obs = _run_cmd_action(runtime, cmd) + assert obs.exit_code == 0, f'Glob command failed for path: {path}' + + # Verify expected files are found in each path + if path == 'project files/src': + assert 'file1.js' in obs.content + assert 'file2.js' in obs.content + elif path == 'test results/unit tests': + assert 'test1.js' in obs.content + assert 'test2.js' in obs.content + finally: + _close_test_runtime(runtime) diff --git a/tests/unit/test_codeact_agent.py b/tests/unit/test_agents.py 
similarity index 80% rename from tests/unit/test_codeact_agent.py rename to tests/unit/test_agents.py index 715252e389..c313805292 100644 --- a/tests/unit/test_codeact_agent.py +++ b/tests/unit/test_agents.py @@ -1,3 +1,4 @@ +from typing import Union from unittest.mock import Mock import pytest @@ -5,19 +6,35 @@ from litellm import ChatCompletionMessageToolCall from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent from openhands.agenthub.codeact_agent.function_calling import ( + get_tools as codeact_get_tools, +) +from openhands.agenthub.codeact_agent.function_calling import ( + response_to_actions as codeact_response_to_actions, +) +from openhands.agenthub.codeact_agent.tools import ( BrowserTool, IPythonTool, LLMBasedFileEditTool, + ThinkTool, WebReadTool, create_cmd_run_tool, create_str_replace_editor_tool, - get_tools, - response_to_actions, ) from openhands.agenthub.codeact_agent.tools.browser import ( _BROWSER_DESCRIPTION, _BROWSER_TOOL_DESCRIPTION, ) +from openhands.agenthub.readonly_agent.function_calling import ( + get_tools as readonly_get_tools, +) +from openhands.agenthub.readonly_agent.function_calling import ( + response_to_actions as readonly_response_to_actions, +) +from openhands.agenthub.readonly_agent.readonly_agent import ReadOnlyAgent +from openhands.agenthub.readonly_agent.tools import ( + GlobTool, + GrepTool, +) from openhands.controller.state.state import State from openhands.core.config import AgentConfig, LLMConfig from openhands.core.exceptions import FunctionCallNotExistsError @@ -35,10 +52,20 @@ from openhands.events.tool import ToolCallMetadata from openhands.llm.llm import LLM +@pytest.fixture(params=['CodeActAgent', 'ReadOnlyAgent']) +def agent_class(request): + if request.param == 'CodeActAgent': + return CodeActAgent + else: + from openhands.agenthub.readonly_agent.readonly_agent import ReadOnlyAgent + + return ReadOnlyAgent + + @pytest.fixture -def agent() -> CodeActAgent: +def agent(agent_class) -> 
Union[CodeActAgent, ReadOnlyAgent]: config = AgentConfig() - agent = CodeActAgent(llm=LLM(LLMConfig()), config=config) + agent = agent_class(llm=LLM(LLMConfig()), config=config) agent.llm = Mock() agent.llm.config = Mock() agent.llm.config.max_message_chars = 1000 @@ -54,7 +81,7 @@ def mock_state() -> State: return state -def test_reset(agent: CodeActAgent): +def test_reset(agent): # Add some state action = MessageAction(content='test') action._source = EventSource.AGENT @@ -67,7 +94,7 @@ def test_reset(agent: CodeActAgent): assert len(agent.pending_actions) == 0 -def test_step_with_pending_actions(agent: CodeActAgent): +def test_step_with_pending_actions(agent): # Add a pending action pending_action = MessageAction(content='test') pending_action._source = EventSource.AGENT @@ -79,8 +106,8 @@ def test_step_with_pending_actions(agent: CodeActAgent): assert len(agent.pending_actions) == 0 -def test_get_tools_default(): - tools = get_tools( +def test_codeact_get_tools_default(): + tools = codeact_get_tools( enable_jupyter=True, enable_llm_editor=True, enable_browsing=True, @@ -95,9 +122,24 @@ def test_get_tools_default(): assert 'web_read' in tool_names -def test_get_tools_with_options(): +def test_readonly_get_tools_default(): + tools = readonly_get_tools() + assert len(tools) > 0 + + # Check required tools are present + tool_names = [tool['function']['name'] for tool in tools] + assert 'execute_bash' not in tool_names + assert 'execute_ipython_cell' not in tool_names + assert 'edit_file' not in tool_names + assert 'web_read' in tool_names + assert 'grep' in tool_names + assert 'glob' in tool_names + assert 'think' in tool_names + + +def test_codeact_get_tools_with_options(): # Test with all options enabled - tools = get_tools( + tools = codeact_get_tools( enable_browsing=True, enable_jupyter=True, enable_llm_editor=True, @@ -108,7 +150,7 @@ def test_get_tools_with_options(): assert 'edit_file' in tool_names # Test with all options disabled - tools = get_tools( + 
tools = codeact_get_tools( enable_browsing=False, enable_jupyter=False, enable_llm_editor=False, @@ -162,7 +204,6 @@ def test_str_replace_editor_tool(): assert 'old_str' in properties assert 'new_str' in properties assert 'insert_line' in properties - assert 'view_range' in properties assert StrReplaceEditorTool['function']['parameters']['required'] == [ 'command', @@ -226,7 +267,9 @@ def test_response_to_actions_invalid_tool(): mock_response.choices[0].message.tool_calls[0].function.arguments = '{}' with pytest.raises(FunctionCallNotExistsError): - response_to_actions(mock_response) + codeact_response_to_actions(mock_response) + with pytest.raises(FunctionCallNotExistsError): + readonly_response_to_actions(mock_response) def test_step_with_no_pending_actions(mock_state: State): @@ -267,7 +310,10 @@ def test_step_with_no_pending_actions(mock_state: State): assert action.content == 'Task completed' -def test_correct_tool_description_loaded_based_on_model_name(mock_state: State): +@pytest.mark.parametrize('agent_type', ['CodeActAgent', 'ReadOnlyAgent']) +def test_correct_tool_description_loaded_based_on_model_name( + agent_type, mock_state: State +): """Tests that the simplified tool descriptions are loaded for specific models.""" o3_mock_config = Mock() o3_mock_config.model = 'mock_o3_model' @@ -275,7 +321,16 @@ def test_correct_tool_description_loaded_based_on_model_name(mock_state: State): llm = Mock() llm.config = o3_mock_config - agent = CodeActAgent(llm=llm, config=AgentConfig()) + if agent_type == 'CodeActAgent': + from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent + + agent_class = CodeActAgent + else: + from openhands.agenthub.readonly_agent.readonly_agent import ReadOnlyAgent + + agent_class = ReadOnlyAgent + + agent = agent_class(llm=llm, config=AgentConfig()) for tool in agent.tools: # Assert all descriptions have less than 1024 characters assert len(tool['function']['description']) < 1024 @@ -284,17 +339,20 @@ def 
test_correct_tool_description_loaded_based_on_model_name(mock_state: State): sonnet_mock_config.model = 'mock_sonnet_model' llm.config = sonnet_mock_config - agent = CodeActAgent(llm=llm, config=AgentConfig()) + agent = agent_class(llm=llm, config=AgentConfig()) # Assert existence of the detailed tool descriptions that are longer than 1024 characters - assert any(len(tool['function']['description']) > 1024 for tool in agent.tools) + if agent_type == 'CodeActAgent': + # This only holds for CodeActAgent + assert any(len(tool['function']['description']) > 1024 for tool in agent.tools) -def test_mismatched_tool_call_events_and_auto_add_system_message(mock_state: State): +def test_mismatched_tool_call_events_and_auto_add_system_message( + agent, mock_state: State +): """Tests that the agent can convert mismatched tool call events (i.e., an observation with no corresponding action) into messages. This also tests that the system message is automatically added to the event stream if SystemMessageAction is not present. 
""" - agent = CodeActAgent(llm=LLM(LLMConfig()), config=AgentConfig()) tool_call_metadata = Mock( spec=ToolCallMetadata, @@ -355,6 +413,39 @@ def test_mismatched_tool_call_events_and_auto_add_system_message(mock_state: Sta assert messages[0].role == 'system' +def test_grep_tool(): + assert GrepTool['type'] == 'function' + assert GrepTool['function']['name'] == 'grep' + + properties = GrepTool['function']['parameters']['properties'] + assert 'pattern' in properties + assert 'path' in properties + assert 'include' in properties + + assert GrepTool['function']['parameters']['required'] == ['pattern'] + + +def test_glob_tool(): + assert GlobTool['type'] == 'function' + assert GlobTool['function']['name'] == 'glob' + + properties = GlobTool['function']['parameters']['properties'] + assert 'pattern' in properties + assert 'path' in properties + + assert GlobTool['function']['parameters']['required'] == ['pattern'] + + +def test_think_tool(): + assert ThinkTool['type'] == 'function' + assert ThinkTool['function']['name'] == 'think' + + properties = ThinkTool['function']['parameters']['properties'] + assert 'thought' in properties + + assert ThinkTool['function']['parameters']['required'] == ['thought'] + + def test_enhance_messages_adds_newlines_between_consecutive_user_messages( agent: CodeActAgent, ):