From 8d7bf83224e88f6b7b527c4261e167458055b8a4 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Sun, 18 Aug 2024 05:18:36 +0800 Subject: [PATCH] refactor: break agentskills single file to multiple composable modules (#3429) * refactor agentskills to prepare for agentless * fix import * fix typo * fix imports * fix globals * fix import * fix import * disable log to file to avoid auto-created log file w/ permission issue when import od in runtime * import agentskills from OD instead from itself directly * add back pythonpath * remove chown since there's no log/folder --- containers/app/Dockerfile | 2 - opendevin/core/logger.py | 12 +- opendevin/runtime/client/client.py | 4 +- .../runtime/plugins/agent_skills/__init__.py | 5 +- .../plugins/agent_skills/agentskills.py | 1110 +---------------- .../plugins/agent_skills/file_ops/__init__.py | 7 + .../plugins/agent_skills/file_ops/file_ops.py | 857 +++++++++++++ .../agent_skills/file_reader/__init__.py | 7 + .../agent_skills/file_reader/file_readers.py | 244 ++++ .../agent_skills/utils/aider/LICENSE.txt | 202 +++ .../agent_skills/{ => utils}/aider/README.md | 0 .../{ => utils}/aider/__init__.py | 0 .../agent_skills/{ => utils}/aider/linter.py | 0 .../plugins/agent_skills/utils/config.py | 30 + .../plugins/agent_skills/utils/dependency.py | 11 + opendevin/runtime/plugins/jupyter/__init__.py | 2 +- .../runtime/plugins/jupyter/execute_server.py | 4 +- tests/unit/test_agent_skill.py | 12 +- tests/unit/test_aider_linter.py | 14 +- 19 files changed, 1401 insertions(+), 1122 deletions(-) create mode 100644 opendevin/runtime/plugins/agent_skills/file_ops/__init__.py create mode 100644 opendevin/runtime/plugins/agent_skills/file_ops/file_ops.py create mode 100644 opendevin/runtime/plugins/agent_skills/file_reader/__init__.py create mode 100644 opendevin/runtime/plugins/agent_skills/file_reader/file_readers.py create mode 100644 opendevin/runtime/plugins/agent_skills/utils/aider/LICENSE.txt rename 
opendevin/runtime/plugins/agent_skills/{ => utils}/aider/README.md (100%) rename opendevin/runtime/plugins/agent_skills/{ => utils}/aider/__init__.py (100%) rename opendevin/runtime/plugins/agent_skills/{ => utils}/aider/linter.py (100%) create mode 100644 opendevin/runtime/plugins/agent_skills/utils/config.py create mode 100644 opendevin/runtime/plugins/agent_skills/utils/dependency.py diff --git a/containers/app/Dockerfile b/containers/app/Dockerfile index 7e8f6ea6ee..767cc6eab1 100644 --- a/containers/app/Dockerfile +++ b/containers/app/Dockerfile @@ -81,8 +81,6 @@ RUN python opendevin/core/download.py # No-op to download assets # opendevin:opendevin -> opendevin:app RUN find /app \! -group app -exec chgrp app {} + -RUN chown -R opendevin:app /app/logs && chmod -R 770 /app/logs # This gets created by the download.py script - COPY --chown=opendevin:app --chmod=770 --from=frontend-builder /app/dist ./frontend/dist COPY --chown=opendevin:app --chmod=770 ./containers/app/entrypoint.sh /app/entrypoint.sh diff --git a/opendevin/core/logger.py b/opendevin/core/logger.py index f765cdff99..c054d41b26 100644 --- a/opendevin/core/logger.py +++ b/opendevin/core/logger.py @@ -10,6 +10,7 @@ from termcolor import colored DISABLE_COLOR_PRINTING = False DEBUG = os.getenv('DEBUG', 'False').lower() in ['true', '1', 'yes'] +LOG_TO_FILE = os.getenv('LOG_TO_FILE', 'False').lower() in ['true', '1', 'yes'] ColorType = Literal[ 'red', @@ -162,11 +163,15 @@ LOG_DIR = os.path.join( os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), 'logs', ) + if DEBUG: opendevin_logger.setLevel(logging.DEBUG) + +if LOG_TO_FILE: # default log to project root - opendevin_logger.info('DEBUG logging is enabled. Logging to %s', LOG_DIR) -opendevin_logger.addHandler(get_file_handler(LOG_DIR)) + opendevin_logger.info('Logging to file is enabled. 
Logging to %s', LOG_DIR) + opendevin_logger.addHandler(get_file_handler(LOG_DIR)) + opendevin_logger.addHandler(get_console_handler()) opendevin_logger.addFilter(SensitiveDataFilter(opendevin_logger.name)) opendevin_logger.propagate = False @@ -241,7 +246,8 @@ def _setup_llm_logger(name, debug_level=logging.DEBUG): logger = logging.getLogger(name) logger.propagate = False logger.setLevel(debug_level) - logger.addHandler(_get_llm_file_handler(name, debug_level)) + if LOG_TO_FILE: + logger.addHandler(_get_llm_file_handler(name, debug_level)) return logger diff --git a/opendevin/runtime/client/client.py b/opendevin/runtime/client/client.py index a31371cb12..0af103f4a3 100644 --- a/opendevin/runtime/client/client.py +++ b/opendevin/runtime/client/client.py @@ -109,7 +109,9 @@ class RuntimeClient: # AFTER ServerRuntime is deprecated if 'agent_skills' in self.plugins and 'jupyter' in self.plugins: obs = await self.run_ipython( - IPythonRunCellAction(code='from agentskills import *') + IPythonRunCellAction( + code='from opendevin.runtime.plugins.agent_skills.agentskills import *\n' + ) ) logger.info(f'AgentSkills initialized: {obs}') diff --git a/opendevin/runtime/plugins/agent_skills/__init__.py b/opendevin/runtime/plugins/agent_skills/__init__.py index e331e16096..e5440cf3e6 100644 --- a/opendevin/runtime/plugins/agent_skills/__init__.py +++ b/opendevin/runtime/plugins/agent_skills/__init__.py @@ -1,13 +1,14 @@ from dataclasses import dataclass -from opendevin.runtime.plugins.agent_skills.agentskills import DOCUMENTATION from opendevin.runtime.plugins.requirement import Plugin, PluginRequirement +from . 
import agentskills + @dataclass class AgentSkillsRequirement(PluginRequirement): name: str = 'agent_skills' - documentation: str = DOCUMENTATION + documentation: str = agentskills.DOCUMENTATION class AgentSkillsPlugin(Plugin): diff --git a/opendevin/runtime/plugins/agent_skills/agentskills.py b/opendevin/runtime/plugins/agent_skills/agentskills.py index 45f0df22a0..8984f8d135 100644 --- a/opendevin/runtime/plugins/agent_skills/agentskills.py +++ b/opendevin/runtime/plugins/agent_skills/agentskills.py @@ -1,1105 +1,15 @@ -"""agentskills.py - -This module provides various file manipulation skills for the OpenDevin agent. - -Functions: -- open_file(path: str, line_number: int | None = 1, context_lines: int = 100): Opens a file and optionally moves to a specific line. -- goto_line(line_number): Moves the window to show the specified line number. -- scroll_down(): Moves the window down by the number of lines specified in WINDOW. -- scroll_up(): Moves the window up by the number of lines specified in WINDOW. -- create_file(filename): Creates and opens a new file with the given name. -- search_dir(search_term, dir_path='./'): Searches for a term in all files in the specified directory. -- search_file(search_term, file_path=None): Searches for a term in the specified file or the currently open file. -- find_file(file_name, dir_path='./'): Finds all files with the given name in the specified directory. -- edit_file_by_replace(file_name: str, to_replace: str, new_content: str): Replaces lines in a file with the given content. -- insert_content_at_line(file_name: str, line_number: int, content: str): Inserts given content at the specified line number in a file. -- append_file(file_name: str, content: str): Appends the given content to the end of the specified file. 
-""" - -import base64 -import os -import re -import shutil -import tempfile from inspect import signature -import docx -import PyPDF2 -from openai import OpenAI -from pptx import Presentation -from pylatexenc.latex2text import LatexNodes2Text - -if __package__ is None or __package__ == '': - from aider import Linter -else: - from .aider import Linter - -CURRENT_FILE: str | None = None -CURRENT_LINE = 1 -WINDOW = 100 - - -# This is also used in unit tests! -MSG_FILE_UPDATED = '[File updated (edited at line {line_number}). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]' - - -# ================================================================================================== -# OPENAI -# TODO: Move this to EventStream Actions when EventStreamRuntime is fully implemented -# NOTE: we need to get env vars inside functions because they will be set in IPython -# AFTER the agentskills is imported (the case for EventStreamRuntime) -# ================================================================================================== -def _get_openai_api_key(): - return os.getenv('OPENAI_API_KEY', os.getenv('SANDBOX_ENV_OPENAI_API_KEY', '')) - - -def _get_openai_base_url(): - return os.getenv('OPENAI_BASE_URL', 'https://api.openai.com/v1') - - -def _get_openai_model(): - return os.getenv('OPENAI_MODEL', 'gpt-4o-2024-05-13') - - -def _get_max_token(): - return os.getenv('MAX_TOKEN', 500) - - -def _get_openai_client(): - client = OpenAI(api_key=_get_openai_api_key(), base_url=_get_openai_base_url()) - return client - - -# ================================================================================================== - - -def _is_valid_filename(file_name) -> bool: - if not file_name or not isinstance(file_name, str) or not file_name.strip(): - return False - invalid_chars = '<>:"/\\|?*' - if os.name == 'nt': # Windows - invalid_chars = '<>:"/\\|?*' - elif os.name == 'posix': # Unix-like 
systems - invalid_chars = '\0' - - for char in invalid_chars: - if char in file_name: - return False - return True - - -def _is_valid_path(path) -> bool: - if not path or not isinstance(path, str): - return False - try: - return os.path.exists(os.path.normpath(path)) - except PermissionError: - return False - - -def _create_paths(file_name) -> bool: - try: - dirname = os.path.dirname(file_name) - if dirname: - os.makedirs(dirname, exist_ok=True) - return True - except PermissionError: - return False - - -def _check_current_file(file_path: str | None = None) -> bool: - global CURRENT_FILE - if not file_path: - file_path = CURRENT_FILE - if not file_path or not os.path.isfile(file_path): - raise ValueError('No file open. Use the open_file function first.') - return True - - -def _clamp(value, min_value, max_value): - return max(min_value, min(value, max_value)) - - -def _lint_file(file_path: str) -> tuple[str | None, int | None]: - """Lint the file at the given path and return a tuple with a boolean indicating if there are errors, - and the line number of the first error, if any. - - Returns: - tuple[str | None, int | None]: (lint_error, first_error_line_number) - """ - linter = Linter(root=os.getcwd()) - lint_error = linter.lint(file_path) - if not lint_error: - # Linting successful. No issues found. 
- return None, None - return 'ERRORS:\n' + lint_error.text, lint_error.lines[0] - - -def _print_window(file_path, targeted_line, window, return_str=False): - global CURRENT_LINE - _check_current_file(file_path) - with open(file_path) as file: - content = file.read() - - # Ensure the content ends with a newline character - if not content.endswith('\n'): - content += '\n' - - lines = content.splitlines(True) # Keep all line ending characters - total_lines = len(lines) - - # cover edge cases - CURRENT_LINE = _clamp(targeted_line, 1, total_lines) - half_window = max(1, window // 2) - - # Ensure at least one line above and below the targeted line - start = max(1, CURRENT_LINE - half_window) - end = min(total_lines, CURRENT_LINE + half_window) - - # Adjust start and end to ensure at least one line above and below - if start == 1: - end = min(total_lines, start + window - 1) - if end == total_lines: - start = max(1, end - window + 1) - - output = '' - - # only display this when there's at least one line above - if start > 1: - output += f'({start - 1} more lines above)\n' - else: - output += '(this is the beginning of the file)\n' - for i in range(start, end + 1): - _new_line = f'{i}|{lines[i-1]}' - if not _new_line.endswith('\n'): - _new_line += '\n' - output += _new_line - if end < total_lines: - output += f'({total_lines - end} more lines below)\n' - else: - output += '(this is the end of the file)\n' - output = output.rstrip() - - if return_str: - return output - else: - print(output) - - -def _cur_file_header(current_file, total_lines) -> str: - if not current_file: - return '' - return f'[File: {os.path.abspath(current_file)} ({total_lines} lines total)]\n' - - -def open_file( - path: str, line_number: int | None = 1, context_lines: int | None = WINDOW -) -> None: - """Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. - It only shows the first 100 lines by default! 
Max `context_lines` supported is 2000, use `scroll up/down` - to view the file if you want to see more. - - Args: - path: str: The path to the file to open, preferred absolute path. - line_number: int | None = 1: The line number to move to. Defaults to 1. - context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. - """ - global CURRENT_FILE, CURRENT_LINE, WINDOW - - if not os.path.isfile(path): - raise FileNotFoundError(f'File {path} not found') - - CURRENT_FILE = os.path.abspath(path) - with open(CURRENT_FILE) as file: - total_lines = max(1, sum(1 for _ in file)) - - if not isinstance(line_number, int) or line_number < 1 or line_number > total_lines: - raise ValueError(f'Line number must be between 1 and {total_lines}') - CURRENT_LINE = line_number - - # Override WINDOW with context_lines - if context_lines is None or context_lines < 1: - context_lines = WINDOW - - output = _cur_file_header(CURRENT_FILE, total_lines) - output += _print_window( - CURRENT_FILE, CURRENT_LINE, _clamp(context_lines, 1, 2000), return_str=True - ) - print(output) - - -def goto_line(line_number: int) -> None: - """Moves the window to show the specified line number. - - Args: - line_number: int: The line number to move to. - """ - global CURRENT_FILE, CURRENT_LINE, WINDOW - _check_current_file() - - with open(str(CURRENT_FILE)) as file: - total_lines = max(1, sum(1 for _ in file)) - if not isinstance(line_number, int) or line_number < 1 or line_number > total_lines: - raise ValueError(f'Line number must be between 1 and {total_lines}') - - CURRENT_LINE = _clamp(line_number, 1, total_lines) - - output = _cur_file_header(CURRENT_FILE, total_lines) - output += _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True) - print(output) - - -def scroll_down() -> None: - """Moves the window down by 100 lines. 
- - Args: - None - """ - global CURRENT_FILE, CURRENT_LINE, WINDOW - _check_current_file() - - with open(str(CURRENT_FILE)) as file: - total_lines = max(1, sum(1 for _ in file)) - CURRENT_LINE = _clamp(CURRENT_LINE + WINDOW, 1, total_lines) - output = _cur_file_header(CURRENT_FILE, total_lines) - output += _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True) - print(output) - - -def scroll_up() -> None: - """Moves the window up by 100 lines. - - Args: - None - """ - global CURRENT_FILE, CURRENT_LINE, WINDOW - _check_current_file() - - with open(str(CURRENT_FILE)) as file: - total_lines = max(1, sum(1 for _ in file)) - CURRENT_LINE = _clamp(CURRENT_LINE - WINDOW, 1, total_lines) - output = _cur_file_header(CURRENT_FILE, total_lines) - output += _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True) - print(output) - - -def create_file(filename: str) -> None: - """Creates and opens a new file with the given name. - - Args: - filename: str: The name of the file to create. - """ - if os.path.exists(filename): - raise FileExistsError(f"File '{filename}' already exists.") - - with open(filename, 'w') as file: - file.write('\n') - - open_file(filename) - print(f'[File {filename} created.]') - - -LINTER_ERROR_MSG = '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n' - - -class LineNumberError(Exception): - pass - - -def _append_impl(lines, content): - """Internal method to handle appending to a file. - - Args: - lines: list[str]: The lines in the original file. - content: str: The content to append to the file. - - Returns: - content: str: The new content of the file. - n_added_lines: int: The number of lines added to the file. 
- """ - content_lines = content.splitlines(keepends=True) - n_added_lines = len(content_lines) - if lines and not (len(lines) == 1 and lines[0].strip() == ''): - # file is not empty - if not lines[-1].endswith('\n'): - lines[-1] += '\n' - new_lines = lines + content_lines - content = ''.join(new_lines) - else: - # file is empty - content = ''.join(content_lines) - - return content, n_added_lines - - -def _insert_impl(lines, start, content): - """Internal method to handle inserting to a file. - - Args: - lines: list[str]: The lines in the original file. - start: int: The start line number for inserting. - content: str: The content to insert to the file. - - Returns: - content: str: The new content of the file. - n_added_lines: int: The number of lines added to the file. - - Raises: - LineNumberError: If the start line number is invalid. - """ - inserted_lines = [content + '\n' if not content.endswith('\n') else content] - if len(lines) == 0: - new_lines = inserted_lines - elif start is not None: - if len(lines) == 1 and lines[0].strip() == '': - # if the file with only 1 line and that line is empty - lines = [] - - if len(lines) == 0: - new_lines = inserted_lines - else: - new_lines = lines[: start - 1] + inserted_lines + lines[start - 1 :] - else: - raise LineNumberError( - f'Invalid line number: {start}. Line numbers must be between 1 and {len(lines)} (inclusive).' - ) - - content = ''.join(new_lines) - n_added_lines = len(inserted_lines) - return content, n_added_lines - - -def _edit_impl(lines, start, end, content): - """Internal method to handle editing a file. - - REQUIRES (should be checked by caller): - start <= end - start and end are between 1 and len(lines) (inclusive) - content ends with a newline - - Args: - lines: list[str]: The lines in the original file. - start: int: The start line number for editing. - end: int: The end line number for editing. - content: str: The content to replace the lines with. 
- - Returns: - content: str: The new content of the file. - n_added_lines: int: The number of lines added to the file. - """ - # Handle cases where start or end are None - if start is None: - start = 1 # Default to the beginning - if end is None: - end = len(lines) # Default to the end - # Check arguments - if not (1 <= start <= len(lines)): - raise LineNumberError( - f'Invalid start line number: {start}. Line numbers must be between 1 and {len(lines)} (inclusive).' - ) - if not (1 <= end <= len(lines)): - raise LineNumberError( - f'Invalid end line number: {end}. Line numbers must be between 1 and {len(lines)} (inclusive).' - ) - if start > end: - raise LineNumberError( - f'Invalid line range: {start}-{end}. Start must be less than or equal to end.' - ) - - if not content.endswith('\n'): - content += '\n' - content_lines = content.splitlines(True) - n_added_lines = len(content_lines) - new_lines = lines[: start - 1] + content_lines + lines[end:] - content = ''.join(new_lines) - return content, n_added_lines - - -def _edit_file_impl( - file_name: str, - start: int | None = None, - end: int | None = None, - content: str = '', - is_insert: bool = False, - is_append: bool = False, -) -> str: - """Internal method to handle common logic for edit_/append_file methods. - - Args: - file_name: str: The name of the file to edit or append to. - start: int | None = None: The start line number for editing. Ignored if is_append is True. - end: int | None = None: The end line number for editing. Ignored if is_append is True. - content: str: The content to replace the lines with or to append. - is_insert: bool = False: Whether to insert content at the given line number instead of editing. - is_append: bool = False: Whether to append content to the file instead of editing. - """ - ret_str = '' - global CURRENT_FILE, CURRENT_LINE, WINDOW - - ERROR_MSG = f'[Error editing file {file_name}. 
Please confirm the file is correct.]' - ERROR_MSG_SUFFIX = ( - 'Your changes have NOT been applied. Please fix your edit command and try again.\n' - 'You either need to 1) Open the correct file and try again or 2) Specify the correct line number arguments.\n' - 'DO NOT re-run the same failed edit command. Running it again will lead to the same error.' - ) - - if not _is_valid_filename(file_name): - raise FileNotFoundError('Invalid file name.') - - if not _is_valid_path(file_name): - raise FileNotFoundError('Invalid path or file name.') - - if not _create_paths(file_name): - raise PermissionError('Could not access or create directories.') - - if not os.path.isfile(file_name): - raise FileNotFoundError(f'File {file_name} not found.') - - if is_insert and is_append: - raise ValueError('Cannot insert and append at the same time.') - - # Use a temporary file to write changes - content = str(content or '') - temp_file_path = '' - src_abs_path = os.path.abspath(file_name) - first_error_line = None - - try: - n_added_lines = None - - # lint the original file - enable_auto_lint = os.getenv('ENABLE_AUTO_LINT', 'false').lower() == 'true' - if enable_auto_lint: - original_lint_error, _ = _lint_file(file_name) - - # Create a temporary file - with tempfile.NamedTemporaryFile('w', delete=False) as temp_file: - temp_file_path = temp_file.name - - # Read the original file and check if empty and for a trailing newline - with open(file_name) as original_file: - lines = original_file.readlines() - - if is_append: - content, n_added_lines = _append_impl(lines, content) - elif is_insert: - try: - content, n_added_lines = _insert_impl(lines, start, content) - except LineNumberError as e: - ret_str += (f'{ERROR_MSG}\n' f'{e}\n' f'{ERROR_MSG_SUFFIX}') + '\n' - return ret_str - else: - try: - content, n_added_lines = _edit_impl(lines, start, end, content) - except LineNumberError as e: - ret_str += (f'{ERROR_MSG}\n' f'{e}\n' f'{ERROR_MSG_SUFFIX}') + '\n' - return ret_str - - if not 
content.endswith('\n'): - content += '\n' - - # Write the new content to the temporary file - temp_file.write(content) - - # Replace the original file with the temporary file atomically - shutil.move(temp_file_path, src_abs_path) - - # Handle linting - # NOTE: we need to get env var inside this function - # because the env var will be set AFTER the agentskills is imported - if enable_auto_lint: - # BACKUP the original file - original_file_backup_path = os.path.join( - os.path.dirname(file_name), - f'.backup.{os.path.basename(file_name)}', - ) - with open(original_file_backup_path, 'w') as f: - f.writelines(lines) - - lint_error, first_error_line = _lint_file(file_name) - - # Select the errors caused by the modification - def extract_last_part(line): - parts = line.split(':') - if len(parts) > 1: - return parts[-1].strip() - return line.strip() - - def subtract_strings(str1, str2) -> str: - lines1 = str1.splitlines() - lines2 = str2.splitlines() - - last_parts1 = [extract_last_part(line) for line in lines1] - - remaining_lines = [ - line - for line in lines2 - if extract_last_part(line) not in last_parts1 - ] - - result = '\n'.join(remaining_lines) - return result - - if original_lint_error and lint_error: - lint_error = subtract_strings(original_lint_error, lint_error) - if lint_error == '': - lint_error = None - first_error_line = None - - if lint_error is not None: - if first_error_line is not None: - show_line = int(first_error_line) - elif is_append: - # original end-of-file - show_line = len(lines) - # insert OR edit WILL provide meaningful line numbers - elif start is not None and end is not None: - show_line = int((start + end) / 2) - else: - raise ValueError('Invalid state. 
This should never happen.') - - ret_str += LINTER_ERROR_MSG - ret_str += lint_error + '\n' - - editor_lines = n_added_lines + 20 - - ret_str += '[This is how your edit would have looked if applied]\n' - ret_str += '-------------------------------------------------\n' - ret_str += ( - _print_window(file_name, show_line, editor_lines, return_str=True) - + '\n' - ) - ret_str += '-------------------------------------------------\n\n' - - ret_str += '[This is the original code before your edit]\n' - ret_str += '-------------------------------------------------\n' - ret_str += ( - _print_window( - original_file_backup_path, - show_line, - editor_lines, - return_str=True, - ) - + '\n' - ) - ret_str += '-------------------------------------------------\n' - - ret_str += ( - 'Your changes have NOT been applied. Please fix your edit command and try again.\n' - 'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n' - 'DO NOT re-run the same failed edit command. Running it again will lead to the same error.' 
- ) - - # recover the original file - with open(original_file_backup_path) as fin, open( - file_name, 'w' - ) as fout: - fout.write(fin.read()) - os.remove(original_file_backup_path) - return ret_str - - except FileNotFoundError as e: - ret_str += f'File not found: {e}\n' - except IOError as e: - ret_str += f'An error occurred while handling the file: {e}\n' - except ValueError as e: - ret_str += f'Invalid input: {e}\n' - except Exception as e: - # Clean up the temporary file if an error occurs - if temp_file_path and os.path.exists(temp_file_path): - os.remove(temp_file_path) - print(f'An unexpected error occurred: {e}') - raise e - - # Update the file information and print the updated content - with open(file_name, 'r', encoding='utf-8') as file: - n_total_lines = max(1, len(file.readlines())) - if first_error_line is not None and int(first_error_line) > 0: - CURRENT_LINE = first_error_line - else: - if is_append: - CURRENT_LINE = max(1, len(lines)) # end of original file - else: - CURRENT_LINE = start or n_total_lines or 1 - ret_str += f'[File: {os.path.abspath(file_name)} ({n_total_lines} lines total after edit)]\n' - CURRENT_FILE = file_name - ret_str += _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True) + '\n' - ret_str += MSG_FILE_UPDATED.format(line_number=CURRENT_LINE) - return ret_str - - -def edit_file_by_replace(file_name: str, to_replace: str, new_content: str) -> None: - """Edit a file. This will search for `to_replace` in the given file and replace it with `new_content`. - - Every *to_replace* must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc. - - Include enough lines to make code in `to_replace` unique. `to_replace` should NOT be empty. 
- - For example, given a file "/workspace/example.txt" with the following content: - ``` - line 1 - line 2 - line 2 - line 3 - ``` - - EDITING: If you want to replace the second occurrence of "line 2", you can make `to_replace` unique: - - edit_file_by_replace( - '/workspace/example.txt', - to_replace='line 2\nline 3', - new_content='new line\nline 3', - ) - - This will replace only the second "line 2" with "new line". The first "line 2" will remain unchanged. - - The resulting file will be: - ``` - line 1 - line 2 - new line - line 3 - ``` - - REMOVAL: If you want to remove "line 2" and "line 3", you can set `new_content` to an empty string: - - edit_file_by_replace( - '/workspace/example.txt', - to_replace='line 2\nline 3', - new_content='', - ) - - Args: - file_name: str: The name of the file to edit. - to_replace: str: The content to search for and replace. - new_content: str: The new content to replace the old content with. - """ - # FIXME: support replacing *all* occurrences - if to_replace.strip() == '': - raise ValueError('`to_replace` must not be empty.') - - if to_replace == new_content: - raise ValueError('`to_replace` and `new_content` must be different.') - - # search for `to_replace` in the file - # if found, replace it with `new_content` - # if not found, perform a fuzzy search to find the closest match and replace it with `new_content` - with open(file_name, 'r') as file: - file_content = file.read() - - if file_content.count(to_replace) > 1: - raise ValueError( - '`to_replace` appears more than once, please include enough lines to make code in `to_replace` unique.' 
- ) - - start = file_content.find(to_replace) - if start != -1: - # Convert start from index to line number - start_line_number = file_content[:start].count('\n') + 1 - end_line_number = start_line_number + len(to_replace.splitlines()) - 1 - else: - - def _fuzzy_transform(s: str) -> str: - # remove all space except newline - return re.sub(r'[^\S\n]+', '', s) - - # perform a fuzzy search (remove all spaces except newlines) - to_replace_fuzzy = _fuzzy_transform(to_replace) - file_content_fuzzy = _fuzzy_transform(file_content) - # find the closest match - start = file_content_fuzzy.find(to_replace_fuzzy) - if start == -1: - print( - f'[No exact match found in {file_name} for\n```\n{to_replace}\n```\n]' - ) - return - # Convert start from index to line number for fuzzy match - start_line_number = file_content_fuzzy[:start].count('\n') + 1 - end_line_number = start_line_number + len(to_replace.splitlines()) - 1 - - ret_str = _edit_file_impl( - file_name, - start=start_line_number, - end=end_line_number, - content=new_content, - is_insert=False, - ) - # lint_error = bool(LINTER_ERROR_MSG in ret_str) - # TODO: automatically tries to fix linter error (maybe involve some static analysis tools on the location near the edit to figure out indentation) - print(ret_str) - - -def insert_content_at_line(file_name: str, line_number: int, content: str) -> None: - """Insert content at the given line number in a file. - This will NOT modify the content of the lines before OR after the given line number. - - For example, if the file has the following content: - ``` - line 1 - line 2 - line 3 - ``` - and you call `insert_content_at_line('file.txt', 2, 'new line')`, the file will be updated to: - ``` - line 1 - new line - line 2 - line 3 - ``` - - Args: - file_name: str: The name of the file to edit. - line_number: int: The line number (starting from 1) to insert the content after. - content: str: The content to insert. 
- """ - ret_str = _edit_file_impl( - file_name, - start=line_number, - end=line_number, - content=content, - is_insert=True, - is_append=False, - ) - print(ret_str) - - -def append_file(file_name: str, content: str) -> None: - """Append content to the given file. - It appends text `content` to the end of the specified file. - - Args: - file_name: str: The name of the file to edit. - line_number: int: The line number (starting from 1) to insert the content after. - content: str: The content to insert. - """ - ret_str = _edit_file_impl( - file_name, - start=None, - end=None, - content=content, - is_insert=False, - is_append=True, - ) - print(ret_str) - - -def search_dir(search_term: str, dir_path: str = './') -> None: - """Searches for search_term in all files in dir. If dir is not provided, searches in the current directory. - - Args: - search_term: str: The term to search for. - dir_path: str: The path to the directory to search. - """ - if not os.path.isdir(dir_path): - raise FileNotFoundError(f'Directory {dir_path} not found') - matches = [] - for root, _, files in os.walk(dir_path): - for file in files: - if file.startswith('.'): - continue - file_path = os.path.join(root, file) - with open(file_path, 'r', errors='ignore') as f: - for line_num, line in enumerate(f, 1): - if search_term in line: - matches.append((file_path, line_num, line.strip())) - - if not matches: - print(f'No matches found for "{search_term}" in {dir_path}') - return - - num_matches = len(matches) - num_files = len(set(match[0] for match in matches)) - - if num_files > 100: - print( - f'More than {num_files} files matched for "{search_term}" in {dir_path}. Please narrow your search.' 
- ) - return - - print(f'[Found {num_matches} matches for "{search_term}" in {dir_path}]') - for file_path, line_num, line in matches: - print(f'{file_path} (Line {line_num}): {line}') - print(f'[End of matches for "{search_term}" in {dir_path}]') - - -def search_file(search_term: str, file_path: str | None = None) -> None: - """Searches for search_term in file. If file is not provided, searches in the current open file. - - Args: - search_term: str: The term to search for. - file_path: str | None: The path to the file to search. - """ - global CURRENT_FILE - if file_path is None: - file_path = CURRENT_FILE - if file_path is None: - raise FileNotFoundError( - 'No file specified or open. Use the open_file function first.' - ) - if not os.path.isfile(file_path): - raise FileNotFoundError(f'File {file_path} not found') - - matches = [] - with open(file_path) as file: - for i, line in enumerate(file, 1): - if search_term in line: - matches.append((i, line.strip())) - - if matches: - print(f'[Found {len(matches)} matches for "{search_term}" in {file_path}]') - for match in matches: - print(f'Line {match[0]}: {match[1]}') - print(f'[End of matches for "{search_term}" in {file_path}]') - else: - print(f'[No matches found for "{search_term}" in {file_path}]') - - -def find_file(file_name: str, dir_path: str = './') -> None: - """Finds all files with the given name in the specified directory. - - Args: - file_name: str: The name of the file to find. - dir_path: str: The path to the directory to search. 
- """ - if not os.path.isdir(dir_path): - raise FileNotFoundError(f'Directory {dir_path} not found') - - matches = [] - for root, _, files in os.walk(dir_path): - for file in files: - if file_name in file: - matches.append(os.path.join(root, file)) - - if matches: - print(f'[Found {len(matches)} matches for "{file_name}" in {dir_path}]') - for match in matches: - print(f'{match}') - print(f'[End of matches for "{file_name}" in {dir_path}]') - else: - print(f'[No matches found for "{file_name}" in {dir_path}]') - - -def parse_pdf(file_path: str) -> None: - """Parses the content of a PDF file and prints it. - - Args: - file_path: str: The path to the file to open. - """ - print(f'[Reading PDF file from {file_path}]') - content = PyPDF2.PdfReader(file_path) - text = '' - for page_idx in range(len(content.pages)): - text += ( - f'@@ Page {page_idx + 1} @@\n' - + content.pages[page_idx].extract_text() - + '\n\n' - ) - print(text.strip()) - - -def parse_docx(file_path: str) -> None: - """Parses the content of a DOCX file and prints it. - - Args: - file_path: str: The path to the file to open. - """ - print(f'[Reading DOCX file from {file_path}]') - content = docx.Document(file_path) - text = '' - for i, para in enumerate(content.paragraphs): - text += f'@@ Page {i + 1} @@\n' + para.text + '\n\n' - print(text) - - -def parse_latex(file_path: str) -> None: - """Parses the content of a LaTex file and prints it. - - Args: - file_path: str: The path to the file to open. 
- """ - print(f'[Reading LaTex file from {file_path}]') - with open(file_path) as f: - data = f.read() - text = LatexNodes2Text().latex_to_text(data) - print(text.strip()) - - -def _base64_img(file_path: str) -> str: - with open(file_path, 'rb') as image_file: - encoded_image = base64.b64encode(image_file.read()).decode('utf-8') - return encoded_image - - -def _base64_video(file_path: str, frame_interval: int = 10) -> list[str]: - import cv2 - - video = cv2.VideoCapture(file_path) - base64_frames = [] - frame_count = 0 - while video.isOpened(): - success, frame = video.read() - if not success: - break - if frame_count % frame_interval == 0: - _, buffer = cv2.imencode('.jpg', frame) - base64_frames.append(base64.b64encode(buffer).decode('utf-8')) - frame_count += 1 - video.release() - return base64_frames - - -def _prepare_image_messages(task: str, base64_image: str): - return [ - { - 'role': 'user', - 'content': [ - {'type': 'text', 'text': task}, - { - 'type': 'image_url', - 'image_url': {'url': f'data:image/jpeg;base64,{base64_image}'}, - }, - ], - } - ] - - -def parse_audio(file_path: str, model: str = 'whisper-1') -> None: - """Parses the content of an audio file and prints it. - - Args: - file_path: str: The path to the audio file to transcribe. - model: str: The audio model to use for transcription. Defaults to 'whisper-1'. - """ - print(f'[Transcribing audio file from {file_path}]') - try: - # TODO: record the COST of the API call - with open(file_path, 'rb') as audio_file: - transcript = _get_openai_client().audio.translations.create( - model=model, file=audio_file - ) - print(transcript.text) - - except Exception as e: - print(f'Error transcribing audio file: {e}') - - -def parse_image( - file_path: str, task: str = 'Describe this image as detail as possible.' -) -> None: - """Parses the content of an image file and prints the description. - - Args: - file_path: str: The path to the file to open. - task: str: The task description for the API call. 
Defaults to 'Describe this image as detail as possible.'. - """ - print(f'[Reading image file from {file_path}]') - # TODO: record the COST of the API call - try: - base64_image = _base64_img(file_path) - response = _get_openai_client().chat.completions.create( - model=_get_openai_model(), - messages=_prepare_image_messages(task, base64_image), - max_tokens=_get_max_token(), - ) - content = response.choices[0].message.content - print(content) - - except Exception as error: - print(f'Error with the request: {error}') - - -def parse_video( - file_path: str, - task: str = 'Describe this image as detail as possible.', - frame_interval: int = 30, -) -> None: - """Parses the content of an image file and prints the description. - - Args: - file_path: str: The path to the video file to open. - task: str: The task description for the API call. Defaults to 'Describe this image as detail as possible.'. - frame_interval: int: The interval between frames to analyze. Defaults to 30. - - """ - print( - f'[Processing video file from {file_path} with frame interval {frame_interval}]' - ) - - task = task or 'This is one frame from a video, please summarize this frame.' - base64_frames = _base64_video(file_path) - selected_frames = base64_frames[::frame_interval] - - if len(selected_frames) > 30: - new_interval = len(base64_frames) // 30 - selected_frames = base64_frames[::new_interval] - - print(f'Totally {len(selected_frames)} would be analyze...\n') - - idx = 0 - for base64_frame in selected_frames: - idx += 1 - print(f'Process the {file_path}, current No. 
{idx * frame_interval} frame...') - # TODO: record the COST of the API call - try: - response = _get_openai_client().chat.completions.create( - model=_get_openai_model(), - messages=_prepare_image_messages(task, base64_frame), - max_tokens=_get_max_token(), - ) - - content = response.choices[0].message.content - current_frame_content = f"Frame {idx}'s content: {content}\n" - print(current_frame_content) - - except Exception as error: - print(f'Error with the request: {error}') - - -def parse_pptx(file_path: str) -> None: - """Parses the content of a pptx file and prints it. - - Args: - file_path: str: The path to the file to open. - """ - print(f'[Reading PowerPoint file from {file_path}]') - try: - pres = Presentation(str(file_path)) - text = [] - for slide_idx, slide in enumerate(pres.slides): - text.append(f'@@ Slide {slide_idx + 1} @@') - for shape in slide.shapes: - if hasattr(shape, 'text'): - text.append(shape.text) - print('\n'.join(text)) - - except Exception as e: - print(f'Error reading PowerPoint file: {e}') - - -__all__ = [ - # file operation - 'open_file', - 'goto_line', - 'scroll_down', - 'scroll_up', - 'create_file', - 'edit_file_by_replace', - 'insert_content_at_line', - 'append_file', - 'search_dir', - 'search_file', - 'find_file', - # readers - 'parse_pdf', - 'parse_docx', - 'parse_latex', - 'parse_pptx', -] - -# This is called from OpenDevin's side -# If SANDBOX_ENV_OPENAI_API_KEY is set, we will be able to use these tools in the sandbox environment -if _get_openai_api_key() and _get_openai_base_url(): - __all__ += ['parse_audio', 'parse_video', 'parse_image'] +from . 
import file_ops, file_reader +from .utils.dependency import import_functions + +import_functions( + module=file_ops, function_names=file_ops.__all__, target_globals=globals() +) +import_functions( + module=file_reader, function_names=file_reader.__all__, target_globals=globals() +) +__all__ = file_ops.__all__ + file_reader.__all__ DOCUMENTATION = '' for func_name in __all__: diff --git a/opendevin/runtime/plugins/agent_skills/file_ops/__init__.py b/opendevin/runtime/plugins/agent_skills/file_ops/__init__.py new file mode 100644 index 0000000000..daf2449f8c --- /dev/null +++ b/opendevin/runtime/plugins/agent_skills/file_ops/__init__.py @@ -0,0 +1,7 @@ +from ..utils.dependency import import_functions +from . import file_ops + +import_functions( + module=file_ops, function_names=file_ops.__all__, target_globals=globals() +) +__all__ = file_ops.__all__ diff --git a/opendevin/runtime/plugins/agent_skills/file_ops/file_ops.py b/opendevin/runtime/plugins/agent_skills/file_ops/file_ops.py new file mode 100644 index 0000000000..8be8cb2d01 --- /dev/null +++ b/opendevin/runtime/plugins/agent_skills/file_ops/file_ops.py @@ -0,0 +1,857 @@ +"""file_ops.py + +This module provides various file manipulation skills for the OpenDevin agent. + +Functions: +- open_file(path: str, line_number: int | None = 1, context_lines: int = 100): Opens a file and optionally moves to a specific line. +- goto_line(line_number: int): Moves the window to show the specified line number. +- scroll_down(): Moves the window down by the number of lines specified in WINDOW. +- scroll_up(): Moves the window up by the number of lines specified in WINDOW. +- create_file(filename: str): Creates and opens a new file with the given name. +- search_dir(search_term: str, dir_path: str = './'): Searches for a term in all files in the specified directory. +- search_file(search_term: str, file_path: str | None = None): Searches for a term in the specified file or the currently open file. 
+- find_file(file_name: str, dir_path: str = './'): Finds all files with the given name in the specified directory. +- edit_file_by_replace(file_name: str, to_replace: str, new_content: str): Replaces specific content in a file with new content. +- insert_content_at_line(file_name: str, line_number: int, content: str): Inserts given content at the specified line number in a file. +- append_file(file_name: str, content: str): Appends the given content to the end of the specified file. +""" + +import os +import re +import shutil +import tempfile + +if __package__ is None or __package__ == '': + from aider import Linter +else: + from ..utils.aider import Linter + +CURRENT_FILE: str | None = None +CURRENT_LINE = 1 +WINDOW = 100 + +# This is also used in unit tests! +MSG_FILE_UPDATED = '[File updated (edited at line {line_number}). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]' + +# ================================================================================================== + + +def _is_valid_filename(file_name) -> bool: + if not file_name or not isinstance(file_name, str) or not file_name.strip(): + return False + invalid_chars = '<>:"/\\|?*' + if os.name == 'nt': # Windows + invalid_chars = '<>:"/\\|?*' + elif os.name == 'posix': # Unix-like systems + invalid_chars = '\0' + + for char in invalid_chars: + if char in file_name: + return False + return True + + +def _is_valid_path(path) -> bool: + if not path or not isinstance(path, str): + return False + try: + return os.path.exists(os.path.normpath(path)) + except PermissionError: + return False + + +def _create_paths(file_name) -> bool: + try: + dirname = os.path.dirname(file_name) + if dirname: + os.makedirs(dirname, exist_ok=True) + return True + except PermissionError: + return False + + +def _check_current_file(file_path: str | None = None) -> bool: + global CURRENT_FILE + if not file_path: + file_path = 
CURRENT_FILE + if not file_path or not os.path.isfile(file_path): + raise ValueError('No file open. Use the open_file function first.') + return True + + +def _clamp(value, min_value, max_value): + return max(min_value, min(value, max_value)) + + +def _lint_file(file_path: str) -> tuple[str | None, int | None]: + """Lint the file at the given path and return a tuple with a boolean indicating if there are errors, + and the line number of the first error, if any. + + Returns: + tuple[str | None, int | None]: (lint_error, first_error_line_number) + """ + linter = Linter(root=os.getcwd()) + lint_error = linter.lint(file_path) + if not lint_error: + # Linting successful. No issues found. + return None, None + return 'ERRORS:\n' + lint_error.text, lint_error.lines[0] + + +def _print_window(file_path, targeted_line, window, return_str=False): + global CURRENT_LINE + _check_current_file(file_path) + with open(file_path) as file: + content = file.read() + + # Ensure the content ends with a newline character + if not content.endswith('\n'): + content += '\n' + + lines = content.splitlines(True) # Keep all line ending characters + total_lines = len(lines) + + # cover edge cases + CURRENT_LINE = _clamp(targeted_line, 1, total_lines) + half_window = max(1, window // 2) + + # Ensure at least one line above and below the targeted line + start = max(1, CURRENT_LINE - half_window) + end = min(total_lines, CURRENT_LINE + half_window) + + # Adjust start and end to ensure at least one line above and below + if start == 1: + end = min(total_lines, start + window - 1) + if end == total_lines: + start = max(1, end - window + 1) + + output = '' + + # only display this when there's at least one line above + if start > 1: + output += f'({start - 1} more lines above)\n' + else: + output += '(this is the beginning of the file)\n' + for i in range(start, end + 1): + _new_line = f'{i}|{lines[i-1]}' + if not _new_line.endswith('\n'): + _new_line += '\n' + output += _new_line + if end < 
total_lines: + output += f'({total_lines - end} more lines below)\n' + else: + output += '(this is the end of the file)\n' + output = output.rstrip() + + if return_str: + return output + else: + print(output) + + +def _cur_file_header(current_file, total_lines) -> str: + if not current_file: + return '' + return f'[File: {os.path.abspath(current_file)} ({total_lines} lines total)]\n' + + +def open_file( + path: str, line_number: int | None = 1, context_lines: int | None = WINDOW +) -> None: + """Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. + + Args: + path: str: The path to the file to open, preferred absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. + """ + global CURRENT_FILE, CURRENT_LINE, WINDOW + + if not os.path.isfile(path): + raise FileNotFoundError(f'File {path} not found') + + CURRENT_FILE = os.path.abspath(path) + with open(CURRENT_FILE) as file: + total_lines = max(1, sum(1 for _ in file)) + + if not isinstance(line_number, int) or line_number < 1 or line_number > total_lines: + raise ValueError(f'Line number must be between 1 and {total_lines}') + CURRENT_LINE = line_number + + # Override WINDOW with context_lines + if context_lines is None or context_lines < 1: + context_lines = WINDOW + + output = _cur_file_header(CURRENT_FILE, total_lines) + output += _print_window( + CURRENT_FILE, CURRENT_LINE, _clamp(context_lines, 1, 2000), return_str=True + ) + print(output) + + +def goto_line(line_number: int) -> None: + """Moves the window to show the specified line number. 
+ + Args: + line_number: int: The line number to move to. + """ + global CURRENT_FILE, CURRENT_LINE, WINDOW + _check_current_file() + + with open(str(CURRENT_FILE)) as file: + total_lines = max(1, sum(1 for _ in file)) + if not isinstance(line_number, int) or line_number < 1 or line_number > total_lines: + raise ValueError(f'Line number must be between 1 and {total_lines}') + + CURRENT_LINE = _clamp(line_number, 1, total_lines) + + output = _cur_file_header(CURRENT_FILE, total_lines) + output += _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True) + print(output) + + +def scroll_down() -> None: + """Moves the window down by 100 lines. + + Args: + None + """ + global CURRENT_FILE, CURRENT_LINE, WINDOW + _check_current_file() + + with open(str(CURRENT_FILE)) as file: + total_lines = max(1, sum(1 for _ in file)) + CURRENT_LINE = _clamp(CURRENT_LINE + WINDOW, 1, total_lines) + output = _cur_file_header(CURRENT_FILE, total_lines) + output += _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True) + print(output) + + +def scroll_up() -> None: + """Moves the window up by 100 lines. + + Args: + None + """ + global CURRENT_FILE, CURRENT_LINE, WINDOW + _check_current_file() + + with open(str(CURRENT_FILE)) as file: + total_lines = max(1, sum(1 for _ in file)) + CURRENT_LINE = _clamp(CURRENT_LINE - WINDOW, 1, total_lines) + output = _cur_file_header(CURRENT_FILE, total_lines) + output += _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True) + print(output) + + +def create_file(filename: str) -> None: + """Creates and opens a new file with the given name. + + Args: + filename: str: The name of the file to create. + """ + if os.path.exists(filename): + raise FileExistsError(f"File '{filename}' already exists.") + + with open(filename, 'w') as file: + file.write('\n') + + open_file(filename) + print(f'[File {filename} created.]') + + +LINTER_ERROR_MSG = '[Your proposed edit has introduced new syntax error(s). 
Please understand the errors and retry your edit command.]\n' + + +class LineNumberError(Exception): + pass + + +def _append_impl(lines, content): + """Internal method to handle appending to a file. + + Args: + lines: list[str]: The lines in the original file. + content: str: The content to append to the file. + + Returns: + content: str: The new content of the file. + n_added_lines: int: The number of lines added to the file. + """ + content_lines = content.splitlines(keepends=True) + n_added_lines = len(content_lines) + if lines and not (len(lines) == 1 and lines[0].strip() == ''): + # file is not empty + if not lines[-1].endswith('\n'): + lines[-1] += '\n' + new_lines = lines + content_lines + content = ''.join(new_lines) + else: + # file is empty + content = ''.join(content_lines) + + return content, n_added_lines + + +def _insert_impl(lines, start, content): + """Internal method to handle inserting to a file. + + Args: + lines: list[str]: The lines in the original file. + start: int: The start line number for inserting. + content: str: The content to insert to the file. + + Returns: + content: str: The new content of the file. + n_added_lines: int: The number of lines added to the file. + + Raises: + LineNumberError: If the start line number is invalid. + """ + inserted_lines = [content + '\n' if not content.endswith('\n') else content] + if len(lines) == 0: + new_lines = inserted_lines + elif start is not None: + if len(lines) == 1 and lines[0].strip() == '': + # if the file with only 1 line and that line is empty + lines = [] + + if len(lines) == 0: + new_lines = inserted_lines + else: + new_lines = lines[: start - 1] + inserted_lines + lines[start - 1 :] + else: + raise LineNumberError( + f'Invalid line number: {start}. Line numbers must be between 1 and {len(lines)} (inclusive).' 
+ ) + + content = ''.join(new_lines) + n_added_lines = len(inserted_lines) + return content, n_added_lines + + +def _edit_impl(lines, start, end, content): + """Internal method to handle editing a file. + + REQUIRES (should be checked by caller): + start <= end + start and end are between 1 and len(lines) (inclusive) + content ends with a newline + + Args: + lines: list[str]: The lines in the original file. + start: int: The start line number for editing. + end: int: The end line number for editing. + content: str: The content to replace the lines with. + + Returns: + content: str: The new content of the file. + n_added_lines: int: The number of lines added to the file. + """ + # Handle cases where start or end are None + if start is None: + start = 1 # Default to the beginning + if end is None: + end = len(lines) # Default to the end + # Check arguments + if not (1 <= start <= len(lines)): + raise LineNumberError( + f'Invalid start line number: {start}. Line numbers must be between 1 and {len(lines)} (inclusive).' + ) + if not (1 <= end <= len(lines)): + raise LineNumberError( + f'Invalid end line number: {end}. Line numbers must be between 1 and {len(lines)} (inclusive).' + ) + if start > end: + raise LineNumberError( + f'Invalid line range: {start}-{end}. Start must be less than or equal to end.' + ) + + if not content.endswith('\n'): + content += '\n' + content_lines = content.splitlines(True) + n_added_lines = len(content_lines) + new_lines = lines[: start - 1] + content_lines + lines[end:] + content = ''.join(new_lines) + return content, n_added_lines + + +def _edit_file_impl( + file_name: str, + start: int | None = None, + end: int | None = None, + content: str = '', + is_insert: bool = False, + is_append: bool = False, +) -> str: + """Internal method to handle common logic for edit_/append_file methods. + + Args: + file_name: str: The name of the file to edit or append to. + start: int | None = None: The start line number for editing. 
Ignored if is_append is True. + end: int | None = None: The end line number for editing. Ignored if is_append is True. + content: str: The content to replace the lines with or to append. + is_insert: bool = False: Whether to insert content at the given line number instead of editing. + is_append: bool = False: Whether to append content to the file instead of editing. + """ + ret_str = '' + global CURRENT_FILE, CURRENT_LINE, WINDOW + + ERROR_MSG = f'[Error editing file {file_name}. Please confirm the file is correct.]' + ERROR_MSG_SUFFIX = ( + 'Your changes have NOT been applied. Please fix your edit command and try again.\n' + 'You either need to 1) Open the correct file and try again or 2) Specify the correct line number arguments.\n' + 'DO NOT re-run the same failed edit command. Running it again will lead to the same error.' + ) + + if not _is_valid_filename(file_name): + raise FileNotFoundError('Invalid file name.') + + if not _is_valid_path(file_name): + raise FileNotFoundError('Invalid path or file name.') + + if not _create_paths(file_name): + raise PermissionError('Could not access or create directories.') + + if not os.path.isfile(file_name): + raise FileNotFoundError(f'File {file_name} not found.') + + if is_insert and is_append: + raise ValueError('Cannot insert and append at the same time.') + + # Use a temporary file to write changes + content = str(content or '') + temp_file_path = '' + src_abs_path = os.path.abspath(file_name) + first_error_line = None + + try: + n_added_lines = None + + # lint the original file + enable_auto_lint = os.getenv('ENABLE_AUTO_LINT', 'false').lower() == 'true' + if enable_auto_lint: + original_lint_error, _ = _lint_file(file_name) + + # Create a temporary file + with tempfile.NamedTemporaryFile('w', delete=False) as temp_file: + temp_file_path = temp_file.name + + # Read the original file and check if empty and for a trailing newline + with open(file_name) as original_file: + lines = original_file.readlines() + + if 
is_append: + content, n_added_lines = _append_impl(lines, content) + elif is_insert: + try: + content, n_added_lines = _insert_impl(lines, start, content) + except LineNumberError as e: + ret_str += (f'{ERROR_MSG}\n' f'{e}\n' f'{ERROR_MSG_SUFFIX}') + '\n' + return ret_str + else: + try: + content, n_added_lines = _edit_impl(lines, start, end, content) + except LineNumberError as e: + ret_str += (f'{ERROR_MSG}\n' f'{e}\n' f'{ERROR_MSG_SUFFIX}') + '\n' + return ret_str + + if not content.endswith('\n'): + content += '\n' + + # Write the new content to the temporary file + temp_file.write(content) + + # Replace the original file with the temporary file atomically + shutil.move(temp_file_path, src_abs_path) + + # Handle linting + # NOTE: we need to get env var inside this function + # because the env var will be set AFTER the agentskills is imported + if enable_auto_lint: + # BACKUP the original file + original_file_backup_path = os.path.join( + os.path.dirname(file_name), + f'.backup.{os.path.basename(file_name)}', + ) + with open(original_file_backup_path, 'w') as f: + f.writelines(lines) + + lint_error, first_error_line = _lint_file(file_name) + + # Select the errors caused by the modification + def extract_last_part(line): + parts = line.split(':') + if len(parts) > 1: + return parts[-1].strip() + return line.strip() + + def subtract_strings(str1, str2) -> str: + lines1 = str1.splitlines() + lines2 = str2.splitlines() + + last_parts1 = [extract_last_part(line) for line in lines1] + + remaining_lines = [ + line + for line in lines2 + if extract_last_part(line) not in last_parts1 + ] + + result = '\n'.join(remaining_lines) + return result + + if original_lint_error and lint_error: + lint_error = subtract_strings(original_lint_error, lint_error) + if lint_error == '': + lint_error = None + first_error_line = None + + if lint_error is not None: + if first_error_line is not None: + show_line = int(first_error_line) + elif is_append: + # original end-of-file + show_line 
= len(lines) + # insert OR edit WILL provide meaningful line numbers + elif start is not None and end is not None: + show_line = int((start + end) / 2) + else: + raise ValueError('Invalid state. This should never happen.') + + ret_str += LINTER_ERROR_MSG + ret_str += lint_error + '\n' + + editor_lines = n_added_lines + 20 + + ret_str += '[This is how your edit would have looked if applied]\n' + ret_str += '-------------------------------------------------\n' + ret_str += ( + _print_window(file_name, show_line, editor_lines, return_str=True) + + '\n' + ) + ret_str += '-------------------------------------------------\n\n' + + ret_str += '[This is the original code before your edit]\n' + ret_str += '-------------------------------------------------\n' + ret_str += ( + _print_window( + original_file_backup_path, + show_line, + editor_lines, + return_str=True, + ) + + '\n' + ) + ret_str += '-------------------------------------------------\n' + + ret_str += ( + 'Your changes have NOT been applied. Please fix your edit command and try again.\n' + 'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n' + 'DO NOT re-run the same failed edit command. Running it again will lead to the same error.' 
+ ) + + # recover the original file + with open(original_file_backup_path) as fin, open( + file_name, 'w' + ) as fout: + fout.write(fin.read()) + os.remove(original_file_backup_path) + return ret_str + + except FileNotFoundError as e: + ret_str += f'File not found: {e}\n' + except IOError as e: + ret_str += f'An error occurred while handling the file: {e}\n' + except ValueError as e: + ret_str += f'Invalid input: {e}\n' + except Exception as e: + # Clean up the temporary file if an error occurs + if temp_file_path and os.path.exists(temp_file_path): + os.remove(temp_file_path) + print(f'An unexpected error occurred: {e}') + raise e + + # Update the file information and print the updated content + with open(file_name, 'r', encoding='utf-8') as file: + n_total_lines = max(1, len(file.readlines())) + if first_error_line is not None and int(first_error_line) > 0: + CURRENT_LINE = first_error_line + else: + if is_append: + CURRENT_LINE = max(1, len(lines)) # end of original file + else: + CURRENT_LINE = start or n_total_lines or 1 + ret_str += f'[File: {os.path.abspath(file_name)} ({n_total_lines} lines total after edit)]\n' + CURRENT_FILE = file_name + ret_str += _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True) + '\n' + ret_str += MSG_FILE_UPDATED.format(line_number=CURRENT_LINE) + return ret_str + + +def edit_file_by_replace(file_name: str, to_replace: str, new_content: str) -> None: + """Edit a file. This will search for `to_replace` in the given file and replace it with `new_content`. + + Every *to_replace* must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc. + + Include enough lines to make code in `to_replace` unique. `to_replace` should NOT be empty. 
+ + For example, given a file "/workspace/example.txt" with the following content: + ``` + line 1 + line 2 + line 2 + line 3 + ``` + + EDITING: If you want to replace the second occurrence of "line 2", you can make `to_replace` unique: + + edit_file_by_replace( + '/workspace/example.txt', + to_replace='line 2\nline 3', + new_content='new line\nline 3', + ) + + This will replace only the second "line 2" with "new line". The first "line 2" will remain unchanged. + + The resulting file will be: + ``` + line 1 + line 2 + new line + line 3 + ``` + + REMOVAL: If you want to remove "line 2" and "line 3", you can set `new_content` to an empty string: + + edit_file_by_replace( + '/workspace/example.txt', + to_replace='line 2\nline 3', + new_content='', + ) + + Args: + file_name: str: The name of the file to edit. + to_replace: str: The content to search for and replace. + new_content: str: The new content to replace the old content with. + """ + # FIXME: support replacing *all* occurrences + if to_replace.strip() == '': + raise ValueError('`to_replace` must not be empty.') + + if to_replace == new_content: + raise ValueError('`to_replace` and `new_content` must be different.') + + # search for `to_replace` in the file + # if found, replace it with `new_content` + # if not found, perform a fuzzy search to find the closest match and replace it with `new_content` + with open(file_name, 'r') as file: + file_content = file.read() + + if file_content.count(to_replace) > 1: + raise ValueError( + '`to_replace` appears more than once, please include enough lines to make code in `to_replace` unique.' 
+ ) + + start = file_content.find(to_replace) + if start != -1: + # Convert start from index to line number + start_line_number = file_content[:start].count('\n') + 1 + end_line_number = start_line_number + len(to_replace.splitlines()) - 1 + else: + + def _fuzzy_transform(s: str) -> str: + # remove all space except newline + return re.sub(r'[^\S\n]+', '', s) + + # perform a fuzzy search (remove all spaces except newlines) + to_replace_fuzzy = _fuzzy_transform(to_replace) + file_content_fuzzy = _fuzzy_transform(file_content) + # find the closest match + start = file_content_fuzzy.find(to_replace_fuzzy) + if start == -1: + print( + f'[No exact match found in {file_name} for\n```\n{to_replace}\n```\n]' + ) + return + # Convert start from index to line number for fuzzy match + start_line_number = file_content_fuzzy[:start].count('\n') + 1 + end_line_number = start_line_number + len(to_replace.splitlines()) - 1 + + ret_str = _edit_file_impl( + file_name, + start=start_line_number, + end=end_line_number, + content=new_content, + is_insert=False, + ) + # lint_error = bool(LINTER_ERROR_MSG in ret_str) + # TODO: automatically tries to fix linter error (maybe involve some static analysis tools on the location near the edit to figure out indentation) + print(ret_str) + + +def insert_content_at_line(file_name: str, line_number: int, content: str) -> None: + """Insert content at the given line number in a file. + This will NOT modify the content of the lines before OR after the given line number. + + For example, if the file has the following content: + ``` + line 1 + line 2 + line 3 + ``` + and you call `insert_content_at_line('file.txt', 2, 'new line')`, the file will be updated to: + ``` + line 1 + new line + line 2 + line 3 + ``` + + Args: + file_name: str: The name of the file to edit. + line_number: int: The line number (starting from 1) to insert the content after. + content: str: The content to insert. 
+ """ + ret_str = _edit_file_impl( + file_name, + start=line_number, + end=line_number, + content=content, + is_insert=True, + is_append=False, + ) + print(ret_str) + + +def append_file(file_name: str, content: str) -> None: + """Append content to the given file. + It appends text `content` to the end of the specified file. + + Args: + file_name: str: The name of the file to edit. + line_number: int: The line number (starting from 1) to insert the content after. + content: str: The content to insert. + """ + ret_str = _edit_file_impl( + file_name, + start=None, + end=None, + content=content, + is_insert=False, + is_append=True, + ) + print(ret_str) + + +def search_dir(search_term: str, dir_path: str = './') -> None: + """Searches for search_term in all files in dir. If dir is not provided, searches in the current directory. + + Args: + search_term: str: The term to search for. + dir_path: str: The path to the directory to search. + """ + if not os.path.isdir(dir_path): + raise FileNotFoundError(f'Directory {dir_path} not found') + matches = [] + for root, _, files in os.walk(dir_path): + for file in files: + if file.startswith('.'): + continue + file_path = os.path.join(root, file) + with open(file_path, 'r', errors='ignore') as f: + for line_num, line in enumerate(f, 1): + if search_term in line: + matches.append((file_path, line_num, line.strip())) + + if not matches: + print(f'No matches found for "{search_term}" in {dir_path}') + return + + num_matches = len(matches) + num_files = len(set(match[0] for match in matches)) + + if num_files > 100: + print( + f'More than {num_files} files matched for "{search_term}" in {dir_path}. Please narrow your search.' 
+        )
+        return
+
+    print(f'[Found {num_matches} matches for "{search_term}" in {dir_path}]')
+    for file_path, line_num, line in matches:
+        print(f'{file_path} (Line {line_num}): {line}')
+    print(f'[End of matches for "{search_term}" in {dir_path}]')
+
+
+def search_file(search_term: str, file_path: str | None = None) -> None:
+    """Searches for search_term in file and prints every matching line with its line number. If file is not provided, searches in the current open file.
+
+    Args:
+        search_term: str: The term to search for.
+        file_path: str | None: The path to the file to search.
+    """
+    global CURRENT_FILE
+    if file_path is None:
+        file_path = CURRENT_FILE
+    if file_path is None:
+        raise FileNotFoundError(
+            'No file specified or open. Use the open_file function first.'
+        )
+    if not os.path.isfile(file_path):
+        raise FileNotFoundError(f'File {file_path} not found')
+
+    matches = []
+    with open(file_path, errors='ignore') as file:  # same decoding policy as search_dir: undecodable bytes must not abort the search
+        for i, line in enumerate(file, 1):
+            if search_term in line:
+                matches.append((i, line.strip()))
+
+    if matches:
+        print(f'[Found {len(matches)} matches for "{search_term}" in {file_path}]')
+        for match in matches:
+            print(f'Line {match[0]}: {match[1]}')
+        print(f'[End of matches for "{search_term}" in {file_path}]')
+    else:
+        print(f'[No matches found for "{search_term}" in {file_path}]')
+
+
+def find_file(file_name: str, dir_path: str = './') -> None:
+    """Finds all files whose name contains file_name, walking the specified directory recursively.
+
+    Args:
+        file_name: str: The name (or any part of the name) of the file to find.
+        dir_path: str: The path to the directory to search.
+ """ + if not os.path.isdir(dir_path): + raise FileNotFoundError(f'Directory {dir_path} not found') + + matches = [] + for root, _, files in os.walk(dir_path): + for file in files: + if file_name in file: + matches.append(os.path.join(root, file)) + + if matches: + print(f'[Found {len(matches)} matches for "{file_name}" in {dir_path}]') + for match in matches: + print(f'{match}') + print(f'[End of matches for "{file_name}" in {dir_path}]') + else: + print(f'[No matches found for "{file_name}" in {dir_path}]') + + +__all__ = [ + 'open_file', + 'goto_line', + 'scroll_down', + 'scroll_up', + 'create_file', + 'edit_file_by_replace', + 'insert_content_at_line', + 'append_file', + 'search_dir', + 'search_file', + 'find_file', +] diff --git a/opendevin/runtime/plugins/agent_skills/file_reader/__init__.py b/opendevin/runtime/plugins/agent_skills/file_reader/__init__.py new file mode 100644 index 0000000000..b50d6fb19c --- /dev/null +++ b/opendevin/runtime/plugins/agent_skills/file_reader/__init__.py @@ -0,0 +1,7 @@ +from ..utils.dependency import import_functions +from . import file_readers + +import_functions( + module=file_readers, function_names=file_readers.__all__, target_globals=globals() +) +__all__ = file_readers.__all__ diff --git a/opendevin/runtime/plugins/agent_skills/file_reader/file_readers.py b/opendevin/runtime/plugins/agent_skills/file_reader/file_readers.py new file mode 100644 index 0000000000..235dd17180 --- /dev/null +++ b/opendevin/runtime/plugins/agent_skills/file_reader/file_readers.py @@ -0,0 +1,244 @@ +"""File reader skills for the OpenDevin agent. + +This module provides various functions to parse and extract content from different file types, +including PDF, DOCX, LaTeX, audio, image, video, and PowerPoint files. It utilizes different +libraries and APIs to process these files and output their content or descriptions. + +Functions: + parse_pdf(file_path: str) -> None: Parse and print content of a PDF file. 
+    parse_docx(file_path: str) -> None: Parse and print content of a DOCX file.
+    parse_latex(file_path: str) -> None: Parse and print content of a LaTeX file.
+    parse_audio(file_path: str, model: str = 'whisper-1') -> None: Transcribe and print content of an audio file.
+    parse_image(file_path: str, task: str = 'Describe this image as detail as possible.') -> None: Analyze and print description of an image file.
+    parse_video(file_path: str, task: str = 'Describe this image as detail as possible.', frame_interval: int = 30) -> None: Analyze and print description of video frames.
+    parse_pptx(file_path: str) -> None: Parse and print content of a PowerPoint file.
+
+Note:
+    Some functions (parse_audio, parse_video, parse_image) require OpenAI API credentials
+    and are only available if the necessary environment variables are set.
+"""
+
+import base64
+
+import docx
+import PyPDF2
+from pptx import Presentation
+from pylatexenc.latex2text import LatexNodes2Text
+
+from ..utils.config import (
+    _get_max_token,
+    _get_openai_api_key,
+    _get_openai_base_url,
+    _get_openai_client,
+    _get_openai_model,
+)
+
+
+def parse_pdf(file_path: str) -> None:
+    """Parses the content of a PDF file and prints it, each page under an '@@ Page N @@' header.
+
+    Args:
+        file_path: str: The path to the file to open.
+    """
+    print(f'[Reading PDF file from {file_path}]')
+    content = PyPDF2.PdfReader(file_path)
+    text = ''
+    for page_idx in range(len(content.pages)):
+        text += (
+            f'@@ Page {page_idx + 1} @@\n'
+            + content.pages[page_idx].extract_text()
+            + '\n\n'
+        )
+    print(text.strip())
+
+
+def parse_docx(file_path: str) -> None:
+    """Prints every paragraph of a DOCX file, each under an '@@ Page N @@' header (N counts paragraphs, not pages).
+
+    Args:
+        file_path: str: The path to the file to open.
+    """
+    print(f'[Reading DOCX file from {file_path}]')
+    content = docx.Document(file_path)
+    text = ''
+    for i, para in enumerate(content.paragraphs):
+        text += f'@@ Page {i + 1} @@\n' + para.text + '\n\n'
+    print(text)
+
+
+def parse_latex(file_path: str) -> None:
+    """Reads a LaTeX file, converts it with `LatexNodes2Text().latex_to_text`, and prints the result.
+
+    Args:
+        file_path: str: The path to the file to open.
+    """
+    print(f'[Reading LaTex file from {file_path}]')
+    with open(file_path) as f:
+        data = f.read()
+    text = LatexNodes2Text().latex_to_text(data)
+    print(text.strip())
+
+
+def _base64_img(file_path: str) -> str:
+    with open(file_path, 'rb') as image_file:
+        encoded_image = base64.b64encode(image_file.read()).decode('utf-8')  # raw file bytes -> base64 text
+    return encoded_image
+
+
+def _base64_video(file_path: str, frame_interval: int = 10) -> list[str]:
+    import cv2  # function-scope import: within this module cv2 is only used by this helper
+
+    video = cv2.VideoCapture(file_path)
+    base64_frames = []
+    frame_count = 0
+    while video.isOpened():
+        success, frame = video.read()
+        if not success:
+            break
+        if frame_count % frame_interval == 0:  # keep frame 0 and every frame_interval-th frame after it
+            _, buffer = cv2.imencode('.jpg', frame)
+            base64_frames.append(base64.b64encode(buffer).decode('utf-8'))
+        frame_count += 1
+    video.release()
+    return base64_frames
+
+
+def _prepare_image_messages(task: str, base64_image: str) -> list[dict]:
+    return [  # one user message: the task text followed by the image as a JPEG data URL
+        {
+            'role': 'user',
+            'content': [
+                {'type': 'text', 'text': task},
+                {
+                    'type': 'image_url',
+                    'image_url': {'url': f'data:image/jpeg;base64,{base64_image}'},
+                },
+            ],
+        }
+    ]
+
+
+def parse_audio(file_path: str, model: str = 'whisper-1') -> None:
+    """Sends an audio file to the OpenAI `audio.translations` endpoint and prints the returned text.
+
+    Args:
+        file_path: str: The path to the audio file to transcribe.
+        model: str: The audio model to use for transcription. Defaults to 'whisper-1'.
+    """
+    print(f'[Transcribing audio file from {file_path}]')
+    try:
+        # TODO: record the COST of the API call
+        with open(file_path, 'rb') as audio_file:
+            transcript = _get_openai_client().audio.translations.create(
+                model=model, file=audio_file
+            )
+        print(transcript.text)
+
+    except Exception as e:
+        print(f'Error transcribing audio file: {e}')  # best effort: failures are reported, not raised
+
+
+def parse_image(
+    file_path: str, task: str = 'Describe this image as detail as possible.'
+) -> None:
+    """Sends a base64-encoded image file together with `task` to the chat completions API and prints the reply.
+
+    Args:
+        file_path: str: The path to the file to open.
+        task: str: The task description for the API call. Defaults to 'Describe this image as detail as possible.'.
+    """
+    print(f'[Reading image file from {file_path}]')
+    # TODO: record the COST of the API call
+    try:
+        base64_image = _base64_img(file_path)
+        response = _get_openai_client().chat.completions.create(
+            model=_get_openai_model(),
+            messages=_prepare_image_messages(task, base64_image),
+            max_tokens=_get_max_token(),
+        )
+        content = response.choices[0].message.content
+        print(content)
+
+    except Exception as error:
+        print(f'Error with the request: {error}')  # best effort: failures are reported, not raised
+
+
+def parse_video(
+    file_path: str,
+    task: str = 'Describe this image as detail as possible.',
+    frame_interval: int = 30,
+) -> None:
+    """Samples frames from a video file and prints a description of each sampled frame.
+
+    Args:
+        file_path: str: The path to the video file to open.
+        task: str: The task description for the API call. Defaults to 'Describe this image as detail as possible.'.
+        frame_interval: int: The interval between frames to analyze. Defaults to 30.
+
+    """
+    print(
+        f'[Processing video file from {file_path} with frame interval {frame_interval}]'
+    )
+
+    task = task or 'This is one frame from a video, please summarize this frame.'
+    # Sample at the requested interval directly. Re-slicing the helper's default
+    # sample (every 10th frame) would analyze every (10 * frame_interval)-th frame.
+    selected_frames = _base64_video(file_path, frame_interval)
+    # Ceil-divide so that at most 30 frames (i.e. 30 API calls) remain.
+    step = -(-len(selected_frames) // 30) or 1
+    selected_frames = selected_frames[::step]
+
+    print(f'Totally {len(selected_frames)} would be analyze...\n')
+
+    for idx, base64_frame in enumerate(selected_frames, 1):
+        # Index of this frame in the source video (the first sampled frame is 0).
+        frame_no = (idx - 1) * frame_interval * step
+        print(f'Process the {file_path}, current No. {frame_no} frame...')
+        # TODO: record the COST of the API call
+        try:
+            response = _get_openai_client().chat.completions.create(
+                model=_get_openai_model(),
+                messages=_prepare_image_messages(task, base64_frame),
+                max_tokens=_get_max_token(),
+            )
+
+            content = response.choices[0].message.content
+            current_frame_content = f"Frame {idx}'s content: {content}\n"
+            print(current_frame_content)
+
+        except Exception as error:
+            # Best effort: a failed request is reported and the next frame is still tried.
+            print(f'Error with the request: {error}')
+
+
+def parse_pptx(file_path: str) -> None:
+    """Parses the content of a pptx file and prints the text of every slide.
+
+    Args:
+        file_path: str: The path to the file to open.
+ """ + print(f'[Reading PowerPoint file from {file_path}]') + try: + pres = Presentation(str(file_path)) + text = [] + for slide_idx, slide in enumerate(pres.slides): + text.append(f'@@ Slide {slide_idx + 1} @@') + for shape in slide.shapes: + if hasattr(shape, 'text'): + text.append(shape.text) + print('\n'.join(text)) + + except Exception as e: + print(f'Error reading PowerPoint file: {e}') + + +__all__ = [ + 'parse_pdf', + 'parse_docx', + 'parse_latex', + 'parse_pptx', +] + +# This is called from OpenDevin's side +# If SANDBOX_ENV_OPENAI_API_KEY is set, we will be able to use these tools in the sandbox environment +if _get_openai_api_key() and _get_openai_base_url(): + __all__ += ['parse_audio', 'parse_video', 'parse_image'] diff --git a/opendevin/runtime/plugins/agent_skills/utils/aider/LICENSE.txt b/opendevin/runtime/plugins/agent_skills/utils/aider/LICENSE.txt new file mode 100644 index 0000000000..d645695673 --- /dev/null +++ b/opendevin/runtime/plugins/agent_skills/utils/aider/LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/opendevin/runtime/plugins/agent_skills/aider/README.md b/opendevin/runtime/plugins/agent_skills/utils/aider/README.md similarity index 100% rename from opendevin/runtime/plugins/agent_skills/aider/README.md rename to opendevin/runtime/plugins/agent_skills/utils/aider/README.md diff --git a/opendevin/runtime/plugins/agent_skills/aider/__init__.py b/opendevin/runtime/plugins/agent_skills/utils/aider/__init__.py similarity index 100% rename from opendevin/runtime/plugins/agent_skills/aider/__init__.py rename to opendevin/runtime/plugins/agent_skills/utils/aider/__init__.py diff --git a/opendevin/runtime/plugins/agent_skills/aider/linter.py b/opendevin/runtime/plugins/agent_skills/utils/aider/linter.py similarity index 100% rename from opendevin/runtime/plugins/agent_skills/aider/linter.py rename to opendevin/runtime/plugins/agent_skills/utils/aider/linter.py diff --git a/opendevin/runtime/plugins/agent_skills/utils/config.py b/opendevin/runtime/plugins/agent_skills/utils/config.py new file mode 100644 index 0000000000..cf7cf91e45 --- /dev/null +++ b/opendevin/runtime/plugins/agent_skills/utils/config.py @@ -0,0 +1,30 @@ +import os + +from openai import OpenAI + + +# ================================================================================================== +# OPENAI +# TODO: Move this to EventStream Actions when EventStreamRuntime is fully implemented +# NOTE: we need to 
get env vars inside functions because they will be set in IPython
+# AFTER the agentskills is imported (the case for EventStreamRuntime)
+# ==================================================================================================
+def _get_openai_api_key() -> str:
+    return os.getenv('OPENAI_API_KEY', os.getenv('SANDBOX_ENV_OPENAI_API_KEY', ''))  # OPENAI_API_KEY wins; '' when neither is set
+
+
+def _get_openai_base_url() -> str:
+    return os.getenv('OPENAI_BASE_URL', 'https://api.openai.com/v1')
+
+
+def _get_openai_model() -> str:
+    return os.getenv('OPENAI_MODEL', 'gpt-4o-2024-05-13')
+
+
+def _get_max_token() -> int:
+    return int(os.getenv('MAX_TOKEN', 500))  # env values arrive as str; callers pass this as max_tokens, so always return an int
+
+
+def _get_openai_client() -> OpenAI:
+    client = OpenAI(api_key=_get_openai_api_key(), base_url=_get_openai_base_url())  # built per call so env vars set after import are honoured
+    return client
diff --git a/opendevin/runtime/plugins/agent_skills/utils/dependency.py b/opendevin/runtime/plugins/agent_skills/utils/dependency.py
new file mode 100644
index 0000000000..1ff1636fb5
--- /dev/null
+++ b/opendevin/runtime/plugins/agent_skills/utils/dependency.py
@@ -0,0 +1,11 @@
+from types import ModuleType
+
+
+def import_functions(
+    module: ModuleType, function_names: list[str], target_globals: dict
+) -> None:
+    for name in function_names:
+        if hasattr(module, name):
+            target_globals[name] = getattr(module, name)  # re-export the function under the same name
+        else:
+            raise ValueError(f'Function {name} not found in {module.__name__}')
diff --git a/opendevin/runtime/plugins/jupyter/__init__.py b/opendevin/runtime/plugins/jupyter/__init__.py index f9c33bfa19..815156064e 100644 --- a/opendevin/runtime/plugins/jupyter/__init__.py +++ b/opendevin/runtime/plugins/jupyter/__init__.py @@ -27,7 +27,7 @@ class JupyterPlugin(Plugin): f"su - {username} -s /bin/bash << 'EOF'\n" 'cd /opendevin/code\n' 'export POETRY_VIRTUALENVS_PATH=/opendevin/poetry;\n' - 'export PYTHONPATH=/opendevin/code/opendevin/runtime/plugins/agent_skills:$PYTHONPATH;\n' + 'export PYTHONPATH=/opendevin/code:$PYTHONPATH;\n' '/opendevin/miniforge3/bin/mamba run -n base ' 'poetry run jupyter kernelgateway '
'--KernelGatewayApp.ip=0.0.0.0 ' diff --git a/opendevin/runtime/plugins/jupyter/execute_server.py b/opendevin/runtime/plugins/jupyter/execute_server.py index 3582425747..da038d5266 100755 --- a/opendevin/runtime/plugins/jupyter/execute_server.py +++ b/opendevin/runtime/plugins/jupyter/execute_server.py @@ -68,11 +68,9 @@ class JupyterKernel: async def initialize(self): await self.execute(r'%colors nocolor') # pre-defined tools - self.tools_to_run = [ + self.tools_to_run: list[str] = [ # TODO: You can add code for your pre-defined tools here ] - if os.path.exists('/opendevin/plugins/agent_skills/agentskills.py'): - self.tools_to_run.append('from agentskills import *') for tool in self.tools_to_run: res = await self.execute(tool) logging.info(f'Tool [{tool}] initialized:\n{res}') diff --git a/tests/unit/test_agent_skill.py b/tests/unit/test_agent_skill.py index 7e83932778..14e36cd006 100644 --- a/tests/unit/test_agent_skill.py +++ b/tests/unit/test_agent_skill.py @@ -7,7 +7,7 @@ from unittest.mock import patch import docx import pytest -from opendevin.runtime.plugins.agent_skills.agentskills import ( +from opendevin.runtime.plugins.agent_skills.file_ops.file_ops import ( MSG_FILE_UPDATED, WINDOW, _print_window, @@ -18,15 +18,17 @@ from opendevin.runtime.plugins.agent_skills.agentskills import ( goto_line, insert_content_at_line, open_file, - parse_docx, - parse_latex, - parse_pdf, - parse_pptx, scroll_down, scroll_up, search_dir, search_file, ) +from opendevin.runtime.plugins.agent_skills.file_reader.file_readers import ( + parse_docx, + parse_latex, + parse_pdf, + parse_pptx, +) # CURRENT_FILE must be reset for each test diff --git a/tests/unit/test_aider_linter.py b/tests/unit/test_aider_linter.py index f13d6d24b9..bd0776e069 100644 --- a/tests/unit/test_aider_linter.py +++ b/tests/unit/test_aider_linter.py @@ -2,7 +2,7 @@ import os import pytest -from opendevin.runtime.plugins.agent_skills.aider import Linter, LintResult +from 
opendevin.runtime.plugins.agent_skills.utils.aider import Linter, LintResult @pytest.fixture @@ -108,7 +108,7 @@ def test_py_lint_fail(linter, temp_file): def test_basic_lint(temp_file): - from opendevin.runtime.plugins.agent_skills.aider.linter import basic_lint + from opendevin.runtime.plugins.agent_skills.utils.aider.linter import basic_lint poorly_formatted_code = """ def foo() @@ -124,7 +124,7 @@ def test_basic_lint(temp_file): def test_basic_lint_fail_returns_text_and_lines(temp_file): - from opendevin.runtime.plugins.agent_skills.aider.linter import basic_lint + from opendevin.runtime.plugins.agent_skills.utils.aider.linter import basic_lint poorly_formatted_code = """ def foo() @@ -141,7 +141,9 @@ def test_basic_lint_fail_returns_text_and_lines(temp_file): def test_lint_python_compile(temp_file): - from opendevin.runtime.plugins.agent_skills.aider.linter import lint_python_compile + from opendevin.runtime.plugins.agent_skills.utils.aider.linter import ( + lint_python_compile, + ) result = lint_python_compile(temp_file, "print('Hello, World!')\n") @@ -149,7 +151,9 @@ def test_lint_python_compile(temp_file): def test_lint_python_compile_fail_returns_text_and_lines(temp_file): - from opendevin.runtime.plugins.agent_skills.aider.linter import lint_python_compile + from opendevin.runtime.plugins.agent_skills.utils.aider.linter import ( + lint_python_compile, + ) poorly_formatted_code = """ def foo()