refactor: break agentskills single file to multiple composable modules (#3429)

* refactor agentskills to prepare for agentless

* fix import

* fix typo

* fix imports

* fix globals

* fix import

* fix import

* disable logging to file to avoid an auto-created log file with permission issues when OD is imported in the runtime

* import agentskills from OD instead of from itself directly

* add back pythonpath

* remove chown since there's no log/folder
This commit is contained in:
Xingyao Wang
2024-08-18 05:18:36 +08:00
committed by GitHub
parent 3d04bd90e1
commit 8d7bf83224
19 changed files with 1401 additions and 1122 deletions

View File

@@ -81,8 +81,6 @@ RUN python opendevin/core/download.py # No-op to download assets
# opendevin:opendevin -> opendevin:app
RUN find /app \! -group app -exec chgrp app {} +
RUN chown -R opendevin:app /app/logs && chmod -R 770 /app/logs # This gets created by the download.py script
COPY --chown=opendevin:app --chmod=770 --from=frontend-builder /app/dist ./frontend/dist
COPY --chown=opendevin:app --chmod=770 ./containers/app/entrypoint.sh /app/entrypoint.sh

View File

@@ -10,6 +10,7 @@ from termcolor import colored
DISABLE_COLOR_PRINTING = False
DEBUG = os.getenv('DEBUG', 'False').lower() in ['true', '1', 'yes']
LOG_TO_FILE = os.getenv('LOG_TO_FILE', 'False').lower() in ['true', '1', 'yes']
ColorType = Literal[
'red',
@@ -162,11 +163,15 @@ LOG_DIR = os.path.join(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
'logs',
)
if DEBUG:
opendevin_logger.setLevel(logging.DEBUG)
if LOG_TO_FILE:
# default log to project root
opendevin_logger.info('DEBUG logging is enabled. Logging to %s', LOG_DIR)
opendevin_logger.addHandler(get_file_handler(LOG_DIR))
opendevin_logger.info('Logging to file is enabled. Logging to %s', LOG_DIR)
opendevin_logger.addHandler(get_file_handler(LOG_DIR))
opendevin_logger.addHandler(get_console_handler())
opendevin_logger.addFilter(SensitiveDataFilter(opendevin_logger.name))
opendevin_logger.propagate = False
@@ -241,7 +246,8 @@ def _setup_llm_logger(name, debug_level=logging.DEBUG):
logger = logging.getLogger(name)
logger.propagate = False
logger.setLevel(debug_level)
logger.addHandler(_get_llm_file_handler(name, debug_level))
if LOG_TO_FILE:
logger.addHandler(_get_llm_file_handler(name, debug_level))
return logger

View File

@@ -109,7 +109,9 @@ class RuntimeClient:
# AFTER ServerRuntime is deprecated
if 'agent_skills' in self.plugins and 'jupyter' in self.plugins:
obs = await self.run_ipython(
IPythonRunCellAction(code='from agentskills import *')
IPythonRunCellAction(
code='from opendevin.runtime.plugins.agent_skills.agentskills import *\n'
)
)
logger.info(f'AgentSkills initialized: {obs}')

View File

@@ -1,13 +1,14 @@
from dataclasses import dataclass
from opendevin.runtime.plugins.agent_skills.agentskills import DOCUMENTATION
from opendevin.runtime.plugins.requirement import Plugin, PluginRequirement
from . import agentskills
@dataclass
class AgentSkillsRequirement(PluginRequirement):
name: str = 'agent_skills'
documentation: str = DOCUMENTATION
documentation: str = agentskills.DOCUMENTATION
class AgentSkillsPlugin(Plugin):

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,7 @@
from ..utils.dependency import import_functions
from . import file_ops
# Expose the file_ops skills at package level. Presumably import_functions
# copies each named function into target_globals — confirm in ..utils.dependency.
import_functions(
module=file_ops, function_names=file_ops.__all__, target_globals=globals()
)
# The package's public API is exactly the public API of file_ops.
__all__ = file_ops.__all__

View File

@@ -0,0 +1,857 @@
"""file_ops.py
This module provides various file manipulation skills for the OpenDevin agent.
Functions:
- open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100): Opens a file and optionally moves to a specific line.
- goto_line(line_number: int): Moves the window to show the specified line number.
- scroll_down(): Moves the window down by the number of lines specified in WINDOW.
- scroll_up(): Moves the window up by the number of lines specified in WINDOW.
- create_file(filename: str): Creates and opens a new file with the given name.
- search_dir(search_term: str, dir_path: str = './'): Searches for a term in all files in the specified directory.
- search_file(search_term: str, file_path: str | None = None): Searches for a term in the specified file or the currently open file.
- find_file(file_name: str, dir_path: str = './'): Finds all files with the given name in the specified directory.
- edit_file_by_replace(file_name: str, to_replace: str, new_content: str): Replaces specific content in a file with new content.
- insert_content_at_line(file_name: str, line_number: int, content: str): Inserts given content at the specified line number in a file.
- append_file(file_name: str, content: str): Appends the given content to the end of the specified file.
"""
import os
import re
import shutil
import tempfile
if __package__ is None or __package__ == '':
from aider import Linter
else:
from ..utils.aider import Linter
# Module-level editor state shared by the skills in this module.
# Path of the file most recently opened or edited; None until a file is opened.
CURRENT_FILE: str | None = None
# 1-based line number the viewing window is currently positioned on.
CURRENT_LINE = 1
# Default number of lines displayed per window; also the scroll step.
WINDOW = 100
# This is also used in unit tests!
MSG_FILE_UPDATED = '[File updated (edited at line {line_number}). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]'
# ==================================================================================================
def _is_valid_filename(file_name) -> bool:
    """Return True when ``file_name`` is a non-blank string free of forbidden characters.

    On POSIX only the NUL character is rejected; on every other platform the
    Windows-reserved set ``<>:"/\\|?*`` is rejected.
    """
    if not file_name or not isinstance(file_name, str):
        return False
    if not file_name.strip():
        return False
    # 'nt' and unknown platforms share the same (Windows) forbidden set.
    forbidden = '\0' if os.name == 'posix' else '<>:"/\\|?*'
    return not any(symbol in file_name for symbol in forbidden)
def _is_valid_path(path) -> bool:
    """Return True when ``path`` is a non-empty string naming something that exists.

    A PermissionError raised while probing the path is treated as "not valid".
    """
    if not (path and isinstance(path, str)):
        return False
    try:
        normalized = os.path.normpath(path)
        return os.path.exists(normalized)
    except PermissionError:
        return False
def _create_paths(file_name) -> bool:
    """Make sure the parent directory of ``file_name`` exists.

    Returns:
        bool: True on success (or when there is no directory component),
        False when directory creation is denied with PermissionError.
    """
    parent_dir = os.path.dirname(file_name)
    try:
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
    except PermissionError:
        return False
    return True
def _check_current_file(file_path: str | None = None) -> bool:
global CURRENT_FILE
if not file_path:
file_path = CURRENT_FILE
if not file_path or not os.path.isfile(file_path):
raise ValueError('No file open. Use the open_file function first.')
return True
def _clamp(value, min_value, max_value):
    """Limit ``value`` to the closed range [min_value, max_value].

    The upper bound is applied first, so ``min_value`` wins if the bounds cross.
    """
    capped = min(value, max_value)
    return max(min_value, capped)
def _lint_file(file_path: str) -> tuple[str | None, int | None]:
    """Lint the file at ``file_path`` with a Linter rooted at the current working directory.

    Returns:
        tuple[str | None, int | None]: ``(lint_error, first_error_line_number)``.
        Both items are None when the linter reports nothing; otherwise the
        first item is the linter text prefixed with ``'ERRORS:\\n'`` and the
        second is the first entry of the linter result's ``lines``.
    """
    report = Linter(root=os.getcwd()).lint(file_path)
    if report:
        return 'ERRORS:\n' + report.text, report.lines[0]
    # Linting successful. No issues found.
    return None, None
def _print_window(file_path, targeted_line, window, return_str=False):
    """Render a numbered excerpt of ``file_path`` around ``targeted_line``.

    Updates the module-level ``CURRENT_LINE`` to the (clamped) target line.

    Args:
        file_path: Path of the file to display; must be an existing file.
        targeted_line: 1-based line the window should be positioned on.
        window: Number of lines to show.
        return_str: When True the excerpt is returned, otherwise it is printed.

    Returns:
        str | None: The rendered excerpt when ``return_str`` is True, else None.
    """
    global CURRENT_LINE
    _check_current_file(file_path)

    with open(file_path) as handle:
        text = handle.read()

    # Guarantee a trailing newline so an empty file still yields one line.
    if not text.endswith('\n'):
        text += '\n'
    all_lines = text.splitlines(True)  # keep the line terminators
    line_count = len(all_lines)

    CURRENT_LINE = _clamp(targeted_line, 1, line_count)
    radius = max(1, window // 2)

    first = max(1, CURRENT_LINE - radius)
    last = min(line_count, CURRENT_LINE + radius)
    # When the window touches either end of the file, extend the other side
    # so that up to `window` lines are still shown.
    if first == 1:
        last = min(line_count, first + window - 1)
    if last == line_count:
        first = max(1, last - window + 1)

    pieces = []
    if first > 1:
        pieces.append(f'({first - 1} more lines above)\n')
    else:
        pieces.append('(this is the beginning of the file)\n')

    for number in range(first, last + 1):
        row = f'{number}|{all_lines[number - 1]}'
        pieces.append(row if row.endswith('\n') else row + '\n')

    if last < line_count:
        pieces.append(f'({line_count - last} more lines below)\n')
    else:
        pieces.append('(this is the end of the file)\n')

    rendered = ''.join(pieces).rstrip()
    if return_str:
        return rendered
    print(rendered)
def _cur_file_header(current_file, total_lines) -> str:
    """Build the ``[File: <abs path> (<n> lines total)]`` banner line.

    Returns an empty string when ``current_file`` is falsy.
    """
    if current_file:
        return f'[File: {os.path.abspath(current_file)} ({total_lines} lines total)]\n'
    return ''
def open_file(
    path: str, line_number: int | None = 1, context_lines: int | None = WINDOW
) -> None:
    """Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line.
    It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down`
    to view the file if you want to see more.

    Args:
        path: str: The path to the file to open, preferred absolute path.
        line_number: int | None = 1: The line number to move to. Defaults to 1.
        context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100.
    """
    global CURRENT_FILE, CURRENT_LINE, WINDOW

    if not os.path.isfile(path):
        raise FileNotFoundError(f'File {path} not found')

    # The file becomes the current file before the line number is validated.
    CURRENT_FILE = os.path.abspath(path)
    with open(CURRENT_FILE) as handle:
        total_lines = max(1, len(handle.readlines()))

    line_in_range = isinstance(line_number, int) and 1 <= line_number <= total_lines
    if not line_in_range:
        raise ValueError(f'Line number must be between 1 and {total_lines}')
    CURRENT_LINE = line_number

    # A missing or non-positive context size falls back to the default window.
    shown_lines = WINDOW if context_lines is None or context_lines < 1 else context_lines

    header = _cur_file_header(CURRENT_FILE, total_lines)
    excerpt = _print_window(
        CURRENT_FILE, CURRENT_LINE, _clamp(shown_lines, 1, 2000), return_str=True
    )
    print(header + excerpt)
def goto_line(line_number: int) -> None:
    """Moves the window to show the specified line number.

    Args:
        line_number: int: The line number to move to.
    """
    global CURRENT_FILE, CURRENT_LINE, WINDOW
    _check_current_file()

    with open(str(CURRENT_FILE)) as handle:
        total_lines = max(1, len(handle.readlines()))

    line_in_range = isinstance(line_number, int) and 1 <= line_number <= total_lines
    if not line_in_range:
        raise ValueError(f'Line number must be between 1 and {total_lines}')

    CURRENT_LINE = _clamp(line_number, 1, total_lines)

    header = _cur_file_header(CURRENT_FILE, total_lines)
    excerpt = _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True)
    print(header + excerpt)
def scroll_down() -> None:
    """Moves the window down by 100 lines.

    Args:
        None
    """
    global CURRENT_FILE, CURRENT_LINE, WINDOW
    _check_current_file()

    with open(str(CURRENT_FILE)) as handle:
        line_count = max(1, len(handle.readlines()))

    # Advance by one full window, never past the last line.
    CURRENT_LINE = _clamp(CURRENT_LINE + WINDOW, 1, line_count)

    header = _cur_file_header(CURRENT_FILE, line_count)
    excerpt = _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True)
    print(header + excerpt)
def scroll_up() -> None:
    """Moves the window up by 100 lines.

    Args:
        None
    """
    global CURRENT_FILE, CURRENT_LINE, WINDOW
    _check_current_file()

    with open(str(CURRENT_FILE)) as handle:
        line_count = max(1, len(handle.readlines()))

    # Step back by one full window, never before the first line.
    CURRENT_LINE = _clamp(CURRENT_LINE - WINDOW, 1, line_count)

    header = _cur_file_header(CURRENT_FILE, line_count)
    excerpt = _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True)
    print(header + excerpt)
def create_file(filename: str) -> None:
    """Creates and opens a new file with the given name.

    Args:
        filename: str: The name of the file to create.
    """
    already_there = os.path.exists(filename)
    if already_there:
        raise FileExistsError(f"File '{filename}' already exists.")

    # Seed the file with a single newline so it has one (empty) line to show.
    with open(filename, 'w') as new_file:
        new_file.write('\n')

    open_file(filename)
    print(f'[File {filename} created.]')
# Prefix added to the edit result when linting the edited file reports errors.
LINTER_ERROR_MSG = '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n'
class LineNumberError(Exception):
    """Raised by the edit helpers when a requested line number or range is invalid."""
def _append_impl(lines, content):
    """Internal method to handle appending to a file.

    The caller's ``lines`` list is never modified: a newline needed to
    terminate the last existing line is added to a private copy, so callers
    can keep using ``lines`` as a faithful snapshot of the original file.

    Args:
        lines: list[str]: The lines in the original file.
        content: str: The content to append to the file.

    Returns:
        content: str: The new content of the file.
        n_added_lines: int: The number of lines added to the file.
    """
    content_lines = content.splitlines(keepends=True)
    n_added_lines = len(content_lines)

    # A file with no lines, or with a single blank line, counts as empty:
    # the appended content becomes the whole file.
    file_is_empty = not lines or (len(lines) == 1 and lines[0].strip() == '')
    if file_is_empty:
        return ''.join(content_lines), n_added_lines

    existing = list(lines)  # copy: do not mutate the caller's list
    if not existing[-1].endswith('\n'):
        existing[-1] += '\n'
    return ''.join(existing + content_lines), n_added_lines
def _insert_impl(lines, start, content):
    """Internal method to handle inserting to a file.

    The content is placed before the line currently at ``start``. A ``start``
    past the end of the file places the content at the end.

    Args:
        lines: list[str]: The lines in the original file.
        start: int: The start line number for inserting.
        content: str: The content to insert to the file.

    Returns:
        content: str: The new content of the file.
        n_added_lines: int: The number of lines added to the file.

    Raises:
        LineNumberError: If the start line number is invalid.
    """
    if not content.endswith('\n'):
        content += '\n'
    # Split so that n_added_lines reflects multi-line content.
    inserted_lines = content.splitlines(keepends=True)

    if len(lines) == 0:
        new_lines = inserted_lines
    elif start is not None:
        if len(lines) == 1 and lines[0].strip() == '':
            # A file holding only one blank line is treated as empty.
            new_lines = inserted_lines
        else:
            if start < 1:
                # A zero/negative start would turn into a negative slice
                # index and silently insert near the end of the file.
                raise LineNumberError(
                    f'Invalid line number: {start}. Line numbers must be between 1 and {len(lines)} (inclusive).'
                )
            head = list(lines[: start - 1])
            # When inserting after a final line that lacks a newline,
            # terminate it first so the two lines are not glued together.
            if head and not head[-1].endswith('\n'):
                head[-1] += '\n'
            new_lines = head + inserted_lines + list(lines[start - 1 :])
    else:
        raise LineNumberError(
            f'Invalid line number: {start}. Line numbers must be between 1 and {len(lines)} (inclusive).'
        )

    content = ''.join(new_lines)
    n_added_lines = len(inserted_lines)
    return content, n_added_lines
def _edit_impl(lines, start, end, content):
    """Internal method to replace an inclusive line range with new content.

    ``start`` defaults to 1 and ``end`` to ``len(lines)`` when passed as None.
    The range is validated here; a trailing newline is added to ``content``
    when it is missing.

    Args:
        lines: list[str]: The lines in the original file.
        start: int: The start line number for editing.
        end: int: The end line number for editing.
        content: str: The content to replace the lines with.

    Returns:
        content: str: The new content of the file.
        n_added_lines: int: The number of lines added to the file.

    Raises:
        LineNumberError: If start or end is out of range, or start > end.
    """
    total = len(lines)
    start = 1 if start is None else start
    end = total if end is None else end

    if start < 1 or start > total:
        raise LineNumberError(
            f'Invalid start line number: {start}. Line numbers must be between 1 and {len(lines)} (inclusive).'
        )
    if end < 1 or end > total:
        raise LineNumberError(
            f'Invalid end line number: {end}. Line numbers must be between 1 and {len(lines)} (inclusive).'
        )
    if start > end:
        raise LineNumberError(
            f'Invalid line range: {start}-{end}. Start must be less than or equal to end.'
        )

    replacement = content if content.endswith('\n') else content + '\n'
    replacement_lines = replacement.splitlines(True)
    merged = lines[: start - 1] + replacement_lines + lines[end:]
    return ''.join(merged), len(replacement_lines)
def _edit_file_impl(
    file_name: str,
    start: int | None = None,
    end: int | None = None,
    content: str = '',
    is_insert: bool = False,
    is_append: bool = False,
) -> str:
    """Internal method to handle common logic for edit_/append_file methods.

    The new content is written to a temporary file which then replaces
    ``file_name`` (the original permission bits are copied onto it first).
    When the ``ENABLE_AUTO_LINT`` environment variable is ``'true'`` the file
    is linted before and after the change; if the change introduces new lint
    errors, the original content is restored from a ``.backup.<name>`` copy
    and a report is returned instead. The temporary file and the backup file
    are always cleaned up.

    Args:
        file_name: str: The name of the file to edit or append to.
        start: int | None = None: The start line number for editing. Ignored if is_append is True.
        end: int | None = None: The end line number for editing. Ignored if is_append is True.
        content: str: The content to replace the lines with or to append.
        is_insert: bool = False: Whether to insert content at the given line number instead of editing.
        is_append: bool = False: Whether to append content to the file instead of editing.

    Returns:
        str: The message to show to the agent: an error report, a lint
        report, or the updated window of the file followed by the
        "file updated" notice.

    Raises:
        FileNotFoundError: If the file name or path is invalid or the file does not exist.
        PermissionError: If the parent directories cannot be accessed or created.
        ValueError: If both ``is_insert`` and ``is_append`` are set.
    """
    global CURRENT_FILE, CURRENT_LINE, WINDOW
    ret_str = ''

    ERROR_MSG = f'[Error editing file {file_name}. Please confirm the file is correct.]'
    ERROR_MSG_SUFFIX = (
        'Your changes have NOT been applied. Please fix your edit command and try again.\n'
        'You either need to 1) Open the correct file and try again or 2) Specify the correct line number arguments.\n'
        'DO NOT re-run the same failed edit command. Running it again will lead to the same error.'
    )

    if not _is_valid_filename(file_name):
        raise FileNotFoundError('Invalid file name.')

    if not _is_valid_path(file_name):
        raise FileNotFoundError('Invalid path or file name.')

    if not _create_paths(file_name):
        raise PermissionError('Could not access or create directories.')

    if not os.path.isfile(file_name):
        raise FileNotFoundError(f'File {file_name} not found.')

    if is_insert and is_append:
        raise ValueError('Cannot insert and append at the same time.')

    # Use a temporary file to write changes
    content = str(content or '')
    temp_file_path = ''
    src_abs_path = os.path.abspath(file_name)
    first_error_line = None

    try:
        n_added_lines = None

        # lint the original file
        # NOTE: we need to get env var inside this function
        # because the env var will be set AFTER the agentskills is imported
        enable_auto_lint = os.getenv('ENABLE_AUTO_LINT', 'false').lower() == 'true'
        if enable_auto_lint:
            original_lint_error, _ = _lint_file(file_name)

        # Create a temporary file
        with tempfile.NamedTemporaryFile('w', delete=False) as temp_file:
            temp_file_path = temp_file.name

            # Read the original file and check if empty and for a trailing newline
            with open(file_name) as original_file:
                lines = original_file.readlines()

            if is_append:
                # Pass a copy so `lines` stays an exact snapshot of the
                # original file for the backup written below.
                content, n_added_lines = _append_impl(list(lines), content)
            elif is_insert:
                try:
                    content, n_added_lines = _insert_impl(lines, start, content)
                except LineNumberError as e:
                    ret_str += (f'{ERROR_MSG}\n' f'{e}\n' f'{ERROR_MSG_SUFFIX}') + '\n'
                    return ret_str
            else:
                try:
                    content, n_added_lines = _edit_impl(lines, start, end, content)
                except LineNumberError as e:
                    ret_str += (f'{ERROR_MSG}\n' f'{e}\n' f'{ERROR_MSG_SUFFIX}') + '\n'
                    return ret_str

            if not content.endswith('\n'):
                content += '\n'

            # Write the new content to the temporary file
            temp_file.write(content)

        # The temporary file is created with restrictive permissions; copy
        # the original mode so the edit does not change it (e.g. drop +x).
        shutil.copymode(src_abs_path, temp_file_path)

        # Replace the original file with the temporary file atomically
        shutil.move(temp_file_path, src_abs_path)

        # Handle linting
        if enable_auto_lint:
            # BACKUP the original file
            original_file_backup_path = os.path.join(
                os.path.dirname(file_name),
                f'.backup.{os.path.basename(file_name)}',
            )
            with open(original_file_backup_path, 'w') as f:
                f.writelines(lines)

            lint_error, first_error_line = _lint_file(file_name)

            # Select the errors caused by the modification
            def extract_last_part(line):
                parts = line.split(':')
                if len(parts) > 1:
                    return parts[-1].strip()
                return line.strip()

            def subtract_strings(str1, str2) -> str:
                lines1 = str1.splitlines()
                lines2 = str2.splitlines()

                last_parts1 = [extract_last_part(line) for line in lines1]

                remaining_lines = [
                    line
                    for line in lines2
                    if extract_last_part(line) not in last_parts1
                ]

                result = '\n'.join(remaining_lines)
                return result

            if original_lint_error and lint_error:
                lint_error = subtract_strings(original_lint_error, lint_error)
                if lint_error == '':
                    lint_error = None
                    first_error_line = None

            if lint_error is None:
                # The edit is accepted: the backup is no longer needed.
                os.remove(original_file_backup_path)
            else:
                if first_error_line is not None:
                    show_line = int(first_error_line)
                elif is_append:
                    # original end-of-file
                    show_line = len(lines)
                # insert OR edit WILL provide meaningful line numbers
                elif start is not None and end is not None:
                    show_line = int((start + end) / 2)
                else:
                    raise ValueError('Invalid state. This should never happen.')

                ret_str += LINTER_ERROR_MSG
                ret_str += lint_error + '\n'

                editor_lines = n_added_lines + 20

                ret_str += '[This is how your edit would have looked if applied]\n'
                ret_str += '-------------------------------------------------\n'
                ret_str += (
                    _print_window(file_name, show_line, editor_lines, return_str=True)
                    + '\n'
                )
                ret_str += '-------------------------------------------------\n\n'

                ret_str += '[This is the original code before your edit]\n'
                ret_str += '-------------------------------------------------\n'
                ret_str += (
                    _print_window(
                        original_file_backup_path,
                        show_line,
                        editor_lines,
                        return_str=True,
                    )
                    + '\n'
                )
                ret_str += '-------------------------------------------------\n'

                ret_str += (
                    'Your changes have NOT been applied. Please fix your edit command and try again.\n'
                    'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n'
                    'DO NOT re-run the same failed edit command. Running it again will lead to the same error.'
                )

                # recover the original file
                with open(original_file_backup_path) as fin, open(
                    file_name, 'w'
                ) as fout:
                    fout.write(fin.read())
                os.remove(original_file_backup_path)
                return ret_str

    except FileNotFoundError as e:
        ret_str += f'File not found: {e}\n'
    except IOError as e:
        ret_str += f'An error occurred while handling the file: {e}\n'
    except ValueError as e:
        ret_str += f'Invalid input: {e}\n'
    except Exception as e:
        print(f'An unexpected error occurred: {e}')
        raise e
    finally:
        # Remove the temporary file on every path where it was not moved
        # into place (early return on a bad line number, or any exception).
        if temp_file_path and os.path.exists(temp_file_path):
            os.remove(temp_file_path)

    # Update the file information and print the updated content
    with open(file_name, 'r', encoding='utf-8') as file:
        n_total_lines = max(1, len(file.readlines()))
    if first_error_line is not None and int(first_error_line) > 0:
        CURRENT_LINE = first_error_line
    else:
        if is_append:
            CURRENT_LINE = max(1, len(lines))  # end of original file
        else:
            CURRENT_LINE = start or n_total_lines or 1
    ret_str += f'[File: {os.path.abspath(file_name)} ({n_total_lines} lines total after edit)]\n'
    CURRENT_FILE = file_name
    ret_str += _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True) + '\n'
    ret_str += MSG_FILE_UPDATED.format(line_number=CURRENT_LINE)
    return ret_str
def edit_file_by_replace(file_name: str, to_replace: str, new_content: str) -> None:
    """Edit a file. This will search for `to_replace` in the given file and replace it with `new_content`.

    Every *to_replace* must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc.

    Include enough lines to make code in `to_replace` unique. `to_replace` should NOT be empty.

    For example, given a file "/workspace/example.txt" with the following content:
    ```
    line 1
    line 2
    line 2
    line 3
    ```

    EDITING: If you want to replace the second occurrence of "line 2", you can make `to_replace` unique:

    edit_file_by_replace(
        '/workspace/example.txt',
        to_replace='line 2\nline 3',
        new_content='new line\nline 3',
    )

    This will replace only the second "line 2" with "new line". The first "line 2" will remain unchanged.

    The resulting file will be:
    ```
    line 1
    line 2
    new line
    line 3
    ```

    REMOVAL: If you want to remove "line 2" and "line 3", you can set `new_content` to an empty string:

    edit_file_by_replace(
        '/workspace/example.txt',
        to_replace='line 2\nline 3',
        new_content='',
    )

    Args:
        file_name: str: The name of the file to edit.
        to_replace: str: The content to search for and replace.
        new_content: str: The new content to replace the old content with.
    """
    # FIXME: support replacing *all* occurrences
    if to_replace.strip() == '':
        raise ValueError('`to_replace` must not be empty.')

    if to_replace == new_content:
        raise ValueError('`to_replace` and `new_content` must be different.')

    with open(file_name, 'r') as source:
        file_content = source.read()

    if file_content.count(to_replace) > 1:
        raise ValueError(
            '`to_replace` appears more than once, please include enough lines to make code in `to_replace` unique.'
        )

    # First try an exact match; `haystack` is whichever text the match
    # offset refers to, so the line number is computed against the same text.
    haystack = file_content
    offset = haystack.find(to_replace)
    if offset == -1:
        # Fuzzy fallback: compare with all whitespace except newlines removed.
        non_newline_space = re.compile(r'[^\S\n]+')
        haystack = non_newline_space.sub('', file_content)
        offset = haystack.find(non_newline_space.sub('', to_replace))
        if offset == -1:
            print(
                f'[No exact match found in {file_name} for\n```\n{to_replace}\n```\n]'
            )
            return

    # Convert the character offset into a 1-based inclusive line range.
    start_line_number = haystack.count('\n', 0, offset) + 1
    end_line_number = start_line_number + len(to_replace.splitlines()) - 1

    ret_str = _edit_file_impl(
        file_name,
        start=start_line_number,
        end=end_line_number,
        content=new_content,
        is_insert=False,
    )
    # lint_error = bool(LINTER_ERROR_MSG in ret_str)
    # TODO: automatically tries to fix linter error (maybe involve some static analysis tools on the location near the edit to figure out indentation)
    print(ret_str)
def insert_content_at_line(file_name: str, line_number: int, content: str) -> None:
    """Insert content at the given line number in a file.

    This will NOT modify the content of the lines before OR after the given line number.

    For example, if the file has the following content:
    ```
    line 1
    line 2
    line 3
    ```

    and you call `insert_content_at_line('file.txt', 2, 'new line')`, the file will be updated to:
    ```
    line 1
    new line
    line 2
    line 3
    ```

    Args:
        file_name: str: The name of the file to edit.
        line_number: int: The line number (starting from 1) at which to insert the content. The line currently at that number, and every line after it, is shifted down.
        content: str: The content to insert.
    """
    ret_str = _edit_file_impl(
        file_name,
        start=line_number,
        end=line_number,
        content=content,
        is_insert=True,
        is_append=False,
    )
    print(ret_str)
def append_file(file_name: str, content: str) -> None:
    """Append content to the given file.

    It appends text `content` to the end of the specified file.

    Args:
        file_name: str: The name of the file to edit.
        content: str: The content to append.
    """
    ret_str = _edit_file_impl(
        file_name,
        start=None,
        end=None,
        content=content,
        is_insert=False,
        is_append=True,
    )
    print(ret_str)
def search_dir(search_term: str, dir_path: str = './') -> None:
    """Searches for search_term in all files in dir. If dir is not provided, searches in the current directory.

    Args:
        search_term: str: The term to search for.
        dir_path: str: The path to the directory to search.
    """
    if not os.path.isdir(dir_path):
        raise FileNotFoundError(f'Directory {dir_path} not found')

    matches = []
    for folder, _, names in os.walk(dir_path):
        # Files whose name starts with a dot are skipped.
        for name in (n for n in names if not n.startswith('.')):
            file_path = os.path.join(folder, name)
            with open(file_path, 'r', errors='ignore') as handle:
                matches.extend(
                    (file_path, line_num, text.strip())
                    for line_num, text in enumerate(handle, 1)
                    if search_term in text
                )

    if not matches:
        print(f'No matches found for "{search_term}" in {dir_path}')
        return

    num_matches = len(matches)
    num_files = len({hit[0] for hit in matches})

    # Too many matching files: ask for a narrower search instead of listing.
    if num_files > 100:
        print(
            f'More than {num_files} files matched for "{search_term}" in {dir_path}. Please narrow your search.'
        )
        return

    print(f'[Found {num_matches} matches for "{search_term}" in {dir_path}]')
    for file_path, line_num, line in matches:
        print(f'{file_path} (Line {line_num}): {line}')
    print(f'[End of matches for "{search_term}" in {dir_path}]')
def search_file(search_term: str, file_path: str | None = None) -> None:
"""Searches for search_term in file. If file is not provided, searches in the current open file.
Args:
search_term: str: The term to search for.
file_path: str | None: The path to the file to search.
"""
global CURRENT_FILE
if file_path is None:
file_path = CURRENT_FILE
if file_path is None:
raise FileNotFoundError(
'No file specified or open. Use the open_file function first.'
)
if not os.path.isfile(file_path):
raise FileNotFoundError(f'File {file_path} not found')
matches = []
with open(file_path) as file:
for i, line in enumerate(file, 1):
if search_term in line:
matches.append((i, line.strip()))
if matches:
print(f'[Found {len(matches)} matches for "{search_term}" in {file_path}]')
for match in matches:
print(f'Line {match[0]}: {match[1]}')
print(f'[End of matches for "{search_term}" in {file_path}]')
else:
print(f'[No matches found for "{search_term}" in {file_path}]')
def find_file(file_name: str, dir_path: str = './') -> None:
    """Finds all files with the given name in the specified directory.

    Args:
        file_name: str: The name of the file to find.
        dir_path: str: The path to the directory to search.
    """
    if not os.path.isdir(dir_path):
        raise FileNotFoundError(f'Directory {dir_path} not found')

    # A file matches when `file_name` occurs anywhere in its base name.
    matches = [
        os.path.join(folder, candidate)
        for folder, _, names in os.walk(dir_path)
        for candidate in names
        if file_name in candidate
    ]

    if not matches:
        print(f'[No matches found for "{file_name}" in {dir_path}]')
        return

    print(f'[Found {len(matches)} matches for "{file_name}" in {dir_path}]')
    for match in matches:
        print(f'{match}')
    print(f'[End of matches for "{file_name}" in {dir_path}]')
# Public skills exported by this module; underscore-prefixed helpers are
# intentionally left out.
__all__ = [
'open_file',
'goto_line',
'scroll_down',
'scroll_up',
'create_file',
'edit_file_by_replace',
'insert_content_at_line',
'append_file',
'search_dir',
'search_file',
'find_file',
]

View File

@@ -0,0 +1,7 @@
from ..utils.dependency import import_functions
from . import file_readers
# Expose the file_readers skills at package level. Presumably import_functions
# copies each named function into target_globals — confirm in ..utils.dependency.
import_functions(
module=file_readers, function_names=file_readers.__all__, target_globals=globals()
)
# The package's public API is exactly the public API of file_readers.
__all__ = file_readers.__all__

View File

@@ -0,0 +1,244 @@
"""File reader skills for the OpenDevin agent.
This module provides various functions to parse and extract content from different file types,
including PDF, DOCX, LaTeX, audio, image, video, and PowerPoint files. It utilizes different
libraries and APIs to process these files and output their content or descriptions.
Functions:
parse_pdf(file_path: str) -> None: Parse and print content of a PDF file.
parse_docx(file_path: str) -> None: Parse and print content of a DOCX file.
parse_latex(file_path: str) -> None: Parse and print content of a LaTeX file.
parse_audio(file_path: str, model: str = 'whisper-1') -> None: Transcribe and print content of an audio file.
parse_image(file_path: str, task: str = 'Describe this image as detail as possible.') -> None: Analyze and print description of an image file.
parse_video(file_path: str, task: str = 'Describe this image as detail as possible.', frame_interval: int = 30) -> None: Analyze and print description of video frames.
parse_pptx(file_path: str) -> None: Parse and print content of a PowerPoint file.
Note:
Some functions (parse_audio, parse_video, parse_image) require OpenAI API credentials
and are only available if the necessary environment variables are set.
"""
import base64
import docx
import PyPDF2
from pptx import Presentation
from pylatexenc.latex2text import LatexNodes2Text
from ..utils.config import (
_get_max_token,
_get_openai_api_key,
_get_openai_base_url,
_get_openai_client,
_get_openai_model,
)
def parse_pdf(file_path: str) -> None:
    """Parses the content of a PDF file and prints it.

    Args:
        file_path: str: The path to the file to open.
    """
    print(f'[Reading PDF file from {file_path}]')
    reader = PyPDF2.PdfReader(file_path)
    # One labelled section per page, numbered from 1.
    sections = [
        f'@@ Page {page_number} @@\n' + page.extract_text() + '\n\n'
        for page_number, page in enumerate(reader.pages, start=1)
    ]
    print(''.join(sections).strip())
def parse_docx(file_path: str) -> None:
    """Parses the content of a DOCX file and prints it.

    Args:
        file_path: str: The path to the file to open.
    """
    print(f'[Reading DOCX file from {file_path}]')
    document = docx.Document(file_path)
    # Each paragraph is emitted under its own "@@ Page N @@" label.
    sections = [
        f'@@ Page {number} @@\n' + paragraph.text + '\n\n'
        for number, paragraph in enumerate(document.paragraphs, start=1)
    ]
    print(''.join(sections))
def parse_latex(file_path: str) -> None:
    """Parses the content of a LaTex file and prints it.

    Args:
        file_path: str: The path to the file to open.
    """
    print(f'[Reading LaTex file from {file_path}]')
    with open(file_path) as source:
        latex_source = source.read()
    converter = LatexNodes2Text()
    plain_text = converter.latex_to_text(latex_source)
    print(plain_text.strip())
def _base64_img(file_path: str) -> str:
    """Read the file at ``file_path`` as bytes and return it base64-encoded as text."""
    with open(file_path, 'rb') as image_file:
        raw_bytes = image_file.read()
    return base64.b64encode(raw_bytes).decode('utf-8')
def _base64_video(file_path: str, frame_interval: int = 10) -> list[str]:
    """Sample frames from a video and return them as base64-encoded JPEG strings.

    Every ``frame_interval``-th frame (starting with the first) is encoded.
    The capture is released even if reading or encoding a frame raises.

    Args:
        file_path: str: The path to the video file.
        frame_interval: int: Keep one frame out of every this many frames.

    Returns:
        list[str]: The base64 text of each sampled frame, in order.
    """
    import cv2

    video = cv2.VideoCapture(file_path)
    base64_frames = []
    frame_count = 0
    try:
        while video.isOpened():
            success, frame = video.read()
            if not success:
                break
            if frame_count % frame_interval == 0:
                _, buffer = cv2.imencode('.jpg', frame)
                base64_frames.append(base64.b64encode(buffer).decode('utf-8'))
            frame_count += 1
    finally:
        # Always free the capture handle, including on errors.
        video.release()
    return base64_frames
def _prepare_image_messages(task: str, base64_image: str):
    """Build a one-message chat payload pairing a text prompt with an image.

    Args:
        task: str: The instruction text sent alongside the image.
        base64_image: str: The base64-encoded image, embedded as a
            ``data:image/jpeg;base64,...`` URL.

    Returns:
        A list holding a single ``user`` message whose content is the text
        part followed by the image part.
    """
    text_part = {'type': 'text', 'text': task}
    image_part = {
        'type': 'image_url',
        'image_url': {'url': f'data:image/jpeg;base64,{base64_image}'},
    }
    user_message = {'role': 'user', 'content': [text_part, image_part]}
    return [user_message]
def parse_audio(file_path: str, model: str = 'whisper-1') -> None:
    """Send an audio file to the OpenAI audio API and print the returned text.

    The file is submitted through the client's ``audio.translations.create``
    call. Any exception is caught and reported on stdout instead of raised.
    NOTE(review): this uses the translations endpoint rather than a
    transcription one - confirm that is the intended behavior.

    Args:
        file_path: str: The path to the audio file to transcribe.
        model: str: The audio model to use for transcription. Defaults to 'whisper-1'.
    """
    print(f'[Transcribing audio file from {file_path}]')
    try:
        # TODO: record the COST of the API call
        with open(file_path, 'rb') as audio_stream:
            translations_api = _get_openai_client().audio.translations
            result = translations_api.create(model=model, file=audio_stream)
        print(result.text)
    except Exception as exc:
        print(f'Error transcribing audio file: {exc}')
def parse_image(
    file_path: str, task: str = 'Describe this image as detail as possible.'
) -> None:
    """Ask the configured OpenAI chat model about an image and print the reply.

    The image is base64-encoded and sent together with ``task`` as a single
    user message. Any exception (file access or API) is caught and reported
    on stdout instead of raised.

    Args:
        file_path: str: The path to the file to open.
        task: str: The task description for the API call. Defaults to 'Describe this image as detail as possible.'.
    """
    print(f'[Reading image file from {file_path}]')
    # TODO: record the COST of the API call
    try:
        messages = _prepare_image_messages(task, _base64_img(file_path))
        completion = _get_openai_client().chat.completions.create(
            model=_get_openai_model(),
            messages=messages,
            max_tokens=_get_max_token(),
        )
        print(completion.choices[0].message.content)
    except Exception as error:
        print(f'Error with the request: {error}')
def parse_video(
    file_path: str,
    task: str = 'Describe this image as detail as possible.',
    frame_interval: int = 30,
) -> None:
    """Describe sampled frames of a video file and print each description.

    Frames come from ``_base64_video(file_path)``, which is called with its
    own default sampling interval; ``frame_interval`` is then applied on top
    of that already-sampled list. At most 30 frames are sent to the model: if
    the requested interval would select more, a coarser interval is used.
    Each frame is sent as a separate chat request; a failing request is
    reported on stdout and the remaining frames are still processed.

    Args:
        file_path: str: The path to the video file to open.
        task: str: The task description for the API call. Defaults to 'Describe this image as detail as possible.'.
            An empty value falls back to a frame-summary prompt.
        frame_interval: int: The interval between frames to analyze. Defaults to 30.
    """
    max_frames = 30
    print(
        f'[Processing video file from {file_path} with frame interval {frame_interval}]'
    )
    task = task or 'This is one frame from a video, please summarize this frame.'
    base64_frames = _base64_video(file_path)

    step = frame_interval
    selected_frames = base64_frames[::step]
    if len(selected_frames) > max_frames:
        # Ceiling division: floor division can leave more than max_frames
        # frames (e.g. 65 frames // 30 == 2 still selects 33 of them).
        step = -(-len(base64_frames) // max_frames)
        selected_frames = base64_frames[::step]
    print(f'Totally {len(selected_frames)} would be analyze...\n')

    for idx, base64_frame in enumerate(selected_frames, start=1):
        # Label with the interval actually applied, not the requested one.
        print(f'Process the {file_path}, current No. {idx * step} frame...')
        # TODO: record the COST of the API call
        try:
            response = _get_openai_client().chat.completions.create(
                model=_get_openai_model(),
                messages=_prepare_image_messages(task, base64_frame),
                max_tokens=_get_max_token(),
            )
            content = response.choices[0].message.content
            current_frame_content = f"Frame {idx}'s content: {content}\n"
            print(current_frame_content)
        except Exception as error:
            print(f'Error with the request: {error}')
def parse_pptx(file_path: str) -> None:
    """Print the text found on every slide of a PowerPoint file.

    Each slide starts with an ``@@ Slide N @@`` marker (1-based) followed by
    the text of every shape that exposes a ``text`` attribute, one entry per
    line. Any exception is caught and reported on stdout instead of raised.

    Args:
        file_path: str: The path to the file to open.
    """
    print(f'[Reading PowerPoint file from {file_path}]')
    try:
        presentation = Presentation(str(file_path))
        lines = []
        for slide_number, slide in enumerate(presentation.slides, start=1):
            lines.append(f'@@ Slide {slide_number} @@')
            lines.extend(
                shape.text for shape in slide.shapes if hasattr(shape, 'text')
            )
        print('\n'.join(lines))
    except Exception as exc:
        print(f'Error reading PowerPoint file: {exc}')
# Readers that need no external service are always exported.
__all__ = [
    'parse_pdf',
    'parse_docx',
    'parse_latex',
    'parse_pptx',
]

# This is called from OpenDevin's side
# If SANDBOX_ENV_OPENAI_API_KEY is set, we will be able to use these tools in the sandbox environment
# The check runs once, at import time: the OpenAI-backed readers are exported
# only when both the API key and the base URL helpers return a truthy value.
if _get_openai_api_key() and _get_openai_base_url():
    __all__.extend(['parse_audio', 'parse_video', 'parse_image'])

View File

@@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -0,0 +1,30 @@
import os
from openai import OpenAI
# ==================================================================================================
# OPENAI
# TODO: Move this to EventStream Actions when EventStreamRuntime is fully implemented
# NOTE: we need to get env vars inside functions because they will be set in IPython
# AFTER the agentskills is imported (the case for EventStreamRuntime)
# ==================================================================================================
def _get_openai_api_key():
    """Return the OpenAI API key read from the environment.

    ``OPENAI_API_KEY`` is used whenever it is set (even to an empty string);
    otherwise ``SANDBOX_ENV_OPENAI_API_KEY`` is used, and ``''`` is returned
    when neither variable is set.
    """
    sandbox_key = os.environ.get('SANDBOX_ENV_OPENAI_API_KEY', '')
    return os.environ.get('OPENAI_API_KEY', sandbox_key)
def _get_openai_base_url():
    """Return ``OPENAI_BASE_URL`` from the environment, or the public OpenAI v1 endpoint when unset."""
    default_url = 'https://api.openai.com/v1'
    return os.environ.get('OPENAI_BASE_URL', default_url)
def _get_openai_model():
    """Return ``OPENAI_MODEL`` from the environment, or ``'gpt-4o-2024-05-13'`` when unset."""
    default_model = 'gpt-4o-2024-05-13'
    return os.environ.get('OPENAI_MODEL', default_model)
def _get_max_token() -> int:
    """Return the completion token limit configured through ``MAX_TOKEN``.

    Environment variables are always strings, so the value is converted to
    ``int`` here; callers pass the result straight to the API's
    ``max_tokens`` argument.

    Returns:
        int: The parsed ``MAX_TOKEN`` value, or 500 when the variable is
            unset or blank.

    Raises:
        ValueError: If ``MAX_TOKEN`` is set to a non-integer value.
    """
    default_limit = 500
    raw_value = os.getenv('MAX_TOKEN')
    if raw_value is None or not raw_value.strip():
        return default_limit
    try:
        return int(raw_value)
    except ValueError as err:
        raise ValueError(
            f'MAX_TOKEN must be an integer, got {raw_value!r}'
        ) from err
def _get_openai_client():
    """Create a new ``OpenAI`` client from the current API key and base URL.

    The settings are read on every call, so values placed in the environment
    after import are picked up.
    """
    return OpenAI(
        api_key=_get_openai_api_key(),
        base_url=_get_openai_base_url(),
    )

View File

@@ -0,0 +1,11 @@
from types import ModuleType
def import_functions(
    module: ModuleType, function_names: list[str], target_globals: dict
) -> None:
    """Copy the named attributes of ``module`` into ``target_globals``.

    Names are processed in order; entries copied before a missing name is
    encountered remain in ``target_globals``.

    Args:
        module: ModuleType: The module to read the attributes from.
        function_names: list[str]: The attribute names to copy.
        target_globals: dict: The mapping that receives ``name -> attribute``.

    Raises:
        ValueError: If ``module`` has no attribute with one of the names.
    """
    for name in function_names:
        if not hasattr(module, name):
            raise ValueError(f'Function {name} not found in {module.__name__}')
        target_globals[name] = getattr(module, name)

View File

@@ -27,7 +27,7 @@ class JupyterPlugin(Plugin):
f"su - {username} -s /bin/bash << 'EOF'\n"
'cd /opendevin/code\n'
'export POETRY_VIRTUALENVS_PATH=/opendevin/poetry;\n'
'export PYTHONPATH=/opendevin/code/opendevin/runtime/plugins/agent_skills:$PYTHONPATH;\n'
'export PYTHONPATH=/opendevin/code:$PYTHONPATH;\n'
'/opendevin/miniforge3/bin/mamba run -n base '
'poetry run jupyter kernelgateway '
'--KernelGatewayApp.ip=0.0.0.0 '

View File

@@ -68,11 +68,9 @@ class JupyterKernel:
async def initialize(self):
await self.execute(r'%colors nocolor')
# pre-defined tools
self.tools_to_run = [
self.tools_to_run: list[str] = [
# TODO: You can add code for your pre-defined tools here
]
if os.path.exists('/opendevin/plugins/agent_skills/agentskills.py'):
self.tools_to_run.append('from agentskills import *')
for tool in self.tools_to_run:
res = await self.execute(tool)
logging.info(f'Tool [{tool}] initialized:\n{res}')

View File

@@ -7,7 +7,7 @@ from unittest.mock import patch
import docx
import pytest
from opendevin.runtime.plugins.agent_skills.agentskills import (
from opendevin.runtime.plugins.agent_skills.file_ops.file_ops import (
MSG_FILE_UPDATED,
WINDOW,
_print_window,
@@ -18,15 +18,17 @@ from opendevin.runtime.plugins.agent_skills.agentskills import (
goto_line,
insert_content_at_line,
open_file,
parse_docx,
parse_latex,
parse_pdf,
parse_pptx,
scroll_down,
scroll_up,
search_dir,
search_file,
)
from opendevin.runtime.plugins.agent_skills.file_reader.file_readers import (
parse_docx,
parse_latex,
parse_pdf,
parse_pptx,
)
# CURRENT_FILE must be reset for each test

View File

@@ -2,7 +2,7 @@ import os
import pytest
from opendevin.runtime.plugins.agent_skills.aider import Linter, LintResult
from opendevin.runtime.plugins.agent_skills.utils.aider import Linter, LintResult
@pytest.fixture
@@ -108,7 +108,7 @@ def test_py_lint_fail(linter, temp_file):
def test_basic_lint(temp_file):
from opendevin.runtime.plugins.agent_skills.aider.linter import basic_lint
from opendevin.runtime.plugins.agent_skills.utils.aider.linter import basic_lint
poorly_formatted_code = """
def foo()
@@ -124,7 +124,7 @@ def test_basic_lint(temp_file):
def test_basic_lint_fail_returns_text_and_lines(temp_file):
from opendevin.runtime.plugins.agent_skills.aider.linter import basic_lint
from opendevin.runtime.plugins.agent_skills.utils.aider.linter import basic_lint
poorly_formatted_code = """
def foo()
@@ -141,7 +141,9 @@ def test_basic_lint_fail_returns_text_and_lines(temp_file):
def test_lint_python_compile(temp_file):
from opendevin.runtime.plugins.agent_skills.aider.linter import lint_python_compile
from opendevin.runtime.plugins.agent_skills.utils.aider.linter import (
lint_python_compile,
)
result = lint_python_compile(temp_file, "print('Hello, World!')\n")
@@ -149,7 +151,9 @@ def test_lint_python_compile(temp_file):
def test_lint_python_compile_fail_returns_text_and_lines(temp_file):
from opendevin.runtime.plugins.agent_skills.aider.linter import lint_python_compile
from opendevin.runtime.plugins.agent_skills.utils.aider.linter import (
lint_python_compile,
)
poorly_formatted_code = """
def foo()