Hiep Le 36cf4e161a
fix(backend): ensure microagents are loaded for V1 conversations (#11772)
Co-authored-by: Engel Nyst <engel.nyst@gmail.com>
2025-11-19 18:54:08 +07:00

332 lines
11 KiB
Python

"""Utilities for loading skills for V1 conversations.
This module provides functions to load skills from various sources:
- Global skills from OpenHands/skills/
- User skills from ~/.openhands/skills/
- Repository-level skills from the workspace
All skills are used in V1 conversations.
"""
import logging
import os
from pathlib import Path
import openhands
from openhands.sdk.context.skills import Skill
from openhands.sdk.workspace.remote.async_remote_workspace import AsyncRemoteWorkspace
_logger = logging.getLogger(__name__)
# Path to global skills directory
GLOBAL_SKILLS_DIR = os.path.join(
os.path.dirname(os.path.dirname(openhands.__file__)),
'skills',
)
def _find_and_load_global_skill_files(skill_dir: Path) -> list[Skill]:
"""Find and load all .md files from the global skills directory.
Args:
skill_dir: Path to the global skills directory
Returns:
List of Skill objects loaded from the files (excluding README.md)
"""
skills = []
try:
# Find all .md files in the directory (excluding README.md)
md_files = [f for f in skill_dir.glob('*.md') if f.name.lower() != 'readme.md']
# Load skills from the found files
for file_path in md_files:
try:
skill = Skill.load(file_path, skill_dir)
skills.append(skill)
_logger.debug(f'Loaded global skill: {skill.name} from {file_path}')
except Exception as e:
_logger.warning(
f'Failed to load global skill from {file_path}: {str(e)}'
)
except Exception as e:
_logger.debug(f'Failed to find global skill files: {str(e)}')
return skills
def load_global_skills() -> list[Skill]:
"""Load global skills from OpenHands/skills/ directory.
Returns:
List of Skill objects loaded from global skills directory.
Returns empty list if directory doesn't exist or on errors.
"""
skill_dir = Path(GLOBAL_SKILLS_DIR)
# Check if directory exists
if not skill_dir.exists():
_logger.debug(f'Global skills directory does not exist: {skill_dir}')
return []
try:
_logger.info(f'Loading global skills from {skill_dir}')
# Find and load all .md files from the directory
skills = _find_and_load_global_skill_files(skill_dir)
_logger.info(f'Loaded {len(skills)} global skills: {[s.name for s in skills]}')
return skills
except Exception as e:
_logger.warning(f'Failed to load global skills: {str(e)}')
return []
def _determine_repo_root(working_dir: str, selected_repository: str | None) -> str:
"""Determine the repository root directory.
Args:
working_dir: Base working directory path
selected_repository: Repository name (e.g., 'owner/repo') or None
Returns:
Path to the repository root directory
"""
if selected_repository:
repo_name = selected_repository.split('/')[-1]
return f'{working_dir}/{repo_name}'
return working_dir
async def _read_file_from_workspace(
workspace: AsyncRemoteWorkspace, file_path: str, working_dir: str
) -> str | None:
"""Read file content from remote workspace.
Args:
workspace: AsyncRemoteWorkspace to execute commands
file_path: Path to the file to read
working_dir: Working directory for command execution
Returns:
File content as string, or None if file doesn't exist or read fails
"""
try:
result = await workspace.execute_command(
f'cat {file_path}', cwd=working_dir, timeout=10.0
)
if result.exit_code == 0 and result.stdout.strip():
return result.stdout
return None
except Exception as e:
_logger.debug(f'Failed to read file {file_path}: {str(e)}')
return None
async def _load_special_files(
workspace: AsyncRemoteWorkspace, repo_root: str, working_dir: str
) -> list[Skill]:
"""Load special skill files from repository root.
Loads: .cursorrules, agents.md, agent.md
Args:
workspace: AsyncRemoteWorkspace to execute commands
repo_root: Path to repository root directory
working_dir: Working directory for command execution
Returns:
List of Skill objects loaded from special files
"""
skills = []
special_files = ['.cursorrules', 'agents.md', 'agent.md']
for filename in special_files:
file_path = f'{repo_root}/{filename}'
content = await _read_file_from_workspace(workspace, file_path, working_dir)
if content:
try:
# Use simple string path to avoid Path filesystem operations
skill = Skill.load(path=filename, skill_dir=None, file_content=content)
skills.append(skill)
_logger.debug(f'Loaded special file skill: {skill.name}')
except Exception as e:
_logger.warning(f'Failed to create skill from {filename}: {str(e)}')
return skills
async def _find_and_load_skill_md_files(
workspace: AsyncRemoteWorkspace, skill_dir: str, working_dir: str
) -> list[Skill]:
"""Find and load all .md files from a skills directory in the workspace.
Args:
workspace: AsyncRemoteWorkspace to execute commands
skill_dir: Path to skills directory
working_dir: Working directory for command execution
Returns:
List of Skill objects loaded from the files (excluding README.md)
"""
skills = []
try:
# Find all .md files in the directory
result = await workspace.execute_command(
f"find {skill_dir} -type f -name '*.md' 2>/dev/null || true",
cwd=working_dir,
timeout=10.0,
)
if result.exit_code == 0 and result.stdout.strip():
file_paths = [
f.strip()
for f in result.stdout.strip().split('\n')
if f.strip() and 'README.md' not in f
]
# Load skills from the found files
for file_path in file_paths:
content = await _read_file_from_workspace(
workspace, file_path, working_dir
)
if content:
# Calculate relative path for skill name
rel_path = file_path.replace(f'{skill_dir}/', '')
try:
# Use simple string path to avoid Path filesystem operations
skill = Skill.load(
path=rel_path, skill_dir=None, file_content=content
)
skills.append(skill)
_logger.debug(f'Loaded repo skill: {skill.name}')
except Exception as e:
_logger.warning(
f'Failed to create skill from {rel_path}: {str(e)}'
)
except Exception as e:
_logger.debug(f'Failed to find skill files in {skill_dir}: {str(e)}')
return skills
def _merge_repo_skills_with_precedence(
special_skills: list[Skill],
skills_dir_skills: list[Skill],
microagents_dir_skills: list[Skill],
) -> list[Skill]:
"""Merge repository skills with precedence order.
Precedence (highest to lowest):
1. Special files (repo root)
2. .openhands/skills/ directory
3. .openhands/microagents/ directory (backward compatibility)
Args:
special_skills: Skills from special files in repo root
skills_dir_skills: Skills from .openhands/skills/ directory
microagents_dir_skills: Skills from .openhands/microagents/ directory
Returns:
Deduplicated list of skills with proper precedence
"""
# Use a dict to deduplicate by name, with earlier sources taking precedence
skills_by_name = {}
for skill in special_skills + skills_dir_skills + microagents_dir_skills:
# Only add if not already present (earlier sources win)
if skill.name not in skills_by_name:
skills_by_name[skill.name] = skill
return list(skills_by_name.values())
async def load_repo_skills(
workspace: AsyncRemoteWorkspace,
selected_repository: str | None,
working_dir: str,
) -> list[Skill]:
"""Load repository-level skills from the workspace.
Loads skills from:
1. Special files in repo root: .cursorrules, agents.md, agent.md
2. .md files in .openhands/skills/ directory (preferred)
3. .md files in .openhands/microagents/ directory (for backward compatibility)
Args:
workspace: AsyncRemoteWorkspace to execute commands in the sandbox
selected_repository: Repository name (e.g., 'owner/repo') or None
working_dir: Working directory path
Returns:
List of Skill objects loaded from repository.
Returns empty list on errors.
"""
try:
# Determine repository root directory
repo_root = _determine_repo_root(working_dir, selected_repository)
_logger.info(f'Loading repo skills from {repo_root}')
# Load special files from repo root
special_skills = await _load_special_files(workspace, repo_root, working_dir)
# Load .md files from .openhands/skills/ directory (preferred)
skills_dir = f'{repo_root}/.openhands/skills'
skills_dir_skills = await _find_and_load_skill_md_files(
workspace, skills_dir, working_dir
)
# Load .md files from .openhands/microagents/ directory (backward compatibility)
microagents_dir = f'{repo_root}/.openhands/microagents'
microagents_dir_skills = await _find_and_load_skill_md_files(
workspace, microagents_dir, working_dir
)
# Merge all loaded skills with proper precedence
all_skills = _merge_repo_skills_with_precedence(
special_skills, skills_dir_skills, microagents_dir_skills
)
_logger.info(
f'Loaded {len(all_skills)} repo skills: {[s.name for s in all_skills]}'
)
return all_skills
except Exception as e:
_logger.warning(f'Failed to load repo skills: {str(e)}')
return []
def merge_skills(skill_lists: list[list[Skill]]) -> list[Skill]:
"""Merge multiple skill lists, avoiding duplicates by name.
Later lists take precedence over earlier lists for duplicate names.
Args:
skill_lists: List of skill lists to merge
Returns:
Deduplicated list of skills with later lists overriding earlier ones
"""
skills_by_name = {}
for skill_list in skill_lists:
for skill in skill_list:
if skill.name in skills_by_name:
_logger.debug(
f'Overriding skill "{skill.name}" from earlier source with later source'
)
skills_by_name[skill.name] = skill
result = list(skills_by_name.values())
_logger.debug(f'Merged skills: {[s.name for s in result]}')
return result