Add support for AGENTS.md files in microagent system (#10528)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Ryan H. Tran 2025-09-03 01:18:38 +07:00 committed by GitHub
parent 8d13c9f328
commit 3f77b8229a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 250 additions and 37 deletions

View File

@ -2,7 +2,7 @@ import io
import re
from itertools import chain
from pathlib import Path
from typing import Union
from typing import ClassVar, Union
import frontmatter
from pydantic import BaseModel
@ -23,6 +23,31 @@ class BaseMicroagent(BaseModel):
source: str # path to the file
type: MicroagentType
# Maps the lowercased file name of a recognized third-party agent-instruction
# file to the microagent name it is loaded under. Lookups are done with
# `path.name.lower()`, so keys must stay lowercase. Both 'agents.md' and
# 'agent.md' resolve to the same microagent name, 'agents'.
PATH_TO_THIRD_PARTY_MICROAGENT_NAME: ClassVar[dict[str, str]] = {
'.cursorrules': 'cursorrules',
'agents.md': 'agents',
'agent.md': 'agents',
}
@classmethod
def _handle_third_party(
    cls, path: Path, file_content: str
) -> Union['RepoMicroagent', None]:
    """Build a repo microagent from a recognized third-party instruction file.

    The lowercased file name of ``path`` is looked up in
    ``PATH_TO_THIRD_PARTY_MICROAGENT_NAME``.

    Args:
        path: Location of the candidate file; only its file name is inspected.
        file_content: Raw text of the file, used verbatim as the agent content.

    Returns:
        A ``RepoMicroagent`` named after the matched entry, or ``None`` when
        the file name is not a known third-party instruction file.
    """
    file_key = path.name.lower()
    third_party_name = cls.PATH_TO_THIRD_PARTY_MICROAGENT_NAME.get(file_key)

    # Unknown file name: let the caller fall back to regular parsing
    if third_party_name is None:
        return None

    return RepoMicroagent(
        name=third_party_name,
        content=file_content,
        metadata=MicroagentMetadata(name=third_party_name),
        source=str(path),
        type=MicroagentType.REPO_KNOWLEDGE,
    )
@classmethod
def load(
cls,
@ -40,11 +65,10 @@ class BaseMicroagent(BaseModel):
# Otherwise, we will rely on the name from metadata later
derived_name = None
if microagent_dir is not None:
# Special handling for .cursorrules files which are not in microagent_dir
if path.name == '.cursorrules':
derived_name = 'cursorrules'
else:
derived_name = str(path.relative_to(microagent_dir).with_suffix(''))
# Special handling for files which are not in microagent_dir
derived_name = cls.PATH_TO_THIRD_PARTY_MICROAGENT_NAME.get(
path.name.lower()
) or str(path.relative_to(microagent_dir).with_suffix(''))
# Only load directly from path if file_content is not provided
if file_content is None:
@ -61,15 +85,10 @@ class BaseMicroagent(BaseModel):
type=MicroagentType.REPO_KNOWLEDGE,
)
# Handle .cursorrules files
if path.name == '.cursorrules':
return RepoMicroagent(
name='cursorrules',
content=file_content,
metadata=MicroagentMetadata(name='cursorrules'),
source=str(path),
type=MicroagentType.REPO_KNOWLEDGE,
)
# Handle third-party agent instruction files
third_party_agent = cls._handle_third_party(path, file_content)
if third_party_agent is not None:
return third_party_agent
file_io = io.StringIO(file_content)
loaded = frontmatter.load(file_io)
@ -276,31 +295,44 @@ def load_microagents_from_dir(
# Load all agents from microagents directory
logger.debug(f'Loading agents from {microagent_dir}')
if microagent_dir.exists():
# Collect .cursorrules file from repo root and .md files from microagents dir
cursorrules_files = []
if (microagent_dir.parent.parent / '.cursorrules').exists():
cursorrules_files = [microagent_dir.parent.parent / '.cursorrules']
# Always check for .cursorrules and AGENTS.md files in repo root, regardless of whether microagents_dir exists
special_files = []
repo_root = microagent_dir.parent.parent
# Check for .cursorrules
if (repo_root / '.cursorrules').exists():
special_files.append(repo_root / '.cursorrules')
# Check for AGENTS.md / AGENT.md using the all-upper and all-lower spellings listed below
for agents_filename in ['AGENTS.md', 'agents.md', 'AGENT.md', 'agent.md']:
agents_path = repo_root / agents_filename
if agents_path.exists():
special_files.append(agents_path)
break # Only add the first one found to avoid duplicates
# Collect .md files from microagents directory if it exists
md_files = []
if microagent_dir.exists():
md_files = [f for f in microagent_dir.rglob('*.md') if f.name != 'README.md']
# Process all files in one loop
for file in chain(cursorrules_files, md_files):
try:
agent = BaseMicroagent.load(file, microagent_dir)
if isinstance(agent, RepoMicroagent):
repo_agents[agent.name] = agent
elif isinstance(agent, KnowledgeMicroagent):
# Both KnowledgeMicroagent and TaskMicroagent go into knowledge_agents
knowledge_agents[agent.name] = agent
except MicroagentValidationError as e:
# For validation errors, include the original exception
error_msg = f'Error loading microagent from {file}: {str(e)}'
raise MicroagentValidationError(error_msg) from e
except Exception as e:
# For other errors, wrap in a ValueError with detailed message
error_msg = f'Error loading microagent from {file}: {str(e)}'
raise ValueError(error_msg) from e
# Process all files in one loop
for file in chain(special_files, md_files):
try:
agent = BaseMicroagent.load(file, microagent_dir)
if isinstance(agent, RepoMicroagent):
repo_agents[agent.name] = agent
elif isinstance(agent, KnowledgeMicroagent):
# Both KnowledgeMicroagent and TaskMicroagent go into knowledge_agents
knowledge_agents[agent.name] = agent
except MicroagentValidationError as e:
# For validation errors, include the original exception
error_msg = f'Error loading microagent from {file}: {str(e)}'
raise MicroagentValidationError(error_msg) from e
except Exception as e:
# For other errors, wrap in a ValueError with detailed message
error_msg = f'Error loading microagent from {file}: {str(e)}'
raise ValueError(error_msg) from e
logger.debug(
f'Loaded {len(repo_agents) + len(knowledge_agents)} microagents: '

View File

@ -364,3 +364,184 @@ def test_load_microagents_with_cursorrules(temp_microagents_dir_with_cursorrules
assert cursorrules_agent.name == 'cursorrules'
assert 'Always use TypeScript for new files' in cursorrules_agent.content
assert cursorrules_agent.type == MicroagentType.REPO_KNOWLEDGE
@pytest.fixture
def temp_dir_with_cursorrules_only():
    """Yield a temporary repository root that holds nothing but a .cursorrules file.

    The .openhands/microagents directory is deliberately never created.
    """
    rules_text = 'Always use Python for new files.\nFollow PEP 8 style guidelines.'
    with tempfile.TemporaryDirectory() as tmp:
        repo_root = Path(tmp)
        # The only file in the repository root
        rules_file = repo_root / '.cursorrules'
        rules_file.write_text(rules_text)
        yield repo_root
def test_load_cursorrules_without_microagents_dir(temp_dir_with_cursorrules_only):
    """Check that .cursorrules is loaded even when .openhands/microagents is absent.

    Reproduces the bug where .cursorrules was only picked up if the
    .openhands/microagents directory existed.
    """
    # Point the loader at a directory that was never created
    missing_dir = temp_dir_with_cursorrules_only / '.openhands' / 'microagents'
    repo_agents, knowledge_agents = load_microagents_from_dir(missing_dir)

    # Exactly one repo agent (from .cursorrules) and no knowledge agents
    assert len(repo_agents) == 1
    assert 'cursorrules' in repo_agents
    assert len(knowledge_agents) == 0

    loaded = repo_agents['cursorrules']
    assert isinstance(loaded, RepoMicroagent)
    assert loaded.name == 'cursorrules'
    assert 'Always use Python for new files' in loaded.content
    assert loaded.type == MicroagentType.REPO_KNOWLEDGE
def test_agents_md_file_load():
    """Check that an AGENTS.md file passed with explicit content loads as a RepoMicroagent."""
    source_path = Path('AGENTS.md')
    body = '\n'.join(
        [
            '# Project Setup',
            '## Setup commands',
            '- Install deps: `npm install`',
            '- Start dev server: `npm run dev`',
            '- Run tests: `npm test`',
            '## Code style',
            '- TypeScript strict mode',
            '- Single quotes, no semicolons',
            '- Use functional patterns where possible',
        ]
    )

    # Load directly from the provided content, without a microagent directory
    loaded = BaseMicroagent.load(source_path, file_content=body)

    assert isinstance(loaded, RepoMicroagent)
    assert loaded.name == 'agents'
    assert loaded.content == body
    assert loaded.type == MicroagentType.REPO_KNOWLEDGE
    assert loaded.metadata.name == 'agents'
    assert loaded.source == str(source_path)
def test_agents_md_case_insensitive():
    """Test that AGENTS.md / AGENT.md detection ignores the case of the file name.

    Besides the all-upper and all-lower spellings, mixed-case variants are
    included so the test exercises the lowercased file-name lookup rather than
    just four literal spellings.
    """
    agents_content = '# Development Guide\nUse TypeScript for all new files.'

    test_cases = [
        'AGENTS.md',
        'agents.md',
        'AGENT.md',
        'agent.md',
        # Mixed-case spellings: these only pass if matching is truly case-insensitive
        'Agents.md',
        'Agent.md',
        'AGENTS.MD',
    ]

    for filename in test_cases:
        agents_path = Path(filename)
        agent = BaseMicroagent.load(agents_path, file_content=agents_content)

        # Include the file name so a failure identifies the offending spelling
        assert isinstance(agent, RepoMicroagent), filename
        assert agent.name == 'agents', filename
        assert agent.content == agents_content, filename
        assert agent.type == MicroagentType.REPO_KNOWLEDGE, filename
@pytest.fixture
def temp_dir_with_agents_md_only():
    """Yield a temporary repository root that holds nothing but an AGENTS.md file.

    The .openhands/microagents directory is deliberately never created.
    """
    guide_text = '\n'.join(
        [
            '# Development Guide',
            '## Setup commands',
            '- Install deps: `poetry install`',
            '- Start dev server: `poetry run python app.py`',
            '- Run tests: `poetry run pytest`',
            '## Code style',
            '- Python 3.12+',
            '- Follow PEP 8 guidelines',
            '- Use type hints everywhere',
        ]
    )
    with tempfile.TemporaryDirectory() as tmp:
        repo_root = Path(tmp)
        # The only file in the repository root
        agents_file = repo_root / 'AGENTS.md'
        agents_file.write_text(guide_text)
        yield repo_root
def test_load_agents_md_without_microagents_dir(temp_dir_with_agents_md_only):
    """Check that AGENTS.md is loaded even when .openhands/microagents is absent."""
    # Point the loader at a directory that was never created
    missing_dir = temp_dir_with_agents_md_only / '.openhands' / 'microagents'
    repo_agents, knowledge_agents = load_microagents_from_dir(missing_dir)

    # Exactly one repo agent (from AGENTS.md) and no knowledge agents
    assert len(repo_agents) == 1
    assert 'agents' in repo_agents
    assert len(knowledge_agents) == 0

    loaded = repo_agents['agents']
    assert isinstance(loaded, RepoMicroagent)
    assert loaded.name == 'agents'
    assert 'Install deps: `poetry install`' in loaded.content
    assert loaded.type == MicroagentType.REPO_KNOWLEDGE
@pytest.fixture
def temp_dir_with_both_cursorrules_and_agents():
    """Yield a temporary repository root containing both .cursorrules and AGENTS.md."""
    # File name -> content, written in this order
    root_files = {
        '.cursorrules': (
            'Always use Python for new files.\nFollow PEP 8 style guidelines.'
        ),
        'AGENTS.md': '\n'.join(
            [
                '# Development Guide',
                '## Setup commands',
                '- Install deps: `poetry install`',
                '- Run tests: `poetry run pytest`',
            ]
        ),
    }
    with tempfile.TemporaryDirectory() as tmp:
        repo_root = Path(tmp)
        for file_name, text in root_files.items():
            (repo_root / file_name).write_text(text)
        yield repo_root
def test_load_both_cursorrules_and_agents_md(temp_dir_with_both_cursorrules_and_agents):
    """Check that .cursorrules and AGENTS.md are both loaded without .openhands/microagents."""
    # Point the loader at a directory that was never created
    repo_root = temp_dir_with_both_cursorrules_and_agents
    missing_dir = repo_root / '.openhands' / 'microagents'
    repo_agents, knowledge_agents = load_microagents_from_dir(missing_dir)

    # Both root-level files become repo agents; there are no knowledge agents
    assert len(repo_agents) == 2
    assert 'cursorrules' in repo_agents
    assert 'agents' in repo_agents
    assert len(knowledge_agents) == 0

    # Agent name -> text that must appear in its content
    expected_snippets = {
        'cursorrules': 'Always use Python for new files',
        'agents': 'Install deps: `poetry install`',
    }
    for agent_name, snippet in expected_snippets.items():
        loaded = repo_agents[agent_name]
        assert isinstance(loaded, RepoMicroagent)
        assert snippet in loaded.content