Add support for AGENTS.md files in microagent system (#10528)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Ryan H. Tran
2025-09-03 01:18:38 +07:00
committed by GitHub
parent 8d13c9f328
commit 3f77b8229a
2 changed files with 250 additions and 37 deletions

View File

@@ -2,7 +2,7 @@ import io
import re import re
from itertools import chain from itertools import chain
from pathlib import Path from pathlib import Path
from typing import Union from typing import ClassVar, Union
import frontmatter import frontmatter
from pydantic import BaseModel from pydantic import BaseModel
@@ -23,6 +23,31 @@ class BaseMicroagent(BaseModel):
source: str # path to the file source: str # path to the file
type: MicroagentType type: MicroagentType
# Maps third-party instruction file names to the microagent name they are
# loaded under. Lookups are done with `path.name.lower()`, so every key
# here must be lowercase.
PATH_TO_THIRD_PARTY_MICROAGENT_NAME: ClassVar[dict[str, str]] = {
'.cursorrules': 'cursorrules',
'agents.md': 'agents',
'agent.md': 'agents',
}
@classmethod
def _handle_third_party(
    cls, path: Path, file_content: str
) -> Union['RepoMicroagent', None]:
    """Build a repo microagent for a recognized third-party instruction file.

    The lowercased file name is looked up in
    ``PATH_TO_THIRD_PARTY_MICROAGENT_NAME``.

    Args:
        path: Location of the candidate file. Only its name is used for
            matching; the full path is recorded as the agent's source.
        file_content: Text of the file, stored unchanged as the content.

    Returns:
        A ``RepoMicroagent`` named after the matching mapping entry, or
        ``None`` when the file name is not present in the mapping.
    """
    third_party_name = cls.PATH_TO_THIRD_PARTY_MICROAGENT_NAME.get(
        path.name.lower()
    )
    # Unrecognized file names are left for the caller to handle.
    if third_party_name is None:
        return None

    return RepoMicroagent(
        name=third_party_name,
        content=file_content,
        metadata=MicroagentMetadata(name=third_party_name),
        source=str(path),
        type=MicroagentType.REPO_KNOWLEDGE,
    )
@classmethod @classmethod
def load( def load(
cls, cls,
@@ -40,11 +65,10 @@ class BaseMicroagent(BaseModel):
# Otherwise, we will rely on the name from metadata later # Otherwise, we will rely on the name from metadata later
derived_name = None derived_name = None
if microagent_dir is not None: if microagent_dir is not None:
# Special handling for .cursorrules files which are not in microagent_dir # Special handling for files which are not in microagent_dir
if path.name == '.cursorrules': derived_name = cls.PATH_TO_THIRD_PARTY_MICROAGENT_NAME.get(
derived_name = 'cursorrules' path.name.lower()
else: ) or str(path.relative_to(microagent_dir).with_suffix(''))
derived_name = str(path.relative_to(microagent_dir).with_suffix(''))
# Only load directly from path if file_content is not provided # Only load directly from path if file_content is not provided
if file_content is None: if file_content is None:
@@ -61,15 +85,10 @@ class BaseMicroagent(BaseModel):
type=MicroagentType.REPO_KNOWLEDGE, type=MicroagentType.REPO_KNOWLEDGE,
) )
# Handle .cursorrules files # Handle third-party agent instruction files
if path.name == '.cursorrules': third_party_agent = cls._handle_third_party(path, file_content)
return RepoMicroagent( if third_party_agent is not None:
name='cursorrules', return third_party_agent
content=file_content,
metadata=MicroagentMetadata(name='cursorrules'),
source=str(path),
type=MicroagentType.REPO_KNOWLEDGE,
)
file_io = io.StringIO(file_content) file_io = io.StringIO(file_content)
loaded = frontmatter.load(file_io) loaded = frontmatter.load(file_io)
@@ -276,31 +295,44 @@ def load_microagents_from_dir(
# Load all agents from microagents directory # Load all agents from microagents directory
logger.debug(f'Loading agents from {microagent_dir}') logger.debug(f'Loading agents from {microagent_dir}')
if microagent_dir.exists():
# Collect .cursorrules file from repo root and .md files from microagents dir
cursorrules_files = []
if (microagent_dir.parent.parent / '.cursorrules').exists():
cursorrules_files = [microagent_dir.parent.parent / '.cursorrules']
# Always check for .cursorrules and AGENTS.md files in repo root, regardless of whether microagents_dir exists
special_files = []
repo_root = microagent_dir.parent.parent
# Check for .cursorrules
if (repo_root / '.cursorrules').exists():
special_files.append(repo_root / '.cursorrules')
# Check for AGENTS.md (case-insensitive)
for agents_filename in ['AGENTS.md', 'agents.md', 'AGENT.md', 'agent.md']:
agents_path = repo_root / agents_filename
if agents_path.exists():
special_files.append(agents_path)
break # Only add the first one found to avoid duplicates
# Collect .md files from microagents directory if it exists
md_files = []
if microagent_dir.exists():
md_files = [f for f in microagent_dir.rglob('*.md') if f.name != 'README.md'] md_files = [f for f in microagent_dir.rglob('*.md') if f.name != 'README.md']
# Process all files in one loop # Process all files in one loop
for file in chain(cursorrules_files, md_files): for file in chain(special_files, md_files):
try: try:
agent = BaseMicroagent.load(file, microagent_dir) agent = BaseMicroagent.load(file, microagent_dir)
if isinstance(agent, RepoMicroagent): if isinstance(agent, RepoMicroagent):
repo_agents[agent.name] = agent repo_agents[agent.name] = agent
elif isinstance(agent, KnowledgeMicroagent): elif isinstance(agent, KnowledgeMicroagent):
# Both KnowledgeMicroagent and TaskMicroagent go into knowledge_agents # Both KnowledgeMicroagent and TaskMicroagent go into knowledge_agents
knowledge_agents[agent.name] = agent knowledge_agents[agent.name] = agent
except MicroagentValidationError as e: except MicroagentValidationError as e:
# For validation errors, include the original exception # For validation errors, include the original exception
error_msg = f'Error loading microagent from {file}: {str(e)}' error_msg = f'Error loading microagent from {file}: {str(e)}'
raise MicroagentValidationError(error_msg) from e raise MicroagentValidationError(error_msg) from e
except Exception as e: except Exception as e:
# For other errors, wrap in a ValueError with detailed message # For other errors, wrap in a ValueError with detailed message
error_msg = f'Error loading microagent from {file}: {str(e)}' error_msg = f'Error loading microagent from {file}: {str(e)}'
raise ValueError(error_msg) from e raise ValueError(error_msg) from e
logger.debug( logger.debug(
f'Loaded {len(repo_agents) + len(knowledge_agents)} microagents: ' f'Loaded {len(repo_agents) + len(knowledge_agents)} microagents: '

View File

@@ -364,3 +364,184 @@ def test_load_microagents_with_cursorrules(temp_microagents_dir_with_cursorrules
assert cursorrules_agent.name == 'cursorrules' assert cursorrules_agent.name == 'cursorrules'
assert 'Always use TypeScript for new files' in cursorrules_agent.content assert 'Always use TypeScript for new files' in cursorrules_agent.content
assert cursorrules_agent.type == MicroagentType.REPO_KNOWLEDGE assert cursorrules_agent.type == MicroagentType.REPO_KNOWLEDGE
@pytest.fixture
def temp_dir_with_cursorrules_only():
    """Yield a temporary repository root that holds only a .cursorrules file.

    The .openhands/microagents directory is deliberately NOT created.
    """
    rules_text = """Always use Python for new files.
Follow PEP 8 style guidelines."""
    with tempfile.TemporaryDirectory() as tmp:
        repo_root = Path(tmp)
        # Place the rules file directly in the repository root
        rules_file = repo_root / '.cursorrules'
        rules_file.write_text(rules_text)
        yield repo_root
def test_load_cursorrules_without_microagents_dir(temp_dir_with_cursorrules_only):
    """Verify .cursorrules is loaded when .openhands/microagents is missing.

    Reproduces the bug where .cursorrules was only loaded when the
    .openhands/microagents directory existed.
    """
    # Point the loader at a microagents directory that does not exist
    missing_dir = temp_dir_with_cursorrules_only / '.openhands' / 'microagents'
    repo_agents, knowledge_agents = load_microagents_from_dir(missing_dir)

    # The root-level .cursorrules file must still be discovered, and nothing else
    assert len(repo_agents) == 1
    assert 'cursorrules' in repo_agents
    assert len(knowledge_agents) == 0

    # Inspect the agent built from .cursorrules
    loaded = repo_agents['cursorrules']
    assert isinstance(loaded, RepoMicroagent)
    assert loaded.name == 'cursorrules'
    assert 'Always use Python for new files' in loaded.content
    assert loaded.type == MicroagentType.REPO_KNOWLEDGE
def test_agents_md_file_load():
    """Check that an AGENTS.md file given as in-memory content loads as a RepoMicroagent."""
    source_path = Path('AGENTS.md')
    markdown = """# Project Setup
## Setup commands
- Install deps: `npm install`
- Start dev server: `npm run dev`
- Run tests: `npm test`
## Code style
- TypeScript strict mode
- Single quotes, no semicolons
- Use functional patterns where possible"""

    # Load straight from the supplied content; nothing is read from disk here
    loaded = BaseMicroagent.load(source_path, file_content=markdown)

    # The result must be a repo-level agent named 'agents' with untouched content
    assert isinstance(loaded, RepoMicroagent)
    assert loaded.name == 'agents'
    assert loaded.content == markdown
    assert loaded.type == MicroagentType.REPO_KNOWLEDGE
    assert loaded.metadata.name == 'agents'
    assert loaded.source == str(source_path)
def test_agents_md_case_insensitive():
    """Check that upper- and lower-case AGENTS.md / AGENT.md names all load as 'agents'."""
    markdown = """# Development Guide
Use TypeScript for all new files."""

    # Each spelling must resolve to the same repo microagent name
    for candidate in ('AGENTS.md', 'agents.md', 'AGENT.md', 'agent.md'):
        loaded = BaseMicroagent.load(Path(candidate), file_content=markdown)
        assert isinstance(loaded, RepoMicroagent)
        assert loaded.name == 'agents'
        assert loaded.content == markdown
        assert loaded.type == MicroagentType.REPO_KNOWLEDGE
@pytest.fixture
def temp_dir_with_agents_md_only():
    """Yield a temporary repository root that holds only an AGENTS.md file.

    The .openhands/microagents directory is deliberately NOT created.
    """
    guide_text = """# Development Guide
## Setup commands
- Install deps: `poetry install`
- Start dev server: `poetry run python app.py`
- Run tests: `poetry run pytest`
## Code style
- Python 3.12+
- Follow PEP 8 guidelines
- Use type hints everywhere"""
    with tempfile.TemporaryDirectory() as tmp:
        repo_root = Path(tmp)
        # Place AGENTS.md directly in the repository root
        guide_file = repo_root / 'AGENTS.md'
        guide_file.write_text(guide_text)
        yield repo_root
def test_load_agents_md_without_microagents_dir(temp_dir_with_agents_md_only):
    """Verify AGENTS.md is loaded when .openhands/microagents is missing."""
    # Point the loader at a microagents directory that does not exist
    missing_dir = temp_dir_with_agents_md_only / '.openhands' / 'microagents'
    repo_agents, knowledge_agents = load_microagents_from_dir(missing_dir)

    # The root-level AGENTS.md file must still be discovered, and nothing else
    assert len(repo_agents) == 1
    assert 'agents' in repo_agents
    assert len(knowledge_agents) == 0

    # Inspect the agent built from AGENTS.md
    loaded = repo_agents['agents']
    assert isinstance(loaded, RepoMicroagent)
    assert loaded.name == 'agents'
    assert 'Install deps: `poetry install`' in loaded.content
    assert loaded.type == MicroagentType.REPO_KNOWLEDGE
@pytest.fixture
def temp_dir_with_both_cursorrules_and_agents():
    """Yield a temporary repository root holding both .cursorrules and AGENTS.md."""
    rules_text = """Always use Python for new files.
Follow PEP 8 style guidelines."""
    guide_text = """# Development Guide
## Setup commands
- Install deps: `poetry install`
- Run tests: `poetry run pytest`"""
    with tempfile.TemporaryDirectory() as tmp:
        repo_root = Path(tmp)
        # Write the two instruction files in the same order as before:
        # .cursorrules first, then AGENTS.md
        (repo_root / '.cursorrules').write_text(rules_text)
        (repo_root / 'AGENTS.md').write_text(guide_text)
        yield repo_root
def test_load_both_cursorrules_and_agents_md(temp_dir_with_both_cursorrules_and_agents):
    """Verify .cursorrules and AGENTS.md are both loaded when .openhands/microagents is missing."""
    # Point the loader at a microagents directory that does not exist
    repo_root = temp_dir_with_both_cursorrules_and_agents
    missing_dir = repo_root / '.openhands' / 'microagents'
    repo_agents, knowledge_agents = load_microagents_from_dir(missing_dir)

    # Exactly the two root-level instruction files must be discovered
    assert len(repo_agents) == 2
    assert 'cursorrules' in repo_agents
    assert 'agents' in repo_agents
    assert len(knowledge_agents) == 0

    # Each agent must carry the content of its own file
    rules_agent = repo_agents['cursorrules']
    assert isinstance(rules_agent, RepoMicroagent)
    assert 'Always use Python for new files' in rules_agent.content

    guide_agent = repo_agents['agents']
    assert isinstance(guide_agent, RepoMicroagent)
    assert 'Install deps: `poetry install`' in guide_agent.content