Fix issue #7205: [Feature]: Allow repo microagent Markdown Files Without Required Header (#7655)

Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
This commit is contained in:
OpenHands 2025-04-03 15:04:51 -04:00 committed by GitHub
parent a899f80ee9
commit 6cfaad86ea
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 213 additions and 27 deletions

View File

@ -51,7 +51,7 @@ When OpenHands works with a repository, it:
## Types of MicroAgents
All microagents use markdown files with YAML frontmatter.
Most microagents use markdown files with YAML frontmatter. For repository agents (repo.md), the frontmatter is optional - if not provided, the file will be loaded with default settings as a repository agent.
### 1. Knowledge Agents
@ -147,6 +147,7 @@ You can see an example of a task-based agent in [OpenHands's pull request updati
- Specify testing and build procedures
- List environment requirements
- Maintain up-to-date team practices
- YAML frontmatter is optional - files without frontmatter will be loaded with default settings
### Submission Process

View File

@ -1,6 +1,5 @@
import os
from typing import Any
from urllib.parse import quote_plus
import httpx
from pydantic import SecretStr

View File

@ -125,7 +125,11 @@ class Memory:
def _on_workspace_context_recall(
self, event: RecallAction
) -> RecallObservation | None:
"""Add repository and runtime information to the stream as a RecallObservation."""
"""Add repository and runtime information to the stream as a RecallObservation.
This method collects information from all available repo microagents and concatenates their contents.
Multiple repo microagents are supported, and their contents will be concatenated with newlines between them.
"""
# Create WORKSPACE_CONTEXT info:
# - repository_info
@ -135,11 +139,8 @@ class Memory:
# Collect raw repository instructions
repo_instructions = ''
assert (
len(self.repo_microagents) <= 1
), f'Expecting at most one repo microagent, but found {len(self.repo_microagents)}: {self.repo_microagents.keys()}'
# Retrieve the context of repo instructions
# Retrieve the context of repo instructions from all repo microagents
for microagent in self.repo_microagents.values():
if repo_instructions:
repo_instructions += '\n\n'

View File

@ -46,8 +46,12 @@ class BaseMicroAgent(BaseModel):
file_io = io.StringIO(file_content)
loaded = frontmatter.load(file_io)
content = loaded.content
# Handle case where there's no frontmatter or empty frontmatter
metadata_dict = loaded.metadata or {}
try:
metadata = MicroAgentMetadata(**loaded.metadata)
metadata = MicroAgentMetadata(**metadata_dict)
except Exception as e:
raise MicroAgentValidationError(f'Error loading metadata: {e}') from e

View File

@ -15,7 +15,7 @@ class MicroAgentMetadata(BaseModel):
"""Metadata for all microagents."""
name: str = 'default'
type: MicroAgentType = Field(default=MicroAgentType.KNOWLEDGE)
type: MicroAgentType = Field(default=MicroAgentType.REPO_KNOWLEDGE)
version: str = Field(default='1.0.0')
agent: str = Field(default='CodeActAgent')
triggers: list[str] = [] # optional, only exists for knowledge microagents

View File

@ -1,9 +1,8 @@
import os
from functools import lru_cache
from typing import Callable
from uuid import UUID
import os
import docker
import httpx
import tenacity
@ -89,9 +88,13 @@ class DockerRuntime(ActionExecutionClient):
self._vscode_port = -1
self._app_ports: list[int] = []
if os.environ.get("DOCKER_HOST_ADDR"):
logger.info(f'Using DOCKER_HOST_IP: {os.environ["DOCKER_HOST_ADDR"]} for local_runtime_url')
self.config.sandbox.local_runtime_url = f'http://{os.environ["DOCKER_HOST_ADDR"]}'
if os.environ.get('DOCKER_HOST_ADDR'):
logger.info(
f'Using DOCKER_HOST_IP: {os.environ["DOCKER_HOST_ADDR"]} for local_runtime_url'
)
self.config.sandbox.local_runtime_url = (
f'http://{os.environ["DOCKER_HOST_ADDR"]}'
)
self.docker_client: docker.DockerClient = self._init_docker_client()
self.api_url = f'{self.config.sandbox.local_runtime_url}:{self._container_port}'

View File

@ -20,9 +20,6 @@ from openhands.server.auth import get_access_token, get_provider_tokens
app = APIRouter(prefix='/api/user')
from pydantic import BaseModel
@app.get('/repositories', response_model=list[Repository])
async def get_user_repositories(
sort: str = 'pushed',
@ -30,14 +27,12 @@ async def get_user_repositories(
provider_tokens: PROVIDER_TOKEN_TYPE | None = Depends(get_provider_tokens),
access_token: SecretStr | None = Depends(get_access_token),
):
if provider_tokens:
client = ProviderHandler(
provider_tokens=provider_tokens, external_auth_token=access_token
)
try:
repos: list[Repository] = await client.get_repositories(
sort, installation_id
)
@ -135,7 +130,6 @@ async def search_repositories(
provider_tokens: PROVIDER_TOKEN_TYPE | None = Depends(get_provider_tokens),
access_token: SecretStr | None = Depends(get_access_token),
):
if provider_tokens:
client = ProviderHandler(
provider_tokens=provider_tokens, external_auth_token=access_token

View File

@ -324,9 +324,9 @@ class AgentSession:
return False
if selected_repository and git_provider_tokens:
await self.runtime.clone_repo(git_provider_tokens,
selected_repository,
selected_branch)
await self.runtime.clone_repo(
git_provider_tokens, selected_repository, selected_branch
)
await call_sync_from_async(self.runtime.maybe_run_setup_script)
self.logger.debug(
@ -408,12 +408,15 @@ class AgentSession:
# loads microagents from repo/.openhands/microagents
microagents: list[BaseMicroAgent] = await call_sync_from_async(
self.runtime.get_microagents_from_selected_repo, selected_repository.full_name if selected_repository else None
self.runtime.get_microagents_from_selected_repo,
selected_repository.full_name if selected_repository else None,
)
memory.load_user_workspace_microagents(microagents)
if selected_repository and repo_directory:
memory.set_repository_info(selected_repository.full_name, repo_directory)
memory.set_repository_info(
selected_repository.full_name, repo_directory
)
return memory
def _maybe_restore_state(self) -> State | None:

4
poetry.lock generated
View File

@ -2661,7 +2661,7 @@ grpcio = {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_versi
grpcio-status = {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}
proto-plus = [
{version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""},
{version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""},
{version = ">=1.22.3,<2.0.0dev"},
]
protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0"
requests = ">=2.18.0,<3.0.0.dev0"
@ -2874,7 +2874,7 @@ google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev"
grpc-google-iam-v1 = ">=0.14.0,<1.0.0dev"
proto-plus = [
{version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""},
{version = ">=1.22.3,<2.0.0dev"},
{version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""},
]
protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev"

View File

@ -99,6 +99,7 @@ reportlab = "*"
[tool.coverage.run]
concurrency = ["gevent"]
[tool.poetry.group.runtime.dependencies]
jupyterlab = "*"
notebook = "*"
@ -127,6 +128,7 @@ ignore = ["D1"]
[tool.ruff.lint.pydocstyle]
convention = "google"
[tool.poetry.group.evaluation.dependencies]
streamlit = "*"
whatthepatch = "*"

View File

@ -319,3 +319,88 @@ async def test_memory_with_agent_microagents():
assert observation.microagent_knowledge[0].name == 'flarglebargle'
assert observation.microagent_knowledge[0].trigger == 'flarglebargle'
assert 'magic word' in observation.microagent_knowledge[0].content
def test_memory_multiple_repo_microagents(prompt_dir):
"""Test that Memory loads and concatenates multiple repo microagents correctly."""
# Create an in-memory file store and real event stream
file_store = InMemoryFileStore()
event_stream = EventStream(sid='test-session', file_store=file_store)
# Create two test repo microagents
repo_microagent1_name = 'test_repo_microagent1'
repo_microagent1_content = """---
REPOSITORY INSTRUCTIONS: This is the first test repository.
"""
repo_microagent2_name = 'test_repo_microagent2'
repo_microagent2_content = """---
name: test_repo2
type: repo
agent: CodeActAgent
---
REPOSITORY INSTRUCTIONS: This is the second test repository.
"""
# Create temporary repo microagent files
os.makedirs(os.path.join(prompt_dir, 'micro'), exist_ok=True)
with open(
os.path.join(prompt_dir, 'micro', f'{repo_microagent1_name}.md'), 'w'
) as f:
f.write(repo_microagent1_content)
with open(
os.path.join(prompt_dir, 'micro', f'{repo_microagent2_name}.md'), 'w'
) as f:
f.write(repo_microagent2_content)
# Patch the global microagents directory to use our test directory
test_microagents_dir = os.path.join(prompt_dir, 'micro')
with patch('openhands.memory.memory.GLOBAL_MICROAGENTS_DIR', test_microagents_dir):
# Initialize Memory
memory = Memory(
event_stream=event_stream,
sid='test-session',
)
# Set repository info
memory.set_repository_info('owner/repo', '/workspace/repo')
# Create and add the first user message
user_message = MessageAction(content='First user message')
user_message._source = EventSource.USER # type: ignore[attr-defined]
event_stream.add_event(user_message, EventSource.USER)
# Create and add the microagent action
microagent_action = RecallAction(
query='First user message', recall_type=RecallType.WORKSPACE_CONTEXT
)
microagent_action._source = EventSource.USER # type: ignore[attr-defined]
event_stream.add_event(microagent_action, EventSource.USER)
# Give it a little time to process
time.sleep(0.3)
# Get all events from the stream
events = list(event_stream.get_events())
# Find the RecallObservation event
microagent_obs_events = [
event for event in events if isinstance(event, RecallObservation)
]
# We should have one RecallObservation
assert len(microagent_obs_events) > 0
# Get the first RecallObservation
observation = microagent_obs_events[0]
assert observation.recall_type == RecallType.WORKSPACE_CONTEXT
assert observation.repo_name == 'owner/repo'
assert observation.repo_directory == '/workspace/repo'
assert 'This is the first test repository' in observation.repo_instructions
assert 'This is the second test repository' in observation.repo_instructions
# Clean up
os.remove(os.path.join(prompt_dir, 'micro', f'{repo_microagent1_name}.md'))
os.remove(os.path.join(prompt_dir, 'micro', f'{repo_microagent2_name}.md'))

View File

@ -0,0 +1,94 @@
from pathlib import Path
from openhands.microagent.microagent import BaseMicroAgent, RepoMicroAgent
from openhands.microagent.types import MicroAgentType
def test_load_markdown_without_frontmatter():
"""Test loading a markdown file without frontmatter."""
content = '# Test Content\nThis is a test markdown file without frontmatter.'
path = Path('test.md')
# Load the agent from content
agent = BaseMicroAgent.load(path, content)
# Verify it's loaded as a repo agent with default values
assert isinstance(agent, RepoMicroAgent)
assert agent.name == 'default'
assert agent.content == content
assert agent.type == MicroAgentType.REPO_KNOWLEDGE
assert agent.metadata.agent == 'CodeActAgent'
assert agent.metadata.version == '1.0.0'
def test_load_markdown_with_empty_frontmatter():
"""Test loading a markdown file with empty frontmatter."""
content = (
'---\n---\n# Test Content\nThis is a test markdown file with empty frontmatter.'
)
path = Path('test.md')
# Load the agent from content
agent = BaseMicroAgent.load(path, content)
# Verify it's loaded as a repo agent with default values
assert isinstance(agent, RepoMicroAgent)
assert agent.name == 'default'
assert (
agent.content
== '# Test Content\nThis is a test markdown file with empty frontmatter.'
)
assert agent.type == MicroAgentType.REPO_KNOWLEDGE
assert agent.metadata.agent == 'CodeActAgent'
assert agent.metadata.version == '1.0.0'
def test_load_markdown_with_partial_frontmatter():
"""Test loading a markdown file with partial frontmatter."""
content = """---
name: custom_name
---
# Test Content
This is a test markdown file with partial frontmatter."""
path = Path('test.md')
# Load the agent from content
agent = BaseMicroAgent.load(path, content)
# Verify it uses provided name but default values for other fields
assert isinstance(agent, RepoMicroAgent)
assert agent.name == 'custom_name'
assert (
agent.content
== '# Test Content\nThis is a test markdown file with partial frontmatter.'
)
assert agent.type == MicroAgentType.REPO_KNOWLEDGE
assert agent.metadata.agent == 'CodeActAgent'
assert agent.metadata.version == '1.0.0'
def test_load_markdown_with_full_frontmatter():
"""Test loading a markdown file with full frontmatter still works."""
content = """---
name: test_agent
type: repo
agent: CustomAgent
version: 2.0.0
---
# Test Content
This is a test markdown file with full frontmatter."""
path = Path('test.md')
# Load the agent from content
agent = BaseMicroAgent.load(path, content)
# Verify all provided values are used
assert isinstance(agent, RepoMicroAgent)
assert agent.name == 'test_agent'
assert (
agent.content
== '# Test Content\nThis is a test markdown file with full frontmatter.'
)
assert agent.type == MicroAgentType.REPO_KNOWLEDGE
assert agent.metadata.agent == 'CustomAgent'
assert agent.metadata.version == '2.0.0'