From 6cfaad86ea57fbe5c5e3e9651d904e185f0b754c Mon Sep 17 00:00:00 2001 From: OpenHands Date: Thu, 3 Apr 2025 15:04:51 -0400 Subject: [PATCH] Fix issue #7205: [Feature]: Allow repo microagent Markdown Files Without Required Header (#7655) Co-authored-by: Engel Nyst --- microagents/README.md | 3 +- .../integrations/gitlab/gitlab_service.py | 1 - openhands/memory/memory.py | 11 ++- openhands/microagent/microagent.py | 6 +- openhands/microagent/types.py | 2 +- .../runtime/impl/docker/docker_runtime.py | 13 ++- openhands/server/routes/git.py | 6 -- openhands/server/session/agent_session.py | 13 ++- poetry.lock | 4 +- pyproject.toml | 2 + tests/unit/test_memory.py | 85 +++++++++++++++++ tests/unit/test_microagent_no_header.py | 94 +++++++++++++++++++ 12 files changed, 213 insertions(+), 27 deletions(-) create mode 100644 tests/unit/test_microagent_no_header.py diff --git a/microagents/README.md b/microagents/README.md index 044c15539d..a130ec582c 100644 --- a/microagents/README.md +++ b/microagents/README.md @@ -51,7 +51,7 @@ When OpenHands works with a repository, it: ## Types of MicroAgents -All microagents use markdown files with YAML frontmatter. +Most microagents use markdown files with YAML frontmatter. For repository agents (repo.md), the frontmatter is optional - if not provided, the file will be loaded with default settings as a repository agent. ### 1. Knowledge Agents @@ -147,6 +147,7 @@ You can see an example of a task-based agent in [OpenHands's pull request updati - Specify testing and build procedures - List environment requirements - Maintain up-to-date team practices + - YAML frontmatter is optional - files without frontmatter will be loaded with default settings ### Submission Process diff --git a/openhands/integrations/gitlab/gitlab_service.py b/openhands/integrations/gitlab/gitlab_service.py index e74dc3f0ca..bfb10f74f3 100644 --- a/openhands/integrations/gitlab/gitlab_service.py +++ b/openhands/integrations/gitlab/gitlab_service.py @@ -1,6 +1,5 @@ import os from typing import Any -from urllib.parse import quote_plus import httpx from pydantic import SecretStr diff --git a/openhands/memory/memory.py b/openhands/memory/memory.py index 948aefede3..4ddf66f043 100644 --- a/openhands/memory/memory.py +++ b/openhands/memory/memory.py @@ -125,7 +125,11 @@ class Memory: def _on_workspace_context_recall( self, event: RecallAction ) -> RecallObservation | None: - """Add repository and runtime information to the stream as a RecallObservation.""" + """Add repository and runtime information to the stream as a RecallObservation. + + This method collects information from all available repo microagents and concatenates their contents. + Multiple repo microagents are supported, and their contents will be concatenated with newlines between them. + """ # Create WORKSPACE_CONTEXT info: # - repository_info @@ -135,11 +139,8 @@ class Memory: # Collect raw repository instructions repo_instructions = '' - assert ( - len(self.repo_microagents) <= 1 - ), f'Expecting at most one repo microagent, but found {len(self.repo_microagents)}: {self.repo_microagents.keys()}' - # Retrieve the context of repo instructions + # Retrieve the context of repo instructions from all repo microagents for microagent in self.repo_microagents.values(): if repo_instructions: repo_instructions += '\n\n' diff --git a/openhands/microagent/microagent.py b/openhands/microagent/microagent.py index a2cd929307..ff846c5013 100644 --- a/openhands/microagent/microagent.py +++ b/openhands/microagent/microagent.py @@ -46,8 +46,12 @@ class BaseMicroAgent(BaseModel): file_io = io.StringIO(file_content) loaded = frontmatter.load(file_io) content = loaded.content + + # Handle case where there's no frontmatter or empty frontmatter + metadata_dict = loaded.metadata or {} + try: - metadata = MicroAgentMetadata(**loaded.metadata) + metadata = MicroAgentMetadata(**metadata_dict) except Exception as e: raise MicroAgentValidationError(f'Error loading metadata: {e}') from e diff --git a/openhands/microagent/types.py b/openhands/microagent/types.py index 0962553d93..63cce23ba1 100644 --- a/openhands/microagent/types.py +++ b/openhands/microagent/types.py @@ -15,7 +15,7 @@ class MicroAgentMetadata(BaseModel): """Metadata for all microagents.""" name: str = 'default' - type: MicroAgentType = Field(default=MicroAgentType.KNOWLEDGE) + type: MicroAgentType = Field(default=MicroAgentType.REPO_KNOWLEDGE) version: str = Field(default='1.0.0') agent: str = Field(default='CodeActAgent') triggers: list[str] = [] # optional, only exists for knowledge microagents diff --git a/openhands/runtime/impl/docker/docker_runtime.py b/openhands/runtime/impl/docker/docker_runtime.py index 7a02d7520a..9cbe319891 100644 --- a/openhands/runtime/impl/docker/docker_runtime.py +++ b/openhands/runtime/impl/docker/docker_runtime.py @@ -1,9 +1,8 @@ +import os from functools import lru_cache from typing import Callable from uuid import UUID -import os - import docker import httpx import tenacity @@ -89,9 +88,13 @@ class DockerRuntime(ActionExecutionClient): self._vscode_port = -1 self._app_ports: list[int] = [] - if os.environ.get("DOCKER_HOST_ADDR"): - logger.info(f'Using DOCKER_HOST_IP: {os.environ["DOCKER_HOST_ADDR"]} for local_runtime_url') - self.config.sandbox.local_runtime_url = f'http://{os.environ["DOCKER_HOST_ADDR"]}' + if os.environ.get('DOCKER_HOST_ADDR'): + logger.info( + f'Using DOCKER_HOST_IP: {os.environ["DOCKER_HOST_ADDR"]} for local_runtime_url' + ) + self.config.sandbox.local_runtime_url = ( + f'http://{os.environ["DOCKER_HOST_ADDR"]}' + ) self.docker_client: docker.DockerClient = self._init_docker_client() self.api_url = f'{self.config.sandbox.local_runtime_url}:{self._container_port}' diff --git a/openhands/server/routes/git.py b/openhands/server/routes/git.py index 547130dfbe..1f9f6c1c7a 100644 --- a/openhands/server/routes/git.py +++ b/openhands/server/routes/git.py @@ -20,9 +20,6 @@ from openhands.server.auth import get_access_token, get_provider_tokens app = APIRouter(prefix='/api/user') -from pydantic import BaseModel - - @app.get('/repositories', response_model=list[Repository]) async def get_user_repositories( sort: str = 'pushed', @@ -30,14 +27,12 @@ async def get_user_repositories( provider_tokens: PROVIDER_TOKEN_TYPE | None = Depends(get_provider_tokens), access_token: SecretStr | None = Depends(get_access_token), ): - if provider_tokens: client = ProviderHandler( provider_tokens=provider_tokens, external_auth_token=access_token ) try: - repos: list[Repository] = await client.get_repositories( sort, installation_id ) @@ -135,7 +130,6 @@ async def search_repositories( provider_tokens: PROVIDER_TOKEN_TYPE | None = Depends(get_provider_tokens), access_token: SecretStr | None = Depends(get_access_token), ): - if provider_tokens: client = ProviderHandler( provider_tokens=provider_tokens, external_auth_token=access_token diff --git a/openhands/server/session/agent_session.py b/openhands/server/session/agent_session.py index 220f74054b..c7343b9d3d 100644 --- a/openhands/server/session/agent_session.py +++ b/openhands/server/session/agent_session.py @@ -324,9 +324,9 @@ class AgentSession: return False if selected_repository and git_provider_tokens: - await self.runtime.clone_repo(git_provider_tokens, - selected_repository, - selected_branch) + await self.runtime.clone_repo( + git_provider_tokens, selected_repository, selected_branch + ) await call_sync_from_async(self.runtime.maybe_run_setup_script) self.logger.debug( @@ -408,12 +408,15 @@ class AgentSession: # loads microagents from repo/.openhands/microagents microagents: list[BaseMicroAgent] = await call_sync_from_async( - self.runtime.get_microagents_from_selected_repo, selected_repository.full_name if selected_repository else None + self.runtime.get_microagents_from_selected_repo, + selected_repository.full_name if selected_repository else None, ) memory.load_user_workspace_microagents(microagents) if selected_repository and repo_directory: - memory.set_repository_info(selected_repository.full_name, repo_directory) + memory.set_repository_info( + selected_repository.full_name, repo_directory + ) return memory def _maybe_restore_state(self) -> State | None: diff --git a/poetry.lock b/poetry.lock index 06d618329c..e8db7816ac 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2661,7 +2661,7 @@ grpcio = {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_versi grpcio-status = {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""} proto-plus = [ {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, - {version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""}, + {version = ">=1.22.3,<2.0.0dev"}, ] protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" requests = ">=2.18.0,<3.0.0.dev0" @@ -2874,7 +2874,7 @@ google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" grpc-google-iam-v1 = ">=0.14.0,<1.0.0dev" proto-plus = [ {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, - {version = ">=1.22.3,<2.0.0dev"}, + {version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""}, ] protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev" diff --git a/pyproject.toml b/pyproject.toml index e95916353f..d811a7ac20 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,6 +99,7 @@ reportlab = "*" [tool.coverage.run] concurrency = ["gevent"] + [tool.poetry.group.runtime.dependencies] jupyterlab = "*" notebook = "*" @@ -127,6 +128,7 @@ ignore = ["D1"] [tool.ruff.lint.pydocstyle] convention = "google" + [tool.poetry.group.evaluation.dependencies] streamlit = "*" whatthepatch = "*" diff --git a/tests/unit/test_memory.py b/tests/unit/test_memory.py index 83e7dfc360..91fac6045f 100644 --- a/tests/unit/test_memory.py +++ b/tests/unit/test_memory.py @@ -319,3 +319,88 @@ async def test_memory_with_agent_microagents(): assert observation.microagent_knowledge[0].name == 'flarglebargle' assert observation.microagent_knowledge[0].trigger == 'flarglebargle' assert 'magic word' in observation.microagent_knowledge[0].content + + +def test_memory_multiple_repo_microagents(prompt_dir): + """Test that Memory loads and concatenates multiple repo microagents correctly.""" + # Create an in-memory file store and real event stream + file_store = InMemoryFileStore() + event_stream = EventStream(sid='test-session', file_store=file_store) + + # Create two test repo microagents + repo_microagent1_name = 'test_repo_microagent1' + repo_microagent1_content = """--- +REPOSITORY INSTRUCTIONS: This is the first test repository. +""" + + repo_microagent2_name = 'test_repo_microagent2' + repo_microagent2_content = """--- +name: test_repo2 +type: repo +agent: CodeActAgent +--- + +REPOSITORY INSTRUCTIONS: This is the second test repository. +""" + + # Create temporary repo microagent files + os.makedirs(os.path.join(prompt_dir, 'micro'), exist_ok=True) + with open( + os.path.join(prompt_dir, 'micro', f'{repo_microagent1_name}.md'), 'w' + ) as f: + f.write(repo_microagent1_content) + + with open( + os.path.join(prompt_dir, 'micro', f'{repo_microagent2_name}.md'), 'w' + ) as f: + f.write(repo_microagent2_content) + + # Patch the global microagents directory to use our test directory + test_microagents_dir = os.path.join(prompt_dir, 'micro') + with patch('openhands.memory.memory.GLOBAL_MICROAGENTS_DIR', test_microagents_dir): + # Initialize Memory + memory = Memory( + event_stream=event_stream, + sid='test-session', + ) + + # Set repository info + memory.set_repository_info('owner/repo', '/workspace/repo') + + # Create and add the first user message + user_message = MessageAction(content='First user message') + user_message._source = EventSource.USER # type: ignore[attr-defined] + event_stream.add_event(user_message, EventSource.USER) + + # Create and add the microagent action + microagent_action = RecallAction( + query='First user message', recall_type=RecallType.WORKSPACE_CONTEXT + ) + microagent_action._source = EventSource.USER # type: ignore[attr-defined] + event_stream.add_event(microagent_action, EventSource.USER) + + # Give it a little time to process + time.sleep(0.3) + + # Get all events from the stream + events = list(event_stream.get_events()) + + # Find the RecallObservation event + microagent_obs_events = [ + event for event in events if isinstance(event, RecallObservation) + ] + + # We should have one RecallObservation + assert len(microagent_obs_events) > 0 + + # Get the first RecallObservation + observation = microagent_obs_events[0] + assert observation.recall_type == RecallType.WORKSPACE_CONTEXT + assert observation.repo_name == 'owner/repo' + assert observation.repo_directory == '/workspace/repo' + assert 'This is the first test repository' in observation.repo_instructions + assert 'This is the second test repository' in observation.repo_instructions + + # Clean up + os.remove(os.path.join(prompt_dir, 'micro', f'{repo_microagent1_name}.md')) + os.remove(os.path.join(prompt_dir, 'micro', f'{repo_microagent2_name}.md')) diff --git a/tests/unit/test_microagent_no_header.py b/tests/unit/test_microagent_no_header.py new file mode 100644 index 0000000000..5d360a04dc --- /dev/null +++ b/tests/unit/test_microagent_no_header.py @@ -0,0 +1,94 @@ +from pathlib import Path + +from openhands.microagent.microagent import BaseMicroAgent, RepoMicroAgent +from openhands.microagent.types import MicroAgentType + + +def test_load_markdown_without_frontmatter(): + """Test loading a markdown file without frontmatter.""" + content = '# Test Content\nThis is a test markdown file without frontmatter.' + path = Path('test.md') + + # Load the agent from content + agent = BaseMicroAgent.load(path, content) + + # Verify it's loaded as a repo agent with default values + assert isinstance(agent, RepoMicroAgent) + assert agent.name == 'default' + assert agent.content == content + assert agent.type == MicroAgentType.REPO_KNOWLEDGE + assert agent.metadata.agent == 'CodeActAgent' + assert agent.metadata.version == '1.0.0' + + +def test_load_markdown_with_empty_frontmatter(): + """Test loading a markdown file with empty frontmatter.""" + content = ( + '---\n---\n# Test Content\nThis is a test markdown file with empty frontmatter.' + ) + path = Path('test.md') + + # Load the agent from content + agent = BaseMicroAgent.load(path, content) + + # Verify it's loaded as a repo agent with default values + assert isinstance(agent, RepoMicroAgent) + assert agent.name == 'default' + assert ( + agent.content + == '# Test Content\nThis is a test markdown file with empty frontmatter.' + ) + assert agent.type == MicroAgentType.REPO_KNOWLEDGE + assert agent.metadata.agent == 'CodeActAgent' + assert agent.metadata.version == '1.0.0' + + +def test_load_markdown_with_partial_frontmatter(): + """Test loading a markdown file with partial frontmatter.""" + content = """--- +name: custom_name +--- +# Test Content +This is a test markdown file with partial frontmatter.""" + path = Path('test.md') + + # Load the agent from content + agent = BaseMicroAgent.load(path, content) + + # Verify it uses provided name but default values for other fields + assert isinstance(agent, RepoMicroAgent) + assert agent.name == 'custom_name' + assert ( + agent.content + == '# Test Content\nThis is a test markdown file with partial frontmatter.' + ) + assert agent.type == MicroAgentType.REPO_KNOWLEDGE + assert agent.metadata.agent == 'CodeActAgent' + assert agent.metadata.version == '1.0.0' + + +def test_load_markdown_with_full_frontmatter(): + """Test loading a markdown file with full frontmatter still works.""" + content = """--- +name: test_agent +type: repo +agent: CustomAgent +version: 2.0.0 +--- +# Test Content +This is a test markdown file with full frontmatter.""" + path = Path('test.md') + + # Load the agent from content + agent = BaseMicroAgent.load(path, content) + + # Verify all provided values are used + assert isinstance(agent, RepoMicroAgent) + assert agent.name == 'test_agent' + assert ( + agent.content + == '# Test Content\nThis is a test markdown file with full frontmatter.' + ) + assert agent.type == MicroAgentType.REPO_KNOWLEDGE + assert agent.metadata.agent == 'CustomAgent' + assert agent.metadata.version == '2.0.0'