(Hotfix): Fix eval pipeline (#10466)

2026-03-22 13:47:19 +08:00 · 2025-08-18 12:51:51 -04:00
parent 3fea7fd2fc
commit 61d90c31eb
4 changed files with 4 additions and 17 deletions
--- a/openhands/core/main.py
+++ b/openhands/core/main.py
@@ -32,7 +32,6 @@ from openhands.events.action.action import Action
 from openhands.events.event import Event
 from openhands.events.observation import AgentStateChangedObservation
 from openhands.io import read_input, read_task
-from openhands.llm.llm_registry import LLMRegistry
 from openhands.mcp import add_mcp_tools_to_agent
 from openhands.memory.memory import Memory
 from openhands.runtime.base import Runtime
@@ -59,7 +58,6 @@ async def run_controller(
    headless_mode: bool = True,
    memory: Memory | None = None,
    conversation_instructions: str | None = None,
-    llm_registry: LLMRegistry | None = None,
 ) -> State | None:
    """Main coroutine to run the agent controller with task input flexibility.

--- a/openhands/core/setup.py
+++ b/openhands/core/setup.py
@@ -35,7 +35,7 @@ from openhands.utils.async_utils import GENERAL_TIMEOUT, call_async_from_sync

 def create_runtime(
    config: OpenHandsConfig,
-    llm_registry: LLMRegistry,
+    llm_registry: LLMRegistry | None = None,
    sid: str | None = None,
    headless_mode: bool = True,
    agent: Agent | None = None,
@@ -84,7 +84,7 @@ def create_runtime(
        sid=session_id,
        plugins=agent_cls.sandbox_plugins,
        headless_mode=headless_mode,
-        llm_registry=llm_registry,
+        llm_registry=llm_registry or LLMRegistry(config),
        git_provider_tokens=git_provider_tokens,
    )

--- a/tests/unit/test_agent_controller.py
+++ b/tests/unit/test_agent_controller.py
@@ -345,7 +345,6 @@ async def test_run_controller_with_fatal_error(
            sid='test',
            fake_user_response_fn=lambda _: 'repeat',
            memory=mock_memory,
-            llm_registry=llm_registry,
        )
    print(f'state: {state}')
    events = list(test_event_stream.get_events())
@@ -413,7 +412,6 @@ async def test_run_controller_stop_with_stuck(
            sid='test',
            fake_user_response_fn=lambda _: 'repeat',
            memory=mock_memory,
-            llm_registry=llm_registry,
        )
    events = list(test_event_stream.get_events())
    print(f'state: {state}')
@@ -938,7 +936,6 @@ async def test_run_controller_max_iterations_has_metrics(
            sid='test',
            fake_user_response_fn=lambda _: 'repeat',
            memory=mock_memory,
-            llm_registry=llm_registry,
        )

    state.metrics = mock_agent.llm.metrics
@@ -1094,7 +1091,6 @@ async def test_context_window_exceeded_error_handling(
                sid='test',
                fake_user_response_fn=lambda _: 'repeat',
                memory=mock_memory,
-                llm_registry=llm_registry,
            ),
            timeout=10,
        )
@@ -1248,7 +1244,6 @@ async def test_run_controller_with_context_window_exceeded_with_truncation(
                    sid='test',
                    fake_user_response_fn=lambda _: 'repeat',
                    memory=mock_memory,
-                    llm_registry=llm_registry,
                ),
                timeout=10,
            )
@@ -1332,7 +1327,6 @@ async def test_run_controller_with_context_window_exceeded_without_truncation(
                    sid='test',
                    fake_user_response_fn=lambda _: 'repeat',
                    memory=mock_memory,
-                    llm_registry=llm_registry,
                ),
                timeout=10,
            )
@@ -1411,7 +1405,6 @@ async def test_run_controller_with_memory_error(
                sid='test',
                fake_user_response_fn=lambda _: 'repeat',
                memory=memory,
-                llm_registry=llm_registry,
            )

    assert state.iteration_flag.current_value == 0
--- a/tests/unit/test_memory.py
+++ b/tests/unit/test_memory.py
@@ -98,9 +98,7 @@ def mock_agent():


@pytest.mark.asyncio
-async def test_memory_on_event_exception_handling(
-    memory, event_stream, mock_agent, mock_llm_registry
-):
+async def test_memory_on_event_exception_handling(memory, event_stream, mock_agent):
    """Test that exceptions in Memory.on_event are properly handled via status callback."""
    # Create a mock runtime
    runtime = MagicMock(spec=ActionExecutionClient)
@@ -120,7 +118,6 @@ async def test_memory_on_event_exception_handling(
            sid='test',
            fake_user_response_fn=lambda _: 'repeat',
            memory=memory,
-            llm_registry=mock_llm_registry,
        )

        # Verify that the controller's last error was set
@@ -131,7 +128,7 @@ async def test_memory_on_event_exception_handling(

@pytest.mark.asyncio
 async def test_memory_on_workspace_context_recall_exception_handling(
-    memory, event_stream, mock_agent, mock_llm_registry
+    memory, event_stream, mock_agent
 ):
    """Test that exceptions in Memory._on_workspace_context_recall are properly handled via status callback."""
    # Create a mock runtime
@@ -154,7 +151,6 @@ async def test_memory_on_workspace_context_recall_exception_handling(
            sid='test',
            fake_user_response_fn=lambda _: 'repeat',
            memory=memory,
-            llm_registry=mock_llm_registry,
        )

        # Verify that the controller's last error was set