(Hotfix): Fix eval pipeline (#10466)

This commit is contained in:
Rohit Malhotra
2025-08-18 12:51:51 -04:00
committed by GitHub
parent 3fea7fd2fc
commit 61d90c31eb
4 changed files with 4 additions and 17 deletions

View File

@@ -32,7 +32,6 @@ from openhands.events.action.action import Action
from openhands.events.event import Event
from openhands.events.observation import AgentStateChangedObservation
from openhands.io import read_input, read_task
-from openhands.llm.llm_registry import LLMRegistry
from openhands.mcp import add_mcp_tools_to_agent
from openhands.memory.memory import Memory
from openhands.runtime.base import Runtime
@@ -59,7 +58,6 @@ async def run_controller(
headless_mode: bool = True,
memory: Memory | None = None,
conversation_instructions: str | None = None,
-llm_registry: LLMRegistry | None = None,
) -> State | None:
"""Main coroutine to run the agent controller with task input flexibility.

View File

@@ -35,7 +35,7 @@ from openhands.utils.async_utils import GENERAL_TIMEOUT, call_async_from_sync
def create_runtime(
config: OpenHandsConfig,
-llm_registry: LLMRegistry,
+llm_registry: LLMRegistry | None = None,
sid: str | None = None,
headless_mode: bool = True,
agent: Agent | None = None,
@@ -84,7 +84,7 @@ def create_runtime(
sid=session_id,
plugins=agent_cls.sandbox_plugins,
headless_mode=headless_mode,
-llm_registry=llm_registry,
+llm_registry=llm_registry or LLMRegistry(config),
git_provider_tokens=git_provider_tokens,
)

View File

@@ -345,7 +345,6 @@ async def test_run_controller_with_fatal_error(
sid='test',
fake_user_response_fn=lambda _: 'repeat',
memory=mock_memory,
-llm_registry=llm_registry,
)
print(f'state: {state}')
events = list(test_event_stream.get_events())
@@ -413,7 +412,6 @@ async def test_run_controller_stop_with_stuck(
sid='test',
fake_user_response_fn=lambda _: 'repeat',
memory=mock_memory,
-llm_registry=llm_registry,
)
events = list(test_event_stream.get_events())
print(f'state: {state}')
@@ -938,7 +936,6 @@ async def test_run_controller_max_iterations_has_metrics(
sid='test',
fake_user_response_fn=lambda _: 'repeat',
memory=mock_memory,
-llm_registry=llm_registry,
)
state.metrics = mock_agent.llm.metrics
@@ -1094,7 +1091,6 @@ async def test_context_window_exceeded_error_handling(
sid='test',
fake_user_response_fn=lambda _: 'repeat',
memory=mock_memory,
-llm_registry=llm_registry,
),
timeout=10,
)
@@ -1248,7 +1244,6 @@ async def test_run_controller_with_context_window_exceeded_with_truncation(
sid='test',
fake_user_response_fn=lambda _: 'repeat',
memory=mock_memory,
-llm_registry=llm_registry,
),
timeout=10,
)
@@ -1332,7 +1327,6 @@ async def test_run_controller_with_context_window_exceeded_without_truncation(
sid='test',
fake_user_response_fn=lambda _: 'repeat',
memory=mock_memory,
-llm_registry=llm_registry,
),
timeout=10,
)
@@ -1411,7 +1405,6 @@ async def test_run_controller_with_memory_error(
sid='test',
fake_user_response_fn=lambda _: 'repeat',
memory=memory,
-llm_registry=llm_registry,
)
assert state.iteration_flag.current_value == 0

View File

@@ -98,9 +98,7 @@ def mock_agent():
@pytest.mark.asyncio
-async def test_memory_on_event_exception_handling(
-memory, event_stream, mock_agent, mock_llm_registry
-):
+async def test_memory_on_event_exception_handling(memory, event_stream, mock_agent):
"""Test that exceptions in Memory.on_event are properly handled via status callback."""
# Create a mock runtime
runtime = MagicMock(spec=ActionExecutionClient)
@@ -120,7 +118,6 @@ async def test_memory_on_event_exception_handling(
sid='test',
fake_user_response_fn=lambda _: 'repeat',
memory=memory,
-llm_registry=mock_llm_registry,
)
# Verify that the controller's last error was set
@@ -131,7 +128,7 @@ async def test_memory_on_event_exception_handling(
@pytest.mark.asyncio
async def test_memory_on_workspace_context_recall_exception_handling(
-memory, event_stream, mock_agent, mock_llm_registry
+memory, event_stream, mock_agent
):
"""Test that exceptions in Memory._on_workspace_context_recall are properly handled via status callback."""
# Create a mock runtime
@@ -154,7 +151,6 @@ async def test_memory_on_workspace_context_recall_exception_handling(
sid='test',
fake_user_response_fn=lambda _: 'repeat',
memory=memory,
-llm_registry=mock_llm_registry,
)
# Verify that the controller's last error was set