diff --git a/openhands/agenthub/visualbrowsing_agent/visualbrowsing_agent.py b/openhands/agenthub/visualbrowsing_agent/visualbrowsing_agent.py index 0a154190d7..f27ea74e9a 100644 --- a/openhands/agenthub/visualbrowsing_agent/visualbrowsing_agent.py +++ b/openhands/agenthub/visualbrowsing_agent/visualbrowsing_agent.py @@ -202,6 +202,7 @@ Note: tabs = '' last_obs = None last_action = None + set_of_marks = None # Initialize set_of_marks to None if len(state.history) == 1: # for visualwebarena, webarena and miniwob++ eval, we need to retrieve the initial observation already in browser env @@ -217,6 +218,9 @@ Note: # agent has responded, task finished. return AgentFinishAction(outputs={'content': event.content}) elif isinstance(event, Observation): + # Only process BrowserOutputObservation and skip other observation types + if not isinstance(event, BrowserOutputObservation): + continue last_obs = event if len(prev_actions) >= 1: # ignore noop() diff --git a/openhands/memory/conversation_memory.py b/openhands/memory/conversation_memory.py index b308fef142..3c43116e50 100644 --- a/openhands/memory/conversation_memory.py +++ b/openhands/memory/conversation_memory.py @@ -52,7 +52,6 @@ class ConversationMemory: initial_messages: list[Message], max_message_chars: int | None = None, vision_is_active: bool = False, - enable_som_visual_browsing: bool = False, ) -> list[Message]: """Process state history into a list of messages for the LLM. @@ -64,11 +63,13 @@ class ConversationMemory: max_message_chars: The maximum number of characters in the content of an event included in the prompt to the LLM. Larger observations are truncated. vision_is_active: Whether vision is active in the LLM. If True, image URLs will be included. - enable_som_visual_browsing: Whether to enable visual browsing for the SOM model. """ events = condensed_history + # log visual browsing status + logger.debug(f'Visual browsing: {self.agent_config.enable_som_visual_browsing}') + # Process special events first (system prompts, etc.) messages = initial_messages