From c76a659cdee31fa8e1e864deac2f22d087c14df5 Mon Sep 17 00:00:00 2001
From: Aditya Bharat Soni
Date: Tue, 4 Mar 2025 16:28:33 -0500
Subject: [PATCH] Condenser for Browser Output Observations (#6578)

Signed-off-by: dependabot[bot]
Co-authored-by: Rick van Hattem
Co-authored-by: Engel Nyst
Co-authored-by: Boxuan Li
Co-authored-by: sp.wack <83104063+amanape@users.noreply.github.com>
Co-authored-by: Xingyao Wang
Co-authored-by: tofarr
Co-authored-by: Xingyao Wang
Co-authored-by: Rohit Malhotra
Co-authored-by: openhands
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Akim Tsvigun <36672861+Aktsvigun@users.noreply.github.com>
Co-authored-by: Akim Tsvigun
Co-authored-by: mamoodi
Co-authored-by: OpenHands
Co-authored-by: Calvin Smith
Co-authored-by: Calvin Smith
Co-authored-by: Graham Neubig
Co-authored-by: Peter Dave Hello
Co-authored-by: Ray Myers
---
 openhands/core/config/condenser_config.py     | 12 +++++
 openhands/memory/condenser/impl/__init__.py   |  4 ++
 .../impl/browser_output_condenser.py          | 48 +++++++++++++++++++
 tests/unit/test_condenser.py                  | 43 +++++++++++++++++
 4 files changed, 107 insertions(+)
 create mode 100644 openhands/memory/condenser/impl/browser_output_condenser.py

diff --git a/openhands/core/config/condenser_config.py b/openhands/core/config/condenser_config.py
index b2a3caccc0..e2ea3c4fa1 100644
--- a/openhands/core/config/condenser_config.py
+++ b/openhands/core/config/condenser_config.py
@@ -27,6 +27,17 @@ class ObservationMaskingCondenserConfig(BaseModel):
     model_config = {'extra': 'forbid'}
 
 
+class BrowserOutputCondenserConfig(BaseModel):
+    """Configuration for the BrowserOutputCondenser, which masks all but the most recent browser outputs."""
+
+    type: Literal['browser_output_masking'] = Field('browser_output_masking')  # constant tag; the only accepted value
+    attention_window: int = Field(
+        default=1,
+        description='The number of most recent browser output observations that will not be masked.',
+        ge=1,  # at least one browser output always stays unmasked
+    )
+
+
 class RecentEventsCondenserConfig(BaseModel):
     """Configuration for
RecentEventsCondenser."""
 
@@ -115,6 +126,7 @@ class LLMAttentionCondenserConfig(BaseModel):
 CondenserConfig = (
     NoOpCondenserConfig
     | ObservationMaskingCondenserConfig
+    | BrowserOutputCondenserConfig
     | RecentEventsCondenserConfig
     | LLMSummarizingCondenserConfig
     | AmortizedForgettingCondenserConfig
diff --git a/openhands/memory/condenser/impl/__init__.py b/openhands/memory/condenser/impl/__init__.py
index cca01ac8e2..0a2150cc76 100644
--- a/openhands/memory/condenser/impl/__init__.py
+++ b/openhands/memory/condenser/impl/__init__.py
@@ -1,6 +1,9 @@
 from openhands.memory.condenser.impl.amortized_forgetting_condenser import (
     AmortizedForgettingCondenser,
 )
+from openhands.memory.condenser.impl.browser_output_condenser import (
+    BrowserOutputCondenser,
+)
 from openhands.memory.condenser.impl.llm_attention_condenser import (
     ImportantEventSelection,
     LLMAttentionCondenser,
@@ -23,5 +26,6 @@ __all__ = [
     'LLMSummarizingCondenser',
     'NoOpCondenser',
     'ObservationMaskingCondenser',
+    'BrowserOutputCondenser',
     'RecentEventsCondenser',
 ]
diff --git a/openhands/memory/condenser/impl/browser_output_condenser.py b/openhands/memory/condenser/impl/browser_output_condenser.py
new file mode 100644
index 0000000000..b0fd2e805d
--- /dev/null
+++ b/openhands/memory/condenser/impl/browser_output_condenser.py
@@ -0,0 +1,48 @@
+from __future__ import annotations
+
+from openhands.core.config.condenser_config import BrowserOutputCondenserConfig
+from openhands.events.event import Event
+from openhands.events.observation import BrowserOutputObservation
+from openhands.events.observation.agent import AgentCondensationObservation
+from openhands.memory.condenser.condenser import Condenser
+
+
+class BrowserOutputCondenser(Condenser):
+    """A condenser that masks the observations from browser outputs outside of a recent attention window.
+
+    The intent here is to mask just the browser outputs and leave everything else untouched. This is important because currently we provide screenshots and accessibility trees as input to the model for browser observations. These are really large and consume a lot of tokens without any benefit in performance. So we mask all such observations from earlier timesteps and leave only the `attention_window` most recent ones (one by default) in context.
+    """
+
+    def __init__(self, attention_window: int = 1):
+        self.attention_window = attention_window  # number of newest browser observations left unmasked
+        super().__init__()
+
+    def condense(self, events: list[Event]) -> list[Event]:
+        """Return `events` in order, with every BrowserOutputObservation older than the newest `attention_window` ones replaced by a URL-only placeholder."""
+        results: list[Event] = []
+        cnt: int = 0  # browser observations seen so far, newest first
+        for event in reversed(events):  # walk from newest to oldest
+            if (
+                isinstance(event, BrowserOutputObservation)
+                and cnt >= self.attention_window
+            ):
+                results.append(
+                    AgentCondensationObservation(
+                        f'Current URL: {event.url}\nContent Omitted'
+                    )
+                )
+            else:
+                results.append(event)
+            if isinstance(event, BrowserOutputObservation):
+                cnt += 1
+
+        return list(reversed(results))  # back to the input order
+
+    @classmethod
+    def from_config(
+        cls, config: BrowserOutputCondenserConfig
+    ) -> BrowserOutputCondenser:
+        return BrowserOutputCondenser(**config.model_dump(exclude={'type'}))  # __init__ takes no 'type'
+
+
+BrowserOutputCondenser.register_config(BrowserOutputCondenserConfig)
diff --git a/tests/unit/test_condenser.py b/tests/unit/test_condenser.py
index 99561ae63c..83c32d4c0d 100644
--- a/tests/unit/test_condenser.py
+++ b/tests/unit/test_condenser.py
@@ -7,6 +7,7 @@ import pytest
 from openhands.controller.state.state import State
 from openhands.core.config.condenser_config import (
     AmortizedForgettingCondenserConfig,
+    BrowserOutputCondenserConfig,
     LLMAttentionCondenserConfig,
     LLMSummarizingCondenserConfig,
     NoOpCondenserConfig,
@@ -15,6 +16,7 @@ from openhands.core.config.condenser_config import (
 )
 from openhands.core.config.llm_config import LLMConfig
 from openhands.events.event import Event, EventSource
+from openhands.events.observation import BrowserOutputObservation
 from
openhands.events.observation.agent import AgentCondensationObservation
 from openhands.events.observation.observation import Observation
 from openhands.llm import LLM
@@ -22,6 +24,7 @@ from openhands.memory.condenser import Condenser
 from openhands.memory.condenser.condenser import RollingCondenser
 from openhands.memory.condenser.impl import (
     AmortizedForgettingCondenser,
+    BrowserOutputCondenser,
     ImportantEventSelection,
     LLMAttentionCondenser,
     LLMSummarizingCondenser,
@@ -154,6 +157,46 @@ def test_observation_masking_condenser_respects_attention_window(mock_state):
             assert event == condensed_event
 
 
+def test_browser_output_condenser_from_config():
+    """Test that Condenser.from_config builds a BrowserOutputCondenser carrying the configured attention window."""
+    attention_window = 5
+    config = BrowserOutputCondenserConfig(attention_window=attention_window)
+    condenser = Condenser.from_config(config)
+
+    assert isinstance(condenser, BrowserOutputCondenser)
+    assert condenser.attention_window == attention_window
+
+
+def test_browser_output_condenser_respects_attention_window(mock_state):
+    """Test that BrowserOutputCondenser masks only browser observations older than the attention window and leaves other events untouched."""
+    attention_window = 3
+    condenser = BrowserOutputCondenser(attention_window=attention_window)
+
+    events = [
+        BrowserOutputObservation('Observation 1', url='', trigger_by_action=''),
+        BrowserOutputObservation('Observation 2', url='', trigger_by_action=''),
+        create_test_event('Event 3'),
+        create_test_event('Event 4'),
+        BrowserOutputObservation('Observation 3', url='', trigger_by_action=''),
+        BrowserOutputObservation('Observation 4', url='', trigger_by_action=''),
+    ]
+
+    mock_state.history = events
+    result = condenser.condensed_history(mock_state)
+
+    assert len(result) == len(events)
+    cnt = sum(isinstance(e, BrowserOutputObservation) for e in events)  # browser observations from here to the end
+    for event, condensed_event in zip(events, result):
+        if isinstance(event, BrowserOutputObservation):
+            if cnt > attention_window:  # this one lies outside the newest `attention_window`
+                assert 'Content Omitted' in str(condensed_event)
+            else:
+                assert event == condensed_event
+            cnt -= 1
+        else:
+            assert event == condensed_event
+
+
 def test_recent_events_condenser_from_config():
     """Test that RecentEventsCondenser objects can be made from config."""
     max_events = 5