feature: Condenser Interface and Defaults (#5306)

Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Calvin Smith <calvin@all-hands.dev>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
Calvin Smith 2025-01-07 13:36:30 -07:00 committed by GitHub
parent 561f308401
commit 6e4ff56934
17 changed files with 2673 additions and 1795 deletions

View File

@ -15,6 +15,7 @@ from evaluation.utils.shared import (
EvalOutput,
assert_and_raise,
codeact_user_response,
get_metrics,
is_fatal_evaluation_error,
make_metadata,
prepare_dataset,
@ -148,6 +149,7 @@ def get_config(
codeact_enable_jupyter=False,
codeact_enable_browsing=RUN_WITH_BROWSING,
codeact_enable_llm_editor=False,
condenser=metadata.condenser_config,
)
config.set_agent_config(agent_config)
return config
@ -448,7 +450,7 @@ def process_instance(
# NOTE: this is NO LONGER the event stream, but an agent history that includes delegate agent's events
histories = [event_to_dict(event) for event in state.history]
metrics = state.metrics.get() if state.metrics else None
metrics = get_metrics(state)
# Save the output
output = EvalOutput(

View File

@ -17,6 +17,10 @@ from tqdm import tqdm
from openhands.controller.state.state import State
from openhands.core.config import LLMConfig
from openhands.core.config.condenser_config import (
CondenserConfig,
NoOpCondenserConfig,
)
from openhands.core.exceptions import (
AgentRuntimeBuildError,
AgentRuntimeDisconnectedError,
@ -33,6 +37,7 @@ from openhands.events.action.message import MessageAction
from openhands.events.event import Event
from openhands.events.serialization.event import event_to_dict
from openhands.events.utils import get_pairs_from_events
from openhands.memory.condenser import get_condensation_metadata
class EvalMetadata(BaseModel):
@ -45,11 +50,17 @@ class EvalMetadata(BaseModel):
dataset: str | None = None
data_split: str | None = None
details: dict[str, Any] | None = None
condenser_config: CondenserConfig | None = None
def model_dump(self, *args, **kwargs):
dumped_dict = super().model_dump(*args, **kwargs)
# avoid leaking sensitive information
dumped_dict['llm_config'] = self.llm_config.to_safe_dict()
if hasattr(self.condenser_config, 'llm_config'):
dumped_dict['condenser_config']['llm_config'] = (
self.condenser_config.llm_config.to_safe_dict()
)
return dumped_dict
def model_dump_json(self, *args, **kwargs):
@ -57,6 +68,11 @@ class EvalMetadata(BaseModel):
dumped_dict = json.loads(dumped)
# avoid leaking sensitive information
dumped_dict['llm_config'] = self.llm_config.to_safe_dict()
if hasattr(self.condenser_config, 'llm_config'):
dumped_dict['condenser_config']['llm_config'] = (
self.condenser_config.llm_config.to_safe_dict()
)
logger.debug(f'Dumped metadata: {dumped_dict}')
return json.dumps(dumped_dict)
@ -192,6 +208,7 @@ def make_metadata(
eval_output_dir: str,
data_split: str | None = None,
details: dict[str, Any] | None = None,
condenser_config: CondenserConfig | None = None,
) -> EvalMetadata:
model_name = llm_config.model.split('/')[-1]
model_path = model_name.replace(':', '_').replace('@', '-')
@ -222,6 +239,9 @@ def make_metadata(
dataset=dataset_name,
data_split=data_split,
details=details,
condenser_config=condenser_config
if condenser_config
else NoOpCondenserConfig(),
)
metadata_json = metadata.model_dump_json()
logger.info(f'Metadata: {metadata_json}')
@ -551,3 +571,10 @@ def is_fatal_evaluation_error(error: str | None) -> bool:
return True
return False
def get_metrics(state: State) -> dict[str, Any]:
"""Extract metrics from the state."""
metrics = state.metrics.get() if state.metrics else {}
metrics['condenser'] = get_condensation_metadata(state)
return metrics
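As a quick illustration of what the new `get_metrics` helper returns, the sketch below builds a stand-in `State` the same way the condenser tests later in this diff do; the `accumulated_cost` value is purely illustrative and not taken from the commit.

from unittest.mock import MagicMock

from evaluation.utils.shared import get_metrics
from openhands.controller.state.state import State

# Stand-in for a finished evaluation run: no LLM metrics yet, one recorded condensation batch.
state = MagicMock(spec=State)
state.metrics = None
state.extra_data = {'condenser_meta': [{'metrics': {'accumulated_cost': 0.01}}]}

print(get_metrics(state))
# {'condenser': [{'metrics': {'accumulated_cost': 0.01}}]}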

View File

@ -24,6 +24,7 @@ from openhands.events.action import (
MessageAction,
)
from openhands.events.observation import (
AgentCondensationObservation,
AgentDelegateObservation,
BrowserOutputObservation,
CmdOutputObservation,
@ -36,6 +37,7 @@ from openhands.events.observation.error import ErrorObservation
from openhands.events.observation.observation import Observation
from openhands.events.serialization.event import truncate_content
from openhands.llm.llm import LLM
from openhands.memory.condenser import Condenser
from openhands.runtime.plugins import (
AgentSkillsRequirement,
JupyterRequirement,
@ -115,6 +117,9 @@ class CodeActAgent(Agent):
disabled_microagents=self.config.disabled_microagents,
)
self.condenser = Condenser.from_config(self.config.condenser)
logger.debug(f'Using condenser: {self.condenser}')
def get_action_message(
self,
action: Action,
@ -322,6 +327,9 @@ class CodeActAgent(Agent):
text = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars)
text += '\n[Last action has been rejected by the user]'
message = Message(role='user', content=[TextContent(text=text)])
elif isinstance(obs, AgentCondensationObservation):
text = truncate_content(obs.content, max_message_chars)
message = Message(role='user', content=[TextContent(text=text)])
else:
# If an observation message is not returned, it will cause an error
# when the LLM tries to return the next message
@ -442,7 +450,10 @@ class CodeActAgent(Agent):
pending_tool_call_action_messages: dict[str, Message] = {}
tool_call_id_to_message: dict[str, Message] = {}
events = list(state.history)
# Condense the events from the state.
events = self.condenser.condensed_history(state)
for event in events:
# create a regular message from an event
if isinstance(event, Action):

View File

@ -1,5 +1,6 @@
from dataclasses import dataclass, fields
from dataclasses import dataclass, field, fields
from openhands.core.config.condenser_config import CondenserConfig, NoOpCondenserConfig
from openhands.core.config.config_utils import get_field_info
@ -18,6 +19,7 @@ class AgentConfig:
llm_config: The name of the llm config to use. If specified, this will override global llm config.
use_microagents: Whether to use microagents at all. Default is True.
disabled_microagents: A list of microagents to disable. Default is None.
condenser: Configuration for the memory condenser. Default is NoOpCondenserConfig.
"""
codeact_enable_browsing: bool = True
@ -29,6 +31,7 @@ class AgentConfig:
llm_config: str | None = None
use_microagents: bool = True
disabled_microagents: list[str] | None = None
condenser: CondenserConfig = field(default_factory=NoOpCondenserConfig) # type: ignore
def defaults_to_dict(self) -> dict:
"""Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""

View File

@ -0,0 +1,90 @@
from typing import Literal
from pydantic import BaseModel, Field
from openhands.core.config.llm_config import LLMConfig
class NoOpCondenserConfig(BaseModel):
"""Configuration for NoOpCondenser."""
type: Literal['noop'] = Field('noop')
class ObservationMaskingCondenserConfig(BaseModel):
"""Configuration for ObservationMaskingCondenser."""
type: Literal['observation_masking'] = Field('observation_masking')
attention_window: int = Field(
default=10,
description='The number of most-recent events where observations will not be masked.',
ge=1,
)
class RecentEventsCondenserConfig(BaseModel):
"""Configuration for RecentEventsCondenser."""
type: Literal['recent'] = Field('recent')
keep_first: int = Field(
default=0,
description='The number of initial events to always keep at the start of the history.',
ge=0,
)
max_events: int = Field(
default=10, description='Maximum number of events to keep.', ge=1
)
class LLMSummarizingCondenserConfig(BaseModel):
"""Configuration for LLMCondenser."""
type: Literal['llm'] = Field('llm')
llm_config: LLMConfig = Field(
..., description='Configuration for the LLM to use for condensing.'
)
class AmortizedForgettingCondenserConfig(BaseModel):
"""Configuration for AmortizedForgettingCondenser."""
type: Literal['amortized'] = Field('amortized')
max_size: int = Field(
default=100,
description='Maximum size of the condensed history before triggering forgetting.',
ge=2,
)
keep_first: int = Field(
default=0,
description='Number of initial events to always keep in history.',
ge=0,
)
class LLMAttentionCondenserConfig(BaseModel):
"""Configuration for LLMAttentionCondenser."""
type: Literal['llm_attention'] = Field('llm_attention')
llm_config: LLMConfig = Field(
..., description='Configuration for the LLM to use for attention.'
)
max_size: int = Field(
default=100,
description='Maximum size of the condensed history before triggering forgetting.',
ge=2,
)
keep_first: int = Field(
default=0,
description='Number of initial events to always keep in history.',
ge=0,
)
CondenserConfig = (
NoOpCondenserConfig
| ObservationMaskingCondenserConfig
| RecentEventsCondenserConfig
| LLMSummarizingCondenserConfig
| AmortizedForgettingCondenserConfig
| LLMAttentionCondenserConfig
)
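Each variant carries a distinct `type` literal, which is what keeps the members of the `CondenserConfig` union apart once a config is serialized. A small sketch of the dumped shapes (field values are arbitrary):

from openhands.core.config.condenser_config import (
    AmortizedForgettingCondenserConfig,
    NoOpCondenserConfig,
)

print(NoOpCondenserConfig().model_dump())
# {'type': 'noop'}

print(AmortizedForgettingCondenserConfig(max_size=80, keep_first=4).model_dump())
# {'type': 'amortized', 'max_size': 80, 'keep_first': 4}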

View File

@ -44,5 +44,8 @@ class ObservationTypeSchema(BaseModel):
USER_REJECTED: str = Field(default='user_rejected')
CONDENSE: str = Field(default='condense')
"""Result of a condensation operation."""
ObservationType = ObservationTypeSchema()

View File

@ -1,4 +1,7 @@
from openhands.events.observation.agent import AgentStateChangedObservation
from openhands.events.observation.agent import (
AgentCondensationObservation,
AgentStateChangedObservation,
)
from openhands.events.observation.browse import BrowserOutputObservation
from openhands.events.observation.commands import (
CmdOutputMetadata,
@ -32,4 +35,5 @@ __all__ = [
'AgentDelegateObservation',
'SuccessObservation',
'UserRejectObservation',
'AgentCondensationObservation',
]

View File

@ -14,3 +14,14 @@ class AgentStateChangedObservation(Observation):
@property
def message(self) -> str:
return ''
@dataclass
class AgentCondensationObservation(Observation):
"""The output of a condensation action."""
observation: str = ObservationType.CONDENSE
@property
def message(self) -> str:
return self.content

View File

@ -1,6 +1,9 @@
import copy
from openhands.events.observation.agent import AgentStateChangedObservation
from openhands.events.observation.agent import (
AgentCondensationObservation,
AgentStateChangedObservation,
)
from openhands.events.observation.browse import BrowserOutputObservation
from openhands.events.observation.commands import (
CmdOutputMetadata,
@ -32,6 +35,7 @@ observations = (
ErrorObservation,
AgentStateChangedObservation,
UserRejectObservation,
AgentCondensationObservation,
)
OBSERVATION_TYPE_TO_CLASS = {

View File

@ -1,4 +1,4 @@
from openhands.memory.condenser import MemoryCondenser
from openhands.memory.condenser import Condenser
from openhands.memory.memory import LongTermMemory
__all__ = ['LongTermMemory', 'MemoryCondenser']
__all__ = ['LongTermMemory', 'Condenser']

View File

@ -1,24 +1,409 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from contextlib import contextmanager
from typing import Any
from litellm import supports_response_schema
from pydantic import BaseModel
from typing_extensions import override
from openhands.controller.state.state import State
from openhands.core.config.condenser_config import (
AmortizedForgettingCondenserConfig,
CondenserConfig,
LLMAttentionCondenserConfig,
LLMSummarizingCondenserConfig,
NoOpCondenserConfig,
ObservationMaskingCondenserConfig,
RecentEventsCondenserConfig,
)
from openhands.core.logger import openhands_logger as logger
from openhands.events.event import Event
from openhands.events.observation import AgentCondensationObservation, Observation
from openhands.llm.llm import LLM
CONDENSER_METADATA_KEY = 'condenser_meta'
"""Key identifying where metadata is stored in a `State` object's `extra_data` field."""
class MemoryCondenser:
def condense(self, summarize_prompt: str, llm: LLM):
"""Attempts to condense the memory by using the llm
Parameters:
- llm (LLM): llm to be used for summarization
def get_condensation_metadata(state: State) -> list[dict[str, Any]]:
"""Utility function to retrieve a list of metadata batches from a `State`.
Args:
state: The state to retrieve metadata from.
Returns:
list[dict[str, Any]]: A list of metadata batches, each representing a condensation.
"""
if CONDENSER_METADATA_KEY in state.extra_data:
return state.extra_data[CONDENSER_METADATA_KEY]
return []
class Condenser(ABC):
"""Abstract condenser interface.
Condensers take a list of `Event` objects and reduce them into a potentially smaller list.
Agents can use condensers to reduce the number of events they need to consider when deciding which action to take. To use a condenser, call the `condensed_history` method on the current `State` and use the result in place of the full history.
Example usage::
condenser = Condenser.from_config(condenser_config)
events = condenser.condensed_history(state)
"""
def __init__(self):
self._metadata_batch: dict[str, Any] = {}
def add_metadata(self, key: str, value: Any) -> None:
"""Add information to the current metadata batch.
Any key/value pairs added to the metadata batch will be recorded in the `State` at the end of the current condensation.
Args:
key: The key to store the metadata under.
value: The metadata to store.
"""
self._metadata_batch[key] = value
def write_metadata(self, state: State) -> None:
"""Write the current batch of metadata to the `State`.
Resets the current metadata batch: any metadata added after this call will be stored in a new batch and written to the `State` at the end of the next condensation.
"""
if CONDENSER_METADATA_KEY not in state.extra_data:
state.extra_data[CONDENSER_METADATA_KEY] = []
if self._metadata_batch:
state.extra_data[CONDENSER_METADATA_KEY].append(self._metadata_batch)
# Since the batch has been written, clear it for the next condensation
self._metadata_batch = {}
@contextmanager
def metadata_batch(self, state: State):
"""Context manager to ensure batched metadata is always written to the `State`."""
try:
yield
finally:
self.write_metadata(state)
@abstractmethod
def condense(self, events: list[Event]) -> list[Event]:
"""Condense a sequence of events into a potentially smaller list.
New condenser strategies should override this method to implement their own condensation logic. Call `self.add_metadata` in the implementation to record any relevant per-condensation diagnostic information.
Args:
events: A list of events representing the entire history of the agent.
Returns:
list[Event]: An event sequence representing a condensed history of the agent.
"""
def condensed_history(self, state: State) -> list[Event]:
"""Condense the state's history."""
with self.metadata_batch(state):
return self.condense(state.history)
@classmethod
def from_config(cls, config: CondenserConfig) -> Condenser:
"""Create a condenser from a configuration object.
Args:
config: Configuration for the condenser.
Returns:
Condenser: A condenser instance.
Raises:
- Exception: the same exception as it got from the llm or processing the response
ValueError: If the condenser type is not recognized.
"""
match config:
case NoOpCondenserConfig():
return NoOpCondenser()
case ObservationMaskingCondenserConfig():
return ObservationMaskingCondenser(
**config.model_dump(exclude=['type'])
)
case RecentEventsCondenserConfig():
return RecentEventsCondenser(**config.model_dump(exclude=['type']))
case LLMSummarizingCondenserConfig(llm_config=llm_config):
return LLMSummarizingCondenser(llm=LLM(config=llm_config))
case AmortizedForgettingCondenserConfig():
return AmortizedForgettingCondenser(
**config.model_dump(exclude=['type'])
)
case LLMAttentionCondenserConfig(llm_config=llm_config):
return LLMAttentionCondenser(
llm=LLM(config=llm_config),
**config.model_dump(exclude=['type', 'llm_config']),
)
case _:
raise ValueError(f'Unknown condenser config: {config}')
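The interface is intentionally small: a new strategy only has to implement `condense`, and can call `add_metadata` to record diagnostics. The hypothetical strategy below (not part of this commit, and assuming the module-level imports above) illustrates the contract; it would still need its own config class and a `from_config` branch before it could be selected through `AgentConfig`.

class EveryOtherEventCondenser(Condenser):
    """Illustrative only: keeps every other event."""

    def condense(self, events: list[Event]) -> list[Event]:
        kept = events[::2]
        # Record how much was dropped; the batch ends up in
        # State.extra_data['condenser_meta'] via write_metadata.
        self.add_metadata('forgotten_events', len(events) - len(kept))
        return kept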
class RollingCondenser(Condenser, ABC):
"""Base class for a specialized condenser strategy that applies condensation to a rolling history.
The rolling history is computed by appending new events to the most recent condensation. For example, the sequence of calls::
assert state.history == [event1, event2, event3]
condensation = condenser.condensed_history(state)
# ...new events are added to the state...
assert state.history == [event1, event2, event3, event4, event5]
condenser.condensed_history(state)
will result in the second call to `condensed_history` passing `condensation + [event4, event5]` to the `condense` method.
"""
def __init__(self) -> None:
self._condensation: list[Event] = []
self._last_history_length: int = 0
super().__init__()
@override
def condensed_history(self, state: State) -> list[Event]:
new_events = state.history[self._last_history_length :]
with self.metadata_batch(state):
results = self.condense(self._condensation + new_events)
self._condensation = results
self._last_history_length = len(state.history)
return results
class NoOpCondenser(Condenser):
"""A condenser that does nothing to the event sequence."""
def condense(self, events: list[Event]) -> list[Event]:
"""Returns the list of events unchanged."""
return events
class ObservationMaskingCondenser(Condenser):
"""A condenser that masks the values of observations outside of a recent attention window."""
def __init__(self, attention_window: int = 5):
self.attention_window = attention_window
super().__init__()
def condense(self, events: list[Event]) -> list[Event]:
"""Replace the content of observations outside of the attention window with a placeholder."""
results: list[Event] = []
for i, event in enumerate(events):
if (
isinstance(event, Observation)
and i < len(events) - self.attention_window
):
results.append(AgentCondensationObservation('<MASKED>'))
else:
results.append(event)
return results
class RecentEventsCondenser(Condenser):
"""A condenser that only keeps a certain number of the most recent events."""
def __init__(self, keep_first: int = 0, max_events: int = 10):
self.keep_first = keep_first
self.max_events = max_events
super().__init__()
def condense(self, events: list[Event]) -> list[Event]:
"""Keep only the most recent events (up to `max_events`)."""
head = events[: self.keep_first]
tail_length = max(0, self.max_events - len(head))
tail = events[-tail_length:]
return head + tail
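A tiny worked example of the head/tail arithmetic, consistent with the unit tests later in this diff (assumes the module-level imports above; the events are bare `Event` objects):

events = [Event() for _ in range(5)]
condenser = RecentEventsCondenser(keep_first=1, max_events=3)
# head = events[:1], tail = events[-2:], so three events survive.
assert condenser.condense(events) == [events[0], events[3], events[4]]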
class LLMSummarizingCondenser(Condenser):
"""A condenser that relies on a language model to summarize the event sequence as a single event."""
def __init__(self, llm: LLM):
self.llm = llm
super().__init__()
def condense(self, events: list[Event]) -> list[Event]:
"""Applies an LLM to summarize the list of events.
Raises:
Exception: If the LLM is unable to summarize the event sequence.
"""
try:
messages = [{'content': summarize_prompt, 'role': 'user'}]
resp = llm.completion(messages=messages)
summary_response = resp['choices'][0]['message']['content']
return summary_response
except Exception as e:
logger.error('Error condensing thoughts: %s', str(e), exc_info=False)
# Convert events to a format suitable for summarization
events_text = '\n'.join(f'{e.timestamp}: {e.message}' for e in events)
summarize_prompt = f'Please summarize these events:\n{events_text}'
# TODO If the llm fails with ContextWindowExceededError, we can try to condense the memory chunk by chunk
raise
resp = self.llm.completion(
messages=[{'content': summarize_prompt, 'role': 'user'}]
)
summary_response = resp.choices[0].message.content
# Create a new summary event with the condensed content
summary_event = AgentCondensationObservation(summary_response)
# Add metrics to state
self.add_metadata('response', resp.model_dump())
self.add_metadata('metrics', self.llm.metrics.get())
return [summary_event]
except Exception as e:
logger.error('Error condensing events: %s', str(e), exc_info=False)
raise e
class AmortizedForgettingCondenser(RollingCondenser):
"""A condenser that maintains a condensed history and forgets old events when it grows too large."""
def __init__(self, max_size: int = 100, keep_first: int = 0):
"""Initialize the condenser.
Args:
max_size: Maximum size of history before forgetting.
keep_first: Number of initial events to always keep.
Raises:
ValueError: If keep_first is not less than half of max_size, keep_first is negative, or max_size is non-positive.
"""
if keep_first >= max_size // 2:
raise ValueError(
f'keep_first ({keep_first}) must be less than half of max_size ({max_size})'
)
if keep_first < 0:
raise ValueError(f'keep_first ({keep_first}) cannot be negative')
if max_size < 1:
raise ValueError(f'max_size ({max_size}) must be positive')
self.max_size = max_size
self.keep_first = keep_first
super().__init__()
def condense(self, events: list[Event]) -> list[Event]:
"""Apply the amortized forgetting strategy to the given list of events."""
if len(events) <= self.max_size:
return events
target_size = self.max_size // 2
head = events[: self.keep_first]
events_from_tail = target_size - len(head)
tail = events[-events_from_tail:]
return head + tail
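A worked example of the forgetting step (illustrative, assuming the module-level imports above): once the history exceeds `max_size`, it is trimmed to `max_size // 2` events while always preserving the first `keep_first` entries.

condenser = AmortizedForgettingCondenser(max_size=4, keep_first=1)
events = [Event() for _ in range(5)]  # 5 > max_size, so forgetting triggers
# target_size = 4 // 2 = 2: the first event plus the single most recent one survive.
assert condenser.condense(events) == [events[0], events[4]]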
class ImportantEventSelection(BaseModel):
"""Utility class for the `LLMAttentionCondenser` that forces the LLM to return a list of integers."""
ids: list[int]
class LLMAttentionCondenser(RollingCondenser):
"""Rolling condenser strategy that uses an LLM to select the most important events when condensing the history."""
def __init__(self, llm: LLM, max_size: int = 100, keep_first: int = 0):
if keep_first >= max_size // 2:
raise ValueError(
f'keep_first ({keep_first}) must be less than half of max_size ({max_size})'
)
if keep_first < 0:
raise ValueError(f'keep_first ({keep_first}) cannot be negative')
if max_size < 1:
raise ValueError(f'max_size ({max_size}) must be positive')
self.max_size = max_size
self.keep_first = keep_first
self.llm = llm
# This condenser relies on the `response_schema` feature, which is not supported by all LLMs
if not supports_response_schema(
model=self.llm.config.model,
custom_llm_provider=self.llm.config.custom_llm_provider,
):
raise ValueError(
"The LLM model must support the 'response_schema' parameter to use the LLMAttentionCondenser."
)
super().__init__()
def condense(self, events: list[Event]) -> list[Event]:
"""If the history is too long, use an LLM to select the most important events."""
if len(events) <= self.max_size:
return events
target_size = self.max_size // 2
head = events[: self.keep_first]
events_from_tail = target_size - len(head)
message: str = """You will be given a list of actions, observations, and thoughts from a coding agent.
Each item in the list has an identifier. Please sort the identifiers in order of how important the
contents of the item are for the next step of the coding agent's task, from most important to least
important."""
response = self.llm.completion(
messages=[
{'content': message, 'role': 'user'},
*[
{
'content': f'<ID>{e.id}</ID>\n<CONTENT>{e.message}</CONTENT>',
'role': 'user',
}
for e in events
],
],
response_format={
'type': 'json_schema',
'json_schema': {
'name': 'ImportantEventSelection',
'schema': ImportantEventSelection.model_json_schema(),
},
},
)
response_ids = ImportantEventSelection.model_validate_json(
response.choices[0].message.content
).ids
self.add_metadata('all_event_ids', [event.id for event in events])
self.add_metadata('response_ids', response_ids)
self.add_metadata('metrics', self.llm.metrics.get())
# Filter out any IDs from the head and trim the results down
head_ids = [event.id for event in head]
response_ids = [
response_id for response_id in response_ids if response_id not in head_ids
][:events_from_tail]
# If the response IDs aren't _long_ enough, iterate backwards through the events and add any unfound IDs to the list.
for event in reversed(events):
if len(response_ids) >= events_from_tail:
break
if event.id not in response_ids:
response_ids.append(event.id)
# Grab the events associated with the response IDs
tail = [event for event in events if event.id in response_ids]
return head + tail
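Because `LLMAttentionCondenser` rejects models without structured-output support, code that picks a condenser type at runtime may want the same litellm check up front. A hedged sketch (model name, key, and sizes are placeholders):

from litellm import supports_response_schema

from openhands.core.config.condenser_config import (
    AmortizedForgettingCondenserConfig,
    LLMAttentionCondenserConfig,
)
from openhands.core.config.llm_config import LLMConfig

llm_config = LLMConfig(model='gpt-4o', api_key='test_key')

# Fall back to a purely structural strategy when the model cannot return JSON schemas.
if supports_response_schema(
    model=llm_config.model,
    custom_llm_provider=llm_config.custom_llm_provider,
):
    condenser_config = LLMAttentionCondenserConfig(llm_config=llm_config, max_size=80)
else:
    condenser_config = AmortizedForgettingCondenserConfig(max_size=80)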

poetry.lock (generated): 3250 lines changed; file diff suppressed because it is too large.

View File

@ -14,7 +14,7 @@ packages = [
python = "^3.12"
datasets = "*"
pandas = "*"
litellm = "^1.54.1"
litellm = "^1.55.4"
google-generativeai = "*" # To use litellm with Gemini Pro API
google-api-python-client = "*" # For Google Sheets API
google-auth-httplib2 = "*" # For Google Sheets authentication

View File

@ -1,6 +1,7 @@
from unittest.mock import Mock
import pytest
from litellm import ChatCompletionMessageToolCall
from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
from openhands.agenthub.codeact_agent.function_calling import (
@ -15,6 +16,7 @@ from openhands.agenthub.codeact_agent.function_calling import (
get_tools,
response_to_actions,
)
from openhands.controller.state.state import State
from openhands.core.config import AgentConfig, LLMConfig
from openhands.core.exceptions import FunctionCallNotExistsError
from openhands.core.message import ImageContent, TextContent
@ -48,6 +50,15 @@ def agent() -> CodeActAgent:
return agent
@pytest.fixture
def mock_state() -> State:
state = Mock(spec=State)
state.history = []
state.extra_data = {}
return state
def test_cmd_output_observation_message(agent: CodeActAgent):
agent.config.function_calling = False
obs = CmdOutputObservation(
@ -481,7 +492,7 @@ def test_response_to_actions_invalid_tool():
response_to_actions(mock_response)
def test_step_with_no_pending_actions():
def test_step_with_no_pending_actions(mock_state: State):
# Mock the LLM response
mock_response = Mock()
mock_response.id = 'mock_id'
@ -502,16 +513,68 @@ def test_step_with_no_pending_actions():
agent = CodeActAgent(llm=llm, config=config)
# Test step with no pending actions
state = Mock()
state.history = []
state.latest_user_message = None
state.latest_user_message_id = None
state.latest_user_message_timestamp = None
state.latest_user_message_cause = None
state.latest_user_message_timeout = None
state.latest_user_message_llm_metrics = None
state.latest_user_message_tool_call_metadata = None
mock_state.latest_user_message = None
mock_state.latest_user_message_id = None
mock_state.latest_user_message_timestamp = None
mock_state.latest_user_message_cause = None
mock_state.latest_user_message_timeout = None
mock_state.latest_user_message_llm_metrics = None
mock_state.latest_user_message_tool_call_metadata = None
action = agent.step(state)
action = agent.step(mock_state)
assert isinstance(action, MessageAction)
assert action.content == 'Task completed'
def test_mismatched_tool_call_events(mock_state: State):
"""Tests that the agent can convert mismatched tool call events (i.e., an observation with no corresponding action) into messages."""
agent = CodeActAgent(llm=LLM(LLMConfig()), config=AgentConfig())
tool_call_metadata = Mock(
spec=ToolCallMetadata,
model_response=Mock(
id='model_response_0',
choices=[
Mock(
message=Mock(
role='assistant',
content='',
tool_calls=[
Mock(spec=ChatCompletionMessageToolCall, id='tool_call_0')
],
)
)
],
),
tool_call_id='tool_call_0',
function_name='foo',
)
action = CmdRunAction('foo')
action._source = 'agent'
action.tool_call_metadata = tool_call_metadata
observation = CmdOutputObservation(content='', command_id=0, command='foo')
observation.tool_call_metadata = tool_call_metadata
# When both events are provided, the agent should get three messages:
# 1. The system message,
# 2. The action message, and
# 3. The observation message
mock_state.history = [action, observation]
messages = agent._get_messages(mock_state)
assert len(messages) == 3
# The same should hold if the events are presented out-of-order
mock_state.history = [observation, action]
messages = agent._get_messages(mock_state)
assert len(messages) == 3
# If only one of the two events is present, then we should just get the system message
mock_state.history = [action]
messages = agent._get_messages(mock_state)
assert len(messages) == 1
mock_state.history = [observation]
messages = agent._get_messages(mock_state)
assert len(messages) == 1

View File

@ -1,44 +1,520 @@
from unittest.mock import Mock, patch
from datetime import datetime
from typing import Any
from unittest.mock import MagicMock
import pytest
from openhands.core.exceptions import LLMResponseError
from openhands.llm.llm import LLM
from openhands.memory.condenser import MemoryCondenser
from openhands.controller.state.state import State
from openhands.core.config.condenser_config import (
AmortizedForgettingCondenserConfig,
LLMAttentionCondenserConfig,
LLMSummarizingCondenserConfig,
NoOpCondenserConfig,
ObservationMaskingCondenserConfig,
RecentEventsCondenserConfig,
)
from openhands.core.config.llm_config import LLMConfig
from openhands.events.event import Event, EventSource
from openhands.events.observation.observation import Observation
from openhands.llm import LLM
from openhands.memory.condenser import (
AmortizedForgettingCondenser,
Condenser,
ImportantEventSelection,
LLMAttentionCondenser,
LLMSummarizingCondenser,
NoOpCondenser,
ObservationMaskingCondenser,
RecentEventsCondenser,
)
def create_test_event(
message: str, timestamp: datetime | None = None, id: int | None = None
) -> Event:
"""Create a simple test event."""
event = Event()
event._message = message
event.timestamp = timestamp if timestamp else datetime.now()
if id is not None:
event._id = id
event._source = EventSource.USER
return event
@pytest.fixture
def memory_condenser():
return MemoryCondenser()
def mock_llm() -> LLM:
"""Mocks an LLM object with a utility function for setting and resetting response contents in unit tests."""
# Create a MagicMock for the LLM object
mock_llm = MagicMock(
spec=LLM,
config=MagicMock(
spec=LLMConfig, model='gpt-4o', api_key='test_key', custom_llm_provider=None
),
metrics=MagicMock(),
)
_mock_content = None
# Set a mock message with the mocked content
mock_message = MagicMock()
mock_message.content = _mock_content
def set_mock_response_content(content: Any):
"""Set the mock response for the LLM."""
nonlocal mock_message
mock_message.content = content
mock_choice = MagicMock()
mock_choice.message = mock_message
mock_response = MagicMock()
mock_response.choices = [mock_choice]
mock_llm.completion.return_value = mock_response
# Attach helper methods to the mock object
mock_llm.set_mock_response_content = set_mock_response_content
return mock_llm
@pytest.fixture
def mock_llm():
return Mock(spec=LLM)
def mock_state() -> State:
"""Mocks a State object with the only parameters needed for testing condensers: history and extra_data."""
mock_state = MagicMock(spec=State)
mock_state.history = []
mock_state.extra_data = {}
return mock_state
def test_condense_success(memory_condenser, mock_llm):
mock_llm.completion.return_value = {
'choices': [{'message': {'content': 'Condensed memory'}}]
}
result = memory_condenser.condense('Summarize this', mock_llm)
assert result == 'Condensed memory'
mock_llm.completion.assert_called_once_with(
messages=[{'content': 'Summarize this', 'role': 'user'}]
def test_noop_condenser_from_config():
"""Test that the NoOpCondenser objects can be made from config."""
config = NoOpCondenserConfig()
condenser = Condenser.from_config(config)
assert isinstance(condenser, NoOpCondenser)
def test_noop_condenser():
"""Test that NoOpCondensers preserve their input events."""
events = [
create_test_event('Event 1'),
create_test_event('Event 2'),
create_test_event('Event 3'),
]
mock_state = MagicMock()
mock_state.history = events
condenser = NoOpCondenser()
result = condenser.condensed_history(mock_state)
assert result == events
def test_observation_masking_condenser_from_config():
"""Test that ObservationMaskingCondenser objects can be made from config."""
attention_window = 5
config = ObservationMaskingCondenserConfig(attention_window=attention_window)
condenser = Condenser.from_config(config)
assert isinstance(condenser, ObservationMaskingCondenser)
assert condenser.attention_window == attention_window
def test_observation_masking_condenser_respects_attention_window(mock_state):
"""Test that ObservationMaskingCondenser only masks events outside the attention window."""
attention_window = 3
condenser = ObservationMaskingCondenser(attention_window=attention_window)
events = [
create_test_event('Event 1'),
Observation('Observation 1'),
create_test_event('Event 3'),
create_test_event('Event 4'),
Observation('Observation 2'),
]
mock_state.history = events
result = condenser.condensed_history(mock_state)
assert len(result) == len(events)
for index, (event, condensed_event) in enumerate(zip(events, result)):
# If we're outside the attention window, observations should be masked.
if index < len(events) - attention_window:
if isinstance(event, Observation):
assert '<MASKED>' in str(condensed_event)
# If we're within the attention window, events are unchanged.
else:
assert event == condensed_event
def test_recent_events_condenser_from_config():
"""Test that RecentEventsCondenser objects can be made from config."""
max_events = 5
keep_first = 1
config = RecentEventsCondenserConfig(keep_first=keep_first, max_events=max_events)
condenser = Condenser.from_config(config)
assert isinstance(condenser, RecentEventsCondenser)
assert condenser.max_events == max_events
assert condenser.keep_first == keep_first
def test_recent_events_condenser():
"""Test that RecentEventsCondensers keep just the most recent events."""
events = [
create_test_event('Event 1'),
create_test_event('Event 2'),
create_test_event('Event 3'),
create_test_event('Event 4'),
create_test_event('Event 5'),
]
mock_state = MagicMock()
mock_state.history = events
# If max_events is at least the number of events, this is equivalent to a NoOpCondenser.
condenser = RecentEventsCondenser(max_events=len(events))
result = condenser.condensed_history(mock_state)
assert result == events
# If max_events is smaller than the number of events, only keep the last few.
max_events = 2
condenser = RecentEventsCondenser(max_events=max_events)
result = condenser.condensed_history(mock_state)
assert len(result) == max_events
assert result[0]._message == 'Event 4'
assert result[1]._message == 'Event 5'
# If the keep_first flag is set, the first event will always be present.
keep_first = 1
max_events = 2
condenser = RecentEventsCondenser(keep_first=keep_first, max_events=max_events)
result = condenser.condensed_history(mock_state)
assert len(result) == max_events
assert result[0]._message == 'Event 1'
assert result[1]._message == 'Event 5'
# We should be able to keep more of the initial events.
keep_first = 2
max_events = 3
condenser = RecentEventsCondenser(keep_first=keep_first, max_events=max_events)
result = condenser.condensed_history(mock_state)
assert len(result) == max_events
assert result[0]._message == 'Event 1'
assert result[1]._message == 'Event 2'
assert result[2]._message == 'Event 5'
def test_llm_condenser_from_config():
"""Test that LLMCondensers can be made from config."""
config = LLMSummarizingCondenserConfig(
llm_config=LLMConfig(
model='gpt-4o',
api_key='test_key',
)
)
condenser = Condenser.from_config(config)
assert isinstance(condenser, LLMSummarizingCondenser)
assert condenser.llm.config.model == 'gpt-4o'
assert condenser.llm.config.api_key == 'test_key'
def test_llm_condenser(mock_llm, mock_state):
"""Test that LLMCondensers use the LLM to generate a summary event."""
events = [
create_test_event('Event 1'),
create_test_event('Event 2'),
]
mock_state.history = events
mock_llm.metrics = MagicMock()
mock_llm.metrics.get.return_value = {'test_metric': 1.0}
mock_llm.set_mock_response_content('Summary of events')
condenser = LLMSummarizingCondenser(llm=mock_llm)
result = condenser.condensed_history(mock_state)
assert len(result) == 1
assert result[0].content == 'Summary of events'
# Verify LLM was called with correct prompt.
mock_llm.completion.assert_called_once()
call_args = mock_llm.completion.call_args[1]
assert 'messages' in call_args
assert len(call_args['messages']) == 1
assert 'Event 1' in call_args['messages'][0]['content']
assert 'Event 2' in call_args['messages'][0]['content']
# Verify metrics were added to state
assert 'condenser_meta' in mock_state.extra_data
assert len(mock_state.extra_data['condenser_meta']) == 1
assert mock_state.extra_data['condenser_meta'][0]['metrics'] == {'test_metric': 1.0}
def test_llm_condenser_error():
"""Test that LLM errors are propagated during condensation."""
events = [create_test_event('Event 1', datetime(2024, 1, 1, 10, 0))]
mock_state = MagicMock()
mock_state.history = events
mock_llm = MagicMock()
mock_llm.completion.side_effect = Exception('LLM error')
condenser = LLMSummarizingCondenser(llm=mock_llm)
try:
condenser.condensed_history(mock_state)
raise AssertionError('Expected exception was not raised.')
except Exception as e:
assert str(e) == 'LLM error'
def test_amortized_forgetting_condenser_from_config():
"""Test that AmortizedForgettingCondenser objects can be made from config."""
max_size = 50
keep_first = 10
config = AmortizedForgettingCondenserConfig(
max_size=max_size, keep_first=keep_first
)
condenser = Condenser.from_config(config)
assert isinstance(condenser, AmortizedForgettingCondenser)
assert condenser.max_size == max_size
assert condenser.keep_first == keep_first
def test_amortized_forgetting_condenser_invalid_config():
"""Test that AmortizedForgettingCondenser raises error when keep_first > max_size."""
pytest.raises(ValueError, AmortizedForgettingCondenser, max_size=4, keep_first=2)
pytest.raises(ValueError, AmortizedForgettingCondenser, max_size=0)
pytest.raises(ValueError, AmortizedForgettingCondenser, keep_first=-1)
def test_amortized_forgetting_condenser_grows_to_max_size():
"""Test that AmortizedForgettingCondenser correctly maintains an event context up to max size."""
max_size = 15
condenser = AmortizedForgettingCondenser(max_size=max_size)
mock_state = MagicMock()
mock_state.extra_data = {}
mock_state.history = []
for i in range(max_size):
event = create_test_event(f'Event {i}')
mock_state.history.append(event)
results = condenser.condensed_history(mock_state)
assert len(results) == i + 1
def test_amortized_forgetting_condenser_forgets_when_larger_than_max_size():
"""Test that the AmortizedForgettingCondenser forgets events when the context grows too large."""
max_size = 2
condenser = AmortizedForgettingCondenser(max_size=max_size)
mock_state = MagicMock()
mock_state.extra_data = {}
mock_state.history = []
for i in range(max_size * 10):
event = create_test_event(f'Event {i}')
mock_state.history.append(event)
results = condenser.condensed_history(mock_state)
# The last event in the results is always the event we just added.
assert results[-1] == event
# The number of results should bounce back and forth between 1, 2, 1, 2, ...
assert len(results) == (i % 2) + 1
def test_amortized_forgetting_condenser_keeps_first_events():
"""Test that the AmortizedForgettingCondenser keeps the right number of initial events when forgetting."""
max_size = 4
keep_first = 1
condenser = AmortizedForgettingCondenser(max_size=max_size, keep_first=keep_first)
first_event = create_test_event('Event 0')
mock_state = MagicMock()
mock_state.extra_data = {}
mock_state.history = [first_event]
for i in range(max_size * 10):
event = create_test_event(f'Event {i+1}', datetime(2024, 1, 1, 10, i + 1))
mock_state.history.append(event)
results = condenser.condensed_history(mock_state)
# The last event is always the event we just added.
assert results[-1] == event
# The first event is always the first event.
assert results[0] == first_event
# The number of results should bounce back between 2, 3, 4, 2, 3, 4, ...
assert len(results) == (i % 3) + 2
def test_llm_attention_condenser_from_config():
"""Test that LLMAttentionCondenser objects can be made from config."""
config = LLMAttentionCondenserConfig(
max_size=50,
keep_first=10,
llm_config=LLMConfig(
model='gpt-4o',
api_key='test_key',
),
)
condenser = Condenser.from_config(config)
assert isinstance(condenser, LLMAttentionCondenser)
assert condenser.llm.config.model == 'gpt-4o'
assert condenser.llm.config.api_key == 'test_key'
assert condenser.max_size == 50
assert condenser.keep_first == 10
def test_llm_attention_condenser_invalid_config():
"""Test that LLMAttentionCondenser raises an error if the configured LLM doesn't support response schema."""
config = LLMAttentionCondenserConfig(
max_size=50,
keep_first=10,
llm_config=LLMConfig(
model='claude-2', # Older model that doesn't support response schema
api_key='test_key',
),
)
def test_condense_exception(memory_condenser, mock_llm):
mock_llm.completion.side_effect = LLMResponseError('LLM error')
with pytest.raises(LLMResponseError, match='LLM error'):
memory_condenser.condense('Summarize this', mock_llm)
pytest.raises(ValueError, LLMAttentionCondenser.from_config, config)
@patch('openhands.memory.condenser.logger')
def test_condense_logs_error(mock_logger, memory_condenser, mock_llm):
mock_llm.completion.side_effect = LLMResponseError('LLM error')
with pytest.raises(LLMResponseError):
memory_condenser.condense('Summarize this', mock_llm)
mock_logger.error.assert_called_once_with(
'Error condensing thoughts: %s', 'LLM error', exc_info=False
)
def test_llm_attention_condenser_keeps_first_events(mock_llm, mock_state):
"""Test that the LLMAttentionCondenser keeps the right number of initial events when forgetting."""
max_size = 4
condenser = LLMAttentionCondenser(max_size=max_size, keep_first=1, llm=mock_llm)
first_event = create_test_event('Event 0', id=0)
mock_state.history.append(first_event)
for i in range(max_size * 10):
event = create_test_event(f'Event {i+1}', id=i + 1)
mock_state.history.append(event)
mock_llm.set_mock_response_content(
ImportantEventSelection(
ids=[event.id for event in mock_state.history]
).model_dump_json()
)
results = condenser.condensed_history(mock_state)
# The first event is always the first event.
assert results[0] == first_event
def test_llm_attention_condenser_grows_to_max_size(mock_llm, mock_state):
"""Test that LLMAttentionCondenser correctly maintains an event context up to max size."""
max_size = 15
condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)
for i in range(max_size):
event = create_test_event(f'Event {i}')
mock_state.history.append(event)
mock_llm.set_mock_response_content(
ImportantEventSelection(ids=[event.id for event in mock_state.history])
)
results = condenser.condensed_history(mock_state)
assert len(results) == i + 1
def test_llm_attention_condenser_forgets_when_larger_than_max_size(
mock_llm, mock_state
):
"""Test that the LLMAttentionCondenser forgets events when the context grows too large."""
max_size = 2
condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)
for i in range(max_size * 10):
event = create_test_event(f'Event {i}', id=i)
mock_state.history.append(event)
mock_llm.set_mock_response_content(
ImportantEventSelection(
ids=[event.id for event in mock_state.history]
).model_dump_json()
)
results = condenser.condensed_history(mock_state)
# The number of results should bounce back and forth between 1, 2, 1, 2, ...
assert len(results) == (i % 2) + 1
def test_llm_attention_condenser_handles_events_outside_history(mock_llm, mock_state):
"""Test that the LLMAttentionCondenser handles event IDs that aren't from the event history."""
max_size = 2
condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)
for i in range(max_size * 10):
event = create_test_event(f'Event {i}', id=i)
mock_state.history.append(event)
mock_llm.set_mock_response_content(
ImportantEventSelection(
ids=[event.id for event in mock_state.history] + [-1, -2, -3, -4]
).model_dump_json()
)
results = condenser.condensed_history(mock_state)
# The number of results should bounce back and forth between 1, 2, 1, 2, ...
assert len(results) == (i % 2) + 1
def test_llm_attention_condenser_handles_too_many_events(mock_llm, mock_state):
"""Test that the LLMAttentionCondenser handles when the response contains too many event IDs."""
max_size = 2
condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)
for i in range(max_size * 10):
event = create_test_event(f'Event {i}', id=i)
mock_state.history.append(event)
mock_llm.set_mock_response_content(
ImportantEventSelection(
ids=[event.id for event in mock_state.history]
+ [event.id for event in mock_state.history]
).model_dump_json()
)
results = condenser.condensed_history(mock_state)
# The number of results should bounce back and forth between 1, 2, 1, 2, ...
assert len(results) == (i % 2) + 1
def test_llm_attention_condenser_handles_too_few_events(mock_llm, mock_state):
"""Test that the LLMAttentionCondenser handles when the response contains too few event IDs."""
max_size = 2
condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)
for i in range(max_size * 10):
event = create_test_event(f'Event {i}', id=i)
mock_state.history.append(event)
mock_llm.set_mock_response_content(
ImportantEventSelection(ids=[]).model_dump_json()
)
results = condenser.condensed_history(mock_state)
# The number of results should bounce back and forth between 1, 2, 1, 2, ...
assert len(results) == (i % 2) + 1

View File

@ -13,6 +13,9 @@ from openhands.core.config import (
load_from_env,
load_from_toml,
)
from openhands.core.config.condenser_config import (
NoOpCondenserConfig,
)
from openhands.core.logger import openhands_logger
@ -618,6 +621,13 @@ def test_cache_dir_creation(default_config, tmpdir):
assert os.path.exists(default_config.cache_dir)
def test_agent_config_condenser_default():
"""Test that default agent condenser is NoOpCondenser."""
config = AppConfig()
agent_config = config.get_agent_config()
assert isinstance(agent_config.condenser, NoOpCondenserConfig)
def test_api_keys_repr_str():
# Test LLMConfig
llm_config = LLMConfig(

View File

@ -75,7 +75,7 @@ def test_get_messages(codeact_agent: CodeActAgent):
codeact_agent.reset()
messages = codeact_agent._get_messages(
Mock(history=history, max_iterations=5, iteration=0)
Mock(history=history, max_iterations=5, iteration=0, extra_data={})
)
assert (
@ -111,7 +111,7 @@ def test_get_messages_prompt_caching(codeact_agent: CodeActAgent):
codeact_agent.reset()
messages = codeact_agent._get_messages(
Mock(history=history, max_iterations=10, iteration=5)
Mock(history=history, max_iterations=10, iteration=5, extra_data={})
)
# Check that only the last two user messages have cache_prompt=True
@ -144,6 +144,7 @@ def test_prompt_caching_headers(codeact_agent: CodeActAgent):
mock_state.history = history
mock_state.max_iterations = 5
mock_state.iteration = 0
mock_state.extra_data = {}
codeact_agent.reset()