Mirror of https://github.com/OpenHands/OpenHands.git (synced 2025-12-26 05:48:36 +08:00)

feature: Condenser Interface and Defaults (#5306)

Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Calvin Smith <calvin@all-hands.dev>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>

This commit is contained in:
parent 561f308401
commit 6e4ff56934
@@ -15,6 +15,7 @@ from evaluation.utils.shared import (
     EvalOutput,
     assert_and_raise,
     codeact_user_response,
+    get_metrics,
     is_fatal_evaluation_error,
     make_metadata,
     prepare_dataset,
@@ -148,6 +149,7 @@ def get_config(
         codeact_enable_jupyter=False,
         codeact_enable_browsing=RUN_WITH_BROWSING,
         codeact_enable_llm_editor=False,
+        condenser=metadata.condenser_config,
     )
     config.set_agent_config(agent_config)
     return config
@@ -448,7 +450,7 @@ def process_instance(

     # NOTE: this is NO LONGER the event stream, but an agent history that includes delegate agent's events
     histories = [event_to_dict(event) for event in state.history]
-    metrics = state.metrics.get() if state.metrics else None
+    metrics = get_metrics(state)

     # Save the output
     output = EvalOutput(
@@ -17,6 +17,10 @@ from tqdm import tqdm

 from openhands.controller.state.state import State
 from openhands.core.config import LLMConfig
+from openhands.core.config.condenser_config import (
+    CondenserConfig,
+    NoOpCondenserConfig,
+)
 from openhands.core.exceptions import (
     AgentRuntimeBuildError,
     AgentRuntimeDisconnectedError,
@@ -33,6 +37,7 @@ from openhands.events.action.message import MessageAction
 from openhands.events.event import Event
 from openhands.events.serialization.event import event_to_dict
 from openhands.events.utils import get_pairs_from_events
+from openhands.memory.condenser import get_condensation_metadata


 class EvalMetadata(BaseModel):
@@ -45,11 +50,17 @@ class EvalMetadata(BaseModel):
     dataset: str | None = None
     data_split: str | None = None
     details: dict[str, Any] | None = None
+    condenser_config: CondenserConfig | None = None

     def model_dump(self, *args, **kwargs):
         dumped_dict = super().model_dump(*args, **kwargs)
         # avoid leaking sensitive information
         dumped_dict['llm_config'] = self.llm_config.to_safe_dict()
+        if hasattr(self.condenser_config, 'llm_config'):
+            dumped_dict['condenser_config']['llm_config'] = (
+                self.condenser_config.llm_config.to_safe_dict()
+            )

         return dumped_dict

     def model_dump_json(self, *args, **kwargs):
@@ -57,6 +68,11 @@ class EvalMetadata(BaseModel):
         dumped_dict = json.loads(dumped)
         # avoid leaking sensitive information
         dumped_dict['llm_config'] = self.llm_config.to_safe_dict()
+        if hasattr(self.condenser_config, 'llm_config'):
+            dumped_dict['condenser_config']['llm_config'] = (
+                self.condenser_config.llm_config.to_safe_dict()
+            )

         logger.debug(f'Dumped metadata: {dumped_dict}')
         return json.dumps(dumped_dict)

@@ -192,6 +208,7 @@ def make_metadata(
     eval_output_dir: str,
     data_split: str | None = None,
     details: dict[str, Any] | None = None,
+    condenser_config: CondenserConfig | None = None,
 ) -> EvalMetadata:
     model_name = llm_config.model.split('/')[-1]
     model_path = model_name.replace(':', '_').replace('@', '-')
@@ -222,6 +239,9 @@ def make_metadata(
         dataset=dataset_name,
         data_split=data_split,
         details=details,
+        condenser_config=condenser_config
+        if condenser_config
+        else NoOpCondenserConfig(),
     )
     metadata_json = metadata.model_dump_json()
     logger.info(f'Metadata: {metadata_json}')
@@ -551,3 +571,10 @@ def is_fatal_evaluation_error(error: str | None) -> bool:
         return True

     return False
+
+
+def get_metrics(state: State) -> dict[str, Any]:
+    """Extract metrics from the state."""
+    metrics = state.metrics.get() if state.metrics else {}
+    metrics['condenser'] = get_condensation_metadata(state)
+    return metrics
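
In effect, any harness that switches from reading state.metrics directly to calling get_metrics(state) now reports condenser activity alongside the usual run metrics: the returned dict gains a 'condenser' key holding the list of per-condensation metadata batches. The exact keys inside each batch depend on the strategy in use (for the LLM-backed condensers introduced below, 'response' and 'metrics').
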
@@ -24,6 +24,7 @@ from openhands.events.action import (
     MessageAction,
 )
 from openhands.events.observation import (
+    AgentCondensationObservation,
     AgentDelegateObservation,
     BrowserOutputObservation,
     CmdOutputObservation,
@@ -36,6 +37,7 @@ from openhands.events.observation.error import ErrorObservation
 from openhands.events.observation.observation import Observation
 from openhands.events.serialization.event import truncate_content
 from openhands.llm.llm import LLM
+from openhands.memory.condenser import Condenser
 from openhands.runtime.plugins import (
     AgentSkillsRequirement,
     JupyterRequirement,
@@ -115,6 +117,9 @@ class CodeActAgent(Agent):
             disabled_microagents=self.config.disabled_microagents,
         )

+        self.condenser = Condenser.from_config(self.config.condenser)
+        logger.debug(f'Using condenser: {self.condenser}')
+
     def get_action_message(
         self,
         action: Action,
@@ -322,6 +327,9 @@ class CodeActAgent(Agent):
             text = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars)
             text += '\n[Last action has been rejected by the user]'
             message = Message(role='user', content=[TextContent(text=text)])
+        elif isinstance(obs, AgentCondensationObservation):
+            text = truncate_content(obs.content, max_message_chars)
+            message = Message(role='user', content=[TextContent(text=text)])
         else:
             # If an observation message is not returned, it will cause an error
             # when the LLM tries to return the next message
@@ -442,7 +450,10 @@ class CodeActAgent(Agent):

         pending_tool_call_action_messages: dict[str, Message] = {}
         tool_call_id_to_message: dict[str, Message] = {}
-        events = list(state.history)
+
+        # Condense the events from the state.
+        events = self.condenser.condensed_history(state)
+
         for event in events:
             # create a regular message from an event
             if isinstance(event, Action):
@@ -1,5 +1,6 @@
-from dataclasses import dataclass, fields
+from dataclasses import dataclass, field, fields

+from openhands.core.config.condenser_config import CondenserConfig, NoOpCondenserConfig
 from openhands.core.config.config_utils import get_field_info


@@ -18,6 +19,7 @@ class AgentConfig:
         llm_config: The name of the llm config to use. If specified, this will override global llm config.
         use_microagents: Whether to use microagents at all. Default is True.
         disabled_microagents: A list of microagents to disable. Default is None.
+        condenser: Configuration for the memory condenser. Default is NoOpCondenserConfig.
     """

     codeact_enable_browsing: bool = True
@@ -29,6 +31,7 @@ class AgentConfig:
     llm_config: str | None = None
     use_microagents: bool = True
     disabled_microagents: list[str] | None = None
+    condenser: CondenserConfig = field(default_factory=NoOpCondenserConfig)  # type: ignore

     def defaults_to_dict(self) -> dict:
         """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
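
Because the default is built through default_factory, each AgentConfig instance gets its own NoOpCondenserConfig rather than a shared mutable default, and opting into a different strategy is a one-liner. A minimal sketch, using only names introduced in this diff (the condenser_config module itself appears next):

    from openhands.core.config import AgentConfig
    from openhands.core.config.condenser_config import RecentEventsCondenserConfig

    # Agent whose LLM prompt is built from at most the 20 most recent events.
    agent_config = AgentConfig(condenser=RecentEventsCondenserConfig(max_events=20))
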
openhands/core/config/condenser_config.py (new file, 90 lines)
@@ -0,0 +1,90 @@
+from typing import Literal
+
+from pydantic import BaseModel, Field
+
+from openhands.core.config.llm_config import LLMConfig
+
+
+class NoOpCondenserConfig(BaseModel):
+    """Configuration for NoOpCondenser."""
+
+    type: Literal['noop'] = Field('noop')
+
+
+class ObservationMaskingCondenserConfig(BaseModel):
+    """Configuration for ObservationMaskingCondenser."""
+
+    type: Literal['observation_masking'] = Field('observation_masking')
+    attention_window: int = Field(
+        default=10,
+        description='The number of most-recent events where observations will not be masked.',
+        ge=1,
+    )
+
+
+class RecentEventsCondenserConfig(BaseModel):
+    """Configuration for RecentEventsCondenser."""
+
+    type: Literal['recent'] = Field('recent')
+    keep_first: int = Field(
+        default=0,
+        description='The number of initial events to always keep.',
+        ge=0,
+    )
+    max_events: int = Field(
+        default=10, description='Maximum number of events to keep.', ge=1
+    )
+
+
+class LLMSummarizingCondenserConfig(BaseModel):
+    """Configuration for LLMSummarizingCondenser."""
+
+    type: Literal['llm'] = Field('llm')
+    llm_config: LLMConfig = Field(
+        ..., description='Configuration for the LLM to use for condensing.'
+    )
+
+
+class AmortizedForgettingCondenserConfig(BaseModel):
+    """Configuration for AmortizedForgettingCondenser."""
+
+    type: Literal['amortized'] = Field('amortized')
+    max_size: int = Field(
+        default=100,
+        description='Maximum size of the condensed history before triggering forgetting.',
+        ge=2,
+    )
+    keep_first: int = Field(
+        default=0,
+        description='Number of initial events to always keep in history.',
+        ge=0,
+    )
+
+
+class LLMAttentionCondenserConfig(BaseModel):
+    """Configuration for LLMAttentionCondenser."""
+
+    type: Literal['llm_attention'] = Field('llm_attention')
+    llm_config: LLMConfig = Field(
+        ..., description='Configuration for the LLM to use for attention.'
+    )
+    max_size: int = Field(
+        default=100,
+        description='Maximum size of the condensed history before triggering forgetting.',
+        ge=2,
+    )
+    keep_first: int = Field(
+        default=0,
+        description='Number of initial events to always keep in history.',
+        ge=0,
+    )
+
+
+CondenserConfig = (
+    NoOpCondenserConfig
+    | ObservationMaskingCondenserConfig
+    | RecentEventsCondenserConfig
+    | LLMSummarizingCondenserConfig
+    | AmortizedForgettingCondenserConfig
+    | LLMAttentionCondenserConfig
+)
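
Each config model carries a distinct `type` literal, so CondenserConfig behaves as a tagged union when deserializing stored settings. A minimal sketch of the round trip, assuming standard pydantic v2 (TypeAdapter is ordinary pydantic API, not part of this diff):

    from pydantic import TypeAdapter

    adapter = TypeAdapter(CondenserConfig)
    # pydantic matches the 'type' tag against the Literal on each union member.
    config = adapter.validate_python({'type': 'recent', 'keep_first': 1, 'max_events': 5})
    assert isinstance(config, RecentEventsCondenserConfig)
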
@@ -44,5 +44,8 @@ class ObservationTypeSchema(BaseModel):

     USER_REJECTED: str = Field(default='user_rejected')

+    CONDENSE: str = Field(default='condense')
+    """Result of a condensation operation."""
+

 ObservationType = ObservationTypeSchema()
@@ -1,4 +1,7 @@
-from openhands.events.observation.agent import AgentStateChangedObservation
+from openhands.events.observation.agent import (
+    AgentCondensationObservation,
+    AgentStateChangedObservation,
+)
 from openhands.events.observation.browse import BrowserOutputObservation
 from openhands.events.observation.commands import (
     CmdOutputMetadata,
@@ -32,4 +35,5 @@ __all__ = [
     'AgentDelegateObservation',
     'SuccessObservation',
     'UserRejectObservation',
+    'AgentCondensationObservation',
 ]
@@ -14,3 +14,14 @@ class AgentStateChangedObservation(Observation):
     @property
     def message(self) -> str:
         return ''
+
+
+@dataclass
+class AgentCondensationObservation(Observation):
+    """The output of a condensation action."""
+
+    observation: str = ObservationType.CONDENSE
+
+    @property
+    def message(self) -> str:
+        return self.content
@@ -1,6 +1,9 @@
 import copy

-from openhands.events.observation.agent import AgentStateChangedObservation
+from openhands.events.observation.agent import (
+    AgentCondensationObservation,
+    AgentStateChangedObservation,
+)
 from openhands.events.observation.browse import BrowserOutputObservation
 from openhands.events.observation.commands import (
     CmdOutputMetadata,
@@ -32,6 +35,7 @@ observations = (
     ErrorObservation,
     AgentStateChangedObservation,
     UserRejectObservation,
+    AgentCondensationObservation,
 )

 OBSERVATION_TYPE_TO_CLASS = {
@@ -1,4 +1,4 @@
-from openhands.memory.condenser import MemoryCondenser
+from openhands.memory.condenser import Condenser
 from openhands.memory.memory import LongTermMemory

-__all__ = ['LongTermMemory', 'MemoryCondenser']
+__all__ = ['LongTermMemory', 'Condenser']
@@ -1,24 +1,409 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from contextlib import contextmanager
+from typing import Any
+
+from litellm import supports_response_schema
+from pydantic import BaseModel
+from typing_extensions import override
+
+from openhands.controller.state.state import State
+from openhands.core.config.condenser_config import (
+    AmortizedForgettingCondenserConfig,
+    CondenserConfig,
+    LLMAttentionCondenserConfig,
+    LLMSummarizingCondenserConfig,
+    NoOpCondenserConfig,
+    ObservationMaskingCondenserConfig,
+    RecentEventsCondenserConfig,
+)
 from openhands.core.logger import openhands_logger as logger
+from openhands.events.event import Event
+from openhands.events.observation import AgentCondensationObservation, Observation
 from openhands.llm.llm import LLM

+CONDENSER_METADATA_KEY = 'condenser_meta'
+"""Key identifying where metadata is stored in a `State` object's `extra_data` field."""
+
+
-class MemoryCondenser:
-    def condense(self, summarize_prompt: str, llm: LLM):
-        """Attempts to condense the memory by using the llm
-
-        Parameters:
-        - llm (LLM): llm to be used for summarization
+def get_condensation_metadata(state: State) -> list[dict[str, Any]]:
+    """Utility function to retrieve a list of metadata batches from a `State`.
+
+    Args:
+        state: The state to retrieve metadata from.
+
+    Returns:
+        list[dict[str, Any]]: A list of metadata batches, each representing a condensation.
+    """
+    if CONDENSER_METADATA_KEY in state.extra_data:
+        return state.extra_data[CONDENSER_METADATA_KEY]
+    return []
+
+
+class Condenser(ABC):
+    """Abstract condenser interface.
+
+    Condensers take a list of `Event` objects and reduce them into a potentially smaller list.
+
+    Agents can use condensers to reduce the number of events they need to consider when deciding which action to take. To use a condenser, agents can call the `condensed_history` method on the current `State` being considered and use the results instead of the full history.
+
+    Example usage::
+
+        condenser = Condenser.from_config(condenser_config)
+        events = condenser.condensed_history(state)
+    """
+
+    def __init__(self):
+        self._metadata_batch: dict[str, Any] = {}
+
+    def add_metadata(self, key: str, value: Any) -> None:
+        """Add information to the current metadata batch.
+
+        Any key/value pairs added to the metadata batch will be recorded in the `State` at the end of the current condensation.
+
+        Args:
+            key: The key to store the metadata under.
+            value: The metadata to store.
+        """
+        self._metadata_batch[key] = value
+
+    def write_metadata(self, state: State) -> None:
+        """Write the current batch of metadata to the `State`.
+
+        Resets the current metadata batch: any metadata added after this call will be stored in a new batch and written to the `State` at the end of the next condensation.
+        """
+        if CONDENSER_METADATA_KEY not in state.extra_data:
+            state.extra_data[CONDENSER_METADATA_KEY] = []
+        if self._metadata_batch:
+            state.extra_data[CONDENSER_METADATA_KEY].append(self._metadata_batch)
+
+        # Since the batch has been written, clear it for the next condensation
+        self._metadata_batch = {}
+
+    @contextmanager
+    def metadata_batch(self, state: State):
+        """Context manager to ensure batched metadata is always written to the `State`."""
+        try:
+            yield
+        finally:
+            self.write_metadata(state)
+
+    @abstractmethod
+    def condense(self, events: list[Event]) -> list[Event]:
+        """Condense a sequence of events into a potentially smaller list.
+
+        New condenser strategies should override this method to implement their own condensation logic. Call `self.add_metadata` in the implementation to record any relevant per-condensation diagnostic information.
+
+        Args:
+            events: A list of events representing the entire history of the agent.
+
+        Returns:
+            list[Event]: An event sequence representing a condensed history of the agent.
+        """
+
+    def condensed_history(self, state: State) -> list[Event]:
+        """Condense the state's history."""
+        with self.metadata_batch(state):
+            return self.condense(state.history)
+
+    @classmethod
+    def from_config(cls, config: CondenserConfig) -> Condenser:
+        """Create a condenser from a configuration object.
+
+        Args:
+            config: Configuration for the condenser.
+
+        Returns:
+            Condenser: A condenser instance.
+
+        Raises:
-            - Exception: the same exception as it got from the llm or processing the response
+            ValueError: If the condenser type is not recognized.
+        """
+        match config:
+            case NoOpCondenserConfig():
+                return NoOpCondenser()
+
+            case ObservationMaskingCondenserConfig():
+                return ObservationMaskingCondenser(
+                    **config.model_dump(exclude=['type'])
+                )
+
+            case RecentEventsCondenserConfig():
+                return RecentEventsCondenser(**config.model_dump(exclude=['type']))
+
+            case LLMSummarizingCondenserConfig(llm_config=llm_config):
+                return LLMSummarizingCondenser(llm=LLM(config=llm_config))
+
+            case AmortizedForgettingCondenserConfig():
+                return AmortizedForgettingCondenser(
+                    **config.model_dump(exclude=['type'])
+                )
+
+            case LLMAttentionCondenserConfig(llm_config=llm_config):
+                return LLMAttentionCondenser(
+                    llm=LLM(config=llm_config),
+                    **config.model_dump(exclude=['type', 'llm_config']),
+                )
+
+            case _:
+                raise ValueError(f'Unknown condenser config: {config}')
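
A new strategy only has to override condense; metadata batching and the State bookkeeping in condensed_history come for free. As a minimal sketch (a hypothetical example, not part of this diff), a condenser that keeps every other event might look like:

    class EveryOtherEventCondenser(Condenser):
        """Hypothetical strategy: keep alternating events, always including the newest."""

        def condense(self, events: list[Event]) -> list[Event]:
            kept = events[::-2][::-1]  # walk back from the newest event, then restore order
            self.add_metadata('forgotten', len(events) - len(kept))  # recorded in State at batch end
            return kept
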
+
+
+class RollingCondenser(Condenser, ABC):
+    """Base class for a specialized condenser strategy that applies condensation to a rolling history.
+
+    The rolling history is computed by appending new events to the most recent condensation. For example, the sequence of calls::
+
+        assert state.history == [event1, event2, event3]
+        condensation = condenser.condensed_history(state)
+
+        # ...new events are added to the state...
+
+        assert state.history == [event1, event2, event3, event4, event5]
+        condenser.condensed_history(state)
+
+    will result in the second call to `condensed_history` passing `condensation + [event4, event5]` to the `condense` method.
+    """
+
+    def __init__(self) -> None:
+        self._condensation: list[Event] = []
+        self._last_history_length: int = 0
+
+        super().__init__()
+
+    @override
+    def condensed_history(self, state: State) -> list[Event]:
+        new_events = state.history[self._last_history_length :]
+
+        with self.metadata_batch(state):
+            results = self.condense(self._condensation + new_events)
+
+        self._condensation = results
+        self._last_history_length = len(state.history)
+
+        return results
+
+
+class NoOpCondenser(Condenser):
+    """A condenser that does nothing to the event sequence."""
+
+    def condense(self, events: list[Event]) -> list[Event]:
+        """Returns the list of events unchanged."""
+        return events
+
+
+class ObservationMaskingCondenser(Condenser):
+    """A condenser that masks the values of observations outside of a recent attention window."""
+
+    def __init__(self, attention_window: int = 5):
+        self.attention_window = attention_window
+
+        super().__init__()
+
+    def condense(self, events: list[Event]) -> list[Event]:
+        """Replace the content of observations outside of the attention window with a placeholder."""
+        results: list[Event] = []
+        for i, event in enumerate(events):
+            if (
+                isinstance(event, Observation)
+                and i < len(events) - self.attention_window
+            ):
+                results.append(AgentCondensationObservation('<MASKED>'))
+            else:
+                results.append(event)
+
+        return results
+
+
+class RecentEventsCondenser(Condenser):
+    """A condenser that only keeps a certain number of the most recent events."""
+
+    def __init__(self, keep_first: int = 0, max_events: int = 10):
+        self.keep_first = keep_first
+        self.max_events = max_events
+
+        super().__init__()
+
+    def condense(self, events: list[Event]) -> list[Event]:
+        """Keep only the most recent events (up to `max_events`)."""
+        head = events[: self.keep_first]
+        tail_length = max(0, self.max_events - len(head))
+        tail = events[-tail_length:]
+        return head + tail
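
A worked example of the head/tail arithmetic above: with keep_first=1, max_events=3 and a five-event history [e1, ..., e5], head is [e1], tail_length is 2, and the result is [e1, e4, e5]. One sharp edge worth noting: the slice assumes max_events > keep_first, since a tail_length of 0 would make events[-0:] return the entire list.
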
+
+
+class LLMSummarizingCondenser(Condenser):
+    """A condenser that relies on a language model to summarize the event sequence as a single event."""
+
+    def __init__(self, llm: LLM):
+        self.llm = llm
+
+        super().__init__()
+
+    def condense(self, events: list[Event]) -> list[Event]:
+        """Applies an LLM to summarize the list of events.
+
+        Raises:
+            Exception: If the LLM is unable to summarize the event sequence.
+        """
         try:
-            messages = [{'content': summarize_prompt, 'role': 'user'}]
-            resp = llm.completion(messages=messages)
-            summary_response = resp['choices'][0]['message']['content']
-            return summary_response
-        except Exception as e:
-            logger.error('Error condensing thoughts: %s', str(e), exc_info=False)
-            # TODO If the llm fails with ContextWindowExceededError, we can try to condense the memory chunk by chunk
-            raise
+            # Convert events to a format suitable for summarization
+            events_text = '\n'.join(f'{e.timestamp}: {e.message}' for e in events)
+            summarize_prompt = f'Please summarize these events:\n{events_text}'
+
+            resp = self.llm.completion(
+                messages=[{'content': summarize_prompt, 'role': 'user'}]
+            )
+            summary_response = resp.choices[0].message.content
+
+            # Create a new summary event with the condensed content
+            summary_event = AgentCondensationObservation(summary_response)
+
+            # Add metrics to state
+            self.add_metadata('response', resp.model_dump())
+            self.add_metadata('metrics', self.llm.metrics.get())
+
+            return [summary_event]
+
+        except Exception as e:
+            logger.error('Error condensing events: %s', str(e), exc_info=False)
+            raise e
+
+
+class AmortizedForgettingCondenser(RollingCondenser):
+    """A condenser that maintains a condensed history and forgets old events when it grows too large."""
+
+    def __init__(self, max_size: int = 100, keep_first: int = 0):
+        """Initialize the condenser.
+
+        Args:
+            max_size: Maximum size of history before forgetting.
+            keep_first: Number of initial events to always keep.
+
+        Raises:
+            ValueError: If keep_first is at least half of max_size, keep_first is negative, or max_size is non-positive.
+        """
+        if keep_first >= max_size // 2:
+            raise ValueError(
+                f'keep_first ({keep_first}) must be less than half of max_size ({max_size})'
+            )
+        if keep_first < 0:
+            raise ValueError(f'keep_first ({keep_first}) cannot be negative')
+        if max_size < 1:
+            raise ValueError(f'max_size ({max_size}) cannot be non-positive')
+
+        self.max_size = max_size
+        self.keep_first = keep_first
+
+        super().__init__()
+
+    def condense(self, events: list[Event]) -> list[Event]:
+        """Apply the amortized forgetting strategy to the given list of events."""
+        if len(events) <= self.max_size:
+            return events
+
+        target_size = self.max_size // 2
+        head = events[: self.keep_first]
+
+        events_from_tail = target_size - len(head)
+        tail = events[-events_from_tail:]
+
+        return head + tail
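
The "amortized" in the name is visible in the arithmetic: once the history exceeds max_size it is cut down to max_size // 2, so forgetting happens in occasional halvings instead of on every step. A minimal sketch under the names above (create_test_event is the helper defined in the tests further down):

    condenser = AmortizedForgettingCondenser(max_size=4, keep_first=1)
    events = [create_test_event(f'e{i}') for i in range(5)]
    # len(events) > max_size, so: target_size = 2, head = [e0], events_from_tail = 1
    assert condenser.condense(events) == [events[0], events[4]]
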
+
+
+class ImportantEventSelection(BaseModel):
+    """Utility class for the `LLMAttentionCondenser` that forces the LLM to return a list of integers."""
+
+    ids: list[int]
+
+
+class LLMAttentionCondenser(RollingCondenser):
+    """Rolling condenser strategy that uses an LLM to select the most important events when condensing the history."""
+
+    def __init__(self, llm: LLM, max_size: int = 100, keep_first: int = 0):
+        if keep_first >= max_size // 2:
+            raise ValueError(
+                f'keep_first ({keep_first}) must be less than half of max_size ({max_size})'
+            )
+        if keep_first < 0:
+            raise ValueError(f'keep_first ({keep_first}) cannot be negative')
+        if max_size < 1:
+            raise ValueError(f'max_size ({max_size}) cannot be non-positive')
+
+        self.max_size = max_size
+        self.keep_first = keep_first
+        self.llm = llm
+
+        # This condenser relies on the `response_schema` feature, which is not supported by all LLMs
+        if not supports_response_schema(
+            model=self.llm.config.model,
+            custom_llm_provider=self.llm.config.custom_llm_provider,
+        ):
+            raise ValueError(
+                "The LLM model must support the 'response_schema' parameter to use the LLMAttentionCondenser."
+            )
+
+        super().__init__()
+
+    def condense(self, events: list[Event]) -> list[Event]:
+        """If the history is too long, use an LLM to select the most important events."""
+        if len(events) <= self.max_size:
+            return events
+
+        target_size = self.max_size // 2
+        head = events[: self.keep_first]
+
+        events_from_tail = target_size - len(head)
+
+        message: str = """You will be given a list of actions, observations, and thoughts from a coding agent.
+Each item in the list has an identifier. Please sort the identifiers in order of how important the
+contents of the item are for the next step of the coding agent's task, from most important to least
+important."""
+
+        response = self.llm.completion(
+            messages=[
+                {'content': message, 'role': 'user'},
+                *[
+                    {
+                        'content': f'<ID>{e.id}</ID>\n<CONTENT>{e.message}</CONTENT>',
+                        'role': 'user',
+                    }
+                    for e in events
+                ],
+            ],
+            response_format={
+                'type': 'json_schema',
+                'json_schema': {
+                    'name': 'ImportantEventSelection',
+                    'schema': ImportantEventSelection.model_json_schema(),
+                },
+            },
+        )
+
+        response_ids = ImportantEventSelection.model_validate_json(
+            response.choices[0].message.content
+        ).ids
+
+        self.add_metadata('all_event_ids', [event.id for event in events])
+        self.add_metadata('response_ids', response_ids)
+        self.add_metadata('metrics', self.llm.metrics.get())
+
+        # Filter out any IDs from the head and trim the results down
+        head_ids = [event.id for event in head]
+        response_ids = [
+            response_id for response_id in response_ids if response_id not in head_ids
+        ][:events_from_tail]
+
+        # If the response IDs aren't long enough, iterate backwards through the events and add any unfound IDs to the list.
+        for event in reversed(events):
+            if len(response_ids) >= events_from_tail:
+                break
+            if event.id not in response_ids:
+                response_ids.append(event.id)
+
+        # Grab the events associated with the response IDs
+        tail = [event for event in events if event.id in response_ids]
+
+        return head + tail
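
Two design choices are worth calling out. The supports_response_schema check runs in the constructor, so a model without structured-output support fails fast when the condenser is built rather than in the middle of a run. And the backfill loop after the completion call guarantees the condensed history reaches its target size even when the model returns duplicate IDs, too few IDs, or IDs that never appeared in the history; those are exactly the degenerate responses the tests below exercise.
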
poetry.lock (generated, 3250 lines changed; diff suppressed because it is too large)
@@ -14,7 +14,7 @@ packages = [
 python = "^3.12"
 datasets = "*"
 pandas = "*"
-litellm = "^1.54.1"
+litellm = "^1.55.4"
 google-generativeai = "*" # To use litellm with Gemini Pro API
 google-api-python-client = "*" # For Google Sheets API
 google-auth-httplib2 = "*" # For Google Sheets authentication
@@ -1,6 +1,7 @@
 from unittest.mock import Mock

 import pytest
+from litellm import ChatCompletionMessageToolCall

 from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
 from openhands.agenthub.codeact_agent.function_calling import (
@@ -15,6 +16,7 @@ from openhands.agenthub.codeact_agent.function_calling import (
     get_tools,
     response_to_actions,
 )
+from openhands.controller.state.state import State
 from openhands.core.config import AgentConfig, LLMConfig
 from openhands.core.exceptions import FunctionCallNotExistsError
 from openhands.core.message import ImageContent, TextContent
@@ -48,6 +50,15 @@ def agent() -> CodeActAgent:
     return agent


+@pytest.fixture
+def mock_state() -> State:
+    state = Mock(spec=State)
+    state.history = []
+    state.extra_data = {}
+
+    return state
+
+
 def test_cmd_output_observation_message(agent: CodeActAgent):
     agent.config.function_calling = False
     obs = CmdOutputObservation(
@@ -481,7 +492,7 @@ def test_response_to_actions_invalid_tool():
         response_to_actions(mock_response)


-def test_step_with_no_pending_actions():
+def test_step_with_no_pending_actions(mock_state: State):
     # Mock the LLM response
     mock_response = Mock()
     mock_response.id = 'mock_id'
@@ -502,16 +513,68 @@ def test_step_with_no_pending_actions():
     agent = CodeActAgent(llm=llm, config=config)

     # Test step with no pending actions
-    state = Mock()
-    state.history = []
-    state.latest_user_message = None
-    state.latest_user_message_id = None
-    state.latest_user_message_timestamp = None
-    state.latest_user_message_cause = None
-    state.latest_user_message_timeout = None
-    state.latest_user_message_llm_metrics = None
-    state.latest_user_message_tool_call_metadata = None
+    mock_state.latest_user_message = None
+    mock_state.latest_user_message_id = None
+    mock_state.latest_user_message_timestamp = None
+    mock_state.latest_user_message_cause = None
+    mock_state.latest_user_message_timeout = None
+    mock_state.latest_user_message_llm_metrics = None
+    mock_state.latest_user_message_tool_call_metadata = None

-    action = agent.step(state)
+    action = agent.step(mock_state)
     assert isinstance(action, MessageAction)
     assert action.content == 'Task completed'
+
+
+def test_mismatched_tool_call_events(mock_state: State):
+    """Tests that the agent can convert mismatched tool call events (i.e., an observation with no corresponding action) into messages."""
+    agent = CodeActAgent(llm=LLM(LLMConfig()), config=AgentConfig())
+
+    tool_call_metadata = Mock(
+        spec=ToolCallMetadata,
+        model_response=Mock(
+            id='model_response_0',
+            choices=[
+                Mock(
+                    message=Mock(
+                        role='assistant',
+                        content='',
+                        tool_calls=[
+                            Mock(spec=ChatCompletionMessageToolCall, id='tool_call_0')
+                        ],
+                    )
+                )
+            ],
+        ),
+        tool_call_id='tool_call_0',
+        function_name='foo',
+    )
+
+    action = CmdRunAction('foo')
+    action._source = 'agent'
+    action.tool_call_metadata = tool_call_metadata
+
+    observation = CmdOutputObservation(content='', command_id=0, command='foo')
+    observation.tool_call_metadata = tool_call_metadata
+
+    # When both events are provided, the agent should get three messages:
+    # 1. The system message,
+    # 2. The action message, and
+    # 3. The observation message
+    mock_state.history = [action, observation]
+    messages = agent._get_messages(mock_state)
+    assert len(messages) == 3
+
+    # The same should hold if the events are presented out-of-order
+    mock_state.history = [observation, action]
+    messages = agent._get_messages(mock_state)
+    assert len(messages) == 3
+
+    # If only one of the two events is present, then we should just get the system message
+    mock_state.history = [action]
+    messages = agent._get_messages(mock_state)
+    assert len(messages) == 1
+
+    mock_state.history = [observation]
+    messages = agent._get_messages(mock_state)
+    assert len(messages) == 1
@@ -1,44 +1,520 @@
-from unittest.mock import Mock, patch
+from datetime import datetime
+from typing import Any
+from unittest.mock import MagicMock

 import pytest

-from openhands.core.exceptions import LLMResponseError
-from openhands.llm.llm import LLM
-from openhands.memory.condenser import MemoryCondenser
+from openhands.controller.state.state import State
+from openhands.core.config.condenser_config import (
+    AmortizedForgettingCondenserConfig,
+    LLMAttentionCondenserConfig,
+    LLMSummarizingCondenserConfig,
+    NoOpCondenserConfig,
+    ObservationMaskingCondenserConfig,
+    RecentEventsCondenserConfig,
+)
+from openhands.core.config.llm_config import LLMConfig
+from openhands.events.event import Event, EventSource
+from openhands.events.observation.observation import Observation
+from openhands.llm import LLM
+from openhands.memory.condenser import (
+    AmortizedForgettingCondenser,
+    Condenser,
+    ImportantEventSelection,
+    LLMAttentionCondenser,
+    LLMSummarizingCondenser,
+    NoOpCondenser,
+    ObservationMaskingCondenser,
+    RecentEventsCondenser,
+)
+
+
+def create_test_event(
+    message: str, timestamp: datetime | None = None, id: int | None = None
+) -> Event:
+    """Create a simple test event."""
+    event = Event()
+    event._message = message
+    event.timestamp = timestamp if timestamp else datetime.now()
+    if id is not None:
+        event._id = id
+    event._source = EventSource.USER
+    return event
 @pytest.fixture
-def memory_condenser():
-    return MemoryCondenser()
+def mock_llm() -> LLM:
+    """Mocks an LLM object with a utility function for setting and resetting response contents in unit tests."""
+    # Create a MagicMock for the LLM object
+    mock_llm = MagicMock(
+        spec=LLM,
+        config=MagicMock(
+            spec=LLMConfig, model='gpt-4o', api_key='test_key', custom_llm_provider=None
+        ),
+        metrics=MagicMock(),
+    )
+    _mock_content = None
+
+    # Set a mock message with the mocked content
+    mock_message = MagicMock()
+    mock_message.content = _mock_content
+
+    def set_mock_response_content(content: Any):
+        """Set the mock response for the LLM."""
+        nonlocal mock_message
+        mock_message.content = content
+
+    mock_choice = MagicMock()
+    mock_choice.message = mock_message
+
+    mock_response = MagicMock()
+    mock_response.choices = [mock_choice]
+
+    mock_llm.completion.return_value = mock_response
+
+    # Attach helper methods to the mock object
+    mock_llm.set_mock_response_content = set_mock_response_content
+
+    return mock_llm
+
+
 @pytest.fixture
-def mock_llm():
-    return Mock(spec=LLM)
+def mock_state() -> State:
+    """Mocks a State object with the only parameters needed for testing condensers: history and extra_data."""
+    mock_state = MagicMock(spec=State)
+    mock_state.history = []
+    mock_state.extra_data = {}
+
+    return mock_state
-def test_condense_success(memory_condenser, mock_llm):
-    mock_llm.completion.return_value = {
-        'choices': [{'message': {'content': 'Condensed memory'}}]
-    }
-    result = memory_condenser.condense('Summarize this', mock_llm)
-    assert result == 'Condensed memory'
-    mock_llm.completion.assert_called_once_with(
-        messages=[{'content': 'Summarize this', 'role': 'user'}]
-    )
+def test_noop_condenser_from_config():
+    """Test that NoOpCondenser objects can be made from config."""
+    config = NoOpCondenserConfig()
+    condenser = Condenser.from_config(config)
+
+    assert isinstance(condenser, NoOpCondenser)
+
+
+def test_noop_condenser():
+    """Test that NoOpCondensers preserve their input events."""
+    events = [
+        create_test_event('Event 1'),
+        create_test_event('Event 2'),
+        create_test_event('Event 3'),
+    ]
+
+    mock_state = MagicMock()
+    mock_state.history = events
+
+    condenser = NoOpCondenser()
+    result = condenser.condensed_history(mock_state)
+
+    assert result == events
+
+
+def test_observation_masking_condenser_from_config():
+    """Test that ObservationMaskingCondenser objects can be made from config."""
+    attention_window = 5
+    config = ObservationMaskingCondenserConfig(attention_window=attention_window)
+    condenser = Condenser.from_config(config)
+
+    assert isinstance(condenser, ObservationMaskingCondenser)
+    assert condenser.attention_window == attention_window
+
+
+def test_observation_masking_condenser_respects_attention_window(mock_state):
+    """Test that ObservationMaskingCondenser only masks events outside the attention window."""
+    attention_window = 3
+    condenser = ObservationMaskingCondenser(attention_window=attention_window)
+
+    events = [
+        create_test_event('Event 1'),
+        Observation('Observation 1'),
+        create_test_event('Event 3'),
+        create_test_event('Event 4'),
+        Observation('Observation 2'),
+    ]
+
+    mock_state.history = events
+    result = condenser.condensed_history(mock_state)
+
+    assert len(result) == len(events)
+
+    for index, (event, condensed_event) in enumerate(zip(events, result)):
+        # If we're outside the attention window, observations should be masked.
+        if index < len(events) - attention_window:
+            if isinstance(event, Observation):
+                assert '<MASKED>' in str(condensed_event)
+
+        # If we're within the attention window, events are unchanged.
+        else:
+            assert event == condensed_event
+
+
+def test_recent_events_condenser_from_config():
+    """Test that RecentEventsCondenser objects can be made from config."""
+    max_events = 5
+    keep_first = 1
+    config = RecentEventsCondenserConfig(keep_first=keep_first, max_events=max_events)
+    condenser = Condenser.from_config(config)
+
+    assert isinstance(condenser, RecentEventsCondenser)
+    assert condenser.max_events == max_events
+    assert condenser.keep_first == keep_first
+
+
+def test_recent_events_condenser():
+    """Test that RecentEventsCondensers keep just the most recent events."""
+    events = [
+        create_test_event('Event 1'),
+        create_test_event('Event 2'),
+        create_test_event('Event 3'),
+        create_test_event('Event 4'),
+        create_test_event('Event 5'),
+    ]
+
+    mock_state = MagicMock()
+    mock_state.history = events
+
+    # If max_events is larger than the number of events, this is equivalent to a NoOpCondenser.
+    condenser = RecentEventsCondenser(max_events=len(events))
+    result = condenser.condensed_history(mock_state)
+
+    assert result == events
+
+    # If max_events is smaller than the number of events, only keep the last few.
+    max_events = 2
+    condenser = RecentEventsCondenser(max_events=max_events)
+    result = condenser.condensed_history(mock_state)
+
+    assert len(result) == max_events
+    assert result[0]._message == 'Event 4'
+    assert result[1]._message == 'Event 5'
+
+    # If keep_first is set, the first event will always be present.
+    keep_first = 1
+    max_events = 2
+    condenser = RecentEventsCondenser(keep_first=keep_first, max_events=max_events)
+    result = condenser.condensed_history(mock_state)
+
+    assert len(result) == max_events
+    assert result[0]._message == 'Event 1'
+    assert result[1]._message == 'Event 5'
+
+    # We should be able to keep more of the initial events.
+    keep_first = 2
+    max_events = 3
+    condenser = RecentEventsCondenser(keep_first=keep_first, max_events=max_events)
+    result = condenser.condensed_history(mock_state)
+
+    assert len(result) == max_events
+    assert result[0]._message == 'Event 1'
+    assert result[1]._message == 'Event 2'
+    assert result[2]._message == 'Event 5'
+
+
+def test_llm_condenser_from_config():
+    """Test that LLMSummarizingCondensers can be made from config."""
+    config = LLMSummarizingCondenserConfig(
+        llm_config=LLMConfig(
+            model='gpt-4o',
+            api_key='test_key',
+        )
+    )
+    condenser = Condenser.from_config(config)
+
+    assert isinstance(condenser, LLMSummarizingCondenser)
+    assert condenser.llm.config.model == 'gpt-4o'
+    assert condenser.llm.config.api_key == 'test_key'
+
+
+def test_llm_condenser(mock_llm, mock_state):
+    """Test that LLMSummarizingCondensers use the LLM to generate a summary event."""
+    events = [
+        create_test_event('Event 1'),
+        create_test_event('Event 2'),
+    ]
+    mock_state.history = events
+
+    mock_llm.metrics = MagicMock()
+    mock_llm.metrics.get.return_value = {'test_metric': 1.0}
+
+    mock_llm.set_mock_response_content('Summary of events')
+
+    condenser = LLMSummarizingCondenser(llm=mock_llm)
+    result = condenser.condensed_history(mock_state)
+
+    assert len(result) == 1
+    assert result[0].content == 'Summary of events'
+
+    # Verify the LLM was called with the correct prompt.
+    mock_llm.completion.assert_called_once()
+    call_args = mock_llm.completion.call_args[1]
+    assert 'messages' in call_args
+    assert len(call_args['messages']) == 1
+    assert 'Event 1' in call_args['messages'][0]['content']
+    assert 'Event 2' in call_args['messages'][0]['content']
+
+    # Verify metrics were added to state
+    assert 'condenser_meta' in mock_state.extra_data
+    assert len(mock_state.extra_data['condenser_meta']) == 1
+    assert mock_state.extra_data['condenser_meta'][0]['metrics'] == {'test_metric': 1.0}
+
+
+def test_llm_condenser_error():
+    """Test that LLM errors are propagated during condensation."""
+    events = [create_test_event('Event 1', datetime(2024, 1, 1, 10, 0))]
+
+    mock_state = MagicMock()
+    mock_state.history = events
+
+    mock_llm = MagicMock()
+    mock_llm.completion.side_effect = Exception('LLM error')
+
+    condenser = LLMSummarizingCondenser(llm=mock_llm)
+
+    try:
+        condenser.condensed_history(mock_state)
+        raise AssertionError('Expected exception was not raised.')
+    except Exception as e:
+        assert str(e) == 'LLM error'
+
+
+def test_amortized_forgetting_condenser_from_config():
+    """Test that AmortizedForgettingCondenser objects can be made from config."""
+    max_size = 50
+    keep_first = 10
+    config = AmortizedForgettingCondenserConfig(
+        max_size=max_size, keep_first=keep_first
+    )
+    condenser = Condenser.from_config(config)
+
+    assert isinstance(condenser, AmortizedForgettingCondenser)
+    assert condenser.max_size == max_size
+    assert condenser.keep_first == keep_first
+
+
+def test_amortized_forgetting_condenser_invalid_config():
+    """Test that AmortizedForgettingCondenser rejects invalid max_size/keep_first combinations."""
+    pytest.raises(ValueError, AmortizedForgettingCondenser, max_size=4, keep_first=2)
+    pytest.raises(ValueError, AmortizedForgettingCondenser, max_size=0)
+    pytest.raises(ValueError, AmortizedForgettingCondenser, keep_first=-1)
+
+
+def test_amortized_forgetting_condenser_grows_to_max_size():
+    """Test that AmortizedForgettingCondenser correctly maintains an event context up to max size."""
+    max_size = 15
+    condenser = AmortizedForgettingCondenser(max_size=max_size)
+
+    mock_state = MagicMock()
+    mock_state.extra_data = {}
+    mock_state.history = []
+
+    for i in range(max_size):
+        event = create_test_event(f'Event {i}')
+        mock_state.history.append(event)
+        results = condenser.condensed_history(mock_state)
+        assert len(results) == i + 1
+
+
+def test_amortized_forgetting_condenser_forgets_when_larger_than_max_size():
+    """Test that the AmortizedForgettingCondenser forgets events when the context grows too large."""
+    max_size = 2
+    condenser = AmortizedForgettingCondenser(max_size=max_size)
+
+    mock_state = MagicMock()
+    mock_state.extra_data = {}
+    mock_state.history = []
+
+    for i in range(max_size * 10):
+        event = create_test_event(f'Event {i}')
+        mock_state.history.append(event)
+        results = condenser.condensed_history(mock_state)
+
+        # The last event in the results is always the event we just added.
+        assert results[-1] == event
+
+        # The number of results should bounce back and forth between 1, 2, 1, 2, ...
+        assert len(results) == (i % 2) + 1
+
+
+def test_amortized_forgetting_condenser_keeps_first_events():
+    """Test that the AmortizedForgettingCondenser keeps the right number of initial events when forgetting."""
+    max_size = 4
+    keep_first = 1
+    condenser = AmortizedForgettingCondenser(max_size=max_size, keep_first=keep_first)
+
+    first_event = create_test_event('Event 0')
+
+    mock_state = MagicMock()
+    mock_state.extra_data = {}
+    mock_state.history = [first_event]
+
+    for i in range(max_size * 10):
+        event = create_test_event(f'Event {i+1}', datetime(2024, 1, 1, 10, i + 1))
+        mock_state.history.append(event)
+        results = condenser.condensed_history(mock_state)
+
+        # The last event is always the event we just added.
+        assert results[-1] == event
+
+        # The first event is always the first event.
+        assert results[0] == first_event
+
+        # The number of results should cycle through 2, 3, 4, 2, 3, 4, ...
+        assert len(results) == (i % 3) + 2
+
+
+def test_llm_attention_condenser_from_config():
+    """Test that LLMAttentionCondenser objects can be made from config."""
+    config = LLMAttentionCondenserConfig(
+        max_size=50,
+        keep_first=10,
+        llm_config=LLMConfig(
+            model='gpt-4o',
+            api_key='test_key',
+        ),
+    )
+    condenser = Condenser.from_config(config)
+
+    assert isinstance(condenser, LLMAttentionCondenser)
+    assert condenser.llm.config.model == 'gpt-4o'
+    assert condenser.llm.config.api_key == 'test_key'
+    assert condenser.max_size == 50
+    assert condenser.keep_first == 10
+
+
+def test_llm_attention_condenser_invalid_config():
+    """Test that LLMAttentionCondenser raises an error if the configured LLM doesn't support response schema."""
+    config = LLMAttentionCondenserConfig(
+        max_size=50,
+        keep_first=10,
+        llm_config=LLMConfig(
+            model='claude-2',  # Older model that doesn't support response schema
+            api_key='test_key',
+        ),
+    )
+
-def test_condense_exception(memory_condenser, mock_llm):
-    mock_llm.completion.side_effect = LLMResponseError('LLM error')
-    with pytest.raises(LLMResponseError, match='LLM error'):
-        memory_condenser.condense('Summarize this', mock_llm)
+    pytest.raises(ValueError, LLMAttentionCondenser.from_config, config)
-
-
-@patch('openhands.memory.condenser.logger')
-def test_condense_logs_error(mock_logger, memory_condenser, mock_llm):
-    mock_llm.completion.side_effect = LLMResponseError('LLM error')
-    with pytest.raises(LLMResponseError):
-        memory_condenser.condense('Summarize this', mock_llm)
-    mock_logger.error.assert_called_once_with(
-        'Error condensing thoughts: %s', 'LLM error', exc_info=False
-    )
+
+
+def test_llm_attention_condenser_keeps_first_events(mock_llm, mock_state):
+    """Test that the LLMAttentionCondenser keeps the right number of initial events when forgetting."""
+    max_size = 4
+    condenser = LLMAttentionCondenser(max_size=max_size, keep_first=1, llm=mock_llm)
+
+    first_event = create_test_event('Event 0', id=0)
+    mock_state.history.append(first_event)
+
+    for i in range(max_size * 10):
+        event = create_test_event(f'Event {i+1}', id=i + 1)
+        mock_state.history.append(event)
+
+        mock_llm.set_mock_response_content(
+            ImportantEventSelection(
+                ids=[event.id for event in mock_state.history]
+            ).model_dump_json()
+        )
+        results = condenser.condensed_history(mock_state)
+
+        # The first event is always the first event.
+        assert results[0] == first_event
+
+
+def test_llm_attention_condenser_grows_to_max_size(mock_llm, mock_state):
+    """Test that LLMAttentionCondenser correctly maintains an event context up to max size."""
+    max_size = 15
+    condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)
+
+    for i in range(max_size):
+        event = create_test_event(f'Event {i}')
+        mock_state.history.append(event)
+        mock_llm.set_mock_response_content(
+            ImportantEventSelection(ids=[event.id for event in mock_state.history])
+        )
+        results = condenser.condensed_history(mock_state)
+        assert len(results) == i + 1
+
+
+def test_llm_attention_condenser_forgets_when_larger_than_max_size(
+    mock_llm, mock_state
+):
+    """Test that the LLMAttentionCondenser forgets events when the context grows too large."""
+    max_size = 2
+    condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)
+
+    for i in range(max_size * 10):
+        event = create_test_event(f'Event {i}', id=i)
+        mock_state.history.append(event)
+
+        mock_llm.set_mock_response_content(
+            ImportantEventSelection(
+                ids=[event.id for event in mock_state.history]
+            ).model_dump_json()
+        )
+
+        results = condenser.condensed_history(mock_state)
+
+        # The number of results should bounce back and forth between 1, 2, 1, 2, ...
+        assert len(results) == (i % 2) + 1
+
+
+def test_llm_attention_condenser_handles_events_outside_history(mock_llm, mock_state):
+    """Test that the LLMAttentionCondenser handles event IDs that aren't from the event history."""
+    max_size = 2
+    condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)
+
+    for i in range(max_size * 10):
+        event = create_test_event(f'Event {i}', id=i)
+        mock_state.history.append(event)
+
+        mock_llm.set_mock_response_content(
+            ImportantEventSelection(
+                ids=[event.id for event in mock_state.history] + [-1, -2, -3, -4]
+            ).model_dump_json()
+        )
+        results = condenser.condensed_history(mock_state)
+
+        # The number of results should bounce back and forth between 1, 2, 1, 2, ...
+        assert len(results) == (i % 2) + 1
+
+
+def test_llm_attention_condenser_handles_too_many_events(mock_llm, mock_state):
+    """Test that the LLMAttentionCondenser handles when the response contains too many event IDs."""
+    max_size = 2
+    condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)
+
+    for i in range(max_size * 10):
+        event = create_test_event(f'Event {i}', id=i)
+        mock_state.history.append(event)
+        mock_llm.set_mock_response_content(
+            ImportantEventSelection(
+                ids=[event.id for event in mock_state.history]
+                + [event.id for event in mock_state.history]
+            ).model_dump_json()
+        )
+        results = condenser.condensed_history(mock_state)
+
+        # The number of results should bounce back and forth between 1, 2, 1, 2, ...
+        assert len(results) == (i % 2) + 1
+
+
+def test_llm_attention_condenser_handles_too_few_events(mock_llm, mock_state):
+    """Test that the LLMAttentionCondenser handles when the response contains too few event IDs."""
+    max_size = 2
+    condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)
+
+    for i in range(max_size * 10):
+        event = create_test_event(f'Event {i}', id=i)
+        mock_state.history.append(event)
+
+        mock_llm.set_mock_response_content(
+            ImportantEventSelection(ids=[]).model_dump_json()
+        )
+
+        results = condenser.condensed_history(mock_state)
+
+        # The number of results should bounce back and forth between 1, 2, 1, 2, ...
+        assert len(results) == (i % 2) + 1
@@ -13,6 +13,9 @@ from openhands.core.config import (
     load_from_env,
     load_from_toml,
 )
+from openhands.core.config.condenser_config import (
+    NoOpCondenserConfig,
+)
 from openhands.core.logger import openhands_logger


@@ -618,6 +621,13 @@ def test_cache_dir_creation(default_config, tmpdir):
     assert os.path.exists(default_config.cache_dir)


+def test_agent_config_condenser_default():
+    """Test that the default agent condenser is NoOpCondenser."""
+    config = AppConfig()
+    agent_config = config.get_agent_config()
+    assert isinstance(agent_config.condenser, NoOpCondenserConfig)
+
+
 def test_api_keys_repr_str():
     # Test LLMConfig
     llm_config = LLMConfig(
@@ -75,7 +75,7 @@ def test_get_messages(codeact_agent: CodeActAgent):

     codeact_agent.reset()
     messages = codeact_agent._get_messages(
-        Mock(history=history, max_iterations=5, iteration=0)
+        Mock(history=history, max_iterations=5, iteration=0, extra_data={})
     )

     assert (
@@ -111,7 +111,7 @@ def test_get_messages_prompt_caching(codeact_agent: CodeActAgent):

     codeact_agent.reset()
     messages = codeact_agent._get_messages(
-        Mock(history=history, max_iterations=10, iteration=5)
+        Mock(history=history, max_iterations=10, iteration=5, extra_data={})
     )

     # Check that only the last two user messages have cache_prompt=True
@@ -144,6 +144,7 @@ def test_prompt_caching_headers(codeact_agent: CodeActAgent):
     mock_state.history = history
     mock_state.max_iterations = 5
     mock_state.iteration = 0
+    mock_state.extra_data = {}

     codeact_agent.reset()