Add enable_history_truncation option to disable history truncation (#6820)

Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-12-26 05:48:36 +08:00 · 2025-02-20 22:43:02 -08:00 · 2025-02-20 22:43:02 -08:00 · d33913e036
commit d33913e036
parent e52aee168e
6 changed files with 110 additions and 18 deletions
--- a/config.template.toml
+++ b/config.template.toml
@ -234,6 +234,10 @@ codeact_enable_jupyter = true
 # List of microagents to disable
 #disabled_microagents = []

+# Whether to truncate history so the session can continue when the LLM context
+# length limit is hit
+enable_history_truncation = true
+
 [agent.RepoExplorerAgent]
 # Example: use a cheaper model for RepoExplorerAgent to reduce cost, especially
 # useful when an agent doesn't demand high quality but uses a lot of tokens
--- a/docs/modules/usage/configuration-options.md
+++ b/docs/modules/usage/configuration-options.md
@ -340,6 +340,11 @@ The agent configuration options are defined in the `[agent]` and `[agent.<agent_
  - Default: `false`
  - Description: Whether Jupyter is enabled in the action space

+- `enable_history_truncation`
+  - Type: `bool`
+  - Default: `true`
+  - Description: Whether to truncate history so the session can continue when the LLM context length limit is hit. When `false`, the agent instead enters the error state with an `LLMContextWindowExceedError`
+
 ### Microagent Usage
 - `enable_prompt_extensions`
  - Type: `bool`
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@ -21,6 +21,7 @@ from openhands.core.exceptions import (
    AgentStuckInLoopError,
    FunctionCallNotExistsError,
    FunctionCallValidationError,
+    LLMContextWindowExceedError,
    LLMMalformedActionError,
    LLMNoActionError,
    LLMResponseError,
@ -251,6 +252,7 @@ class AgentController:
                isinstance(e, litellm.AuthenticationError)
                or isinstance(e, litellm.BadRequestError)
                or isinstance(e, RateLimitError)
+                or isinstance(e, LLMContextWindowExceedError)
            ):
                reported = e
            await self._react_to_exception(reported)
@ -698,24 +700,28 @@ class AgentController:
                    or 'prompt is too long' in error_str
                    or isinstance(e, ContextWindowExceededError)
                ):
-                    # When context window is exceeded, keep roughly half of agent interactions
-                    self.state.history = self._apply_conversation_window(
-                        self.state.history
-                    )
+                    if self.agent.config.enable_history_truncation:
+                        # When context window is exceeded, keep roughly half of agent interactions
+                        self.state.history = self._apply_conversation_window(
+                            self.state.history
+                        )

-                    # Save the ID of the first event in our truncated history for future reloading
-                    if self.state.history:
-                        self.state.start_id = self.state.history[0].id
+                        # Save the ID of the first event in our truncated history for future reloading
+                        if self.state.history:
+                            self.state.start_id = self.state.history[0].id

-                    # Add an error event to trigger another step by the agent
-                    self.event_stream.add_event(
-                        AgentCondensationObservation(
-                            content='Trimming prompt to meet context window limitations'
-                        ),
-                        EventSource.AGENT,
-                    )
-                    return
-                raise e
+                        # Add a condensation observation to trigger another step by the agent
+                        self.event_stream.add_event(
+                            AgentCondensationObservation(
+                                content='Trimming prompt to meet context window limitations'
+                            ),
+                            EventSource.AGENT,
+                        )
+                        return
+                    else:
+                        raise LLMContextWindowExceedError()
+                else:
+                    raise e

        if action.runnable:
            if self.state.confirmation_mode and (
--- a/openhands/core/config/agent_config.py
+++ b/openhands/core/config/agent_config.py
@ -18,6 +18,7 @@ class AgentConfig(BaseModel):
        enable_prompt_extensions: Whether to use prompt extensions (e.g., microagents, inject runtime info). Default is True.
        disabled_microagents: A list of microagents to disable. Default is None.
        condenser: Configuration for the memory condenser. Default is NoOpCondenserConfig.
+        enable_history_truncation: Whether history should be truncated once the LLM context limit is hit. Default is True.
    """

    codeact_enable_browsing: bool = Field(default=True)
@ -31,3 +32,4 @@ class AgentConfig(BaseModel):
    enable_prompt_extensions: bool = Field(default=True)
    disabled_microagents: list[str] | None = Field(default=None)
    condenser: CondenserConfig = Field(default_factory=NoOpCondenserConfig)
+    enable_history_truncation: bool = Field(default=True)
--- a/openhands/core/exceptions.py
+++ b/openhands/core/exceptions.py
@ -98,6 +98,14 @@ class OperationCancelled(Exception):
        super().__init__(message)


+class LLMContextWindowExceedError(RuntimeError):
+    def __init__(
+        self,
+        message='Conversation history longer than LLM context window limit. Consider turning on enable_history_truncation config to avoid this error',
+    ):
+        super().__init__(message)
+
+
 # ============================================
 # LLM function calling Exceptions
 # ============================================
--- a/tests/unit/test_agent_controller.py
+++ b/tests/unit/test_agent_controller.py
@ -9,6 +9,7 @@ from openhands.controller.agent import Agent
 from openhands.controller.agent_controller import AgentController
 from openhands.controller.state.state import State, TrafficControlState
 from openhands.core.config import AppConfig
+from openhands.core.config.agent_config import AgentConfig
 from openhands.core.main import run_controller
 from openhands.core.schema import AgentState
 from openhands.events import Event, EventSource, EventStream, EventStreamSubscriber
@ -605,6 +606,7 @@ async def test_context_window_exceeded_error_handling(mock_agent, mock_event_str

    state = StepState()
    mock_agent.step = state.step
+    mock_agent.config = AgentConfig()

    controller = AgentController(
        agent=mock_agent,
@ -627,8 +629,10 @@ async def test_context_window_exceeded_error_handling(mock_agent, mock_event_str


@pytest.mark.asyncio
-async def test_run_controller_with_context_window_exceeded(mock_agent, mock_runtime):
-    """Tests that the controller can make progress after handling context window exceeded errors."""
+async def test_run_controller_with_context_window_exceeded_with_truncation(
+    mock_agent, mock_runtime
+):
+    """Tests that the controller can make progress after handling context window exceeded errors when enable_history_truncation is ON."""

    class StepState:
        def __init__(self):
@ -650,6 +654,7 @@ async def test_run_controller_with_context_window_exceeded(mock_agent, mock_runt

    step_state = StepState()
    mock_agent.step = step_state.step
+    mock_agent.config = AgentConfig()

    try:
        state = await asyncio.wait_for(
@ -682,3 +687,65 @@ async def test_run_controller_with_context_window_exceeded(mock_agent, mock_runt

    # Check that the context window exceeded error was raised during the run
    assert step_state.has_errored
+
+
+@pytest.mark.asyncio
+async def test_run_controller_with_context_window_exceeded_without_truncation(
+    mock_agent, mock_runtime
+):
+    """Tests that the controller quits on context window exceeded errors when enable_history_truncation is OFF."""
+
+    class StepState:
+        def __init__(self):
+            self.has_errored = False
+
+        def step(self, state: State):
+            # If the state has more than one message and we haven't errored yet,
+            # throw the context window exceeded error
+            if len(state.history) > 1 and not self.has_errored:
+                error = ContextWindowExceededError(
+                    message='prompt is too long: 233885 tokens > 200000 maximum',
+                    model='',
+                    llm_provider='',
+                )
+                self.has_errored = True
+                raise error
+
+            return MessageAction(content=f'STEP {len(state.history)}')
+
+    step_state = StepState()
+    mock_agent.step = step_state.step
+    mock_agent.config = AgentConfig()
+    mock_agent.config.enable_history_truncation = False
+
+    try:
+        state = await asyncio.wait_for(
+            run_controller(
+                config=AppConfig(max_iterations=3),
+                initial_user_action=MessageAction(content='INITIAL'),
+                runtime=mock_runtime,
+                sid='test',
+                agent=mock_agent,
+                fake_user_response_fn=lambda _: 'repeat',
+            ),
+            timeout=10,
+        )
+
+    # A timeout error indicates the run_controller entrypoint is not making
+    # progress
+    except asyncio.TimeoutError as e:
+        raise AssertionError(
+            'The run_controller function did not complete in time.'
+        ) from e
+
+    # Stopping at iteration 2 (below max_iterations=3) in the ERROR state shows the
+    # controller quit on the context window error rather than the iteration limit
+    assert state.iteration == 2
+    assert state.agent_state == AgentState.ERROR
+    assert (
+        state.last_error
+        == 'LLMContextWindowExceedError: Conversation history longer than LLM context window limit. Consider turning on enable_history_truncation config to avoid this error'
+    )
+
+    # Check that the context window exceeded error was raised during the run
+    assert step_state.has_errored