Fix conversation ID validation to return 400 instead of 500 for long IDs (#10496)

2026-03-22 13:47:19 +08:00 · 2025-08-19 18:03:05 +04:00
parent 476954f3a4
commit 0297b3da18
3 changed files with 171 additions and 8 deletions
--- a/openhands/server/routes/manage_conversations.py
+++ b/openhands/server/routes/manage_conversations.py
@@ -63,7 +63,7 @@ from openhands.server.user_auth import (
 )
 from openhands.server.user_auth.user_auth import AuthType
 from openhands.server.utils import get_conversation as get_conversation_metadata
-from openhands.server.utils import get_conversation_store
+from openhands.server.utils import get_conversation_store, validate_conversation_id
 from openhands.storage.conversation.conversation_store import ConversationStore
 from openhands.storage.data_models.conversation_metadata import (
    ConversationMetadata,
@@ -297,7 +297,7 @@ async def search_conversations(

@app.get('/conversations/{conversation_id}')
 async def get_conversation(
-    conversation_id: str,
+    conversation_id: str = Depends(validate_conversation_id),
    conversation_store: ConversationStore = Depends(get_conversation_store),
 ) -> ConversationInfo | None:
    try:
@@ -319,7 +319,7 @@ async def get_conversation(

@app.delete('/conversations/{conversation_id}')
 async def delete_conversation(
-    conversation_id: str,
+    conversation_id: str = Depends(validate_conversation_id),
    user_id: str | None = Depends(get_user_id),
 ) -> bool:
    conversation_store = await ConversationStoreImpl.get_instance(config, user_id)
@@ -338,8 +338,8 @@ async def delete_conversation(

@app.get('/conversations/{conversation_id}/remember-prompt')
 async def get_prompt(
-    conversation_id: str,
    event_id: int,
+    conversation_id: str = Depends(validate_conversation_id),
    user_settings: SettingsStore = Depends(get_user_settings_store),
    metadata: ConversationMetadata = Depends(get_conversation_metadata),
 ):
@@ -440,8 +440,8 @@ async def _get_conversation_info(

@app.post('/conversations/{conversation_id}/start')
 async def start_conversation(
-    conversation_id: str,
    providers_set: ProvidersSetModel,
+    conversation_id: str = Depends(validate_conversation_id),
    user_id: str = Depends(get_user_id),
    settings: Settings = Depends(get_user_settings),
    conversation_store: ConversationStore = Depends(get_conversation_store),
@@ -501,7 +501,7 @@ async def start_conversation(

@app.post('/conversations/{conversation_id}/stop')
 async def stop_conversation(
-    conversation_id: str,
+    conversation_id: str = Depends(validate_conversation_id),
    user_id: str = Depends(get_user_id),
 ) -> ConversationResponse:
    """Stop an agent loop for a conversation.
@@ -606,8 +606,8 @@ class UpdateConversationRequest(BaseModel):

@app.patch('/conversations/{conversation_id}')
 async def update_conversation(
-    conversation_id: str,
    data: UpdateConversationRequest,
+    conversation_id: str = Depends(validate_conversation_id),
    user_id: str | None = Depends(get_user_id),
    conversation_store: ConversationStore = Depends(get_conversation_store),
 ) -> bool:
@@ -714,7 +714,8 @@ async def update_conversation(

@app.post('/conversations/{conversation_id}/exp-config')
 def add_experiment_config_for_conversation(
-    conversation_id: str, exp_config: ExperimentConfig
+    exp_config: ExperimentConfig,
+    conversation_id: str = Depends(validate_conversation_id),
 ) -> bool:
    exp_config_filepath = get_experiment_config_filename(conversation_id)
    exists = False
--- a/openhands/server/utils.py
+++ b/openhands/server/utils.py
@@ -13,6 +13,50 @@ from openhands.storage.conversation.conversation_store import ConversationStore
 from openhands.storage.data_models.conversation_metadata import ConversationMetadata


+def validate_conversation_id(conversation_id: str) -> str:
+    """
+    Validate conversation ID format and length.
+
+    Args:
+        conversation_id: The conversation ID to validate
+
+    Returns:
+        The validated conversation ID
+
+    Raises:
+        HTTPException: If the conversation ID is invalid
+    """
+    # Check length - UUID hex is 32 characters, allow some flexibility but not excessive
+    if len(conversation_id) > 100:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail='Conversation ID is too long',
+        )
+
+    # Check for null bytes and other problematic characters
+    if '\x00' in conversation_id:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail='Conversation ID contains invalid characters',
+        )
+
+    # Check for path traversal attempts
+    if '..' in conversation_id or '/' in conversation_id or '\\' in conversation_id:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail='Conversation ID contains invalid path characters',
+        )
+
+    # Check for control characters and newlines
+    if any(ord(c) < 32 for c in conversation_id):
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail='Conversation ID contains control characters',
+        )
+
+    return conversation_id
+
+
 async def get_conversation_store(request: Request) -> ConversationStore | None:
    conversation_store: ConversationStore | None = getattr(
        request.state, 'conversation_store', None
--- a/tests/unit/server/routes/test_conversation_id_validation.py
+++ b/tests/unit/server/routes/test_conversation_id_validation.py
@@ -0,0 +1,118 @@
+"""
+Test cases for conversation ID validation to ensure proper error handling.
+
+This addresses GitHub issue #10489 where long conversation IDs were returning
+500 Internal Server Error instead of proper 4xx errors.
+"""
+
+import pytest
+from fastapi import HTTPException, status
+
+from openhands.server.utils import validate_conversation_id
+
+
+class TestConversationIdValidation:
+    """Test conversation ID validation function."""
+
+    def test_valid_conversation_id(self):
+        """Test that valid conversation IDs pass validation."""
+        # Test normal UUID hex format (32 characters)
+        valid_id = 'a1b2c3d4e5f6789012345678901234ab'
+        result = validate_conversation_id(valid_id)
+        assert result == valid_id
+
+        # Test shorter valid ID
+        valid_id = 'abc123'
+        result = validate_conversation_id(valid_id)
+        assert result == valid_id
+
+        # Test alphanumeric with hyphens (UUID format)
+        valid_id = 'a1b2c3d4-e5f6-7890-1234-5678901234ab'
+        result = validate_conversation_id(valid_id)
+        assert result == valid_id
+
+    def test_long_conversation_id_rejected(self):
+        """Test that very long conversation IDs are rejected with 400 Bad Request.
+
+        This is the main test case for GitHub issue #10489.
+        """
+        # Test with 1000 character ID (similar to the reported issue)
+        long_id = 'a' * 1000
+
+        with pytest.raises(HTTPException) as exc_info:
+            validate_conversation_id(long_id)
+
+        assert exc_info.value.status_code == status.HTTP_400_BAD_REQUEST
+        assert 'too long' in exc_info.value.detail
+
+    def test_conversation_id_with_null_bytes_rejected(self):
+        """Test that conversation IDs with null bytes are rejected."""
+        invalid_id = 'valid\x00id'
+
+        with pytest.raises(HTTPException) as exc_info:
+            validate_conversation_id(invalid_id)
+
+        assert exc_info.value.status_code == status.HTTP_400_BAD_REQUEST
+        assert 'invalid characters' in exc_info.value.detail
+
+    def test_conversation_id_with_path_traversal_rejected(self):
+        """Test that conversation IDs with path traversal attempts are rejected."""
+        invalid_ids = [
+            '../../../etc/passwd',
+            'id/../other',
+            'id\\..\\other',
+            'id/with/slashes',
+            'id\\with\\backslashes',
+        ]
+
+        for invalid_id in invalid_ids:
+            with pytest.raises(HTTPException) as exc_info:
+                validate_conversation_id(invalid_id)
+
+            assert exc_info.value.status_code == status.HTTP_400_BAD_REQUEST
+            assert 'invalid path characters' in exc_info.value.detail
+
+    def test_conversation_id_with_control_characters_rejected(self):
+        """Test that conversation IDs with control characters are rejected."""
+        invalid_ids = [
+            'id\nwith\nnewlines',
+            'id\twith\ttabs',
+            'id\rwith\rcarriage',
+            'id\x01with\x02control',
+        ]
+
+        for invalid_id in invalid_ids:
+            with pytest.raises(HTTPException) as exc_info:
+                validate_conversation_id(invalid_id)
+
+            assert exc_info.value.status_code == status.HTTP_400_BAD_REQUEST
+            assert 'control characters' in exc_info.value.detail
+
+    def test_conversation_id_boundary_length(self):
+        """Test conversation ID length boundaries."""
+        # Test exactly 100 characters (should pass)
+        boundary_id = 'a' * 100
+        result = validate_conversation_id(boundary_id)
+        assert result == boundary_id
+
+        # Test 101 characters (should fail)
+        too_long_id = 'a' * 101
+        with pytest.raises(HTTPException) as exc_info:
+            validate_conversation_id(too_long_id)
+
+        assert exc_info.value.status_code == status.HTTP_400_BAD_REQUEST
+        assert 'too long' in exc_info.value.detail
+
+    def test_empty_conversation_id(self):
+        """Test that empty conversation ID is handled."""
+        # Empty string should pass validation (will fail later in business logic)
+        result = validate_conversation_id('')
+        assert result == ''
+
+    def test_conversation_id_with_spaces(self):
+        """Test that conversation IDs with spaces are allowed."""
+        # Spaces are printable characters (ASCII 32), so they should be allowed
+        # The business logic might reject them, but validation should pass
+        id_with_spaces = 'id with spaces'
+        result = validate_conversation_id(id_with_spaces)
+        assert result == id_with_spaces