From 843cc00e79e1b7cb2d926e17feeb25a26d89bebf Mon Sep 17 00:00:00 2001
From: Graham Neubig <neubig@gmail.com>
Date: Wed, 8 Oct 2025 11:26:45 -0400
Subject: [PATCH] feat(logging): JSON log config for Uvicorn when LOG_JSON=1
 (#11264)

Co-authored-by: openhands <openhands@all-hands.dev>
---
 openhands/controller/agent_controller.py     | 18 +++--
 openhands/core/logger.py                     | 79 +++++++++++++++++++-
 openhands/events/observation/commands.py     |  5 +-
 openhands/runtime/action_execution_server.py | 22 +++---
 openhands/runtime/utils/bash.py              |  9 +--
 openhands/runtime/utils/windows_bash.py      | 19 ++---
 openhands/server/__main__.py                 | 10 +++
 7 files changed, 123 insertions(+), 39 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index d54238f3b9..1106a28a49 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -4,7 +4,6 @@ import asyncio
 import copy
 import os
 import time
-import traceback
 from typing import TYPE_CHECKING, Callable
 
 if TYPE_CHECKING:
@@ -285,19 +284,28 @@ class AgentController:
             )
         self._closed = True
 
-    def log(self, level: str, message: str, extra: dict | None = None) -> None:
+    def log(
+        self,
+        level: str,
+        message: str,
+        extra: dict | None = None,
+        exc_info: bool = False,
+    ) -> None:
         """Logs a message to the agent controller's logger.
 
         Args:
             level (str): The logging level to use (e.g., 'info', 'debug', 'error').
             message (str): The message to log.
             extra (dict | None, optional): Additional fields to log. Includes session_id by default.
+            exc_info (bool, optional): Whether to include exception info. Defaults to False.
         """
         message = f'[Agent Controller {self.id}] {message}'
         if extra is None:
             extra = {}
         extra_merged = {'session_id': self.id, **extra}
-        getattr(logger, level)(message, extra=extra_merged, stacklevel=2)
+        getattr(logger, level)(
+            message, extra=extra_merged, exc_info=exc_info, stacklevel=2
+        )
 
     async def _react_to_exception(
         self,
@@ -364,8 +372,8 @@ class AgentController:
         except Exception as e:
             self.log(
                 'error',
-                f'Error while running the agent (session ID: {self.id}): {e}. '
-                f'Traceback: {traceback.format_exc()}',
+                f'Error while running the agent (session ID: {self.id}): {e}',
+                exc_info=True,
             )
             reported = RuntimeError(
                 f'There was an unexpected error while running the agent: {e.__class__.__name__}. You can refresh the page or ask the agent to try again.'
diff --git a/openhands/core/logger.py b/openhands/core/logger.py
index 9c28d79641..682246c1e9 100644
--- a/openhands/core/logger.py
+++ b/openhands/core/logger.py
@@ -345,7 +345,7 @@ logging.basicConfig(level=logging.ERROR)
 def log_uncaught_exceptions(
     ex_cls: type[BaseException], ex: BaseException, tb: TracebackType | None
 ) -> Any:
-    """Logs uncaught exceptions along with the traceback.
+    """Logs uncaught exceptions in structured form when JSON logging is enabled.
 
     Args:
         ex_cls: The type of the exception.
@@ -355,9 +355,9 @@ def log_uncaught_exceptions(
     Returns:
         None
     """
-    if tb:  # Add check since tb can be None
-        logging.error(''.join(traceback.format_tb(tb)))
-    logging.error('{0}: {1}'.format(ex_cls, ex))
+    # Route uncaught exceptions through our logger with proper exc_info
+    # Avoid manual formatting which creates multi-line plain-text log entries
+    openhands_logger.error('Uncaught exception', exc_info=(ex_cls, ex, tb))
 
 
 sys.excepthook = log_uncaught_exceptions
@@ -514,3 +514,74 @@ class OpenHandsLoggerAdapter(logging.LoggerAdapter):
         else:
             kwargs['extra'] = self.extra
         return msg, kwargs
+
+
+def get_uvicorn_json_log_config() -> dict:
+    """Returns a uvicorn log config dict for JSON structured logging.
+
+    This configuration ensures Uvicorn's error and access logs are emitted
+    as single-line JSON when LOG_JSON=1, avoiding multi-line plain-text
+    tracebacks in log aggregators like Datadog.
+
+    Returns:
+        A dict suitable for passing to uvicorn.run(..., log_config=...).
+    """
+    return {
+        'version': 1,
+        'disable_existing_loggers': False,
+        'formatters': {
+            # Uvicorn mutates 'default' and 'access' to set use_colors;
+            # keep them present using Uvicorn formatters
+            'default': {
+                '()': 'uvicorn.logging.DefaultFormatter',
+                'fmt': '%(levelprefix)s %(message)s',
+                'use_colors': None,
+            },
+            'access': {
+                '()': 'uvicorn.logging.AccessFormatter',
+                'fmt': '%(levelprefix)s %(client_addr)s - "%(request_line)s" %(status_code)s',
+                'use_colors': None,
+            },
+            # Actual JSON formatters used by handlers below
+            'json': {
+                '()': 'pythonjsonlogger.jsonlogger.JsonFormatter',
+                'fmt': '%(message)s %(levelname)s %(name)s %(asctime)s %(exc_info)s',
+            },
+            'json_access': {
+                '()': 'pythonjsonlogger.jsonlogger.JsonFormatter',
+                'fmt': '%(message)s %(levelname)s %(name)s %(asctime)s %(client_addr)s %(request_line)s %(status_code)s',
+            },
+        },
+        'handlers': {
+            'default': {
+                'class': 'logging.StreamHandler',
+                'level': 'INFO',
+                'formatter': 'json',
+                'stream': 'ext://sys.stdout',
+            },
+            'access': {
+                'class': 'logging.StreamHandler',
+                'level': 'INFO',
+                'formatter': 'json_access',
+                'stream': 'ext://sys.stdout',
+            },
+        },
+        'loggers': {
+            'uvicorn': {
+                'handlers': ['default'],
+                'level': 'INFO',
+                'propagate': False,
+            },
+            'uvicorn.error': {
+                'handlers': ['default'],
+                'level': 'INFO',
+                'propagate': False,
+            },
+            'uvicorn.access': {
+                'handlers': ['access'],
+                'level': 'INFO',
+                'propagate': False,
+            },
+        },
+        'root': {'level': 'INFO', 'handlers': ['default']},
+    }
diff --git a/openhands/events/observation/commands.py b/openhands/events/observation/commands.py
index 7c532fbe0e..da49ef641a 100644
--- a/openhands/events/observation/commands.py
+++ b/openhands/events/observation/commands.py
@@ -1,6 +1,5 @@
 import json
 import re
-import traceback
 from dataclasses import dataclass, field
 from typing import Any, Self
 
@@ -65,8 +64,8 @@ class CmdOutputMetadata(BaseModel):
                 matches.append(match)
             except json.JSONDecodeError:
                 logger.warning(
-                    f'Failed to parse PS1 metadata: {match.group(1)}. Skipping.'
-                    + traceback.format_exc()
+                    f'Failed to parse PS1 metadata: {match.group(1)}. Skipping.',
+                    exc_info=True,
                 )
                 continue  # Skip if not valid JSON
         return matches
diff --git a/openhands/runtime/action_execution_server.py b/openhands/runtime/action_execution_server.py
index 757c97e524..896b25ffe9 100644
--- a/openhands/runtime/action_execution_server.py
+++ b/openhands/runtime/action_execution_server.py
@@ -14,7 +14,6 @@ import shutil
 import sys
 import tempfile
 import time
-import traceback
 from contextlib import asynccontextmanager
 from pathlib import Path
 from zipfile import ZipFile
@@ -36,6 +35,7 @@ from uvicorn import run
 
 from openhands.core.config.mcp_config import MCPStdioServerConfig
 from openhands.core.exceptions import BrowserUnavailableException
+from openhands.core.logger import get_uvicorn_json_log_config
 from openhands.core.logger import openhands_logger as logger
 from openhands.events.action import (
     Action,
@@ -241,7 +241,7 @@ class ActionExecutor:
             self.browser = BrowserEnv(self.browsergym_eval_env)
             logger.debug('Browser initialized asynchronously')
         except Exception as e:
-            logger.error(f'Failed to initialize browser: {e}')
+            logger.exception(f'Failed to initialize browser: {e}')
             self.browser = None
 
     async def _ensure_browser_ready(self):
@@ -393,7 +393,7 @@ class ActionExecutor:
             obs = await call_sync_from_async(bash_session.execute, action)
             return obs
         except Exception as e:
-            logger.error(f'Error running command: {e}')
+            logger.exception(f'Error running command: {e}')
             return ErrorObservation(str(e))
 
     async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
@@ -768,14 +768,14 @@ if __name__ == '__main__':
 
     @app.exception_handler(StarletteHTTPException)
     async def http_exception_handler(request: Request, exc: StarletteHTTPException):
-        logger.error(f'HTTP exception occurred: {exc.detail}')
+        logger.exception(f'HTTP exception occurred: {exc.detail}')
         return JSONResponse(status_code=exc.status_code, content={'detail': exc.detail})
 
     @app.exception_handler(RequestValidationError)
     async def validation_exception_handler(
         request: Request, exc: RequestValidationError
     ):
-        logger.error(f'Validation error occurred: {exc}')
+        logger.exception(f'Validation error occurred: {exc}')
         return JSONResponse(
             status_code=422,
             content={
@@ -822,10 +822,10 @@ if __name__ == '__main__':
             observation = await client.run_action(action)
             return event_to_dict(observation)
         except Exception as e:
-            logger.error(f'Error while running /execute_action: {str(e)}')
+            logger.exception(f'Error while running /execute_action: {str(e)}')
             raise HTTPException(
                 status_code=500,
-                detail=traceback.format_exc(),
+                detail=f'Internal server error: {str(e)}',
             )
         finally:
             update_last_execution_time()
@@ -1067,8 +1067,12 @@ if __name__ == '__main__':
             return JSONResponse(content=sorted_entries)
 
         except Exception as e:
-            logger.error(f'Error listing files: {e}')
+            logger.exception(f'Error listing files: {e}')
             return JSONResponse(content=[])
 
     logger.debug(f'Starting action execution API on port {args.port}')
-    run(app, host='0.0.0.0', port=args.port)
+    # When LOG_JSON=1, provide a JSON log config to Uvicorn so error/access logs are structured
+    log_config = None
+    if os.getenv('LOG_JSON', '0') in ('1', 'true', 'True'):
+        log_config = get_uvicorn_json_log_config()
+    run(app, host='0.0.0.0', port=args.port, log_config=log_config, use_colors=False)
diff --git a/openhands/runtime/utils/bash.py b/openhands/runtime/utils/bash.py
index 8d929d948f..f7078352a9 100644
--- a/openhands/runtime/utils/bash.py
+++ b/openhands/runtime/utils/bash.py
@@ -1,7 +1,6 @@
 import os
 import re
 import time
-import traceback
 import uuid
 from enum import Enum
 from typing import Any
@@ -46,8 +45,8 @@ def split_bash_commands(commands: str) -> list[str]:
         logger.debug(
             f'Failed to parse bash commands\n'
             f'[input]: {commands}\n'
-            f'[warning]: {traceback.format_exc()}\n'
-            f'The original command will be returned as is.'
+            f'The original command will be returned as is.',
+            exc_info=True,
         )
         # If parsing fails, return the original commands
         return [commands]
@@ -165,8 +164,8 @@ def escape_bash_special_chars(command: str) -> str:
         logger.debug(
             f'Failed to parse bash commands for special characters escape\n'
             f'[input]: {command}\n'
-            f'[warning]: {traceback.format_exc()}\n'
-            f'The original command will be returned as is.'
+            f'The original command will be returned as is.',
+            exc_info=True,
         )
         return command
 
diff --git a/openhands/runtime/utils/windows_bash.py b/openhands/runtime/utils/windows_bash.py
index 7c787789ff..6ecaaa2da6 100644
--- a/openhands/runtime/utils/windows_bash.py
+++ b/openhands/runtime/utils/windows_bash.py
@@ -6,7 +6,6 @@ way to manage PowerShell processes compared to using temporary script files.
 
 import os
 import time
-import traceback
 from pathlib import Path
 from threading import RLock
 
@@ -155,8 +154,7 @@ class WindowsPowershellSession:
             self._initialized = True  # Set to True only on successful initialization
             logger.info(f'PowerShell runspace created. Initial CWD set to: {self._cwd}')
         except Exception as e:
-            logger.error(f'Failed to create or open PowerShell runspace: {e}')
-            logger.error(traceback.format_exc())
+            logger.exception(f'Failed to create or open PowerShell runspace: {e}')
             self.close()  # Ensure cleanup if init fails partially
             raise RuntimeError(f'Failed to initialize PowerShell runspace: {e}')
 
@@ -177,8 +175,7 @@ class WindowsPowershellSession:
                 # Optional: Confirm CWD even on success for robustness
                 # self._confirm_cwd()
         except Exception as e:
-            logger.error(f'Exception setting initial CWD: {e}')
-            logger.error(traceback.format_exc())
+            logger.exception(f'Exception setting initial CWD: {e}')
             # Attempt to confirm CWD even if setting threw an exception
             self._confirm_cwd()
         finally:
@@ -945,8 +942,7 @@ class WindowsPowershellSession:
                 )
 
         except Exception as parse_ex:
-            logger.error(f'Exception during PowerShell command parsing: {parse_ex}')
-            logger.error(traceback.format_exc())
+            logger.exception(f'Exception during PowerShell command parsing: {parse_ex}')
             return ErrorObservation(
                 content=f'ERROR: An exception occurred while parsing the command: {parse_ex}'
             )
@@ -1118,10 +1114,9 @@ class WindowsPowershellSession:
                         with self._job_lock:
                             self.active_job = None
                 except AttributeError as e:
-                    logger.error(
+                    logger.exception(
                         f'Get-Job returned an object without expected properties on BaseObject: {e}'
                     )
-                    logger.error(traceback.format_exc())
                     all_errors.append('Get-Job did not return a valid Job object.')
                     job_start_failed = True
 
@@ -1131,8 +1126,7 @@ class WindowsPowershellSession:
                 job_start_failed = True
 
         except Exception as start_ex:
-            logger.error(f'Exception during job start/retrieval: {start_ex}')
-            logger.error(traceback.format_exc())
+            logger.exception(f'Exception during job start/retrieval: {start_ex}')
             all_errors.append(f'[Job Start/Get Exception: {start_ex}]')
             job_start_failed = True
         finally:
@@ -1403,8 +1397,7 @@ class WindowsPowershellSession:
                 self.runspace.Dispose()
                 logger.info('PowerShell runspace closed and disposed.')
             except Exception as e:
-                logger.error(f'Error closing/disposing PowerShell runspace: {e}')
-                logger.error(traceback.format_exc())
+                logger.exception(f'Error closing/disposing PowerShell runspace: {e}')
 
         self.runspace = None
         self._initialized = False
diff --git a/openhands/server/__main__.py b/openhands/server/__main__.py
index 2f57570992..adf4ad0dbc 100644
--- a/openhands/server/__main__.py
+++ b/openhands/server/__main__.py
@@ -3,16 +3,26 @@ import warnings
 
 import uvicorn
 
+from openhands.core.logger import get_uvicorn_json_log_config
+
 
 def main():
     # Suppress SyntaxWarnings from pydub.utils about invalid escape sequences
     warnings.filterwarnings('ignore', category=SyntaxWarning, module=r'pydub\.utils')
 
+    # When LOG_JSON=1, configure Uvicorn to emit JSON logs for error/access
+    log_config = None
+    if os.getenv('LOG_JSON', '0') in ('1', 'true', 'True'):
+        log_config = get_uvicorn_json_log_config()
+
     uvicorn.run(
         'openhands.server.listen:app',
         host='0.0.0.0',
         port=int(os.environ.get('port') or '3000'),
         log_level='debug' if os.environ.get('DEBUG') else 'info',
+        log_config=log_config,
+        # If LOG_JSON enabled, force colors off; otherwise let uvicorn default
+        use_colors=False if log_config else None,
     )