feat(logging): JSON log config for Uvicorn when LOG_JSON=1 (#11264)

Co-authored-by: openhands <openhands@all-hands.dev>
2025-12-26 05:48:36 +08:00 · 2025-10-08 11:26:45 -04:00 · 2025-10-08 11:26:45 -04:00 · 843cc00e79
commit 843cc00e79
parent f3c6fd2122
7 changed files with 123 additions and 39 deletions
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@ -4,7 +4,6 @@ import asyncio
 import copy
 import os
 import time
-import traceback
 from typing import TYPE_CHECKING, Callable

 if TYPE_CHECKING:
@ -285,19 +284,28 @@ class AgentController:
            )
        self._closed = True

-    def log(self, level: str, message: str, extra: dict | None = None) -> None:
+    def log(
+        self,
+        level: str,
+        message: str,
+        extra: dict | None = None,
+        exc_info: bool = False,
+    ) -> None:
        """Logs a message to the agent controller's logger.

        Args:
            level (str): The logging level to use (e.g., 'info', 'debug', 'error').
            message (str): The message to log.
            extra (dict | None, optional): Additional fields to log. Includes session_id by default.
+            exc_info (bool, optional): Whether to include exception info. Defaults to False.
        """
        message = f'[Agent Controller {self.id}] {message}'
        if extra is None:
            extra = {}
        extra_merged = {'session_id': self.id, **extra}
-        getattr(logger, level)(message, extra=extra_merged, stacklevel=2)
+        getattr(logger, level)(
+            message, extra=extra_merged, exc_info=exc_info, stacklevel=2
+        )

    async def _react_to_exception(
        self,
@ -364,8 +372,8 @@ class AgentController:
        except Exception as e:
            self.log(
                'error',
-                f'Error while running the agent (session ID: {self.id}): {e}. '
-                f'Traceback: {traceback.format_exc()}',
+                f'Error while running the agent (session ID: {self.id}): {e}',
+                exc_info=True,
            )
            reported = RuntimeError(
                f'There was an unexpected error while running the agent: {e.__class__.__name__}. You can refresh the page or ask the agent to try again.'
--- a/openhands/core/logger.py
+++ b/openhands/core/logger.py
@ -345,7 +345,7 @@ logging.basicConfig(level=logging.ERROR)
 def log_uncaught_exceptions(
    ex_cls: type[BaseException], ex: BaseException, tb: TracebackType | None
 ) -> Any:
-    """Logs uncaught exceptions along with the traceback.
+    """Logs uncaught exceptions in structured form when JSON logging is enabled.

    Args:
        ex_cls: The type of the exception.
@ -355,9 +355,9 @@ def log_uncaught_exceptions(
    Returns:
        None
    """
-    if tb:  # Add check since tb can be None
-        logging.error(''.join(traceback.format_tb(tb)))
-    logging.error('{0}: {1}'.format(ex_cls, ex))
+    # Route uncaught exceptions through our logger with proper exc_info
+    # Avoid manual formatting which creates multi-line plain-text log entries
+    openhands_logger.error('Uncaught exception', exc_info=(ex_cls, ex, tb))


 sys.excepthook = log_uncaught_exceptions
@ -514,3 +514,74 @@ class OpenHandsLoggerAdapter(logging.LoggerAdapter):
        else:
            kwargs['extra'] = self.extra
        return msg, kwargs
+
+
+def get_uvicorn_json_log_config() -> dict:
+    """Returns a uvicorn log config dict for JSON structured logging.
+
+    This configuration ensures Uvicorn's error and access logs are emitted
+    as single-line JSON when LOG_JSON=1, avoiding multi-line plain-text
+    tracebacks in log aggregators like Datadog.
+
+    Returns:
+        A dict suitable for passing to uvicorn.run(..., log_config=...).
+    """
+    return {
+        'version': 1,
+        'disable_existing_loggers': False,
+        'formatters': {
+            # Uvicorn mutates 'default' and 'access' to set use_colors;
+            # keep them present using Uvicorn formatters
+            'default': {
+                '()': 'uvicorn.logging.DefaultFormatter',
+                'fmt': '%(levelprefix)s %(message)s',
+                'use_colors': None,
+            },
+            'access': {
+                '()': 'uvicorn.logging.AccessFormatter',
+                'fmt': '%(levelprefix)s %(client_addr)s - "%(request_line)s" %(status_code)s',
+                'use_colors': None,
+            },
+            # Actual JSON formatters used by handlers below
+            'json': {
+                '()': 'pythonjsonlogger.jsonlogger.JsonFormatter',
+                'fmt': '%(message)s %(levelname)s %(name)s %(asctime)s %(exc_info)s',
+            },
+            'json_access': {
+                '()': 'pythonjsonlogger.jsonlogger.JsonFormatter',
+                'fmt': '%(message)s %(levelname)s %(name)s %(asctime)s %(client_addr)s %(request_line)s %(status_code)s',
+            },
+        },
+        'handlers': {
+            'default': {
+                'class': 'logging.StreamHandler',
+                'level': 'INFO',
+                'formatter': 'json',
+                'stream': 'ext://sys.stdout',
+            },
+            'access': {
+                'class': 'logging.StreamHandler',
+                'level': 'INFO',
+                'formatter': 'json_access',
+                'stream': 'ext://sys.stdout',
+            },
+        },
+        'loggers': {
+            'uvicorn': {
+                'handlers': ['default'],
+                'level': 'INFO',
+                'propagate': False,
+            },
+            'uvicorn.error': {
+                'handlers': ['default'],
+                'level': 'INFO',
+                'propagate': False,
+            },
+            'uvicorn.access': {
+                'handlers': ['access'],
+                'level': 'INFO',
+                'propagate': False,
+            },
+        },
+        'root': {'level': 'INFO', 'handlers': ['default']},
+    }
--- a/openhands/events/observation/commands.py
+++ b/openhands/events/observation/commands.py
@ -1,6 +1,5 @@
 import json
 import re
-import traceback
 from dataclasses import dataclass, field
 from typing import Any, Self

@ -65,8 +64,8 @@ class CmdOutputMetadata(BaseModel):
                matches.append(match)
            except json.JSONDecodeError:
                logger.warning(
-                    f'Failed to parse PS1 metadata: {match.group(1)}. Skipping.'
-                    + traceback.format_exc()
+                    f'Failed to parse PS1 metadata: {match.group(1)}. Skipping.',
+                    exc_info=True,
                )
                continue  # Skip if not valid JSON
        return matches
--- a/openhands/runtime/action_execution_server.py
+++ b/openhands/runtime/action_execution_server.py
@ -14,7 +14,6 @@ import shutil
 import sys
 import tempfile
 import time
-import traceback
 from contextlib import asynccontextmanager
 from pathlib import Path
 from zipfile import ZipFile
@ -36,6 +35,7 @@ from uvicorn import run

 from openhands.core.config.mcp_config import MCPStdioServerConfig
 from openhands.core.exceptions import BrowserUnavailableException
+from openhands.core.logger import get_uvicorn_json_log_config
 from openhands.core.logger import openhands_logger as logger
 from openhands.events.action import (
    Action,
@ -241,7 +241,7 @@ class ActionExecutor:
            self.browser = BrowserEnv(self.browsergym_eval_env)
            logger.debug('Browser initialized asynchronously')
        except Exception as e:
-            logger.error(f'Failed to initialize browser: {e}')
+            logger.exception(f'Failed to initialize browser: {e}')
            self.browser = None

    async def _ensure_browser_ready(self):
@ -393,7 +393,7 @@ class ActionExecutor:
            obs = await call_sync_from_async(bash_session.execute, action)
            return obs
        except Exception as e:
-            logger.error(f'Error running command: {e}')
+            logger.exception(f'Error running command: {e}')
            return ErrorObservation(str(e))

    async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
@ -768,14 +768,14 @@ if __name__ == '__main__':

    @app.exception_handler(StarletteHTTPException)
    async def http_exception_handler(request: Request, exc: StarletteHTTPException):
-        logger.error(f'HTTP exception occurred: {exc.detail}')
+        logger.exception(f'HTTP exception occurred: {exc.detail}')
        return JSONResponse(status_code=exc.status_code, content={'detail': exc.detail})

    @app.exception_handler(RequestValidationError)
    async def validation_exception_handler(
        request: Request, exc: RequestValidationError
    ):
-        logger.error(f'Validation error occurred: {exc}')
+        logger.exception(f'Validation error occurred: {exc}')
        return JSONResponse(
            status_code=422,
            content={
@ -822,10 +822,10 @@ if __name__ == '__main__':
            observation = await client.run_action(action)
            return event_to_dict(observation)
        except Exception as e:
-            logger.error(f'Error while running /execute_action: {str(e)}')
+            logger.exception(f'Error while running /execute_action: {str(e)}')
            raise HTTPException(
                status_code=500,
-                detail=traceback.format_exc(),
+                detail=f'Internal server error: {str(e)}',
            )
        finally:
            update_last_execution_time()
@ -1067,8 +1067,12 @@ if __name__ == '__main__':
            return JSONResponse(content=sorted_entries)

        except Exception as e:
-            logger.error(f'Error listing files: {e}')
+            logger.exception(f'Error listing files: {e}')
            return JSONResponse(content=[])

    logger.debug(f'Starting action execution API on port {args.port}')
-    run(app, host='0.0.0.0', port=args.port)
+    # When LOG_JSON=1, provide a JSON log config to Uvicorn so error/access logs are structured
+    log_config = None
+    if os.getenv('LOG_JSON', '0') in ('1', 'true', 'True'):
+        log_config = get_uvicorn_json_log_config()
+    run(app, host='0.0.0.0', port=args.port, log_config=log_config, use_colors=False)
--- a/openhands/runtime/utils/bash.py
+++ b/openhands/runtime/utils/bash.py
@ -1,7 +1,6 @@
 import os
 import re
 import time
-import traceback
 import uuid
 from enum import Enum
 from typing import Any
@ -46,8 +45,8 @@ def split_bash_commands(commands: str) -> list[str]:
        logger.debug(
            f'Failed to parse bash commands\n'
            f'[input]: {commands}\n'
-            f'[warning]: {traceback.format_exc()}\n'
-            f'The original command will be returned as is.'
+            f'The original command will be returned as is.',
+            exc_info=True,
        )
        # If parsing fails, return the original commands
        return [commands]
@ -165,8 +164,8 @@ def escape_bash_special_chars(command: str) -> str:
        logger.debug(
            f'Failed to parse bash commands for special characters escape\n'
            f'[input]: {command}\n'
-            f'[warning]: {traceback.format_exc()}\n'
-            f'The original command will be returned as is.'
+            f'The original command will be returned as is.',
+            exc_info=True,
        )
        return command

--- a/openhands/runtime/utils/windows_bash.py
+++ b/openhands/runtime/utils/windows_bash.py
@ -6,7 +6,6 @@ way to manage PowerShell processes compared to using temporary script files.

 import os
 import time
-import traceback
 from pathlib import Path
 from threading import RLock

@ -155,8 +154,7 @@ class WindowsPowershellSession:
            self._initialized = True  # Set to True only on successful initialization
            logger.info(f'PowerShell runspace created. Initial CWD set to: {self._cwd}')
        except Exception as e:
-            logger.error(f'Failed to create or open PowerShell runspace: {e}')
-            logger.error(traceback.format_exc())
+            logger.exception(f'Failed to create or open PowerShell runspace: {e}')
            self.close()  # Ensure cleanup if init fails partially
            raise RuntimeError(f'Failed to initialize PowerShell runspace: {e}')

@ -177,8 +175,7 @@ class WindowsPowershellSession:
                # Optional: Confirm CWD even on success for robustness
                # self._confirm_cwd()
        except Exception as e:
-            logger.error(f'Exception setting initial CWD: {e}')
-            logger.error(traceback.format_exc())
+            logger.exception(f'Exception setting initial CWD: {e}')
            # Attempt to confirm CWD even if setting threw an exception
            self._confirm_cwd()
        finally:
@ -945,8 +942,7 @@ class WindowsPowershellSession:
                )

        except Exception as parse_ex:
-            logger.error(f'Exception during PowerShell command parsing: {parse_ex}')
-            logger.error(traceback.format_exc())
+            logger.exception(f'Exception during PowerShell command parsing: {parse_ex}')
            return ErrorObservation(
                content=f'ERROR: An exception occurred while parsing the command: {parse_ex}'
            )
@ -1118,10 +1114,9 @@ class WindowsPowershellSession:
                        with self._job_lock:
                            self.active_job = None
                except AttributeError as e:
-                    logger.error(
+                    logger.exception(
                        f'Get-Job returned an object without expected properties on BaseObject: {e}'
                    )
-                    logger.error(traceback.format_exc())
                    all_errors.append('Get-Job did not return a valid Job object.')
                    job_start_failed = True

@ -1131,8 +1126,7 @@ class WindowsPowershellSession:
                job_start_failed = True

        except Exception as start_ex:
-            logger.error(f'Exception during job start/retrieval: {start_ex}')
-            logger.error(traceback.format_exc())
+            logger.exception(f'Exception during job start/retrieval: {start_ex}')
            all_errors.append(f'[Job Start/Get Exception: {start_ex}]')
            job_start_failed = True
        finally:
@ -1403,8 +1397,7 @@ class WindowsPowershellSession:
                self.runspace.Dispose()
                logger.info('PowerShell runspace closed and disposed.')
            except Exception as e:
-                logger.error(f'Error closing/disposing PowerShell runspace: {e}')
-                logger.error(traceback.format_exc())
+                logger.exception(f'Error closing/disposing PowerShell runspace: {e}')

        self.runspace = None
        self._initialized = False
--- a/openhands/server/main.py
+++ b/openhands/server/main.py
@ -3,16 +3,26 @@ import warnings

 import uvicorn

+from openhands.core.logger import get_uvicorn_json_log_config
+

 def main():
    # Suppress SyntaxWarnings from pydub.utils about invalid escape sequences
    warnings.filterwarnings('ignore', category=SyntaxWarning, module=r'pydub\.utils')

+    # When LOG_JSON=1, configure Uvicorn to emit JSON logs for error/access
+    log_config = None
+    if os.getenv('LOG_JSON', '0') in ('1', 'true', 'True'):
+        log_config = get_uvicorn_json_log_config()
+
    uvicorn.run(
        'openhands.server.listen:app',
        host='0.0.0.0',
        port=int(os.environ.get('port') or '3000'),
        log_level='debug' if os.environ.get('DEBUG') else 'info',
+        log_config=log_config,
+        # If LOG_JSON enabled, force colors off; otherwise let uvicorn default
+        use_colors=False if log_config else None,
    )