[agent] Add LLM risk analyzer (#9349)

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Graham Neubig <neubig@gmail.com>
Co-authored-by: llamantino <213239228+llamantino@users.noreply.github.com>
Co-authored-by: mamoodi <mamoodiha@gmail.com>
Co-authored-by: Tim O'Farrell <tofarr@gmail.com>
Co-authored-by: Hiep Le <69354317+hieptl@users.noreply.github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Ryan H. Tran <descience.thh10@gmail.com>
Co-authored-by: Neeraj Panwar <49247372+npneeraj@users.noreply.github.com>
Co-authored-by: sp.wack <83104063+amanape@users.noreply.github.com>
Co-authored-by: Insop <1240382+insop@users.noreply.github.com>
Co-authored-by: test <test@test.com>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
Co-authored-by: Zhonghao Jiang <zhonghao.J@outlook.com>
Co-authored-by: Ray Myers <ray.myers@gmail.com>
This commit is contained in:
Xingyao Wang
2025-08-22 10:02:36 -04:00
committed by GitHub
parent 4507a25b85
commit ca424ec15d
53 changed files with 729 additions and 563 deletions

View File

@@ -1,4 +1,8 @@
from openhands.events.action.action import Action, ActionConfirmationStatus
from openhands.events.action.action import (
Action,
ActionConfirmationStatus,
ActionSecurityRisk,
)
from openhands.events.action.agent import (
AgentDelegateAction,
AgentFinishAction,
@@ -40,4 +44,5 @@ __all__ = [
'RecallAction',
'MCPAction',
'TaskTrackingAction',
'ActionSecurityRisk',
]

View File

@@ -11,7 +11,7 @@ class BrowseURLAction(Action):
thought: str = ''
action: str = ActionType.BROWSE
runnable: ClassVar[bool] = True
security_risk: ActionSecurityRisk | None = None
security_risk: ActionSecurityRisk = ActionSecurityRisk.UNKNOWN
return_axtree: bool = False
@property
@@ -33,7 +33,7 @@ class BrowseInteractiveAction(Action):
browsergym_send_msg_to_user: str = ''
action: str = ActionType.BROWSE_INTERACTIVE
runnable: ClassVar[bool] = True
security_risk: ActionSecurityRisk | None = None
security_risk: ActionSecurityRisk = ActionSecurityRisk.UNKNOWN
return_axtree: bool = False
@property

View File

@@ -25,7 +25,7 @@ class CmdRunAction(Action):
action: str = ActionType.RUN
runnable: ClassVar[bool] = True
confirmation_state: ActionConfirmationStatus = ActionConfirmationStatus.CONFIRMED
security_risk: ActionSecurityRisk | None = None
security_risk: ActionSecurityRisk = ActionSecurityRisk.UNKNOWN
@property
def message(self) -> str:
@@ -49,7 +49,7 @@ class IPythonRunCellAction(Action):
action: str = ActionType.RUN_IPYTHON
runnable: ClassVar[bool] = True
confirmation_state: ActionConfirmationStatus = ActionConfirmationStatus.CONFIRMED
security_risk: ActionSecurityRisk | None = None
security_risk: ActionSecurityRisk = ActionSecurityRisk.UNKNOWN
kernel_init_code: str = '' # code to run in the kernel (if the kernel is restarted)
def __str__(self) -> str:

View File

@@ -19,7 +19,7 @@ class FileReadAction(Action):
thought: str = ''
action: str = ActionType.READ
runnable: ClassVar[bool] = True
security_risk: ActionSecurityRisk | None = None
security_risk: ActionSecurityRisk = ActionSecurityRisk.UNKNOWN
impl_source: FileReadSource = FileReadSource.DEFAULT
view_range: list[int] | None = None # ONLY used in OH_ACI mode
@@ -42,7 +42,7 @@ class FileWriteAction(Action):
thought: str = ''
action: str = ActionType.WRITE
runnable: ClassVar[bool] = True
security_risk: ActionSecurityRisk | None = None
security_risk: ActionSecurityRisk = ActionSecurityRisk.UNKNOWN
@property
def message(self) -> str:
@@ -111,7 +111,7 @@ class FileEditAction(Action):
thought: str = ''
action: str = ActionType.EDIT
runnable: ClassVar[bool] = True
security_risk: ActionSecurityRisk | None = None
security_risk: ActionSecurityRisk = ActionSecurityRisk.UNKNOWN
impl_source: FileEditSource = FileEditSource.OH_ACI
def __repr__(self) -> str:

View File

@@ -12,7 +12,7 @@ class MCPAction(Action):
thought: str = ''
action: str = ActionType.MCP
runnable: ClassVar[bool] = True
security_risk: ActionSecurityRisk | None = None
security_risk: ActionSecurityRisk = ActionSecurityRisk.UNKNOWN
@property
def message(self) -> str:

View File

@@ -13,7 +13,7 @@ class MessageAction(Action):
image_urls: list[str] | None = None
wait_for_response: bool = False
action: str = ActionType.MESSAGE
security_risk: ActionSecurityRisk | None = None
security_risk: ActionSecurityRisk = ActionSecurityRisk.UNKNOWN
@property
def message(self) -> str:

View File

@@ -1,7 +1,7 @@
from typing import Any
from openhands.core.exceptions import LLMMalformedActionError
from openhands.events.action.action import Action
from openhands.events.action.action import Action, ActionSecurityRisk
from openhands.events.action.agent import (
AgentDelegateAction,
AgentFinishAction,
@@ -124,6 +124,15 @@ def action_from_dict(action: dict) -> Action:
if 'images_urls' in args:
args['image_urls'] = args.pop('images_urls')
# Handle security_risk deserialization
if 'security_risk' in args and args['security_risk'] is not None:
try:
# Convert numeric value (int) back to enum
args['security_risk'] = ActionSecurityRisk(args['security_risk'])
except (ValueError, TypeError):
# If conversion fails, remove the invalid value
args.pop('security_risk')
# handle deprecated args
args = handle_action_deprecated_args(args)

View File

@@ -119,12 +119,17 @@ def event_to_dict(event: 'Event') -> dict:
if key == 'llm_metrics' and 'llm_metrics' in d:
d['llm_metrics'] = d['llm_metrics'].get()
props.pop(key, None)
if 'security_risk' in props and props['security_risk'] is None:
props.pop('security_risk')
# Remove task_completed from serialization when it's None (backward compatibility)
if 'task_completed' in props and props['task_completed'] is None:
props.pop('task_completed')
if 'action' in d:
# Handle security_risk for actions - include it in args
if 'security_risk' in props:
props['security_risk'] = props['security_risk'].value
d['args'] = props
if event.timeout is not None:
d['timeout'] = event.timeout

View File

@@ -22,7 +22,6 @@ from openhands.utils.shutdown_listener import should_continue
class EventStreamSubscriber(str, Enum):
AGENT_CONTROLLER = 'agent_controller'
SECURITY_ANALYZER = 'security_analyzer'
RESOLVER = 'openhands_resolver'
SERVER = 'server'
RUNTIME = 'runtime'