mirror of
https://github.com/OpenHands/OpenHands.git
synced 2026-03-22 13:47:19 +08:00
Logging security (#1943)
* update .gitignore * Rename the confusing 'INFO' style to 'DETAIL' * override str and repr * feat: api_key desensitize * feat: add SensitiveDataFilter in file handler * tweak regex, add tests * more tweaks, include other attrs * add env vars, those with equivalent config * fix tests * tests are invaluable --------- Co-authored-by: Shimada666 <649940882@qq.com>
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -126,6 +126,7 @@ env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
.env.bak
|
||||
venv.bak/
|
||||
*venv/
|
||||
|
||||
@@ -202,6 +203,7 @@ cache
|
||||
|
||||
# configuration
|
||||
config.toml
|
||||
config.toml.bak
|
||||
evaluation/swe_bench/eval_workspace
|
||||
evaluation/outputs
|
||||
evaluation/evaluation_outputs
|
||||
|
||||
@@ -155,7 +155,7 @@ def get_prompt(state: State) -> str:
|
||||
else:
|
||||
plan_status = "You're not currently working on any tasks. Your next action MUST be to mark a task as in_progress."
|
||||
hint = get_hint(event_to_memory(latest_action).get('action', ''))
|
||||
logger.info('HINT:\n' + hint, extra={'msg_type': 'INFO'})
|
||||
logger.info('HINT:\n' + hint, extra={'msg_type': 'DETAIL'})
|
||||
task = state.get_current_user_intent()
|
||||
return prompt % {
|
||||
'task': task,
|
||||
|
||||
@@ -75,6 +75,22 @@ class LLMConfig(metaclass=Singleton):
|
||||
dict[f.name] = get_field_info(f)
|
||||
return dict
|
||||
|
||||
def __str__(self):
|
||||
attr_str = []
|
||||
for f in fields(self):
|
||||
attr_name = f.name
|
||||
attr_value = getattr(self, f.name)
|
||||
|
||||
if attr_name in ['api_key', 'aws_access_key_id', 'aws_secret_access_key']:
|
||||
attr_value = '******' if attr_value else None
|
||||
|
||||
attr_str.append(f'{attr_name}={repr(attr_value)}')
|
||||
|
||||
return f"LLMConfig({', '.join(attr_str)})"
|
||||
|
||||
def __repr__(self):
|
||||
return self.__str__()
|
||||
|
||||
|
||||
@dataclass
|
||||
class AgentConfig(metaclass=Singleton):
|
||||
@@ -185,6 +201,22 @@ class AppConfig(metaclass=Singleton):
|
||||
dict[f.name] = get_field_info(f)
|
||||
return dict
|
||||
|
||||
def __str__(self):
|
||||
attr_str = []
|
||||
for f in fields(self):
|
||||
attr_name = f.name
|
||||
attr_value = getattr(self, f.name)
|
||||
|
||||
if attr_name in ['e2b_api_key', 'github_token']:
|
||||
attr_value = '******' if attr_value else None
|
||||
|
||||
attr_str.append(f'{attr_name}={repr(attr_value)}')
|
||||
|
||||
return f"AppConfig({', '.join(attr_str)}"
|
||||
|
||||
def __repr__(self):
|
||||
return self.__str__()
|
||||
|
||||
|
||||
def get_field_info(field):
|
||||
"""
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import traceback
|
||||
from datetime import datetime
|
||||
@@ -33,7 +34,7 @@ LOG_COLORS: Mapping[str, ColorType] = {
|
||||
'BACKGROUND LOG': 'blue',
|
||||
'ACTION': 'green',
|
||||
'OBSERVATION': 'yellow',
|
||||
'INFO': 'cyan',
|
||||
'DETAIL': 'cyan',
|
||||
'ERROR': 'red',
|
||||
'PLAN': 'light_magenta',
|
||||
}
|
||||
@@ -48,9 +49,9 @@ class ColoredFormatter(logging.Formatter):
|
||||
time_str = colored(
|
||||
self.formatTime(record, self.datefmt), LOG_COLORS[msg_type]
|
||||
)
|
||||
name_str = colored(record.name, 'cyan')
|
||||
level_str = colored(record.levelname, 'yellow')
|
||||
if msg_type in ['ERROR', 'INFO']:
|
||||
name_str = colored(record.name, LOG_COLORS[msg_type])
|
||||
level_str = colored(record.levelname, LOG_COLORS[msg_type])
|
||||
if msg_type in ['ERROR']:
|
||||
return f'{time_str} - {name_str}:{level_str}: {record.filename}:{record.lineno}\n{msg_type_color}\n{msg}'
|
||||
return f'{time_str} - {msg_type_color}\n{msg}'
|
||||
elif msg_type == 'STEP':
|
||||
@@ -71,6 +72,38 @@ file_formatter = logging.Formatter(
|
||||
llm_formatter = logging.Formatter('%(message)s')
|
||||
|
||||
|
||||
class SensitiveDataFilter(logging.Filter):
|
||||
def filter(self, record):
|
||||
# start with attributes
|
||||
sensitive_patterns = [
|
||||
'api_key',
|
||||
'aws_access_key_id',
|
||||
'aws_secret_access_key',
|
||||
'e2b_api_key',
|
||||
'github_token',
|
||||
]
|
||||
|
||||
# add env var names
|
||||
env_vars = [attr.upper() for attr in sensitive_patterns]
|
||||
sensitive_patterns.extend(env_vars)
|
||||
|
||||
# and some special cases
|
||||
sensitive_patterns.append('LLM_API_KEY')
|
||||
sensitive_patterns.append('SANDBOX_ENV_GITHUB_TOKEN')
|
||||
|
||||
# this also formats the message with % args
|
||||
msg = record.getMessage()
|
||||
record.args = ()
|
||||
|
||||
for attr in sensitive_patterns:
|
||||
pattern = rf"{attr}='?([\w-]+)'?"
|
||||
msg = re.sub(pattern, f"{attr}='******'", msg)
|
||||
|
||||
# passed with msg
|
||||
record.msg = msg
|
||||
return True
|
||||
|
||||
|
||||
def get_console_handler():
|
||||
"""
|
||||
Returns a console handler for logging.
|
||||
@@ -121,6 +154,7 @@ opendevin_logger = logging.getLogger('opendevin')
|
||||
opendevin_logger.setLevel(logging.INFO)
|
||||
opendevin_logger.addHandler(get_file_handler())
|
||||
opendevin_logger.addHandler(get_console_handler())
|
||||
opendevin_logger.addFilter(SensitiveDataFilter(opendevin_logger.name))
|
||||
opendevin_logger.propagate = False
|
||||
opendevin_logger.debug('Logging initialized')
|
||||
opendevin_logger.debug(
|
||||
|
||||
@@ -282,3 +282,6 @@ class LLM:
|
||||
elif self.base_url:
|
||||
return f'LLM(model={self.model_name}, base_url={self.base_url})'
|
||||
return f'LLM(model={self.model_name})'
|
||||
|
||||
def __repr__(self):
|
||||
return str(self)
|
||||
|
||||
@@ -235,4 +235,4 @@ NOW, LET'S START!
|
||||
|
||||
Install and import pymsgbox==1.0.9 and print it's version in /workspace/test.txt. Do not ask me for confirmation at any point.
|
||||
|
||||
ENVIRONMENT REMINDER: You have 9 turns left to complete the task.
|
||||
ENVIRONMENT REMINDER: You have 9 turns left to complete the task.
|
||||
@@ -254,4 +254,4 @@ Once the installation is complete, we'll import `pymsgbox` and print its version
|
||||
OBSERVATION:
|
||||
Package installed successfully
|
||||
|
||||
ENVIRONMENT REMINDER: You have 8 turns left to complete the task.
|
||||
ENVIRONMENT REMINDER: You have 8 turns left to complete the task.
|
||||
@@ -279,4 +279,4 @@ with open("/workspace/test.txt", "w") as f:
|
||||
OBSERVATION:
|
||||
[Code executed successfully with no output]
|
||||
|
||||
ENVIRONMENT REMINDER: You have 7 turns left to complete the task.
|
||||
ENVIRONMENT REMINDER: You have 7 turns left to complete the task.
|
||||
@@ -381,3 +381,5 @@ Notes:
|
||||
* whenever an action fails, always send a `message` about why it may have happened before acting again.
|
||||
|
||||
What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
|
||||
|
||||
|
||||
|
||||
@@ -210,3 +210,78 @@ def test_cache_dir_creation(default_config, tmpdir):
|
||||
default_config.cache_dir = str(tmpdir.join('test_cache'))
|
||||
finalize_config(default_config)
|
||||
assert os.path.exists(default_config.cache_dir)
|
||||
|
||||
|
||||
def test_api_keys_repr_str():
|
||||
# Test LLMConfig
|
||||
llm_config = LLMConfig(
|
||||
api_key='my_api_key',
|
||||
aws_access_key_id='my_access_key',
|
||||
aws_secret_access_key='my_secret_key',
|
||||
)
|
||||
assert "api_key='******'" in repr(llm_config)
|
||||
assert "aws_access_key_id='******'" in repr(llm_config)
|
||||
assert "aws_secret_access_key='******'" in repr(llm_config)
|
||||
assert "api_key='******'" in str(llm_config)
|
||||
assert "aws_access_key_id='******'" in str(llm_config)
|
||||
assert "aws_secret_access_key='******'" in str(llm_config)
|
||||
|
||||
# Check that no other attrs in LLMConfig have 'key' or 'token' in their name
|
||||
# This will fail when new attrs are added, and attract attention
|
||||
known_key_token_attrs_llm = [
|
||||
'api_key',
|
||||
'aws_access_key_id',
|
||||
'aws_secret_access_key',
|
||||
]
|
||||
for attr_name in dir(LLMConfig):
|
||||
if (
|
||||
not attr_name.startswith('__')
|
||||
and attr_name not in known_key_token_attrs_llm
|
||||
):
|
||||
assert (
|
||||
'key' not in attr_name.lower()
|
||||
), f"Unexpected attribute '{attr_name}' contains 'key' in LLMConfig"
|
||||
assert (
|
||||
'token' not in attr_name.lower() or 'tokens' in attr_name.lower()
|
||||
), f"Unexpected attribute '{attr_name}' contains 'token' in LLMConfig"
|
||||
|
||||
# Test AgentConfig
|
||||
# No attrs in AgentConfig have 'key' or 'token' in their name
|
||||
agent_config = AgentConfig(
|
||||
name='my_agent', memory_enabled=True, memory_max_threads=4
|
||||
)
|
||||
for attr_name in dir(AgentConfig):
|
||||
if not attr_name.startswith('__'):
|
||||
assert (
|
||||
'key' not in attr_name.lower()
|
||||
), f"Unexpected attribute '{attr_name}' contains 'key' in AgentConfig"
|
||||
assert (
|
||||
'token' not in attr_name.lower() or 'tokens' in attr_name.lower()
|
||||
), f"Unexpected attribute '{attr_name}' contains 'token' in AgentConfig"
|
||||
|
||||
# Test AppConfig
|
||||
app_config = AppConfig(
|
||||
llm=llm_config,
|
||||
agent=agent_config,
|
||||
e2b_api_key='my_e2b_api_key',
|
||||
github_token='my_github_token',
|
||||
)
|
||||
assert "e2b_api_key='******'" in repr(app_config)
|
||||
assert "github_token='******'" in repr(app_config)
|
||||
assert "e2b_api_key='******'" in str(app_config)
|
||||
assert "github_token='******'" in str(app_config)
|
||||
|
||||
# Check that no other attrs in AppConfig have 'key' or 'token' in their name
|
||||
# This will fail when new attrs are added, and attract attention
|
||||
known_key_token_attrs_app = ['e2b_api_key', 'github_token']
|
||||
for attr_name in dir(AppConfig):
|
||||
if (
|
||||
not attr_name.startswith('__')
|
||||
and attr_name not in known_key_token_attrs_app
|
||||
):
|
||||
assert (
|
||||
'key' not in attr_name.lower()
|
||||
), f"Unexpected attribute '{attr_name}' contains 'key' in AppConfig"
|
||||
assert (
|
||||
'token' not in attr_name.lower() or 'tokens' in attr_name.lower()
|
||||
), f"Unexpected attribute '{attr_name}' contains 'token' in AppConfig"
|
||||
|
||||
136
tests/unit/test_logging.py
Normal file
136
tests/unit/test_logging.py
Normal file
@@ -0,0 +1,136 @@
|
||||
import logging
|
||||
from io import StringIO
|
||||
|
||||
import pytest
|
||||
|
||||
from opendevin.core.config import AppConfig, LLMConfig
|
||||
from opendevin.core.logger import opendevin_logger as opendevin_logger
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def test_handler():
|
||||
stream = StringIO()
|
||||
handler = logging.StreamHandler(stream)
|
||||
handler.setLevel(logging.INFO)
|
||||
formatter = logging.Formatter('%(message)s')
|
||||
handler.setFormatter(formatter)
|
||||
opendevin_logger.addHandler(handler)
|
||||
yield opendevin_logger, stream
|
||||
opendevin_logger.removeHandler(handler)
|
||||
|
||||
|
||||
def test_openai_api_key_masking(test_handler):
|
||||
logger, stream = test_handler
|
||||
|
||||
api_key = 'sk-1234567890abcdef'
|
||||
message = f"OpenAI API key: api_key='{api_key}'and there's some stuff here"
|
||||
logger.info(message)
|
||||
log_output = stream.getvalue()
|
||||
assert "api_key='******'" in log_output
|
||||
assert api_key not in log_output
|
||||
|
||||
|
||||
def test_azure_api_key_masking(test_handler):
|
||||
logger, stream = test_handler
|
||||
api_key = '1a2b3c4d5e6f7g8h9i0j'
|
||||
message = f"Azure API key: api_key='{api_key}' and chatty chat with ' and \" and '"
|
||||
logger.info(message)
|
||||
log_output = stream.getvalue()
|
||||
assert "api_key='******'" in log_output
|
||||
assert api_key not in log_output
|
||||
|
||||
|
||||
def test_google_vertex_api_key_masking(test_handler):
|
||||
logger, stream = test_handler
|
||||
api_key = 'AIzaSyA1B2C3D4E5F6G7H8I9J0'
|
||||
message = f"Google Vertex API key: api_key='{api_key}' or not"
|
||||
logger.info(message)
|
||||
log_output = stream.getvalue()
|
||||
assert "api_key='******'" in log_output
|
||||
assert api_key not in log_output
|
||||
|
||||
|
||||
def test_anthropic_api_key_masking(test_handler):
|
||||
logger, stream = test_handler
|
||||
api_key = 'sk-ant-1234567890abcdef-some-more-stuff-here'
|
||||
message = f"Anthropic API key: api_key='{api_key}' and there's some 'stuff' here"
|
||||
logger.info(message)
|
||||
log_output = stream.getvalue()
|
||||
assert "api_key='******'" in log_output
|
||||
assert api_key not in log_output
|
||||
|
||||
|
||||
def test_llm_config_attributes_masking(test_handler):
|
||||
logger, stream = test_handler
|
||||
llm_config = LLMConfig(
|
||||
api_key='sk-abc123',
|
||||
aws_access_key_id='AKIAIOSFODNN7EXAMPLE',
|
||||
aws_secret_access_key='wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY',
|
||||
)
|
||||
logger.info(f'LLM Config: {llm_config}')
|
||||
log_output = stream.getvalue()
|
||||
assert "api_key='******'" in log_output
|
||||
assert "aws_access_key_id='******'" in log_output
|
||||
assert "aws_secret_access_key='******'" in log_output
|
||||
assert 'sk-abc123' not in log_output
|
||||
assert 'AKIAIOSFODNN7EXAMPLE' not in log_output
|
||||
assert 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' not in log_output
|
||||
|
||||
# reset the LLMConfig
|
||||
LLMConfig.reset()
|
||||
|
||||
|
||||
def test_app_config_attributes_masking(test_handler):
|
||||
logger, stream = test_handler
|
||||
app_config = AppConfig(
|
||||
e2b_api_key='e2b-xyz789', github_token='ghp_abcdefghijklmnopqrstuvwxyz'
|
||||
)
|
||||
logger.info(f'App Config: {app_config}')
|
||||
log_output = stream.getvalue()
|
||||
assert "e2b_api_key='******'" in log_output
|
||||
assert "github_token='******'" in log_output
|
||||
assert 'e2b-xyz789' not in log_output
|
||||
assert 'ghp_abcdefghijklmnopqrstuvwxyz' not in log_output
|
||||
|
||||
# reset the AppConfig
|
||||
AppConfig.reset()
|
||||
|
||||
|
||||
def test_sensitive_env_vars_masking(test_handler):
|
||||
logger, stream = test_handler
|
||||
sensitive_data = {
|
||||
'API_KEY': 'API_KEY_VALUE',
|
||||
'AWS_ACCESS_KEY_ID': 'AWS_ACCESS_KEY_ID_VALUE',
|
||||
'AWS_SECRET_ACCESS_KEY': 'AWS_SECRET_ACCESS_KEY_VALUE',
|
||||
'E2B_API_KEY': 'E2B_API_KEY_VALUE',
|
||||
'GITHUB_TOKEN': 'GITHUB_TOKEN_VALUE',
|
||||
}
|
||||
|
||||
log_message = ' '.join(
|
||||
f"{attr}='{value}'" for attr, value in sensitive_data.items()
|
||||
)
|
||||
logger.info(log_message)
|
||||
|
||||
log_output = stream.getvalue()
|
||||
for attr, value in sensitive_data.items():
|
||||
assert f"{attr}='******'" in log_output
|
||||
assert value not in log_output
|
||||
|
||||
|
||||
def test_special_cases_masking(test_handler):
|
||||
logger, stream = test_handler
|
||||
sensitive_data = {
|
||||
'LLM_API_KEY': 'LLM_API_KEY_VALUE',
|
||||
'SANDBOX_ENV_GITHUB_TOKEN': 'SANDBOX_ENV_GITHUB_TOKEN_VALUE',
|
||||
}
|
||||
|
||||
log_message = ' '.join(
|
||||
f"{attr}={value} with no single quotes' and something"
|
||||
for attr, value in sensitive_data.items()
|
||||
)
|
||||
logger.info(log_message)
|
||||
|
||||
log_output = stream.getvalue()
|
||||
for attr, value in sensitive_data.items():
|
||||
assert f"{attr}='******'" in log_output
|
||||
assert value not in log_output
|
||||
Reference in New Issue
Block a user