Obfuscate passwords in more places where they are displayed

This commit is contained in:
Toshio Kuratomi 2015-02-09 10:13:13 -08:00
parent 8f06ba2bc1
commit 4902c06304
4 changed files with 182 additions and 193 deletions

View file

@ -171,6 +171,7 @@ FILE_COMMON_ARGUMENTS=dict(
directory_mode = dict(), # used by copy directory_mode = dict(), # used by copy
) )
# Matches command-line arguments that start like a password option: up to two
# leading dashes, then "pass", an optional dash and an optional "word"/"wd"
# suffix (e.g. --password, -pass, pass-wd).
# NOTE(review): only the start of the string is anchored, so any argument
# that merely begins with "pass" also matches -- confirm this is intended.
PASSWD_ARG_RE = re.compile(r'^[-]{0,2}pass[-]?(word|wd)?')
def get_platform(): def get_platform():
''' what's the platform? example: Linux is a platform. ''' ''' what's the platform? example: Linux is a platform. '''
@ -269,6 +270,65 @@ def json_dict_bytes_to_unicode(d):
else: else:
return d return d
def heuristic_log_sanitize(data):
    ''' Remove strings that look like passwords from log messages

    Scans ``data`` from right to left for ``user:pass@host`` (ssh-style) and
    ``scheme://user:pass@host`` (URL-style) patterns and replaces the text
    between the ``:`` separator and the ``@`` with ``********``.

    :arg data: string (log message, parameter value, ...) to sanitize
    :returns: copy of ``data`` with suspected passwords masked; text in which
        no password-like pattern is found is returned unchanged
    '''
    # Currently filters:
    # user:pass@foo/whatever and http://username:pass@wherever/foo
    # This code has false positives and consumes parts of logs that are
    # not passwds

    # begin: start of a passwd containing string
    # end: end of a passwd containing string
    # sep: char between user and passwd
    # prev_begin: where in the overall string to start a search for
    #   a passwd
    # sep_search_end: where in the string to end a search for the sep
    output = []
    begin = len(data)
    prev_begin = begin
    sep = 1
    while sep:
        # Find the potential end of a passwd
        try:
            end = data.rindex('@', 0, begin)
        except ValueError:
            # No passwd in the rest of the data
            output.insert(0, data[0:begin])
            break

        # Search for the beginning of a passwd
        sep = None
        sep_search_end = end
        while not sep:
            # URL-style username+password
            try:
                begin = data.rindex('://', 0, sep_search_end)
            except ValueError:
                # No url style in the data, check for ssh style in the
                # rest of the string
                begin = 0
            # Search for separator.  begin + 3 skips over the '://' marker
            # (in the ssh-style case it skips the first three characters of
            # the string instead).
            try:
                sep = data.index(':', begin + 3, end)
            except ValueError:
                # No separator; choices:
                if begin == 0:
                    # Searched the whole string so there's no password
                    # here.  Return the remaining data.  The slice must run
                    # up to prev_begin: begin is 0 here, so slicing with it
                    # would silently drop all of the not-yet-emitted text.
                    output.insert(0, data[0:prev_begin])
                    break
                # Search for a different beginning of the password field.
                sep_search_end = begin
                continue
        if sep:
            # Password was found; remove it.
            output.insert(0, data[end:prev_begin])
            output.insert(0, '********')
            output.insert(0, data[begin:sep + 1])
            prev_begin = begin

    return ''.join(output)
class AnsibleModule(object): class AnsibleModule(object):
@ -1019,65 +1079,6 @@ class AnsibleModule(object):
params2.update(params) params2.update(params)
return (params2, args) return (params2, args)
def _heuristic_log_sanitize(self, data):
''' Remove strings that look like passwords from log messages '''
# Currently filters:
# user:pass@foo/whatever and http://username:pass@wherever/foo
# This code has false positives and consumes parts of logs that are
# not passwds
# begin: start of a passwd containing string
# end: end of a passwd containing string
# sep: char between user and passwd
# prev_begin: where in the overall string to start a search for
# a passwd
# sep_search_end: where in the string to end a search for the sep
output = []
begin = len(data)
prev_begin = begin
sep = 1
while sep:
# Find the potential end of a passwd
try:
end = data.rindex('@', 0, begin)
except ValueError:
# No passwd in the rest of the data
output.insert(0, data[0:begin])
break
# Search for the beginning of a passwd
sep = None
sep_search_end = end
while not sep:
# URL-style username+password
try:
begin = data.rindex('://', 0, sep_search_end)
except ValueError:
# No url style in the data, check for ssh style in the
# rest of the string
begin = 0
# Search for separator
try:
sep = data.index(':', begin + 3, end)
except ValueError:
# No separator; choices:
if begin == 0:
# Searched the whole string so there's no password
# here. Return the remaining data
output.insert(0, data[0:begin])
break
# Search for a different beginning of the password field.
sep_search_end = begin
continue
if sep:
# Password was found; remove it.
output.insert(0, data[end:prev_begin])
output.insert(0, '********')
output.insert(0, data[begin:sep + 1])
prev_begin = begin
return ''.join(output)
def _log_invocation(self): def _log_invocation(self):
''' log that ansible ran the module ''' ''' log that ansible ran the module '''
# TODO: generalize a separate log function and make log_invocation use it # TODO: generalize a separate log function and make log_invocation use it
@ -1100,7 +1101,7 @@ class AnsibleModule(object):
param_val = str(param_val) param_val = str(param_val)
elif isinstance(param_val, unicode): elif isinstance(param_val, unicode):
param_val = param_val.encode('utf-8') param_val = param_val.encode('utf-8')
log_args[param] = self._heuristic_log_sanitize(param_val) log_args[param] = heuristic_log_sanitize(param_val)
module = 'ansible-%s' % os.path.basename(__file__) module = 'ansible-%s' % os.path.basename(__file__)
msg = [] msg = []
@ -1444,27 +1445,27 @@ class AnsibleModule(object):
# create a printable version of the command for use # create a printable version of the command for use
# in reporting later, which strips out things like # in reporting later, which strips out things like
# passwords from the args list # passwords from the args list
if isinstance(args, list): if isinstance(args, basestring):
clean_args = " ".join(pipes.quote(arg) for arg in args) to_clean_args = shlex.split(args.encode('utf-8'))
else: else:
clean_args = args to_clean_args = args
# all clean strings should return two match groups, clean_args = []
# where the first is the CLI argument and the second is_passwd = False
# is the password/key/phrase that will be hidden for arg in to_clean_args:
clean_re_strings = [ if is_passwd:
# this removes things like --password, --pass, --pass-wd, etc. is_passwd = False
# optionally followed by an '=' or a space. The password can clean_args.append('********')
# be quoted or not too, though it does not care about quotes continue
# that are not balanced if PASSWD_ARG_RE.match(arg):
# source: http://blog.stevenlevithan.com/archives/match-quoted-string sep_idx = arg.find('=')
r'([-]{0,2}pass[-]?(?:word|wd)?[=\s]?)((?:["\'])?(?:[^\s])*(?:\1)?)', if sep_idx > -1:
r'^(?P<before>.*:)(?P<password>.*)(?P<after>\@.*)$', clean_args.append('%s=********' % arg[:sep_idx])
# TODO: add more regex checks here continue
] else:
for re_str in clean_re_strings: is_passwd = True
r = re.compile(re_str) clean_args.append(heuristic_log_sanitize(arg))
clean_args = r.sub(r'\1********', clean_args) clean_args = ' '.join(pipes.quote(arg) for arg in clean_args)
if data: if data:
st_in = subprocess.PIPE st_in = subprocess.PIPE
@ -1549,7 +1550,7 @@ class AnsibleModule(object):
self.fail_json(rc=257, msg=traceback.format_exc(), cmd=clean_args) self.fail_json(rc=257, msg=traceback.format_exc(), cmd=clean_args)
if rc != 0 and check_rc: if rc != 0 and check_rc:
msg = stderr.rstrip() msg = heuristic_log_sanitize(stderr.rstrip())
self.fail_json(cmd=clean_args, rc=rc, stdout=stdout, stderr=stderr, msg=msg) self.fail_json(cmd=clean_args, rc=rc, stdout=stdout, stderr=stderr, msg=msg)
# reset the pwd # reset the pwd

View file

@ -32,6 +32,7 @@ from ansible.utils.su_prompts import *
from ansible.utils.hashing import secure_hash, secure_hash_s, checksum, checksum_s, md5, md5s from ansible.utils.hashing import secure_hash, secure_hash_s, checksum, checksum_s, md5, md5s
from ansible.callbacks import display from ansible.callbacks import display
from ansible.module_utils.splitter import split_args, unquote from ansible.module_utils.splitter import split_args, unquote
from ansible.module_utils.basic import heuristic_log_sanitize
import ansible.constants as C import ansible.constants as C
import ast import ast
import time import time
@ -932,34 +933,18 @@ def sanitize_output(str):
private_keys = ['password', 'login_password'] private_keys = ['password', 'login_password']
filter_re = [ parts = parse_kv(str)
# filter out things like user:pass@foo/whatever output = []
# and http://username:pass@wherever/foo for (k, v) in parts.items():
re.compile('^(?P<before>.*:)(?P<password>.*)(?P<after>\@.*)$'),
]
parts = str.split()
output = ''
for part in parts:
try:
(k,v) = part.split('=', 1)
if k in private_keys: if k in private_keys:
output += " %s=VALUE_HIDDEN" % k output.append("%s=VALUE_HIDDEN" % k)
continue
else: else:
found = False v = heuristic_log_sanitize(v)
for filter in filter_re: output.append('%s=%s' % (k, v))
m = filter.match(v) output = ' '.join(output)
if m: return output
d = m.groupdict()
output += " %s=%s" % (k, d['before'] + "********" + d['after'])
found = True
break
if not found:
output += " %s" % part
except:
output += " %s" % part
return output.strip()
#################################################################### ####################################################################
# option handling code for /usr/bin/ansible and ansible-playbook # option handling code for /usr/bin/ansible and ansible-playbook

View file

@ -7,6 +7,7 @@ from nose.tools import timed
from ansible import errors from ansible import errors
from ansible.module_common import ModuleReplacer from ansible.module_common import ModuleReplacer
from ansible.module_utils.basic import heuristic_log_sanitize
from ansible.utils import checksum as utils_checksum from ansible.utils import checksum as utils_checksum
TEST_MODULE_DATA = """ TEST_MODULE_DATA = """
@ -264,23 +265,23 @@ class TestModuleUtilsBasicHelpers(unittest.TestCase):
@timed(5) @timed(5)
def test_log_sanitize_speed_many_url(self): def test_log_sanitize_speed_many_url(self):
self.module._heuristic_log_sanitize(self.many_url) heuristic_log_sanitize(self.many_url)
@timed(5) @timed(5)
def test_log_sanitize_speed_many_ssh(self): def test_log_sanitize_speed_many_ssh(self):
self.module._heuristic_log_sanitize(self.many_ssh) heuristic_log_sanitize(self.many_ssh)
@timed(5) @timed(5)
def test_log_sanitize_speed_one_url(self): def test_log_sanitize_speed_one_url(self):
self.module._heuristic_log_sanitize(self.one_url) heuristic_log_sanitize(self.one_url)
@timed(5) @timed(5)
def test_log_sanitize_speed_one_ssh(self): def test_log_sanitize_speed_one_ssh(self):
self.module._heuristic_log_sanitize(self.one_ssh) heuristic_log_sanitize(self.one_ssh)
@timed(5) @timed(5)
def test_log_sanitize_speed_zero_secrets(self): def test_log_sanitize_speed_zero_secrets(self):
self.module._heuristic_log_sanitize(self.zero_secrets) heuristic_log_sanitize(self.zero_secrets)
# #
# Test that the password obfuscation sanitizes somewhat cleanly. # Test that the password obfuscation sanitizes somewhat cleanly.
@ -290,8 +291,8 @@ class TestModuleUtilsBasicHelpers(unittest.TestCase):
url_data = repr(self._gen_data(3, True, True, self.URL_SECRET)) url_data = repr(self._gen_data(3, True, True, self.URL_SECRET))
ssh_data = repr(self._gen_data(3, True, True, self.SSH_SECRET)) ssh_data = repr(self._gen_data(3, True, True, self.SSH_SECRET))
url_output = self.module._heuristic_log_sanitize(url_data) url_output = heuristic_log_sanitize(url_data)
ssh_output = self.module._heuristic_log_sanitize(ssh_data) ssh_output = heuristic_log_sanitize(ssh_data)
# Basic functionality: Successfully hid the password # Basic functionality: Successfully hid the password
try: try:

View file

@ -171,6 +171,7 @@ FILE_COMMON_ARGUMENTS=dict(
directory_mode = dict(), # used by copy directory_mode = dict(), # used by copy
) )
# Matches command-line arguments that start like a password option: up to two
# leading dashes, then "pass", an optional dash and an optional "word"/"wd"
# suffix (e.g. --password, -pass, pass-wd).
# NOTE(review): only the start of the string is anchored, so any argument
# that merely begins with "pass" also matches -- confirm this is intended.
PASSWD_ARG_RE = re.compile(r'^[-]{0,2}pass[-]?(word|wd)?')
def get_platform(): def get_platform():
''' what's the platform? example: Linux is a platform. ''' ''' what's the platform? example: Linux is a platform. '''
@ -269,6 +270,65 @@ def json_dict_bytes_to_unicode(d):
else: else:
return d return d
def heuristic_log_sanitize(data):
    ''' Remove strings that look like passwords from log messages

    Scans ``data`` from right to left for ``user:pass@host`` (ssh-style) and
    ``scheme://user:pass@host`` (URL-style) patterns and replaces the text
    between the ``:`` separator and the ``@`` with ``********``.

    :arg data: string (log message, parameter value, ...) to sanitize
    :returns: copy of ``data`` with suspected passwords masked; text in which
        no password-like pattern is found is returned unchanged
    '''
    # Currently filters:
    # user:pass@foo/whatever and http://username:pass@wherever/foo
    # This code has false positives and consumes parts of logs that are
    # not passwds

    # begin: start of a passwd containing string
    # end: end of a passwd containing string
    # sep: char between user and passwd
    # prev_begin: where in the overall string to start a search for
    #   a passwd
    # sep_search_end: where in the string to end a search for the sep
    output = []
    begin = len(data)
    prev_begin = begin
    sep = 1
    while sep:
        # Find the potential end of a passwd
        try:
            end = data.rindex('@', 0, begin)
        except ValueError:
            # No passwd in the rest of the data
            output.insert(0, data[0:begin])
            break

        # Search for the beginning of a passwd
        sep = None
        sep_search_end = end
        while not sep:
            # URL-style username+password
            try:
                begin = data.rindex('://', 0, sep_search_end)
            except ValueError:
                # No url style in the data, check for ssh style in the
                # rest of the string
                begin = 0
            # Search for separator.  begin + 3 skips over the '://' marker
            # (in the ssh-style case it skips the first three characters of
            # the string instead).
            try:
                sep = data.index(':', begin + 3, end)
            except ValueError:
                # No separator; choices:
                if begin == 0:
                    # Searched the whole string so there's no password
                    # here.  Return the remaining data.  The slice must run
                    # up to prev_begin: begin is 0 here, so slicing with it
                    # would silently drop all of the not-yet-emitted text.
                    output.insert(0, data[0:prev_begin])
                    break
                # Search for a different beginning of the password field.
                sep_search_end = begin
                continue
        if sep:
            # Password was found; remove it.
            output.insert(0, data[end:prev_begin])
            output.insert(0, '********')
            output.insert(0, data[begin:sep + 1])
            prev_begin = begin

    return ''.join(output)
class AnsibleModule(object): class AnsibleModule(object):
@ -1009,64 +1069,6 @@ class AnsibleModule(object):
params = dict() params = dict()
return params return params
def _heuristic_log_sanitize(self, data):
''' Remove strings that look like passwords from log messages '''
# Currently filters:
# user:pass@foo/whatever and http://username:pass@wherever/foo
# This code has false positives and consumes parts of logs that are
# not passwds
# begin: start of a passwd containing string
# end: end of a passwd containing string
# sep: char between user and passwd
# prev_begin: where in the overall string to start a search for
# a passwd
# sep_search_end: where in the string to end a search for the sep
output = []
begin = len(data)
prev_begin = begin
sep = 1
while sep:
# Find the potential end of a passwd
try:
end = data.rindex('@', 0, begin)
except ValueError:
# No passwd in the rest of the data
output.insert(0, data[0:begin])
break
# Search for the beginning of a passwd
sep = None
sep_search_end = end
while not sep:
# URL-style username+password
try:
begin = data.rindex('://', 0, sep_search_end)
except ValueError:
# No url style in the data, check for ssh style in the
# rest of the string
begin = 0
# Search for separator
try:
sep = data.index(':', begin + 3, end)
except ValueError:
# No separator; choices:
if begin == 0:
# Searched the whole string so there's no password
# here. Return the remaining data
output.insert(0, data[0:begin])
break
# Search for a different beginning of the password field.
sep_search_end = begin
continue
if sep:
# Password was found; remove it.
output.insert(0, data[end:prev_begin])
output.insert(0, '********')
output.insert(0, data[begin:sep + 1])
prev_begin = begin
return ''.join(output)
def _log_invocation(self): def _log_invocation(self):
''' log that ansible ran the module ''' ''' log that ansible ran the module '''
@ -1090,7 +1092,7 @@ class AnsibleModule(object):
param_val = str(param_val) param_val = str(param_val)
elif isinstance(param_val, unicode): elif isinstance(param_val, unicode):
param_val = param_val.encode('utf-8') param_val = param_val.encode('utf-8')
log_args[param] = self._heuristic_log_sanitize(param_val) log_args[param] = heuristic_log_sanitize(param_val)
module = 'ansible-%s' % os.path.basename(__file__) module = 'ansible-%s' % os.path.basename(__file__)
msg = [] msg = []
@ -1434,27 +1436,27 @@ class AnsibleModule(object):
# create a printable version of the command for use # create a printable version of the command for use
# in reporting later, which strips out things like # in reporting later, which strips out things like
# passwords from the args list # passwords from the args list
if isinstance(args, list): if isinstance(args, basestring):
clean_args = " ".join(pipes.quote(arg) for arg in args) to_clean_args = shlex.split(args.encode('utf-8'))
else: else:
clean_args = args to_clean_args = args
# all clean strings should return two match groups, clean_args = []
# where the first is the CLI argument and the second is_passwd = False
# is the password/key/phrase that will be hidden for arg in to_clean_args:
clean_re_strings = [ if is_passwd:
# this removes things like --password, --pass, --pass-wd, etc. is_passwd = False
# optionally followed by an '=' or a space. The password can clean_args.append('********')
# be quoted or not too, though it does not care about quotes continue
# that are not balanced if PASSWD_ARG_RE.match(arg):
# source: http://blog.stevenlevithan.com/archives/match-quoted-string sep_idx = arg.find('=')
r'([-]{0,2}pass[-]?(?:word|wd)?[=\s]?)((?:["\'])?(?:[^\s])*(?:\1)?)', if sep_idx > -1:
r'^(?P<before>.*:)(?P<password>.*)(?P<after>\@.*)$', clean_args.append('%s=********' % arg[:sep_idx])
# TODO: add more regex checks here continue
] else:
for re_str in clean_re_strings: is_passwd = True
r = re.compile(re_str) clean_args.append(heuristic_log_sanitize(arg))
clean_args = r.sub(r'\1********', clean_args) clean_args = ' '.join(pipes.quote(arg) for arg in clean_args)
if data: if data:
st_in = subprocess.PIPE st_in = subprocess.PIPE
@ -1539,7 +1541,7 @@ class AnsibleModule(object):
self.fail_json(rc=257, msg=traceback.format_exc(), cmd=clean_args) self.fail_json(rc=257, msg=traceback.format_exc(), cmd=clean_args)
if rc != 0 and check_rc: if rc != 0 and check_rc:
msg = stderr.rstrip() msg = heuristic_log_sanitize(stderr.rstrip())
self.fail_json(cmd=clean_args, rc=rc, stdout=stdout, stderr=stderr, msg=msg) self.fail_json(cmd=clean_args, rc=rc, stdout=stdout, stderr=stderr, msg=msg)
# reset the pwd # reset the pwd