From 4902c063045458278146914b87638822785ed67a Mon Sep 17 00:00:00 2001 From: Toshio Kuratomi Date: Mon, 9 Feb 2015 10:13:13 -0800 Subject: [PATCH] Obfuscate passwords in more places where it is displayed --- lib/ansible/module_utils/basic.py | 161 +++++++++++++++-------------- lib/ansible/utils/__init__.py | 39 +++---- test/units/TestModuleUtilsBasic.py | 15 +-- v2/ansible/module_utils/basic.py | 160 ++++++++++++++-------------- 4 files changed, 182 insertions(+), 193 deletions(-) diff --git a/lib/ansible/module_utils/basic.py b/lib/ansible/module_utils/basic.py index 8603976c5a..8b14536ab5 100644 --- a/lib/ansible/module_utils/basic.py +++ b/lib/ansible/module_utils/basic.py @@ -171,6 +171,7 @@ FILE_COMMON_ARGUMENTS=dict( directory_mode = dict(), # used by copy ) +PASSWD_ARG_RE = re.compile(r'^[-]{0,2}pass[-]?(word|wd)?') def get_platform(): ''' what's the platform? example: Linux is a platform. ''' @@ -269,6 +270,65 @@ def json_dict_bytes_to_unicode(d): else: return d +def heuristic_log_sanitize(data): + ''' Remove strings that look like passwords from log messages ''' + # Currently filters: + # user:pass@foo/whatever and http://username:pass@wherever/foo + # This code has false positives and consumes parts of logs that are + # not passwds + + # begin: start of a passwd containing string + # end: end of a passwd containing string + # sep: char between user and passwd + # prev_begin: where in the overall string to start a search for + # a passwd + # sep_search_end: where in the string to end a search for the sep + output = [] + begin = len(data) + prev_begin = begin + sep = 1 + while sep: + # Find the potential end of a passwd + try: + end = data.rindex('@', 0, begin) + except ValueError: + # No passwd in the rest of the data + output.insert(0, data[0:begin]) + break + + # Search for the beginning of a passwd + sep = None + sep_search_end = end + while not sep: + # URL-style username+password + try: + begin = data.rindex('://', 0, sep_search_end) + except ValueError: + # No url style in the data, check for ssh style in the + # rest of the string + begin = 0 + # Search for separator + try: + sep = data.index(':', begin + 3, end) + except ValueError: + # No separator; choices: + if begin == 0: + # Searched the whole string so there's no password + # here. Return the remaining data + output.insert(0, data[0:begin]) + break + # Search for a different beginning of the password field. + sep_search_end = begin + continue + if sep: + # Password was found; remove it. + output.insert(0, data[end:prev_begin]) + output.insert(0, '********') + output.insert(0, data[begin:sep + 1]) + prev_begin = begin + + return ''.join(output) + class AnsibleModule(object): @@ -1019,65 +1079,6 @@ class AnsibleModule(object): params2.update(params) return (params2, args) - def _heuristic_log_sanitize(self, data): - ''' Remove strings that look like passwords from log messages ''' - # Currently filters: - # user:pass@foo/whatever and http://username:pass@wherever/foo - # This code has false positives and consumes parts of logs that are - # not passwds - - # begin: start of a passwd containing string - # end: end of a passwd containing string - # sep: char between user and passwd - # prev_begin: where in the overall string to start a search for - # a passwd - # sep_search_end: where in the string to end a search for the sep - output = [] - begin = len(data) - prev_begin = begin - sep = 1 - while sep: - # Find the potential end of a passwd - try: - end = data.rindex('@', 0, begin) - except ValueError: - # No passwd in the rest of the data - output.insert(0, data[0:begin]) - break - - # Search for the beginning of a passwd - sep = None - sep_search_end = end - while not sep: - # URL-style username+password - try: - begin = data.rindex('://', 0, sep_search_end) - except ValueError: - # No url style in the data, check for ssh style in the - # rest of the string - begin = 0 - # Search for separator - try: - sep = data.index(':', begin + 3, end) - except ValueError: - # No separator; choices: - if begin == 0: - # Searched the whole string so there's no password - # here. Return the remaining data - output.insert(0, data[0:begin]) - break - # Search for a different beginning of the password field. - sep_search_end = begin - continue - if sep: - # Password was found; remove it. - output.insert(0, data[end:prev_begin]) - output.insert(0, '********') - output.insert(0, data[begin:sep + 1]) - prev_begin = begin - - return ''.join(output) - def _log_invocation(self): ''' log that ansible ran the module ''' # TODO: generalize a separate log function and make log_invocation use it @@ -1100,7 +1101,7 @@ class AnsibleModule(object): param_val = str(param_val) elif isinstance(param_val, unicode): param_val = param_val.encode('utf-8') - log_args[param] = self._heuristic_log_sanitize(param_val) + log_args[param] = heuristic_log_sanitize(param_val) module = 'ansible-%s' % os.path.basename(__file__) msg = [] @@ -1444,27 +1445,27 @@ class AnsibleModule(object): # create a printable version of the command for use # in reporting later, which strips out things like # passwords from the args list - if isinstance(args, list): - clean_args = " ".join(pipes.quote(arg) for arg in args) + if isinstance(args, basestring): + to_clean_args = shlex.split(args.encode('utf-8')) else: - clean_args = args + to_clean_args = args - # all clean strings should return two match groups, - # where the first is the CLI argument and the second - # is the password/key/phrase that will be hidden - clean_re_strings = [ - # this removes things like --password, --pass, --pass-wd, etc. - # optionally followed by an '=' or a space. The password can - # be quoted or not too, though it does not care about quotes - # that are not balanced - # source: http://blog.stevenlevithan.com/archives/match-quoted-string - r'([-]{0,2}pass[-]?(?:word|wd)?[=\s]?)((?:["\'])?(?:[^\s])*(?:\1)?)', - r'^(?P.*:)(?P.*)(?P\@.*)$', - # TODO: add more regex checks here - ] - for re_str in clean_re_strings: - r = re.compile(re_str) - clean_args = r.sub(r'\1********', clean_args) + clean_args = [] + is_passwd = False + for arg in to_clean_args: + if is_passwd: + is_passwd = False + clean_args.append('********') + continue + if PASSWD_ARG_RE.match(arg): + sep_idx = arg.find('=') + if sep_idx > -1: + clean_args.append('%s=********' % arg[:sep_idx]) + continue + else: + is_passwd = True + clean_args.append(heuristic_log_sanitize(arg)) + clean_args = ' '.join(pipes.quote(arg) for arg in clean_args) if data: st_in = subprocess.PIPE @@ -1549,7 +1550,7 @@ class AnsibleModule(object): self.fail_json(rc=257, msg=traceback.format_exc(), cmd=clean_args) if rc != 0 and check_rc: - msg = stderr.rstrip() + msg = heuristic_log_sanitize(stderr.rstrip()) self.fail_json(cmd=clean_args, rc=rc, stdout=stdout, stderr=stderr, msg=msg) # reset the pwd diff --git a/lib/ansible/utils/__init__.py b/lib/ansible/utils/__init__.py index 7c4d82914b..8e3edf4f4f 100644 --- a/lib/ansible/utils/__init__.py +++ b/lib/ansible/utils/__init__.py @@ -32,6 +32,7 @@ from ansible.utils.su_prompts import * from ansible.utils.hashing import secure_hash, secure_hash_s, checksum, checksum_s, md5, md5s from ansible.callbacks import display from ansible.module_utils.splitter import split_args, unquote +from ansible.module_utils.basic import heuristic_log_sanitize import ansible.constants as C import ast import time @@ -932,34 +933,18 @@ def sanitize_output(str): private_keys = ['password', 'login_password'] - filter_re = [ - # filter out things like user:pass@foo/whatever - # and http://username:pass@wherever/foo - re.compile('^(?P.*:)(?P.*)(?P\@.*)$'), - ] + parts = parse_kv(str) + output = [] + for (k, v) in parts.items(): + if k in private_keys: + output.append("%s=VALUE_HIDDEN" % k) + continue + else: + v = heuristic_log_sanitize(v) + output.append('%s=%s' % (k, v)) + output = ' '.join(output) + return output - parts = str.split() - output = '' - for part in parts: - try: - (k,v) = part.split('=', 1) - if k in private_keys: - output += " %s=VALUE_HIDDEN" % k - else: - found = False - for filter in filter_re: - m = filter.match(v) - if m: - d = m.groupdict() - output += " %s=%s" % (k, d['before'] + "********" + d['after']) - found = True - break - if not found: - output += " %s" % part - except: - output += " %s" % part - - return output.strip() #################################################################### # option handling code for /usr/bin/ansible and ansible-playbook diff --git a/test/units/TestModuleUtilsBasic.py b/test/units/TestModuleUtilsBasic.py index 18a4e0d772..2ac77764d7 100644 --- a/test/units/TestModuleUtilsBasic.py +++ b/test/units/TestModuleUtilsBasic.py @@ -7,6 +7,7 @@ from nose.tools import timed from ansible import errors from ansible.module_common import ModuleReplacer +from ansible.module_utils.basic import heuristic_log_sanitize from ansible.utils import checksum as utils_checksum TEST_MODULE_DATA = """ @@ -264,23 +265,23 @@ class TestModuleUtilsBasicHelpers(unittest.TestCase): @timed(5) def test_log_sanitize_speed_many_url(self): - self.module._heuristic_log_sanitize(self.many_url) + heuristic_log_sanitize(self.many_url) @timed(5) def test_log_sanitize_speed_many_ssh(self): - self.module._heuristic_log_sanitize(self.many_ssh) + heuristic_log_sanitize(self.many_ssh) @timed(5) def test_log_sanitize_speed_one_url(self): - self.module._heuristic_log_sanitize(self.one_url) + heuristic_log_sanitize(self.one_url) @timed(5) def test_log_sanitize_speed_one_ssh(self): - self.module._heuristic_log_sanitize(self.one_ssh) + heuristic_log_sanitize(self.one_ssh) @timed(5) def test_log_sanitize_speed_zero_secrets(self): - self.module._heuristic_log_sanitize(self.zero_secrets) + heuristic_log_sanitize(self.zero_secrets) # # Test that the password obfuscation sanitizes somewhat cleanly. @@ -290,8 +291,8 @@ class TestModuleUtilsBasicHelpers(unittest.TestCase): url_data = repr(self._gen_data(3, True, True, self.URL_SECRET)) ssh_data = repr(self._gen_data(3, True, True, self.SSH_SECRET)) - url_output = self.module._heuristic_log_sanitize(url_data) - ssh_output = self.module._heuristic_log_sanitize(ssh_data) + url_output = heuristic_log_sanitize(url_data) + ssh_output = heuristic_log_sanitize(ssh_data) # Basic functionality: Successfully hid the password try: diff --git a/v2/ansible/module_utils/basic.py b/v2/ansible/module_utils/basic.py index cd4d602453..099b808b2c 100644 --- a/v2/ansible/module_utils/basic.py +++ b/v2/ansible/module_utils/basic.py @@ -171,6 +171,7 @@ FILE_COMMON_ARGUMENTS=dict( directory_mode = dict(), # used by copy ) +PASSWD_ARG_RE = re.compile(r'^[-]{0,2}pass[-]?(word|wd)?') def get_platform(): ''' what's the platform? example: Linux is a platform. ''' @@ -269,6 +270,65 @@ def json_dict_bytes_to_unicode(d): else: return d +def heuristic_log_sanitize(data): + ''' Remove strings that look like passwords from log messages ''' + # Currently filters: + # user:pass@foo/whatever and http://username:pass@wherever/foo + # This code has false positives and consumes parts of logs that are + # not passwds + + # begin: start of a passwd containing string + # end: end of a passwd containing string + # sep: char between user and passwd + # prev_begin: where in the overall string to start a search for + # a passwd + # sep_search_end: where in the string to end a search for the sep + output = [] + begin = len(data) + prev_begin = begin + sep = 1 + while sep: + # Find the potential end of a passwd + try: + end = data.rindex('@', 0, begin) + except ValueError: + # No passwd in the rest of the data + output.insert(0, data[0:begin]) + break + + # Search for the beginning of a passwd + sep = None + sep_search_end = end + while not sep: + # URL-style username+password + try: + begin = data.rindex('://', 0, sep_search_end) + except ValueError: + # No url style in the data, check for ssh style in the + # rest of the string + begin = 0 + # Search for separator + try: + sep = data.index(':', begin + 3, end) + except ValueError: + # No separator; choices: + if begin == 0: + # Searched the whole string so there's no password + # here. Return the remaining data + output.insert(0, data[0:begin]) + break + # Search for a different beginning of the password field. + sep_search_end = begin + continue + if sep: + # Password was found; remove it. + output.insert(0, data[end:prev_begin]) + output.insert(0, '********') + output.insert(0, data[begin:sep + 1]) + prev_begin = begin + + return ''.join(output) + class AnsibleModule(object): @@ -1009,64 +1069,6 @@ class AnsibleModule(object): params = dict() return params - def _heuristic_log_sanitize(self, data): - ''' Remove strings that look like passwords from log messages ''' - # Currently filters: - # user:pass@foo/whatever and http://username:pass@wherever/foo - # This code has false positives and consumes parts of logs that are - # not passwds - - # begin: start of a passwd containing string - # end: end of a passwd containing string - # sep: char between user and passwd - # prev_begin: where in the overall string to start a search for - # a passwd - # sep_search_end: where in the string to end a search for the sep - output = [] - begin = len(data) - prev_begin = begin - sep = 1 - while sep: - # Find the potential end of a passwd - try: - end = data.rindex('@', 0, begin) - except ValueError: - # No passwd in the rest of the data - output.insert(0, data[0:begin]) - break - - # Search for the beginning of a passwd - sep = None - sep_search_end = end - while not sep: - # URL-style username+password - try: - begin = data.rindex('://', 0, sep_search_end) - except ValueError: - # No url style in the data, check for ssh style in the - # rest of the string - begin = 0 - # Search for separator - try: - sep = data.index(':', begin + 3, end) - except ValueError: - # No separator; choices: - if begin == 0: - # Searched the whole string so there's no password - # here. Return the remaining data - output.insert(0, data[0:begin]) - break - # Search for a different beginning of the password field. - sep_search_end = begin - continue - if sep: - # Password was found; remove it. - output.insert(0, data[end:prev_begin]) - output.insert(0, '********') - output.insert(0, data[begin:sep + 1]) - prev_begin = begin - - return ''.join(output) def _log_invocation(self): ''' log that ansible ran the module ''' @@ -1090,7 +1092,7 @@ class AnsibleModule(object): param_val = str(param_val) elif isinstance(param_val, unicode): param_val = param_val.encode('utf-8') - log_args[param] = self._heuristic_log_sanitize(param_val) + log_args[param] = heuristic_log_sanitize(param_val) module = 'ansible-%s' % os.path.basename(__file__) msg = [] @@ -1434,27 +1436,27 @@ class AnsibleModule(object): # create a printable version of the command for use # in reporting later, which strips out things like # passwords from the args list - if isinstance(args, list): - clean_args = " ".join(pipes.quote(arg) for arg in args) + if isinstance(args, basestring): + to_clean_args = shlex.split(args.encode('utf-8')) else: - clean_args = args + to_clean_args = args - # all clean strings should return two match groups, - # where the first is the CLI argument and the second - # is the password/key/phrase that will be hidden - clean_re_strings = [ - # this removes things like --password, --pass, --pass-wd, etc. - # optionally followed by an '=' or a space. The password can - # be quoted or not too, though it does not care about quotes - # that are not balanced - # source: http://blog.stevenlevithan.com/archives/match-quoted-string - r'([-]{0,2}pass[-]?(?:word|wd)?[=\s]?)((?:["\'])?(?:[^\s])*(?:\1)?)', - r'^(?P.*:)(?P.*)(?P\@.*)$', - # TODO: add more regex checks here - ] - for re_str in clean_re_strings: - r = re.compile(re_str) - clean_args = r.sub(r'\1********', clean_args) + clean_args = [] + is_passwd = False + for arg in to_clean_args: + if is_passwd: + is_passwd = False + clean_args.append('********') + continue + if PASSWD_ARG_RE.match(arg): + sep_idx = arg.find('=') + if sep_idx > -1: + clean_args.append('%s=********' % arg[:sep_idx]) + continue + else: + is_passwd = True + clean_args.append(heuristic_log_sanitize(arg)) + clean_args = ' '.join(pipes.quote(arg) for arg in clean_args) if data: st_in = subprocess.PIPE @@ -1539,7 +1541,7 @@ class AnsibleModule(object): self.fail_json(rc=257, msg=traceback.format_exc(), cmd=clean_args) if rc != 0 and check_rc: - msg = stderr.rstrip() + msg = heuristic_log_sanitize(stderr.rstrip()) self.fail_json(cmd=clean_args, rc=rc, stdout=stdout, stderr=stderr, msg=msg) # reset the pwd