Reduce commenting as basic.py is copied to the remote host with every module transfer.

Move some comments to the unittest as we might look at that if we decide
to refactor the code.
This commit is contained in:
Toshio Kuratomi 2014-09-05 07:39:17 -07:00
parent d293a46994
commit 8233522a7a
2 changed files with 45 additions and 50 deletions

View file

@ -967,66 +967,50 @@ class AnsibleModule(object):
return (params2, args)
def _heuristic_log_sanitize(self, data):
''' Remove strings that look like passwords from log messages
''' Remove strings that look like passwords from log messages '''
# Currently filters:
# user:pass@foo/whatever and http://username:pass@wherever/foo
# This code has false positives and consumes parts of logs that are
# not passwds
Currently filters out things like:
* user:pass@foo/whatever
* http://username:pass@wherever/foo
Currently, the heuristics are subject to false positives. This could
change in the future should we decide that no_log is what we want to
push people towards.
'''
# Regexes can be too slow for this. Pathological cases for regexes
# seem to be large amounts of data with many ':' but no '@'
# This function gets slower when there are many replacements but not
# nearly as slow as the worst case for regexes. If we need the
# flexibility of regex's in the future, re.sub() is faster than
# re.match() + str.join(). We might be able to decide to use a regex
# strategy if we detect a large number of '@' symbolsand use this
# function otherwise.
# begin points to the beginning of a password containing string
# end points to the end of the password containing string
# sep points to the char in between username and password
# prev_begin keeps track of where in the string to start a new search
# for the end of a password substring
# sep_search_end keeps track of where in the string to end a search
# for the separator
# begin: start of a passwd containing string
# end: end of a passwd containing string
# sep: char between user and passwd
# prev_begin: where in the overall string to start a search for
# a passwd
# sep_search_end: where in the string to end a search for the sep
output = []
begin = len(data)
prev_begin = begin
sep = 1 # Prime the pump with a sentinel value
sep = 1
while sep:
# Find the potential end of a password
# Find the potential end of a passwd
try:
end = data.rindex('@', 0, begin)
except ValueError:
# No end marker, so add the rest of the data
# No passwd in the rest of the data
output.insert(0, data[0:begin])
break
# Search for the beginning of the password
# Search for the beginning of a passwd
sep = None
sep_search_end = end
while not sep:
# Search for the beginning of a URL-style username+password
# URL-style username+password
try:
begin = data.rindex('://', 0, sep_search_end)
except ValueError:
# If we don't find that, then we default to the start of
# the string (b/c ssh-style username+password could
# be taking up all of this parameter).
# No url style in the data, check for ssh style in the
# rest of the string
begin = 0
# Search for a separator character inside of where we know the
# password might live.
# Search for separator
try:
sep = data.index(':', begin + 3, end)
except ValueError:
# No separator, now we have choices:
# No separator; choices:
if begin == 0:
# We've searched the whole string so there's no
# password here. Return the remaining data
# Searched the whole string so there's no password
# here. Return the remaining data
output.insert(0, data[0:begin])
break
# Search for a different beginning of the password field.
@ -1081,11 +1065,7 @@ class AnsibleModule(object):
# 6655 - allow for accented characters
if isinstance(msg, unicode):
# If we've done everything right up above msg should be type
# str, not type unicode here. This is partial protection in case
# we've done something wrong. But if we arrive here we can
# potentially get tracebacks in code up above (when mixing unicode
# and str together)
# We should never get here as msg should be type str, not unicode
msg = msg.encode('utf-8')
if (has_journal):

View file

@ -247,11 +247,21 @@ class TestModuleUtilsBasicHelpers(unittest.TestCase):
#################################################################################
#
# Several speed tests -- previously, the obfuscation of passwords carried
# an unreasonable speed penalty for some module parameters. We want to be
# sure we don't regress to that state.
# Speed tests
#
# Previously, we used regexes which had some pathologically slow cases for
# parameters with large amounts of data with many ':' but no '@'. The
# present function gets slower when there are many replacements so we may
# want to explore regexes in the future (for the speed when substituting
# or flexibility). These speed tests will hopefully tell us if we're
# introducing code that has cases that are simply too slow.
#
# Some regex notes:
# * re.sub() is faster than re.match() + str.join().
# * We may be able to detect a large number of '@' symbols and then use
# a regex else use the present function.
@timed(5)
def test_log_sanitize_speed_many_url(self):
self.module._heuristic_log_sanitize(self.many_url)
@ -297,10 +307,15 @@ class TestModuleUtilsBasicHelpers(unittest.TestCase):
# the same:
self.assertEqual(len(url_output), len(url_data))
# ssh checking is somewhat harder as the heuristic is overzealous in
# most cases. Since the input will have at least one ":" present
# before the password we can tell some things about the beginning and
# end of the data, though:
# ssh checking is harder as the heuristic is overzealous in many
# cases. Since the input will have at least one ":" present before
# the password we can tell some things about the beginning and end of
# the data, though:
self.assertTrue(ssh_output.startswith("{'"))
self.assertTrue(ssh_output.endswith("'}}}}"))
self.assertIn(":********@foo.com/data',", ssh_output)
# The overzealous-ness here may lead to us changing the algorithm in
# the future. We could make it consume less of the data (with the
# possiblity of leaving partial passwords exposed) and encourage
# people to use no_log instead of relying on this obfuscation.