From 065bb52109527ff60f4d9e15fac537dd221e6cb2 Mon Sep 17 00:00:00 2001 From: Abhijit Menon-Sen Date: Mon, 31 Aug 2015 10:37:09 +0530 Subject: [PATCH] Be systematic about parsing and validating hostnames and addresses This adds a parse_address(pattern) utility function that returns (host,port), and uses it wherever where we accept IPv4 and IPv6 addresses and hostnames (or host patterns): the inventory parser the the add_host action plugin. It also introduces a more extensive set of unit tests that supersedes the old add_host unit tests (which didn't actually test add_host, but only the parsing function). --- lib/ansible/inventory/__init__.py | 24 +-- lib/ansible/inventory/expand_hosts.py | 2 +- lib/ansible/inventory/ini.py | 57 ++---- lib/ansible/parsing/utils/addresses.py | 200 +++++++++++++++++++++ lib/ansible/plugins/action/add_host.py | 36 +--- test/units/parsing/test_addresses.py | 56 ++++++ test/units/plugins/action/test_add_host.py | 47 ----- 7 files changed, 283 insertions(+), 139 deletions(-) create mode 100644 lib/ansible/parsing/utils/addresses.py create mode 100644 test/units/parsing/test_addresses.py delete mode 100644 test/units/plugins/action/test_add_host.py diff --git a/lib/ansible/inventory/__init__.py b/lib/ansible/inventory/__init__.py index 2eda3e6649..657084c724 100644 --- a/lib/ansible/inventory/__init__.py +++ b/lib/ansible/inventory/__init__.py @@ -33,6 +33,7 @@ from ansible.inventory.group import Group from ansible.inventory.host import Host from ansible.plugins import vars_loader from ansible.utils.vars import combine_vars +from ansible.parsing.utils.addresses import parse_address try: from __main__ import display @@ -83,27 +84,16 @@ class Inventory(object): host_list = host_list.split(",") host_list = [ h for h in host_list if h and h.strip() ] + self.parser = None + if host_list is None: - self.parser = None + pass elif isinstance(host_list, list): - self.parser = None all = Group('all') self.groups = [ all ] - ipv6_re = re.compile('\[([a-f:A-F0-9]*[%[0-z]+]?)\](?::(\d+))?') - for x in host_list: - m = ipv6_re.match(x) - if m: - all.add_host(Host(m.groups()[0], m.groups()[1])) - else: - if ":" in x: - tokens = x.rsplit(":", 1) - # if there is ':' in the address, then this is an ipv6 - if ':' in tokens[0]: - all.add_host(Host(x)) - else: - all.add_host(Host(tokens[0], tokens[1])) - else: - all.add_host(Host(x)) + for h in host_list: + (host, port) = parse_address(h, allow_ranges=False) + all.add_host(Host(host, port)) elif self._loader.path_exists(host_list): #TODO: switch this to a plugin loader and a 'condition' per plugin on which it should be tried, restoring 'inventory pllugins' if self._loader.is_directory(host_list): diff --git a/lib/ansible/inventory/expand_hosts.py b/lib/ansible/inventory/expand_hosts.py index 0d63ba08bb..7e1c127c27 100644 --- a/lib/ansible/inventory/expand_hosts.py +++ b/lib/ansible/inventory/expand_hosts.py @@ -44,7 +44,7 @@ def detect_range(line = None): Returnes True if the given line contains a pattern, else False. ''' - if 0 <= line.find("[") < line.find(":") < line.find("]"): + if '[' in line: return True else: return False diff --git a/lib/ansible/inventory/ini.py b/lib/ansible/inventory/ini.py index 36115c5797..3040804813 100644 --- a/lib/ansible/inventory/ini.py +++ b/lib/ansible/inventory/ini.py @@ -29,6 +29,7 @@ from ansible.inventory.host import Host from ansible.inventory.group import Group from ansible.inventory.expand_hosts import detect_range from ansible.inventory.expand_hosts import expand_hostname_range +from ansible.parsing.utils.addresses import parse_address from ansible.utils.unicode import to_unicode, to_bytes class InventoryParser(object): @@ -265,30 +266,20 @@ class InventoryParser(object): optional port number that applies to all of them. ''' - # Is a port number specified? - # - # This may be a mandatory :NN suffix on any square-bracketed expression - # (IPv6 address, IPv4 address, host name, host pattern), or an optional - # :NN suffix on an IPv4 address, host name, or pattern. IPv6 addresses - # must be in square brackets if a port is specified. + # Can the given hostpattern be parsed as a host with an optional port + # specification? - port = None + (pattern, port) = parse_address(hostpattern, allow_ranges=True) + if not pattern: + self._raise_error("Can't parse '%s' as host[:port]" % hostpattern) - for type in ['bracketed_hostport', 'hostport']: - m = self.patterns[type].match(hostpattern) - if m: - (hostpattern, port) = m.groups() - continue + # Once we have separated the pattern, we expand it into list of one or + # more hostnames, depending on whether it contains any [x:y] ranges. - # Now we're left with just the pattern, which results in a list of one - # or more hostnames, depending on whether it contains any [x:y] ranges. - # - # FIXME: We could be more strict here about validation. - - if detect_range(hostpattern): - hostnames = expand_hostname_range(hostpattern) + if detect_range(pattern): + hostnames = expand_hostname_range(pattern) else: - hostnames = [hostpattern] + hostnames = [pattern] return (hostnames, port) @@ -374,29 +365,3 @@ class InventoryParser(object): $ # end of the line ''', re.X ) - - # The following patterns match the various ways in which a port number - # may be specified on an IPv6 address, IPv4 address, hostname, or host - # pattern. All of the above may be enclosed in square brackets with a - # mandatory :NN suffix; or all but the first may be given without any - # brackets but with an :NN suffix. - - self.patterns['bracketed_hostport'] = re.compile( - r'''^ - \[(.+)\] # [host identifier] - :([0-9]+) # :port number - $ - ''', re.X - ) - - self.patterns['hostport'] = re.compile( - r'''^ - ((?: # We want to match: - [^:\[\]] # (a non-range character - | # ...or... - \[[^\]]*\] # a complete bracketed expression) - )*) # repeated as many times as possible - :([0-9]+) # followed by a port number - $ - ''', re.X - ) diff --git a/lib/ansible/parsing/utils/addresses.py b/lib/ansible/parsing/utils/addresses.py new file mode 100644 index 0000000000..3a128a3524 --- /dev/null +++ b/lib/ansible/parsing/utils/addresses.py @@ -0,0 +1,200 @@ +# Copyright 2015 Abhijit Menon-Sen +# +# This file is part of Ansible +# +# Ansible is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Ansible is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ansible. If not, see . + +# Make coding more python3-ish +from __future__ import (absolute_import, division, print_function) +__metaclass__ = type + +import re + +# Components that match a numeric or alphanumeric begin:end or begin:end:step +# range expression inside square brackets. + +numeric_range = r''' + \[ + (?:[0-9]+:[0-9]+) # numeric begin:end + (?::[0-9]+)? # numeric :step (optional) + \] +''' + +alphanumeric_range = r''' + \[ + (?: + [a-z]:[a-z]| # one-char alphabetic range + [0-9]+:[0-9]+ # ...or a numeric one + ) + (?::[0-9]+)? # numeric :step (optional) + \] +''' + +# Components that match a 16-bit portion of an IPv6 address in hexadecimal +# notation (0..ffff) or an 8-bit portion of an IPv4 address in decimal notation +# (0..255) or an [x:y(:z)] numeric range. + +ipv6_component = r''' + (?: + [0-9a-f]{{1,4}}| # 0..ffff + {range} # or a numeric range + ) +'''.format(range=numeric_range) + +ipv4_component = r''' + (?: + [01]?[0-9]{{1,2}}| # 0..199 + 2[0-4][0-9]| # 200..249 + 25[0-5]| # 250..255 + {range} # or a numeric range + ) +'''.format(range=numeric_range) + +patterns = { + # This matches a square-bracketed expression with a port specification. What + # is inside the square brackets is validated later. + + 'bracketed_hostport': re.compile( + r'''^ + \[(.+)\] # [host identifier] + :([0-9]+) # :port number + $ + ''', re.X + ), + + # This matches a bare IPv4 address or hostname (or host pattern including + # [x:y(:z)] ranges) with a port specification. + + 'hostport': re.compile( + r'''^ + ((?: # We want to match: + [^:\[\]] # (a non-range character + | # ...or... + \[[^\]]*\] # a complete bracketed expression) + )*) # repeated as many times as possible + :([0-9]+) # followed by a port number + $ + ''', re.X + ), + + # This matches an IPv4 address, but also permits range expressions. + + 'ipv4': re.compile( + r'''^ + (?:{i4}\.){{3}}{i4} # Three parts followed by dots plus one + $ + '''.format(i4=ipv4_component), re.X|re.I + ), + + # This matches an IPv6 address, but also permits range expressions. + # + # This expression looks complex, but it really only spells out the various + # combinations in which the basic unit of an IPv6 address (0..ffff) can be + # written, from :: to 1:2:3:4:5:6:7:8, plus the IPv4-in-IPv6 variants such + # as ::ffff:192.0.2.3. + # + # Note that we can't just use ipaddress.ip_address() because we also have to + # accept ranges in place of each component. + + 'ipv6': re.compile( + r'''^ + (?:{0}:){{7}}{0}| # uncompressed: 1:2:3:4:5:6:7:8 + (?:{0}:){{1,6}}:| # compressed variants, which are all + (?:{0}:)(?:{0}){{1,6}}| # a::b for various lengths of a,b + (?:{0}:){{2}}(?::{0}){{1,5}}| + (?:{0}:){{3}}(?::{0}){{1,4}}| + (?:{0}:){{4}}(?::{0}){{1,3}}| + (?:{0}:){{5}}(?::{0}){{1,2}}| + (?:{0}:){{6}}(?::{0})| # ...all with 2 <= a+b <= 7 + :(?::{0}){{1,6}}| # ::ffff(:ffff...) + {0}?::| # ffff::, :: + # ipv4-in-ipv6 variants + (?:0:){{6}}(?:{0}\.){{3}}{0}| + ::(?:ffff:)?(?:{0}\.){{3}}{0}| + (?:0:){{5}}ffff:(?:{0}\.){{3}}{0} + $ + '''.format(ipv6_component), re.X|re.I + ), + + # This matches a hostname or host pattern including [x:y(:z)] ranges. + # + # We roughly follow DNS rules here, but also allow ranges (and underscores). + # In the past, no systematic rules were enforced about inventory hostnames, + # but the parsing context (e.g. shlex.split(), fnmatch.fnmatch()) excluded + # various metacharacters anyway. + # + # We don't enforce DNS length restrictions here (63 characters per label, + # 253 characters total) or make any attempt to process IDNs. + + 'hostname': re.compile( + r'''^ # We need at least one label, + (?: # which comprises: + [a-z0-9_-]| # (a valid domain label character + {0} # or a bracketed alphanumeric range) + )+ + (?:\.(?:[a-z0-9_-]|{0})+)* # Followed by zero or more .labels + $ + '''.format(alphanumeric_range), re.X|re.I + ), +} + +def parse_address(address, allow_ranges=False): + """ + Takes a string and returns a (host, port) tuple. If the host is None, then + the string could not be parsed as a host identifier with an optional port + specification. If the port is None, then no port was specified. + + The host identifier may be a hostname (qualified or not), an IPv4 address, + or an IPv6 address. If allow_ranges is True, then any of those may contain + [x:y] range specifications, e.g. foo[1:3] or foo[0:5]-bar[x-z]. + + The port number is an optional :NN suffix on an IPv4 address or host name, + or a mandatory :NN suffix on any square-bracketed expression: IPv6 address, + IPv4 address, or host name. (This means the only way to specify a port for + an IPv6 address is to enclose it in square brackets.) + """ + + # First, we extract the port number if one is specified. + + port = None + for type in ['bracketed_hostport', 'hostport']: + m = patterns[type].match(address) + if m: + (address, port) = m.groups() + port = int(port) + continue + + # What we're left with now must be an IPv4 or IPv6 address, possibly with + # numeric ranges, or a hostname with alphanumeric ranges. + + host = None + for type in ['ipv4', 'ipv6', 'hostname']: + m = patterns[type].match(address) + if m: + host = address + continue + + # If it isn't any of the above, we don't understand it. + + if not host: + return (None, None) + + # If we get to this point, we know that any included ranges are valid. If + # the caller is prepared to handle them, all is well. Otherwise we treat + # it as a parse failure. + + if not allow_ranges and '[' in host: + return (None, None) + + return (host, port) diff --git a/lib/ansible/plugins/action/add_host.py b/lib/ansible/plugins/action/add_host.py index cf2dab1737..0e7d3187e5 100644 --- a/lib/ansible/plugins/action/add_host.py +++ b/lib/ansible/plugins/action/add_host.py @@ -23,6 +23,8 @@ __metaclass__ = type import re from ansible.plugins.action import ActionBase +from ansible.parsing.utils.addresses import parse_address +from ansible.errors import AnsibleError, AnsibleParserError class ActionModule(ActionBase): ''' Create inventory hosts and groups in the memory inventory''' @@ -40,9 +42,11 @@ class ActionModule(ActionBase): new_name = self._task.args.get('name', self._task.args.get('hostname', None)) #vv("creating host via 'add_host': hostname=%s" % new_name) - new_name, new_port = _parse_ip_host_and_port(new_name) - if new_port: - self._task.args['ansible_ssh_port'] = new_port + name, port = parse_address(new_name, allow_ranges=False) + if not name: + raise AnsibleError("Invalid inventory hostname: %s" % new_name) + if port: + self._task.args['ansible_ssh_port'] = port groups = self._task.args.get('groupname', self._task.args.get('groups', self._task.args.get('group', ''))) # add it to the group if that was specified @@ -58,28 +62,4 @@ class ActionModule(ActionBase): if not k in [ 'name', 'hostname', 'groupname', 'groups' ]: host_vars[k] = self._task.args[k] - return dict(changed=True, add_host=dict(host_name=new_name, groups=new_groups, host_vars=host_vars)) - -def _parse_ip_host_and_port(hostname): - """ - Attempt to parse the hostname and port from a hostname, e.g., - - some-host-name - some-host-name:80 - 8.8.8.8 - 8.8.8.8:80 - 2001:db8:0:1 - [2001:db8:0:1]:80 - """ - if hostname.count(':') > 1: - match = re.match( - '\[(?P[^\]]+)\](:(?P[0-9]+))?', - hostname - ) - if match: - return match.group('ip'), match.group('port') - else: - return hostname, None - elif ':' in hostname: - return hostname.rsplit(':', 1) - return hostname, None + return dict(changed=True, add_host=dict(host_name=name, groups=new_groups, host_vars=host_vars)) diff --git a/test/units/parsing/test_addresses.py b/test/units/parsing/test_addresses.py new file mode 100644 index 0000000000..2bfb110218 --- /dev/null +++ b/test/units/parsing/test_addresses.py @@ -0,0 +1,56 @@ +import unittest + +from ansible.parsing.utils.addresses import parse_address + +class TestParseAddress(unittest.TestCase): + + tests = { + # IPv4 addresses + '192.0.2.3': ['192.0.2.3', None], + '192.0.2.3:23': ['192.0.2.3', 23], + + # IPv6 addresses + '::': ['::', None], + '::1': ['::1', None], + '[::1]:442': ['::1', 442], + 'abcd:ef98:7654:3210:abcd:ef98:7654:3210': ['abcd:ef98:7654:3210:abcd:ef98:7654:3210', None], + '[abcd:ef98:7654:3210:abcd:ef98:7654:3210]:42': ['abcd:ef98:7654:3210:abcd:ef98:7654:3210', 42], + + # Hostnames + 'some-host': ['some-host', None], + 'some-host:80': ['some-host', 80], + 'some.host.com:492': ['some.host.com', 492], + '[some.host.com]:493': ['some.host.com', 493], + + # Various errors + '': [None, None], + 'some..host': [None, None], + 'some.': [None, None], + '[example.com]': [None, None], + } + + range_tests = { + '192.0.2.[3:10]': ['192.0.2.[3:10]', None], + '192.0.2.[3:10]:23': ['192.0.2.[3:10]', 23], + 'abcd:ef98::7654:[1:9]': ['abcd:ef98::7654:[1:9]', None], + '[abcd:ef98::7654:[6:32]]:2222': ['abcd:ef98::7654:[6:32]', 2222], + 'foo[a:b]:42': ['foo[a:b]', 42], + 'foo[a-b]:32': [None, None], + 'foo[x-y]': [None, None], + } + + def test_without_ranges(self): + for t in self.tests: + test = self.tests[t] + + (host, port) = parse_address(t) + assert host == test[0] + assert port == test[1] + + def test_with_ranges(self): + for t in self.range_tests: + test = self.range_tests[t] + + (host, port) = parse_address(t, allow_ranges=True) + assert host == test[0] + assert port == test[1] diff --git a/test/units/plugins/action/test_add_host.py b/test/units/plugins/action/test_add_host.py deleted file mode 100644 index c694d387a3..0000000000 --- a/test/units/plugins/action/test_add_host.py +++ /dev/null @@ -1,47 +0,0 @@ -import unittest - -from ansible.plugins.action import add_host - - -class TestAddHost(unittest.TestCase): - - def test_hostname(self): - host, port = add_host._parse_ip_host_and_port('some-remote-host') - assert host == 'some-remote-host' - assert port is None - - def test_hostname_with_port(self): - host, port = add_host._parse_ip_host_and_port('some-remote-host:80') - assert host == 'some-remote-host' - assert port == '80' - - def test_parse_ip_host_and_port_v4(self): - host, port = add_host._parse_ip_host_and_port('8.8.8.8') - assert host == '8.8.8.8' - assert port is None - - def test_parse_ip_host_and_port_v4_and_port(self): - host, port = add_host._parse_ip_host_and_port('8.8.8.8:80') - assert host == '8.8.8.8' - assert port == '80' - - def test_parse_ip_host_and_port_v6(self): - host, port = add_host._parse_ip_host_and_port( - 'dead:beef:dead:beef:dead:beef:dead:beef' - ) - assert host == 'dead:beef:dead:beef:dead:beef:dead:beef' - assert port is None - - def test_parse_ip_host_and_port_v6_with_brackets(self): - host, port = add_host._parse_ip_host_and_port( - '[dead:beef:dead:beef:dead:beef:dead:beef]' - ) - assert host == 'dead:beef:dead:beef:dead:beef:dead:beef' - assert port is None - - def test_parse_ip_host_and_port_v6_with_brackets_and_port(self): - host, port = add_host._parse_ip_host_and_port( - '[dead:beef:dead:beef:dead:beef:dead:beef]:80' - ) - assert host == 'dead:beef:dead:beef:dead:beef:dead:beef' - assert port == '80'