Fix bug (#18355) where encrypted inventories fail 18355 (#18373)

* Fix bug (#18355) where encrypted inventories fail This is first part of fix for #18355 * Make DataLoader._get_file_contents return bytes The issue #18355 is caused by a change to inventory to stop using _get_file_contents so that it can handle text encoding itself to better protect against harmless text encoding errors in ini files (invalid unicode text in comment fields). So this makes _get_file_contents return bytes so it and other callers can handle the to_text(). The data returned by _get_file_contents() is now a bytes object instead of a text object. The callers of _get_file_contents() have been updated to call to_text() themselves on the results. Previously, the ini parser attempted to work around ini files that potentially include non-vailid unicode in comment lines. To do this, it stopped using DataLoader._get_file_contents() which does the decryption of files if vault encrypted. It didn't use that because _get_file_contents previously did to_text() on the read data itself. _get_file_contents() returns a bytestring now, so ini.py can call it and still special case ini file comments when converting to_text(). That also means encrypted inventory files are decrypted first. Fixes #18355
2016-11-07 10:07:26 -05:00 · 2016-11-07 10:07:26 -05:00 · dd0189839e
commit dd0189839e
parent 5aaf1d1a15
6 changed files with 32 additions and 20 deletions
--- a/lib/ansible/inventory/ini.py
+++ b/lib/ansible/inventory/ini.py
@ -51,18 +51,21 @@ class InventoryParser(object):

        # Read in the hosts, groups, and variables defined in the
        # inventory file.
+        if loader:
+            (b_data, private) = loader._get_file_contents(filename)
+        else:
+            with open(filename, 'rb') as fh:
+                b_data = fh.read()

-        with open(filename, 'rb') as fh:
-            data = fh.read()
-            try:
-                # Faster to do to_text once on a long string than many
-                # times on smaller strings
-                data = to_text(data, errors='surrogate_or_strict')
-                data = [line for line in data.splitlines() if not (line.startswith(u';') or line.startswith(u'#'))]
-            except UnicodeError:
-                # Skip comment lines here to avoid potential undecodable
-                # errors in comments: https://github.com/ansible/ansible/issues/17593
-                data = [to_text(line, errors='surrogate_or_strict') for line in data.splitlines() if not (line.startswith(b';') or line.startswith(b'#'))]
+        try:
+            # Faster to do to_text once on a long string than many
+            # times on smaller strings
+            data = to_text(b_data, errors='surrogate_or_strict')
+            data = [line for line in data.splitlines() if not (line.startswith(u';') or line.startswith(u'#'))]
+        except UnicodeError:
+            # Skip comment lines here to avoid potential undecodable
+            # errors in comments: https://github.com/ansible/ansible/issues/17593
+            data = [to_text(line, errors='surrogate_or_strict') for line in b_data.splitlines() if not (line.startswith(b';') or line.startswith(b'#'))]

        self._parse(data)

--- a/lib/ansible/parsing/dataloader.py
+++ b/lib/ansible/parsing/dataloader.py
@ -116,7 +116,9 @@ class DataLoader():
            parsed_data = self._FILE_CACHE[file_name]
        else:
            # read the file contents and load the data structure from them
-            (file_data, show_content) = self._get_file_contents(file_name)
+            (b_file_data, show_content) = self._get_file_contents(file_name)
+
+            file_data = to_text(b_file_data, errors='surrogate_or_strict')
            parsed_data = self.load(data=file_data, file_name=file_name, show_content=show_content)

            # cache the file contents for next time
@ -178,7 +180,6 @@ class DataLoader():
                    data = self._vault.decrypt(data, filename=b_file_name)
                    show_content = False

-            data = to_text(data, errors='surrogate_or_strict')
            return (data, show_content)

        except (IOError, OSError) as e:
--- a/lib/ansible/plugins/action/include_vars.py
+++ b/lib/ansible/plugins/action/include_vars.py
@ -22,7 +22,7 @@ from os import path, walk
 import re

 from ansible.errors import AnsibleError
-from ansible.module_utils._text import to_native
+from ansible.module_utils._text import to_native, to_text
 from ansible.plugins.action import ActionBase


@ -245,7 +245,9 @@ class ActionModule(ActionBase):
            )
            return failed, err_msg, results

-        data, show_content = self._loader._get_file_contents(filename)
+        b_data, show_content = self._loader._get_file_contents(filename)
+        data = to_text(b_data, errors='surrogate_or_strict')
+
        self.show_content = show_content
        data = self._loader.load(data, show_content)
        if not data:
--- a/lib/ansible/plugins/lookup/file.py
+++ b/lib/ansible/plugins/lookup/file.py
@ -19,6 +19,7 @@ __metaclass__ = type

 from ansible.errors import AnsibleError, AnsibleParserError
 from ansible.plugins.lookup import LookupBase
+from ansible.module_utils._text import to_text

 try:
    from __main__ import display
@ -41,7 +42,8 @@ class LookupModule(LookupBase):
            display.vvvv(u"File lookup using %s as file" % lookupfile)
            try:
                if lookupfile:
-                    contents, show_data = self._loader._get_file_contents(lookupfile)
+                    b_contents, show_data = self._loader._get_file_contents(lookupfile)
+                    contents = to_text(b_contents, errors='surrogate_or_strict')
                    ret.append(contents.rstrip())
                else:
                    raise AnsibleParserError()
--- a/test/units/mock/loader.py
+++ b/test/units/mock/loader.py
@ -23,6 +23,8 @@ import os

 from ansible.errors import AnsibleParserError
 from ansible.parsing.dataloader import DataLoader
+from ansible.module_utils._text import to_bytes
+

 class DictDataLoader(DataLoader):

@ -39,9 +41,11 @@ class DictDataLoader(DataLoader):
            return self.load(self._file_mapping[path], path)
        return None

+    # TODO: the real _get_file_contents returns a bytestring, so we actually convert the
+    #       unicode/text it's created with to utf-8
    def _get_file_contents(self, path):
        if path in self._file_mapping:
-            return (self._file_mapping[path], False)
+            return (to_bytes(self._file_mapping[path]), False)
        else:
            raise AnsibleParserError("file not found: %s" % path)

--- a/test/units/parsing/test_data_loader.py
+++ b/test/units/parsing/test_data_loader.py
@ -37,13 +37,13 @@ class TestDataLoader(unittest.TestCase):

    @patch.object(DataLoader, '_get_file_contents')
    def test_parse_json_from_file(self, mock_def):
-        mock_def.return_value = ("""{"a": 1, "b": 2, "c": 3}""", True)
+        mock_def.return_value = (b"""{"a": 1, "b": 2, "c": 3}""", True)
        output = self._loader.load_from_file('dummy_json.txt')
        self.assertEqual(output, dict(a=1,b=2,c=3))

    @patch.object(DataLoader, '_get_file_contents')
    def test_parse_yaml_from_file(self, mock_def):
-        mock_def.return_value = ("""
+        mock_def.return_value = (b"""
        a: 1
        b: 2
        c: 3
@ -53,7 +53,7 @@ class TestDataLoader(unittest.TestCase):

    @patch.object(DataLoader, '_get_file_contents')
    def test_parse_fail_from_file(self, mock_def):
-        mock_def.return_value = ("""
+        mock_def.return_value = (b"""
        TEXT:
            ***
               NOT VALID