18322d
From ae9b545cef4a68dfb9f9356dd27e43ff71ec26aa Mon Sep 17 00:00:00 2001
18322d
From: Eduardo Otubo <otubo@redhat.com>
18322d
Date: Wed, 29 May 2019 13:41:45 +0200
18322d
Subject: [PATCH 1/5] Azure: Ensure platform random_seed is always serializable
18322d
 as JSON.
18322d
18322d
RH-Author: Eduardo Otubo <otubo@redhat.com>
18322d
Message-id: <20190529134149.842-2-otubo@redhat.com>
18322d
Patchwork-id: 88272
18322d
O-Subject: [RHEL-8.0.1/RHEL-8.1.0 cloud-init PATCHv2 1/5] Azure: Ensure platform random_seed is always serializable as JSON.
18322d
Bugzilla: 1691986
18322d
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
18322d
RH-Acked-by: Cathy Avery <cavery@redhat.com>
18322d
18322d
From: "Jason Zions (MSFT)" <jasonzio@microsoft.com>
18322d
commit 0dc3a77f41f4544e4cb5a41637af7693410d4cdf
18322d
Author: Jason Zions (MSFT) <jasonzio@microsoft.com>
18322d
Date:   Tue Mar 26 18:53:50 2019 +0000
18322d
18322d
    Azure: Ensure platform random_seed is always serializable as JSON.
18322d
18322d
    The Azure platform surfaces random bytes into /sys via Hyper-V.
18322d
    Python 2.7 json.dump() raises an exception if asked to convert
18322d
    a str with non-character content, and Python 3 json.dump()
18322d
    won't serialize a "bytes" value. As a result, c-i instance
18322d
    data is often not written by Azure, making reboots slower (c-i
18322d
    has to repeat work).
18322d
18322d
    The random data is base64-encoded and then decoded into a string
18322d
    (str or unicode depending on the version of Python in use). The
18322d
    base64 string has just as many bits of entropy, so we're not
18322d
    throwing away useful "information", but we can be certain
18322d
    json.dump() will correctly serialize the bits.
18322d
18322d
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
18322d
18322d
Conflicts:
18322d
    tests/unittests/test_datasource/test_azure.py
18322d
    Skipped the commit edf052c as it removes support for python-2.6
18322d
18322d
Signed-off-by: Eduardo Otubo <otubo@redhat.com>
18322d
---
18322d
 cloudinit/sources/DataSourceAzure.py          | 24 +++++++++++++++++++-----
18322d
 tests/data/azure/non_unicode_random_string    |  1 +
18322d
 tests/unittests/test_datasource/test_azure.py | 24 ++++++++++++++++++++++--
18322d
 3 files changed, 42 insertions(+), 7 deletions(-)
18322d
 create mode 100644 tests/data/azure/non_unicode_random_string
18322d
18322d
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
18322d
index 2062ca5..a768b2c 100644
18322d
--- a/cloudinit/sources/DataSourceAzure.py
18322d
+++ b/cloudinit/sources/DataSourceAzure.py
18322d
@@ -54,6 +54,7 @@ REPROVISION_MARKER_FILE = "/var/lib/cloud/data/poll_imds"
18322d
 REPORTED_READY_MARKER_FILE = "/var/lib/cloud/data/reported_ready"
18322d
 AGENT_SEED_DIR = '/var/lib/waagent'
18322d
 IMDS_URL = "http://169.254.169.254/metadata/"
18322d
+PLATFORM_ENTROPY_SOURCE = "/sys/firmware/acpi/tables/OEM0"
18322d
 
18322d
 # List of static scripts and network config artifacts created by
18322d
 # stock ubuntu suported images.
18322d
@@ -195,6 +196,8 @@ if util.is_FreeBSD():
18322d
         RESOURCE_DISK_PATH = "/dev/" + res_disk
18322d
     else:
18322d
         LOG.debug("resource disk is None")
18322d
+    # TODO: Find where platform entropy data is surfaced on FreeBSD
18322d
+    PLATFORM_ENTROPY_SOURCE = None
18322d
 
18322d
 BUILTIN_DS_CONFIG = {
18322d
     'agent_command': AGENT_START_BUILTIN,
18322d
@@ -1100,16 +1103,27 @@ def _check_freebsd_cdrom(cdrom_dev):
18322d
     return False
18322d
 
18322d
 
18322d
-def _get_random_seed():
18322d
+def _get_random_seed(source=PLATFORM_ENTROPY_SOURCE):
18322d
     """Return content random seed file if available, otherwise,
18322d
        return None."""
18322d
     # azure / hyper-v provides random data here
18322d
-    # TODO. find the seed on FreeBSD platform
18322d
     # now update ds_cfg to reflect contents pass in config
18322d
-    if util.is_FreeBSD():
18322d
+    if source is None:
18322d
         return None
18322d
-    return util.load_file("/sys/firmware/acpi/tables/OEM0",
18322d
-                          quiet=True, decode=False)
18322d
+    seed = util.load_file(source, quiet=True, decode=False)
18322d
+
18322d
+    # The seed generally contains non-Unicode characters. load_file puts
18322d
+    # them into a str (in python 2) or bytes (in python 3). In python 2,
18322d
+    # bad octets in a str cause util.json_dumps() to throw an exception. In
18322d
+    # python 3, bytes is a non-serializable type, and the handler load_file
18322d
+    # uses applies b64 encoding *again* to handle it. The simplest solution
18322d
+    # is to just b64encode the data and then decode it to a serializable
18322d
+    # string. Same number of bits of entropy, just with 25% more zeroes.
18322d
+    # There's no need to undo this base64-encoding when the random seed is
18322d
+    # actually used in cc_seed_random.py.
18322d
+    seed = base64.b64encode(seed).decode()
18322d
+
18322d
+    return seed
18322d
 
18322d
 
18322d
 def list_possible_azure_ds_devs():
18322d
diff --git a/tests/data/azure/non_unicode_random_string b/tests/data/azure/non_unicode_random_string
18322d
new file mode 100644
18322d
index 0000000..b9ecefb
18322d
--- /dev/null
18322d
+++ b/tests/data/azure/non_unicode_random_string
18322d
@@ -0,0 +1 @@
18322d
+OEM0d\x00\x00\x00\x01\x80VRTUALMICROSFT\x02\x17\x00\x06MSFT\x97\x00\x00\x00C\xb4{V\xf4X%\x061x\x90\x1c\xfen\x86\xbf~\xf5\x8c\x94&\x88\xed\x84\xf9B\xbd\xd3\xf1\xdb\xee:\xd9\x0fc\x0e\x83(\xbd\xe3'\xfc\x85,\xdf\xf4\x13\x99N\xc5\xf3Y\x1e\xe3\x0b\xa4H\x08J\xb9\xdcdb$
18322d
\ No newline at end of file
18322d
diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py
18322d
index 417d86a..eacf225 100644
18322d
--- a/tests/unittests/test_datasource/test_azure.py
18322d
+++ b/tests/unittests/test_datasource/test_azure.py
18322d
@@ -7,11 +7,11 @@ from cloudinit.sources import (
18322d
     UNSET, DataSourceAzure as dsaz, InvalidMetaDataException)
18322d
 from cloudinit.util import (b64e, decode_binary, load_file, write_file,
18322d
                             find_freebsd_part, get_path_dev_freebsd,
18322d
-                            MountFailedError)
18322d
+                            MountFailedError, json_dumps, load_json)
18322d
 from cloudinit.version import version_string as vs
18322d
 from cloudinit.tests.helpers import (
18322d
     HttprettyTestCase, CiTestCase, populate_dir, mock, wrap_and_call,
18322d
-    ExitStack, PY26, SkipTest)
18322d
+    ExitStack, PY26, SkipTest, resourceLocation)
18322d
 
18322d
 import crypt
18322d
 import httpretty
18322d
@@ -1924,4 +1924,24 @@ class TestWBIsPlatformViable(CiTestCase):
18322d
             self.logs.getvalue())
18322d
 
18322d
 
18322d
+class TestRandomSeed(CiTestCase):
18322d
+    """Test proper handling of random_seed"""
18322d
+
18322d
+    def test_non_ascii_seed_is_serializable(self):
18322d
+        """Pass if a random string from the Azure infrastructure which
18322d
+        contains at least one non-Unicode character can be converted to/from
18322d
+        JSON without alteration and without throwing an exception.
18322d
+        """
18322d
+        path = resourceLocation("azure/non_unicode_random_string")
18322d
+        result = dsaz._get_random_seed(path)
18322d
+
18322d
+        obj = {'seed': result}
18322d
+        try:
18322d
+            serialized = json_dumps(obj)
18322d
+            deserialized = load_json(serialized)
18322d
+        except UnicodeDecodeError:
18322d
+            self.fail("Non-serializable random seed returned")
18322d
+
18322d
+        self.assertEqual(deserialized['seed'], result)
18322d
+
18322d
 # vi: ts=4 expandtab
18322d
-- 
18322d
1.8.3.1
18322d