sailesh1993 / rpms / cloud-init

Forked from rpms/cloud-init a year ago
Clone
16d12a
From f6dc3cf39a4884657478a47894ce8a76ec9a72c5 Mon Sep 17 00:00:00 2001
16d12a
From: Eduardo Otubo <otubo@redhat.com>
16d12a
Date: Wed, 24 Jun 2020 07:34:29 +0200
16d12a
Subject: [PATCH 1/4] ec2: Do not log IMDSv2 token values, instead use REDACTED
16d12a
 (#219)
16d12a
16d12a
RH-Author: Eduardo Otubo <otubo@redhat.com>
16d12a
Message-id: <20200505082940.18316-1-otubo@redhat.com>
16d12a
Patchwork-id: 96264
16d12a
O-Subject: [RHEL-7.9/RHEL-8.3 cloud-init PATCH] ec2: Do not log IMDSv2 token values, instead use REDACTED (#219)
16d12a
Bugzilla: 1822343
16d12a
RH-Acked-by: Cathy Avery <cavery@redhat.com>
16d12a
RH-Acked-by: Mohammed Gamal <mgamal@redhat.com>
16d12a
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
16d12a
16d12a
Note: There's no RHEL-8.3/cloud-init-19.4 branch yet, but it should be
16d12a
queued to be applied on top of it when it's created.
16d12a
16d12a
commit 87cd040ed8fe7195cbb357ed3bbf53cd2a81436c
16d12a
Author: Ryan Harper <ryan.harper@canonical.com>
16d12a
Date:   Wed Feb 19 15:01:09 2020 -0600
16d12a
16d12a
    ec2: Do not log IMDSv2 token values, instead use REDACTED (#219)
16d12a
16d12a
    Instead of logging the token values used, log the headers and replace the actual
16d12a
    values with the string 'REDACTED'.  This allows users to examine cloud-init.log
16d12a
    and see that the IMDSv2 token header is being used but avoids leaving the value
16d12a
    used in the log file itself.
16d12a
16d12a
    LP: #1863943
16d12a
16d12a
Signed-off-by: Eduardo Otubo <otubo@redhat.com>
16d12a
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
16d12a
---
16d12a
 cloudinit/ec2_utils.py                      | 12 ++++++++--
16d12a
 cloudinit/sources/DataSourceEc2.py          | 35 +++++++++++++++++++----------
16d12a
 cloudinit/url_helper.py                     | 27 ++++++++++++++++------
16d12a
 tests/unittests/test_datasource/test_ec2.py | 17 ++++++++++++++
16d12a
 4 files changed, 70 insertions(+), 21 deletions(-)
16d12a
16d12a
diff --git a/cloudinit/ec2_utils.py b/cloudinit/ec2_utils.py
16d12a
index 57708c1..34acfe8 100644
16d12a
--- a/cloudinit/ec2_utils.py
16d12a
+++ b/cloudinit/ec2_utils.py
16d12a
@@ -142,7 +142,8 @@ def skip_retry_on_codes(status_codes, _request_args, cause):
16d12a
 def get_instance_userdata(api_version='latest',
16d12a
                           metadata_address='http://169.254.169.254',
16d12a
                           ssl_details=None, timeout=5, retries=5,
16d12a
-                          headers_cb=None, exception_cb=None):
16d12a
+                          headers_cb=None, headers_redact=None,
16d12a
+                          exception_cb=None):
16d12a
     ud_url = url_helper.combine_url(metadata_address, api_version)
16d12a
     ud_url = url_helper.combine_url(ud_url, 'user-data')
16d12a
     user_data = ''
16d12a
@@ -155,7 +156,8 @@ def get_instance_userdata(api_version='latest',
16d12a
                                              SKIP_USERDATA_CODES)
16d12a
         response = url_helper.read_file_or_url(
16d12a
             ud_url, ssl_details=ssl_details, timeout=timeout,
16d12a
-            retries=retries, exception_cb=exception_cb, headers_cb=headers_cb)
16d12a
+            retries=retries, exception_cb=exception_cb, headers_cb=headers_cb,
16d12a
+            headers_redact=headers_redact)
16d12a
         user_data = response.contents
16d12a
     except url_helper.UrlError as e:
16d12a
         if e.code not in SKIP_USERDATA_CODES:
16d12a
@@ -169,11 +171,13 @@ def _get_instance_metadata(tree, api_version='latest',
16d12a
                            metadata_address='http://169.254.169.254',
16d12a
                            ssl_details=None, timeout=5, retries=5,
16d12a
                            leaf_decoder=None, headers_cb=None,
16d12a
+                           headers_redact=None,
16d12a
                            exception_cb=None):
16d12a
     md_url = url_helper.combine_url(metadata_address, api_version, tree)
16d12a
     caller = functools.partial(
16d12a
         url_helper.read_file_or_url, ssl_details=ssl_details,
16d12a
         timeout=timeout, retries=retries, headers_cb=headers_cb,
16d12a
+        headers_redact=headers_redact,
16d12a
         exception_cb=exception_cb)
16d12a
 
16d12a
     def mcaller(url):
16d12a
@@ -197,6 +201,7 @@ def get_instance_metadata(api_version='latest',
16d12a
                           metadata_address='http://169.254.169.254',
16d12a
                           ssl_details=None, timeout=5, retries=5,
16d12a
                           leaf_decoder=None, headers_cb=None,
16d12a
+                          headers_redact=None,
16d12a
                           exception_cb=None):
16d12a
     # Note, 'meta-data' explicitly has trailing /.
16d12a
     # this is required for CloudStack (LP: #1356855)
16d12a
@@ -204,6 +209,7 @@ def get_instance_metadata(api_version='latest',
16d12a
                                   metadata_address=metadata_address,
16d12a
                                   ssl_details=ssl_details, timeout=timeout,
16d12a
                                   retries=retries, leaf_decoder=leaf_decoder,
16d12a
+                                  headers_redact=headers_redact,
16d12a
                                   headers_cb=headers_cb,
16d12a
                                   exception_cb=exception_cb)
16d12a
 
16d12a
@@ -212,12 +218,14 @@ def get_instance_identity(api_version='latest',
16d12a
                           metadata_address='http://169.254.169.254',
16d12a
                           ssl_details=None, timeout=5, retries=5,
16d12a
                           leaf_decoder=None, headers_cb=None,
16d12a
+                          headers_redact=None,
16d12a
                           exception_cb=None):
16d12a
     return _get_instance_metadata(tree='dynamic/instance-identity',
16d12a
                                   api_version=api_version,
16d12a
                                   metadata_address=metadata_address,
16d12a
                                   ssl_details=ssl_details, timeout=timeout,
16d12a
                                   retries=retries, leaf_decoder=leaf_decoder,
16d12a
+                                  headers_redact=headers_redact,
16d12a
                                   headers_cb=headers_cb,
16d12a
                                   exception_cb=exception_cb)
16d12a
 # vi: ts=4 expandtab
16d12a
diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py
16d12a
index b9f346a..0f2bfef 100644
16d12a
--- a/cloudinit/sources/DataSourceEc2.py
16d12a
+++ b/cloudinit/sources/DataSourceEc2.py
16d12a
@@ -31,6 +31,9 @@ STRICT_ID_DEFAULT = "warn"
16d12a
 API_TOKEN_ROUTE = 'latest/api/token'
16d12a
 API_TOKEN_DISABLED = '_ec2_disable_api_token'
16d12a
 AWS_TOKEN_TTL_SECONDS = '21600'
16d12a
+AWS_TOKEN_PUT_HEADER = 'X-aws-ec2-metadata-token'
16d12a
+AWS_TOKEN_REQ_HEADER = AWS_TOKEN_PUT_HEADER + '-ttl-seconds'
16d12a
+AWS_TOKEN_REDACT = [AWS_TOKEN_PUT_HEADER, AWS_TOKEN_REQ_HEADER]
16d12a
 
16d12a
 
16d12a
 class CloudNames(object):
16d12a
@@ -158,7 +161,8 @@ class DataSourceEc2(sources.DataSource):
16d12a
         for api_ver in self.extended_metadata_versions:
16d12a
             url = url_tmpl.format(self.metadata_address, api_ver)
16d12a
             try:
16d12a
-                resp = uhelp.readurl(url=url, headers=headers)
16d12a
+                resp = uhelp.readurl(url=url, headers=headers,
16d12a
+                                     headers_redact=AWS_TOKEN_REDACT)
16d12a
             except uhelp.UrlError as e:
16d12a
                 LOG.debug('url %s raised exception %s', url, e)
16d12a
             else:
16d12a
@@ -180,6 +184,7 @@ class DataSourceEc2(sources.DataSource):
16d12a
                 self.identity = ec2.get_instance_identity(
16d12a
                     api_version, self.metadata_address,
16d12a
                     headers_cb=self._get_headers,
16d12a
+                    headers_redact=AWS_TOKEN_REDACT,
16d12a
                     exception_cb=self._refresh_stale_aws_token_cb).get(
16d12a
                         'document', {})
16d12a
             return self.identity.get(
16d12a
@@ -205,7 +210,8 @@ class DataSourceEc2(sources.DataSource):
16d12a
         LOG.debug('Fetching Ec2 IMDSv2 API Token')
16d12a
         url, response = uhelp.wait_for_url(
16d12a
             urls=urls, max_wait=1, timeout=1, status_cb=self._status_cb,
16d12a
-            headers_cb=self._get_headers, request_method=request_method)
16d12a
+            headers_cb=self._get_headers, request_method=request_method,
16d12a
+            headers_redact=AWS_TOKEN_REDACT)
16d12a
 
16d12a
         if url and response:
16d12a
             self._api_token = response
16d12a
@@ -252,7 +258,8 @@ class DataSourceEc2(sources.DataSource):
16d12a
             url, _ = uhelp.wait_for_url(
16d12a
                 urls=urls, max_wait=url_params.max_wait_seconds,
16d12a
                 timeout=url_params.timeout_seconds, status_cb=LOG.warning,
16d12a
-                headers_cb=self._get_headers, request_method=request_method)
16d12a
+                headers_redact=AWS_TOKEN_REDACT, headers_cb=self._get_headers,
16d12a
+                request_method=request_method)
16d12a
 
16d12a
             if url:
16d12a
                 metadata_address = url2base[url]
16d12a
@@ -420,6 +427,7 @@ class DataSourceEc2(sources.DataSource):
16d12a
         if not self.wait_for_metadata_service():
16d12a
             return {}
16d12a
         api_version = self.get_metadata_api_version()
16d12a
+        redact = AWS_TOKEN_REDACT
16d12a
         crawled_metadata = {}
16d12a
         if self.cloud_name == CloudNames.AWS:
16d12a
             exc_cb = self._refresh_stale_aws_token_cb
16d12a
@@ -429,14 +437,17 @@ class DataSourceEc2(sources.DataSource):
16d12a
         try:
16d12a
             crawled_metadata['user-data'] = ec2.get_instance_userdata(
16d12a
                 api_version, self.metadata_address,
16d12a
-                headers_cb=self._get_headers, exception_cb=exc_cb_ud)
16d12a
+                headers_cb=self._get_headers, headers_redact=redact,
16d12a
+                exception_cb=exc_cb_ud)
16d12a
             crawled_metadata['meta-data'] = ec2.get_instance_metadata(
16d12a
                 api_version, self.metadata_address,
16d12a
-                headers_cb=self._get_headers, exception_cb=exc_cb)
16d12a
+                headers_cb=self._get_headers, headers_redact=redact,
16d12a
+                exception_cb=exc_cb)
16d12a
             if self.cloud_name == CloudNames.AWS:
16d12a
                 identity = ec2.get_instance_identity(
16d12a
                     api_version, self.metadata_address,
16d12a
-                    headers_cb=self._get_headers, exception_cb=exc_cb)
16d12a
+                    headers_cb=self._get_headers, headers_redact=redact,
16d12a
+                    exception_cb=exc_cb)
16d12a
                 crawled_metadata['dynamic'] = {'instance-identity': identity}
16d12a
         except Exception:
16d12a
             util.logexc(
16d12a
@@ -455,11 +466,12 @@ class DataSourceEc2(sources.DataSource):
16d12a
         if self.cloud_name != CloudNames.AWS:
16d12a
             return None
16d12a
         LOG.debug("Refreshing Ec2 metadata API token")
16d12a
-        request_header = {'X-aws-ec2-metadata-token-ttl-seconds': seconds}
16d12a
+        request_header = {AWS_TOKEN_REQ_HEADER: seconds}
16d12a
         token_url = '{}/{}'.format(self.metadata_address, API_TOKEN_ROUTE)
16d12a
         try:
16d12a
-            response = uhelp.readurl(
16d12a
-                token_url, headers=request_header, request_method="PUT")
16d12a
+            response = uhelp.readurl(token_url, headers=request_header,
16d12a
+                                     headers_redact=AWS_TOKEN_REDACT,
16d12a
+                                     request_method="PUT")
16d12a
         except uhelp.UrlError as e:
16d12a
             LOG.warning(
16d12a
                 'Unable to get API token: %s raised exception %s',
16d12a
@@ -500,8 +512,7 @@ class DataSourceEc2(sources.DataSource):
16d12a
                                                  API_TOKEN_DISABLED):
16d12a
             return {}
16d12a
         # Request a 6 hour token if URL is API_TOKEN_ROUTE
16d12a
-        request_token_header = {
16d12a
-            'X-aws-ec2-metadata-token-ttl-seconds': AWS_TOKEN_TTL_SECONDS}
16d12a
+        request_token_header = {AWS_TOKEN_REQ_HEADER: AWS_TOKEN_TTL_SECONDS}
16d12a
         if API_TOKEN_ROUTE in url:
16d12a
             return request_token_header
16d12a
         if not self._api_token:
16d12a
@@ -511,7 +522,7 @@ class DataSourceEc2(sources.DataSource):
16d12a
             self._api_token = self._refresh_api_token()
16d12a
             if not self._api_token:
16d12a
                 return {}
16d12a
-        return {'X-aws-ec2-metadata-token': self._api_token}
16d12a
+        return {AWS_TOKEN_PUT_HEADER: self._api_token}
16d12a
 
16d12a
 
16d12a
 class DataSourceEc2Local(DataSourceEc2):
16d12a
diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py
16d12a
index 1496a47..3e7de9f 100644
16d12a
--- a/cloudinit/url_helper.py
16d12a
+++ b/cloudinit/url_helper.py
16d12a
@@ -8,6 +8,7 @@
16d12a
 #
16d12a
 # This file is part of cloud-init. See LICENSE file for license information.
16d12a
 
16d12a
+import copy
16d12a
 import json
16d12a
 import os
16d12a
 import requests
16d12a
@@ -41,6 +42,7 @@ else:
16d12a
 SSL_ENABLED = False
16d12a
 CONFIG_ENABLED = False  # This was added in 0.7 (but taken out in >=1.0)
16d12a
 _REQ_VER = None
16d12a
+REDACTED = 'REDACTED'
16d12a
 try:
16d12a
     from distutils.version import LooseVersion
16d12a
     import pkg_resources
16d12a
@@ -199,9 +201,9 @@ def _get_ssl_args(url, ssl_details):
16d12a
 
16d12a
 
16d12a
 def readurl(url, data=None, timeout=None, retries=0, sec_between=1,
16d12a
-            headers=None, headers_cb=None, ssl_details=None,
16d12a
-            check_status=True, allow_redirects=True, exception_cb=None,
16d12a
-            session=None, infinite=False, log_req_resp=True,
16d12a
+            headers=None, headers_cb=None, headers_redact=None,
16d12a
+            ssl_details=None, check_status=True, allow_redirects=True,
16d12a
+            exception_cb=None, session=None, infinite=False, log_req_resp=True,
16d12a
             request_method=None):
16d12a
     """Wrapper around requests.Session to read the url and retry if necessary
16d12a
 
16d12a
@@ -217,6 +219,7 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1,
16d12a
     :param headers: Optional dict of headers to send during request
16d12a
     :param headers_cb: Optional callable returning a dict of values to send as
16d12a
         headers during request
16d12a
+    :param headers_redact: Optional list of header names to redact from the log
16d12a
     :param ssl_details: Optional dict providing key_file, ca_certs, and
16d12a
         cert_file keys for use on in ssl connections.
16d12a
     :param check_status: Optional boolean set True to raise when HTTPError
16d12a
@@ -243,6 +246,8 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1,
16d12a
     req_args['method'] = request_method
16d12a
     if timeout is not None:
16d12a
         req_args['timeout'] = max(float(timeout), 0)
16d12a
+    if headers_redact is None:
16d12a
+        headers_redact = []
16d12a
     # It doesn't seem like config
16d12a
     # was added in older library versions (or newer ones either), thus we
16d12a
     # need to manually do the retries if it wasn't...
16d12a
@@ -287,6 +292,12 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1,
16d12a
             if k == 'data':
16d12a
                 continue
16d12a
             filtered_req_args[k] = v
16d12a
+            if k == 'headers':
16d12a
+                for hkey, _hval in v.items():
16d12a
+                    if hkey in headers_redact:
16d12a
+                        filtered_req_args[k][hkey] = (
16d12a
+                            copy.deepcopy(req_args[k][hkey]))
16d12a
+                        filtered_req_args[k][hkey] = REDACTED
16d12a
         try:
16d12a
 
16d12a
             if log_req_resp:
16d12a
@@ -339,8 +350,8 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1,
16d12a
     return None  # Should throw before this...
16d12a
 
16d12a
 
16d12a
-def wait_for_url(urls, max_wait=None, timeout=None,
16d12a
-                 status_cb=None, headers_cb=None, sleep_time=1,
16d12a
+def wait_for_url(urls, max_wait=None, timeout=None, status_cb=None,
16d12a
+                 headers_cb=None, headers_redact=None, sleep_time=1,
16d12a
                  exception_cb=None, sleep_time_cb=None, request_method=None):
16d12a
     """
16d12a
     urls:      a list of urls to try
16d12a
@@ -352,6 +363,7 @@ def wait_for_url(urls, max_wait=None, timeout=None,
16d12a
     status_cb: call method with string message when a url is not available
16d12a
     headers_cb: call method with single argument of url to get headers
16d12a
                 for request.
16d12a
+    headers_redact: a list of header names to redact from the log
16d12a
     exception_cb: call method with 2 arguments 'msg' (per status_cb) and
16d12a
                   'exception', the exception that occurred.
16d12a
     sleep_time_cb: call method with 2 arguments (response, loop_n) that
16d12a
@@ -415,8 +427,9 @@ def wait_for_url(urls, max_wait=None, timeout=None,
16d12a
                     headers = {}
16d12a
 
16d12a
                 response = readurl(
16d12a
-                    url, headers=headers, timeout=timeout,
16d12a
-                    check_status=False, request_method=request_method)
16d12a
+                    url, headers=headers, headers_redact=headers_redact,
16d12a
+                    timeout=timeout, check_status=False,
16d12a
+                    request_method=request_method)
16d12a
                 if not response.contents:
16d12a
                     reason = "empty response [%s]" % (response.code)
16d12a
                     url_exc = UrlError(ValueError(reason), code=response.code,
16d12a
diff --git a/tests/unittests/test_datasource/test_ec2.py b/tests/unittests/test_datasource/test_ec2.py
16d12a
index 34a089f..bd5bd4c 100644
16d12a
--- a/tests/unittests/test_datasource/test_ec2.py
16d12a
+++ b/tests/unittests/test_datasource/test_ec2.py
16d12a
@@ -429,6 +429,23 @@ class TestEc2(test_helpers.HttprettyTestCase):
16d12a
         self.assertTrue(ds.get_data())
16d12a
         self.assertFalse(ds.is_classic_instance())
16d12a
 
16d12a
+    def test_aws_token_redacted(self):
16d12a
+        """Verify that aws tokens are redacted when logged."""
16d12a
+        ds = self._setup_ds(
16d12a
+            platform_data=self.valid_platform_data,
16d12a
+            sys_cfg={'datasource': {'Ec2': {'strict_id': False}}},
16d12a
+            md={'md': DEFAULT_METADATA})
16d12a
+        self.assertTrue(ds.get_data())
16d12a
+        all_logs = self.logs.getvalue().splitlines()
16d12a
+        REDACT_TTL = "'X-aws-ec2-metadata-token-ttl-seconds': 'REDACTED'"
16d12a
+        REDACT_TOK = "'X-aws-ec2-metadata-token': 'REDACTED'"
16d12a
+        logs_with_redacted_ttl = [log for log in all_logs if REDACT_TTL in log]
16d12a
+        logs_with_redacted = [log for log in all_logs if REDACT_TOK in log]
16d12a
+        logs_with_token = [log for log in all_logs if 'API-TOKEN' in log]
16d12a
+        self.assertEqual(1, len(logs_with_redacted_ttl))
16d12a
+        self.assertEqual(79, len(logs_with_redacted))
16d12a
+        self.assertEqual(0, len(logs_with_token))
16d12a
+
16d12a
     @mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery')
16d12a
     def test_valid_platform_with_strict_true(self, m_dhcp):
16d12a
         """Valid platform data should return true with strict_id true."""
16d12a
-- 
16d12a
1.8.3.1
16d12a