Blame SOURCES/libvirt-util-assign-tap-device-names-using-a-monotonically-increasing-integer.patch

397dc2
From 37b1acb1c820421d62b1416d90138bae7961bfb7 Mon Sep 17 00:00:00 2001
397dc2
Message-Id: <37b1acb1c820421d62b1416d90138bae7961bfb7@dist-git>
397dc2
From: Laine Stump <laine@redhat.com>
397dc2
Date: Sat, 12 Dec 2020 22:04:52 -0500
397dc2
Subject: [PATCH] util: assign tap device names using a monotonically
397dc2
 increasing integer
397dc2
397dc2
When creating a standard tap device, if provided with an ifname that
397dc2
contains "%d", rather than taking that literally as the name to use
397dc2
for the new device, the kernel will instead use that string as a
397dc2
template, and search for the lowest number that could be put in place
397dc2
of %d and produce an otherwise unused and unique name for the new
397dc2
device. For example, if there is no tap device name given in the XML,
397dc2
libvirt will always send "vnet%d" as the device name, and the kernel
397dc2
will create new devices named "vnet0", "vnet1", etc. If one of those
397dc2
devices is deleted, creating a "hole" in the name list, the kernel
397dc2
will always attempt to reuse the name in the hole first before using a
397dc2
name with a higher number (i.e. it finds the lowest possible unused
397dc2
number).
397dc2
397dc2
The problem with this, as described in the previous patch dealing with
397dc2
macvtap device naming, is that it makes "immediate reuse" of a newly
397dc2
freed tap device name *much* more common, and in the aftermath of
397dc2
deleting a tap device, there is some other necessary cleanup of things
397dc2
which are named based on the device name (nwfilter rules, bandwidth
397dc2
rules, OVS switch ports, to name a few) that could end up stomping
397dc2
over the top of the setup of a new device of the same name for a
397dc2
different guest.
397dc2
397dc2
Since the kernel "create a name based on a template" functionality for
397dc2
tap devices doesn't exist for macvtap, this patch for standard tap
397dc2
devices is a bit different from the previous patch for macvtap - in
397dc2
particular there was no previous "bitmap ID reservation system" or
397dc2
overly-complex retry loop that needed to be removed. We simply find
397dc2
and unused name, and pass that name on to the kernel instead of
397dc2
"vnet%d".
397dc2
397dc2
This counter is also wrapped when either it gets to INT_MAX or if the
397dc2
full name would overflow IFNAMSIZ-1 characters. In the case of
397dc2
"vnet%d" and a 32 bit int, we would reach INT_MAX first, but possibly
397dc2
someday someone will change the name from vnet to something else.
397dc2
397dc2
(NB: It is still possible for a user to provide their own
397dc2
parameterized template name (e.g. "mytap%d") in the XML, and libvirt
397dc2
will just pass that through to the kernel as it always has.)
397dc2
397dc2
Signed-off-by: Laine Stump <laine@redhat.com>
397dc2
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
397dc2
(cherry picked from commit 95089f481e003d971fe0a082018216c58c1b80e5)
397dc2
397dc2
https://bugzilla.redhat.com/1874304
397dc2
Signed-off-by: Laine Stump <laine@redhat.com>
397dc2
Message-Id: <20201213030453.48851-3-laine@redhat.com>
397dc2
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
397dc2
---
397dc2
 src/libvirt_private.syms |   1 +
397dc2
 src/qemu/qemu_process.c  |  20 +++++++-
397dc2
 src/util/virnetdevtap.c  | 108 ++++++++++++++++++++++++++++++++++++++-
397dc2
 src/util/virnetdevtap.h  |   4 ++
397dc2
 4 files changed, 130 insertions(+), 3 deletions(-)
397dc2
397dc2
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
397dc2
index 1c66c40f86..d6598c2514 100644
397dc2
--- a/src/libvirt_private.syms
397dc2
+++ b/src/libvirt_private.syms
397dc2
@@ -2638,6 +2638,7 @@ virNetDevTapGetName;
397dc2
 virNetDevTapGetRealDeviceName;
397dc2
 virNetDevTapInterfaceStats;
397dc2
 virNetDevTapReattachBridge;
397dc2
+virNetDevTapReserveName;
397dc2
 
397dc2
 
397dc2
 # util/virnetdevveth.h
397dc2
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
397dc2
index b49a463c02..f90096e68d 100644
397dc2
--- a/src/qemu/qemu_process.c
397dc2
+++ b/src/qemu/qemu_process.c
397dc2
@@ -3287,8 +3287,26 @@ qemuProcessNotifyNets(virDomainDefPtr def)
397dc2
          * domain to be unceremoniously killed, which would be *very*
397dc2
          * impolite.
397dc2
          */
397dc2
-        if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_DIRECT)
397dc2
+        switch (virDomainNetGetActualType(net)) {
397dc2
+        case VIR_DOMAIN_NET_TYPE_DIRECT:
397dc2
             virNetDevMacVLanReserveName(net->ifname);
397dc2
+            break;
397dc2
+        case VIR_DOMAIN_NET_TYPE_BRIDGE:
397dc2
+        case VIR_DOMAIN_NET_TYPE_NETWORK:
397dc2
+        case VIR_DOMAIN_NET_TYPE_ETHERNET:
397dc2
+            virNetDevTapReserveName(net->ifname);
397dc2
+            break;
397dc2
+        case VIR_DOMAIN_NET_TYPE_USER:
397dc2
+        case VIR_DOMAIN_NET_TYPE_VHOSTUSER:
397dc2
+        case VIR_DOMAIN_NET_TYPE_SERVER:
397dc2
+        case VIR_DOMAIN_NET_TYPE_CLIENT:
397dc2
+        case VIR_DOMAIN_NET_TYPE_MCAST:
397dc2
+        case VIR_DOMAIN_NET_TYPE_INTERNAL:
397dc2
+        case VIR_DOMAIN_NET_TYPE_HOSTDEV:
397dc2
+        case VIR_DOMAIN_NET_TYPE_UDP:
397dc2
+        case VIR_DOMAIN_NET_TYPE_LAST:
397dc2
+            break;
397dc2
+        }
397dc2
 
397dc2
         if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
397dc2
             if (!conn && !(conn = virGetConnectNetwork()))
397dc2
diff --git a/src/util/virnetdevtap.c b/src/util/virnetdevtap.c
397dc2
index 6a16b58d60..fd4b70df30 100644
397dc2
--- a/src/util/virnetdevtap.c
397dc2
+++ b/src/util/virnetdevtap.c
397dc2
@@ -45,11 +45,51 @@
397dc2
 #if defined(HAVE_GETIFADDRS) && defined(AF_LINK)
397dc2
 # include <ifaddrs.h>
397dc2
 #endif
397dc2
+#include <math.h>
397dc2
 
397dc2
 #define VIR_FROM_THIS VIR_FROM_NONE
397dc2
 
397dc2
 VIR_LOG_INIT("util.netdevtap");
397dc2
 
397dc2
+virMutex virNetDevTapCreateMutex = VIR_MUTEX_INITIALIZER;
397dc2
+static int virNetDevTapLastID = -1; /* not "unsigned" because callers use %d */
397dc2
+
397dc2
+
397dc2
+/**
397dc2
+ * virNetDevTapReserveName:
397dc2
+ * @name: name of an existing tap device
397dc2
+ *
397dc2
+ * Set the value of virNetDevTapLastID to assure that any new tap
397dc2
+ * device created with an autogenerated name will use a number higher
397dc2
+ * than the number in the given tap device name.
397dc2
+ *
397dc2
+ * Returns nothing.
397dc2
+ */
397dc2
+void
397dc2
+virNetDevTapReserveName(const char *name)
397dc2
+{
397dc2
+    unsigned int id;
397dc2
+    const char *idstr = NULL;
397dc2
+
397dc2
+
397dc2
+    if (STRPREFIX(name, VIR_NET_GENERATED_TAP_PREFIX)) {
397dc2
+
397dc2
+        VIR_INFO("marking device in use: '%s'", name);
397dc2
+
397dc2
+        idstr = name + strlen(VIR_NET_GENERATED_TAP_PREFIX);
397dc2
+
397dc2
+        if (virStrToLong_ui(idstr, NULL, 10, &id) >= 0) {
397dc2
+            virMutexLock(&virNetDevTapCreateMutex);
397dc2
+
397dc2
+            if (virNetDevTapLastID < (int)id)
397dc2
+                virNetDevTapLastID = id;
397dc2
+
397dc2
+            virMutexUnlock(&virNetDevTapCreateMutex);
397dc2
+        }
397dc2
+    }
397dc2
+}
397dc2
+
397dc2
+
397dc2
 /**
397dc2
  * virNetDevTapGetName:
397dc2
  * @tapfd: a tun/tap file descriptor
397dc2
@@ -200,6 +240,55 @@ virNetDevProbeVnetHdr(int tapfd)
397dc2
 
397dc2
 
397dc2
 #ifdef TUNSETIFF
397dc2
+/**
397dc2
+ * virNetDevTapGenerateName:
397dc2
+ * @ifname: pointer to pointer to string containing template
397dc2
+ *
397dc2
+ * generate a new (currently unused) name for a new tap device based
397dc2
+ * on the templace string in @ifname - replace %d with
397dc2
+ * ++virNetDevTapLastID, and keep trying new values until one is found
397dc2
+ * that doesn't already exist, or we've tried 10000 different
397dc2
+ * names. Once a usable name is found, replace the template with the
397dc2
+ * actual name.
397dc2
+ *
397dc2
+ * Returns 0 on success, -1 on failure.
397dc2
+ */
397dc2
+static int
397dc2
+virNetDevTapGenerateName(char **ifname)
397dc2
+{
397dc2
+    int id;
397dc2
+    double maxIDd = pow(10, IFNAMSIZ - 1 - strlen(VIR_NET_GENERATED_TAP_PREFIX));
397dc2
+    int maxID = INT_MAX;
397dc2
+    int attempts = 0;
397dc2
+
397dc2
+    if (maxIDd <= (double)INT_MAX)
397dc2
+        maxID = (int)maxIDd;
397dc2
+
397dc2
+    do {
397dc2
+        g_autofree char *try = NULL;
397dc2
+
397dc2
+        id = ++virNetDevTapLastID;
397dc2
+
397dc2
+        /* reset before overflow */
397dc2
+        if (virNetDevTapLastID >= maxID)
397dc2
+            virNetDevTapLastID = -1;
397dc2
+
397dc2
+        try = g_strdup_printf(*ifname, id);
397dc2
+
397dc2
+        if (!virNetDevExists(try)) {
397dc2
+            g_free(*ifname);
397dc2
+            *ifname = g_steal_pointer(&try;;
397dc2
+            return 0;
397dc2
+        }
397dc2
+    } while (++attempts < 10000);
397dc2
+
397dc2
+    virReportError(VIR_ERR_INTERNAL_ERROR,
397dc2
+                   _("no unused %s names available"),
397dc2
+                   VIR_NET_GENERATED_TAP_PREFIX);
397dc2
+    return -1;
397dc2
+}
397dc2
+
397dc2
+
397dc2
 /**
397dc2
  * virNetDevTapCreate:
397dc2
  * @ifname: the interface name
397dc2
@@ -226,10 +315,22 @@ int virNetDevTapCreate(char **ifname,
397dc2
                        size_t tapfdSize,
397dc2
                        unsigned int flags)
397dc2
 {
397dc2
-    size_t i;
397dc2
+    size_t i = 0;
397dc2
     struct ifreq ifr;
397dc2
     int ret = -1;
397dc2
-    int fd;
397dc2
+    int fd = 0;
397dc2
+
397dc2
+    virMutexLock(&virNetDevTapCreateMutex);
397dc2
+
397dc2
+    /* if ifname is "vnet%d", then auto-generate a name for the new
397dc2
+     * device (the kernel could do this for us, but has a bad habit of
397dc2
+     * immediately re-using names that have just been released, which
397dc2
+     * can lead to race conditions).
397dc2
+     */
397dc2
+    if (STREQ(*ifname, VIR_NET_GENERATED_TAP_PREFIX "%d") &&
397dc2
+        virNetDevTapGenerateName(ifname) < 0) {
397dc2
+        goto cleanup;
397dc2
+    }
397dc2
 
397dc2
     if (!tunpath)
397dc2
         tunpath = "/dev/net/tun";
397dc2
@@ -295,9 +396,11 @@ int virNetDevTapCreate(char **ifname,
397dc2
         tapfd[i] = fd;
397dc2
     }
397dc2
 
397dc2
+    VIR_INFO("created device: '%s'", *ifname);
397dc2
     ret = 0;
397dc2
 
397dc2
  cleanup:
397dc2
+    virMutexUnlock(&virNetDevTapCreateMutex);
397dc2
     if (ret < 0) {
397dc2
         VIR_FORCE_CLOSE(fd);
397dc2
         while (i--)
397dc2
@@ -347,6 +450,7 @@ int virNetDevTapDelete(const char *ifname,
397dc2
         goto cleanup;
397dc2
     }
397dc2
 
397dc2
+    VIR_INFO("delete device: '%s'", ifname);
397dc2
     ret = 0;
397dc2
 
397dc2
  cleanup:
397dc2
diff --git a/src/util/virnetdevtap.h b/src/util/virnetdevtap.h
397dc2
index cae8e61861..2994c9ca71 100644
397dc2
--- a/src/util/virnetdevtap.h
397dc2
+++ b/src/util/virnetdevtap.h
397dc2
@@ -29,6 +29,10 @@
397dc2
 # define VIR_NETDEV_TAP_REQUIRE_MANUAL_CLEANUP 1
397dc2
 #endif
397dc2
 
397dc2
+void
397dc2
+virNetDevTapReserveName(const char *name)
397dc2
+    ATTRIBUTE_NONNULL(1);
397dc2
+
397dc2
 int virNetDevTapCreate(char **ifname,
397dc2
                        const char *tunpath,
397dc2
                        int *tapfd,
397dc2
-- 
397dc2
2.29.2
397dc2