176ecd
From 5010c42c47b7b5a3d68d83369d6c17ed0bc11cff Mon Sep 17 00:00:00 2001
176ecd
From: Petr Mensik <pemensik@redhat.com>
176ecd
Date: Wed, 17 Feb 2021 11:47:28 +0100
176ecd
Subject: [PATCH] Correct occasional --bind-dynamic synchronization break
176ecd
176ecd
Request only one re-read of addresses and/or routes
176ecd
176ecd
Previous implementation re-reads systemd addresses exactly the same
176ecd
number of time equal number of notifications received.
176ecd
This is not necessary, we need just notification of change, then re-read
176ecd
the current state and adapt listeners. Repeated re-reading slows netlink
176ecd
processing and highers CPU usage on mass interface changes.
176ecd
176ecd
Continue reading multicast events from netlink, even when ENOBUFS
176ecd
arrive. Broadcasts are not trusted anyway and refresh would be done in
176ecd
iface_enumerate. Save queued events sent again.
176ecd
176ecd
Remove sleeping on netlink ENOBUFS
176ecd
176ecd
With reduced number of written events netlink should receive ENOBUFS
176ecd
rarely. It does not make sense to wait if it is received. It is just a
176ecd
signal some packets got missing. Fast reading all pending packets is required,
176ecd
seq checking ensures it already. Finishes changes by
176ecd
commit 1d07667ac77c55b9de56b1b2c385167e0e0ec27a.
176ecd
176ecd
Move restart from iface_enumerate to enumerate_interfaces
176ecd
176ecd
When ENOBUFS is received, restart of reading addresses is done. But
176ecd
previously found addresses might not have been found this time. In order
176ecd
to catch this, restart both IPv4 and IPv6 enumeration with clearing
176ecd
found interfaces first. It should deliver up-to-date state also after
176ecd
ENOBUFS.
176ecd
176ecd
Read all netlink messages before netlink restart
176ecd
176ecd
Before writing again into netlink socket, try fetching all pending
176ecd
messages. They would be ignored, only might trigger new address
176ecd
synchronization. Should ensure new try has better chance to succeed.
176ecd
176ecd
Request sending ENOBUFS again
176ecd
176ecd
ENOBUFS error handling was improved. Netlink is correctly drained before
176ecd
sending a new request again. It seems ENOBUFS supression is no longer
176ecd
necessary or wanted. Let kernel tell us when it failed and handle it a
176ecd
good way.
176ecd
---
176ecd
 src/netlink.c | 67 ++++++++++++++++++++++++++++++++++++---------------
176ecd
 src/network.c | 11 +++++++--
176ecd
 2 files changed, 57 insertions(+), 21 deletions(-)
176ecd
176ecd
diff --git a/src/netlink.c b/src/netlink.c
176ecd
index ac1a1c5..f95f3e8 100644
176ecd
--- a/src/netlink.c
176ecd
+++ b/src/netlink.c
176ecd
@@ -32,13 +32,21 @@
176ecd
 
176ecd
 #ifndef NDA_RTA
176ecd
 #  define NDA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) 
176ecd
-#endif 
176ecd
+#endif
176ecd
+
176ecd
+/* Used to request refresh of addresses or routes just once,
176ecd
+ * when multiple changes might be announced. */
176ecd
+enum async_states {
176ecd
+  STATE_NEWADDR = (1 << 0),
176ecd
+  STATE_NEWROUTE = (1 << 1),
176ecd
+};
176ecd
 
176ecd
 
176ecd
 static struct iovec iov;
176ecd
 static u32 netlink_pid;
176ecd
 
176ecd
-static void nl_async(struct nlmsghdr *h);
176ecd
+static unsigned nl_async(struct nlmsghdr *h, unsigned state);
176ecd
+static void nl_multicast_state(unsigned state);
176ecd
 
176ecd
 void netlink_init(void)
176ecd
 {
176ecd
@@ -135,7 +143,9 @@ static ssize_t netlink_recv(void)
176ecd
   
176ecd
 
176ecd
 /* family = AF_UNSPEC finds ARP table entries.
176ecd
-   family = AF_LOCAL finds MAC addresses. */
176ecd
+   family = AF_LOCAL finds MAC addresses.
176ecd
+   returns 0 on failure, 1 on success, -1 when restart is required
176ecd
+*/
176ecd
 int iface_enumerate(int family, void *parm, int (*callback)())
176ecd
 {
176ecd
   struct sockaddr_nl addr;
176ecd
@@ -143,6 +153,7 @@ int iface_enumerate(int family, void *parm, int (*callback)())
176ecd
   ssize_t len;
176ecd
   static unsigned int seq = 0;
176ecd
   int callback_ok = 1;
176ecd
+  unsigned state = 0;
176ecd
 
176ecd
   struct {
176ecd
     struct nlmsghdr nlh;
176ecd
@@ -154,7 +165,6 @@ int iface_enumerate(int family, void *parm, int (*callback)())
176ecd
   addr.nl_groups = 0;
176ecd
   addr.nl_pid = 0; /* address to kernel */
176ecd
  
176ecd
- again: 
176ecd
   if (family == AF_UNSPEC)
176ecd
     req.nlh.nlmsg_type = RTM_GETNEIGH;
176ecd
   else if (family == AF_LOCAL)
176ecd
@@ -181,8 +191,8 @@ int iface_enumerate(int family, void *parm, int (*callback)())
176ecd
 	{
176ecd
 	  if (errno == ENOBUFS)
176ecd
 	    {
176ecd
-	      sleep(1);
176ecd
-	      goto again;
176ecd
+	      nl_multicast_state(state);
176ecd
+	      return -1;
176ecd
 	    }
176ecd
 	  return 0;
176ecd
 	}
176ecd
@@ -191,7 +201,7 @@ int iface_enumerate(int family, void *parm, int (*callback)())
176ecd
 	if (h->nlmsg_pid != netlink_pid || h->nlmsg_type == NLMSG_ERROR)
176ecd
 	  {
176ecd
 	    /* May be multicast arriving async */
176ecd
-	    nl_async(h);
176ecd
+	    state = nl_async(h, state);
176ecd
 	  }
176ecd
 	else if (h->nlmsg_seq != seq)
176ecd
 	  {
176ecd
@@ -327,26 +337,36 @@ int iface_enumerate(int family, void *parm, int (*callback)())
176ecd
     }
176ecd
 }
176ecd
 
176ecd
-void netlink_multicast(void)
176ecd
+static void nl_multicast_state(unsigned state)
176ecd
 {
176ecd
   ssize_t len;
176ecd
   struct nlmsghdr *h;
176ecd
   int flags;
176ecd
-  
176ecd
-  /* don't risk blocking reading netlink messages here. */
176ecd
+
176ecd
   if ((flags = fcntl(daemon->netlinkfd, F_GETFL)) == -1 ||
176ecd
       fcntl(daemon->netlinkfd, F_SETFL, flags | O_NONBLOCK) == -1) 
176ecd
     return;
176ecd
+
176ecd
+  do {
176ecd
+    /* don't risk blocking reading netlink messages here. */
176ecd
+    while ((len = netlink_recv()) != -1)
176ecd
   
176ecd
-  if ((len = netlink_recv()) != -1)
176ecd
-    for (h = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(h, (size_t)len); h = NLMSG_NEXT(h, len))
176ecd
-      nl_async(h);
176ecd
-  
176ecd
+      for (h = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(h, (size_t)len); h = NLMSG_NEXT(h, len))
176ecd
+	state = nl_async(h, state);
176ecd
+  } while (errno == ENOBUFS);
176ecd
+
176ecd
   /* restore non-blocking status */
176ecd
   fcntl(daemon->netlinkfd, F_SETFL, flags);
176ecd
 }
176ecd
 
176ecd
-static void nl_async(struct nlmsghdr *h)
176ecd
+void netlink_multicast(void)
176ecd
+{
176ecd
+  unsigned state = 0;
176ecd
+  nl_multicast_state(state);
176ecd
+}
176ecd
+
176ecd
+
176ecd
+static unsigned nl_async(struct nlmsghdr *h, unsigned state)
176ecd
 {
176ecd
   if (h->nlmsg_type == NLMSG_ERROR)
176ecd
     {
176ecd
@@ -354,7 +374,8 @@ static void nl_async(struct nlmsghdr *h)
176ecd
       if (err->error != 0)
176ecd
 	my_syslog(LOG_ERR, _("netlink returns error: %s"), strerror(-(err->error)));
176ecd
     }
176ecd
-  else if (h->nlmsg_pid == 0 && h->nlmsg_type == RTM_NEWROUTE) 
176ecd
+  else if (h->nlmsg_pid == 0 && h->nlmsg_type == RTM_NEWROUTE &&
176ecd
+	   (state & STATE_NEWROUTE)==0)
176ecd
     {
176ecd
       /* We arrange to receive netlink multicast messages whenever the network route is added.
176ecd
 	 If this happens and we still have a DNS packet in the buffer, we re-send it.
176ecd
@@ -366,10 +387,18 @@ static void nl_async(struct nlmsghdr *h)
176ecd
       if (rtm->rtm_type == RTN_UNICAST && rtm->rtm_scope == RT_SCOPE_LINK &&
176ecd
 	  (rtm->rtm_table == RT_TABLE_MAIN ||
176ecd
 	   rtm->rtm_table == RT_TABLE_LOCAL))
176ecd
-	queue_event(EVENT_NEWROUTE);
176ecd
+	{
176ecd
+	  queue_event(EVENT_NEWROUTE);
176ecd
+	  state |= STATE_NEWROUTE;
176ecd
+	}
176ecd
+    }
176ecd
+  else if ((h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR) &&
176ecd
+	   (state & STATE_NEWADDR)==0)
176ecd
+    {
176ecd
+      queue_event(EVENT_NEWADDR);
176ecd
+      state |= STATE_NEWADDR;
176ecd
     }
176ecd
-  else if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR) 
176ecd
-    queue_event(EVENT_NEWADDR);
176ecd
+  return state;
176ecd
 }
176ecd
 #endif
176ecd
 
176ecd
diff --git a/src/network.c b/src/network.c
176ecd
index c6e7d89..47caf38 100644
176ecd
--- a/src/network.c
176ecd
+++ b/src/network.c
176ecd
@@ -656,7 +656,8 @@ int enumerate_interfaces(int reset)
176ecd
 
176ecd
   if ((param.fd = socket(PF_INET, SOCK_DGRAM, 0)) == -1)
176ecd
     return 0;
176ecd
- 
176ecd
+
176ecd
+again:
176ecd
   /* Mark interfaces for garbage collection */
176ecd
   for (iface = daemon->interfaces; iface; iface = iface->next) 
176ecd
     iface->found = 0;
176ecd
@@ -709,10 +710,16 @@ int enumerate_interfaces(int reset)
176ecd
   
176ecd
 #ifdef HAVE_IPV6
176ecd
   ret = iface_enumerate(AF_INET6, &param, iface_allowed_v6);
176ecd
+  if (ret < 0)
176ecd
+    goto again;
176ecd
 #endif
176ecd
 
176ecd
   if (ret)
176ecd
-    ret = iface_enumerate(AF_INET, &param, iface_allowed_v4); 
176ecd
+    {
176ecd
+      ret = iface_enumerate(AF_INET, &param, iface_allowed_v4);
176ecd
+      if (ret < 0)
176ecd
+	goto again;
176ecd
+    }
176ecd
  
176ecd
   errsave = errno;
176ecd
   close(param.fd);
176ecd
-- 
176ecd
2.26.2
176ecd