6c0556
commit f4f8f4d4e0f92488431b268c8cd9555730b9afe9
6c0556
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
6c0556
Date:   Wed Dec 30 19:19:37 2020 +0000
6c0556
6c0556
    elf: Use relaxed atomics for racy accesses [BZ #19329]
6c0556
    
6c0556
    This is a follow up patch to the fix for bug 19329.  This adds relaxed
6c0556
    MO atomics to accesses that were previously data races but are now
6c0556
    race conditions, and where relaxed MO is sufficient.
6c0556
    
6c0556
    The race conditions all follow the pattern that the write is behind the
6c0556
    dlopen lock, but a read can happen concurrently (e.g. during tls access)
6c0556
    without holding the lock.  For slotinfo entries the read value only
6c0556
    matters if it reads from a synchronized write in dlopen or dlclose,
6c0556
    otherwise the related dtv entry is not valid to access so it is fine
6c0556
    to leave it in an inconsistent state.  The same applies for
6c0556
    GL(dl_tls_max_dtv_idx) and GL(dl_tls_generation), but there the
6c0556
    algorithm relies on the fact that the read of the last synchronized
6c0556
    write is an increasing value.
6c0556
    
6c0556
    Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
6c0556
6c0556
diff --git a/elf/dl-close.c b/elf/dl-close.c
6c0556
index 1ece0ae1dd062d1e..7d2dc2272cd643f5 100644
6c0556
--- a/elf/dl-close.c
6c0556
+++ b/elf/dl-close.c
6c0556
@@ -79,9 +79,10 @@ remove_slotinfo (size_t idx, struct dtv_slotinfo_list *listp, size_t disp,
6c0556
 	{
6c0556
 	  assert (old_map->l_tls_modid == idx);
6c0556
 
6c0556
-	  /* Mark the entry as unused. */
6c0556
-	  listp->slotinfo[idx - disp].gen = GL(dl_tls_generation) + 1;
6c0556
-	  listp->slotinfo[idx - disp].map = NULL;
6c0556
+	  /* Mark the entry as unused.  These can be read concurrently.  */
6c0556
+	  atomic_store_relaxed (&listp->slotinfo[idx - disp].gen,
6c0556
+				GL(dl_tls_generation) + 1);
6c0556
+	  atomic_store_relaxed (&listp->slotinfo[idx - disp].map, NULL);
6c0556
 	}
6c0556
 
6c0556
       /* If this is not the last currently used entry no need to look
6c0556
@@ -96,8 +97,8 @@ remove_slotinfo (size_t idx, struct dtv_slotinfo_list *listp, size_t disp,
6c0556
 
6c0556
       if (listp->slotinfo[idx - disp].map != NULL)
6c0556
 	{
6c0556
-	  /* Found a new last used index.  */
6c0556
-	  GL(dl_tls_max_dtv_idx) = idx;
6c0556
+	  /* Found a new last used index.  This can be read concurrently.  */
6c0556
+	  atomic_store_relaxed (&GL(dl_tls_max_dtv_idx), idx);
6c0556
 	  return true;
6c0556
 	}
6c0556
     }
6c0556
@@ -571,7 +572,9 @@ _dl_close_worker (struct link_map *map, bool force)
6c0556
 					GL(dl_tls_dtv_slotinfo_list), 0,
6c0556
 					imap->l_init_called))
6c0556
 		/* All dynamically loaded modules with TLS are unloaded.  */
6c0556
-		GL(dl_tls_max_dtv_idx) = GL(dl_tls_static_nelem);
6c0556
+		/* Can be read concurrently.  */
6c0556
+		atomic_store_relaxed (&GL(dl_tls_max_dtv_idx),
6c0556
+				      GL(dl_tls_static_nelem));
6c0556
 
6c0556
 	      if (imap->l_tls_offset != NO_TLS_OFFSET
6c0556
 		  && imap->l_tls_offset != FORCED_DYNAMIC_TLS_OFFSET)
6c0556
@@ -769,8 +772,11 @@ _dl_close_worker (struct link_map *map, bool force)
6c0556
   /* If we removed any object which uses TLS bump the generation counter.  */
6c0556
   if (any_tls)
6c0556
     {
6c0556
-      if (__glibc_unlikely (++GL(dl_tls_generation) == 0))
6c0556
+      size_t newgen = GL(dl_tls_generation) + 1;
6c0556
+      if (__glibc_unlikely (newgen == 0))
6c0556
 	_dl_fatal_printf ("TLS generation counter wrapped!  Please report as described in "REPORT_BUGS_TO".\n");
6c0556
+      /* Can be read concurrently.  */
6c0556
+      atomic_store_relaxed (&GL(dl_tls_generation), newgen);
6c0556
 
6c0556
       if (tls_free_end == GL(dl_tls_static_used))
6c0556
 	GL(dl_tls_static_used) = tls_free_start;
6c0556
diff --git a/elf/dl-open.c b/elf/dl-open.c
6c0556
index b052bb0bc2cd17aa..a67fb3aee40860e1 100644
6c0556
--- a/elf/dl-open.c
6c0556
+++ b/elf/dl-open.c
6c0556
@@ -395,9 +395,12 @@ update_tls_slotinfo (struct link_map *new)
6c0556
 	}
6c0556
     }
6c0556
 
6c0556
-  if (__builtin_expect (++GL(dl_tls_generation) == 0, 0))
6c0556
+  size_t newgen = GL(dl_tls_generation) + 1;
6c0556
+  if (__glibc_unlikely (newgen == 0))
6c0556
     _dl_fatal_printf (N_("\
6c0556
 TLS generation counter wrapped!  Please report this."));
6c0556
+  /* Can be read concurrently.  */
6c0556
+  atomic_store_relaxed (&GL(dl_tls_generation), newgen);
6c0556
 
6c0556
   /* We need a second pass for static tls data, because
6c0556
      _dl_update_slotinfo must not be run while calls to
6c0556
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
6c0556
index da83cd6ae2ee6504..801eafad3961573c 100644
6c0556
--- a/elf/dl-tls.c
6c0556
+++ b/elf/dl-tls.c
6c0556
@@ -175,7 +175,9 @@ _dl_next_tls_modid (void)
6c0556
       /* No gaps, allocate a new entry.  */
6c0556
     nogaps:
6c0556
 
6c0556
-      result = ++GL(dl_tls_max_dtv_idx);
6c0556
+      result = GL(dl_tls_max_dtv_idx) + 1;
6c0556
+      /* Can be read concurrently.  */
6c0556
+      atomic_store_relaxed (&GL(dl_tls_max_dtv_idx), result);
6c0556
     }
6c0556
 
6c0556
   return result;
6c0556
@@ -359,10 +361,12 @@ allocate_dtv (void *result)
6c0556
   dtv_t *dtv;
6c0556
   size_t dtv_length;
6c0556
 
6c0556
+  /* Relaxed MO, because the dtv size is later rechecked, not relied on.  */
6c0556
+  size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
6c0556
   /* We allocate a few more elements in the dtv than are needed for the
6c0556
      initial set of modules.  This should avoid in most cases expansions
6c0556
      of the dtv.  */
6c0556
-  dtv_length = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
6c0556
+  dtv_length = max_modid + DTV_SURPLUS;
6c0556
   dtv = calloc (dtv_length + 2, sizeof (dtv_t));
6c0556
   if (dtv != NULL)
6c0556
     {
6c0556
@@ -767,7 +771,7 @@ _dl_update_slotinfo (unsigned long int req_modid)
6c0556
 	      if (modid > max_modid)
6c0556
 		break;
6c0556
 
6c0556
-	      size_t gen = listp->slotinfo[cnt].gen;
6c0556
+	      size_t gen = atomic_load_relaxed (&listp->slotinfo[cnt].gen);
6c0556
 
6c0556
 	      if (gen > new_gen)
6c0556
 		/* Not relevant.  */
6c0556
@@ -779,7 +783,8 @@ _dl_update_slotinfo (unsigned long int req_modid)
6c0556
 		continue;
6c0556
 
6c0556
 	      /* If there is no map this means the entry is empty.  */
6c0556
-	      struct link_map *map = listp->slotinfo[cnt].map;
6c0556
+	      struct link_map *map
6c0556
+		= atomic_load_relaxed (&listp->slotinfo[cnt].map);
6c0556
 	      /* Check whether the current dtv array is large enough.  */
6c0556
 	      if (dtv[-1].counter < modid)
6c0556
 		{
6c0556
@@ -923,7 +928,12 @@ __tls_get_addr (GET_ADDR_ARGS)
6c0556
 {
6c0556
   dtv_t *dtv = THREAD_DTV ();
6c0556
 
6c0556
-  if (__glibc_unlikely (dtv[0].counter != GL(dl_tls_generation)))
6c0556
+  /* Update is needed if dtv[0].counter < the generation of the accessed
6c0556
+     module.  The global generation counter is used here as it is easier
6c0556
+     to check.  Synchronization for the relaxed MO access is guaranteed
6c0556
+     by user code, see CONCURRENCY NOTES in _dl_update_slotinfo.  */
6c0556
+  size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
6c0556
+  if (__glibc_unlikely (dtv[0].counter != gen))
6c0556
     return update_get_addr (GET_ADDR_PARAM);
6c0556
 
6c0556
   void *p = dtv[GET_ADDR_MODULE].pointer.val;
6c0556
@@ -946,7 +956,10 @@ _dl_tls_get_addr_soft (struct link_map *l)
6c0556
     return NULL;
6c0556
 
6c0556
   dtv_t *dtv = THREAD_DTV ();
6c0556
-  if (__glibc_unlikely (dtv[0].counter != GL(dl_tls_generation)))
6c0556
+  /* This may be called without holding the GL(dl_load_lock).  Reading
6c0556
+     arbitrary gen value is fine since this is best effort code.  */
6c0556
+  size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
6c0556
+  if (__glibc_unlikely (dtv[0].counter != gen))
6c0556
     {
6c0556
       /* This thread's DTV is not completely current,
6c0556
 	 but it might already cover this module.  */
6c0556
@@ -1032,7 +1045,9 @@ cannot create TLS data structures"));
6c0556
   /* Add the information into the slotinfo data structure.  */
6c0556
   if (do_add)
6c0556
     {
6c0556
-      listp->slotinfo[idx].map = l;
6c0556
-      listp->slotinfo[idx].gen = GL(dl_tls_generation) + 1;
6c0556
+      /* Can be read concurrently.  See _dl_update_slotinfo.  */
6c0556
+      atomic_store_relaxed (&listp->slotinfo[idx].map, l);
6c0556
+      atomic_store_relaxed (&listp->slotinfo[idx].gen,
6c0556
+			    GL(dl_tls_generation) + 1);
6c0556
     }
6c0556
 }
6c0556
diff --git a/sysdeps/x86_64/dl-tls.c b/sysdeps/x86_64/dl-tls.c
6c0556
index 533ee2b3a6e85ad8..bc543dcc264ea361 100644
6c0556
--- a/sysdeps/x86_64/dl-tls.c
6c0556
+++ b/sysdeps/x86_64/dl-tls.c
6c0556
@@ -40,7 +40,8 @@ __tls_get_addr_slow (GET_ADDR_ARGS)
6c0556
 {
6c0556
   dtv_t *dtv = THREAD_DTV ();
6c0556
 
6c0556
-  if (__glibc_unlikely (dtv[0].counter != GL(dl_tls_generation)))
6c0556
+  size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
6c0556
+  if (__glibc_unlikely (dtv[0].counter != gen))
6c0556
     return update_get_addr (GET_ADDR_PARAM);
6c0556
 
6c0556
   return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);