6c0556
commit 1387ad6225c2222f027790e3f460e31aa5dd2c54
6c0556
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
6c0556
Date:   Wed Dec 30 19:19:37 2020 +0000
6c0556
6c0556
    elf: Fix data races in pthread_create and TLS access [BZ #19329]
6c0556
    
6c0556
    DTV setup at thread creation (_dl_allocate_tls_init) is changed
6c0556
    to take the dlopen lock, GL(dl_load_lock).  Avoiding data races
6c0556
    here without locks would require design changes: the map that is
6c0556
    accessed for static TLS initialization here may be concurrently
6c0556
    freed by dlclose.  That use after free may be solved by only
6c0556
    locking around static TLS setup or by ensuring dlclose does not
6c0556
    free modules with static TLS, however currently every link map
6c0556
    with TLS has to be accessed at least to see if it needs static
6c0556
    TLS.  And even if that's solved, still a lot of atomics would be
6c0556
    needed to synchronize DTV related globals without a lock. So fix
6c0556
    both bug 19329 and bug 27111 with a lock that prevents DTV setup
6c0556
    running concurrently with dlopen or dlclose.
6c0556
    
6c0556
    _dl_update_slotinfo at TLS access still does not use any locks
6c0556
    so CONCURRENCY NOTES are added to explain the synchronization.
6c0556
    The early exit from the slotinfo walk when max_modid is reached
6c0556
    is not strictly necessary, but does not hurt either.
6c0556
    
6c0556
    An incorrect acquire load was removed from _dl_resize_dtv: it
6c0556
    did not synchronize with any release store or fence and
6c0556
    synchronization is now handled separately at thread creation
6c0556
    and TLS access time.
6c0556
    
6c0556
    There are still a number of racy read accesses to globals that
6c0556
    will be changed to relaxed MO atomics in a followup patch. This
6c0556
    should not introduce regressions compared to existing behaviour
6c0556
    and avoid cluttering the main part of the fix.
6c0556
    
6c0556
    Not all TLS access related data races got fixed here: there are
6c0556
    additional races at lazy tlsdesc relocations see bug 27137.
6c0556
    
6c0556
    Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
6c0556
6c0556
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
6c0556
index 15ed01d795a8627a..da83cd6ae2ee6504 100644
6c0556
--- a/elf/dl-tls.c
6c0556
+++ b/elf/dl-tls.c
6c0556
@@ -471,14 +471,11 @@ extern dtv_t _dl_static_dtv[];
6c0556
 #endif
6c0556
 
6c0556
 static dtv_t *
6c0556
-_dl_resize_dtv (dtv_t *dtv)
6c0556
+_dl_resize_dtv (dtv_t *dtv, size_t max_modid)
6c0556
 {
6c0556
   /* Resize the dtv.  */
6c0556
   dtv_t *newp;
6c0556
-  /* Load GL(dl_tls_max_dtv_idx) atomically since it may be written to by
6c0556
-     other threads concurrently.  */
6c0556
-  size_t newsize
6c0556
-    = atomic_load_acquire (&GL(dl_tls_max_dtv_idx)) + DTV_SURPLUS;
6c0556
+  size_t newsize = max_modid + DTV_SURPLUS;
6c0556
   size_t oldsize = dtv[-1].counter;
6c0556
 
6c0556
   if (dtv == GL(dl_initial_dtv))
6c0556
@@ -524,11 +521,14 @@ _dl_allocate_tls_init (void *result)
6c0556
   size_t total = 0;
6c0556
   size_t maxgen = 0;
6c0556
 
6c0556
+  /* Protects global dynamic TLS related state.  */
6c0556
+  __rtld_lock_lock_recursive (GL(dl_load_lock));
6c0556
+
6c0556
   /* Check if the current dtv is big enough.   */
6c0556
   if (dtv[-1].counter < GL(dl_tls_max_dtv_idx))
6c0556
     {
6c0556
       /* Resize the dtv.  */
6c0556
-      dtv = _dl_resize_dtv (dtv);
6c0556
+      dtv = _dl_resize_dtv (dtv, GL(dl_tls_max_dtv_idx));
6c0556
 
6c0556
       /* Install this new dtv in the thread data structures.  */
6c0556
       INSTALL_DTV (result, &dtv[-1]);
6c0556
@@ -596,6 +596,7 @@ _dl_allocate_tls_init (void *result)
6c0556
       listp = listp->next;
6c0556
       assert (listp != NULL);
6c0556
     }
6c0556
+  __rtld_lock_unlock_recursive (GL(dl_load_lock));
6c0556
 
6c0556
   /* The DTV version is up-to-date now.  */
6c0556
   dtv[0].counter = maxgen;
6c0556
@@ -730,12 +731,29 @@ _dl_update_slotinfo (unsigned long int req_modid)
6c0556
 
6c0556
   if (dtv[0].counter < listp->slotinfo[idx].gen)
6c0556
     {
6c0556
-      /* The generation counter for the slot is higher than what the
6c0556
-	 current dtv implements.  We have to update the whole dtv but
6c0556
-	 only those entries with a generation counter <= the one for
6c0556
-	 the entry we need.  */
6c0556
+      /* CONCURRENCY NOTES:
6c0556
+
6c0556
+	 Here the dtv needs to be updated to new_gen generation count.
6c0556
+
6c0556
+	 This code may be called during TLS access when GL(dl_load_lock)
6c0556
+	 is not held.  In that case the user code has to synchronize with
6c0556
+	 dlopen and dlclose calls of relevant modules.  A module m is
6c0556
+	 relevant if the generation of m <= new_gen and dlclose of m is
6c0556
+	 synchronized: a memory access here happens after the dlopen and
6c0556
+	 before the dlclose of relevant modules.  The dtv entries for
6c0556
+	 relevant modules need to be updated, other entries can be
6c0556
+	 arbitrary.
6c0556
+
6c0556
+	 This e.g. means that the first part of the slotinfo list can be
6c0556
+	 accessed race free, but the tail may be concurrently extended.
6c0556
+	 Similarly relevant slotinfo entries can be read race free, but
6c0556
+	 other entries are racy.  However updating a non-relevant dtv
6c0556
+	 entry does not affect correctness.  For a relevant module m,
6c0556
+	 max_modid >= modid of m.  */
6c0556
       size_t new_gen = listp->slotinfo[idx].gen;
6c0556
       size_t total = 0;
6c0556
+      size_t max_modid  = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
6c0556
+      assert (max_modid >= req_modid);
6c0556
 
6c0556
       /* We have to look through the entire dtv slotinfo list.  */
6c0556
       listp =  GL(dl_tls_dtv_slotinfo_list);
6c0556
@@ -745,12 +763,14 @@ _dl_update_slotinfo (unsigned long int req_modid)
6c0556
 	    {
6c0556
 	      size_t modid = total + cnt;
6c0556
 
6c0556
+	      /* Later entries are not relevant.  */
6c0556
+	      if (modid > max_modid)
6c0556
+		break;
6c0556
+
6c0556
 	      size_t gen = listp->slotinfo[cnt].gen;
6c0556
 
6c0556
 	      if (gen > new_gen)
6c0556
-		/* This is a slot for a generation younger than the
6c0556
-		   one we are handling now.  It might be incompletely
6c0556
-		   set up so ignore it.  */
6c0556
+		/* Not relevant.  */
6c0556
 		continue;
6c0556
 
6c0556
 	      /* If the entry is older than the current dtv layout we
6c0556
@@ -767,7 +787,7 @@ _dl_update_slotinfo (unsigned long int req_modid)
6c0556
 		    continue;
6c0556
 
6c0556
 		  /* Resize the dtv.  */
6c0556
-		  dtv = _dl_resize_dtv (dtv);
6c0556
+		  dtv = _dl_resize_dtv (dtv, max_modid);
6c0556
 
6c0556
 		  assert (modid <= dtv[-1].counter);
6c0556
 
6c0556
@@ -789,8 +809,17 @@ _dl_update_slotinfo (unsigned long int req_modid)
6c0556
 	    }
6c0556
 
6c0556
 	  total += listp->len;
6c0556
+	  if (total > max_modid)
6c0556
+	    break;
6c0556
+
6c0556
+	  /* Synchronize with _dl_add_to_slotinfo.  Ideally this would
6c0556
+	     be consume MO since we only need to order the accesses to
6c0556
+	     the next node after the read of the address and on most
6c0556
+	     hardware (other than alpha) a normal load would do that
6c0556
+	     because of the address dependency.  */
6c0556
+	  listp = atomic_load_acquire (&listp->next);
6c0556
 	}
6c0556
-      while ((listp = listp->next) != NULL);
6c0556
+      while (listp != NULL);
6c0556
 
6c0556
       /* This will be the new maximum generation counter.  */
6c0556
       dtv[0].counter = new_gen;
6c0556
@@ -982,7 +1011,7 @@ _dl_add_to_slotinfo (struct link_map *l, bool do_add)
6c0556
 	 the first slot.  */
6c0556
       assert (idx == 0);
6c0556
 
6c0556
-      listp = prevp->next = (struct dtv_slotinfo_list *)
6c0556
+      listp = (struct dtv_slotinfo_list *)
6c0556
 	malloc (sizeof (struct dtv_slotinfo_list)
6c0556
 		+ TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
6c0556
       if (listp == NULL)
6c0556
@@ -996,6 +1025,8 @@ cannot create TLS data structures"));
6c0556
       listp->next = NULL;
6c0556
       memset (listp->slotinfo, '\0',
6c0556
 	      TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
6c0556
+      /* Synchronize with _dl_update_slotinfo.  */
6c0556
+      atomic_store_release (&prevp->next, listp);
6c0556
     }
6c0556
 
6c0556
   /* Add the information into the slotinfo data structure.  */