Blame SOURCES/gcc8-rh1652016.patch

0e3697
commit e7c4d49ab27338e6bc8b0272c4036da58482bde0
0e3697
Author: krebbel <krebbel@138bc75d-0d04-0410-961f-82ee72b054a4>
0e3697
Date:   Mon Nov 26 15:15:57 2018 +0000
0e3697
0e3697
    S/390: Fix flogr RTX.
0e3697
    
0e3697
    The flogr instruction uses a 64 bit register pair target operand.  In
0e3697
    the RTX we model this as a write to a TImode register.  Unfortunately
0e3697
    the RTX's being assigned to the two parts of the target operand were
0e3697
    swapped.  This is no problem if in the end the flogr instruction will
0e3697
    be emitted since the instruction still does what the clzdi expander
0e3697
    expects.  However, a problem arises when the RTX is used to optimize
0e3697
    CLZ for a constant input operand.  Even then it matters only if the
0e3697
    expression couldn't be folded on tree level already.
0e3697
    
0e3697
    In the testcase this happened thanks to loop unrolling on RTL level.
0e3697
    The iteration variable is used as an argument to the clz
0e3697
    builtin. Due to the loop unrolling it becomes a constant and after
0e3697
    folding the broken RTX leads to a wrong assumption.
0e3697
    
0e3697
    gcc/ChangeLog:
0e3697
    
0e3697
    2018-11-26  Andreas Krebbel  <krebbel@linux.ibm.com>
0e3697
    
0e3697
            Backport from mainline
0e3697
            2018-11-20  Andreas Krebbel  <krebbel@linux.ibm.com>
0e3697
    
0e3697
            * config/s390/s390.md ("clztidi2"): Swap the RTX's written to the
0e3697
            DImode parts of the target operand.
0e3697
    
0e3697
    gcc/testsuite/ChangeLog:
0e3697
    
0e3697
    2018-11-26  Andreas Krebbel  <krebbel@linux.ibm.com>
0e3697
    
0e3697
            Backport from mainline
0e3697
            2018-11-20  Andreas Krebbel  <krebbel@linux.ibm.com>
0e3697
    
0e3697
            * gcc.target/s390/flogr-1.c: New test.
0e3697
    
0e3697
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-8-branch@266465 138bc75d-0d04-0410-961f-82ee72b054a4
0e3697
0e3697
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
0e3697
index c4d391bc9b5..53bb1985285 100644
0e3697
--- a/gcc/config/s390/s390.md
0e3697
+++ b/gcc/config/s390/s390.md
0e3697
@@ -8861,17 +8861,17 @@
0e3697
   DONE;
0e3697
 })
0e3697
 
0e3697
+; CLZ result is in hard reg op0 - this is the high part of the target operand
0e3697
+; The source with the left-most one bit cleared is in hard reg op0 + 1 - the low part
0e3697
 (define_insn "clztidi2"
0e3697
   [(set (match_operand:TI 0 "register_operand" "=d")
0e3697
 	(ior:TI
0e3697
-	  (ashift:TI
0e3697
-            (zero_extend:TI
0e3697
-   	      (xor:DI (match_operand:DI 1 "register_operand" "d")
0e3697
-                      (lshiftrt (match_operand:DI 2 "const_int_operand" "")
0e3697
-				(subreg:SI (clz:DI (match_dup 1)) 4))))
0e3697
-
0e3697
-	    (const_int 64))
0e3697
-          (zero_extend:TI (clz:DI (match_dup 1)))))
0e3697
+	  (ashift:TI (zero_extend:TI (clz:DI (match_operand:DI 1 "register_operand" "d")))
0e3697
+		     (const_int 64))
0e3697
+	  (zero_extend:TI
0e3697
+	   (xor:DI (match_dup 1)
0e3697
+		   (lshiftrt (match_operand:DI 2 "const_int_operand" "")
0e3697
+			     (subreg:SI (clz:DI (match_dup 1)) 4))))))
0e3697
    (clobber (reg:CC CC_REGNUM))]
0e3697
   "UINTVAL (operands[2]) == HOST_WIDE_INT_1U << 63
0e3697
    && TARGET_EXTIMM && TARGET_ZARCH"
0e3697
diff --git a/gcc/testsuite/gcc.target/s390/flogr-1.c b/gcc/testsuite/gcc.target/s390/flogr-1.c
0e3697
new file mode 100644
0e3697
index 00000000000..a3869000d62
0e3697
--- /dev/null
0e3697
+++ b/gcc/testsuite/gcc.target/s390/flogr-1.c
0e3697
@@ -0,0 +1,47 @@
0e3697
+/* { dg-do run } */
0e3697
+/* { dg-options "-O2 -funroll-loops -march=z9-109" } */
0e3697
+/* { dg-require-effective-target stdint_types } */
0e3697
+
0e3697
+/* Folding of the FLOGR caused a wrong value to be returned by
0e3697
+   __builtin_clz because of a problem in the RTX we emit for FLOGR.
0e3697
+   The problematic folding can only be triggered with constant inputs
0e3697
+   introduced on RTL level.  In this case it happens with loop
0e3697
+   unrolling.  */
0e3697
+
0e3697
+#include <stdint.h>
0e3697
+#include <assert.h>
0e3697
+
0e3697
+static inline uint32_t pow2_ceil_u32(uint32_t x) {
0e3697
+  if (x <= 1) {
0e3697
+    return x;
0e3697
+  }
0e3697
+  int msb_on_index;
0e3697
+  msb_on_index = (31 ^ __builtin_clz(x - 1));
0e3697
+  assert(msb_on_index < 31);
0e3697
+  return 1U << (msb_on_index + 1);
0e3697
+}
0e3697
+
0e3697
+void __attribute__((noinline,noclone))
0e3697
+die (int a)
0e3697
+{
0e3697
+  if (a)
0e3697
+    __builtin_abort ();
0e3697
+}
0e3697
+
0e3697
+void test_pow2_ceil_u32(void) {
0e3697
+  unsigned i;
0e3697
+
0e3697
+  for (i = 0; i < 18; i++) {
0e3697
+      uint32_t a_ = (pow2_ceil_u32(((uint32_t)1) << i));
0e3697
+      if (!(a_ == (((uint32_t)1) << i))) {
0e3697
+	die(1);
0e3697
+      }
0e3697
+  }
0e3697
+}
0e3697
+
0e3697
+int
0e3697
+main(void) {
0e3697
+  test_pow2_ceil_u32();
0e3697
+
0e3697
+  return 0;
0e3697
+}