|
|
0e3697 |
commit e7c4d49ab27338e6bc8b0272c4036da58482bde0
|
|
|
0e3697 |
Author: krebbel <krebbel@138bc75d-0d04-0410-961f-82ee72b054a4>
|
|
|
0e3697 |
Date: Mon Nov 26 15:15:57 2018 +0000
|
|
|
0e3697 |
|
|
|
0e3697 |
S/390: Fix flogr RTX.
|
|
|
0e3697 |
|
|
|
0e3697 |
The flogr instruction uses a 64 bit register pair target operand. In
|
|
|
0e3697 |
the RTX we model this as a write to a TImode register. Unfortunately
|
|
|
0e3697 |
the RTX's being assigned to the two parts of the target operand were
|
|
|
0e3697 |
swapped. This is no problem if in the end the flogr instruction will
|
|
|
0e3697 |
be emitted since the instruction still does what the clzdi expander
|
|
|
0e3697 |
expects. However, a problem arises when the RTX is used to optimize
|
|
|
0e3697 |
CLZ for a constant input operand. Even then it matters only if the
|
|
|
0e3697 |
expression couldn't be folded on tree level already.
|
|
|
0e3697 |
|
|
|
0e3697 |
In the testcase this happened thanks to loop unrolling on RTL level.
|
|
|
0e3697 |
The iteration variable is used as an argument to the clz
|
|
|
0e3697 |
builtin. Due to the loop unrolling it becomes a constant and after
|
|
|
0e3697 |
folding the broken RTX leads to a wrong assumption.
|
|
|
0e3697 |
|
|
|
0e3697 |
gcc/ChangeLog:
|
|
|
0e3697 |
|
|
|
0e3697 |
2018-11-26 Andreas Krebbel <krebbel@linux.ibm.com>
|
|
|
0e3697 |
|
|
|
0e3697 |
Backport from mainline
|
|
|
0e3697 |
2018-11-20 Andreas Krebbel <krebbel@linux.ibm.com>
|
|
|
0e3697 |
|
|
|
0e3697 |
* config/s390/s390.md ("clztidi2"): Swap the RTX's written to the
|
|
|
0e3697 |
DImode parts of the target operand.
|
|
|
0e3697 |
|
|
|
0e3697 |
gcc/testsuite/ChangeLog:
|
|
|
0e3697 |
|
|
|
0e3697 |
2018-11-26 Andreas Krebbel <krebbel@linux.ibm.com>
|
|
|
0e3697 |
|
|
|
0e3697 |
Backport from mainline
|
|
|
0e3697 |
2018-11-20 Andreas Krebbel <krebbel@linux.ibm.com>
|
|
|
0e3697 |
|
|
|
0e3697 |
* gcc.target/s390/flogr-1.c: New test.
|
|
|
0e3697 |
|
|
|
0e3697 |
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-8-branch@266465 138bc75d-0d04-0410-961f-82ee72b054a4
|
|
|
0e3697 |
|
|
|
0e3697 |
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
|
|
|
0e3697 |
index c4d391bc9b5..53bb1985285 100644
|
|
|
0e3697 |
--- a/gcc/config/s390/s390.md
|
|
|
0e3697 |
+++ b/gcc/config/s390/s390.md
|
|
|
0e3697 |
@@ -8861,17 +8861,17 @@
|
|
|
0e3697 |
DONE;
|
|
|
0e3697 |
})
|
|
|
0e3697 |
|
|
|
0e3697 |
+; CLZ result is in hard reg op0 - this is the high part of the target operand
|
|
|
0e3697 |
+; The source with the left-most one bit cleared is in hard reg op0 + 1 - the low part
|
|
|
0e3697 |
(define_insn "clztidi2"
|
|
|
0e3697 |
[(set (match_operand:TI 0 "register_operand" "=d")
|
|
|
0e3697 |
(ior:TI
|
|
|
0e3697 |
- (ashift:TI
|
|
|
0e3697 |
- (zero_extend:TI
|
|
|
0e3697 |
- (xor:DI (match_operand:DI 1 "register_operand" "d")
|
|
|
0e3697 |
- (lshiftrt (match_operand:DI 2 "const_int_operand" "")
|
|
|
0e3697 |
- (subreg:SI (clz:DI (match_dup 1)) 4))))
|
|
|
0e3697 |
-
|
|
|
0e3697 |
- (const_int 64))
|
|
|
0e3697 |
- (zero_extend:TI (clz:DI (match_dup 1)))))
|
|
|
0e3697 |
+ (ashift:TI (zero_extend:TI (clz:DI (match_operand:DI 1 "register_operand" "d")))
|
|
|
0e3697 |
+ (const_int 64))
|
|
|
0e3697 |
+ (zero_extend:TI
|
|
|
0e3697 |
+ (xor:DI (match_dup 1)
|
|
|
0e3697 |
+ (lshiftrt (match_operand:DI 2 "const_int_operand" "")
|
|
|
0e3697 |
+ (subreg:SI (clz:DI (match_dup 1)) 4))))))
|
|
|
0e3697 |
(clobber (reg:CC CC_REGNUM))]
|
|
|
0e3697 |
"UINTVAL (operands[2]) == HOST_WIDE_INT_1U << 63
|
|
|
0e3697 |
&& TARGET_EXTIMM && TARGET_ZARCH"
|
|
|
0e3697 |
diff --git a/gcc/testsuite/gcc.target/s390/flogr-1.c b/gcc/testsuite/gcc.target/s390/flogr-1.c
|
|
|
0e3697 |
new file mode 100644
|
|
|
0e3697 |
index 00000000000..a3869000d62
|
|
|
0e3697 |
--- /dev/null
|
|
|
0e3697 |
+++ b/gcc/testsuite/gcc.target/s390/flogr-1.c
|
|
|
0e3697 |
@@ -0,0 +1,47 @@
|
|
|
0e3697 |
+/* { dg-do run } */
|
|
|
0e3697 |
+/* { dg-options "-O2 -funroll-loops -march=z9-109" } */
|
|
|
0e3697 |
+/* { dg-require-effective-target stdint_types } */
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+/* Folding of the FLOGR caused a wrong value to be returned by
|
|
|
0e3697 |
+ __builtin_clz because of a problem in the RTX we emit for FLOGR.
|
|
|
0e3697 |
+ The problematic folding can only be triggered with constant inputs
|
|
|
0e3697 |
+ introduced on RTL level. In this case it happens with loop
|
|
|
0e3697 |
+ unrolling. */
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+#include <stdint.h>
|
|
|
0e3697 |
+#include <assert.h>
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+static inline uint32_t pow2_ceil_u32(uint32_t x) {
|
|
|
0e3697 |
+ if (x <= 1) {
|
|
|
0e3697 |
+ return x;
|
|
|
0e3697 |
+ }
|
|
|
0e3697 |
+ int msb_on_index;
|
|
|
0e3697 |
+ msb_on_index = (31 ^ __builtin_clz(x - 1));
|
|
|
0e3697 |
+ assert(msb_on_index < 31);
|
|
|
0e3697 |
+ return 1U << (msb_on_index + 1);
|
|
|
0e3697 |
+}
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+void __attribute__((noinline,noclone))
|
|
|
0e3697 |
+die (int a)
|
|
|
0e3697 |
+{
|
|
|
0e3697 |
+ if (a)
|
|
|
0e3697 |
+ __builtin_abort ();
|
|
|
0e3697 |
+}
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+void test_pow2_ceil_u32(void) {
|
|
|
0e3697 |
+ unsigned i;
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+ for (i = 0; i < 18; i++) {
|
|
|
0e3697 |
+ uint32_t a_ = (pow2_ceil_u32(((uint32_t)1) << i));
|
|
|
0e3697 |
+ if (!(a_ == (((uint32_t)1) << i))) {
|
|
|
0e3697 |
+ die(1);
|
|
|
0e3697 |
+ }
|
|
|
0e3697 |
+ }
|
|
|
0e3697 |
+}
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+int
|
|
|
0e3697 |
+main(void) {
|
|
|
0e3697 |
+ test_pow2_ceil_u32();
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+ return 0;
|
|
|
0e3697 |
+}
|