Blame SOURCES/gcc11-tremont3.patch

e60d6e
From c4c7244349999f91ef2a7cd2108eee0372490be9 Mon Sep 17 00:00:00 2001
e60d6e
From: "H.J. Lu" <hjl.tools@gmail.com>
e60d6e
Date: Wed, 15 Sep 2021 14:18:21 +0800
e60d6e
Subject: [PATCH 3/3] x86: Add TARGET_SSE_PARTIAL_REG_[FP_]CONVERTS_DEPENDENCY
e60d6e
e60d6e
1. Replace TARGET_SSE_PARTIAL_REG_DEPENDENCY with
e60d6e
TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY in SSE FP to FP splitters.
e60d6e
2. Replace TARGET_SSE_PARTIAL_REG_DEPENDENCY with
e60d6e
TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY in SSE INT to FP splitters.
e60d6e
e60d6e
gcc/
e60d6e
e60d6e
	* config/i386/i386.h (TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY):
e60d6e
	New.
e60d6e
	(TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY): Likewise.
e60d6e
	* config/i386/i386.md (SSE FP to FP splitters): Replace
e60d6e
	TARGET_SSE_PARTIAL_REG_DEPENDENCY with
e60d6e
	TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY.
e60d6e
	(SSE INT to FP splitter): Replace TARGET_SSE_PARTIAL_REG_DEPENDENCY
e60d6e
	with TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY.
e60d6e
	* config/i386/x86-tune.def
e60d6e
	(X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY): New.
e60d6e
	(X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY): Likewise.
e60d6e
e60d6e
gcc/testsuite/
e60d6e
e60d6e
	* gcc.target/i386/sse-covert-1.c: New test.
e60d6e
	* gcc.target/i386/sse-fp-covert-1.c: Likewise.
e60d6e
	* gcc.target/i386/sse-int-covert-1.c: Likewise.
e60d6e
---
e60d6e
 gcc/config/i386/i386.h                        |  4 ++++
e60d6e
 gcc/config/i386/i386.md                       |  9 ++++++---
e60d6e
 gcc/config/i386/x86-tune.def                  | 15 +++++++++++++++
e60d6e
 gcc/testsuite/gcc.target/i386/sse-covert-1.c  | 19 +++++++++++++++++++
e60d6e
 .../gcc.target/i386/sse-fp-covert-1.c         | 15 +++++++++++++++
e60d6e
 .../gcc.target/i386/sse-int-covert-1.c        | 14 ++++++++++++++
e60d6e
 6 files changed, 73 insertions(+), 3 deletions(-)
e60d6e
 create mode 100644 gcc/testsuite/gcc.target/i386/sse-covert-1.c
e60d6e
 create mode 100644 gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c
e60d6e
 create mode 100644 gcc/testsuite/gcc.target/i386/sse-int-covert-1.c
e60d6e
e60d6e
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
e60d6e
index 73e118900f7..5b992195df7 100644
e60d6e
--- a/gcc/config/i386/i386.h
e60d6e
+++ b/gcc/config/i386/i386.h
e60d6e
@@ -553,6 +553,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
e60d6e
 	ix86_tune_features[X86_TUNE_PARTIAL_REG_DEPENDENCY]
e60d6e
 #define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
e60d6e
 	ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY]
e60d6e
+#define TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY \
e60d6e
+	ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY]
e60d6e
+#define TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY \
e60d6e
+	ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY]
e60d6e
 #define TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
e60d6e
 	ix86_tune_features[X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL]
e60d6e
 #define TARGET_SSE_UNALIGNED_STORE_OPTIMAL \
e60d6e
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
e60d6e
index 97325e38676..053bec1c1e1 100644
e60d6e
--- a/gcc/config/i386/i386.md
e60d6e
+++ b/gcc/config/i386/i386.md
e60d6e
@@ -4378,7 +4378,8 @@
e60d6e
         (float_extend:DF
e60d6e
           (match_operand:SF 1 "nonimmediate_operand")))]
e60d6e
   "!TARGET_AVX
e60d6e
-   && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
e60d6e
+   && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
e60d6e
+   && epilogue_completed
e60d6e
    && optimize_function_for_speed_p (cfun)
e60d6e
    && (!REG_P (operands[1])
e60d6e
        || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
e60d6e
@@ -4540,7 +4541,8 @@
e60d6e
         (float_truncate:SF
e60d6e
 	  (match_operand:DF 1 "nonimmediate_operand")))]
e60d6e
   "!TARGET_AVX
e60d6e
-   && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
e60d6e
+   && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
e60d6e
+   && epilogue_completed
e60d6e
    && optimize_function_for_speed_p (cfun)
e60d6e
    && (!REG_P (operands[1])
e60d6e
        || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
e60d6e
@@ -5053,7 +5055,8 @@
e60d6e
   [(set (match_operand:MODEF 0 "sse_reg_operand")
e60d6e
 	(float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
e60d6e
   "!TARGET_AVX
e60d6e
-   && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
e60d6e
+   && TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY
e60d6e
+   && epilogue_completed
e60d6e
    && optimize_function_for_speed_p (cfun)
e60d6e
    && (!EXT_REX_SSE_REG_P (operands[0])
e60d6e
        || TARGET_AVX512VL)"
e60d6e
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
e60d6e
index 636e0c788bf..b5166fb1316 100644
e60d6e
--- a/gcc/config/i386/x86-tune.def
e60d6e
+++ b/gcc/config/i386/x86-tune.def
e60d6e
@@ -64,6 +64,21 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency",
e60d6e
           m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
e60d6e
 	  | m_BDVER | m_ZNVER | m_TREMONT | m_GENERIC)
e60d6e
 
e60d6e
+/* X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY: This knob avoids
e60d6e
+   partial write to the destination in scalar SSE conversion from FP
e60d6e
+   to FP.  */
e60d6e
+DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY,
e60d6e
+	  "sse_partial_reg_fp_converts_dependency",
e60d6e
+	  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
e60d6e
+	  | m_BDVER | m_ZNVER | m_GENERIC)
e60d6e
+
e60d6e
+/* X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY: This knob avoids partial
e60d6e
+   write to the destination in scalar SSE conversion from integer to FP.  */
e60d6e
+DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY,
e60d6e
+	  "sse_partial_reg_converts_dependency",
e60d6e
+	  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
e60d6e
+	  | m_BDVER | m_ZNVER | m_GENERIC)
e60d6e
+
e60d6e
 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
e60d6e
    are resolved on SSE register parts instead of whole registers, so we may
e60d6e
    maintain just lower part of scalar values in proper format leaving the
e60d6e
diff --git a/gcc/testsuite/gcc.target/i386/sse-covert-1.c b/gcc/testsuite/gcc.target/i386/sse-covert-1.c
e60d6e
new file mode 100644
e60d6e
index 00000000000..c30af694505
e60d6e
--- /dev/null
e60d6e
+++ b/gcc/testsuite/gcc.target/i386/sse-covert-1.c
e60d6e
@@ -0,0 +1,19 @@
e60d6e
+/* { dg-do compile } */
e60d6e
+/* { dg-options "-O2 -march=x86-64 -mfpmath=sse -mtune-ctrl=^sse_partial_reg_fp_converts_dependency,^sse_partial_reg_converts_dependency" } */
e60d6e
+
e60d6e
+extern float f;
e60d6e
+extern double d;
e60d6e
+extern int i;
e60d6e
+
e60d6e
+void
e60d6e
+foo (void)
e60d6e
+{
e60d6e
+  d = f;
e60d6e
+  f = i;
e60d6e
+}
e60d6e
+
e60d6e
+/* { dg-final { scan-assembler "cvtss2sd" } } */
e60d6e
+/* { dg-final { scan-assembler "cvtsi2ssl" } } */
e60d6e
+/* { dg-final { scan-assembler-not "cvtps2pd" } } */
e60d6e
+/* { dg-final { scan-assembler-not "cvtdq2ps" } } */
e60d6e
+/* { dg-final { scan-assembler-not "pxor" } } */
e60d6e
diff --git a/gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c b/gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c
e60d6e
new file mode 100644
e60d6e
index 00000000000..b6567e60e3e
e60d6e
--- /dev/null
e60d6e
+++ b/gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c
e60d6e
@@ -0,0 +1,15 @@
e60d6e
+/* { dg-do compile } */
e60d6e
+/* { dg-options "-O2 -march=x86-64 -mfpmath=sse -mtune-ctrl=^sse_partial_reg_fp_converts_dependency" } */
e60d6e
+
e60d6e
+extern float f;
e60d6e
+extern double d;
e60d6e
+
e60d6e
+void
e60d6e
+foo (void)
e60d6e
+{
e60d6e
+  d = f;
e60d6e
+}
e60d6e
+
e60d6e
+/* { dg-final { scan-assembler "cvtss2sd" } } */
e60d6e
+/* { dg-final { scan-assembler-not "cvtps2pd" } } */
e60d6e
+/* { dg-final { scan-assembler-not "pxor" } } */
e60d6e
diff --git a/gcc/testsuite/gcc.target/i386/sse-int-covert-1.c b/gcc/testsuite/gcc.target/i386/sse-int-covert-1.c
e60d6e
new file mode 100644
e60d6e
index 00000000000..107f7241def
e60d6e
--- /dev/null
e60d6e
+++ b/gcc/testsuite/gcc.target/i386/sse-int-covert-1.c
e60d6e
@@ -0,0 +1,14 @@
e60d6e
+/* { dg-do compile } */
e60d6e
+/* { dg-options "-O2 -march=x86-64 -mfpmath=sse -mtune-ctrl=^sse_partial_reg_converts_dependency" } */
e60d6e
+
e60d6e
+extern float f;
e60d6e
+extern int i;
e60d6e
+
e60d6e
+void
e60d6e
+foo (void)
e60d6e
+{
e60d6e
+  f = i;
e60d6e
+}
e60d6e
+
e60d6e
+/* { dg-final { scan-assembler "cvtsi2ssl" } } */
e60d6e
+/* { dg-final { scan-assembler-not "pxor" } } */
e60d6e
-- 
e60d6e
2.18.2
e60d6e