Blame SOURCES/gcc11-tremont3.patch

f49307
From c4c7244349999f91ef2a7cd2108eee0372490be9 Mon Sep 17 00:00:00 2001
f49307
From: "H.J. Lu" <hjl.tools@gmail.com>
f49307
Date: Wed, 15 Sep 2021 14:18:21 +0800
f49307
Subject: [PATCH 3/3] x86: Add TARGET_SSE_PARTIAL_REG_[FP_]CONVERTS_DEPENDENCY
f49307
f49307
1. Replace TARGET_SSE_PARTIAL_REG_DEPENDENCY with
f49307
TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY in SSE FP to FP splitters.
f49307
2. Replace TARGET_SSE_PARTIAL_REG_DEPENDENCY with
f49307
TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY in SSE INT to FP splitters.
f49307
f49307
gcc/
f49307
f49307
	* config/i386/i386.h (TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY):
f49307
	New.
f49307
	(TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY): Likewise.
f49307
	* config/i386/i386.md (SSE FP to FP splitters): Replace
f49307
	TARGET_SSE_PARTIAL_REG_DEPENDENCY with
f49307
	TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY.
f49307
	(SSE INT to FP splitter): Replace TARGET_SSE_PARTIAL_REG_DEPENDENCY
f49307
	with TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY.
f49307
	* config/i386/x86-tune.def
f49307
	(X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY): New.
f49307
	(X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY): Likewise.
f49307
f49307
gcc/testsuite/
f49307
f49307
	* gcc.target/i386/sse-covert-1.c: New test.
f49307
	* gcc.target/i386/sse-fp-covert-1.c: Likewise.
f49307
	* gcc.target/i386/sse-int-covert-1.c: Likewise.
f49307
---
f49307
 gcc/config/i386/i386.h                        |  4 ++++
f49307
 gcc/config/i386/i386.md                       |  9 ++++++---
f49307
 gcc/config/i386/x86-tune.def                  | 15 +++++++++++++++
f49307
 gcc/testsuite/gcc.target/i386/sse-covert-1.c  | 19 +++++++++++++++++++
f49307
 .../gcc.target/i386/sse-fp-covert-1.c         | 15 +++++++++++++++
f49307
 .../gcc.target/i386/sse-int-covert-1.c        | 14 ++++++++++++++
f49307
 6 files changed, 73 insertions(+), 3 deletions(-)
f49307
 create mode 100644 gcc/testsuite/gcc.target/i386/sse-covert-1.c
f49307
 create mode 100644 gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c
f49307
 create mode 100644 gcc/testsuite/gcc.target/i386/sse-int-covert-1.c
f49307
f49307
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
f49307
index 73e118900f7..5b992195df7 100644
f49307
--- a/gcc/config/i386/i386.h
f49307
+++ b/gcc/config/i386/i386.h
f49307
@@ -553,6 +553,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
f49307
 	ix86_tune_features[X86_TUNE_PARTIAL_REG_DEPENDENCY]
f49307
 #define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
f49307
 	ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY]
f49307
+#define TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY \
f49307
+	ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY]
f49307
+#define TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY \
f49307
+	ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY]
f49307
 #define TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
f49307
 	ix86_tune_features[X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL]
f49307
 #define TARGET_SSE_UNALIGNED_STORE_OPTIMAL \
f49307
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
f49307
index 97325e38676..053bec1c1e1 100644
f49307
--- a/gcc/config/i386/i386.md
f49307
+++ b/gcc/config/i386/i386.md
f49307
@@ -4378,7 +4378,8 @@
f49307
         (float_extend:DF
f49307
           (match_operand:SF 1 "nonimmediate_operand")))]
f49307
   "!TARGET_AVX
f49307
-   && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
f49307
+   && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
f49307
+   && epilogue_completed
f49307
    && optimize_function_for_speed_p (cfun)
f49307
    && (!REG_P (operands[1])
f49307
        || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
f49307
@@ -4540,7 +4541,8 @@
f49307
         (float_truncate:SF
f49307
 	  (match_operand:DF 1 "nonimmediate_operand")))]
f49307
   "!TARGET_AVX
f49307
-   && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
f49307
+   && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
f49307
+   && epilogue_completed
f49307
    && optimize_function_for_speed_p (cfun)
f49307
    && (!REG_P (operands[1])
f49307
        || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
f49307
@@ -5053,7 +5055,8 @@
f49307
   [(set (match_operand:MODEF 0 "sse_reg_operand")
f49307
 	(float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
f49307
   "!TARGET_AVX
f49307
-   && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
f49307
+   && TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY
f49307
+   && epilogue_completed
f49307
    && optimize_function_for_speed_p (cfun)
f49307
    && (!EXT_REX_SSE_REG_P (operands[0])
f49307
        || TARGET_AVX512VL)"
f49307
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
f49307
index 636e0c788bf..b5166fb1316 100644
f49307
--- a/gcc/config/i386/x86-tune.def
f49307
+++ b/gcc/config/i386/x86-tune.def
f49307
@@ -64,6 +64,21 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency",
f49307
           m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
f49307
 	  | m_BDVER | m_ZNVER | m_TREMONT | m_GENERIC)
f49307
 
f49307
+/* X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY: This knob avoids
f49307
+   partial write to the destination in scalar SSE conversion from FP
f49307
+   to FP.  */
f49307
+DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY,
f49307
+	  "sse_partial_reg_fp_converts_dependency",
f49307
+	  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
f49307
+	  | m_BDVER | m_ZNVER | m_GENERIC)
f49307
+
f49307
+/* X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY: This knob avoids partial
f49307
+   write to the destination in scalar SSE conversion from integer to FP.  */
f49307
+DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY,
f49307
+	  "sse_partial_reg_converts_dependency",
f49307
+	  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
f49307
+	  | m_BDVER | m_ZNVER | m_GENERIC)
f49307
+
f49307
 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
f49307
    are resolved on SSE register parts instead of whole registers, so we may
f49307
    maintain just lower part of scalar values in proper format leaving the
f49307
diff --git a/gcc/testsuite/gcc.target/i386/sse-covert-1.c b/gcc/testsuite/gcc.target/i386/sse-covert-1.c
f49307
new file mode 100644
f49307
index 00000000000..c30af694505
f49307
--- /dev/null
f49307
+++ b/gcc/testsuite/gcc.target/i386/sse-covert-1.c
f49307
@@ -0,0 +1,19 @@
f49307
+/* { dg-do compile } */
f49307
+/* { dg-options "-O2 -march=x86-64 -mfpmath=sse -mtune-ctrl=^sse_partial_reg_fp_converts_dependency,^sse_partial_reg_converts_dependency" } */
f49307
+
f49307
+extern float f;
f49307
+extern double d;
f49307
+extern int i;
f49307
+
f49307
+void
f49307
+foo (void)
f49307
+{
f49307
+  d = f;
f49307
+  f = i;
f49307
+}
f49307
+
f49307
+/* { dg-final { scan-assembler "cvtss2sd" } } */
f49307
+/* { dg-final { scan-assembler "cvtsi2ssl" } } */
f49307
+/* { dg-final { scan-assembler-not "cvtps2pd" } } */
f49307
+/* { dg-final { scan-assembler-not "cvtdq2ps" } } */
f49307
+/* { dg-final { scan-assembler-not "pxor" } } */
f49307
diff --git a/gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c b/gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c
f49307
new file mode 100644
f49307
index 00000000000..b6567e60e3e
f49307
--- /dev/null
f49307
+++ b/gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c
f49307
@@ -0,0 +1,15 @@
f49307
+/* { dg-do compile } */
f49307
+/* { dg-options "-O2 -march=x86-64 -mfpmath=sse -mtune-ctrl=^sse_partial_reg_fp_converts_dependency" } */
f49307
+
f49307
+extern float f;
f49307
+extern double d;
f49307
+
f49307
+void
f49307
+foo (void)
f49307
+{
f49307
+  d = f;
f49307
+}
f49307
+
f49307
+/* { dg-final { scan-assembler "cvtss2sd" } } */
f49307
+/* { dg-final { scan-assembler-not "cvtps2pd" } } */
f49307
+/* { dg-final { scan-assembler-not "pxor" } } */
f49307
diff --git a/gcc/testsuite/gcc.target/i386/sse-int-covert-1.c b/gcc/testsuite/gcc.target/i386/sse-int-covert-1.c
f49307
new file mode 100644
f49307
index 00000000000..107f7241def
f49307
--- /dev/null
f49307
+++ b/gcc/testsuite/gcc.target/i386/sse-int-covert-1.c
f49307
@@ -0,0 +1,14 @@
f49307
+/* { dg-do compile } */
f49307
+/* { dg-options "-O2 -march=x86-64 -mfpmath=sse -mtune-ctrl=^sse_partial_reg_converts_dependency" } */
f49307
+
f49307
+extern float f;
f49307
+extern int i;
f49307
+
f49307
+void
f49307
+foo (void)
f49307
+{
f49307
+  f = i;
f49307
+}
f49307
+
f49307
+/* { dg-final { scan-assembler "cvtsi2ssl" } } */
f49307
+/* { dg-final { scan-assembler-not "pxor" } } */
f49307
-- 
f49307
2.18.2
f49307