Blame SOURCES/gcc11-tremont3.patch

44ce1d
From c4c7244349999f91ef2a7cd2108eee0372490be9 Mon Sep 17 00:00:00 2001
44ce1d
From: "H.J. Lu" <hjl.tools@gmail.com>
44ce1d
Date: Wed, 15 Sep 2021 14:18:21 +0800
44ce1d
Subject: [PATCH 3/3] x86: Add TARGET_SSE_PARTIAL_REG_[FP_]CONVERTS_DEPENDENCY
44ce1d
44ce1d
1. Replace TARGET_SSE_PARTIAL_REG_DEPENDENCY with
44ce1d
TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY in SSE FP to FP splitters.
44ce1d
2. Replace TARGET_SSE_PARTIAL_REG_DEPENDENCY with
44ce1d
TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY in SSE INT to FP splitters.
44ce1d
44ce1d
gcc/
44ce1d
44ce1d
	* config/i386/i386.h (TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY):
44ce1d
	New.
44ce1d
	(TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY): Likewise.
44ce1d
	* config/i386/i386.md (SSE FP to FP splitters): Replace
44ce1d
	TARGET_SSE_PARTIAL_REG_DEPENDENCY with
44ce1d
	TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY.
44ce1d
	(SSE INT to FP splitter): Replace TARGET_SSE_PARTIAL_REG_DEPENDENCY
44ce1d
	with TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY.
44ce1d
	* config/i386/x86-tune.def
44ce1d
	(X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY): New.
44ce1d
	(X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY): Likewise.
44ce1d
44ce1d
gcc/testsuite/
44ce1d
44ce1d
	* gcc.target/i386/sse-covert-1.c: New test.
44ce1d
	* gcc.target/i386/sse-fp-covert-1.c: Likewise.
44ce1d
	* gcc.target/i386/sse-int-covert-1.c: Likewise.
44ce1d
---
44ce1d
 gcc/config/i386/i386.h                        |  4 ++++
44ce1d
 gcc/config/i386/i386.md                       |  9 ++++++---
44ce1d
 gcc/config/i386/x86-tune.def                  | 15 +++++++++++++++
44ce1d
 gcc/testsuite/gcc.target/i386/sse-covert-1.c  | 19 +++++++++++++++++++
44ce1d
 .../gcc.target/i386/sse-fp-covert-1.c         | 15 +++++++++++++++
44ce1d
 .../gcc.target/i386/sse-int-covert-1.c        | 14 ++++++++++++++
44ce1d
 6 files changed, 73 insertions(+), 3 deletions(-)
44ce1d
 create mode 100644 gcc/testsuite/gcc.target/i386/sse-covert-1.c
44ce1d
 create mode 100644 gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c
44ce1d
 create mode 100644 gcc/testsuite/gcc.target/i386/sse-int-covert-1.c
44ce1d
44ce1d
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
44ce1d
index 73e118900f7..5b992195df7 100644
44ce1d
--- a/gcc/config/i386/i386.h
44ce1d
+++ b/gcc/config/i386/i386.h
44ce1d
@@ -553,6 +553,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
44ce1d
 	ix86_tune_features[X86_TUNE_PARTIAL_REG_DEPENDENCY]
44ce1d
 #define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
44ce1d
 	ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY]
44ce1d
+#define TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY \
44ce1d
+	ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY]
44ce1d
+#define TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY \
44ce1d
+	ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY]
44ce1d
 #define TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
44ce1d
 	ix86_tune_features[X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL]
44ce1d
 #define TARGET_SSE_UNALIGNED_STORE_OPTIMAL \
44ce1d
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
44ce1d
index 97325e38676..053bec1c1e1 100644
44ce1d
--- a/gcc/config/i386/i386.md
44ce1d
+++ b/gcc/config/i386/i386.md
44ce1d
@@ -4378,7 +4378,8 @@
44ce1d
         (float_extend:DF
44ce1d
           (match_operand:SF 1 "nonimmediate_operand")))]
44ce1d
   "!TARGET_AVX
44ce1d
-   && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
44ce1d
+   && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
44ce1d
+   && epilogue_completed
44ce1d
    && optimize_function_for_speed_p (cfun)
44ce1d
    && (!REG_P (operands[1])
44ce1d
        || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
44ce1d
@@ -4540,7 +4541,8 @@
44ce1d
         (float_truncate:SF
44ce1d
 	  (match_operand:DF 1 "nonimmediate_operand")))]
44ce1d
   "!TARGET_AVX
44ce1d
-   && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
44ce1d
+   && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
44ce1d
+   && epilogue_completed
44ce1d
    && optimize_function_for_speed_p (cfun)
44ce1d
    && (!REG_P (operands[1])
44ce1d
        || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
44ce1d
@@ -5053,7 +5055,8 @@
44ce1d
   [(set (match_operand:MODEF 0 "sse_reg_operand")
44ce1d
 	(float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
44ce1d
   "!TARGET_AVX
44ce1d
-   && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
44ce1d
+   && TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY
44ce1d
+   && epilogue_completed
44ce1d
    && optimize_function_for_speed_p (cfun)
44ce1d
    && (!EXT_REX_SSE_REG_P (operands[0])
44ce1d
        || TARGET_AVX512VL)"
44ce1d
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
44ce1d
index 636e0c788bf..b5166fb1316 100644
44ce1d
--- a/gcc/config/i386/x86-tune.def
44ce1d
+++ b/gcc/config/i386/x86-tune.def
44ce1d
@@ -64,6 +64,21 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency",
44ce1d
           m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
44ce1d
 	  | m_BDVER | m_ZNVER | m_TREMONT | m_GENERIC)
44ce1d
 
44ce1d
+/* X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY: This knob avoids
44ce1d
+   partial write to the destination in scalar SSE conversion from FP
44ce1d
+   to FP.  */
44ce1d
+DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY,
44ce1d
+	  "sse_partial_reg_fp_converts_dependency",
44ce1d
+	  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
44ce1d
+	  | m_BDVER | m_ZNVER | m_GENERIC)
44ce1d
+
44ce1d
+/* X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY: This knob avoids partial
44ce1d
+   write to the destination in scalar SSE conversion from integer to FP.  */
44ce1d
+DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY,
44ce1d
+	  "sse_partial_reg_converts_dependency",
44ce1d
+	  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
44ce1d
+	  | m_BDVER | m_ZNVER | m_GENERIC)
44ce1d
+
44ce1d
 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
44ce1d
    are resolved on SSE register parts instead of whole registers, so we may
44ce1d
    maintain just lower part of scalar values in proper format leaving the
44ce1d
diff --git a/gcc/testsuite/gcc.target/i386/sse-covert-1.c b/gcc/testsuite/gcc.target/i386/sse-covert-1.c
44ce1d
new file mode 100644
44ce1d
index 00000000000..c30af694505
44ce1d
--- /dev/null
44ce1d
+++ b/gcc/testsuite/gcc.target/i386/sse-covert-1.c
44ce1d
@@ -0,0 +1,19 @@
44ce1d
+/* { dg-do compile } */
44ce1d
+/* { dg-options "-O2 -march=x86-64 -mfpmath=sse -mtune-ctrl=^sse_partial_reg_fp_converts_dependency,^sse_partial_reg_converts_dependency" } */
44ce1d
+
44ce1d
+extern float f;
44ce1d
+extern double d;
44ce1d
+extern int i;
44ce1d
+
44ce1d
+void
44ce1d
+foo (void)
44ce1d
+{
44ce1d
+  d = f;
44ce1d
+  f = i;
44ce1d
+}
44ce1d
+
44ce1d
+/* { dg-final { scan-assembler "cvtss2sd" } } */
44ce1d
+/* { dg-final { scan-assembler "cvtsi2ssl" } } */
44ce1d
+/* { dg-final { scan-assembler-not "cvtps2pd" } } */
44ce1d
+/* { dg-final { scan-assembler-not "cvtdq2ps" } } */
44ce1d
+/* { dg-final { scan-assembler-not "pxor" } } */
44ce1d
diff --git a/gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c b/gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c
44ce1d
new file mode 100644
44ce1d
index 00000000000..b6567e60e3e
44ce1d
--- /dev/null
44ce1d
+++ b/gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c
44ce1d
@@ -0,0 +1,15 @@
44ce1d
+/* { dg-do compile } */
44ce1d
+/* { dg-options "-O2 -march=x86-64 -mfpmath=sse -mtune-ctrl=^sse_partial_reg_fp_converts_dependency" } */
44ce1d
+
44ce1d
+extern float f;
44ce1d
+extern double d;
44ce1d
+
44ce1d
+void
44ce1d
+foo (void)
44ce1d
+{
44ce1d
+  d = f;
44ce1d
+}
44ce1d
+
44ce1d
+/* { dg-final { scan-assembler "cvtss2sd" } } */
44ce1d
+/* { dg-final { scan-assembler-not "cvtps2pd" } } */
44ce1d
+/* { dg-final { scan-assembler-not "pxor" } } */
44ce1d
diff --git a/gcc/testsuite/gcc.target/i386/sse-int-covert-1.c b/gcc/testsuite/gcc.target/i386/sse-int-covert-1.c
44ce1d
new file mode 100644
44ce1d
index 00000000000..107f7241def
44ce1d
--- /dev/null
44ce1d
+++ b/gcc/testsuite/gcc.target/i386/sse-int-covert-1.c
44ce1d
@@ -0,0 +1,14 @@
44ce1d
+/* { dg-do compile } */
44ce1d
+/* { dg-options "-O2 -march=x86-64 -mfpmath=sse -mtune-ctrl=^sse_partial_reg_converts_dependency" } */
44ce1d
+
44ce1d
+extern float f;
44ce1d
+extern int i;
44ce1d
+
44ce1d
+void
44ce1d
+foo (void)
44ce1d
+{
44ce1d
+  f = i;
44ce1d
+}
44ce1d
+
44ce1d
+/* { dg-final { scan-assembler "cvtsi2ssl" } } */
44ce1d
+/* { dg-final { scan-assembler-not "pxor" } } */
44ce1d
-- 
44ce1d
2.18.2
44ce1d