Blame SOURCES/gcc11-mtune-alderlake.patch

e60d6e
From 54ccc52ba3f842cd94718967465a6015a752ca47 Mon Sep 17 00:00:00 2001
e60d6e
From: "Cui,Lili" <lili.cui@intel.com>
e60d6e
Date: Thu, 4 Nov 2021 10:38:56 +0800
e60d6e
Subject: [PATCH] x86: Update -mtune=alderlake
e60d6e
MIME-Version: 1.0
e60d6e
Content-Type: text/plain; charset=UTF-8
e60d6e
Content-Transfer-Encoding: 8bit
e60d6e
e60d6e
Update mtune for alderlake. Alder Lake Intel Hybrid Technology will not support
e60d6e
Intel® AVX-512. ISA features such as Intel® AVX, AVX-VNNI, Intel® AVX2, and
e60d6e
UMONITOR/UMWAIT/TPAUSE are supported.
e60d6e
e60d6e
gcc/ChangeLog
e60d6e
e60d6e
	* config/i386/i386-options.c (m_CORE_AVX2): Remove Alderlake
e60d6e
	from m_CORE_AVX2.
e60d6e
	(processor_cost_table): Use alderlake_cost for Alderlake.
e60d6e
	* config/i386/i386.c (ix86_sched_init_global): Handle Alderlake.
e60d6e
	* config/i386/x86-tune-costs.h (struct processor_costs): Add alderlake
e60d6e
	cost.
e60d6e
	* config/i386/x86-tune-sched.c (ix86_issue_rate): Change Alderlake
e60d6e
	issue rate to 4.
e60d6e
	(ix86_adjust_cost): Handle Alderlake.
e60d6e
	* config/i386/x86-tune.def (X86_TUNE_SCHEDULE): Enable for Alderlake.
e60d6e
	(X86_TUNE_PARTIAL_REG_DEPENDENCY): Likewise.
e60d6e
	(X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY): Likewise.
e60d6e
	(X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY): Likewise.
e60d6e
	(X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY): Likewise.
e60d6e
	(X86_TUNE_MOVX): Likewise.
e60d6e
	(X86_TUNE_MEMORY_MISMATCH_STALL): Likewise.
e60d6e
	(X86_TUNE_USE_LEAVE): Likewise.
e60d6e
	(X86_TUNE_PUSH_MEMORY): Likewise.
e60d6e
	(X86_TUNE_USE_INCDEC): Likewise.
e60d6e
	(X86_TUNE_INTEGER_DFMODE_MOVES): Likewise.
e60d6e
	(X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES): Likewise.
e60d6e
	(X86_TUNE_USE_SAHF): Likewise.
e60d6e
	(X86_TUNE_USE_BT): Likewise.
e60d6e
	(X86_TUNE_AVOID_FALSE_DEP_FOR_BMI): Likewise.
e60d6e
	(X86_TUNE_ONE_IF_CONV_INSN): Likewise.
e60d6e
	(X86_TUNE_AVOID_MFENCE): Likewise.
e60d6e
	(X86_TUNE_USE_SIMODE_FIOP): Likewise.
e60d6e
	(X86_TUNE_EXT_80387_CONSTANTS): Likewise.
e60d6e
	(X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL): Likewise.
e60d6e
	(X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL): Likewise.
e60d6e
	(X86_TUNE_SSE_TYPELESS_STORES): Likewise.
e60d6e
	(X86_TUNE_SSE_LOAD0_BY_PXOR): Likewise.
e60d6e
	(X86_TUNE_AVOID_4BYTE_PREFIXES): Likewise.
e60d6e
	(X86_TUNE_USE_GATHER): Disable for Alderlake.
e60d6e
---
e60d6e
 gcc/config/i386/i386-options.c   |   4 +-
e60d6e
 gcc/config/i386/i386.c           |   1 +
e60d6e
 gcc/config/i386/x86-tune-costs.h | 120 +++++++++++++++++++++++++++++++
e60d6e
 gcc/config/i386/x86-tune-sched.c |   2 +
e60d6e
 gcc/config/i386/x86-tune.def     |  58 +++++++--------
e60d6e
 5 files changed, 155 insertions(+), 30 deletions(-)
e60d6e
e60d6e
diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
e60d6e
index e7a3bd4aaea..a8cc0664f11 100644
e60d6e
--- a/gcc/config/i386/i386-options.c
e60d6e
+++ b/gcc/config/i386/i386-options.c
e60d6e
@@ -131,7 +131,7 @@ along with GCC; see the file COPYING3.  If not see
e60d6e
 		       | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \
e60d6e
 		       | m_TIGERLAKE | m_COOPERLAKE | m_SAPPHIRERAPIDS \
e60d6e
 		       | m_ROCKETLAKE)
e60d6e
-#define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_ALDERLAKE | m_CORE_AVX512)
e60d6e
+#define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_CORE_AVX512)
e60d6e
 #define m_CORE_ALL (m_CORE2 | m_NEHALEM  | m_SANDYBRIDGE | m_CORE_AVX2)
e60d6e
 #define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT)
e60d6e
 #define m_GOLDMONT_PLUS (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT_PLUS)
e60d6e
@@ -736,7 +736,7 @@ static const struct processor_costs *processor_cost_table[] =
e60d6e
   &icelake_cost,
e60d6e
   &skylake_cost,
e60d6e
   &icelake_cost,
e60d6e
-  &icelake_cost,
e60d6e
+  &alderlake_cost,
e60d6e
   &icelake_cost,
e60d6e
   &intel_cost,
e60d6e
   &geode_cost,
e60d6e
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
e60d6e
index e94efdf39fb..73c4d5115bb 100644
e60d6e
--- a/gcc/config/i386/i386.c
e60d6e
+++ b/gcc/config/i386/i386.c
e60d6e
@@ -17014,6 +17014,7 @@ ix86_sched_init_global (FILE *, int, int)
e60d6e
     case PROCESSOR_SANDYBRIDGE:
e60d6e
     case PROCESSOR_HASWELL:
e60d6e
     case PROCESSOR_TREMONT:
e60d6e
+    case PROCESSOR_ALDERLAKE:
e60d6e
     case PROCESSOR_GENERIC:
e60d6e
       /* Do not perform multipass scheduling for pre-reload schedule
e60d6e
          to save compile time.  */
e60d6e
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
e60d6e
index 93644be9cb3..dd5563d2e64 100644
e60d6e
--- a/gcc/config/i386/x86-tune-costs.h
e60d6e
+++ b/gcc/config/i386/x86-tune-costs.h
e60d6e
@@ -2070,6 +2070,126 @@ struct processor_costs icelake_cost = {
e60d6e
   "16",					/* Func alignment.  */
e60d6e
 };
e60d6e
 
e60d6e
+/* alderlake_cost should produce code tuned for alderlake family of CPUs.  */
e60d6e
+static stringop_algs alderlake_memcpy[2] = {
e60d6e
+  {libcall,
e60d6e
+   {{256, rep_prefix_1_byte, true},
e60d6e
+    {256, loop, false},
e60d6e
+    {-1, libcall, false}}},
e60d6e
+  {libcall,
e60d6e
+   {{256, rep_prefix_1_byte, true},
e60d6e
+    {256, loop, false},
e60d6e
+    {-1, libcall, false}}}};
e60d6e
+static stringop_algs alderlake_memset[2] = {
e60d6e
+  {libcall,
e60d6e
+   {{256, rep_prefix_1_byte, true},
e60d6e
+    {256, loop, false},
e60d6e
+    {-1, libcall, false}}},
e60d6e
+  {libcall,
e60d6e
+   {{256, rep_prefix_1_byte, true},
e60d6e
+    {256, loop, false},
e60d6e
+    {-1, libcall, false}}}};
e60d6e
+static const
e60d6e
+struct processor_costs alderlake_cost = {
e60d6e
+  {
e60d6e
+  /* Start of register allocator costs.  integer->integer move cost is 2.  */
e60d6e
+  6,				     /* cost for loading QImode using movzbl */
e60d6e
+  {6, 6, 6},				/* cost of loading integer registers
e60d6e
+					   in QImode, HImode and SImode.
e60d6e
+					   Relative to reg-reg move (2).  */
e60d6e
+  {6, 6, 6},				/* cost of storing integer registers */
e60d6e
+  4,					/* cost of reg,reg fld/fst */
e60d6e
+  {6, 6, 12},				/* cost of loading fp registers
e60d6e
+					   in SFmode, DFmode and XFmode */
e60d6e
+  {6, 6, 12},				/* cost of storing fp registers
e60d6e
+					   in SFmode, DFmode and XFmode */
e60d6e
+  2,					/* cost of moving MMX register */
e60d6e
+  {6, 6},				/* cost of loading MMX registers
e60d6e
+					   in SImode and DImode */
e60d6e
+  {6, 6},				/* cost of storing MMX registers
e60d6e
+					   in SImode and DImode */
e60d6e
+  2, 3, 4,				/* cost of moving XMM,YMM,ZMM register */
e60d6e
+  {6, 6, 6, 10, 15},			/* cost of loading SSE registers
e60d6e
+					   in 32,64,128,256 and 512-bit */
e60d6e
+  {6, 6, 6, 10, 15},			/* cost of storing SSE registers
e60d6e
+					   in 32,64,128,256 and 512-bit */
e60d6e
+  6, 6,				/* SSE->integer and integer->SSE moves */
e60d6e
+  6, 6,				/* mask->integer and integer->mask moves */
e60d6e
+  {6, 6, 6},				/* cost of loading mask register
e60d6e
+					   in QImode, HImode, SImode.  */
e60d6e
+  {6, 6, 6},			/* cost of storing mask register
e60d6e
+					   in QImode, HImode, SImode.  */
e60d6e
+  2,					/* cost of moving mask register.  */
e60d6e
+  /* End of register allocator costs.  */
e60d6e
+  },
e60d6e
+
e60d6e
+  COSTS_N_INSNS (1),			/* cost of an add instruction */
e60d6e
+  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
e60d6e
+  COSTS_N_INSNS (1),			/* variable shift costs */
e60d6e
+  COSTS_N_INSNS (1),			/* constant shift costs */
e60d6e
+  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
e60d6e
+   COSTS_N_INSNS (4),			/*				 HI */
e60d6e
+   COSTS_N_INSNS (3),			/*				 SI */
e60d6e
+   COSTS_N_INSNS (4),			/*				 DI */
e60d6e
+   COSTS_N_INSNS (4)},			/*			      other */
e60d6e
+  0,					/* cost of multiply per each bit set */
e60d6e
+  {COSTS_N_INSNS (16),			/* cost of a divide/mod for QI */
e60d6e
+   COSTS_N_INSNS (22),			/*			    HI */
e60d6e
+   COSTS_N_INSNS (30),			/*			    SI */
e60d6e
+   COSTS_N_INSNS (74),			/*			    DI */
e60d6e
+   COSTS_N_INSNS (74)},			/*			    other */
e60d6e
+  COSTS_N_INSNS (1),			/* cost of movsx */
e60d6e
+  COSTS_N_INSNS (1),			/* cost of movzx */
e60d6e
+  8,					/* "large" insn */
e60d6e
+  17,					/* MOVE_RATIO */
e60d6e
+  17,					/* CLEAR_RATIO */
e60d6e
+  {6, 6, 6},				/* cost of loading integer registers
e60d6e
+					   in QImode, HImode and SImode.
e60d6e
+					   Relative to reg-reg move (2).  */
e60d6e
+  {6, 6, 6},				/* cost of storing integer registers */
e60d6e
+  {6, 6, 6, 10, 15},			/* cost of loading SSE register
e60d6e
+					   in 32bit, 64bit, 128bit, 256bit and 512bit */
e60d6e
+  {6, 6, 6, 10, 15},			/* cost of storing SSE register
e60d6e
+					   in 32bit, 64bit, 128bit, 256bit and 512bit */
e60d6e
+  {6, 6, 6, 10, 15},			/* cost of unaligned loads.  */
e60d6e
+  {6, 6, 6, 10, 15},			/* cost of unaligned stores.  */
e60d6e
+  2, 3, 4,				/* cost of moving XMM,YMM,ZMM register */
e60d6e
+  6,					/* cost of moving SSE register to integer.  */
e60d6e
+  18, 6,				/* Gather load static, per_elt.  */
e60d6e
+  18, 6,				/* Gather store static, per_elt.  */
e60d6e
+  32,					/* size of l1 cache.  */
e60d6e
+  512,					/* size of l2 cache.  */
e60d6e
+  64,					/* size of prefetch block */
e60d6e
+  6,					/* number of parallel prefetches */
e60d6e
+  3,					/* Branch cost */
e60d6e
+  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
e60d6e
+  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
e60d6e
+  COSTS_N_INSNS (17),			/* cost of FDIV instruction.  */
e60d6e
+  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
e60d6e
+  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
e60d6e
+  COSTS_N_INSNS (14),			/* cost of FSQRT instruction.  */
e60d6e
+
e60d6e
+  COSTS_N_INSNS (1),			/* cost of cheap SSE instruction.  */
e60d6e
+  COSTS_N_INSNS (3),			/* cost of ADDSS/SD SUBSS/SD insns.  */
e60d6e
+  COSTS_N_INSNS (4),			/* cost of MULSS instruction.  */
e60d6e
+  COSTS_N_INSNS (5),			/* cost of MULSD instruction.  */
e60d6e
+  COSTS_N_INSNS (5),			/* cost of FMA SS instruction.  */
e60d6e
+  COSTS_N_INSNS (5),			/* cost of FMA SD instruction.  */
e60d6e
+  COSTS_N_INSNS (13),			/* cost of DIVSS instruction.  */
e60d6e
+  COSTS_N_INSNS (17),			/* cost of DIVSD instruction.  */
e60d6e
+  COSTS_N_INSNS (14),			/* cost of SQRTSS instruction.  */
e60d6e
+  COSTS_N_INSNS (18),			/* cost of SQRTSD instruction.  */
e60d6e
+  1, 4, 3, 3,				/* reassoc int, fp, vec_int, vec_fp.  */
e60d6e
+  alderlake_memcpy,
e60d6e
+  alderlake_memset,
e60d6e
+  COSTS_N_INSNS (4),			/* cond_taken_branch_cost.  */
e60d6e
+  COSTS_N_INSNS (2),			/* cond_not_taken_branch_cost.  */
e60d6e
+  "16:11:8",				/* Loop alignment.  */
e60d6e
+  "16:11:8",				/* Jump alignment.  */
e60d6e
+  "0:0:8",				/* Label alignment.  */
e60d6e
+  "16",					/* Func alignment.  */
e60d6e
+};
e60d6e
+
e60d6e
   /* BTVER1 has optimized REP instruction for medium sized blocks, but for
e60d6e
      very small blocks it is better to use loop. For large blocks, libcall can
e60d6e
      do nontemporary accesses and beat inline considerably.  */
e60d6e
diff --git a/gcc/config/i386/x86-tune-sched.c b/gcc/config/i386/x86-tune-sched.c
e60d6e
index 56ada99a450..0c149a09531 100644
e60d6e
--- a/gcc/config/i386/x86-tune-sched.c
e60d6e
+++ b/gcc/config/i386/x86-tune-sched.c
e60d6e
@@ -72,6 +72,7 @@ ix86_issue_rate (void)
e60d6e
     case PROCESSOR_SANDYBRIDGE:
e60d6e
     case PROCESSOR_HASWELL:
e60d6e
     case PROCESSOR_TREMONT:
e60d6e
+    case PROCESSOR_ALDERLAKE:
e60d6e
     case PROCESSOR_GENERIC:
e60d6e
       return 4;
e60d6e
 
e60d6e
@@ -431,6 +432,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
e60d6e
     case PROCESSOR_SANDYBRIDGE:
e60d6e
     case PROCESSOR_HASWELL:
e60d6e
     case PROCESSOR_TREMONT:
e60d6e
+    case PROCESSOR_ALDERLAKE:
e60d6e
     case PROCESSOR_GENERIC:
e60d6e
       /* Stack engine allows to execute push&pop instructions in parall.  */
e60d6e
       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
e60d6e
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
e60d6e
index 58e8ead56b4..4ae0b569841 100644
e60d6e
--- a/gcc/config/i386/x86-tune.def
e60d6e
+++ b/gcc/config/i386/x86-tune.def
e60d6e
@@ -42,7 +42,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
e60d6e
 DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
e60d6e
           m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT
e60d6e
 	  | m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_GOLDMONT
e60d6e
-	  | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC)
e60d6e
+	  | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE |m_GENERIC)
e60d6e
 
e60d6e
 /* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming
e60d6e
    on modern chips.  Preffer stores affecting whole integer register
e60d6e
@@ -51,7 +51,7 @@ DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
e60d6e
 DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency",
e60d6e
           m_P4_NOCONA | m_CORE2 | m_NEHALEM  | m_SANDYBRIDGE | m_CORE_AVX2
e60d6e
 	  | m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL
e60d6e
-	  | m_KNL | m_KNM | m_AMD_MULTIPLE | m_TREMONT
e60d6e
+	  | m_KNL | m_KNM | m_AMD_MULTIPLE | m_TREMONT | m_ALDERLAKE
e60d6e
 	  | m_GENERIC)
e60d6e
 
e60d6e
 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store
e60d6e
@@ -62,7 +62,7 @@ DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency",
e60d6e
    that can be partly masked by careful scheduling of moves.  */
e60d6e
 DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency",
e60d6e
           m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
e60d6e
-	  | m_BDVER | m_ZNVER | m_TREMONT | m_GENERIC)
e60d6e
+	  | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_GENERIC)
e60d6e
 
e60d6e
 /* X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY: This knob avoids
e60d6e
    partial write to the destination in scalar SSE conversion from FP
e60d6e
@@ -70,14 +70,14 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency",
e60d6e
 DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY,
e60d6e
 	  "sse_partial_reg_fp_converts_dependency",
e60d6e
 	  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
e60d6e
-	  | m_BDVER | m_ZNVER | m_GENERIC)
e60d6e
+	  | m_BDVER | m_ZNVER | m_ALDERLAKE| m_GENERIC)
e60d6e
 
e60d6e
 /* X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY: This knob avoids partial
e60d6e
    write to the destination in scalar SSE conversion from integer to FP.  */
e60d6e
 DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY,
e60d6e
 	  "sse_partial_reg_converts_dependency",
e60d6e
 	  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
e60d6e
-	  | m_BDVER | m_ZNVER | m_GENERIC)
e60d6e
+	  | m_BDVER | m_ZNVER | m_ALDERLAKE | m_GENERIC)
e60d6e
 
e60d6e
 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
e60d6e
    are resolved on SSE register parts instead of whole registers, so we may
e60d6e
@@ -103,14 +103,14 @@ DEF_TUNE (X86_TUNE_MOVX, "movx",
e60d6e
           m_PPRO | m_P4_NOCONA | m_CORE2 | m_NEHALEM  | m_SANDYBRIDGE
e60d6e
 	  | m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_KNL | m_KNM | m_INTEL
e60d6e
 	  | m_GOLDMONT_PLUS | m_GEODE | m_AMD_MULTIPLE
e60d6e
-	  | m_CORE_AVX2 | m_TREMONT | m_GENERIC)
e60d6e
+	  | m_CORE_AVX2 | m_TREMONT | m_ALDERLAKE | m_GENERIC)
e60d6e
 
e60d6e
 /* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by
e60d6e
    full sized loads.  */
e60d6e
 DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall",
e60d6e
           m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
e60d6e
 	  | m_KNL | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_AMD_MULTIPLE
e60d6e
-	  | m_TREMONT | m_GENERIC)
e60d6e
+	  | m_TREMONT | m_ALDERLAKE | m_GENERIC)
e60d6e
 
e60d6e
 /* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent
e60d6e
    conditional jump instruction for 32 bit TARGET.  */
e60d6e
@@ -166,14 +166,14 @@ DEF_TUNE (X86_TUNE_EPILOGUE_USING_MOVE, "epilogue_using_move",
e60d6e
 /* X86_TUNE_USE_LEAVE: Use "leave" instruction in epilogues where it fits.  */
e60d6e
 DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave",
e60d6e
 	  m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_TREMONT
e60d6e
-	  | m_GENERIC)
e60d6e
+	  | m_ALDERLAKE | m_GENERIC)
e60d6e
 
e60d6e
 /* X86_TUNE_PUSH_MEMORY: Enable generation of "push mem" instructions.
e60d6e
    Some chips, like 486 and Pentium works faster with separate load
e60d6e
    and push instructions.  */
e60d6e
 DEF_TUNE (X86_TUNE_PUSH_MEMORY, "push_memory",
e60d6e
           m_386 | m_P4_NOCONA | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE
e60d6e
-          | m_TREMONT | m_GENERIC)
e60d6e
+	  | m_TREMONT | m_ALDERLAKE | m_GENERIC)
e60d6e
 
e60d6e
 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
e60d6e
    over esp subtraction.  */
e60d6e
@@ -243,14 +243,14 @@ DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT | m_LAKEMONT | m_PPRO))
e60d6e
 DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec",
e60d6e
           ~(m_P4_NOCONA | m_CORE2 | m_NEHALEM  | m_SANDYBRIDGE
e60d6e
 	    | m_BONNELL | m_SILVERMONT | m_INTEL |  m_KNL | m_KNM | m_GOLDMONT
e60d6e
-	    | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC))
e60d6e
+	    | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_GENERIC))
e60d6e
 
e60d6e
 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
e60d6e
    for DFmode copies */
e60d6e
 DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves",
e60d6e
           ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
e60d6e
 	    | m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GOLDMONT
e60d6e
-	    | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC))
e60d6e
+	    | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_GENERIC))
e60d6e
 
e60d6e
 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
e60d6e
    will impact LEA instruction selection. */
e60d6e
@@ -298,14 +298,14 @@ DEF_TUNE (X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB,
e60d6e
 DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES,
e60d6e
 	  "misaligned_move_string_pro_epilogues",
e60d6e
 	  m_386 | m_486 | m_CORE_ALL | m_AMD_MULTIPLE | m_TREMONT
e60d6e
-	  | m_GENERIC)
e60d6e
+	  | m_ALDERLAKE |m_GENERIC)
e60d6e
 
e60d6e
 /* X86_TUNE_USE_SAHF: Controls use of SAHF.  */
e60d6e
 DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf",
e60d6e
           m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
e60d6e
 	  | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER
e60d6e
 	  | m_BTVER | m_ZNVER | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT
e60d6e
-	  | m_GENERIC)
e60d6e
+	  | m_ALDERLAKE | m_GENERIC)
e60d6e
 
e60d6e
 /* X86_TUNE_USE_CLTD: Controls use of CLTD and CTQO instructions.  */
e60d6e
 DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd",
e60d6e
@@ -316,12 +316,12 @@ DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd",
e60d6e
 DEF_TUNE (X86_TUNE_USE_BT, "use_bt",
e60d6e
           m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
e60d6e
 	  | m_LAKEMONT | m_AMD_MULTIPLE | m_GOLDMONT | m_GOLDMONT_PLUS
e60d6e
-	  | m_TREMONT | m_GENERIC)
e60d6e
+	  | m_TREMONT | m_ALDERLAKE | m_GENERIC)
e60d6e
 
e60d6e
 /* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency
e60d6e
    for bit-manipulation instructions.  */
e60d6e
 DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi",
e60d6e
-	  m_SANDYBRIDGE | m_CORE_AVX2 | m_TREMONT | m_GENERIC)
e60d6e
+	  m_SANDYBRIDGE | m_CORE_AVX2 | m_TREMONT | m_ALDERLAKE | m_GENERIC)
e60d6e
 
e60d6e
 /* X86_TUNE_ADJUST_UNROLL: This enables adjusting the unroll factor based
e60d6e
    on hardware capabilities. Bdver3 hardware has a loop buffer which makes
e60d6e
@@ -333,11 +333,11 @@ DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4)
e60d6e
    if-converted sequence to one.  */
e60d6e
 DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
e60d6e
 	  m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GOLDMONT
e60d6e
-	  | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC)
e60d6e
+	  | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_GENERIC)
e60d6e
 
e60d6e
 /* X86_TUNE_AVOID_MFENCE: Use lock prefixed instructions instead of mfence.  */
e60d6e
 DEF_TUNE (X86_TUNE_AVOID_MFENCE, "avoid_mfence",
e60d6e
-	 m_CORE_ALL | m_BDVER | m_ZNVER | m_TREMONT | m_GENERIC)
e60d6e
+	 m_CORE_ALL | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_GENERIC)
e60d6e
 
e60d6e
 /* X86_TUNE_EXPAND_ABS: This enables a new abs pattern by
e60d6e
    generating instructions for abs (x) = (((signed) x >> (W-1) ^ x) -
e60d6e
@@ -361,7 +361,8 @@ DEF_TUNE (X86_TUNE_USE_HIMODE_FIOP, "use_himode_fiop",
e60d6e
 DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop",
e60d6e
           ~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL
e60d6e
 	    | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE
e60d6e
-	    | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC))
e60d6e
+	    | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE
e60d6e
+	    | m_GENERIC))
e60d6e
 
e60d6e
 /* X86_TUNE_USE_FFREEP: Use freep instruction instead of fstp.  */
e60d6e
 DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE)
e60d6e
@@ -370,7 +371,7 @@ DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE)
e60d6e
 DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants",
e60d6e
           m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
e60d6e
 	  | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GOLDMONT
e60d6e
-	  | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC)
e60d6e
+	  | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_GENERIC)
e60d6e
 
e60d6e
 /*****************************************************************************/
e60d6e
 /* SSE instruction selection tuning                                          */
e60d6e
@@ -385,15 +386,15 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill",
e60d6e
    of a sequence loading registers by parts.  */
e60d6e
 DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal",
e60d6e
 	  m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM
e60d6e
-	  | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS
e60d6e
-	  | m_TREMONT | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_GENERIC)
e60d6e
+	  | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE
e60d6e
+	  | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_GENERIC)
e60d6e
 
e60d6e
 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores
e60d6e
    instead of a sequence loading registers by parts.  */
e60d6e
 DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal",
e60d6e
 	  m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM
e60d6e
 	  | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS
e60d6e
-	  | m_TREMONT | m_BDVER | m_ZNVER | m_GENERIC)
e60d6e
+	  | m_TREMONT | m_ALDERLAKE | m_BDVER | m_ZNVER | m_GENERIC)
e60d6e
 
e60d6e
 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL: Use packed single
e60d6e
    precision 128bit instructions instead of double where possible.   */
e60d6e
@@ -402,13 +403,13 @@ DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optim
e60d6e
 
e60d6e
 /* X86_TUNE_SSE_TYPELESS_STORES: Always movaps/movups for 128bit stores.   */
e60d6e
 DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores",
e60d6e
-	  m_AMD_MULTIPLE | m_CORE_ALL | m_TREMONT | m_GENERIC)
e60d6e
+	  m_AMD_MULTIPLE | m_CORE_ALL | m_TREMONT | m_ALDERLAKE | m_GENERIC)
e60d6e
 
e60d6e
 /* X86_TUNE_SSE_LOAD0_BY_PXOR: Always use pxor to load0 as opposed to
e60d6e
    xorps/xorpd and other variants.  */
e60d6e
 DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor",
e60d6e
 	  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BDVER | m_BTVER | m_ZNVER
e60d6e
-	  | m_TREMONT | m_GENERIC)
e60d6e
+	  | m_TREMONT | m_ALDERLAKE | m_GENERIC)
e60d6e
 
e60d6e
 /* X86_TUNE_INTER_UNIT_MOVES_TO_VEC: Enable moves in from integer
e60d6e
    to SSE registers.  If disabled, the moves will be done by storing
e60d6e
@@ -454,11 +455,12 @@ DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb",
e60d6e
 
e60d6e
 /* X86_TUNE_AVOID_4BYTE_PREFIXES: Avoid instructions requiring 4+ bytes of prefixes.  */
e60d6e
 DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
e60d6e
-	  m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_INTEL)
e60d6e
+	  m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE
e60d6e
+	  | m_INTEL)
e60d6e
 
e60d6e
 /* X86_TUNE_USE_GATHER: Use gather instructions.  */
e60d6e
 DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
e60d6e
-	  ~(m_ZNVER1 | m_ZNVER2 | m_GENERIC))
e60d6e
+	  ~(m_ZNVER1 | m_ZNVER2 | m_GENERIC | m_ALDERLAKE))
e60d6e
 
e60d6e
 /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
e60d6e
    smaller FMA chain.  */
e60d6e
-- 
e60d6e
2.17.1
e60d6e