Blame SOURCES/gcc11-mtune-alderlake.patch

a46658
From 54ccc52ba3f842cd94718967465a6015a752ca47 Mon Sep 17 00:00:00 2001
a46658
From: "Cui,Lili" <lili.cui@intel.com>
a46658
Date: Thu, 4 Nov 2021 10:38:56 +0800
a46658
Subject: [PATCH] x86: Update -mtune=alderlake
a46658
MIME-Version: 1.0
a46658
Content-Type: text/plain; charset=UTF-8
a46658
Content-Transfer-Encoding: 8bit
a46658
a46658
Update mtune for alderlake, Alder Lake Intel Hybrid Technology will not support
a46658
Intel® AVX-512. ISA features such as Intel® AVX, AVX-VNNI, Intel® AVX2, and
a46658
UMONITOR/UMWAIT/TPAUSE are supported.
a46658
a46658
gcc/ChangeLog
a46658
a46658
	* config/i386/i386-options.c (m_CORE_AVX2): Remove Alderlake
a46658
	from m_CORE_AVX2.
a46658
	(processor_cost_table): Use alderlake_cost for Alderlake.
a46658
	* config/i386/i386.c (ix86_sched_init_global): Handle Alderlake.
a46658
	* config/i386/x86-tune-costs.h (struct processor_costs): Add alderlake
a46658
	cost.
a46658
	* config/i386/x86-tune-sched.c (ix86_issue_rate): Change Alderlake
a46658
	issue rate to 4.
a46658
	(ix86_adjust_cost): Handle Alderlake.
a46658
	* config/i386/x86-tune.def (X86_TUNE_SCHEDULE): Enable for Alderlake.
a46658
	(X86_TUNE_PARTIAL_REG_DEPENDENCY): Likewise.
a46658
	(X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY): Likewise.
a46658
	(X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY): Likewise.
a46658
	(X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY): Likewise.
a46658
	(X86_TUNE_MEMORY_MISMATCH_STALL): Likewise.
a46658
	(X86_TUNE_USE_LEAVE): Likewise.
a46658
	(X86_TUNE_PUSH_MEMORY): Likewise.
a46658
	(X86_TUNE_USE_INCDEC): Likewise.
a46658
	(X86_TUNE_INTEGER_DFMODE_MOVES): Likewise.
a46658
	(X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES): Likewise.
a46658
	(X86_TUNE_USE_SAHF): Likewise.
a46658
	(X86_TUNE_USE_BT): Likewise.
a46658
	(X86_TUNE_AVOID_FALSE_DEP_FOR_BMI): Likewise.
a46658
	(X86_TUNE_ONE_IF_CONV_INSN): Likewise.
a46658
	(X86_TUNE_AVOID_MFENCE): Likewise.
a46658
	(X86_TUNE_USE_SIMODE_FIOP): Likewise.
a46658
	(X86_TUNE_EXT_80387_CONSTANTS): Likewise.
a46658
	(X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL): Likewise.
a46658
	(X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL): Likewise.
a46658
	(X86_TUNE_SSE_TYPELESS_STORES): Likewise.
a46658
	(X86_TUNE_SSE_LOAD0_BY_PXOR): Likewise.
a46658
	(X86_TUNE_AVOID_4BYTE_PREFIXES): Likewise.
a46658
	(X86_TUNE_USE_GATHER): Disable for Alderlake.
a46658
---
a46658
 gcc/config/i386/i386-options.c   |   4 +-
a46658
 gcc/config/i386/i386.c           |   1 +
a46658
 gcc/config/i386/x86-tune-costs.h | 120 +++++++++++++++++++++++++++++++
a46658
 gcc/config/i386/x86-tune-sched.c |   2 +
a46658
 gcc/config/i386/x86-tune.def     |  58 +++++++--------
a46658
 5 files changed, 155 insertions(+), 30 deletions(-)
a46658
a46658
diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
a46658
index e7a3bd4aaea..a8cc0664f11 100644
a46658
--- a/gcc/config/i386/i386-options.c
a46658
+++ b/gcc/config/i386/i386-options.c
a46658
@@ -131,7 +131,7 @@ along with GCC; see the file COPYING3.  If not see
a46658
 		       | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \
a46658
 		       | m_TIGERLAKE | m_COOPERLAKE | m_SAPPHIRERAPIDS \
a46658
 		       | m_ROCKETLAKE)
a46658
-#define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_ALDERLAKE | m_CORE_AVX512)
a46658
+#define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_CORE_AVX512)
a46658
 #define m_CORE_ALL (m_CORE2 | m_NEHALEM  | m_SANDYBRIDGE | m_CORE_AVX2)
a46658
 #define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT)
a46658
 #define m_GOLDMONT_PLUS (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT_PLUS)
a46658
@@ -736,7 +736,7 @@ static const struct processor_costs *processor_cost_table[] =
a46658
   &icelake_cost,
a46658
   &skylake_cost,
a46658
   &icelake_cost,
a46658
-  &icelake_cost,
a46658
+  &alderlake_cost,
a46658
   &icelake_cost,
a46658
   &intel_cost,
a46658
   &geode_cost,
a46658
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
a46658
index e94efdf39fb..73c4d5115bb 100644
a46658
--- a/gcc/config/i386/i386.c
a46658
+++ b/gcc/config/i386/i386.c
a46658
@@ -17014,6 +17014,7 @@ ix86_sched_init_global (FILE *, int, int)
a46658
     case PROCESSOR_SANDYBRIDGE:
a46658
     case PROCESSOR_HASWELL:
a46658
     case PROCESSOR_TREMONT:
a46658
+    case PROCESSOR_ALDERLAKE:
a46658
     case PROCESSOR_GENERIC:
a46658
       /* Do not perform multipass scheduling for pre-reload schedule
a46658
          to save compile time.  */
a46658
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
a46658
index 93644be9cb3..dd5563d2e64 100644
a46658
--- a/gcc/config/i386/x86-tune-costs.h
a46658
+++ b/gcc/config/i386/x86-tune-costs.h
a46658
@@ -2070,6 +2070,126 @@ struct processor_costs icelake_cost = {
a46658
   "16",					/* Func alignment.  */
a46658
 };
a46658
 
a46658
+/* alderlake_cost should produce code tuned for alderlake family of CPUs.  */
a46658
+static stringop_algs alderlake_memcpy[2] = {
a46658
+  {libcall,
a46658
+   {{256, rep_prefix_1_byte, true},
a46658
+    {256, loop, false},
a46658
+    {-1, libcall, false}}},
a46658
+  {libcall,
a46658
+   {{256, rep_prefix_1_byte, true},
a46658
+    {256, loop, false},
a46658
+    {-1, libcall, false}}}};
a46658
+static stringop_algs alderlake_memset[2] = {
a46658
+  {libcall,
a46658
+   {{256, rep_prefix_1_byte, true},
a46658
+    {256, loop, false},
a46658
+    {-1, libcall, false}}},
a46658
+  {libcall,
a46658
+   {{256, rep_prefix_1_byte, true},
a46658
+    {256, loop, false},
a46658
+    {-1, libcall, false}}}};
a46658
+static const
a46658
+struct processor_costs alderlake_cost = {
a46658
+  {
a46658
+  /* Start of register allocator costs.  integer->integer move cost is 2.  */
a46658
+  6,				     /* cost for loading QImode using movzbl */
a46658
+  {6, 6, 6},				/* cost of loading integer registers
a46658
+					   in QImode, HImode and SImode.
a46658
+					   Relative to reg-reg move (2).  */
a46658
+  {6, 6, 6},				/* cost of storing integer registers */
a46658
+  4,					/* cost of reg,reg fld/fst */
a46658
+  {6, 6, 12},				/* cost of loading fp registers
a46658
+					   in SFmode, DFmode and XFmode */
a46658
+  {6, 6, 12},				/* cost of storing fp registers
a46658
+					   in SFmode, DFmode and XFmode */
a46658
+  2,					/* cost of moving MMX register */
a46658
+  {6, 6},				/* cost of loading MMX registers
a46658
+					   in SImode and DImode */
a46658
+  {6, 6},				/* cost of storing MMX registers
a46658
+					   in SImode and DImode */
a46658
+  2, 3, 4,				/* cost of moving XMM,YMM,ZMM register */
a46658
+  {6, 6, 6, 10, 15},			/* cost of loading SSE registers
a46658
+					   in 32,64,128,256 and 512-bit */
a46658
+  {6, 6, 6, 10, 15},			/* cost of storing SSE registers
a46658
+					   in 32,64,128,256 and 512-bit */
a46658
+  6, 6,				/* SSE->integer and integer->SSE moves */
a46658
+  6, 6,				/* mask->integer and integer->mask moves */
a46658
+  {6, 6, 6},				/* cost of loading mask register
a46658
+					   in QImode, HImode, SImode.  */
a46658
+  {6, 6, 6},			/* cost if storing mask register
a46658
+					   in QImode, HImode, SImode.  */
a46658
+  2,					/* cost of moving mask register.  */
a46658
+  /* End of register allocator costs.  */
a46658
+  },
a46658
+
a46658
+  COSTS_N_INSNS (1),			/* cost of an add instruction */
a46658
+  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
a46658
+  COSTS_N_INSNS (1),			/* variable shift costs */
a46658
+  COSTS_N_INSNS (1),			/* constant shift costs */
a46658
+  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
a46658
+   COSTS_N_INSNS (4),			/*				 HI */
a46658
+   COSTS_N_INSNS (3),			/*				 SI */
a46658
+   COSTS_N_INSNS (4),			/*				 DI */
a46658
+   COSTS_N_INSNS (4)},			/*			      other */
a46658
+  0,					/* cost of multiply per each bit set */
a46658
+  {COSTS_N_INSNS (16),			/* cost of a divide/mod for QI */
a46658
+   COSTS_N_INSNS (22),			/*			    HI */
a46658
+   COSTS_N_INSNS (30),			/*			    SI */
a46658
+   COSTS_N_INSNS (74),			/*			    DI */
a46658
+   COSTS_N_INSNS (74)},			/*			    other */
a46658
+  COSTS_N_INSNS (1),			/* cost of movsx */
a46658
+  COSTS_N_INSNS (1),			/* cost of movzx */
a46658
+  8,					/* "large" insn */
a46658
+  17,					/* MOVE_RATIO */
a46658
+  17,					/* CLEAR_RATIO */
a46658
+  {6, 6, 6},				/* cost of loading integer registers
a46658
+					   in QImode, HImode and SImode.
a46658
+					   Relative to reg-reg move (2).  */
a46658
+  {6, 6, 6},				/* cost of storing integer registers */
a46658
+  {6, 6, 6, 10, 15},			/* cost of loading SSE register
a46658
+					   in 32bit, 64bit, 128bit, 256bit and 512bit */
a46658
+  {6, 6, 6, 10, 15},			/* cost of storing SSE register
a46658
+					   in 32bit, 64bit, 128bit, 256bit and 512bit */
a46658
+  {6, 6, 6, 10, 15},			/* cost of unaligned loads.  */
a46658
+  {6, 6, 6, 10, 15},			/* cost of unaligned storess.  */
a46658
+  2, 3, 4,				/* cost of moving XMM,YMM,ZMM register */
a46658
+  6,					/* cost of moving SSE register to integer.  */
a46658
+  18, 6,				/* Gather load static, per_elt.  */
a46658
+  18, 6,				/* Gather store static, per_elt.  */
a46658
+  32,					/* size of l1 cache.  */
a46658
+  512,					/* size of l2 cache.  */
a46658
+  64,					/* size of prefetch block */
a46658
+  6,					/* number of parallel prefetches */
a46658
+  3,					/* Branch cost */
a46658
+  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
a46658
+  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
a46658
+  COSTS_N_INSNS (17),			/* cost of FDIV instruction.  */
a46658
+  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
a46658
+  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
a46658
+  COSTS_N_INSNS (14),			/* cost of FSQRT instruction.  */
a46658
+
a46658
+  COSTS_N_INSNS (1),			/* cost of cheap SSE instruction.  */
a46658
+  COSTS_N_INSNS (3),			/* cost of ADDSS/SD SUBSS/SD insns.  */
a46658
+  COSTS_N_INSNS (4),			/* cost of MULSS instruction.  */
a46658
+  COSTS_N_INSNS (5),			/* cost of MULSD instruction.  */
a46658
+  COSTS_N_INSNS (5),			/* cost of FMA SS instruction.  */
a46658
+  COSTS_N_INSNS (5),			/* cost of FMA SD instruction.  */
a46658
+  COSTS_N_INSNS (13),			/* cost of DIVSS instruction.  */
a46658
+  COSTS_N_INSNS (17),			/* cost of DIVSD instruction.  */
a46658
+  COSTS_N_INSNS (14),			/* cost of SQRTSS instruction.  */
a46658
+  COSTS_N_INSNS (18),			/* cost of SQRTSD instruction.  */
a46658
+  1, 4, 3, 3,				/* reassoc int, fp, vec_int, vec_fp.  */
a46658
+  alderlake_memcpy,
a46658
+  alderlake_memset,
a46658
+  COSTS_N_INSNS (4),			/* cond_taken_branch_cost.  */
a46658
+  COSTS_N_INSNS (2),			/* cond_not_taken_branch_cost.  */
a46658
+  "16:11:8",				/* Loop alignment.  */
a46658
+  "16:11:8",				/* Jump alignment.  */
a46658
+  "0:0:8",				/* Label alignment.  */
a46658
+  "16",					/* Func alignment.  */
a46658
+};
a46658
+
a46658
   /* BTVER1 has optimized REP instruction for medium sized blocks, but for
a46658
      very small blocks it is better to use loop. For large blocks, libcall can
a46658
      do nontemporary accesses and beat inline considerably.  */
a46658
diff --git a/gcc/config/i386/x86-tune-sched.c b/gcc/config/i386/x86-tune-sched.c
a46658
index 56ada99a450..0c149a09531 100644
a46658
--- a/gcc/config/i386/x86-tune-sched.c
a46658
+++ b/gcc/config/i386/x86-tune-sched.c
a46658
@@ -72,6 +72,7 @@ ix86_issue_rate (void)
a46658
     case PROCESSOR_SANDYBRIDGE:
a46658
     case PROCESSOR_HASWELL:
a46658
     case PROCESSOR_TREMONT:
a46658
+    case PROCESSOR_ALDERLAKE:
a46658
     case PROCESSOR_GENERIC:
a46658
       return 4;
a46658
 
a46658
@@ -431,6 +432,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
a46658
     case PROCESSOR_SANDYBRIDGE:
a46658
     case PROCESSOR_HASWELL:
a46658
     case PROCESSOR_TREMONT:
a46658
+    case PROCESSOR_ALDERLAKE:
a46658
     case PROCESSOR_GENERIC:
a46658
       /* Stack engine allows to execute push&pop instructions in parall.  */
a46658
       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
a46658
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
a46658
index 58e8ead56b4..4ae0b569841 100644
a46658
--- a/gcc/config/i386/x86-tune.def
a46658
+++ b/gcc/config/i386/x86-tune.def
a46658
@@ -42,7 +42,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
a46658
 DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
a46658
           m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT
a46658
 	  | m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_GOLDMONT
a46658
-	  | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC)
a46658
+	  | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE |m_GENERIC)
a46658
 
a46658
 /* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming
a46658
    on modern chips.  Preffer stores affecting whole integer register
a46658
@@ -51,7 +51,7 @@ DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
a46658
 DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency",
a46658
           m_P4_NOCONA | m_CORE2 | m_NEHALEM  | m_SANDYBRIDGE | m_CORE_AVX2
a46658
 	  | m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL
a46658
-	  | m_KNL | m_KNM | m_AMD_MULTIPLE | m_TREMONT
a46658
+	  | m_KNL | m_KNM | m_AMD_MULTIPLE | m_TREMONT | m_ALDERLAKE
a46658
 	  | m_GENERIC)
a46658
 
a46658
 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store
a46658
@@ -62,7 +62,7 @@ DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency",
a46658
    that can be partly masked by careful scheduling of moves.  */
a46658
 DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency",
a46658
           m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
a46658
-	  | m_BDVER | m_ZNVER | m_TREMONT | m_GENERIC)
a46658
+	  | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_GENERIC)
a46658
 
a46658
 /* X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY: This knob avoids
a46658
    partial write to the destination in scalar SSE conversion from FP
a46658
@@ -70,14 +70,14 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency",
a46658
 DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY,
a46658
 	  "sse_partial_reg_fp_converts_dependency",
a46658
 	  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
a46658
-	  | m_BDVER | m_ZNVER | m_GENERIC)
a46658
+	  | m_BDVER | m_ZNVER | m_ALDERLAKE| m_GENERIC)
a46658
 
a46658
 /* X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY: This knob avoids partial
a46658
    write to the destination in scalar SSE conversion from integer to FP.  */
a46658
 DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY,
a46658
 	  "sse_partial_reg_converts_dependency",
a46658
 	  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
a46658
-	  | m_BDVER | m_ZNVER | m_GENERIC)
a46658
+	  | m_BDVER | m_ZNVER | m_ALDERLAKE | m_GENERIC)
a46658
 
a46658
 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
a46658
    are resolved on SSE register parts instead of whole registers, so we may
a46658
@@ -103,14 +103,14 @@ DEF_TUNE (X86_TUNE_MOVX, "movx",
a46658
           m_PPRO | m_P4_NOCONA | m_CORE2 | m_NEHALEM  | m_SANDYBRIDGE
a46658
 	  | m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_KNL | m_KNM | m_INTEL
a46658
 	  | m_GOLDMONT_PLUS | m_GEODE | m_AMD_MULTIPLE
a46658
-	  | m_CORE_AVX2 | m_TREMONT | m_GENERIC)
a46658
+	  | m_CORE_AVX2 | m_TREMONT | m_ALDERLAKE | m_GENERIC)
a46658
 
a46658
 /* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by
a46658
    full sized loads.  */
a46658
 DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall",
a46658
           m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
a46658
 	  | m_KNL | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_AMD_MULTIPLE
a46658
-	  | m_TREMONT | m_GENERIC)
a46658
+	  | m_TREMONT | m_ALDERLAKE | m_GENERIC)
a46658
 
a46658
 /* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent
a46658
    conditional jump instruction for 32 bit TARGET.  */
a46658
@@ -166,14 +166,14 @@ DEF_TUNE (X86_TUNE_EPILOGUE_USING_MOVE, "epilogue_using_move",
a46658
 /* X86_TUNE_USE_LEAVE: Use "leave" instruction in epilogues where it fits.  */
a46658
 DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave",
a46658
 	  m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_TREMONT
a46658
-	  | m_GENERIC)
a46658
+	  | m_ALDERLAKE | m_GENERIC)
a46658
 
a46658
 /* X86_TUNE_PUSH_MEMORY: Enable generation of "push mem" instructions.
a46658
    Some chips, like 486 and Pentium works faster with separate load
a46658
    and push instructions.  */
a46658
 DEF_TUNE (X86_TUNE_PUSH_MEMORY, "push_memory",
a46658
           m_386 | m_P4_NOCONA | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE
a46658
-          | m_TREMONT | m_GENERIC)
a46658
+	  | m_TREMONT | m_ALDERLAKE | m_GENERIC)
a46658
 
a46658
 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
a46658
    over esp subtraction.  */
a46658
@@ -243,14 +243,14 @@ DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT | m_LAKEMONT | m_PPRO))
a46658
 DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec",
a46658
           ~(m_P4_NOCONA | m_CORE2 | m_NEHALEM  | m_SANDYBRIDGE
a46658
 	    | m_BONNELL | m_SILVERMONT | m_INTEL |  m_KNL | m_KNM | m_GOLDMONT
a46658
-	    | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC))
a46658
+	    | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_GENERIC))
a46658
 
a46658
 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
a46658
    for DFmode copies */
a46658
 DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves",
a46658
           ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
a46658
 	    | m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GOLDMONT
a46658
-	    | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC))
a46658
+	    | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_GENERIC))
a46658
 
a46658
 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
a46658
    will impact LEA instruction selection. */
a46658
@@ -298,14 +298,14 @@ DEF_TUNE (X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB,
a46658
 DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES,
a46658
 	  "misaligned_move_string_pro_epilogues",
a46658
 	  m_386 | m_486 | m_CORE_ALL | m_AMD_MULTIPLE | m_TREMONT
a46658
-	  | m_GENERIC)
a46658
+	  | m_ALDERLAKE |m_GENERIC)
a46658
 
a46658
 /* X86_TUNE_USE_SAHF: Controls use of SAHF.  */
a46658
 DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf",
a46658
           m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
a46658
 	  | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER
a46658
 	  | m_BTVER | m_ZNVER | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT
a46658
-	  | m_GENERIC)
a46658
+	  | m_ALDERLAKE | m_GENERIC)
a46658
 
a46658
 /* X86_TUNE_USE_CLTD: Controls use of CLTD and CTQO instructions.  */
a46658
 DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd",
a46658
@@ -316,12 +316,12 @@ DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd",
a46658
 DEF_TUNE (X86_TUNE_USE_BT, "use_bt",
a46658
           m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
a46658
 	  | m_LAKEMONT | m_AMD_MULTIPLE | m_GOLDMONT | m_GOLDMONT_PLUS
a46658
-	  | m_TREMONT | m_GENERIC)
a46658
+	  | m_TREMONT | m_ALDERLAKE | m_GENERIC)
a46658
 
a46658
 /* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency
a46658
    for bit-manipulation instructions.  */
a46658
 DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi",
a46658
-	  m_SANDYBRIDGE | m_CORE_AVX2 | m_TREMONT | m_GENERIC)
a46658
+	  m_SANDYBRIDGE | m_CORE_AVX2 | m_TREMONT | m_ALDERLAKE | m_GENERIC)
a46658
 
a46658
 /* X86_TUNE_ADJUST_UNROLL: This enables adjusting the unroll factor based
a46658
    on hardware capabilities. Bdver3 hardware has a loop buffer which makes
a46658
@@ -333,11 +333,11 @@ DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4)
a46658
    if-converted sequence to one.  */
a46658
 DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
a46658
 	  m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GOLDMONT
a46658
-	  | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC)
a46658
+	  | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_GENERIC)
a46658
 
a46658
 /* X86_TUNE_AVOID_MFENCE: Use lock prefixed instructions instead of mfence.  */
a46658
 DEF_TUNE (X86_TUNE_AVOID_MFENCE, "avoid_mfence",
a46658
-	 m_CORE_ALL | m_BDVER | m_ZNVER | m_TREMONT | m_GENERIC)
a46658
+	 m_CORE_ALL | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_GENERIC)
a46658
 
a46658
 /* X86_TUNE_EXPAND_ABS: This enables a new abs pattern by
a46658
    generating instructions for abs (x) = (((signed) x >> (W-1) ^ x) -
a46658
@@ -361,7 +361,8 @@ DEF_TUNE (X86_TUNE_USE_HIMODE_FIOP, "use_himode_fiop",
a46658
 DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop",
a46658
           ~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL
a46658
 	    | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE
a46658
-	    | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC))
a46658
+	    | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE
a46658
+	    | m_GENERIC))
a46658
 
a46658
 /* X86_TUNE_USE_FFREEP: Use freep instruction instead of fstp.  */
a46658
 DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE)
a46658
@@ -370,7 +371,7 @@ DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE)
a46658
 DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants",
a46658
           m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
a46658
 	  | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GOLDMONT
a46658
-	  | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC)
a46658
+	  | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_GENERIC)
a46658
 
a46658
 /*****************************************************************************/
a46658
 /* SSE instruction selection tuning                                          */
a46658
@@ -385,15 +386,15 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill",
a46658
    of a sequence loading registers by parts.  */
a46658
 DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal",
a46658
 	  m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM
a46658
-	  | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS
a46658
-	  | m_TREMONT | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_GENERIC)
a46658
+	  | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE
a46658
+	  | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_GENERIC)
a46658
 
a46658
 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores
a46658
    instead of a sequence loading registers by parts.  */
a46658
 DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal",
a46658
 	  m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM
a46658
 	  | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS
a46658
-	  | m_TREMONT | m_BDVER | m_ZNVER | m_GENERIC)
a46658
+	  | m_TREMONT | m_ALDERLAKE | m_BDVER | m_ZNVER | m_GENERIC)
a46658
 
a46658
 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL: Use packed single
a46658
    precision 128bit instructions instead of double where possible.   */
a46658
@@ -402,13 +403,13 @@ DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optim
a46658
 
a46658
 /* X86_TUNE_SSE_TYPELESS_STORES: Always movaps/movups for 128bit stores.   */
a46658
 DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores",
a46658
-	  m_AMD_MULTIPLE | m_CORE_ALL | m_TREMONT | m_GENERIC)
a46658
+	  m_AMD_MULTIPLE | m_CORE_ALL | m_TREMONT | m_ALDERLAKE | m_GENERIC)
a46658
 
a46658
 /* X86_TUNE_SSE_LOAD0_BY_PXOR: Always use pxor to load0 as opposed to
a46658
    xorps/xorpd and other variants.  */
a46658
 DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor",
a46658
 	  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BDVER | m_BTVER | m_ZNVER
a46658
-	  | m_TREMONT | m_GENERIC)
a46658
+	  | m_TREMONT | m_ALDERLAKE | m_GENERIC)
a46658
 
a46658
 /* X86_TUNE_INTER_UNIT_MOVES_TO_VEC: Enable moves in from integer
a46658
    to SSE registers.  If disabled, the moves will be done by storing
a46658
@@ -454,11 +455,12 @@ DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb",
a46658
 
a46658
 /* X86_TUNE_AVOID_4BYTE_PREFIXES: Avoid instructions requiring 4+ bytes of prefixes.  */
a46658
 DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
a46658
-	  m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_INTEL)
a46658
+	  m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE
a46658
+	  | m_INTEL)
a46658
 
a46658
 /* X86_TUNE_USE_GATHER: Use gather instructions.  */
a46658
 DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
a46658
-	  ~(m_ZNVER1 | m_ZNVER2 | m_GENERIC))
a46658
+	  ~(m_ZNVER1 | m_ZNVER2 | m_GENERIC | m_ALDERLAKE))
a46658
 
a46658
 /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
a46658
    smaller FMA chain.  */
a46658
-- 
a46658
2.17.1
a46658