Blame SOURCES/gcc11-mtune-alderlake.patch

f49307
From 54ccc52ba3f842cd94718967465a6015a752ca47 Mon Sep 17 00:00:00 2001
f49307
From: "Cui,Lili" <lili.cui@intel.com>
f49307
Date: Thu, 4 Nov 2021 10:38:56 +0800
f49307
Subject: [PATCH] x86: Update -mtune=alderlake
f49307
MIME-Version: 1.0
f49307
Content-Type: text/plain; charset=UTF-8
f49307
Content-Transfer-Encoding: 8bit
f49307
f49307
Update mtune for alderlake.  Alder Lake Intel Hybrid Technology will not support
f49307
Intel® AVX-512. ISA features such as Intel® AVX, AVX-VNNI, Intel® AVX2, and
f49307
UMONITOR/UMWAIT/TPAUSE are supported.
f49307
f49307
gcc/ChangeLog
f49307
f49307
	* config/i386/i386-options.c (m_CORE_AVX2): Remove Alderlake
f49307
	from m_CORE_AVX2.
f49307
	(processor_cost_table): Use alderlake_cost for Alderlake.
f49307
	* config/i386/i386.c (ix86_sched_init_global): Handle Alderlake.
f49307
	* config/i386/x86-tune-costs.h (struct processor_costs): Add alderlake
f49307
	cost.
f49307
	* config/i386/x86-tune-sched.c (ix86_issue_rate): Change Alderlake
f49307
	issue rate to 4.
f49307
	(ix86_adjust_cost): Handle Alderlake.
f49307
	* config/i386/x86-tune.def (X86_TUNE_SCHEDULE): Enable for Alderlake.
f49307
	(X86_TUNE_PARTIAL_REG_DEPENDENCY): Likewise.
f49307
	(X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY): Likewise.
f49307
	(X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY): Likewise.
f49307
	(X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY): Likewise.
f49307
	(X86_TUNE_MEMORY_MISMATCH_STALL): Likewise.
f49307
	(X86_TUNE_USE_LEAVE): Likewise.
f49307
	(X86_TUNE_PUSH_MEMORY): Likewise.
f49307
	(X86_TUNE_USE_INCDEC): Likewise.
f49307
	(X86_TUNE_INTEGER_DFMODE_MOVES): Likewise.
f49307
	(X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES): Likewise.
f49307
	(X86_TUNE_USE_SAHF): Likewise.
f49307
	(X86_TUNE_USE_BT): Likewise.
f49307
	(X86_TUNE_AVOID_FALSE_DEP_FOR_BMI): Likewise.
f49307
	(X86_TUNE_ONE_IF_CONV_INSN): Likewise.
f49307
	(X86_TUNE_AVOID_MFENCE): Likewise.
f49307
	(X86_TUNE_USE_SIMODE_FIOP): Likewise.
f49307
	(X86_TUNE_EXT_80387_CONSTANTS): Likewise.
f49307
	(X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL): Likewise.
f49307
	(X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL): Likewise.
f49307
	(X86_TUNE_SSE_TYPELESS_STORES): Likewise.
f49307
	(X86_TUNE_SSE_LOAD0_BY_PXOR): Likewise.
f49307
	(X86_TUNE_AVOID_4BYTE_PREFIXES): Likewise.
f49307
	(X86_TUNE_USE_GATHER): Disable for Alderlake.
f49307
---
f49307
 gcc/config/i386/i386-options.c   |   4 +-
f49307
 gcc/config/i386/i386.c           |   1 +
f49307
 gcc/config/i386/x86-tune-costs.h | 120 +++++++++++++++++++++++++++++++
f49307
 gcc/config/i386/x86-tune-sched.c |   2 +
f49307
 gcc/config/i386/x86-tune.def     |  58 +++++++--------
f49307
 5 files changed, 155 insertions(+), 30 deletions(-)
f49307
f49307
diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
f49307
index e7a3bd4aaea..a8cc0664f11 100644
f49307
--- a/gcc/config/i386/i386-options.c
f49307
+++ b/gcc/config/i386/i386-options.c
f49307
@@ -131,7 +131,7 @@ along with GCC; see the file COPYING3.  If not see
f49307
 		       | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \
f49307
 		       | m_TIGERLAKE | m_COOPERLAKE | m_SAPPHIRERAPIDS \
f49307
 		       | m_ROCKETLAKE)
f49307
-#define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_ALDERLAKE | m_CORE_AVX512)
f49307
+#define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_CORE_AVX512)
f49307
 #define m_CORE_ALL (m_CORE2 | m_NEHALEM  | m_SANDYBRIDGE | m_CORE_AVX2)
f49307
 #define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT)
f49307
 #define m_GOLDMONT_PLUS (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT_PLUS)
f49307
@@ -736,7 +736,7 @@ static const struct processor_costs *processor_cost_table[] =
f49307
   &icelake_cost,
f49307
   &skylake_cost,
f49307
   &icelake_cost,
f49307
-  &icelake_cost,
f49307
+  &alderlake_cost,
f49307
   &icelake_cost,
f49307
   &intel_cost,
f49307
   &geode_cost,
f49307
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
f49307
index e94efdf39fb..73c4d5115bb 100644
f49307
--- a/gcc/config/i386/i386.c
f49307
+++ b/gcc/config/i386/i386.c
f49307
@@ -17014,6 +17014,7 @@ ix86_sched_init_global (FILE *, int, int)
f49307
     case PROCESSOR_SANDYBRIDGE:
f49307
     case PROCESSOR_HASWELL:
f49307
     case PROCESSOR_TREMONT:
f49307
+    case PROCESSOR_ALDERLAKE:
f49307
     case PROCESSOR_GENERIC:
f49307
       /* Do not perform multipass scheduling for pre-reload schedule
f49307
          to save compile time.  */
f49307
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
f49307
index 93644be9cb3..dd5563d2e64 100644
f49307
--- a/gcc/config/i386/x86-tune-costs.h
f49307
+++ b/gcc/config/i386/x86-tune-costs.h
f49307
@@ -2070,6 +2070,126 @@ struct processor_costs icelake_cost = {
f49307
   "16",					/* Func alignment.  */
f49307
 };
f49307
 
f49307
+/* alderlake_cost should produce code tuned for alderlake family of CPUs.  */
f49307
+static stringop_algs alderlake_memcpy[2] = {
f49307
+  {libcall,
f49307
+   {{256, rep_prefix_1_byte, true},
f49307
+    {256, loop, false},
f49307
+    {-1, libcall, false}}},
f49307
+  {libcall,
f49307
+   {{256, rep_prefix_1_byte, true},
f49307
+    {256, loop, false},
f49307
+    {-1, libcall, false}}}};
f49307
+static stringop_algs alderlake_memset[2] = {
f49307
+  {libcall,
f49307
+   {{256, rep_prefix_1_byte, true},
f49307
+    {256, loop, false},
f49307
+    {-1, libcall, false}}},
f49307
+  {libcall,
f49307
+   {{256, rep_prefix_1_byte, true},
f49307
+    {256, loop, false},
f49307
+    {-1, libcall, false}}}};
f49307
+static const
f49307
+struct processor_costs alderlake_cost = {
f49307
+  {
f49307
+  /* Start of register allocator costs.  integer->integer move cost is 2.  */
f49307
+  6,				     /* cost for loading QImode using movzbl */
f49307
+  {6, 6, 6},				/* cost of loading integer registers
f49307
+					   in QImode, HImode and SImode.
f49307
+					   Relative to reg-reg move (2).  */
f49307
+  {6, 6, 6},				/* cost of storing integer registers */
f49307
+  4,					/* cost of reg,reg fld/fst */
f49307
+  {6, 6, 12},				/* cost of loading fp registers
f49307
+					   in SFmode, DFmode and XFmode */
f49307
+  {6, 6, 12},				/* cost of storing fp registers
f49307
+					   in SFmode, DFmode and XFmode */
f49307
+  2,					/* cost of moving MMX register */
f49307
+  {6, 6},				/* cost of loading MMX registers
f49307
+					   in SImode and DImode */
f49307
+  {6, 6},				/* cost of storing MMX registers
f49307
+					   in SImode and DImode */
f49307
+  2, 3, 4,				/* cost of moving XMM,YMM,ZMM register */
f49307
+  {6, 6, 6, 10, 15},			/* cost of loading SSE registers
f49307
+					   in 32,64,128,256 and 512-bit */
f49307
+  {6, 6, 6, 10, 15},			/* cost of storing SSE registers
f49307
+					   in 32,64,128,256 and 512-bit */
f49307
+  6, 6,				/* SSE->integer and integer->SSE moves */
f49307
+  6, 6,				/* mask->integer and integer->mask moves */
f49307
+  {6, 6, 6},				/* cost of loading mask register
f49307
+					   in QImode, HImode, SImode.  */
f49307
+  {6, 6, 6},			/* cost if storing mask register
f49307
+					   in QImode, HImode, SImode.  */
f49307
+  2,					/* cost of moving mask register.  */
f49307
+  /* End of register allocator costs.  */
f49307
+  },
f49307
+
f49307
+  COSTS_N_INSNS (1),			/* cost of an add instruction */
f49307
+  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
f49307
+  COSTS_N_INSNS (1),			/* variable shift costs */
f49307
+  COSTS_N_INSNS (1),			/* constant shift costs */
f49307
+  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
f49307
+   COSTS_N_INSNS (4),			/*				 HI */
f49307
+   COSTS_N_INSNS (3),			/*				 SI */
f49307
+   COSTS_N_INSNS (4),			/*				 DI */
f49307
+   COSTS_N_INSNS (4)},			/*			      other */
f49307
+  0,					/* cost of multiply per each bit set */
f49307
+  {COSTS_N_INSNS (16),			/* cost of a divide/mod for QI */
f49307
+   COSTS_N_INSNS (22),			/*			    HI */
f49307
+   COSTS_N_INSNS (30),			/*			    SI */
f49307
+   COSTS_N_INSNS (74),			/*			    DI */
f49307
+   COSTS_N_INSNS (74)},			/*			    other */
f49307
+  COSTS_N_INSNS (1),			/* cost of movsx */
f49307
+  COSTS_N_INSNS (1),			/* cost of movzx */
f49307
+  8,					/* "large" insn */
f49307
+  17,					/* MOVE_RATIO */
f49307
+  17,					/* CLEAR_RATIO */
f49307
+  {6, 6, 6},				/* cost of loading integer registers
f49307
+					   in QImode, HImode and SImode.
f49307
+					   Relative to reg-reg move (2).  */
f49307
+  {6, 6, 6},				/* cost of storing integer registers */
f49307
+  {6, 6, 6, 10, 15},			/* cost of loading SSE register
f49307
+					   in 32bit, 64bit, 128bit, 256bit and 512bit */
f49307
+  {6, 6, 6, 10, 15},			/* cost of storing SSE register
f49307
+					   in 32bit, 64bit, 128bit, 256bit and 512bit */
f49307
+  {6, 6, 6, 10, 15},			/* cost of unaligned loads.  */
f49307
+  {6, 6, 6, 10, 15},			/* cost of unaligned storess.  */
f49307
+  2, 3, 4,				/* cost of moving XMM,YMM,ZMM register */
f49307
+  6,					/* cost of moving SSE register to integer.  */
f49307
+  18, 6,				/* Gather load static, per_elt.  */
f49307
+  18, 6,				/* Gather store static, per_elt.  */
f49307
+  32,					/* size of l1 cache.  */
f49307
+  512,					/* size of l2 cache.  */
f49307
+  64,					/* size of prefetch block */
f49307
+  6,					/* number of parallel prefetches */
f49307
+  3,					/* Branch cost */
f49307
+  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
f49307
+  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
f49307
+  COSTS_N_INSNS (17),			/* cost of FDIV instruction.  */
f49307
+  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
f49307
+  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
f49307
+  COSTS_N_INSNS (14),			/* cost of FSQRT instruction.  */
f49307
+
f49307
+  COSTS_N_INSNS (1),			/* cost of cheap SSE instruction.  */
f49307
+  COSTS_N_INSNS (3),			/* cost of ADDSS/SD SUBSS/SD insns.  */
f49307
+  COSTS_N_INSNS (4),			/* cost of MULSS instruction.  */
f49307
+  COSTS_N_INSNS (5),			/* cost of MULSD instruction.  */
f49307
+  COSTS_N_INSNS (5),			/* cost of FMA SS instruction.  */
f49307
+  COSTS_N_INSNS (5),			/* cost of FMA SD instruction.  */
f49307
+  COSTS_N_INSNS (13),			/* cost of DIVSS instruction.  */
f49307
+  COSTS_N_INSNS (17),			/* cost of DIVSD instruction.  */
f49307
+  COSTS_N_INSNS (14),			/* cost of SQRTSS instruction.  */
f49307
+  COSTS_N_INSNS (18),			/* cost of SQRTSD instruction.  */
f49307
+  1, 4, 3, 3,				/* reassoc int, fp, vec_int, vec_fp.  */
f49307
+  alderlake_memcpy,
f49307
+  alderlake_memset,
f49307
+  COSTS_N_INSNS (4),			/* cond_taken_branch_cost.  */
f49307
+  COSTS_N_INSNS (2),			/* cond_not_taken_branch_cost.  */
f49307
+  "16:11:8",				/* Loop alignment.  */
f49307
+  "16:11:8",				/* Jump alignment.  */
f49307
+  "0:0:8",				/* Label alignment.  */
f49307
+  "16",					/* Func alignment.  */
f49307
+};
f49307
+
f49307
   /* BTVER1 has optimized REP instruction for medium sized blocks, but for
f49307
      very small blocks it is better to use loop. For large blocks, libcall can
f49307
      do nontemporary accesses and beat inline considerably.  */
f49307
diff --git a/gcc/config/i386/x86-tune-sched.c b/gcc/config/i386/x86-tune-sched.c
f49307
index 56ada99a450..0c149a09531 100644
f49307
--- a/gcc/config/i386/x86-tune-sched.c
f49307
+++ b/gcc/config/i386/x86-tune-sched.c
f49307
@@ -72,6 +72,7 @@ ix86_issue_rate (void)
f49307
     case PROCESSOR_SANDYBRIDGE:
f49307
     case PROCESSOR_HASWELL:
f49307
     case PROCESSOR_TREMONT:
f49307
+    case PROCESSOR_ALDERLAKE:
f49307
     case PROCESSOR_GENERIC:
f49307
       return 4;
f49307
 
f49307
@@ -431,6 +432,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
f49307
     case PROCESSOR_SANDYBRIDGE:
f49307
     case PROCESSOR_HASWELL:
f49307
     case PROCESSOR_TREMONT:
f49307
+    case PROCESSOR_ALDERLAKE:
f49307
     case PROCESSOR_GENERIC:
f49307
       /* Stack engine allows to execute push&pop instructions in parall.  */
f49307
       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
f49307
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
f49307
index 58e8ead56b4..4ae0b569841 100644
f49307
--- a/gcc/config/i386/x86-tune.def
f49307
+++ b/gcc/config/i386/x86-tune.def
f49307
@@ -42,7 +42,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
f49307
 DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
f49307
           m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT
f49307
 	  | m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_GOLDMONT
f49307
-	  | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC)
f49307
+	  | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE |m_GENERIC)
f49307
 
f49307
 /* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming
f49307
    on modern chips.  Preffer stores affecting whole integer register
f49307
@@ -51,7 +51,7 @@ DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
f49307
 DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency",
f49307
           m_P4_NOCONA | m_CORE2 | m_NEHALEM  | m_SANDYBRIDGE | m_CORE_AVX2
f49307
 	  | m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL
f49307
-	  | m_KNL | m_KNM | m_AMD_MULTIPLE | m_TREMONT
f49307
+	  | m_KNL | m_KNM | m_AMD_MULTIPLE | m_TREMONT | m_ALDERLAKE
f49307
 	  | m_GENERIC)
f49307
 
f49307
 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store
f49307
@@ -62,7 +62,7 @@ DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency",
f49307
    that can be partly masked by careful scheduling of moves.  */
f49307
 DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency",
f49307
           m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
f49307
-	  | m_BDVER | m_ZNVER | m_TREMONT | m_GENERIC)
f49307
+	  | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_GENERIC)
f49307
 
f49307
 /* X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY: This knob avoids
f49307
    partial write to the destination in scalar SSE conversion from FP
f49307
@@ -70,14 +70,14 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency",
f49307
 DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY,
f49307
 	  "sse_partial_reg_fp_converts_dependency",
f49307
 	  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
f49307
-	  | m_BDVER | m_ZNVER | m_GENERIC)
f49307
+	  | m_BDVER | m_ZNVER | m_ALDERLAKE| m_GENERIC)
f49307
 
f49307
 /* X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY: This knob avoids partial
f49307
    write to the destination in scalar SSE conversion from integer to FP.  */
f49307
 DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY,
f49307
 	  "sse_partial_reg_converts_dependency",
f49307
 	  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
f49307
-	  | m_BDVER | m_ZNVER | m_GENERIC)
f49307
+	  | m_BDVER | m_ZNVER | m_ALDERLAKE | m_GENERIC)
f49307
 
f49307
 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
f49307
    are resolved on SSE register parts instead of whole registers, so we may
f49307
@@ -103,14 +103,14 @@ DEF_TUNE (X86_TUNE_MOVX, "movx",
f49307
           m_PPRO | m_P4_NOCONA | m_CORE2 | m_NEHALEM  | m_SANDYBRIDGE
f49307
 	  | m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_KNL | m_KNM | m_INTEL
f49307
 	  | m_GOLDMONT_PLUS | m_GEODE | m_AMD_MULTIPLE
f49307
-	  | m_CORE_AVX2 | m_TREMONT | m_GENERIC)
f49307
+	  | m_CORE_AVX2 | m_TREMONT | m_ALDERLAKE | m_GENERIC)
f49307
 
f49307
 /* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by
f49307
    full sized loads.  */
f49307
 DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall",
f49307
           m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
f49307
 	  | m_KNL | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_AMD_MULTIPLE
f49307
-	  | m_TREMONT | m_GENERIC)
f49307
+	  | m_TREMONT | m_ALDERLAKE | m_GENERIC)
f49307
 
f49307
 /* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent
f49307
    conditional jump instruction for 32 bit TARGET.  */
f49307
@@ -166,14 +166,14 @@ DEF_TUNE (X86_TUNE_EPILOGUE_USING_MOVE, "epilogue_using_move",
f49307
 /* X86_TUNE_USE_LEAVE: Use "leave" instruction in epilogues where it fits.  */
f49307
 DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave",
f49307
 	  m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_TREMONT
f49307
-	  | m_GENERIC)
f49307
+	  | m_ALDERLAKE | m_GENERIC)
f49307
 
f49307
 /* X86_TUNE_PUSH_MEMORY: Enable generation of "push mem" instructions.
f49307
    Some chips, like 486 and Pentium works faster with separate load
f49307
    and push instructions.  */
f49307
 DEF_TUNE (X86_TUNE_PUSH_MEMORY, "push_memory",
f49307
           m_386 | m_P4_NOCONA | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE
f49307
-          | m_TREMONT | m_GENERIC)
f49307
+	  | m_TREMONT | m_ALDERLAKE | m_GENERIC)
f49307
 
f49307
 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
f49307
    over esp subtraction.  */
f49307
@@ -243,14 +243,14 @@ DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT | m_LAKEMONT | m_PPRO))
f49307
 DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec",
f49307
           ~(m_P4_NOCONA | m_CORE2 | m_NEHALEM  | m_SANDYBRIDGE
f49307
 	    | m_BONNELL | m_SILVERMONT | m_INTEL |  m_KNL | m_KNM | m_GOLDMONT
f49307
-	    | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC))
f49307
+	    | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_GENERIC))
f49307
 
f49307
 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
f49307
    for DFmode copies */
f49307
 DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves",
f49307
           ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
f49307
 	    | m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GOLDMONT
f49307
-	    | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC))
f49307
+	    | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_GENERIC))
f49307
 
f49307
 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
f49307
    will impact LEA instruction selection. */
f49307
@@ -298,14 +298,14 @@ DEF_TUNE (X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB,
f49307
 DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES,
f49307
 	  "misaligned_move_string_pro_epilogues",
f49307
 	  m_386 | m_486 | m_CORE_ALL | m_AMD_MULTIPLE | m_TREMONT
f49307
-	  | m_GENERIC)
f49307
+	  | m_ALDERLAKE |m_GENERIC)
f49307
 
f49307
 /* X86_TUNE_USE_SAHF: Controls use of SAHF.  */
f49307
 DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf",
f49307
           m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
f49307
 	  | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER
f49307
 	  | m_BTVER | m_ZNVER | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT
f49307
-	  | m_GENERIC)
f49307
+	  | m_ALDERLAKE | m_GENERIC)
f49307
 
f49307
 /* X86_TUNE_USE_CLTD: Controls use of CLTD and CTQO instructions.  */
f49307
 DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd",
f49307
@@ -316,12 +316,12 @@ DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd",
f49307
 DEF_TUNE (X86_TUNE_USE_BT, "use_bt",
f49307
           m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
f49307
 	  | m_LAKEMONT | m_AMD_MULTIPLE | m_GOLDMONT | m_GOLDMONT_PLUS
f49307
-	  | m_TREMONT | m_GENERIC)
f49307
+	  | m_TREMONT | m_ALDERLAKE | m_GENERIC)
f49307
 
f49307
 /* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency
f49307
    for bit-manipulation instructions.  */
f49307
 DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi",
f49307
-	  m_SANDYBRIDGE | m_CORE_AVX2 | m_TREMONT | m_GENERIC)
f49307
+	  m_SANDYBRIDGE | m_CORE_AVX2 | m_TREMONT | m_ALDERLAKE | m_GENERIC)
f49307
 
f49307
 /* X86_TUNE_ADJUST_UNROLL: This enables adjusting the unroll factor based
f49307
    on hardware capabilities. Bdver3 hardware has a loop buffer which makes
f49307
@@ -333,11 +333,11 @@ DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4)
f49307
    if-converted sequence to one.  */
f49307
 DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
f49307
 	  m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GOLDMONT
f49307
-	  | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC)
f49307
+	  | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_GENERIC)
f49307
 
f49307
 /* X86_TUNE_AVOID_MFENCE: Use lock prefixed instructions instead of mfence.  */
f49307
 DEF_TUNE (X86_TUNE_AVOID_MFENCE, "avoid_mfence",
f49307
-	 m_CORE_ALL | m_BDVER | m_ZNVER | m_TREMONT | m_GENERIC)
f49307
+	 m_CORE_ALL | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_GENERIC)
f49307
 
f49307
 /* X86_TUNE_EXPAND_ABS: This enables a new abs pattern by
f49307
    generating instructions for abs (x) = (((signed) x >> (W-1) ^ x) -
f49307
@@ -361,7 +361,8 @@ DEF_TUNE (X86_TUNE_USE_HIMODE_FIOP, "use_himode_fiop",
f49307
 DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop",
f49307
           ~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL
f49307
 	    | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE
f49307
-	    | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC))
f49307
+	    | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE
f49307
+	    | m_GENERIC))
f49307
 
f49307
 /* X86_TUNE_USE_FFREEP: Use freep instruction instead of fstp.  */
f49307
 DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE)
f49307
@@ -370,7 +371,7 @@ DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE)
f49307
 DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants",
f49307
           m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
f49307
 	  | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GOLDMONT
f49307
-	  | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC)
f49307
+	  | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_GENERIC)
f49307
 
f49307
 /*****************************************************************************/
f49307
 /* SSE instruction selection tuning                                          */
f49307
@@ -385,15 +386,15 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill",
f49307
    of a sequence loading registers by parts.  */
f49307
 DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal",
f49307
 	  m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM
f49307
-	  | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS
f49307
-	  | m_TREMONT | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_GENERIC)
f49307
+	  | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE
f49307
+	  | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_GENERIC)
f49307
 
f49307
 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores
f49307
    instead of a sequence loading registers by parts.  */
f49307
 DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal",
f49307
 	  m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM
f49307
 	  | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS
f49307
-	  | m_TREMONT | m_BDVER | m_ZNVER | m_GENERIC)
f49307
+	  | m_TREMONT | m_ALDERLAKE | m_BDVER | m_ZNVER | m_GENERIC)
f49307
 
f49307
 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL: Use packed single
f49307
    precision 128bit instructions instead of double where possible.   */
f49307
@@ -402,13 +403,13 @@ DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optim
f49307
 
f49307
 /* X86_TUNE_SSE_TYPELESS_STORES: Always movaps/movups for 128bit stores.   */
f49307
 DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores",
f49307
-	  m_AMD_MULTIPLE | m_CORE_ALL | m_TREMONT | m_GENERIC)
f49307
+	  m_AMD_MULTIPLE | m_CORE_ALL | m_TREMONT | m_ALDERLAKE | m_GENERIC)
f49307
 
f49307
 /* X86_TUNE_SSE_LOAD0_BY_PXOR: Always use pxor to load0 as opposed to
f49307
    xorps/xorpd and other variants.  */
f49307
 DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor",
f49307
 	  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BDVER | m_BTVER | m_ZNVER
f49307
-	  | m_TREMONT | m_GENERIC)
f49307
+	  | m_TREMONT | m_ALDERLAKE | m_GENERIC)
f49307
 
f49307
 /* X86_TUNE_INTER_UNIT_MOVES_TO_VEC: Enable moves in from integer
f49307
    to SSE registers.  If disabled, the moves will be done by storing
f49307
@@ -454,11 +455,12 @@ DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb",
f49307
 
f49307
 /* X86_TUNE_AVOID_4BYTE_PREFIXES: Avoid instructions requiring 4+ bytes of prefixes.  */
f49307
 DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
f49307
-	  m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_INTEL)
f49307
+	  m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE
f49307
+	  | m_INTEL)
f49307
 
f49307
 /* X86_TUNE_USE_GATHER: Use gather instructions.  */
f49307
 DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
f49307
-	  ~(m_ZNVER1 | m_ZNVER2 | m_GENERIC))
f49307
+	  ~(m_ZNVER1 | m_ZNVER2 | m_GENERIC | m_ALDERLAKE))
f49307
 
f49307
 /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
f49307
    smaller FMA chain.  */
f49307
-- 
f49307
2.17.1
f49307