Tree - rpms/devtoolset-11-gcc - CentOS Git server

rpms / devtoolset-11-gcc

Blame SOURCES/gcc11-mtune-alderlake.patch

Blob History Raw

		a46658	`From 54ccc52ba3f842cd94718967465a6015a752ca47 Mon Sep 17 00:00:00 2001`
		a46658	`From: "Cui,Lili" <lili.cui@intel.com>`
		a46658	`Date: Thu, 4 Nov 2021 10:38:56 +0800`
		a46658	`Subject: [PATCH] x86: Update -mtune=alderlake`
		a46658	`MIME-Version: 1.0`
		a46658	`Content-Type: text/plain; charset=UTF-8`
		a46658	`Content-Transfer-Encoding: 8bit`
		a46658
		a46658	`Update mtune for alderlake, Alder Lake Intel Hybrid Technology will not support`
		a46658	`Intel® AVX-512. ISA features such as Intel® AVX, AVX-VNNI, Intel® AVX2, and`
		a46658	`UMONITOR/UMWAIT/TPAUSE are supported.`
		a46658
		a46658	`gcc/ChangeLog`
		a46658
		a46658	`* config/i386/i386-options.c (m_CORE_AVX2): Remove Alderlake`
		a46658	`from m_CORE_AVX2.`
		a46658	`(processor_cost_table): Use alderlake_cost for Alderlake.`
		a46658	`* config/i386/i386.c (ix86_sched_init_global): Handle Alderlake.`
		a46658	`* config/i386/x86-tune-costs.h (struct processor_costs): Add alderlake`
		a46658	`cost.`
		a46658	`* config/i386/x86-tune-sched.c (ix86_issue_rate): Change Alderlake`
		a46658	`issue rate to 4.`
		a46658	`(ix86_adjust_cost): Handle Alderlake.`
		a46658	`* config/i386/x86-tune.def (X86_TUNE_SCHEDULE): Enable for Alderlake.`
		a46658	`(X86_TUNE_PARTIAL_REG_DEPENDENCY): Likewise.`
		a46658	`(X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY): Likewise.`
		a46658	`(X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY): Likewise.`
		a46658	`(X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY): Likewise.`
		a46658	`(X86_TUNE_MEMORY_MISMATCH_STALL): Likewise.`
		a46658	`(X86_TUNE_USE_LEAVE): Likewise.`
		a46658	`(X86_TUNE_PUSH_MEMORY): Likewise.`
		a46658	`(X86_TUNE_USE_INCDEC): Likewise.`
		a46658	`(X86_TUNE_INTEGER_DFMODE_MOVES): Likewise.`
		a46658	`(X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES): Likewise.`
		a46658	`(X86_TUNE_USE_SAHF): Likewise.`
		a46658	`(X86_TUNE_USE_BT): Likewise.`
		a46658	`(X86_TUNE_AVOID_FALSE_DEP_FOR_BMI): Likewise.`
		a46658	`(X86_TUNE_ONE_IF_CONV_INSN): Likewise.`
		a46658	`(X86_TUNE_AVOID_MFENCE): Likewise.`
		a46658	`(X86_TUNE_USE_SIMODE_FIOP): Likewise.`
		a46658	`(X86_TUNE_EXT_80387_CONSTANTS): Likewise.`
		a46658	`(X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL): Likewise.`
		a46658	`(X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL): Likewise.`
		a46658	`(X86_TUNE_SSE_TYPELESS_STORES): Likewise.`
		a46658	`(X86_TUNE_SSE_LOAD0_BY_PXOR): Likewise.`
		a46658	`(X86_TUNE_AVOID_4BYTE_PREFIXES): Likewise.`
		a46658	`(X86_TUNE_USE_GATHER): Disable for Alderlake.`
		a46658	`---`
		a46658	`gcc/config/i386/i386-options.c \| 4 +-`
		a46658	`gcc/config/i386/i386.c \| 1 +`
		a46658	`gcc/config/i386/x86-tune-costs.h \| 120 +++++++++++++++++++++++++++++++`
		a46658	`gcc/config/i386/x86-tune-sched.c \| 2 +`
		a46658	`gcc/config/i386/x86-tune.def \| 58 +++++++--------`
		a46658	`5 files changed, 155 insertions(+), 30 deletions(-)`
		a46658
		a46658	`diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c`
		a46658	`index e7a3bd4aaea..a8cc0664f11 100644`
		a46658	`--- a/gcc/config/i386/i386-options.c`
		a46658	`+++ b/gcc/config/i386/i386-options.c`
		a46658	`@@ -131,7 +131,7 @@ along with GCC; see the file COPYING3. If not see`
		a46658	`\| m_ICELAKE_CLIENT \| m_ICELAKE_SERVER \| m_CASCADELAKE \`
		a46658	`\| m_TIGERLAKE \| m_COOPERLAKE \| m_SAPPHIRERAPIDS \`
		a46658	`\| m_ROCKETLAKE)`
		a46658	`-#define m_CORE_AVX2 (m_HASWELL \| m_SKYLAKE \| m_ALDERLAKE \| m_CORE_AVX512)`
		a46658	`+#define m_CORE_AVX2 (m_HASWELL \| m_SKYLAKE \| m_CORE_AVX512)`
		a46658	`#define m_CORE_ALL (m_CORE2 \| m_NEHALEM \| m_SANDYBRIDGE \| m_CORE_AVX2)`
		a46658	`#define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT)`
		a46658	`#define m_GOLDMONT_PLUS (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT_PLUS)`
		a46658	`@@ -736,7 +736,7 @@ static const struct processor_costs *processor_cost_table[] =`
		a46658	`&icelake_cost,`
		a46658	`&skylake_cost,`
		a46658	`&icelake_cost,`
		a46658	`- &icelake_cost,`
		a46658	`+ &alderlake_cost,`
		a46658	`&icelake_cost,`
		a46658	`&intel_cost,`
		a46658	`&geode_cost,`
		a46658	`diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c`
		a46658	`index e94efdf39fb..73c4d5115bb 100644`
		a46658	`--- a/gcc/config/i386/i386.c`
		a46658	`+++ b/gcc/config/i386/i386.c`
		a46658	`@@ -17014,6 +17014,7 @@ ix86_sched_init_global (FILE *, int, int)`
		a46658	`case PROCESSOR_SANDYBRIDGE:`
		a46658	`case PROCESSOR_HASWELL:`
		a46658	`case PROCESSOR_TREMONT:`
		a46658	`+ case PROCESSOR_ALDERLAKE:`
		a46658	`case PROCESSOR_GENERIC:`
		a46658	`/* Do not perform multipass scheduling for pre-reload schedule`
		a46658	`to save compile time. */`
		a46658	`diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h`
		a46658	`index 93644be9cb3..dd5563d2e64 100644`
		a46658	`--- a/gcc/config/i386/x86-tune-costs.h`
		a46658	`+++ b/gcc/config/i386/x86-tune-costs.h`
		a46658	`@@ -2070,6 +2070,126 @@ struct processor_costs icelake_cost = {`
		a46658	`"16", /* Func alignment. */`
		a46658	`};`
		a46658
		a46658	`+/* alderlake_cost should produce code tuned for alderlake family of CPUs. */`
		a46658	`+static stringop_algs alderlake_memcpy[2] = {`
		a46658	`+ {libcall,`
		a46658	`+ {{256, rep_prefix_1_byte, true},`
		a46658	`+ {256, loop, false},`
		a46658	`+ {-1, libcall, false}}},`
		a46658	`+ {libcall,`
		a46658	`+ {{256, rep_prefix_1_byte, true},`
		a46658	`+ {256, loop, false},`
		a46658	`+ {-1, libcall, false}}}};`
		a46658	`+static stringop_algs alderlake_memset[2] = {`
		a46658	`+ {libcall,`
		a46658	`+ {{256, rep_prefix_1_byte, true},`
		a46658	`+ {256, loop, false},`
		a46658	`+ {-1, libcall, false}}},`
		a46658	`+ {libcall,`
		a46658	`+ {{256, rep_prefix_1_byte, true},`
		a46658	`+ {256, loop, false},`
		a46658	`+ {-1, libcall, false}}}};`
		a46658	`+static const`
		a46658	`+struct processor_costs alderlake_cost = {`
		a46658	`+ {`
		a46658	`+ /* Start of register allocator costs. integer->integer move cost is 2. */`
		a46658	`+ 6, /* cost for loading QImode using movzbl */`
		a46658	`+ {6, 6, 6}, /* cost of loading integer registers`
		a46658	`+ in QImode, HImode and SImode.`
		a46658	`+ Relative to reg-reg move (2). */`
		a46658	`+ {6, 6, 6}, /* cost of storing integer registers */`
		a46658	`+ 4, /* cost of reg,reg fld/fst */`
		a46658	`+ {6, 6, 12}, /* cost of loading fp registers`
		a46658	`+ in SFmode, DFmode and XFmode */`
		a46658	`+ {6, 6, 12}, /* cost of storing fp registers`
		a46658	`+ in SFmode, DFmode and XFmode */`
		a46658	`+ 2, /* cost of moving MMX register */`
		a46658	`+ {6, 6}, /* cost of loading MMX registers`
		a46658	`+ in SImode and DImode */`
		a46658	`+ {6, 6}, /* cost of storing MMX registers`
		a46658	`+ in SImode and DImode */`
		a46658	`+ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */`
		a46658	`+ {6, 6, 6, 10, 15}, /* cost of loading SSE registers`
		a46658	`+ in 32,64,128,256 and 512-bit */`
		a46658	`+ {6, 6, 6, 10, 15}, /* cost of storing SSE registers`
		a46658	`+ in 32,64,128,256 and 512-bit */`
		a46658	`+ 6, 6, /* SSE->integer and integer->SSE moves */`
		a46658	`+ 6, 6, /* mask->integer and integer->mask moves */`
		a46658	`+ {6, 6, 6}, /* cost of loading mask register`
		a46658	`+ in QImode, HImode, SImode. */`
		a46658	`+ {6, 6, 6}, /* cost if storing mask register`
		a46658	`+ in QImode, HImode, SImode. */`
		a46658	`+ 2, /* cost of moving mask register. */`
		a46658	`+ /* End of register allocator costs. */`
		a46658	`+ },`
		a46658	`+`
		a46658	`+ COSTS_N_INSNS (1), /* cost of an add instruction */`
		a46658	`+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */`
		a46658	`+ COSTS_N_INSNS (1), /* variable shift costs */`
		a46658	`+ COSTS_N_INSNS (1), /* constant shift costs */`
		a46658	`+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */`
		a46658	`+ COSTS_N_INSNS (4), /* HI */`
		a46658	`+ COSTS_N_INSNS (3), /* SI */`
		a46658	`+ COSTS_N_INSNS (4), /* DI */`
		a46658	`+ COSTS_N_INSNS (4)}, /* other */`
		a46658	`+ 0, /* cost of multiply per each bit set */`
		a46658	`+ {COSTS_N_INSNS (16), /* cost of a divide/mod for QI */`
		a46658	`+ COSTS_N_INSNS (22), /* HI */`
		a46658	`+ COSTS_N_INSNS (30), /* SI */`
		a46658	`+ COSTS_N_INSNS (74), /* DI */`
		a46658	`+ COSTS_N_INSNS (74)}, /* other */`
		a46658	`+ COSTS_N_INSNS (1), /* cost of movsx */`
		a46658	`+ COSTS_N_INSNS (1), /* cost of movzx */`
		a46658	`+ 8, /* "large" insn */`
		a46658	`+ 17, /* MOVE_RATIO */`
		a46658	`+ 17, /* CLEAR_RATIO */`
		a46658	`+ {6, 6, 6}, /* cost of loading integer registers`
		a46658	`+ in QImode, HImode and SImode.`
		a46658	`+ Relative to reg-reg move (2). */`
		a46658	`+ {6, 6, 6}, /* cost of storing integer registers */`
		a46658	`+ {6, 6, 6, 10, 15}, /* cost of loading SSE register`
		a46658	`+ in 32bit, 64bit, 128bit, 256bit and 512bit */`
		a46658	`+ {6, 6, 6, 10, 15}, /* cost of storing SSE register`
		a46658	`+ in 32bit, 64bit, 128bit, 256bit and 512bit */`
		a46658	`+ {6, 6, 6, 10, 15}, /* cost of unaligned loads. */`
		a46658	`+ {6, 6, 6, 10, 15}, /* cost of unaligned storess. */`
		a46658	`+ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */`
		a46658	`+ 6, /* cost of moving SSE register to integer. */`
		a46658	`+ 18, 6, /* Gather load static, per_elt. */`
		a46658	`+ 18, 6, /* Gather store static, per_elt. */`
		a46658	`+ 32, /* size of l1 cache. */`
		a46658	`+ 512, /* size of l2 cache. */`
		a46658	`+ 64, /* size of prefetch block */`
		a46658	`+ 6, /* number of parallel prefetches */`
		a46658	`+ 3, /* Branch cost */`
		a46658	`+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */`
		a46658	`+ COSTS_N_INSNS (5), /* cost of FMUL instruction. */`
		a46658	`+ COSTS_N_INSNS (17), /* cost of FDIV instruction. */`
		a46658	`+ COSTS_N_INSNS (1), /* cost of FABS instruction. */`
		a46658	`+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */`
		a46658	`+ COSTS_N_INSNS (14), /* cost of FSQRT instruction. */`
		a46658	`+`
		a46658	`+ COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */`
		a46658	`+ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */`
		a46658	`+ COSTS_N_INSNS (4), /* cost of MULSS instruction. */`
		a46658	`+ COSTS_N_INSNS (5), /* cost of MULSD instruction. */`
		a46658	`+ COSTS_N_INSNS (5), /* cost of FMA SS instruction. */`
		a46658	`+ COSTS_N_INSNS (5), /* cost of FMA SD instruction. */`
		a46658	`+ COSTS_N_INSNS (13), /* cost of DIVSS instruction. */`
		a46658	`+ COSTS_N_INSNS (17), /* cost of DIVSD instruction. */`
		a46658	`+ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */`
		a46658	`+ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */`
		a46658	`+ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */`
		a46658	`+ alderlake_memcpy,`
		a46658	`+ alderlake_memset,`
		a46658	`+ COSTS_N_INSNS (4), /* cond_taken_branch_cost. */`
		a46658	`+ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */`
		a46658	`+ "16:11:8", /* Loop alignment. */`
		a46658	`+ "16:11:8", /* Jump alignment. */`
		a46658	`+ "0:0:8", /* Label alignment. */`
		a46658	`+ "16", /* Func alignment. */`
		a46658	`+};`
		a46658	`+`
		a46658	`/* BTVER1 has optimized REP instruction for medium sized blocks, but for`
		a46658	`very small blocks it is better to use loop. For large blocks, libcall can`
		a46658	`do nontemporary accesses and beat inline considerably. */`
		a46658	`diff --git a/gcc/config/i386/x86-tune-sched.c b/gcc/config/i386/x86-tune-sched.c`
		a46658	`index 56ada99a450..0c149a09531 100644`
		a46658	`--- a/gcc/config/i386/x86-tune-sched.c`
		a46658	`+++ b/gcc/config/i386/x86-tune-sched.c`
		a46658	`@@ -72,6 +72,7 @@ ix86_issue_rate (void)`
		a46658	`case PROCESSOR_SANDYBRIDGE:`
		a46658	`case PROCESSOR_HASWELL:`
		a46658	`case PROCESSOR_TREMONT:`
		a46658	`+ case PROCESSOR_ALDERLAKE:`
		a46658	`case PROCESSOR_GENERIC:`
		a46658	`return 4;`
		a46658
		a46658	`@@ -431,6 +432,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,`
		a46658	`case PROCESSOR_SANDYBRIDGE:`
		a46658	`case PROCESSOR_HASWELL:`
		a46658	`case PROCESSOR_TREMONT:`
		a46658	`+ case PROCESSOR_ALDERLAKE:`
		a46658	`case PROCESSOR_GENERIC:`
		a46658	`/* Stack engine allows to execute push&pop instructions in parall. */`
		a46658	`if ((insn_type == TYPE_PUSH \|\| insn_type == TYPE_POP)`
		a46658	`diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def`
		a46658	`index 58e8ead56b4..4ae0b569841 100644`
		a46658	`--- a/gcc/config/i386/x86-tune.def`
		a46658	`+++ b/gcc/config/i386/x86-tune.def`
		a46658	`@@ -42,7 +42,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see`
		a46658	`DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",`
		a46658	`m_PENT \| m_LAKEMONT \| m_PPRO \| m_CORE_ALL \| m_BONNELL \| m_SILVERMONT`
		a46658	`\| m_INTEL \| m_KNL \| m_KNM \| m_K6_GEODE \| m_AMD_MULTIPLE \| m_GOLDMONT`
		a46658	`- \| m_GOLDMONT_PLUS \| m_TREMONT \| m_GENERIC)`
		a46658	`+ \| m_GOLDMONT_PLUS \| m_TREMONT \| m_ALDERLAKE \|m_GENERIC)`
		a46658
		a46658	`/* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming`
		a46658	`on modern chips. Preffer stores affecting whole integer register`
		a46658	`@@ -51,7 +51,7 @@ DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",`
		a46658	`DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency",`
		a46658	`m_P4_NOCONA \| m_CORE2 \| m_NEHALEM \| m_SANDYBRIDGE \| m_CORE_AVX2`
		a46658	`\| m_BONNELL \| m_SILVERMONT \| m_GOLDMONT \| m_GOLDMONT_PLUS \| m_INTEL`
		a46658	`- \| m_KNL \| m_KNM \| m_AMD_MULTIPLE \| m_TREMONT`
		a46658	`+ \| m_KNL \| m_KNM \| m_AMD_MULTIPLE \| m_TREMONT \| m_ALDERLAKE`
		a46658	`\| m_GENERIC)`
		a46658
		a46658	`/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store`
		a46658	`@@ -62,7 +62,7 @@ DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency",`
		a46658	`that can be partly masked by careful scheduling of moves. */`
		a46658	`DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency",`
		a46658	`m_PPRO \| m_P4_NOCONA \| m_CORE_ALL \| m_BONNELL \| m_AMDFAM10`
		a46658	`- \| m_BDVER \| m_ZNVER \| m_TREMONT \| m_GENERIC)`
		a46658	`+ \| m_BDVER \| m_ZNVER \| m_TREMONT \| m_ALDERLAKE \| m_GENERIC)`
		a46658
		a46658	`/* X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY: This knob avoids`
		a46658	`partial write to the destination in scalar SSE conversion from FP`
		a46658	`@@ -70,14 +70,14 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency",`
		a46658	`DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY,`
		a46658	`"sse_partial_reg_fp_converts_dependency",`
		a46658	`m_PPRO \| m_P4_NOCONA \| m_CORE_ALL \| m_BONNELL \| m_AMDFAM10`
		a46658	`- \| m_BDVER \| m_ZNVER \| m_GENERIC)`
		a46658	`+ \| m_BDVER \| m_ZNVER \| m_ALDERLAKE\| m_GENERIC)`
		a46658
		a46658	`/* X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY: This knob avoids partial`
		a46658	`write to the destination in scalar SSE conversion from integer to FP. */`
		a46658	`DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY,`
		a46658	`"sse_partial_reg_converts_dependency",`
		a46658	`m_PPRO \| m_P4_NOCONA \| m_CORE_ALL \| m_BONNELL \| m_AMDFAM10`
		a46658	`- \| m_BDVER \| m_ZNVER \| m_GENERIC)`
		a46658	`+ \| m_BDVER \| m_ZNVER \| m_ALDERLAKE \| m_GENERIC)`
		a46658
		a46658	`/* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies`
		a46658	`are resolved on SSE register parts instead of whole registers, so we may`
		a46658	`@@ -103,14 +103,14 @@ DEF_TUNE (X86_TUNE_MOVX, "movx",`
		a46658	`m_PPRO \| m_P4_NOCONA \| m_CORE2 \| m_NEHALEM \| m_SANDYBRIDGE`
		a46658	`\| m_BONNELL \| m_SILVERMONT \| m_GOLDMONT \| m_KNL \| m_KNM \| m_INTEL`
		a46658	`\| m_GOLDMONT_PLUS \| m_GEODE \| m_AMD_MULTIPLE`
		a46658	`- \| m_CORE_AVX2 \| m_TREMONT \| m_GENERIC)`
		a46658	`+ \| m_CORE_AVX2 \| m_TREMONT \| m_ALDERLAKE \| m_GENERIC)`
		a46658
		a46658	`/* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by`
		a46658	`full sized loads. */`
		a46658	`DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall",`
		a46658	`m_P4_NOCONA \| m_CORE_ALL \| m_BONNELL \| m_SILVERMONT \| m_INTEL`
		a46658	`\| m_KNL \| m_KNM \| m_GOLDMONT \| m_GOLDMONT_PLUS \| m_AMD_MULTIPLE`
		a46658	`- \| m_TREMONT \| m_GENERIC)`
		a46658	`+ \| m_TREMONT \| m_ALDERLAKE \| m_GENERIC)`
		a46658
		a46658	`/* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent`
		a46658	`conditional jump instruction for 32 bit TARGET. */`
		a46658	`@@ -166,14 +166,14 @@ DEF_TUNE (X86_TUNE_EPILOGUE_USING_MOVE, "epilogue_using_move",`
		a46658	`/* X86_TUNE_USE_LEAVE: Use "leave" instruction in epilogues where it fits. */`
		a46658	`DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave",`
		a46658	`m_386 \| m_CORE_ALL \| m_K6_GEODE \| m_AMD_MULTIPLE \| m_TREMONT`
		a46658	`- \| m_GENERIC)`
		a46658	`+ \| m_ALDERLAKE \| m_GENERIC)`
		a46658
		a46658	`/* X86_TUNE_PUSH_MEMORY: Enable generation of "push mem" instructions.`
		a46658	`Some chips, like 486 and Pentium works faster with separate load`
		a46658	`and push instructions. */`
		a46658	`DEF_TUNE (X86_TUNE_PUSH_MEMORY, "push_memory",`
		a46658	`m_386 \| m_P4_NOCONA \| m_CORE_ALL \| m_K6_GEODE \| m_AMD_MULTIPLE`
		a46658	`- \| m_TREMONT \| m_GENERIC)`
		a46658	`+ \| m_TREMONT \| m_ALDERLAKE \| m_GENERIC)`
		a46658
		a46658	`/* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred`
		a46658	`over esp subtraction. */`
		a46658	`@@ -243,14 +243,14 @@ DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT \| m_LAKEMONT \| m_PPRO))`
		a46658	`DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec",`
		a46658	`~(m_P4_NOCONA \| m_CORE2 \| m_NEHALEM \| m_SANDYBRIDGE`
		a46658	`\| m_BONNELL \| m_SILVERMONT \| m_INTEL \| m_KNL \| m_KNM \| m_GOLDMONT`
		a46658	`- \| m_GOLDMONT_PLUS \| m_TREMONT \| m_GENERIC))`
		a46658	`+ \| m_GOLDMONT_PLUS \| m_TREMONT \| m_ALDERLAKE \| m_GENERIC))`
		a46658
		a46658	`/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred`
		a46658	`for DFmode copies */`
		a46658	`DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves",`
		a46658	`~(m_PPRO \| m_P4_NOCONA \| m_CORE_ALL \| m_BONNELL \| m_SILVERMONT`
		a46658	`\| m_KNL \| m_KNM \| m_INTEL \| m_GEODE \| m_AMD_MULTIPLE \| m_GOLDMONT`
		a46658	`- \| m_GOLDMONT_PLUS \| m_TREMONT \| m_GENERIC))`
		a46658	`+ \| m_GOLDMONT_PLUS \| m_TREMONT \| m_ALDERLAKE \| m_GENERIC))`
		a46658
		a46658	`/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag`
		a46658	`will impact LEA instruction selection. */`
		a46658	`@@ -298,14 +298,14 @@ DEF_TUNE (X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB,`
		a46658	`DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES,`
		a46658	`"misaligned_move_string_pro_epilogues",`
		a46658	`m_386 \| m_486 \| m_CORE_ALL \| m_AMD_MULTIPLE \| m_TREMONT`
		a46658	`- \| m_GENERIC)`
		a46658	`+ \| m_ALDERLAKE \|m_GENERIC)`
		a46658
		a46658	`/* X86_TUNE_USE_SAHF: Controls use of SAHF. */`
		a46658	`DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf",`
		a46658	`m_PPRO \| m_P4_NOCONA \| m_CORE_ALL \| m_BONNELL \| m_SILVERMONT`
		a46658	`\| m_KNL \| m_KNM \| m_INTEL \| m_K6_GEODE \| m_K8 \| m_AMDFAM10 \| m_BDVER`
		a46658	`\| m_BTVER \| m_ZNVER \| m_GOLDMONT \| m_GOLDMONT_PLUS \| m_TREMONT`
		a46658	`- \| m_GENERIC)`
		a46658	`+ \| m_ALDERLAKE \| m_GENERIC)`
		a46658
		a46658	`/* X86_TUNE_USE_CLTD: Controls use of CLTD and CTQO instructions. */`
		a46658	`DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd",`
		a46658	`@@ -316,12 +316,12 @@ DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd",`
		a46658	`DEF_TUNE (X86_TUNE_USE_BT, "use_bt",`
		a46658	`m_CORE_ALL \| m_BONNELL \| m_SILVERMONT \| m_KNL \| m_KNM \| m_INTEL`
		a46658	`\| m_LAKEMONT \| m_AMD_MULTIPLE \| m_GOLDMONT \| m_GOLDMONT_PLUS`
		a46658	`- \| m_TREMONT \| m_GENERIC)`
		a46658	`+ \| m_TREMONT \| m_ALDERLAKE \| m_GENERIC)`
		a46658
		a46658	`/* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency`
		a46658	`for bit-manipulation instructions. */`
		a46658	`DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi",`
		a46658	`- m_SANDYBRIDGE \| m_CORE_AVX2 \| m_TREMONT \| m_GENERIC)`
		a46658	`+ m_SANDYBRIDGE \| m_CORE_AVX2 \| m_TREMONT \| m_ALDERLAKE \| m_GENERIC)`
		a46658
		a46658	`/* X86_TUNE_ADJUST_UNROLL: This enables adjusting the unroll factor based`
		a46658	`on hardware capabilities. Bdver3 hardware has a loop buffer which makes`
		a46658	`@@ -333,11 +333,11 @@ DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 \| m_BDVER4)`
		a46658	`if-converted sequence to one. */`
		a46658	`DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",`
		a46658	`m_SILVERMONT \| m_KNL \| m_KNM \| m_INTEL \| m_CORE_ALL \| m_GOLDMONT`
		a46658	`- \| m_GOLDMONT_PLUS \| m_TREMONT \| m_GENERIC)`
		a46658	`+ \| m_GOLDMONT_PLUS \| m_TREMONT \| m_ALDERLAKE \| m_GENERIC)`
		a46658
		a46658	`/* X86_TUNE_AVOID_MFENCE: Use lock prefixed instructions instead of mfence. */`
		a46658	`DEF_TUNE (X86_TUNE_AVOID_MFENCE, "avoid_mfence",`
		a46658	`- m_CORE_ALL \| m_BDVER \| m_ZNVER \| m_TREMONT \| m_GENERIC)`
		a46658	`+ m_CORE_ALL \| m_BDVER \| m_ZNVER \| m_TREMONT \| m_ALDERLAKE \| m_GENERIC)`
		a46658
		a46658	`/* X86_TUNE_EXPAND_ABS: This enables a new abs pattern by`
		a46658	`generating instructions for abs (x) = (((signed) x >> (W-1) ^ x) -`
		a46658	`@@ -361,7 +361,8 @@ DEF_TUNE (X86_TUNE_USE_HIMODE_FIOP, "use_himode_fiop",`
		a46658	`DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop",`
		a46658	`~(m_PENT \| m_LAKEMONT \| m_PPRO \| m_CORE_ALL \| m_BONNELL`
		a46658	`\| m_SILVERMONT \| m_KNL \| m_KNM \| m_INTEL \| m_AMD_MULTIPLE`
		a46658	`- \| m_GOLDMONT \| m_GOLDMONT_PLUS \| m_TREMONT \| m_GENERIC))`
		a46658	`+ \| m_GOLDMONT \| m_GOLDMONT_PLUS \| m_TREMONT \| m_ALDERLAKE`
		a46658	`+ \| m_GENERIC))`
		a46658
		a46658	`/* X86_TUNE_USE_FFREEP: Use freep instruction instead of fstp. */`
		a46658	`DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE)`
		a46658	`@@ -370,7 +371,7 @@ DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE)`
		a46658	`DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants",`
		a46658	`m_PPRO \| m_P4_NOCONA \| m_CORE_ALL \| m_BONNELL \| m_SILVERMONT`
		a46658	`\| m_KNL \| m_KNM \| m_INTEL \| m_K6_GEODE \| m_ATHLON_K8 \| m_GOLDMONT`
		a46658	`- \| m_GOLDMONT_PLUS \| m_TREMONT \| m_GENERIC)`
		a46658	`+ \| m_GOLDMONT_PLUS \| m_TREMONT \| m_ALDERLAKE \| m_GENERIC)`
		a46658
		a46658	`/*****************************************************************************/`
		a46658	`/* SSE instruction selection tuning */`
		a46658	`@@ -385,15 +386,15 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill",`
		a46658	`of a sequence loading registers by parts. */`
		a46658	`DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal",`
		a46658	`m_NEHALEM \| m_SANDYBRIDGE \| m_CORE_AVX2 \| m_SILVERMONT \| m_KNL \| m_KNM`
		a46658	`- \| m_INTEL \| m_GOLDMONT \| m_GOLDMONT_PLUS`
		a46658	`- \| m_TREMONT \| m_AMDFAM10 \| m_BDVER \| m_BTVER \| m_ZNVER \| m_GENERIC)`
		a46658	`+ \| m_INTEL \| m_GOLDMONT \| m_GOLDMONT_PLUS \| m_TREMONT \| m_ALDERLAKE`
		a46658	`+ \| m_AMDFAM10 \| m_BDVER \| m_BTVER \| m_ZNVER \| m_GENERIC)`
		a46658
		a46658	`/* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores`
		a46658	`instead of a sequence loading registers by parts. */`
		a46658	`DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal",`
		a46658	`m_NEHALEM \| m_SANDYBRIDGE \| m_CORE_AVX2 \| m_SILVERMONT \| m_KNL \| m_KNM`
		a46658	`\| m_INTEL \| m_GOLDMONT \| m_GOLDMONT_PLUS`
		a46658	`- \| m_TREMONT \| m_BDVER \| m_ZNVER \| m_GENERIC)`
		a46658	`+ \| m_TREMONT \| m_ALDERLAKE \| m_BDVER \| m_ZNVER \| m_GENERIC)`
		a46658
		a46658	`/* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL: Use packed single`
		a46658	`precision 128bit instructions instead of double where possible. */`
		a46658	`@@ -402,13 +403,13 @@ DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optim`
		a46658
		a46658	`/* X86_TUNE_SSE_TYPELESS_STORES: Always movaps/movups for 128bit stores. */`
		a46658	`DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores",`
		a46658	`- m_AMD_MULTIPLE \| m_CORE_ALL \| m_TREMONT \| m_GENERIC)`
		a46658	`+ m_AMD_MULTIPLE \| m_CORE_ALL \| m_TREMONT \| m_ALDERLAKE \| m_GENERIC)`
		a46658
		a46658	`/* X86_TUNE_SSE_LOAD0_BY_PXOR: Always use pxor to load0 as opposed to`
		a46658	`xorps/xorpd and other variants. */`
		a46658	`DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor",`
		a46658	`m_PPRO \| m_P4_NOCONA \| m_CORE_ALL \| m_BDVER \| m_BTVER \| m_ZNVER`
		a46658	`- \| m_TREMONT \| m_GENERIC)`
		a46658	`+ \| m_TREMONT \| m_ALDERLAKE \| m_GENERIC)`
		a46658
		a46658	`/* X86_TUNE_INTER_UNIT_MOVES_TO_VEC: Enable moves in from integer`
		a46658	`to SSE registers. If disabled, the moves will be done by storing`
		a46658	`@@ -454,11 +455,12 @@ DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb",`
		a46658
		a46658	`/* X86_TUNE_AVOID_4BYTE_PREFIXES: Avoid instructions requiring 4+ bytes of prefixes. */`
		a46658	`DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",`
		a46658	`- m_SILVERMONT \| m_GOLDMONT \| m_GOLDMONT_PLUS \| m_TREMONT \| m_INTEL)`
		a46658	`+ m_SILVERMONT \| m_GOLDMONT \| m_GOLDMONT_PLUS \| m_TREMONT \| m_ALDERLAKE`
		a46658	`+ \| m_INTEL)`
		a46658
		a46658	`/* X86_TUNE_USE_GATHER: Use gather instructions. */`
		a46658	`DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",`
		a46658	`- ~(m_ZNVER1 \| m_ZNVER2 \| m_GENERIC))`
		a46658	`+ ~(m_ZNVER1 \| m_ZNVER2 \| m_GENERIC \| m_ALDERLAKE))`
		a46658
		a46658	`/* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or`
		a46658	`smaller FMA chain. */`
		a46658	`--`
		a46658	`2.17.1`
		a46658

rpms / devtoolset-11-gcc

Source Code

Blame SOURCES/gcc11-mtune-alderlake.patch