|
|
0a1913 |
From 300c6315d2e644ae81b43fa2dd7bbf68b3afb5b2 Mon Sep 17 00:00:00 2001
|
|
|
0a1913 |
From: Daiki Ueno <ueno@gnu.org>
|
|
|
0a1913 |
Date: Thu, 18 Nov 2021 19:02:03 +0100
|
|
|
0a1913 |
Subject: [PATCH 1/2] accelerated: fix CPU feature detection for Intel CPUs
|
|
|
0a1913 |
|
|
|
0a1913 |
This fixes read_cpuid_vals to correctly read the CPUID quadruple, as
|
|
|
0a1913 |
well as to set the bit the upstream CRYPTOGAMS uses to identify Intel
|
|
|
0a1913 |
CPUs.
|
|
|
0a1913 |
|
|
|
0a1913 |
Suggested by Rafael Gieschke in:
|
|
|
0a1913 |
https://gitlab.com/gnutls/gnutls/-/issues/1282
|
|
|
0a1913 |
|
|
|
0a1913 |
Signed-off-by: Daiki Ueno <ueno@gnu.org>
|
|
|
0a1913 |
---
|
|
|
0a1913 |
lib/accelerated/x86/x86-common.c | 91 +++++++++++++++++++++++++-------
|
|
|
0a1913 |
1 file changed, 71 insertions(+), 20 deletions(-)
|
|
|
0a1913 |
|
|
|
0a1913 |
diff --git a/lib/accelerated/x86/x86-common.c b/lib/accelerated/x86/x86-common.c
|
|
|
0a1913 |
index 3845c6b4c9..cf615ef24f 100644
|
|
|
0a1913 |
--- a/lib/accelerated/x86/x86-common.c
|
|
|
0a1913 |
+++ b/lib/accelerated/x86/x86-common.c
|
|
|
0a1913 |
@@ -81,15 +81,38 @@ unsigned int _gnutls_x86_cpuid_s[4];
|
|
|
0a1913 |
# define bit_AVX 0x10000000
|
|
|
0a1913 |
#endif
|
|
|
0a1913 |
|
|
|
0a1913 |
-#ifndef OSXSAVE_MASK
|
|
|
0a1913 |
-/* OSXSAVE|FMA|MOVBE */
|
|
|
0a1913 |
-# define OSXSAVE_MASK (0x8000000|0x1000|0x400000)
|
|
|
0a1913 |
+#ifndef bit_AVX2
|
|
|
0a1913 |
+# define bit_AVX2 0x00000020
|
|
|
0a1913 |
+#endif
|
|
|
0a1913 |
+
|
|
|
0a1913 |
+#ifndef bit_AVX512F
|
|
|
0a1913 |
+# define bit_AVX512F 0x00010000
|
|
|
0a1913 |
+#endif
|
|
|
0a1913 |
+
|
|
|
0a1913 |
+#ifndef bit_AVX512IFMA
|
|
|
0a1913 |
+# define bit_AVX512IFMA 0x00200000
|
|
|
0a1913 |
+#endif
|
|
|
0a1913 |
+
|
|
|
0a1913 |
+#ifndef bit_AVX512BW
|
|
|
0a1913 |
+# define bit_AVX512BW 0x40000000
|
|
|
0a1913 |
+#endif
|
|
|
0a1913 |
+
|
|
|
0a1913 |
+#ifndef bit_AVX512VL
|
|
|
0a1913 |
+# define bit_AVX512VL 0x80000000
|
|
|
0a1913 |
+#endif
|
|
|
0a1913 |
+
|
|
|
0a1913 |
+#ifndef bit_OSXSAVE
|
|
|
0a1913 |
+# define bit_OSXSAVE 0x8000000
|
|
|
0a1913 |
#endif
|
|
|
0a1913 |
|
|
|
0a1913 |
#ifndef bit_MOVBE
|
|
|
0a1913 |
# define bit_MOVBE 0x00400000
|
|
|
0a1913 |
#endif
|
|
|
0a1913 |
|
|
|
0a1913 |
+#ifndef OSXSAVE_MASK
|
|
|
0a1913 |
+# define OSXSAVE_MASK (bit_OSXSAVE|bit_MOVBE)
|
|
|
0a1913 |
+#endif
|
|
|
0a1913 |
+
|
|
|
0a1913 |
#define via_bit_PADLOCK (0x3 << 6)
|
|
|
0a1913 |
#define via_bit_PADLOCK_PHE (0x3 << 10)
|
|
|
0a1913 |
#define via_bit_PADLOCK_PHE_SHA512 (0x3 << 25)
|
|
|
0a1913 |
@@ -127,7 +150,7 @@ static unsigned read_cpuid_vals(unsigned int vals[4])
|
|
|
0a1913 |
unsigned t1, t2, t3;
|
|
|
0a1913 |
vals[0] = vals[1] = vals[2] = vals[3] = 0;
|
|
|
0a1913 |
|
|
|
0a1913 |
- if (!__get_cpuid(1, &t1, &vals[0], &vals[1], &t2))
|
|
|
0a1913 |
+ if (!__get_cpuid(1, &t1, &t2, &vals[1], &vals[0]))
|
|
|
0a1913 |
return 0;
|
|
|
0a1913 |
/* suppress AVX512; it works conditionally on certain CPUs on the original code */
|
|
|
0a1913 |
vals[1] &= 0xfffff7ff;
|
|
|
0a1913 |
@@ -145,7 +168,7 @@ static unsigned check_4th_gen_intel_features(unsigned ecx)
|
|
|
0a1913 |
{
|
|
|
0a1913 |
uint32_t xcr0;
|
|
|
0a1913 |
|
|
|
0a1913 |
- if ((ecx & OSXSAVE_MASK) != OSXSAVE_MASK)
|
|
|
0a1913 |
+ if ((ecx & bit_OSXSAVE) != bit_OSXSAVE)
|
|
|
0a1913 |
return 0;
|
|
|
0a1913 |
|
|
|
0a1913 |
#if defined(_MSC_VER) && !defined(__clang__)
|
|
|
0a1913 |
@@ -233,10 +256,7 @@ static unsigned check_sha(void)
|
|
|
0a1913 |
#ifdef ASM_X86_64
|
|
|
0a1913 |
static unsigned check_avx_movbe(void)
|
|
|
0a1913 |
{
|
|
|
0a1913 |
- if (check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1]) == 0)
|
|
|
0a1913 |
- return 0;
|
|
|
0a1913 |
-
|
|
|
0a1913 |
- return ((_gnutls_x86_cpuid_s[1] & bit_AVX));
|
|
|
0a1913 |
+ return (_gnutls_x86_cpuid_s[1] & bit_AVX);
|
|
|
0a1913 |
}
|
|
|
0a1913 |
|
|
|
0a1913 |
static unsigned check_pclmul(void)
|
|
|
0a1913 |
@@ -514,33 +534,47 @@ void register_x86_padlock_crypto(unsigned capabilities)
|
|
|
0a1913 |
}
|
|
|
0a1913 |
#endif
|
|
|
0a1913 |
|
|
|
0a1913 |
-static unsigned check_intel_or_amd(void)
|
|
|
0a1913 |
+enum x86_cpu_vendor {
|
|
|
0a1913 |
+ X86_CPU_VENDOR_OTHER,
|
|
|
0a1913 |
+ X86_CPU_VENDOR_INTEL,
|
|
|
0a1913 |
+ X86_CPU_VENDOR_AMD,
|
|
|
0a1913 |
+};
|
|
|
0a1913 |
+
|
|
|
0a1913 |
+static enum x86_cpu_vendor check_x86_cpu_vendor(void)
|
|
|
0a1913 |
{
|
|
|
0a1913 |
unsigned int a, b, c, d;
|
|
|
0a1913 |
|
|
|
0a1913 |
- if (!__get_cpuid(0, &a, &b, &c, &d))
|
|
|
0a1913 |
- return 0;
|
|
|
0a1913 |
+ if (!__get_cpuid(0, &a, &b, &c, &d)) {
|
|
|
0a1913 |
+ return X86_CPU_VENDOR_OTHER;
|
|
|
0a1913 |
+ }
|
|
|
0a1913 |
|
|
|
0a1913 |
- if ((memcmp(&b, "Genu", 4) == 0 &&
|
|
|
0a1913 |
- memcmp(&d, "ineI", 4) == 0 &&
|
|
|
0a1913 |
- memcmp(&c, "ntel", 4) == 0) ||
|
|
|
0a1913 |
- (memcmp(&b, "Auth", 4) == 0 &&
|
|
|
0a1913 |
- memcmp(&d, "enti", 4) == 0 && memcmp(&c, "cAMD", 4) == 0)) {
|
|
|
0a1913 |
- return 1;
|
|
|
0a1913 |
+ if (memcmp(&b, "Genu", 4) == 0 &&
|
|
|
0a1913 |
+ memcmp(&d, "ineI", 4) == 0 &&
|
|
|
0a1913 |
+ memcmp(&c, "ntel", 4) == 0) {
|
|
|
0a1913 |
+ return X86_CPU_VENDOR_INTEL;
|
|
|
0a1913 |
}
|
|
|
0a1913 |
|
|
|
0a1913 |
- return 0;
|
|
|
0a1913 |
+ if (memcmp(&b, "Auth", 4) == 0 &&
|
|
|
0a1913 |
+ memcmp(&d, "enti", 4) == 0 &&
|
|
|
0a1913 |
+ memcmp(&c, "cAMD", 4) == 0) {
|
|
|
0a1913 |
+ return X86_CPU_VENDOR_AMD;
|
|
|
0a1913 |
+ }
|
|
|
0a1913 |
+
|
|
|
0a1913 |
+ return X86_CPU_VENDOR_OTHER;
|
|
|
0a1913 |
}
|
|
|
0a1913 |
|
|
|
0a1913 |
static
|
|
|
0a1913 |
void register_x86_intel_crypto(unsigned capabilities)
|
|
|
0a1913 |
{
|
|
|
0a1913 |
int ret;
|
|
|
0a1913 |
+ enum x86_cpu_vendor vendor;
|
|
|
0a1913 |
|
|
|
0a1913 |
memset(_gnutls_x86_cpuid_s, 0, sizeof(_gnutls_x86_cpuid_s));
|
|
|
0a1913 |
|
|
|
0a1913 |
- if (check_intel_or_amd() == 0)
|
|
|
0a1913 |
+ vendor = check_x86_cpu_vendor();
|
|
|
0a1913 |
+ if (vendor == X86_CPU_VENDOR_OTHER) {
|
|
|
0a1913 |
return;
|
|
|
0a1913 |
+ }
|
|
|
0a1913 |
|
|
|
0a1913 |
if (capabilities == 0) {
|
|
|
0a1913 |
if (!read_cpuid_vals(_gnutls_x86_cpuid_s))
|
|
|
0a1913 |
@@ -549,6 +583,23 @@ void register_x86_intel_crypto(unsigned capabilities)
|
|
|
0a1913 |
capabilities_to_intel_cpuid(capabilities);
|
|
|
0a1913 |
}
|
|
|
0a1913 |
|
|
|
0a1913 |
+ /* CRYPTOGAMS uses the (1 << 30) bit as an indicator of Intel CPUs */
|
|
|
0a1913 |
+ if (vendor == X86_CPU_VENDOR_INTEL) {
|
|
|
0a1913 |
+ _gnutls_x86_cpuid_s[0] |= 1 << 30;
|
|
|
0a1913 |
+ } else {
|
|
|
0a1913 |
+ _gnutls_x86_cpuid_s[0] &= ~(1 << 30);
|
|
|
0a1913 |
+ }
|
|
|
0a1913 |
+
|
|
|
0a1913 |
+ if (!check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1])) {
|
|
|
0a1913 |
+ _gnutls_x86_cpuid_s[1] &= ~bit_AVX;
|
|
|
0a1913 |
+
|
|
|
0a1913 |
+ /* Clear AVX2 bits as well, according to what OpenSSL does.
|
|
|
0a1913 |
+ * Should we clear bit_AVX512DQ, bit_AVX512PF, bit_AVX512ER, and
|
|
|
0a1913 |
+ * bit_AVX512CD? */
|
|
|
0a1913 |
+ _gnutls_x86_cpuid_s[2] &= ~(bit_AVX2|bit_AVX512F|bit_AVX512IFMA|
|
|
|
0a1913 |
+ bit_AVX512BW|bit_AVX512BW);
|
|
|
0a1913 |
+ }
|
|
|
0a1913 |
+
|
|
|
0a1913 |
if (check_ssse3()) {
|
|
|
0a1913 |
_gnutls_debug_log("Intel SSSE3 was detected\n");
|
|
|
0a1913 |
|
|
|
0a1913 |
--
|
|
|
0a1913 |
2.37.3
|
|
|
0a1913 |
|
|
|
0a1913 |
|
|
|
0a1913 |
From cd509dac9e6d1bf76fd12c72c1fd61f1708c254a Mon Sep 17 00:00:00 2001
|
|
|
0a1913 |
From: Daiki Ueno <ueno@gnu.org>
|
|
|
0a1913 |
Date: Mon, 15 Aug 2022 09:39:18 +0900
|
|
|
0a1913 |
Subject: [PATCH 2/2] accelerated: clear AVX bits if it cannot be queried
|
|
|
0a1913 |
through XSAVE
|
|
|
0a1913 |
MIME-Version: 1.0
|
|
|
0a1913 |
Content-Type: text/plain; charset=UTF-8
|
|
|
0a1913 |
Content-Transfer-Encoding: 8bit
|
|
|
0a1913 |
|
|
|
0a1913 |
The algorithm to detect AVX is described in 14.3 of "Intel® 64 and IA-32
|
|
|
0a1913 |
Architectures Software Developer’s Manual".
|
|
|
0a1913 |
|
|
|
0a1913 |
GnuTLS previously only followed that algorithm when registering the
|
|
|
0a1913 |
crypto backend, while the CRYPTOGAMS derived SHA code assembly expects
|
|
|
0a1913 |
that the extension bits are propagated to _gnutls_x86_cpuid_s.
|
|
|
0a1913 |
|
|
|
0a1913 |
Signed-off-by: Daiki Ueno <ueno@gnu.org>
|
|
|
0a1913 |
---
|
|
|
0a1913 |
lib/accelerated/x86/x86-common.c | 18 ++++++++++++++++--
|
|
|
0a1913 |
1 file changed, 16 insertions(+), 2 deletions(-)
|
|
|
0a1913 |
|
|
|
0a1913 |
diff --git a/lib/accelerated/x86/x86-common.c b/lib/accelerated/x86/x86-common.c
|
|
|
0a1913 |
index cf615ef24f..655d0c65f2 100644
|
|
|
0a1913 |
--- a/lib/accelerated/x86/x86-common.c
|
|
|
0a1913 |
+++ b/lib/accelerated/x86/x86-common.c
|
|
|
0a1913 |
@@ -210,7 +210,8 @@ static void capabilities_to_intel_cpuid(unsigned capabilities)
|
|
|
0a1913 |
}
|
|
|
0a1913 |
|
|
|
0a1913 |
if (capabilities & INTEL_AVX) {
|
|
|
0a1913 |
- if ((a[1] & bit_AVX) && check_4th_gen_intel_features(a[1])) {
|
|
|
0a1913 |
+ if ((a[1] & bit_AVX) && (a[1] & bit_MOVBE) &&
|
|
|
0a1913 |
+ check_4th_gen_intel_features(a[1])) {
|
|
|
0a1913 |
_gnutls_x86_cpuid_s[1] |= bit_AVX|bit_MOVBE;
|
|
|
0a1913 |
} else {
|
|
|
0a1913 |
_gnutls_debug_log
|
|
|
0a1913 |
@@ -256,7 +257,7 @@ static unsigned check_sha(void)
|
|
|
0a1913 |
#ifdef ASM_X86_64
|
|
|
0a1913 |
static unsigned check_avx_movbe(void)
|
|
|
0a1913 |
{
|
|
|
0a1913 |
- return (_gnutls_x86_cpuid_s[1] & bit_AVX);
|
|
|
0a1913 |
+ return (_gnutls_x86_cpuid_s[1] & (bit_AVX|bit_MOVBE)) == (bit_AVX|bit_MOVBE);
|
|
|
0a1913 |
}
|
|
|
0a1913 |
|
|
|
0a1913 |
static unsigned check_pclmul(void)
|
|
|
0a1913 |
@@ -579,6 +580,19 @@ void register_x86_intel_crypto(unsigned capabilities)
|
|
|
0a1913 |
if (capabilities == 0) {
|
|
|
0a1913 |
if (!read_cpuid_vals(_gnutls_x86_cpuid_s))
|
|
|
0a1913 |
return;
|
|
|
0a1913 |
+ if (!check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1])) {
|
|
|
0a1913 |
+ _gnutls_x86_cpuid_s[1] &= ~bit_AVX;
|
|
|
0a1913 |
+
|
|
|
0a1913 |
+ /* Clear AVX2 bits as well, according to what
|
|
|
0a1913 |
+ * OpenSSL does. Should we clear
|
|
|
0a1913 |
+ * bit_AVX512DQ, bit_AVX512PF, bit_AVX512ER,
|
|
|
0a1913 |
+ * and bit_AVX512CD? */
|
|
|
0a1913 |
+ _gnutls_x86_cpuid_s[2] &= ~(bit_AVX2|
|
|
|
0a1913 |
+ bit_AVX512F|
|
|
|
0a1913 |
+ bit_AVX512IFMA|
|
|
|
0a1913 |
+ bit_AVX512BW|
|
|
|
0a1913 |
+ bit_AVX512BW);
|
|
|
0a1913 |
+ }
|
|
|
0a1913 |
} else {
|
|
|
0a1913 |
capabilities_to_intel_cpuid(capabilities);
|
|
|
0a1913 |
}
|
|
|
0a1913 |
--
|
|
|
0a1913 |
2.37.3
|
|
|
0a1913 |
|