47387d
From 300c6315d2e644ae81b43fa2dd7bbf68b3afb5b2 Mon Sep 17 00:00:00 2001
47387d
From: Daiki Ueno <ueno@gnu.org>
47387d
Date: Thu, 18 Nov 2021 19:02:03 +0100
47387d
Subject: [PATCH 1/2] accelerated: fix CPU feature detection for Intel CPUs
47387d
47387d
This fixes read_cpuid_vals to correctly read the CPUID quadruple, as
47387d
well as to set the bit the upstream CRYPTOGAMS uses to identify Intel
47387d
CPUs.
47387d
47387d
Suggested by Rafael Gieschke in:
47387d
https://gitlab.com/gnutls/gnutls/-/issues/1282
47387d
47387d
Signed-off-by: Daiki Ueno <ueno@gnu.org>
47387d
---
47387d
 lib/accelerated/x86/x86-common.c | 91 +++++++++++++++++++++++++-------
47387d
 1 file changed, 71 insertions(+), 20 deletions(-)
47387d
47387d
diff --git a/lib/accelerated/x86/x86-common.c b/lib/accelerated/x86/x86-common.c
47387d
index 3845c6b4c9..cf615ef24f 100644
47387d
--- a/lib/accelerated/x86/x86-common.c
47387d
+++ b/lib/accelerated/x86/x86-common.c
47387d
@@ -81,15 +81,38 @@ unsigned int _gnutls_x86_cpuid_s[4];
47387d
 # define bit_AVX 0x10000000
47387d
 #endif
47387d
 
47387d
-#ifndef OSXSAVE_MASK
47387d
-/* OSXSAVE|FMA|MOVBE */
47387d
-# define OSXSAVE_MASK (0x8000000|0x1000|0x400000)
47387d
+#ifndef bit_AVX2
47387d
+# define bit_AVX2 0x00000020
47387d
+#endif
47387d
+
47387d
+#ifndef bit_AVX512F
47387d
+# define bit_AVX512F 0x00010000
47387d
+#endif
47387d
+
47387d
+#ifndef bit_AVX512IFMA
47387d
+# define bit_AVX512IFMA 0x00200000
47387d
+#endif
47387d
+
47387d
+#ifndef bit_AVX512BW
47387d
+# define bit_AVX512BW 0x40000000
47387d
+#endif
47387d
+
47387d
+#ifndef bit_AVX512VL
47387d
+# define bit_AVX512VL 0x80000000
47387d
+#endif
47387d
+
47387d
+#ifndef bit_OSXSAVE
47387d
+# define bit_OSXSAVE 0x8000000
47387d
 #endif
47387d
 
47387d
 #ifndef bit_MOVBE
47387d
 # define bit_MOVBE 0x00400000
47387d
 #endif
47387d
 
47387d
+#ifndef OSXSAVE_MASK
47387d
+# define OSXSAVE_MASK (bit_OSXSAVE|bit_MOVBE)
47387d
+#endif
47387d
+
47387d
 #define via_bit_PADLOCK (0x3 << 6)
47387d
 #define via_bit_PADLOCK_PHE (0x3 << 10)
47387d
 #define via_bit_PADLOCK_PHE_SHA512 (0x3 << 25)
47387d
@@ -127,7 +150,7 @@ static unsigned read_cpuid_vals(unsigned int vals[4])
47387d
 	unsigned t1, t2, t3;
47387d
 	vals[0] = vals[1] = vals[2] = vals[3] = 0;
47387d
 
47387d
-	if (!__get_cpuid(1, &t1, &vals[0], &vals[1], &t2))
47387d
+	if (!__get_cpuid(1, &t1, &t2, &vals[1], &vals[0]))
47387d
 		return 0;
47387d
 	/* suppress AVX512; it works conditionally on certain CPUs on the original code */
47387d
 	vals[1] &= 0xfffff7ff;
47387d
@@ -145,7 +168,7 @@ static unsigned check_4th_gen_intel_features(unsigned ecx)
47387d
 {
47387d
 	uint32_t xcr0;
47387d
 
47387d
-	if ((ecx & OSXSAVE_MASK) != OSXSAVE_MASK)
47387d
+	if ((ecx & bit_OSXSAVE) != bit_OSXSAVE)
47387d
 		return 0;
47387d
 
47387d
 #if defined(_MSC_VER) && !defined(__clang__)
47387d
@@ -233,10 +256,7 @@ static unsigned check_sha(void)
47387d
 #ifdef ASM_X86_64
47387d
 static unsigned check_avx_movbe(void)
47387d
 {
47387d
-	if (check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1]) == 0)
47387d
-		return 0;
47387d
-
47387d
-	return ((_gnutls_x86_cpuid_s[1] & bit_AVX));
47387d
+	return (_gnutls_x86_cpuid_s[1] & bit_AVX);
47387d
 }
47387d
 
47387d
 static unsigned check_pclmul(void)
47387d
@@ -514,33 +534,47 @@ void register_x86_padlock_crypto(unsigned capabilities)
47387d
 }
47387d
 #endif
47387d
 
47387d
-static unsigned check_intel_or_amd(void)
47387d
+enum x86_cpu_vendor {
47387d
+	X86_CPU_VENDOR_OTHER,
47387d
+	X86_CPU_VENDOR_INTEL,
47387d
+	X86_CPU_VENDOR_AMD,
47387d
+};
47387d
+
47387d
+static enum x86_cpu_vendor check_x86_cpu_vendor(void)
47387d
 {
47387d
 	unsigned int a, b, c, d;
47387d
 
47387d
-	if (!__get_cpuid(0, &a, &b, &c, &d))
47387d
-		return 0;
47387d
+	if (!__get_cpuid(0, &a, &b, &c, &d)) {
47387d
+		return X86_CPU_VENDOR_OTHER;
47387d
+	}
47387d
 
47387d
-	if ((memcmp(&b, "Genu", 4) == 0 &&
47387d
-	     memcmp(&d, "ineI", 4) == 0 &&
47387d
-	     memcmp(&c, "ntel", 4) == 0) ||
47387d
-	    (memcmp(&b, "Auth", 4) == 0 &&
47387d
-	     memcmp(&d, "enti", 4) == 0 && memcmp(&c, "cAMD", 4) == 0)) {
47387d
-		return 1;
47387d
+	if (memcmp(&b, "Genu", 4) == 0 &&
47387d
+	    memcmp(&d, "ineI", 4) == 0 &&
47387d
+	    memcmp(&c, "ntel", 4) == 0) {
47387d
+		return X86_CPU_VENDOR_INTEL;
47387d
 	}
47387d
 
47387d
-	return 0;
47387d
+	if (memcmp(&b, "Auth", 4) == 0 &&
47387d
+	    memcmp(&d, "enti", 4) == 0 &&
47387d
+	    memcmp(&c, "cAMD", 4) == 0) {
47387d
+		return X86_CPU_VENDOR_AMD;
47387d
+	}
47387d
+
47387d
+	return X86_CPU_VENDOR_OTHER;
47387d
 }
47387d
 
47387d
 static
47387d
 void register_x86_intel_crypto(unsigned capabilities)
47387d
 {
47387d
 	int ret;
47387d
+	enum x86_cpu_vendor vendor;
47387d
 
47387d
 	memset(_gnutls_x86_cpuid_s, 0, sizeof(_gnutls_x86_cpuid_s));
47387d
 
47387d
-	if (check_intel_or_amd() == 0)
47387d
+	vendor = check_x86_cpu_vendor();
47387d
+	if (vendor == X86_CPU_VENDOR_OTHER) {
47387d
 		return;
47387d
+	}
47387d
 
47387d
 	if (capabilities == 0) {
47387d
 		if (!read_cpuid_vals(_gnutls_x86_cpuid_s))
47387d
@@ -549,6 +583,23 @@ void register_x86_intel_crypto(unsigned capabilities)
47387d
 		capabilities_to_intel_cpuid(capabilities);
47387d
 	}
47387d
 
47387d
+	/* CRYPTOGAMS uses the (1 << 30) bit as an indicator of Intel CPUs */
47387d
+	if (vendor == X86_CPU_VENDOR_INTEL) {
47387d
+		_gnutls_x86_cpuid_s[0] |= 1 << 30;
47387d
+	} else {
47387d
+		_gnutls_x86_cpuid_s[0] &= ~(1 << 30);
47387d
+	}
47387d
+
47387d
+	if (!check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1])) {
47387d
+		_gnutls_x86_cpuid_s[1] &= ~bit_AVX;
47387d
+
47387d
+		/* Clear AVX2 bits as well, according to what OpenSSL does.
47387d
+		 * Should we clear bit_AVX512DQ, bit_AVX512PF, bit_AVX512ER, and
47387d
+		 * bit_AVX512CD? */
47387d
+		_gnutls_x86_cpuid_s[2] &= ~(bit_AVX2|bit_AVX512F|bit_AVX512IFMA|
47387d
+					    bit_AVX512BW|bit_AVX512VL);
47387d
+	}
47387d
+
47387d
 	if (check_ssse3()) {
47387d
 		_gnutls_debug_log("Intel SSSE3 was detected\n");
47387d
 
47387d
-- 
47387d
2.37.3
47387d
47387d
47387d
From cd509dac9e6d1bf76fd12c72c1fd61f1708c254a Mon Sep 17 00:00:00 2001
47387d
From: Daiki Ueno <ueno@gnu.org>
47387d
Date: Mon, 15 Aug 2022 09:39:18 +0900
47387d
Subject: [PATCH 2/2] accelerated: clear AVX bits if it cannot be queried
47387d
 through XSAVE
47387d
MIME-Version: 1.0
47387d
Content-Type: text/plain; charset=UTF-8
47387d
Content-Transfer-Encoding: 8bit
47387d
47387d
The algorithm to detect AVX is described in 14.3 of "Intel® 64 and IA-32
47387d
Architectures Software Developer’s Manual".
47387d
47387d
GnuTLS previously only followed that algorithm when registering the
47387d
crypto backend, while the CRYPTOGAMS-derived SHA assembly code expects
47387d
that the extension bits are propagated to _gnutls_x86_cpuid_s.
47387d
47387d
Signed-off-by: Daiki Ueno <ueno@gnu.org>
47387d
---
47387d
 lib/accelerated/x86/x86-common.c | 18 ++++++++++++++++--
47387d
 1 file changed, 16 insertions(+), 2 deletions(-)
47387d
47387d
diff --git a/lib/accelerated/x86/x86-common.c b/lib/accelerated/x86/x86-common.c
47387d
index cf615ef24f..655d0c65f2 100644
47387d
--- a/lib/accelerated/x86/x86-common.c
47387d
+++ b/lib/accelerated/x86/x86-common.c
47387d
@@ -210,7 +210,8 @@ static void capabilities_to_intel_cpuid(unsigned capabilities)
47387d
 	}
47387d
 
47387d
 	if (capabilities & INTEL_AVX) {
47387d
-		if ((a[1] & bit_AVX) && check_4th_gen_intel_features(a[1])) {
47387d
+		if ((a[1] & bit_AVX) && (a[1] & bit_MOVBE) &&
47387d
+		    check_4th_gen_intel_features(a[1])) {
47387d
 			_gnutls_x86_cpuid_s[1] |= bit_AVX|bit_MOVBE;
47387d
 		} else {
47387d
 			_gnutls_debug_log
47387d
@@ -256,7 +257,7 @@ static unsigned check_sha(void)
47387d
 #ifdef ASM_X86_64
47387d
 static unsigned check_avx_movbe(void)
47387d
 {
47387d
-	return (_gnutls_x86_cpuid_s[1] & bit_AVX);
47387d
+	return (_gnutls_x86_cpuid_s[1] & (bit_AVX|bit_MOVBE)) == (bit_AVX|bit_MOVBE);
47387d
 }
47387d
 
47387d
 static unsigned check_pclmul(void)
47387d
@@ -579,6 +580,19 @@ void register_x86_intel_crypto(unsigned capabilities)
47387d
 	if (capabilities == 0) {
47387d
 		if (!read_cpuid_vals(_gnutls_x86_cpuid_s))
47387d
 			return;
47387d
+		if (!check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1])) {
47387d
+			_gnutls_x86_cpuid_s[1] &= ~bit_AVX;
47387d
+
47387d
+			/* Clear AVX2 bits as well, according to what
47387d
+			 * OpenSSL does.  Should we clear
47387d
+			 * bit_AVX512DQ, bit_AVX512PF, bit_AVX512ER,
47387d
+			 * and bit_AVX512CD? */
47387d
+			_gnutls_x86_cpuid_s[2] &= ~(bit_AVX2|
47387d
+						    bit_AVX512F|
47387d
+						    bit_AVX512IFMA|
47387d
+						    bit_AVX512BW|
47387d
+						    bit_AVX512VL);
47387d
+		}
47387d
 	} else {
47387d
 		capabilities_to_intel_cpuid(capabilities);
47387d
 	}
47387d
-- 
47387d
2.37.3
47387d