e79d4b
From 8ff391fa011e02c88b0d099061ca62e88ab68011 Mon Sep 17 00:00:00 2001
e79d4b
From: Daiki Ueno <ueno@gnu.org>
e79d4b
Date: Mon, 15 Aug 2022 09:39:18 +0900
e79d4b
Subject: [PATCH] accelerated: clear AVX bits if it cannot be queried through
e79d4b
 XSAVE
e79d4b
MIME-Version: 1.0
e79d4b
Content-Type: text/plain; charset=UTF-8
e79d4b
Content-Transfer-Encoding: 8bit
e79d4b
e79d4b
The algorithm to detect AVX is described in 14.3 of "Intel® 64 and IA-32
e79d4b
Architectures Software Developer’s Manual".
e79d4b
e79d4b
GnuTLS previously only followed that algorithm when registering the
e79d4b
crypto backend, while the CRYPTOGAMS-derived SHA assembly code expects
e79d4b
that the extension bits are propagated to _gnutls_x86_cpuid_s.
e79d4b
e79d4b
Signed-off-by: Daiki Ueno <ueno@gnu.org>
e79d4b
---
e79d4b
 lib/accelerated/x86/x86-common.c | 49 +++++++++++++++++++++++++-------
e79d4b
 1 file changed, 38 insertions(+), 11 deletions(-)
e79d4b
e79d4b
diff --git a/lib/accelerated/x86/x86-common.c b/lib/accelerated/x86/x86-common.c
e79d4b
index 7ddaa594e6..b7a88ddeca 100644
e79d4b
--- a/lib/accelerated/x86/x86-common.c
e79d4b
+++ b/lib/accelerated/x86/x86-common.c
e79d4b
@@ -81,6 +81,26 @@ unsigned int _gnutls_x86_cpuid_s[4];
e79d4b
 # define bit_AVX 0x10000000
e79d4b
 #endif
e79d4b
 
e79d4b
+#ifndef bit_AVX2
e79d4b
+# define bit_AVX2 0x00000020
e79d4b
+#endif
e79d4b
+
e79d4b
+#ifndef bit_AVX512F
e79d4b
+# define bit_AVX512F 0x00010000
e79d4b
+#endif
e79d4b
+
e79d4b
+#ifndef bit_AVX512IFMA
e79d4b
+# define bit_AVX512IFMA 0x00200000
e79d4b
+#endif
e79d4b
+
e79d4b
+#ifndef bit_AVX512BW
e79d4b
+# define bit_AVX512BW 0x40000000
e79d4b
+#endif
e79d4b
+
e79d4b
+#ifndef bit_AVX512VL
e79d4b
+# define bit_AVX512VL 0x80000000
e79d4b
+#endif
e79d4b
+
e79d4b
 #ifndef bit_OSXSAVE
e79d4b
 # define bit_OSXSAVE 0x8000000
e79d4b
 #endif
e79d4b
@@ -89,10 +109,6 @@ unsigned int _gnutls_x86_cpuid_s[4];
e79d4b
 # define bit_MOVBE 0x00400000
e79d4b
 #endif
e79d4b
 
e79d4b
-#ifndef OSXSAVE_MASK
e79d4b
-# define OSXSAVE_MASK (bit_OSXSAVE|bit_MOVBE)
e79d4b
-#endif
e79d4b
-
e79d4b
 #define bit_PADLOCK (0x3 << 6)
e79d4b
 #define bit_PADLOCK_PHE (0x3 << 10)
e79d4b
 #define bit_PADLOCK_PHE_SHA512 (0x3 << 25)
e79d4b
@@ -148,7 +164,7 @@ static unsigned check_4th_gen_intel_features(unsigned ecx)
e79d4b
 {
e79d4b
 	uint32_t xcr0;
e79d4b
 
e79d4b
-	if ((ecx & OSXSAVE_MASK) != OSXSAVE_MASK)
e79d4b
+	if ((ecx & bit_OSXSAVE) != bit_OSXSAVE)
e79d4b
 		return 0;
e79d4b
 
e79d4b
 #if defined(_MSC_VER) && !defined(__clang__)
e79d4b
@@ -190,8 +206,9 @@ static void capabilities_to_intel_cpuid(unsigned capabilities)
e79d4b
 	}
e79d4b
 
e79d4b
 	if (capabilities & INTEL_AVX) {
e79d4b
-		if ((a[1] & bit_AVX) && check_4th_gen_intel_features(a[1])) {
e79d4b
-			_gnutls_x86_cpuid_s[1] |= bit_AVX|OSXSAVE_MASK;
e79d4b
+		if ((a[1] & bit_AVX) && (a[1] & bit_MOVBE) &&
e79d4b
+		    check_4th_gen_intel_features(a[1])) {
e79d4b
+			_gnutls_x86_cpuid_s[1] |= bit_AVX|bit_MOVBE;
e79d4b
 		} else {
e79d4b
 			_gnutls_debug_log
e79d4b
 			    ("AVX acceleration requested but not available\n");
e79d4b
@@ -236,10 +253,7 @@ static unsigned check_sha(void)
e79d4b
 #ifdef ASM_X86_64
e79d4b
 static unsigned check_avx_movbe(void)
e79d4b
 {
e79d4b
-	if (check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1]) == 0)
e79d4b
-		return 0;
e79d4b
-
e79d4b
-	return ((_gnutls_x86_cpuid_s[1] & bit_AVX));
e79d4b
+	return (_gnutls_x86_cpuid_s[1] & (bit_AVX|bit_MOVBE)) == (bit_AVX|bit_MOVBE);
e79d4b
 }
e79d4b
 
e79d4b
 static unsigned check_pclmul(void)
e79d4b
@@ -884,6 +898,19 @@ void register_x86_intel_crypto(unsigned capabilities)
e79d4b
 	if (capabilities == 0) {
e79d4b
 		if (!read_cpuid_vals(_gnutls_x86_cpuid_s))
e79d4b
 			return;
e79d4b
+		if (!check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1])) {
e79d4b
+			_gnutls_x86_cpuid_s[1] &= ~bit_AVX;
e79d4b
+
e79d4b
+			/* Clear AVX2 bits as well, according to what
e79d4b
+			 * OpenSSL does.  Should we clear
e79d4b
+			 * bit_AVX512DQ, bit_AVX512PF, bit_AVX512ER,
e79d4b
+			 * and bit_AVX512CD? */
e79d4b
+			_gnutls_x86_cpuid_s[2] &= ~(bit_AVX2|
e79d4b
+						    bit_AVX512F|
e79d4b
+						    bit_AVX512IFMA|
e79d4b
+						    bit_AVX512BW|
e79d4b
+						    bit_AVX512VL);
e79d4b
+		}
e79d4b
 	} else {
e79d4b
 		capabilities_to_intel_cpuid(capabilities);
e79d4b
 	}
e79d4b
-- 
e79d4b
2.37.2
e79d4b