|
|
e79d4b |
From 8ff391fa011e02c88b0d099061ca62e88ab68011 Mon Sep 17 00:00:00 2001
|
|
|
e79d4b |
From: Daiki Ueno <ueno@gnu.org>
|
|
|
e79d4b |
Date: Mon, 15 Aug 2022 09:39:18 +0900
|
|
|
e79d4b |
Subject: [PATCH] accelerated: clear AVX bits if it cannot be queried through
|
|
|
e79d4b |
XSAVE
|
|
|
e79d4b |
MIME-Version: 1.0
|
|
|
e79d4b |
Content-Type: text/plain; charset=UTF-8
|
|
|
e79d4b |
Content-Transfer-Encoding: 8bit
|
|
|
e79d4b |
|
|
|
e79d4b |
The algorithm to detect AVX is described in 14.3 of "Intel® 64 and IA-32
|
|
|
e79d4b |
Architectures Software Developer’s Manual".
|
|
|
e79d4b |
|
|
|
e79d4b |
GnuTLS previously only followed that algorithm when registering the
|
|
|
e79d4b |
crypto backend, while the CRYPTOGAMS-derived SHA assembly code expects
|
|
|
e79d4b |
that the extension bits are propagated to _gnutls_x86_cpuid_s.
|
|
|
e79d4b |
|
|
|
e79d4b |
Signed-off-by: Daiki Ueno <ueno@gnu.org>
|
|
|
e79d4b |
---
|
|
|
e79d4b |
lib/accelerated/x86/x86-common.c | 49 +++++++++++++++++++++++++-------
|
|
|
e79d4b |
1 file changed, 38 insertions(+), 11 deletions(-)
|
|
|
e79d4b |
|
|
|
e79d4b |
diff --git a/lib/accelerated/x86/x86-common.c b/lib/accelerated/x86/x86-common.c
|
|
|
e79d4b |
index 7ddaa594e6..b7a88ddeca 100644
|
|
|
e79d4b |
--- a/lib/accelerated/x86/x86-common.c
|
|
|
e79d4b |
+++ b/lib/accelerated/x86/x86-common.c
|
|
|
e79d4b |
@@ -81,6 +81,26 @@ unsigned int _gnutls_x86_cpuid_s[4];
|
|
|
e79d4b |
# define bit_AVX 0x10000000
|
|
|
e79d4b |
#endif
|
|
|
e79d4b |
|
|
|
e79d4b |
+#ifndef bit_AVX2
|
|
|
e79d4b |
+# define bit_AVX2 0x00000020
|
|
|
e79d4b |
+#endif
|
|
|
e79d4b |
+
|
|
|
e79d4b |
+#ifndef bit_AVX512F
|
|
|
e79d4b |
+# define bit_AVX512F 0x00010000
|
|
|
e79d4b |
+#endif
|
|
|
e79d4b |
+
|
|
|
e79d4b |
+#ifndef bit_AVX512IFMA
|
|
|
e79d4b |
+# define bit_AVX512IFMA 0x00200000
|
|
|
e79d4b |
+#endif
|
|
|
e79d4b |
+
|
|
|
e79d4b |
+#ifndef bit_AVX512BW
|
|
|
e79d4b |
+# define bit_AVX512BW 0x40000000
|
|
|
e79d4b |
+#endif
|
|
|
e79d4b |
+
|
|
|
e79d4b |
+#ifndef bit_AVX512VL
|
|
|
e79d4b |
+# define bit_AVX512VL 0x80000000
|
|
|
e79d4b |
+#endif
|
|
|
e79d4b |
+
|
|
|
e79d4b |
#ifndef bit_OSXSAVE
|
|
|
e79d4b |
# define bit_OSXSAVE 0x8000000
|
|
|
e79d4b |
#endif
|
|
|
e79d4b |
@@ -89,10 +109,6 @@ unsigned int _gnutls_x86_cpuid_s[4];
|
|
|
e79d4b |
# define bit_MOVBE 0x00400000
|
|
|
e79d4b |
#endif
|
|
|
e79d4b |
|
|
|
e79d4b |
-#ifndef OSXSAVE_MASK
|
|
|
e79d4b |
-# define OSXSAVE_MASK (bit_OSXSAVE|bit_MOVBE)
|
|
|
e79d4b |
-#endif
|
|
|
e79d4b |
-
|
|
|
e79d4b |
#define bit_PADLOCK (0x3 << 6)
|
|
|
e79d4b |
#define bit_PADLOCK_PHE (0x3 << 10)
|
|
|
e79d4b |
#define bit_PADLOCK_PHE_SHA512 (0x3 << 25)
|
|
|
e79d4b |
@@ -148,7 +164,7 @@ static unsigned check_4th_gen_intel_features(unsigned ecx)
|
|
|
e79d4b |
{
|
|
|
e79d4b |
uint32_t xcr0;
|
|
|
e79d4b |
|
|
|
e79d4b |
- if ((ecx & OSXSAVE_MASK) != OSXSAVE_MASK)
|
|
|
e79d4b |
+ if ((ecx & bit_OSXSAVE) != bit_OSXSAVE)
|
|
|
e79d4b |
return 0;
|
|
|
e79d4b |
|
|
|
e79d4b |
#if defined(_MSC_VER) && !defined(__clang__)
|
|
|
e79d4b |
@@ -190,8 +206,9 @@ static void capabilities_to_intel_cpuid(unsigned capabilities)
|
|
|
e79d4b |
}
|
|
|
e79d4b |
|
|
|
e79d4b |
if (capabilities & INTEL_AVX) {
|
|
|
e79d4b |
- if ((a[1] & bit_AVX) && check_4th_gen_intel_features(a[1])) {
|
|
|
e79d4b |
- _gnutls_x86_cpuid_s[1] |= bit_AVX|OSXSAVE_MASK;
|
|
|
e79d4b |
+ if ((a[1] & bit_AVX) && (a[1] & bit_MOVBE) &&
|
|
|
e79d4b |
+ check_4th_gen_intel_features(a[1])) {
|
|
|
e79d4b |
+ _gnutls_x86_cpuid_s[1] |= bit_AVX|bit_MOVBE;
|
|
|
e79d4b |
} else {
|
|
|
e79d4b |
_gnutls_debug_log
|
|
|
e79d4b |
("AVX acceleration requested but not available\n");
|
|
|
e79d4b |
@@ -236,10 +253,7 @@ static unsigned check_sha(void)
|
|
|
e79d4b |
#ifdef ASM_X86_64
|
|
|
e79d4b |
static unsigned check_avx_movbe(void)
|
|
|
e79d4b |
{
|
|
|
e79d4b |
- if (check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1]) == 0)
|
|
|
e79d4b |
- return 0;
|
|
|
e79d4b |
-
|
|
|
e79d4b |
- return ((_gnutls_x86_cpuid_s[1] & bit_AVX));
|
|
|
e79d4b |
+ return (_gnutls_x86_cpuid_s[1] & (bit_AVX|bit_MOVBE)) == (bit_AVX|bit_MOVBE);
|
|
|
e79d4b |
}
|
|
|
e79d4b |
|
|
|
e79d4b |
static unsigned check_pclmul(void)
|
|
|
e79d4b |
@@ -884,6 +898,19 @@ void register_x86_intel_crypto(unsigned capabilities)
|
|
|
e79d4b |
if (capabilities == 0) {
|
|
|
e79d4b |
if (!read_cpuid_vals(_gnutls_x86_cpuid_s))
|
|
|
e79d4b |
return;
|
|
|
e79d4b |
+ if (!check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1])) {
|
|
|
e79d4b |
+ _gnutls_x86_cpuid_s[1] &= ~bit_AVX;
|
|
|
e79d4b |
+
|
|
|
e79d4b |
+ /* Clear AVX2 and AVX512 bits as well, according to what
|
|
|
e79d4b |
+ * OpenSSL does. Should we clear
|
|
|
e79d4b |
+ * bit_AVX512DQ, bit_AVX512PF, bit_AVX512ER,
|
|
|
e79d4b |
+ * and bit_AVX512CD? */
|
|
|
e79d4b |
+ _gnutls_x86_cpuid_s[2] &= ~(bit_AVX2|
|
|
|
e79d4b |
+ bit_AVX512F|
|
|
|
e79d4b |
+ bit_AVX512IFMA|
|
|
|
e79d4b |
+ bit_AVX512BW|
|
|
|
e79d4b |
+ bit_AVX512VL);
|
|
|
e79d4b |
+ }
|
|
|
e79d4b |
} else {
|
|
|
e79d4b |
capabilities_to_intel_cpuid(capabilities);
|
|
|
e79d4b |
}
|
|
|
e79d4b |
--
|
|
|
e79d4b |
2.37.2
|
|
|
e79d4b |
|