Tree - rpms/libgcrypt - CentOS Git server

rpms / libgcrypt

Blame SOURCES/libgcrypt-1.8.5-ppc-crc32.patch

Blob History Raw

		d28a9c	`diff --git a/cipher/Makefile.am b/cipher/Makefile.am`
		d28a9c	`index cb41c251..1728e9f9 100644`
		d28a9c	`--- a/cipher/Makefile.am`
		d28a9c	`+++ b/cipher/Makefile.am`
		d28a9c	`@@ -67,7 +67,7 @@ cast5.c cast5-amd64.S cast5-arm.S \`
		d28a9c	`chacha20.c chacha20-sse2-amd64.S chacha20-ssse3-amd64.S chacha20-avx2-amd64.S \`
		d28a9c	`chacha20-armv7-neon.S \`
		d28a9c	`crc.c \`
		d28a9c	`- crc-intel-pclmul.c \`
		d28a9c	`+ crc-intel-pclmul.c crc-ppc.c \`
		d28a9c	`des.c des-amd64.S \`
		d28a9c	`dsa.c \`
		d28a9c	`elgamal.c \`
		d28a9c	`@@ -159,3 +159,9 @@ sha512-ppc.o: $(srcdir)/sha512-ppc.c Makefile`
		d28a9c
		d28a9c	`sha512-ppc.lo: $(srcdir)/sha512-ppc.c Makefile`
		d28a9c	`echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< `
		d28a9c	`+`
		d28a9c	`+crc-ppc.o: $(srcdir)/crc-ppc.c Makefile`
		d28a9c	+ `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< `
		d28a9c	`+`
		d28a9c	`+crc-ppc.lo: $(srcdir)/crc-ppc.c Makefile`
		d28a9c	+ `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< `
		d28a9c	`diff --git a/cipher/crc-ppc.c b/cipher/crc-ppc.c`
		d28a9c	`new file mode 100644`
		d28a9c	`index 00000000..4d7f0add`
		d28a9c	`--- /dev/null`
		d28a9c	`+++ b/cipher/crc-ppc.c`
		d28a9c	`@@ -0,0 +1,619 @@`
		d28a9c	`+/* crc-ppc.c - POWER8 vpmsum accelerated CRC implementation`
		d28a9c	`+ * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>`
		d28a9c	`+ *`
		d28a9c	`+ * This file is part of Libgcrypt.`
		d28a9c	`+ *`
		d28a9c	`+ * Libgcrypt is free software; you can redistribute it and/or modify`
		d28a9c	`+ * it under the terms of the GNU Lesser General Public License as`
		d28a9c	`+ * published by the Free Software Foundation; either version 2.1 of`
		d28a9c	`+ * the License, or (at your option) any later version.`
		d28a9c	`+ *`
		d28a9c	`+ * Libgcrypt is distributed in the hope that it will be useful,`
		d28a9c	`+ * but WITHOUT ANY WARRANTY; without even the implied warranty of`
		d28a9c	`+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
		d28a9c	`+ * GNU Lesser General Public License for more details.`
		d28a9c	`+ *`
		d28a9c	`+ * You should have received a copy of the GNU Lesser General Public`
		d28a9c	`+ * License along with this program; if not, write to the Free Software`
		d28a9c	`+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA`
		d28a9c	`+ *`
		d28a9c	`+ */`
		d28a9c	`+`
		d28a9c	`+#include <config.h>`
		d28a9c	`+#include <stdio.h>`
		d28a9c	`+#include <stdlib.h>`
		d28a9c	`+#include <string.h>`
		d28a9c	`+`
		d28a9c	`+#include "g10lib.h"`
		d28a9c	`+`
		d28a9c	`+#include "bithelp.h"`
		d28a9c	`+#include "bufhelp.h"`
		d28a9c	`+`
		d28a9c	`+`
		d28a9c	`+#if defined(ENABLE_PPC_CRYPTO_SUPPORT) && \`
		d28a9c	`+ defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \`
		d28a9c	`+ defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) && \`
		d28a9c	`+ __GNUC__ >= 4`
		d28a9c	`+`
		d28a9c	`+#include <altivec.h>`
		d28a9c	`+#include "bufhelp.h"`
		d28a9c	`+`
		d28a9c	`+`
		d28a9c	`+#define ALWAYS_INLINE inline __attribute__((always_inline))`
		d28a9c	`+#define NO_INLINE __attribute__((noinline))`
		d28a9c	`+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))`
		d28a9c	`+`
		d28a9c	`+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION`
		d28a9c	`+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE`
		d28a9c	`+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE`
		d28a9c	`+`
		d28a9c	`+#define ALIGNED_64 __attribute__ ((aligned (64)))`
		d28a9c	`+`
		d28a9c	`+`
		d28a9c	`+typedef vector unsigned char vector16x_u8;`
		d28a9c	`+typedef vector unsigned int vector4x_u32;`
		d28a9c	`+typedef vector unsigned long long vector2x_u64;`
		d28a9c	`+`
		d28a9c	`+`
		d28a9c	`+/* Constants structure for generic reflected/non-reflected CRC32 PMULL`
		d28a9c	`+ * functions. */`
		d28a9c	`+struct crc32_consts_s`
		d28a9c	`+{`
		d28a9c	`+ /* k: { x^(32*17), x^(32*15), x^(32*5), x^(32*3), x^(32*2), 0 } mod P(x) */`
		d28a9c	`+ unsigned long long k[6];`
		d28a9c	`+ /* my_p: { floor(x^64 / P(x)), P(x) } */`
		d28a9c	`+ unsigned long long my_p[2];`
		d28a9c	`+};`
		d28a9c	`+`
		d28a9c	`+/* PMULL constants for CRC32 and CRC32RFC1510. */`
		d28a9c	`+static const struct crc32_consts_s crc32_consts ALIGNED_64 =`
		d28a9c	`+{`
		d28a9c	`+ { /* k[6] = reverse_33bits( x^(32*y) mod P(x) ) */`
		d28a9c	`+ U64_C(0x154442bd4), U64_C(0x1c6e41596), /* y = { 17, 15 } */`
		d28a9c	`+ U64_C(0x1751997d0), U64_C(0x0ccaa009e), /* y = { 5, 3 } */`
		d28a9c	`+ U64_C(0x163cd6124), 0 /* y = 2 */`
		d28a9c	`+ },`
		d28a9c	`+ { /* my_p[2] = reverse_33bits ( { floor(x^64 / P(x)), P(x) } ) */`
		d28a9c	`+ U64_C(0x1f7011641), U64_C(0x1db710641)`
		d28a9c	`+ }`
		d28a9c	`+};`
		d28a9c	`+`
		d28a9c	`+/* PMULL constants for CRC24RFC2440 (polynomial multiplied with x⁸). */`
		d28a9c	`+static const struct crc32_consts_s crc24rfc2440_consts ALIGNED_64 =`
		d28a9c	`+{`
		d28a9c	`+ { /* k[6] = x^(32*y) mod P(x) << 32*/`
		d28a9c	`+ U64_C(0x08289a00) << 32, U64_C(0x74b44a00) << 32, /* y = { 17, 15 } */`
		d28a9c	`+ U64_C(0xc4b14d00) << 32, U64_C(0xfd7e0c00) << 32, /* y = { 5, 3 } */`
		d28a9c	`+ U64_C(0xd9fe8c00) << 32, 0 /* y = 2 */`
		d28a9c	`+ },`
		d28a9c	`+ { /* my_p[2] = { floor(x^64 / P(x)), P(x) } */`
		d28a9c	`+ U64_C(0x1f845fe24), U64_C(0x1864cfb00)`
		d28a9c	`+ }`
		d28a9c	`+};`
		d28a9c	`+`
		d28a9c	`+`
		d28a9c	`+static ASM_FUNC_ATTR_INLINE vector2x_u64`
		d28a9c	`+asm_vpmsumd(vector2x_u64 a, vector2x_u64 b)`
		d28a9c	`+{`
		d28a9c	`+ __asm__("vpmsumd %0, %1, %2"`
		d28a9c	`+ : "=v" (a)`
		d28a9c	`+ : "v" (a), "v" (b));`
		d28a9c	`+ return a;`
		d28a9c	`+}`
		d28a9c	`+`
		d28a9c	`+`
		d28a9c	`+static ASM_FUNC_ATTR_INLINE vector2x_u64`
		d28a9c	`+asm_swap_u64(vector2x_u64 a)`
		d28a9c	`+{`
		d28a9c	`+ __asm__("xxswapd %x0, %x1"`
		d28a9c	`+ : "=wa" (a)`
		d28a9c	`+ : "wa" (a));`
		d28a9c	`+ return a;`
		d28a9c	`+}`
		d28a9c	`+`
		d28a9c	`+`
		d28a9c	`+static ASM_FUNC_ATTR_INLINE vector4x_u32`
		d28a9c	`+vec_sld_u32(vector4x_u32 a, vector4x_u32 b, unsigned int idx)`
		d28a9c	`+{`
		d28a9c	`+ return vec_sld (a, b, (4 * idx) & 15);`
		d28a9c	`+}`
		d28a9c	`+`
		d28a9c	`+`
		d28a9c	`+static const byte crc32_partial_fold_input_mask[16 + 16] ALIGNED_64 =`
		d28a9c	`+ {`
		d28a9c	`+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,`
		d28a9c	`+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,`
		d28a9c	`+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,`
		d28a9c	`+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,`
		d28a9c	`+ };`
		d28a9c	`+static const byte crc32_shuf_shift[3 * 16] ALIGNED_64 =`
		d28a9c	`+ {`
		d28a9c	`+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,`
		d28a9c	`+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,`
		d28a9c	`+ 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,`
		d28a9c	`+ 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,`
		d28a9c	`+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,`
		d28a9c	`+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,`
		d28a9c	`+ };`
		d28a9c	`+static const byte crc32_refl_shuf_shift[3 * 16] ALIGNED_64 =`
		d28a9c	`+ {`
		d28a9c	`+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,`
		d28a9c	`+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,`
		d28a9c	`+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,`
		d28a9c	`+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,`
		d28a9c	`+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,`
		d28a9c	`+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,`
		d28a9c	`+ };`
		d28a9c	`+static const vector16x_u8 bswap_const ALIGNED_64 =`
		d28a9c	`+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };`
		d28a9c	`+`
		d28a9c	`+`
		d28a9c	`+#define CRC_VEC_SWAP(v) ({ vector2x_u64 __vecu64 = (v); \`
		d28a9c	`+ vec_perm(__vecu64, __vecu64, bswap_const); })`
		d28a9c	`+`
		d28a9c	`+#ifdef WORDS_BIGENDIAN`
		d28a9c	`+# define CRC_VEC_U64_DEF(lo, hi) { (hi), (lo) }`
		d28a9c	`+# define CRC_VEC_U64_LOAD(offs, ptr) \`
		d28a9c	`+ asm_swap_u64(vec_vsx_ld((offs), (const unsigned long long *)(ptr)))`
		d28a9c	`+# define CRC_VEC_U64_LOAD_LE(offs, ptr) \`
		d28a9c	`+ CRC_VEC_SWAP(vec_vsx_ld((offs), (const unsigned long long *)(ptr)))`
		d28a9c	`+# define CRC_VEC_U64_LOAD_BE(offs, ptr) \`
		d28a9c	`+ vec_vsx_ld((offs), (const unsigned long long *)(ptr))`
		d28a9c	`+# define CRC_VEC_SWAP_TO_LE(v) CRC_VEC_SWAP(v)`
		d28a9c	`+# define CRC_VEC_SWAP_TO_BE(v) (v)`
		d28a9c	`+# define VEC_U64_LO 1`
		d28a9c	`+# define VEC_U64_HI 0`
		d28a9c	`+#else`
		d28a9c	`+# define CRC_VEC_U64_DEF(lo, hi) { (lo), (hi) }`
		d28a9c	`+# define CRC_VEC_U64_LOAD(offs, ptr) \`
		d28a9c	`+ vec_vsx_ld((offs), (const unsigned long long *)(ptr))`
		d28a9c	`+# define CRC_VEC_U64_LOAD_LE(offs, ptr) CRC_VEC_U64_LOAD((offs), (ptr))`
		d28a9c	`+# define CRC_VEC_U64_LOAD_BE(offs, ptr) asm_vec_u64_load_be(offs, ptr)`
		d28a9c	`+# define CRC_VEC_SWAP_TO_LE(v) (v)`
		d28a9c	`+# define CRC_VEC_SWAP_TO_BE(v) CRC_VEC_SWAP(v)`
		d28a9c	`+# define VEC_U64_LO 0`
		d28a9c	`+# define VEC_U64_HI 1`
		d28a9c	`+`
		d28a9c	`+static ASM_FUNC_ATTR_INLINE vector2x_u64`
		d28a9c	`+asm_vec_u64_load_be(unsigned int offset, const void *ptr)`
		d28a9c	`+{`
		d28a9c	`+ static const vector16x_u8 vec_load_le_const =`
		d28a9c	`+ { ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0, ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8 };`
		d28a9c	`+ vector2x_u64 vecu64;`
		d28a9c	`+`
		d28a9c	`+#if __GNUC__ >= 4`
		d28a9c	`+ if (__builtin_constant_p (offset) && offset == 0)`
		d28a9c	`+ __asm__ ("lxvd2x %%vs32,0,%1\n\t"`
		d28a9c	`+ "vperm %0,%%v0,%%v0,%2\n\t"`
		d28a9c	`+ : "=v" (vecu64)`
		d28a9c	`+ : "r" ((uintptr_t)(ptr)), "v" (vec_load_le_const)`
		d28a9c	`+ : "memory", "v0");`
		d28a9c	`+#endif`
		d28a9c	`+ else`
		d28a9c	`+ __asm__ ("lxvd2x %%vs32,%1,%2\n\t"`
		d28a9c	`+ "vperm %0,%%v0,%%v0,%3\n\t"`
		d28a9c	`+ : "=v" (vecu64)`
		d28a9c	`+ : "r" (offset), "r" ((uintptr_t)(ptr)),`
		d28a9c	`+ "v" (vec_load_le_const)`
		d28a9c	`+ : "memory", "r0", "v0");`
		d28a9c	`+`
		d28a9c	`+ return vecu64;`
		d28a9c	`+}`
		d28a9c	`+#endif`
		d28a9c	`+`
		d28a9c	`+`
		d28a9c	`+static ASM_FUNC_ATTR_INLINE void`
		d28a9c	`+crc32r_ppc8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,`
		d28a9c	`+ const struct crc32_consts_s *consts)`
		d28a9c	`+{`
		d28a9c	`+ vector4x_u32 zero = { 0, 0, 0, 0 };`
		d28a9c	`+ vector2x_u64 low_64bit_mask = CRC_VEC_U64_DEF((u64)-1, 0);`
		d28a9c	`+ vector2x_u64 low_32bit_mask = CRC_VEC_U64_DEF((u32)-1, 0);`
		d28a9c	`+ vector2x_u64 my_p = CRC_VEC_U64_LOAD(0, &consts->my_p[0]);`
		d28a9c	`+ vector2x_u64 k1k2 = CRC_VEC_U64_LOAD(0, &consts->k[1 - 1]);`
		d28a9c	`+ vector2x_u64 k3k4 = CRC_VEC_U64_LOAD(0, &consts->k[3 - 1]);`
		d28a9c	`+ vector2x_u64 k4lo = CRC_VEC_U64_DEF(k3k4[VEC_U64_HI], 0);`
		d28a9c	`+ vector2x_u64 k5lo = CRC_VEC_U64_LOAD(0, &consts->k[5 - 1]);`
		d28a9c	`+ vector2x_u64 crc = CRC_VEC_U64_DEF(*pcrc, 0);`
		d28a9c	`+ vector2x_u64 crc0, crc1, crc2, crc3;`
		d28a9c	`+ vector2x_u64 v0;`
		d28a9c	`+`
		d28a9c	`+ if (inlen >= 8 * 16)`
		d28a9c	`+ {`
		d28a9c	`+ crc0 = CRC_VEC_U64_LOAD_LE(0 * 16, inbuf);`
		d28a9c	`+ crc0 ^= crc;`
		d28a9c	`+ crc1 = CRC_VEC_U64_LOAD_LE(1 * 16, inbuf);`
		d28a9c	`+ crc2 = CRC_VEC_U64_LOAD_LE(2 * 16, inbuf);`
		d28a9c	`+ crc3 = CRC_VEC_U64_LOAD_LE(3 * 16, inbuf);`
		d28a9c	`+`
		d28a9c	`+ inbuf += 4 * 16;`
		d28a9c	`+ inlen -= 4 * 16;`
		d28a9c	`+`
		d28a9c	`+ /* Fold by 4. */`
		d28a9c	`+ while (inlen >= 4 * 16)`
		d28a9c	`+ {`
		d28a9c	`+ v0 = CRC_VEC_U64_LOAD_LE(0 * 16, inbuf);`
		d28a9c	`+ crc0 = asm_vpmsumd(crc0, k1k2) ^ v0;`
		d28a9c	`+`
		d28a9c	`+ v0 = CRC_VEC_U64_LOAD_LE(1 * 16, inbuf);`
		d28a9c	`+ crc1 = asm_vpmsumd(crc1, k1k2) ^ v0;`
		d28a9c	`+`
		d28a9c	`+ v0 = CRC_VEC_U64_LOAD_LE(2 * 16, inbuf);`
		d28a9c	`+ crc2 = asm_vpmsumd(crc2, k1k2) ^ v0;`
		d28a9c	`+`
		d28a9c	`+ v0 = CRC_VEC_U64_LOAD_LE(3 * 16, inbuf);`
		d28a9c	`+ crc3 = asm_vpmsumd(crc3, k1k2) ^ v0;`
		d28a9c	`+`
		d28a9c	`+ inbuf += 4 * 16;`
		d28a9c	`+ inlen -= 4 * 16;`
		d28a9c	`+ }`
		d28a9c	`+`
		d28a9c	`+ /* Fold 4 to 1. */`
		d28a9c	`+ crc1 ^= asm_vpmsumd(crc0, k3k4);`
		d28a9c	`+ crc2 ^= asm_vpmsumd(crc1, k3k4);`
		d28a9c	`+ crc3 ^= asm_vpmsumd(crc2, k3k4);`
		d28a9c	`+ crc = crc3;`
		d28a9c	`+ }`
		d28a9c	`+ else`
		d28a9c	`+ {`
		d28a9c	`+ v0 = CRC_VEC_U64_LOAD_LE(0, inbuf);`
		d28a9c	`+ crc ^= v0;`
		d28a9c	`+`
		d28a9c	`+ inbuf += 16;`
		d28a9c	`+ inlen -= 16;`
		d28a9c	`+ }`
		d28a9c	`+`
		d28a9c	`+ /* Fold by 1. */`
		d28a9c	`+ while (inlen >= 16)`
		d28a9c	`+ {`
		d28a9c	`+ v0 = CRC_VEC_U64_LOAD_LE(0, inbuf);`
		d28a9c	`+ crc = asm_vpmsumd(k3k4, crc);`
		d28a9c	`+ crc ^= v0;`
		d28a9c	`+`
		d28a9c	`+ inbuf += 16;`
		d28a9c	`+ inlen -= 16;`
		d28a9c	`+ }`
		d28a9c	`+`
		d28a9c	`+ /* Partial fold. */`
		d28a9c	`+ if (inlen)`
		d28a9c	`+ {`
		d28a9c	`+ /* Load last input and add padding zeros. */`
		d28a9c	`+ vector2x_u64 mask = CRC_VEC_U64_LOAD_LE(inlen, crc32_partial_fold_input_mask);`
		d28a9c	`+ vector2x_u64 shl_shuf = CRC_VEC_U64_LOAD_LE(inlen, crc32_refl_shuf_shift);`
		d28a9c	`+ vector2x_u64 shr_shuf = CRC_VEC_U64_LOAD_LE(inlen + 16, crc32_refl_shuf_shift);`
		d28a9c	`+`
		d28a9c	`+ v0 = CRC_VEC_U64_LOAD_LE(inlen - 16, inbuf);`
		d28a9c	`+ v0 &= mask;`
		d28a9c	`+`
		d28a9c	`+ crc = CRC_VEC_SWAP_TO_LE(crc);`
		d28a9c	`+ v0 \|= (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero,`
		d28a9c	`+ (vector16x_u8)shr_shuf);`
		d28a9c	`+ crc = (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero,`
		d28a9c	`+ (vector16x_u8)shl_shuf);`
		d28a9c	`+ crc = asm_vpmsumd(k3k4, crc);`
		d28a9c	`+ crc ^= v0;`
		d28a9c	`+`
		d28a9c	`+ inbuf += inlen;`
		d28a9c	`+ inlen -= inlen;`
		d28a9c	`+ }`
		d28a9c	`+`
		d28a9c	`+ /* Final fold. */`
		d28a9c	`+`
		d28a9c	`+ /* reduce 128-bits to 96-bits */`
		d28a9c	`+ v0 = asm_swap_u64(crc);`
		d28a9c	`+ v0 &= low_64bit_mask;`
		d28a9c	`+ crc = asm_vpmsumd(k4lo, crc);`
		d28a9c	`+ crc ^= v0;`
		d28a9c	`+`
		d28a9c	`+ /* reduce 96-bits to 64-bits */`
		d28a9c	`+ v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)crc,`
		d28a9c	`+ (vector4x_u32)crc, 3); /* [x0][x3][x2][x1] */`
		d28a9c	`+ v0 &= low_64bit_mask; /* [00][00][x2][x1] */`
		d28a9c	`+ crc = crc & low_32bit_mask; /* [00][00][00][x0] */`
		d28a9c	`+ crc = v0 ^ asm_vpmsumd(k5lo, crc); /* [00][00][xx][xx] */`
		d28a9c	`+`
		d28a9c	`+ /* barrett reduction */`
		d28a9c	`+ v0 = crc << 32; /* [00][00][x0][00] */`
		d28a9c	`+ v0 = asm_vpmsumd(my_p, v0);`
		d28a9c	`+ v0 = asm_swap_u64(v0);`
		d28a9c	`+ v0 = asm_vpmsumd(my_p, v0);`
		d28a9c	`+ crc = (vector2x_u64)vec_sld_u32((vector4x_u32)crc,`
		d28a9c	`+ zero, 1); /* [00][x1][x0][00] */`
		d28a9c	`+ crc ^= v0;`
		d28a9c	`+`
		d28a9c	`+ *pcrc = (u32)crc[VEC_U64_HI];`
		d28a9c	`+}`
		d28a9c	`+`
		d28a9c	`+`
		d28a9c	`+static ASM_FUNC_ATTR_INLINE u32`
		d28a9c	`+crc32r_ppc8_ce_reduction_4 (u32 data, u32 crc,`
		d28a9c	`+ const struct crc32_consts_s *consts)`
		d28a9c	`+{`
		d28a9c	`+ vector4x_u32 zero = { 0, 0, 0, 0 };`
		d28a9c	`+ vector2x_u64 my_p = CRC_VEC_U64_LOAD(0, &consts->my_p[0]);`
		d28a9c	`+ vector2x_u64 v0 = CRC_VEC_U64_DEF((u64)data, 0);`
		d28a9c	`+ v0 = asm_vpmsumd(v0, my_p); /* [00][00][xx][xx] */`
		d28a9c	`+ v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)v0,`
		d28a9c	`+ zero, 3); /* [x0][00][00][00] */`
		d28a9c	`+ v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)v0,`
		d28a9c	`+ (vector4x_u32)v0, 3); /* [00][x0][00][00] */`
		d28a9c	`+ v0 = asm_vpmsumd(v0, my_p); /* [00][00][xx][xx] */`
		d28a9c	`+ return (v0[VEC_U64_LO] >> 32) ^ crc;`
		d28a9c	`+}`
		d28a9c	`+`
		d28a9c	`+`
		d28a9c	`+static ASM_FUNC_ATTR_INLINE void`
		d28a9c	`+crc32r_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,`
		d28a9c	`+ const struct crc32_consts_s *consts)`
		d28a9c	`+{`
		d28a9c	`+ u32 crc = *pcrc;`
		d28a9c	`+ u32 data;`
		d28a9c	`+`
		d28a9c	`+ while (inlen >= 4)`
		d28a9c	`+ {`
		d28a9c	`+ data = buf_get_le32(inbuf);`
		d28a9c	`+ data ^= crc;`
		d28a9c	`+`
		d28a9c	`+ inlen -= 4;`
		d28a9c	`+ inbuf += 4;`
		d28a9c	`+`
		d28a9c	`+ crc = crc32r_ppc8_ce_reduction_4 (data, 0, consts);`
		d28a9c	`+ }`
		d28a9c	`+`
		d28a9c	`+ switch (inlen)`
		d28a9c	`+ {`
		d28a9c	`+ case 0:`
		d28a9c	`+ break;`
		d28a9c	`+ case 1:`
		d28a9c	`+ data = inbuf[0];`
		d28a9c	`+ data ^= crc;`
		d28a9c	`+ data <<= 24;`
		d28a9c	`+ crc >>= 8;`
		d28a9c	`+ crc = crc32r_ppc8_ce_reduction_4 (data, crc, consts);`
		d28a9c	`+ break;`
		d28a9c	`+ case 2:`
		d28a9c	`+ data = inbuf[0] << 0;`
		d28a9c	`+ data \|= inbuf[1] << 8;`
		d28a9c	`+ data ^= crc;`
		d28a9c	`+ data <<= 16;`
		d28a9c	`+ crc >>= 16;`
		d28a9c	`+ crc = crc32r_ppc8_ce_reduction_4 (data, crc, consts);`
		d28a9c	`+ break;`
		d28a9c	`+ case 3:`
		d28a9c	`+ data = inbuf[0] << 0;`
		d28a9c	`+ data \|= inbuf[1] << 8;`
		d28a9c	`+ data \|= inbuf[2] << 16;`
		d28a9c	`+ data ^= crc;`
		d28a9c	`+ data <<= 8;`
		d28a9c	`+ crc >>= 24;`
		d28a9c	`+ crc = crc32r_ppc8_ce_reduction_4 (data, crc, consts);`
		d28a9c	`+ break;`
		d28a9c	`+ }`
		d28a9c	`+`
		d28a9c	`+ *pcrc = crc;`
		d28a9c	`+}`
		d28a9c	`+`
		d28a9c	`+`
		d28a9c	`+static ASM_FUNC_ATTR_INLINE void`
		d28a9c	`+crc32_ppc8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,`
		d28a9c	`+ const struct crc32_consts_s *consts)`
		d28a9c	`+{`
		d28a9c	`+ vector4x_u32 zero = { 0, 0, 0, 0 };`
		d28a9c	`+ vector2x_u64 low_96bit_mask = CRC_VEC_U64_DEF(~0, ~((u64)(u32)-1 << 32));`
		d28a9c	`+ vector2x_u64 p_my = asm_swap_u64(CRC_VEC_U64_LOAD(0, &consts->my_p[0]));`
		d28a9c	`+ vector2x_u64 p_my_lo, p_my_hi;`
		d28a9c	`+ vector2x_u64 k2k1 = asm_swap_u64(CRC_VEC_U64_LOAD(0, &consts->k[1 - 1]));`
		d28a9c	`+ vector2x_u64 k4k3 = asm_swap_u64(CRC_VEC_U64_LOAD(0, &consts->k[3 - 1]));`
		d28a9c	`+ vector2x_u64 k4hi = CRC_VEC_U64_DEF(0, consts->k[4 - 1]);`
		d28a9c	`+ vector2x_u64 k5hi = CRC_VEC_U64_DEF(0, consts->k[5 - 1]);`
		d28a9c	`+ vector2x_u64 crc = CRC_VEC_U64_DEF(0, _gcry_bswap64(*pcrc));`
		d28a9c	`+ vector2x_u64 crc0, crc1, crc2, crc3;`
		d28a9c	`+ vector2x_u64 v0;`
		d28a9c	`+`
		d28a9c	`+ if (inlen >= 8 * 16)`
		d28a9c	`+ {`
		d28a9c	`+ crc0 = CRC_VEC_U64_LOAD_BE(0 * 16, inbuf);`
		d28a9c	`+ crc0 ^= crc;`
		d28a9c	`+ crc1 = CRC_VEC_U64_LOAD_BE(1 * 16, inbuf);`
		d28a9c	`+ crc2 = CRC_VEC_U64_LOAD_BE(2 * 16, inbuf);`
		d28a9c	`+ crc3 = CRC_VEC_U64_LOAD_BE(3 * 16, inbuf);`
		d28a9c	`+`
		d28a9c	`+ inbuf += 4 * 16;`
		d28a9c	`+ inlen -= 4 * 16;`
		d28a9c	`+`
		d28a9c	`+ /* Fold by 4. */`
		d28a9c	`+ while (inlen >= 4 * 16)`
		d28a9c	`+ {`
		d28a9c	`+ v0 = CRC_VEC_U64_LOAD_BE(0 * 16, inbuf);`
		d28a9c	`+ crc0 = asm_vpmsumd(crc0, k2k1) ^ v0;`
		d28a9c	`+`
		d28a9c	`+ v0 = CRC_VEC_U64_LOAD_BE(1 * 16, inbuf);`
		d28a9c	`+ crc1 = asm_vpmsumd(crc1, k2k1) ^ v0;`
		d28a9c	`+`
		d28a9c	`+ v0 = CRC_VEC_U64_LOAD_BE(2 * 16, inbuf);`
		d28a9c	`+ crc2 = asm_vpmsumd(crc2, k2k1) ^ v0;`
		d28a9c	`+`
		d28a9c	`+ v0 = CRC_VEC_U64_LOAD_BE(3 * 16, inbuf);`
		d28a9c	`+ crc3 = asm_vpmsumd(crc3, k2k1) ^ v0;`
		d28a9c	`+`
		d28a9c	`+ inbuf += 4 * 16;`
		d28a9c	`+ inlen -= 4 * 16;`
		d28a9c	`+ }`
		d28a9c	`+`
		d28a9c	`+ /* Fold 4 to 1. */`
		d28a9c	`+ crc1 ^= asm_vpmsumd(crc0, k4k3);`
		d28a9c	`+ crc2 ^= asm_vpmsumd(crc1, k4k3);`
		d28a9c	`+ crc3 ^= asm_vpmsumd(crc2, k4k3);`
		d28a9c	`+ crc = crc3;`
		d28a9c	`+ }`
		d28a9c	`+ else`
		d28a9c	`+ {`
		d28a9c	`+ v0 = CRC_VEC_U64_LOAD_BE(0, inbuf);`
		d28a9c	`+ crc ^= v0;`
		d28a9c	`+`
		d28a9c	`+ inbuf += 16;`
		d28a9c	`+ inlen -= 16;`
		d28a9c	`+ }`
		d28a9c	`+`
		d28a9c	`+ /* Fold by 1. */`
		d28a9c	`+ while (inlen >= 16)`
		d28a9c	`+ {`
		d28a9c	`+ v0 = CRC_VEC_U64_LOAD_BE(0, inbuf);`
		d28a9c	`+ crc = asm_vpmsumd(k4k3, crc);`
		d28a9c	`+ crc ^= v0;`
		d28a9c	`+`
		d28a9c	`+ inbuf += 16;`
		d28a9c	`+ inlen -= 16;`
		d28a9c	`+ }`
		d28a9c	`+`
		d28a9c	`+ /* Partial fold. */`
		d28a9c	`+ if (inlen)`
		d28a9c	`+ {`
		d28a9c	`+ /* Load last input and add padding zeros. */`
		d28a9c	`+ vector2x_u64 mask = CRC_VEC_U64_LOAD_LE(inlen, crc32_partial_fold_input_mask);`
		d28a9c	`+ vector2x_u64 shl_shuf = CRC_VEC_U64_LOAD_LE(32 - inlen, crc32_refl_shuf_shift);`
		d28a9c	`+ vector2x_u64 shr_shuf = CRC_VEC_U64_LOAD_LE(inlen + 16, crc32_shuf_shift);`
		d28a9c	`+`
		d28a9c	`+ v0 = CRC_VEC_U64_LOAD_LE(inlen - 16, inbuf);`
		d28a9c	`+ v0 &= mask;`
		d28a9c	`+`
		d28a9c	`+ crc = CRC_VEC_SWAP_TO_LE(crc);`
		d28a9c	`+ crc2 = (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero,`
		d28a9c	`+ (vector16x_u8)shr_shuf);`
		d28a9c	`+ v0 \|= crc2;`
		d28a9c	`+ v0 = CRC_VEC_SWAP(v0);`
		d28a9c	`+ crc = (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero,`
		d28a9c	`+ (vector16x_u8)shl_shuf);`
		d28a9c	`+ crc = asm_vpmsumd(k4k3, crc);`
		d28a9c	`+ crc ^= v0;`
		d28a9c	`+`
		d28a9c	`+ inbuf += inlen;`
		d28a9c	`+ inlen -= inlen;`
		d28a9c	`+ }`
		d28a9c	`+`
		d28a9c	`+ /* Final fold. */`
		d28a9c	`+`
		d28a9c	`+ /* reduce 128-bits to 96-bits */`
		d28a9c	`+ v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)crc,`
		d28a9c	`+ (vector4x_u32)zero, 2);`
		d28a9c	`+ crc = asm_vpmsumd(k4hi, crc);`
		d28a9c	`+ crc ^= v0; /* bottom 32-bit are zero */`
		d28a9c	`+`
		d28a9c	`+ /* reduce 96-bits to 64-bits */`
		d28a9c	`+ v0 = crc & low_96bit_mask; /* [00][x2][x1][00] */`
		d28a9c	`+ crc >>= 32; /* [00][x3][00][x0] */`
		d28a9c	`+ crc = asm_vpmsumd(k5hi, crc); /* [00][xx][xx][00] */`
		d28a9c	`+ crc ^= v0; /* top and bottom 32-bit are zero */`
		d28a9c	`+`
		d28a9c	`+ /* barrett reduction */`
		d28a9c	`+ p_my_hi = p_my;`
		d28a9c	`+ p_my_lo = p_my;`
		d28a9c	`+ p_my_hi[VEC_U64_LO] = 0;`
		d28a9c	`+ p_my_lo[VEC_U64_HI] = 0;`
		d28a9c	`+ v0 = crc >> 32; /* [00][00][00][x1] */`
		d28a9c	`+ crc = asm_vpmsumd(p_my_hi, crc); /* [00][xx][xx][xx] */`
		d28a9c	`+ crc = (vector2x_u64)vec_sld_u32((vector4x_u32)crc,`
		d28a9c	`+ (vector4x_u32)crc, 3); /* [x0][00][x2][x1] */`
		d28a9c	`+ crc = asm_vpmsumd(p_my_lo, crc); /* [00][xx][xx][xx] */`
		d28a9c	`+ crc ^= v0;`
		d28a9c	`+`
		d28a9c	`+ *pcrc = _gcry_bswap32(crc[VEC_U64_LO]);`
		d28a9c	`+}`
		d28a9c	`+`
		d28a9c	`+`
		d28a9c	`+static ASM_FUNC_ATTR_INLINE u32`
		d28a9c	`+crc32_ppc8_ce_reduction_4 (u32 data, u32 crc,`
		d28a9c	`+ const struct crc32_consts_s *consts)`
		d28a9c	`+{`
		d28a9c	`+ vector2x_u64 my_p = CRC_VEC_U64_LOAD(0, &consts->my_p[0]);`
		d28a9c	`+ vector2x_u64 v0 = CRC_VEC_U64_DEF((u64)data << 32, 0);`
		d28a9c	`+ v0 = asm_vpmsumd(v0, my_p); /* [00][x1][x0][00] */`
		d28a9c	`+ v0[VEC_U64_LO] = 0; /* [00][x1][00][00] */`
		d28a9c	`+ v0 = asm_vpmsumd(v0, my_p); /* [00][00][xx][xx] */`
		d28a9c	`+ return _gcry_bswap32(v0[VEC_U64_LO]) ^ crc;`
		d28a9c	`+}`
		d28a9c	`+`
		d28a9c	`+`
		d28a9c	`+static ASM_FUNC_ATTR_INLINE void`
		d28a9c	`+crc32_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,`
		d28a9c	`+ const struct crc32_consts_s *consts)`
		d28a9c	`+{`
		d28a9c	`+ u32 crc = *pcrc;`
		d28a9c	`+ u32 data;`
		d28a9c	`+`
		d28a9c	`+ while (inlen >= 4)`
		d28a9c	`+ {`
		d28a9c	`+ data = buf_get_le32(inbuf);`
		d28a9c	`+ data ^= crc;`
		d28a9c	`+ data = _gcry_bswap32(data);`
		d28a9c	`+`
		d28a9c	`+ inlen -= 4;`
		d28a9c	`+ inbuf += 4;`
		d28a9c	`+`
		d28a9c	`+ crc = crc32_ppc8_ce_reduction_4 (data, 0, consts);`
		d28a9c	`+ }`
		d28a9c	`+`
		d28a9c	`+ switch (inlen)`
		d28a9c	`+ {`
		d28a9c	`+ case 0:`
		d28a9c	`+ break;`
		d28a9c	`+ case 1:`
		d28a9c	`+ data = inbuf[0];`
		d28a9c	`+ data ^= crc;`
		d28a9c	`+ data = data & 0xffU;`
		d28a9c	`+ crc = crc >> 8;`
		d28a9c	`+ crc = crc32_ppc8_ce_reduction_4 (data, crc, consts);`
		d28a9c	`+ break;`
		d28a9c	`+ case 2:`
		d28a9c	`+ data = inbuf[0] << 0;`
		d28a9c	`+ data \|= inbuf[1] << 8;`
		d28a9c	`+ data ^= crc;`
		d28a9c	`+ data = _gcry_bswap32(data << 16);`
		d28a9c	`+ crc = crc >> 16;`
		d28a9c	`+ crc = crc32_ppc8_ce_reduction_4 (data, crc, consts);`
		d28a9c	`+ break;`
		d28a9c	`+ case 3:`
		d28a9c	`+ data = inbuf[0] << 0;`
		d28a9c	`+ data \|= inbuf[1] << 8;`
		d28a9c	`+ data \|= inbuf[2] << 16;`
		d28a9c	`+ data ^= crc;`
		d28a9c	`+ data = _gcry_bswap32(data << 8);`
		d28a9c	`+ crc = crc >> 24;`
		d28a9c	`+ crc = crc32_ppc8_ce_reduction_4 (data, crc, consts);`
		d28a9c	`+ break;`
		d28a9c	`+ }`
		d28a9c	`+`
		d28a9c	`+ *pcrc = crc;`
		d28a9c	`+}`
		d28a9c	`+`
		d28a9c	`+void ASM_FUNC_ATTR`
		d28a9c	`+_gcry_crc32_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, size_t inlen)`
		d28a9c	`+{`
		d28a9c	`+ const struct crc32_consts_s *consts = &crc32_consts;`
		d28a9c	`+`
		d28a9c	`+ if (!inlen)`
		d28a9c	`+ return;`
		d28a9c	`+`
		d28a9c	`+ if (inlen >= 16)`
		d28a9c	`+ crc32r_ppc8_ce_bulk (pcrc, inbuf, inlen, consts);`
		d28a9c	`+ else`
		d28a9c	`+ crc32r_less_than_16 (pcrc, inbuf, inlen, consts);`
		d28a9c	`+}`
		d28a9c	`+`
		d28a9c	`+void ASM_FUNC_ATTR`
		d28a9c	`+_gcry_crc24rfc2440_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, size_t inlen)`
		d28a9c	`+{`
		d28a9c	`+ const struct crc32_consts_s *consts = &crc24rfc2440_consts;`
		d28a9c	`+`
		d28a9c	`+ if (!inlen)`
		d28a9c	`+ return;`
		d28a9c	`+`
		d28a9c	`+ /* Note: pcrc in input endian. */`
		d28a9c	`+`
		d28a9c	`+ if (inlen >= 16)`
		d28a9c	`+ crc32_ppc8_ce_bulk (pcrc, inbuf, inlen, consts);`
		d28a9c	`+ else`
		d28a9c	`+ crc32_less_than_16 (pcrc, inbuf, inlen, consts);`
		d28a9c	`+}`
		d28a9c	`+`
		d28a9c	`+#endif`
		d28a9c	`diff --git a/cipher/crc.c b/cipher/crc.c`
		d28a9c	`index a1ce50b6..bbb159ce 100644`
		d28a9c	`--- a/cipher/crc.c`
		d28a9c	`+++ b/cipher/crc.c`
		d28a9c	`@@ -43,11 +43,27 @@`
		d28a9c	`#endif /* USE_INTEL_PCLMUL */`
		d28a9c
		d28a9c
		d28a9c	`+/* USE_PPC_VPMSUM indicates whether to enable PowerPC vector`
		d28a9c	`+ * accelerated code. */`
		d28a9c	`+#undef USE_PPC_VPMSUM`
		d28a9c	`+#ifdef ENABLE_PPC_CRYPTO_SUPPORT`
		d28a9c	`+# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \`
		d28a9c	`+ defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC)`
		d28a9c	`+# if __GNUC__ >= 4`
		d28a9c	`+# define USE_PPC_VPMSUM 1`
		d28a9c	`+# endif`
		d28a9c	`+# endif`
		d28a9c	`+#endif /* USE_PPC_VPMSUM */`
		d28a9c	`+`
		d28a9c	`+`
		d28a9c	`typedef struct`
		d28a9c	`{`
		d28a9c	`u32 CRC;`
		d28a9c	`#ifdef USE_INTEL_PCLMUL`
		d28a9c	`unsigned int use_pclmul:1; /* Intel PCLMUL shall be used. */`
		d28a9c	`+#endif`
		d28a9c	`+#ifdef USE_PPC_VPMSUM`
		d28a9c	`+ unsigned int use_vpmsum:1; /* POWER vpmsum shall be used. */`
		d28a9c	`#endif`
		d28a9c	`byte buf[4];`
		d28a9c	`}`
		d28a9c	`@@ -61,6 +77,20 @@ void _gcry_crc24rfc2440_intel_pclmul (u32 *pcrc, const byte *inbuf,`
		d28a9c	`size_t inlen);`
		d28a9c	`#endif`
		d28a9c
		d28a9c	`+#ifdef USE_ARM_PMULL`
		d28a9c	`+/*-- crc-armv8-ce.c --*/`
		d28a9c	`+void _gcry_crc32_armv8_ce_pmull (u32 *pcrc, const byte *inbuf, size_t inlen);`
		d28a9c	`+void _gcry_crc24rfc2440_armv8_ce_pmull (u32 *pcrc, const byte *inbuf,`
		d28a9c	`+ size_t inlen);`
		d28a9c	`+#endif`
		d28a9c	`+`
		d28a9c	`+#ifdef USE_PPC_VPMSUM`
		d28a9c	`+/*-- crc-ppc.c --*/`
		d28a9c	`+void _gcry_crc32_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, size_t inlen);`
		d28a9c	`+void _gcry_crc24rfc2440_ppc8_vpmsum (u32 *pcrc, const byte *inbuf,`
		d28a9c	`+ size_t inlen);`
		d28a9c	`+#endif`
		d28a9c	`+`
		d28a9c
		d28a9c	`/*`
		d28a9c	`* Code generated by universal_crc by Danjel McGougan`
		d28a9c	`@@ -361,11 +391,13 @@ static void`
		d28a9c	`crc32_init (void *context, unsigned int flags)`
		d28a9c	`{`
		d28a9c	`CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;`
		d28a9c	`-#ifdef USE_INTEL_PCLMUL`
		d28a9c	`u32 hwf = _gcry_get_hw_features ();`
		d28a9c	`-`
		d28a9c	`+#ifdef USE_INTEL_PCLMUL`
		d28a9c	`ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL);`
		d28a9c	`#endif`
		d28a9c	`+#ifdef USE_PPC_VPMSUM`
		d28a9c	`+ ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07);`
		d28a9c	`+#endif`
		d28a9c
		d28a9c	`(void)flags;`
		d28a9c
		d28a9c	`@@ -386,6 +418,13 @@ crc32_write (void *context, const void *inbuf_arg, size_t inlen)`
		d28a9c	`return;`
		d28a9c	`}`
		d28a9c	`#endif`
		d28a9c	`+#ifdef USE_PPC_VPMSUM`
		d28a9c	`+ if (ctx->use_vpmsum)`
		d28a9c	`+ {`
		d28a9c	`+ _gcry_crc32_ppc8_vpmsum(&ctx->CRC, inbuf, inlen);`
		d28a9c	`+ return;`
		d28a9c	`+ }`
		d28a9c	`+#endif`
		d28a9c
		d28a9c	`if (!inbuf \|\| !inlen)`
		d28a9c	`return;`
		d28a9c	`@@ -444,6 +483,10 @@ crc32rfc1510_init (void *context, unsigned int flags)`
		d28a9c
		d28a9c	`ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL);`
		d28a9c	`#endif`
		d28a9c	`+#ifdef USE_PPC_VPMSUM`
		d28a9c	`+ u32 hwf = _gcry_get_hw_features ();`
		d28a9c	`+ ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07);`
		d28a9c	`+#endif`
		d28a9c
		d28a9c	`(void)flags;`
		d28a9c
		d28a9c	`@@ -774,6 +817,10 @@ crc24rfc2440_init (void *context, unsigned int flags)`
		d28a9c
		d28a9c	`ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL);`
		d28a9c	`#endif`
		d28a9c	`+#ifdef USE_PPC_VPMSUM`
		d28a9c	`+ u32 hwf = _gcry_get_hw_features ();`
		d28a9c	`+ ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07);`
		d28a9c	`+#endif`
		d28a9c
		d28a9c	`(void)flags;`
		d28a9c
		d28a9c	`@@ -794,6 +841,13 @@ crc24rfc2440_write (void *context, const void *inbuf_arg, size_t inlen)`
		d28a9c	`return;`
		d28a9c	`}`
		d28a9c	`#endif`
		d28a9c	`+#ifdef USE_PPC_VPMSUM`
		d28a9c	`+ if (ctx->use_vpmsum)`
		d28a9c	`+ {`
		d28a9c	`+ _gcry_crc24rfc2440_ppc8_vpmsum(&ctx->CRC, inbuf, inlen);`
		d28a9c	`+ return;`
		d28a9c	`+ }`
		d28a9c	`+#endif`
		d28a9c
		d28a9c	`if (!inbuf \|\| !inlen)`
		d28a9c	`return;`
		d28a9c	`diff --git a/configure.ac b/configure.ac`
		d28a9c	`index 953a20e9..b6b6455a 100644`
		d28a9c	`--- a/configure.ac`
		d28a9c	`+++ b/configure.ac`
		d28a9c	`@@ -1916,6 +1916,7 @@ AC_CACHE_CHECK([whether GCC inline assembler supports PowerPC AltiVec/VSX/crypto`
		d28a9c	`"vadduwm %v0, %v1, %v22;\n"`
		d28a9c	`"vshasigmaw %v0, %v1, 0, 15;\n"`
		d28a9c	`"vshasigmad %v0, %v1, 0, 15;\n"`
		d28a9c	`+ "vpmsumd %v11, %v11, %v11;\n"`
		d28a9c	`);`
		d28a9c	`]])],`
		d28a9c	`[gcry_cv_gcc_inline_asm_ppc_altivec=yes])`
		d28a9c	`@@ -2556,6 +2557,15 @@ if test "$found" = "1" ; then`
		d28a9c	`# Build with the assembly implementation`
		d28a9c	`GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-intel-pclmul.lo"`
		d28a9c	`;;`
		d28a9c	`+ powerpc64le-*-*)`
		d28a9c	`+ GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"`
		d28a9c	`+ ;;`
		d28a9c	`+ powerpc64-*-*)`
		d28a9c	`+ GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"`
		d28a9c	`+ ;;`
		d28a9c	`+ powerpc-*-*)`
		d28a9c	`+ GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"`
		d28a9c	`+ ;;`
		d28a9c	`esac`
		d28a9c	`fi`

rpms / libgcrypt

Source Code

Blame SOURCES/libgcrypt-1.8.5-ppc-crc32.patch