d7a338
From 71b1812bf9a785b66e3f17175580d3d20cea9c0c Mon Sep 17 00:00:00 2001
d7a338
From: Daiki Ueno <ueno@gnu.org>
d7a338
Date: Tue, 12 Oct 2021 13:33:31 +0200
d7a338
Subject: [PATCH] x86: port Intel CET support
d7a338
d7a338
Signed-off-by: Daiki Ueno <ueno@gnu.org>
d7a338
---
d7a338
 lib/accelerated/x86/elf/aes-ssse3-x86.s       | 30 ++++++++++++++
d7a338
 lib/accelerated/x86/elf/aes-ssse3-x86_64.s    | 26 +++++++++++++
d7a338
 lib/accelerated/x86/elf/aesni-gcm-x86_64.s    | 21 ++++++++++
d7a338
 lib/accelerated/x86/elf/aesni-x86.s           | 39 +++++++++++++++++++
d7a338
 lib/accelerated/x86/elf/aesni-x86_64.s        | 32 +++++++++++++++
d7a338
 lib/accelerated/x86/elf/ghash-x86_64.s        | 27 +++++++++++++
d7a338
 lib/accelerated/x86/elf/sha1-ssse3-x86.s      | 18 +++++++++
d7a338
 lib/accelerated/x86/elf/sha1-ssse3-x86_64.s   | 21 ++++++++++
d7a338
 lib/accelerated/x86/elf/sha256-ssse3-x86.s    | 18 +++++++++
d7a338
 lib/accelerated/x86/elf/sha256-ssse3-x86_64.s | 21 ++++++++++
d7a338
 lib/accelerated/x86/elf/sha512-ssse3-x86.s    | 18 +++++++++
d7a338
 lib/accelerated/x86/elf/sha512-ssse3-x86_64.s | 21 ++++++++++
d7a338
 12 files changed, 292 insertions(+)
d7a338
d7a338
diff --git a/lib/accelerated/x86/elf/aes-ssse3-x86.s b/lib/accelerated/x86/elf/aes-ssse3-x86.s
d7a338
index 265e28a7ef..7be53059f7 100644
d7a338
--- a/lib/accelerated/x86/elf/aes-ssse3-x86.s
d7a338
+++ b/lib/accelerated/x86/elf/aes-ssse3-x86.s
d7a338
@@ -71,6 +71,7 @@
d7a338
 .type	_vpaes_preheat,@function
d7a338
 .align	16
d7a338
 _vpaes_preheat:
d7a338
+.byte	243,15,30,251
d7a338
 	addl	(%esp),%ebp
d7a338
 	movdqa	-48(%ebp),%xmm7
d7a338
 	movdqa	-16(%ebp),%xmm6
d7a338
@@ -79,6 +80,7 @@ _vpaes_preheat:
d7a338
 .type	_vpaes_encrypt_core,@function
d7a338
 .align	16
d7a338
 _vpaes_encrypt_core:
d7a338
+.byte	243,15,30,251
d7a338
 	movl	$16,%ecx
d7a338
 	movl	240(%edx),%eax
d7a338
 	movdqa	%xmm6,%xmm1
d7a338
@@ -156,6 +158,7 @@ _vpaes_encrypt_core:
d7a338
 .type	_vpaes_decrypt_core,@function
d7a338
 .align	16
d7a338
 _vpaes_decrypt_core:
d7a338
+.byte	243,15,30,251
d7a338
 	leal	608(%ebp),%ebx
d7a338
 	movl	240(%edx),%eax
d7a338
 	movdqa	%xmm6,%xmm1
d7a338
@@ -244,6 +247,7 @@ _vpaes_decrypt_core:
d7a338
 .type	_vpaes_schedule_core,@function
d7a338
 .align	16
d7a338
 _vpaes_schedule_core:
d7a338
+.byte	243,15,30,251
d7a338
 	addl	(%esp),%ebp
d7a338
 	movdqu	(%esi),%xmm0
d7a338
 	movdqa	320(%ebp),%xmm2
d7a338
@@ -338,6 +342,7 @@ _vpaes_schedule_core:
d7a338
 .type	_vpaes_schedule_192_smear,@function
d7a338
 .align	16
d7a338
 _vpaes_schedule_192_smear:
d7a338
+.byte	243,15,30,251
d7a338
 	pshufd	$128,%xmm6,%xmm1
d7a338
 	pshufd	$254,%xmm7,%xmm0
d7a338
 	pxor	%xmm1,%xmm6
d7a338
@@ -350,6 +355,7 @@ _vpaes_schedule_192_smear:
d7a338
 .type	_vpaes_schedule_round,@function
d7a338
 .align	16
d7a338
 _vpaes_schedule_round:
d7a338
+.byte	243,15,30,251
d7a338
 	movdqa	8(%esp),%xmm2
d7a338
 	pxor	%xmm1,%xmm1
d7a338
 .byte	102,15,58,15,202,15
d7a338
@@ -399,6 +405,7 @@ _vpaes_schedule_round:
d7a338
 .type	_vpaes_schedule_transform,@function
d7a338
 .align	16
d7a338
 _vpaes_schedule_transform:
d7a338
+.byte	243,15,30,251
d7a338
 	movdqa	-16(%ebp),%xmm2
d7a338
 	movdqa	%xmm2,%xmm1
d7a338
 	pandn	%xmm0,%xmm1
d7a338
@@ -414,6 +421,7 @@ _vpaes_schedule_transform:
d7a338
 .type	_vpaes_schedule_mangle,@function
d7a338
 .align	16
d7a338
 _vpaes_schedule_mangle:
d7a338
+.byte	243,15,30,251
d7a338
 	movdqa	%xmm0,%xmm4
d7a338
 	movdqa	128(%ebp),%xmm5
d7a338
 	testl	%edi,%edi
d7a338
@@ -475,6 +483,7 @@ _vpaes_schedule_mangle:
d7a338
 .align	16
d7a338
 vpaes_set_encrypt_key:
d7a338
 .L_vpaes_set_encrypt_key_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -508,6 +517,7 @@ vpaes_set_encrypt_key:
d7a338
 .align	16
d7a338
 vpaes_set_decrypt_key:
d7a338
 .L_vpaes_set_decrypt_key_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -546,6 +556,7 @@ vpaes_set_decrypt_key:
d7a338
 .align	16
d7a338
 vpaes_encrypt:
d7a338
 .L_vpaes_encrypt_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -575,6 +586,7 @@ vpaes_encrypt:
d7a338
 .align	16
d7a338
 vpaes_decrypt:
d7a338
 .L_vpaes_decrypt_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -604,6 +616,7 @@ vpaes_decrypt:
d7a338
 .align	16
d7a338
 vpaes_cbc_encrypt:
d7a338
 .L_vpaes_cbc_encrypt_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -671,4 +684,21 @@ vpaes_cbc_encrypt:
d7a338
 	ret
d7a338
 .size	vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
d7a338
 
d7a338
+	.section ".note.gnu.property", "a"
d7a338
+	.p2align 2
d7a338
+	.long 1f - 0f
d7a338
+	.long 4f - 1f
d7a338
+	.long 5
d7a338
+0:
d7a338
+	.asciz "GNU"
d7a338
+1:
d7a338
+	.p2align 2
d7a338
+	.long 0xc0000002
d7a338
+	.long 3f - 2f
d7a338
+2:
d7a338
+	.long 3
d7a338
+3:
d7a338
+	.p2align 2
d7a338
+4:
d7a338
+
d7a338
 .section .note.GNU-stack,"",%progbits
d7a338
diff --git a/lib/accelerated/x86/elf/aes-ssse3-x86_64.s b/lib/accelerated/x86/elf/aes-ssse3-x86_64.s
d7a338
index ea1216baf7..5a3f336f26 100644
d7a338
--- a/lib/accelerated/x86/elf/aes-ssse3-x86_64.s
d7a338
+++ b/lib/accelerated/x86/elf/aes-ssse3-x86_64.s
d7a338
@@ -635,6 +635,7 @@ _vpaes_schedule_mangle:
d7a338
 .align	16
d7a338
 vpaes_set_encrypt_key:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	movl	%esi,%eax
d7a338
 	shrl	$5,%eax
d7a338
 	addl	$5,%eax
d7a338
@@ -653,6 +654,7 @@ vpaes_set_encrypt_key:
d7a338
 .align	16
d7a338
 vpaes_set_decrypt_key:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	movl	%esi,%eax
d7a338
 	shrl	$5,%eax
d7a338
 	addl	$5,%eax
d7a338
@@ -676,6 +678,7 @@ vpaes_set_decrypt_key:
d7a338
 .align	16
d7a338
 vpaes_encrypt:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	movdqu	(%rdi),%xmm0
d7a338
 	call	_vpaes_preheat
d7a338
 	call	_vpaes_encrypt_core
d7a338
@@ -689,6 +692,7 @@ vpaes_encrypt:
d7a338
 .align	16
d7a338
 vpaes_decrypt:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	movdqu	(%rdi),%xmm0
d7a338
 	call	_vpaes_preheat
d7a338
 	call	_vpaes_decrypt_core
d7a338
@@ -701,6 +705,7 @@ vpaes_decrypt:
d7a338
 .align	16
d7a338
 vpaes_cbc_encrypt:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	xchgq	%rcx,%rdx
d7a338
 	subq	$16,%rcx
d7a338
 	jc	.Lcbc_abort
d7a338
@@ -863,5 +868,26 @@ _vpaes_consts:
d7a338
 .byte	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
d7a338
 .align	64
d7a338
 .size	_vpaes_consts,.-_vpaes_consts
d7a338
+	.section ".note.gnu.property", "a"
d7a338
+	.p2align 3
d7a338
+	.long 1f - 0f
d7a338
+	.long 4f - 1f
d7a338
+	.long 5
d7a338
+0:
d7a338
+	# "GNU" encoded with .byte, since .asciz isn't supported
d7a338
+	# on Solaris.
d7a338
+	.byte 0x47
d7a338
+	.byte 0x4e
d7a338
+	.byte 0x55
d7a338
+	.byte 0
d7a338
+1:
d7a338
+	.p2align 3
d7a338
+	.long 0xc0000002
d7a338
+	.long 3f - 2f
d7a338
+2:
d7a338
+	.long 3
d7a338
+3:
d7a338
+	.p2align 3
d7a338
+4:
d7a338
 
d7a338
 .section .note.GNU-stack,"",%progbits
d7a338
diff --git a/lib/accelerated/x86/elf/aesni-gcm-x86_64.s b/lib/accelerated/x86/elf/aesni-gcm-x86_64.s
d7a338
index 461dd026b9..ea5398bc2c 100644
d7a338
--- a/lib/accelerated/x86/elf/aesni-gcm-x86_64.s
d7a338
+++ b/lib/accelerated/x86/elf/aesni-gcm-x86_64.s
d7a338
@@ -826,5 +826,26 @@ aesni_gcm_encrypt:
d7a338
 .byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
d7a338
 .byte	65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
d7a338
 .align	64
d7a338
+	.section ".note.gnu.property", "a"
d7a338
+	.p2align 3
d7a338
+	.long 1f - 0f
d7a338
+	.long 4f - 1f
d7a338
+	.long 5
d7a338
+0:
d7a338
+	# "GNU" encoded with .byte, since .asciz isn't supported
d7a338
+	# on Solaris.
d7a338
+	.byte 0x47
d7a338
+	.byte 0x4e
d7a338
+	.byte 0x55
d7a338
+	.byte 0
d7a338
+1:
d7a338
+	.p2align 3
d7a338
+	.long 0xc0000002
d7a338
+	.long 3f - 2f
d7a338
+2:
d7a338
+	.long 3
d7a338
+3:
d7a338
+	.p2align 3
d7a338
+4:
d7a338
 
d7a338
 .section .note.GNU-stack,"",%progbits
d7a338
diff --git a/lib/accelerated/x86/elf/aesni-x86.s b/lib/accelerated/x86/elf/aesni-x86.s
d7a338
index 6e4860209f..f41d5f9ef3 100644
d7a338
--- a/lib/accelerated/x86/elf/aesni-x86.s
d7a338
+++ b/lib/accelerated/x86/elf/aesni-x86.s
d7a338
@@ -43,6 +43,7 @@
d7a338
 .align	16
d7a338
 aesni_encrypt:
d7a338
 .L_aesni_encrypt_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	movl	4(%esp),%eax
d7a338
 	movl	12(%esp),%edx
d7a338
 	movups	(%eax),%xmm2
d7a338
@@ -70,6 +71,7 @@ aesni_encrypt:
d7a338
 .align	16
d7a338
 aesni_decrypt:
d7a338
 .L_aesni_decrypt_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	movl	4(%esp),%eax
d7a338
 	movl	12(%esp),%edx
d7a338
 	movups	(%eax),%xmm2
d7a338
@@ -95,6 +97,7 @@ aesni_decrypt:
d7a338
 .type	_aesni_encrypt2,@function
d7a338
 .align	16
d7a338
 _aesni_encrypt2:
d7a338
+.byte	243,15,30,251
d7a338
 	movups	(%edx),%xmm0
d7a338
 	shll	$4,%ecx
d7a338
 	movups	16(%edx),%xmm1
d7a338
@@ -122,6 +125,7 @@ _aesni_encrypt2:
d7a338
 .type	_aesni_decrypt2,@function
d7a338
 .align	16
d7a338
 _aesni_decrypt2:
d7a338
+.byte	243,15,30,251
d7a338
 	movups	(%edx),%xmm0
d7a338
 	shll	$4,%ecx
d7a338
 	movups	16(%edx),%xmm1
d7a338
@@ -149,6 +153,7 @@ _aesni_decrypt2:
d7a338
 .type	_aesni_encrypt3,@function
d7a338
 .align	16
d7a338
 _aesni_encrypt3:
d7a338
+.byte	243,15,30,251
d7a338
 	movups	(%edx),%xmm0
d7a338
 	shll	$4,%ecx
d7a338
 	movups	16(%edx),%xmm1
d7a338
@@ -181,6 +186,7 @@ _aesni_encrypt3:
d7a338
 .type	_aesni_decrypt3,@function
d7a338
 .align	16
d7a338
 _aesni_decrypt3:
d7a338
+.byte	243,15,30,251
d7a338
 	movups	(%edx),%xmm0
d7a338
 	shll	$4,%ecx
d7a338
 	movups	16(%edx),%xmm1
d7a338
@@ -213,6 +219,7 @@ _aesni_decrypt3:
d7a338
 .type	_aesni_encrypt4,@function
d7a338
 .align	16
d7a338
 _aesni_encrypt4:
d7a338
+.byte	243,15,30,251
d7a338
 	movups	(%edx),%xmm0
d7a338
 	movups	16(%edx),%xmm1
d7a338
 	shll	$4,%ecx
d7a338
@@ -251,6 +258,7 @@ _aesni_encrypt4:
d7a338
 .type	_aesni_decrypt4,@function
d7a338
 .align	16
d7a338
 _aesni_decrypt4:
d7a338
+.byte	243,15,30,251
d7a338
 	movups	(%edx),%xmm0
d7a338
 	movups	16(%edx),%xmm1
d7a338
 	shll	$4,%ecx
d7a338
@@ -289,6 +297,7 @@ _aesni_decrypt4:
d7a338
 .type	_aesni_encrypt6,@function
d7a338
 .align	16
d7a338
 _aesni_encrypt6:
d7a338
+.byte	243,15,30,251
d7a338
 	movups	(%edx),%xmm0
d7a338
 	shll	$4,%ecx
d7a338
 	movups	16(%edx),%xmm1
d7a338
@@ -343,6 +352,7 @@ _aesni_encrypt6:
d7a338
 .type	_aesni_decrypt6,@function
d7a338
 .align	16
d7a338
 _aesni_decrypt6:
d7a338
+.byte	243,15,30,251
d7a338
 	movups	(%edx),%xmm0
d7a338
 	shll	$4,%ecx
d7a338
 	movups	16(%edx),%xmm1
d7a338
@@ -399,6 +409,7 @@ _aesni_decrypt6:
d7a338
 .align	16
d7a338
 aesni_ecb_encrypt:
d7a338
 .L_aesni_ecb_encrypt_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -634,6 +645,7 @@ aesni_ecb_encrypt:
d7a338
 .align	16
d7a338
 aesni_ccm64_encrypt_blocks:
d7a338
 .L_aesni_ccm64_encrypt_blocks_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -722,6 +734,7 @@ aesni_ccm64_encrypt_blocks:
d7a338
 .align	16
d7a338
 aesni_ccm64_decrypt_blocks:
d7a338
 .L_aesni_ccm64_decrypt_blocks_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -845,6 +858,7 @@ aesni_ccm64_decrypt_blocks:
d7a338
 .align	16
d7a338
 aesni_ctr32_encrypt_blocks:
d7a338
 .L_aesni_ctr32_encrypt_blocks_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -1083,6 +1097,7 @@ aesni_ctr32_encrypt_blocks:
d7a338
 .align	16
d7a338
 aesni_xts_encrypt:
d7a338
 .L_aesni_xts_encrypt_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -1443,6 +1458,7 @@ aesni_xts_encrypt:
d7a338
 .align	16
d7a338
 aesni_xts_decrypt:
d7a338
 .L_aesni_xts_decrypt_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -1833,6 +1849,7 @@ aesni_xts_decrypt:
d7a338
 .align	16
d7a338
 aesni_ocb_encrypt:
d7a338
 .L_aesni_ocb_encrypt_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -2228,6 +2245,7 @@ aesni_ocb_encrypt:
d7a338
 .align	16
d7a338
 aesni_ocb_decrypt:
d7a338
 .L_aesni_ocb_decrypt_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -2623,6 +2641,7 @@ aesni_ocb_decrypt:
d7a338
 .align	16
d7a338
 aesni_cbc_encrypt:
d7a338
 .L_aesni_cbc_encrypt_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -2882,6 +2901,7 @@ aesni_cbc_encrypt:
d7a338
 .type	_aesni_set_encrypt_key,@function
d7a338
 .align	16
d7a338
 _aesni_set_encrypt_key:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	testl	%eax,%eax
d7a338
@@ -3217,6 +3237,7 @@ _aesni_set_encrypt_key:
d7a338
 .align	16
d7a338
 aesni_set_encrypt_key:
d7a338
 .L_aesni_set_encrypt_key_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	movl	4(%esp),%eax
d7a338
 	movl	8(%esp),%ecx
d7a338
 	movl	12(%esp),%edx
d7a338
@@ -3228,6 +3249,7 @@ aesni_set_encrypt_key:
d7a338
 .align	16
d7a338
 aesni_set_decrypt_key:
d7a338
 .L_aesni_set_decrypt_key_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	movl	4(%esp),%eax
d7a338
 	movl	8(%esp),%ecx
d7a338
 	movl	12(%esp),%edx
d7a338
@@ -3275,4 +3297,21 @@ aesni_set_decrypt_key:
d7a338
 .byte	115,108,46,111,114,103,62,0
d7a338
 .comm	_gnutls_x86_cpuid_s,16,4
d7a338
 
d7a338
+	.section ".note.gnu.property", "a"
d7a338
+	.p2align 2
d7a338
+	.long 1f - 0f
d7a338
+	.long 4f - 1f
d7a338
+	.long 5
d7a338
+0:
d7a338
+	.asciz "GNU"
d7a338
+1:
d7a338
+	.p2align 2
d7a338
+	.long 0xc0000002
d7a338
+	.long 3f - 2f
d7a338
+2:
d7a338
+	.long 3
d7a338
+3:
d7a338
+	.p2align 2
d7a338
+4:
d7a338
+
d7a338
 .section .note.GNU-stack,"",%progbits
d7a338
diff --git a/lib/accelerated/x86/elf/aesni-x86_64.s b/lib/accelerated/x86/elf/aesni-x86_64.s
d7a338
index acc7c2c555..e3f9d5a995 100644
d7a338
--- a/lib/accelerated/x86/elf/aesni-x86_64.s
d7a338
+++ b/lib/accelerated/x86/elf/aesni-x86_64.s
d7a338
@@ -44,6 +44,7 @@
d7a338
 .align	16
d7a338
 aesni_encrypt:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	movups	(%rdi),%xmm2
d7a338
 	movl	240(%rdx),%eax
d7a338
 	movups	(%rdx),%xmm0
d7a338
@@ -70,6 +71,7 @@ aesni_encrypt:
d7a338
 .align	16
d7a338
 aesni_decrypt:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	movups	(%rdi),%xmm2
d7a338
 	movl	240(%rdx),%eax
d7a338
 	movups	(%rdx),%xmm0
d7a338
@@ -557,6 +559,7 @@ _aesni_decrypt8:
d7a338
 .align	16
d7a338
 aesni_ecb_encrypt:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	andq	$-16,%rdx
d7a338
 	jz	.Lecb_ret
d7a338
 
d7a338
@@ -901,6 +904,7 @@ aesni_ecb_encrypt:
d7a338
 .align	16
d7a338
 aesni_ccm64_encrypt_blocks:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	movl	240(%rcx),%eax
d7a338
 	movdqu	(%r8),%xmm6
d7a338
 	movdqa	.Lincrement64(%rip),%xmm9
d7a338
@@ -966,6 +970,7 @@ aesni_ccm64_encrypt_blocks:
d7a338
 .align	16
d7a338
 aesni_ccm64_decrypt_blocks:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	movl	240(%rcx),%eax
d7a338
 	movups	(%r8),%xmm6
d7a338
 	movdqu	(%r9),%xmm3
d7a338
@@ -1065,6 +1070,7 @@ aesni_ccm64_decrypt_blocks:
d7a338
 .align	16
d7a338
 aesni_ctr32_encrypt_blocks:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	cmpq	$1,%rdx
d7a338
 	jne	.Lctr32_bulk
d7a338
 
d7a338
@@ -1643,6 +1649,7 @@ aesni_ctr32_encrypt_blocks:
d7a338
 .align	16
d7a338
 aesni_xts_encrypt:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	leaq	(%rsp),%r11
d7a338
 .cfi_def_cfa_register	%r11
d7a338
 	pushq	%rbp
d7a338
@@ -2113,6 +2120,7 @@ aesni_xts_encrypt:
d7a338
 .align	16
d7a338
 aesni_xts_decrypt:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	leaq	(%rsp),%r11
d7a338
 .cfi_def_cfa_register	%r11
d7a338
 	pushq	%rbp
d7a338
@@ -2620,6 +2628,7 @@ aesni_xts_decrypt:
d7a338
 .align	32
d7a338
 aesni_ocb_encrypt:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	leaq	(%rsp),%rax
d7a338
 	pushq	%rbx
d7a338
 .cfi_adjust_cfa_offset	8
d7a338
@@ -3047,6 +3056,7 @@ __ocb_encrypt1:
d7a338
 .align	32
d7a338
 aesni_ocb_decrypt:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	leaq	(%rsp),%rax
d7a338
 	pushq	%rbx
d7a338
 .cfi_adjust_cfa_offset	8
d7a338
@@ -3484,6 +3494,7 @@ __ocb_decrypt1:
d7a338
 .align	16
d7a338
 aesni_cbc_encrypt:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	testq	%rdx,%rdx
d7a338
 	jz	.Lcbc_ret
d7a338
 
d7a338
@@ -4511,5 +4522,26 @@ __aesni_set_encrypt_key:
d7a338
 
d7a338
 .byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
d7a338
 .align	64
d7a338
+	.section ".note.gnu.property", "a"
d7a338
+	.p2align 3
d7a338
+	.long 1f - 0f
d7a338
+	.long 4f - 1f
d7a338
+	.long 5
d7a338
+0:
d7a338
+	# "GNU" encoded with .byte, since .asciz isn't supported
d7a338
+	# on Solaris.
d7a338
+	.byte 0x47
d7a338
+	.byte 0x4e
d7a338
+	.byte 0x55
d7a338
+	.byte 0
d7a338
+1:
d7a338
+	.p2align 3
d7a338
+	.long 0xc0000002
d7a338
+	.long 3f - 2f
d7a338
+2:
d7a338
+	.long 3
d7a338
+3:
d7a338
+	.p2align 3
d7a338
+4:
d7a338
 
d7a338
 .section .note.GNU-stack,"",%progbits
d7a338
diff --git a/lib/accelerated/x86/elf/ghash-x86_64.s b/lib/accelerated/x86/elf/ghash-x86_64.s
d7a338
index 1e4d18b341..8da3f294c7 100644
d7a338
--- a/lib/accelerated/x86/elf/ghash-x86_64.s
d7a338
+++ b/lib/accelerated/x86/elf/ghash-x86_64.s
d7a338
@@ -45,6 +45,7 @@
d7a338
 .align	16
d7a338
 gcm_gmult_4bit:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	pushq	%rbx
d7a338
 .cfi_adjust_cfa_offset	8
d7a338
 .cfi_offset	%rbx,-16
d7a338
@@ -156,6 +157,7 @@ gcm_gmult_4bit:
d7a338
 .align	16
d7a338
 gcm_ghash_4bit:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	pushq	%rbx
d7a338
 .cfi_adjust_cfa_offset	8
d7a338
 .cfi_offset	%rbx,-16
d7a338
@@ -903,6 +905,7 @@ gcm_init_clmul:
d7a338
 .align	16
d7a338
 gcm_gmult_clmul:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 .L_gmult_clmul:
d7a338
 	movdqu	(%rdi),%xmm0
d7a338
 	movdqa	.Lbswap_mask(%rip),%xmm5
d7a338
@@ -956,6 +959,7 @@ gcm_gmult_clmul:
d7a338
 .align	32
d7a338
 gcm_ghash_clmul:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 .L_ghash_clmul:
d7a338
 	movdqa	.Lbswap_mask(%rip),%xmm10
d7a338
 
d7a338
@@ -1450,6 +1454,7 @@ gcm_init_avx:
d7a338
 .align	32
d7a338
 gcm_gmult_avx:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	jmp	.L_gmult_clmul
d7a338
 .cfi_endproc	
d7a338
 .size	gcm_gmult_avx,.-gcm_gmult_avx
d7a338
@@ -1458,6 +1463,7 @@ gcm_gmult_avx:
d7a338
 .align	32
d7a338
 gcm_ghash_avx:
d7a338
 .cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	vzeroupper
d7a338
 
d7a338
 	vmovdqu	(%rdi),%xmm10
d7a338
@@ -1884,5 +1890,26 @@ gcm_ghash_avx:
d7a338
 
d7a338
 .byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
d7a338
 .align	64
d7a338
+	.section ".note.gnu.property", "a"
d7a338
+	.p2align 3
d7a338
+	.long 1f - 0f
d7a338
+	.long 4f - 1f
d7a338
+	.long 5
d7a338
+0:
d7a338
+	# "GNU" encoded with .byte, since .asciz isn't supported
d7a338
+	# on Solaris.
d7a338
+	.byte 0x47
d7a338
+	.byte 0x4e
d7a338
+	.byte 0x55
d7a338
+	.byte 0
d7a338
+1:
d7a338
+	.p2align 3
d7a338
+	.long 0xc0000002
d7a338
+	.long 3f - 2f
d7a338
+2:
d7a338
+	.long 3
d7a338
+3:
d7a338
+	.p2align 3
d7a338
+4:
d7a338
 
d7a338
 .section .note.GNU-stack,"",%progbits
d7a338
diff --git a/lib/accelerated/x86/elf/sha1-ssse3-x86.s b/lib/accelerated/x86/elf/sha1-ssse3-x86.s
d7a338
index 8bfbcb6b39..57b6ba58f6 100644
d7a338
--- a/lib/accelerated/x86/elf/sha1-ssse3-x86.s
d7a338
+++ b/lib/accelerated/x86/elf/sha1-ssse3-x86.s
d7a338
@@ -43,6 +43,7 @@
d7a338
 .align	16
d7a338
 sha1_block_data_order:
d7a338
 .L_sha1_block_data_order_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -1417,4 +1418,21 @@ sha1_block_data_order:
d7a338
 .byte	89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
d7a338
 .byte	114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
d7a338
 
d7a338
+	.section ".note.gnu.property", "a"
d7a338
+	.p2align 2
d7a338
+	.long 1f - 0f
d7a338
+	.long 4f - 1f
d7a338
+	.long 5
d7a338
+0:
d7a338
+	.asciz "GNU"
d7a338
+1:
d7a338
+	.p2align 2
d7a338
+	.long 0xc0000002
d7a338
+	.long 3f - 2f
d7a338
+2:
d7a338
+	.long 3
d7a338
+3:
d7a338
+	.p2align 2
d7a338
+4:
d7a338
+
d7a338
 .section .note.GNU-stack,"",%progbits
d7a338
diff --git a/lib/accelerated/x86/elf/sha1-ssse3-x86_64.s b/lib/accelerated/x86/elf/sha1-ssse3-x86_64.s
d7a338
index d34f34497c..54095050c8 100644
d7a338
--- a/lib/accelerated/x86/elf/sha1-ssse3-x86_64.s
d7a338
+++ b/lib/accelerated/x86/elf/sha1-ssse3-x86_64.s
d7a338
@@ -5487,5 +5487,26 @@ K_XX_XX:
d7a338
 .byte	0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0
d7a338
 .byte	83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
d7a338
 .align	64
d7a338
+	.section ".note.gnu.property", "a"
d7a338
+	.p2align 3
d7a338
+	.long 1f - 0f
d7a338
+	.long 4f - 1f
d7a338
+	.long 5
d7a338
+0:
d7a338
+	# "GNU" encoded with .byte, since .asciz isn't supported
d7a338
+	# on Solaris.
d7a338
+	.byte 0x47
d7a338
+	.byte 0x4e
d7a338
+	.byte 0x55
d7a338
+	.byte 0
d7a338
+1:
d7a338
+	.p2align 3
d7a338
+	.long 0xc0000002
d7a338
+	.long 3f - 2f
d7a338
+2:
d7a338
+	.long 3
d7a338
+3:
d7a338
+	.p2align 3
d7a338
+4:
d7a338
 
d7a338
 .section .note.GNU-stack,"",%progbits
d7a338
diff --git a/lib/accelerated/x86/elf/sha256-ssse3-x86.s b/lib/accelerated/x86/elf/sha256-ssse3-x86.s
d7a338
index 8d9aaa4a81..6d16b9140e 100644
d7a338
--- a/lib/accelerated/x86/elf/sha256-ssse3-x86.s
d7a338
+++ b/lib/accelerated/x86/elf/sha256-ssse3-x86.s
d7a338
@@ -43,6 +43,7 @@
d7a338
 .align	16
d7a338
 sha256_block_data_order:
d7a338
 .L_sha256_block_data_order_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -3384,4 +3385,21 @@ sha256_block_data_order:
d7a338
 	ret
d7a338
 .size	sha256_block_data_order,.-.L_sha256_block_data_order_begin
d7a338
 
d7a338
+	.section ".note.gnu.property", "a"
d7a338
+	.p2align 2
d7a338
+	.long 1f - 0f
d7a338
+	.long 4f - 1f
d7a338
+	.long 5
d7a338
+0:
d7a338
+	.asciz "GNU"
d7a338
+1:
d7a338
+	.p2align 2
d7a338
+	.long 0xc0000002
d7a338
+	.long 3f - 2f
d7a338
+2:
d7a338
+	.long 3
d7a338
+3:
d7a338
+	.p2align 2
d7a338
+4:
d7a338
+
d7a338
 .section .note.GNU-stack,"",%progbits
d7a338
diff --git a/lib/accelerated/x86/elf/sha256-ssse3-x86_64.s b/lib/accelerated/x86/elf/sha256-ssse3-x86_64.s
d7a338
index d196c6a793..1514ee45c0 100644
d7a338
--- a/lib/accelerated/x86/elf/sha256-ssse3-x86_64.s
d7a338
+++ b/lib/accelerated/x86/elf/sha256-ssse3-x86_64.s
d7a338
@@ -5493,5 +5493,26 @@ sha256_block_data_order_avx2:
d7a338
 	.byte	0xf3,0xc3
d7a338
 .cfi_endproc	
d7a338
 .size	sha256_block_data_order_avx2,.-sha256_block_data_order_avx2
d7a338
+	.section ".note.gnu.property", "a"
d7a338
+	.p2align 3
d7a338
+	.long 1f - 0f
d7a338
+	.long 4f - 1f
d7a338
+	.long 5
d7a338
+0:
d7a338
+	# "GNU" encoded with .byte, since .asciz isn't supported
d7a338
+	# on Solaris.
d7a338
+	.byte 0x47
d7a338
+	.byte 0x4e
d7a338
+	.byte 0x55
d7a338
+	.byte 0
d7a338
+1:
d7a338
+	.p2align 3
d7a338
+	.long 0xc0000002
d7a338
+	.long 3f - 2f
d7a338
+2:
d7a338
+	.long 3
d7a338
+3:
d7a338
+	.p2align 3
d7a338
+4:
d7a338
 
d7a338
 .section .note.GNU-stack,"",%progbits
d7a338
diff --git a/lib/accelerated/x86/elf/sha512-ssse3-x86.s b/lib/accelerated/x86/elf/sha512-ssse3-x86.s
d7a338
index 481c777154..afca4eae7b 100644
d7a338
--- a/lib/accelerated/x86/elf/sha512-ssse3-x86.s
d7a338
+++ b/lib/accelerated/x86/elf/sha512-ssse3-x86.s
d7a338
@@ -43,6 +43,7 @@
d7a338
 .align	16
d7a338
 sha512_block_data_order:
d7a338
 .L_sha512_block_data_order_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -602,4 +603,21 @@ sha512_block_data_order:
d7a338
 .byte	112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
d7a338
 .byte	62,0
d7a338
 
d7a338
+	.section ".note.gnu.property", "a"
d7a338
+	.p2align 2
d7a338
+	.long 1f - 0f
d7a338
+	.long 4f - 1f
d7a338
+	.long 5
d7a338
+0:
d7a338
+	.asciz "GNU"
d7a338
+1:
d7a338
+	.p2align 2
d7a338
+	.long 0xc0000002
d7a338
+	.long 3f - 2f
d7a338
+2:
d7a338
+	.long 3
d7a338
+3:
d7a338
+	.p2align 2
d7a338
+4:
d7a338
+
d7a338
 .section .note.GNU-stack,"",%progbits
d7a338
diff --git a/lib/accelerated/x86/elf/sha512-ssse3-x86_64.s b/lib/accelerated/x86/elf/sha512-ssse3-x86_64.s
d7a338
index 446c06a3e6..a7be2cd444 100644
d7a338
--- a/lib/accelerated/x86/elf/sha512-ssse3-x86_64.s
d7a338
+++ b/lib/accelerated/x86/elf/sha512-ssse3-x86_64.s
d7a338
@@ -5498,5 +5498,26 @@ sha512_block_data_order_avx2:
d7a338
 	.byte	0xf3,0xc3
d7a338
 .cfi_endproc	
d7a338
 .size	sha512_block_data_order_avx2,.-sha512_block_data_order_avx2
d7a338
+	.section ".note.gnu.property", "a"
d7a338
+	.p2align 3
d7a338
+	.long 1f - 0f
d7a338
+	.long 4f - 1f
d7a338
+	.long 5
d7a338
+0:
d7a338
+	# "GNU" encoded with .byte, since .asciz isn't supported
d7a338
+	# on Solaris.
d7a338
+	.byte 0x47
d7a338
+	.byte 0x4e
d7a338
+	.byte 0x55
d7a338
+	.byte 0
d7a338
+1:
d7a338
+	.p2align 3
d7a338
+	.long 0xc0000002
d7a338
+	.long 3f - 2f
d7a338
+2:
d7a338
+	.long 3
d7a338
+3:
d7a338
+	.p2align 3
d7a338
+4:
d7a338
 
d7a338
 .section .note.GNU-stack,"",%progbits
d7a338
-- 
d7a338
2.31.1
d7a338
d7a338
diff --git a/lib/accelerated/x86/elf/e_padlock-x86.s b/lib/accelerated/x86/elf/e_padlock-x86.s
d7a338
index ed8681ee4..dd56518f6 100644
d7a338
--- a/lib/accelerated/x86/elf/e_padlock-x86.s
d7a338
+++ b/lib/accelerated/x86/elf/e_padlock-x86.s
d7a338
@@ -1,4 +1,4 @@
d7a338
-# Copyright (c) 2011-2013, Andy Polyakov <appro@openssl.org>
d7a338
+# Copyright (c) 2011-2016, Andy Polyakov <appro@openssl.org>
d7a338
 # All rights reserved.
d7a338
 #
d7a338
 # Redistribution and use in source and binary forms, with or without
d7a338
@@ -37,13 +37,13 @@
d7a338
 #
d7a338
 # *** This file is auto-generated ***
d7a338
 #
d7a338
-.file	"devel/perlasm/e_padlock-x86.s"
d7a338
 .text
d7a338
 .globl	padlock_capability
d7a338
 .type	padlock_capability,@function
d7a338
 .align	16
d7a338
 padlock_capability:
d7a338
 .L_padlock_capability_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebx
d7a338
 	pushfl
d7a338
 	popl	%eax
d7a338
@@ -60,11 +60,20 @@ padlock_capability:
d7a338
 	.byte	0x0f,0xa2
d7a338
 	xorl	%eax,%eax
d7a338
 	cmpl	$0x746e6543,%ebx
d7a338
-	jne	.L000noluck
d7a338
+	jne	.L001zhaoxin
d7a338
 	cmpl	$0x48727561,%edx
d7a338
 	jne	.L000noluck
d7a338
 	cmpl	$0x736c7561,%ecx
d7a338
 	jne	.L000noluck
d7a338
+	jmp	.L002zhaoxinEnd
d7a338
+.L001zhaoxin:
d7a338
+	cmpl	$0x68532020,%ebx
d7a338
+	jne	.L000noluck
d7a338
+	cmpl	$0x68676e61,%edx
d7a338
+	jne	.L000noluck
d7a338
+	cmpl	$0x20206961,%ecx
d7a338
+	jne	.L000noluck
d7a338
+.L002zhaoxinEnd:
d7a338
 	movl	$3221225472,%eax
d7a338
 	.byte	0x0f,0xa2
d7a338
 	movl	%eax,%edx
d7a338
@@ -95,15 +104,16 @@ padlock_capability:
d7a338
 .align	16
d7a338
 padlock_key_bswap:
d7a338
 .L_padlock_key_bswap_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	movl	4(%esp),%edx
d7a338
 	movl	240(%edx),%ecx
d7a338
-.L001bswap_loop:
d7a338
+.L003bswap_loop:
d7a338
 	movl	(%edx),%eax
d7a338
 	bswap	%eax
d7a338
 	movl	%eax,(%edx)
d7a338
 	leal	4(%edx),%edx
d7a338
 	subl	$1,%ecx
d7a338
-	jnz	.L001bswap_loop
d7a338
+	jnz	.L003bswap_loop
d7a338
 	ret
d7a338
 .size	padlock_key_bswap,.-.L_padlock_key_bswap_begin
d7a338
 .globl	padlock_verify_context
d7a338
@@ -111,25 +121,27 @@ padlock_key_bswap:
d7a338
 .align	16
d7a338
 padlock_verify_context:
d7a338
 .L_padlock_verify_context_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	movl	4(%esp),%edx
d7a338
-	leal	.Lpadlock_saved_context-.L002verify_pic_point,%eax
d7a338
+	leal	.Lpadlock_saved_context-.L004verify_pic_point,%eax
d7a338
 	pushfl
d7a338
 	call	_padlock_verify_ctx
d7a338
-.L002verify_pic_point:
d7a338
+.L004verify_pic_point:
d7a338
 	leal	4(%esp),%esp
d7a338
 	ret
d7a338
 .size	padlock_verify_context,.-.L_padlock_verify_context_begin
d7a338
 .type	_padlock_verify_ctx,@function
d7a338
 .align	16
d7a338
 _padlock_verify_ctx:
d7a338
+.byte	243,15,30,251
d7a338
 	addl	(%esp),%eax
d7a338
 	btl	$30,4(%esp)
d7a338
-	jnc	.L003verified
d7a338
+	jnc	.L005verified
d7a338
 	cmpl	(%eax),%edx
d7a338
-	je	.L003verified
d7a338
+	je	.L005verified
d7a338
 	pushfl
d7a338
 	popfl
d7a338
-.L003verified:
d7a338
+.L005verified:
d7a338
 	movl	%edx,(%eax)
d7a338
 	ret
d7a338
 .size	_padlock_verify_ctx,.-_padlock_verify_ctx
d7a338
@@ -138,6 +150,7 @@ _padlock_verify_ctx:
d7a338
 .align	16
d7a338
 padlock_reload_key:
d7a338
 .L_padlock_reload_key_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushfl
d7a338
 	popfl
d7a338
 	ret
d7a338
@@ -147,6 +160,7 @@ padlock_reload_key:
d7a338
 .align	16
d7a338
 padlock_aes_block:
d7a338
 .L_padlock_aes_block_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%edi
d7a338
 	pushl	%esi
d7a338
 	pushl	%ebx
d7a338
@@ -167,6 +181,7 @@ padlock_aes_block:
d7a338
 .align	16
d7a338
 padlock_ecb_encrypt:
d7a338
 .L_padlock_ecb_encrypt_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -176,25 +191,25 @@ padlock_ecb_encrypt:
d7a338
 	movl	28(%esp),%edx
d7a338
 	movl	32(%esp),%ecx
d7a338
 	testl	$15,%edx
d7a338
-	jnz	.L004ecb_abort
d7a338
+	jnz	.L006ecb_abort
d7a338
 	testl	$15,%ecx
d7a338
-	jnz	.L004ecb_abort
d7a338
-	leal	.Lpadlock_saved_context-.L005ecb_pic_point,%eax
d7a338
+	jnz	.L006ecb_abort
d7a338
+	leal	.Lpadlock_saved_context-.L007ecb_pic_point,%eax
d7a338
 	pushfl
d7a338
 	cld
d7a338
 	call	_padlock_verify_ctx
d7a338
-.L005ecb_pic_point:
d7a338
+.L007ecb_pic_point:
d7a338
 	leal	16(%edx),%edx
d7a338
 	xorl	%eax,%eax
d7a338
 	xorl	%ebx,%ebx
d7a338
 	testl	$32,(%edx)
d7a338
-	jnz	.L006ecb_aligned
d7a338
+	jnz	.L008ecb_aligned
d7a338
 	testl	$15,%edi
d7a338
 	setz	%al
d7a338
 	testl	$15,%esi
d7a338
 	setz	%bl
d7a338
 	testl	%ebx,%eax
d7a338
-	jnz	.L006ecb_aligned
d7a338
+	jnz	.L008ecb_aligned
d7a338
 	negl	%eax
d7a338
 	movl	$512,%ebx
d7a338
 	notl	%eax
d7a338
@@ -213,7 +228,7 @@ padlock_ecb_encrypt:
d7a338
 	andl	$-16,%esp
d7a338
 	movl	%eax,16(%ebp)
d7a338
 	cmpl	%ebx,%ecx
d7a338
-	ja	.L007ecb_loop
d7a338
+	ja	.L009ecb_loop
d7a338
 	movl	%esi,%eax
d7a338
 	cmpl	%esp,%ebp
d7a338
 	cmovel	%edi,%eax
d7a338
@@ -224,10 +239,10 @@ padlock_ecb_encrypt:
d7a338
 	movl	$-128,%eax
d7a338
 	cmovael	%ebx,%eax
d7a338
 	andl	%eax,%ebx
d7a338
-	jz	.L008ecb_unaligned_tail
d7a338
-	jmp	.L007ecb_loop
d7a338
+	jz	.L010ecb_unaligned_tail
d7a338
+	jmp	.L009ecb_loop
d7a338
 .align	16
d7a338
-.L007ecb_loop:
d7a338
+.L009ecb_loop:
d7a338
 	movl	%edi,(%ebp)
d7a338
 	movl	%esi,4(%ebp)
d7a338
 	movl	%ecx,8(%ebp)
d7a338
@@ -236,13 +251,13 @@ padlock_ecb_encrypt:
d7a338
 	testl	$15,%edi
d7a338
 	cmovnzl	%esp,%edi
d7a338
 	testl	$15,%esi
d7a338
-	jz	.L009ecb_inp_aligned
d7a338
+	jz	.L011ecb_inp_aligned
d7a338
 	shrl	$2,%ecx
d7a338
 .byte	243,165
d7a338
 	subl	%ebx,%edi
d7a338
 	movl	%ebx,%ecx
d7a338
 	movl	%edi,%esi
d7a338
-.L009ecb_inp_aligned:
d7a338
+.L011ecb_inp_aligned:
d7a338
 	leal	-16(%edx),%eax
d7a338
 	leal	16(%edx),%ebx
d7a338
 	shrl	$4,%ecx
d7a338
@@ -250,23 +265,23 @@ padlock_ecb_encrypt:
d7a338
 	movl	(%ebp),%edi
d7a338
 	movl	12(%ebp),%ebx
d7a338
 	testl	$15,%edi
d7a338
-	jz	.L010ecb_out_aligned
d7a338
+	jz	.L012ecb_out_aligned
d7a338
 	movl	%ebx,%ecx
d7a338
 	leal	(%esp),%esi
d7a338
 	shrl	$2,%ecx
d7a338
 .byte	243,165
d7a338
 	subl	%ebx,%edi
d7a338
-.L010ecb_out_aligned:
d7a338
+.L012ecb_out_aligned:
d7a338
 	movl	4(%ebp),%esi
d7a338
 	movl	8(%ebp),%ecx
d7a338
 	addl	%ebx,%edi
d7a338
 	addl	%ebx,%esi
d7a338
 	subl	%ebx,%ecx
d7a338
 	movl	$512,%ebx
d7a338
-	jz	.L011ecb_break
d7a338
+	jz	.L013ecb_break
d7a338
 	cmpl	%ebx,%ecx
d7a338
-	jae	.L007ecb_loop
d7a338
-.L008ecb_unaligned_tail:
d7a338
+	jae	.L009ecb_loop
d7a338
+.L010ecb_unaligned_tail:
d7a338
 	xorl	%eax,%eax
d7a338
 	cmpl	%ebp,%esp
d7a338
 	cmovel	%ecx,%eax
d7a338
@@ -279,24 +294,24 @@ padlock_ecb_encrypt:
d7a338
 	movl	%esp,%esi
d7a338
 	movl	%eax,%edi
d7a338
 	movl	%ebx,%ecx
d7a338
-	jmp	.L007ecb_loop
d7a338
+	jmp	.L009ecb_loop
d7a338
 .align	16
d7a338
-.L011ecb_break:
d7a338
+.L013ecb_break:
d7a338
 	cmpl	%ebp,%esp
d7a338
-	je	.L012ecb_done
d7a338
+	je	.L014ecb_done
d7a338
 	pxor	%xmm0,%xmm0
d7a338
 	leal	(%esp),%eax
d7a338
-.L013ecb_bzero:
d7a338
+.L015ecb_bzero:
d7a338
 	movaps	%xmm0,(%eax)
d7a338
 	leal	16(%eax),%eax
d7a338
 	cmpl	%eax,%ebp
d7a338
-	ja	.L013ecb_bzero
d7a338
-.L012ecb_done:
d7a338
+	ja	.L015ecb_bzero
d7a338
+.L014ecb_done:
d7a338
 	movl	16(%ebp),%ebp
d7a338
 	leal	24(%ebp),%esp
d7a338
-	jmp	.L014ecb_exit
d7a338
+	jmp	.L016ecb_exit
d7a338
 .align	16
d7a338
-.L006ecb_aligned:
d7a338
+.L008ecb_aligned:
d7a338
 	leal	(%esi,%ecx,1),%ebp
d7a338
 	negl	%ebp
d7a338
 	andl	$4095,%ebp
d7a338
@@ -306,14 +321,14 @@ padlock_ecb_encrypt:
d7a338
 	cmovael	%eax,%ebp
d7a338
 	andl	%ecx,%ebp
d7a338
 	subl	%ebp,%ecx
d7a338
-	jz	.L015ecb_aligned_tail
d7a338
+	jz	.L017ecb_aligned_tail
d7a338
 	leal	-16(%edx),%eax
d7a338
 	leal	16(%edx),%ebx
d7a338
 	shrl	$4,%ecx
d7a338
 .byte	243,15,167,200
d7a338
 	testl	%ebp,%ebp
d7a338
-	jz	.L014ecb_exit
d7a338
-.L015ecb_aligned_tail:
d7a338
+	jz	.L016ecb_exit
d7a338
+.L017ecb_aligned_tail:
d7a338
 	movl	%ebp,%ecx
d7a338
 	leal	-24(%esp),%ebp
d7a338
 	movl	%ebp,%esp
d7a338
@@ -330,11 +345,11 @@ padlock_ecb_encrypt:
d7a338
 	movl	%esp,%esi
d7a338
 	movl	%eax,%edi
d7a338
 	movl	%ebx,%ecx
d7a338
-	jmp	.L007ecb_loop
d7a338
-.L014ecb_exit:
d7a338
+	jmp	.L009ecb_loop
d7a338
+.L016ecb_exit:
d7a338
 	movl	$1,%eax
d7a338
 	leal	4(%esp),%esp
d7a338
-.L004ecb_abort:
d7a338
+.L006ecb_abort:
d7a338
 	popl	%edi
d7a338
 	popl	%esi
d7a338
 	popl	%ebx
d7a338
@@ -346,6 +361,7 @@ padlock_ecb_encrypt:
d7a338
 .align	16
d7a338
 padlock_cbc_encrypt:
d7a338
 .L_padlock_cbc_encrypt_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -355,25 +371,25 @@ padlock_cbc_encrypt:
d7a338
 	movl	28(%esp),%edx
d7a338
 	movl	32(%esp),%ecx
d7a338
 	testl	$15,%edx
d7a338
-	jnz	.L016cbc_abort
d7a338
+	jnz	.L018cbc_abort
d7a338
 	testl	$15,%ecx
d7a338
-	jnz	.L016cbc_abort
d7a338
-	leal	.Lpadlock_saved_context-.L017cbc_pic_point,%eax
d7a338
+	jnz	.L018cbc_abort
d7a338
+	leal	.Lpadlock_saved_context-.L019cbc_pic_point,%eax
d7a338
 	pushfl
d7a338
 	cld
d7a338
 	call	_padlock_verify_ctx
d7a338
-.L017cbc_pic_point:
d7a338
+.L019cbc_pic_point:
d7a338
 	leal	16(%edx),%edx
d7a338
 	xorl	%eax,%eax
d7a338
 	xorl	%ebx,%ebx
d7a338
 	testl	$32,(%edx)
d7a338
-	jnz	.L018cbc_aligned
d7a338
+	jnz	.L020cbc_aligned
d7a338
 	testl	$15,%edi
d7a338
 	setz	%al
d7a338
 	testl	$15,%esi
d7a338
 	setz	%bl
d7a338
 	testl	%ebx,%eax
d7a338
-	jnz	.L018cbc_aligned
d7a338
+	jnz	.L020cbc_aligned
d7a338
 	negl	%eax
d7a338
 	movl	$512,%ebx
d7a338
 	notl	%eax
d7a338
@@ -392,7 +408,7 @@ padlock_cbc_encrypt:
d7a338
 	andl	$-16,%esp
d7a338
 	movl	%eax,16(%ebp)
d7a338
 	cmpl	%ebx,%ecx
d7a338
-	ja	.L019cbc_loop
d7a338
+	ja	.L021cbc_loop
d7a338
 	movl	%esi,%eax
d7a338
 	cmpl	%esp,%ebp
d7a338
 	cmovel	%edi,%eax
d7a338
@@ -403,10 +419,10 @@ padlock_cbc_encrypt:
d7a338
 	movl	$-64,%eax
d7a338
 	cmovael	%ebx,%eax
d7a338
 	andl	%eax,%ebx
d7a338
-	jz	.L020cbc_unaligned_tail
d7a338
-	jmp	.L019cbc_loop
d7a338
+	jz	.L022cbc_unaligned_tail
d7a338
+	jmp	.L021cbc_loop
d7a338
 .align	16
d7a338
-.L019cbc_loop:
d7a338
+.L021cbc_loop:
d7a338
 	movl	%edi,(%ebp)
d7a338
 	movl	%esi,4(%ebp)
d7a338
 	movl	%ecx,8(%ebp)
d7a338
@@ -415,13 +431,13 @@ padlock_cbc_encrypt:
d7a338
 	testl	$15,%edi
d7a338
 	cmovnzl	%esp,%edi
d7a338
 	testl	$15,%esi
d7a338
-	jz	.L021cbc_inp_aligned
d7a338
+	jz	.L023cbc_inp_aligned
d7a338
 	shrl	$2,%ecx
d7a338
 .byte	243,165
d7a338
 	subl	%ebx,%edi
d7a338
 	movl	%ebx,%ecx
d7a338
 	movl	%edi,%esi
d7a338
-.L021cbc_inp_aligned:
d7a338
+.L023cbc_inp_aligned:
d7a338
 	leal	-16(%edx),%eax
d7a338
 	leal	16(%edx),%ebx
d7a338
 	shrl	$4,%ecx
d7a338
@@ -431,23 +447,23 @@ padlock_cbc_encrypt:
d7a338
 	movl	(%ebp),%edi
d7a338
 	movl	12(%ebp),%ebx
d7a338
 	testl	$15,%edi
d7a338
-	jz	.L022cbc_out_aligned
d7a338
+	jz	.L024cbc_out_aligned
d7a338
 	movl	%ebx,%ecx
d7a338
 	leal	(%esp),%esi
d7a338
 	shrl	$2,%ecx
d7a338
 .byte	243,165
d7a338
 	subl	%ebx,%edi
d7a338
-.L022cbc_out_aligned:
d7a338
+.L024cbc_out_aligned:
d7a338
 	movl	4(%ebp),%esi
d7a338
 	movl	8(%ebp),%ecx
d7a338
 	addl	%ebx,%edi
d7a338
 	addl	%ebx,%esi
d7a338
 	subl	%ebx,%ecx
d7a338
 	movl	$512,%ebx
d7a338
-	jz	.L023cbc_break
d7a338
+	jz	.L025cbc_break
d7a338
 	cmpl	%ebx,%ecx
d7a338
-	jae	.L019cbc_loop
d7a338
-.L020cbc_unaligned_tail:
d7a338
+	jae	.L021cbc_loop
d7a338
+.L022cbc_unaligned_tail:
d7a338
 	xorl	%eax,%eax
d7a338
 	cmpl	%ebp,%esp
d7a338
 	cmovel	%ecx,%eax
d7a338
@@ -460,24 +476,24 @@ padlock_cbc_encrypt:
d7a338
 	movl	%esp,%esi
d7a338
 	movl	%eax,%edi
d7a338
 	movl	%ebx,%ecx
d7a338
-	jmp	.L019cbc_loop
d7a338
+	jmp	.L021cbc_loop
d7a338
 .align	16
d7a338
-.L023cbc_break:
d7a338
+.L025cbc_break:
d7a338
 	cmpl	%ebp,%esp
d7a338
-	je	.L024cbc_done
d7a338
+	je	.L026cbc_done
d7a338
 	pxor	%xmm0,%xmm0
d7a338
 	leal	(%esp),%eax
d7a338
-.L025cbc_bzero:
d7a338
+.L027cbc_bzero:
d7a338
 	movaps	%xmm0,(%eax)
d7a338
 	leal	16(%eax),%eax
d7a338
 	cmpl	%eax,%ebp
d7a338
-	ja	.L025cbc_bzero
d7a338
-.L024cbc_done:
d7a338
+	ja	.L027cbc_bzero
d7a338
+.L026cbc_done:
d7a338
 	movl	16(%ebp),%ebp
d7a338
 	leal	24(%ebp),%esp
d7a338
-	jmp	.L026cbc_exit
d7a338
+	jmp	.L028cbc_exit
d7a338
 .align	16
d7a338
-.L018cbc_aligned:
d7a338
+.L020cbc_aligned:
d7a338
 	leal	(%esi,%ecx,1),%ebp
d7a338
 	negl	%ebp
d7a338
 	andl	$4095,%ebp
d7a338
@@ -487,7 +503,7 @@ padlock_cbc_encrypt:
d7a338
 	cmovael	%eax,%ebp
d7a338
 	andl	%ecx,%ebp
d7a338
 	subl	%ebp,%ecx
d7a338
-	jz	.L027cbc_aligned_tail
d7a338
+	jz	.L029cbc_aligned_tail
d7a338
 	leal	-16(%edx),%eax
d7a338
 	leal	16(%edx),%ebx
d7a338
 	shrl	$4,%ecx
d7a338
@@ -495,8 +511,8 @@ padlock_cbc_encrypt:
d7a338
 	movaps	(%eax),%xmm0
d7a338
 	movaps	%xmm0,-16(%edx)
d7a338
 	testl	%ebp,%ebp
d7a338
-	jz	.L026cbc_exit
d7a338
-.L027cbc_aligned_tail:
d7a338
+	jz	.L028cbc_exit
d7a338
+.L029cbc_aligned_tail:
d7a338
 	movl	%ebp,%ecx
d7a338
 	leal	-24(%esp),%ebp
d7a338
 	movl	%ebp,%esp
d7a338
@@ -513,11 +529,11 @@ padlock_cbc_encrypt:
d7a338
 	movl	%esp,%esi
d7a338
 	movl	%eax,%edi
d7a338
 	movl	%ebx,%ecx
d7a338
-	jmp	.L019cbc_loop
d7a338
-.L026cbc_exit:
d7a338
+	jmp	.L021cbc_loop
d7a338
+.L028cbc_exit:
d7a338
 	movl	$1,%eax
d7a338
 	leal	4(%esp),%esp
d7a338
-.L016cbc_abort:
d7a338
+.L018cbc_abort:
d7a338
 	popl	%edi
d7a338
 	popl	%esi
d7a338
 	popl	%ebx
d7a338
@@ -529,6 +545,7 @@ padlock_cbc_encrypt:
d7a338
 .align	16
d7a338
 padlock_cfb_encrypt:
d7a338
 .L_padlock_cfb_encrypt_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -538,25 +555,25 @@ padlock_cfb_encrypt:
d7a338
 	movl	28(%esp),%edx
d7a338
 	movl	32(%esp),%ecx
d7a338
 	testl	$15,%edx
d7a338
-	jnz	.L028cfb_abort
d7a338
+	jnz	.L030cfb_abort
d7a338
 	testl	$15,%ecx
d7a338
-	jnz	.L028cfb_abort
d7a338
-	leal	.Lpadlock_saved_context-.L029cfb_pic_point,%eax
d7a338
+	jnz	.L030cfb_abort
d7a338
+	leal	.Lpadlock_saved_context-.L031cfb_pic_point,%eax
d7a338
 	pushfl
d7a338
 	cld
d7a338
 	call	_padlock_verify_ctx
d7a338
-.L029cfb_pic_point:
d7a338
+.L031cfb_pic_point:
d7a338
 	leal	16(%edx),%edx
d7a338
 	xorl	%eax,%eax
d7a338
 	xorl	%ebx,%ebx
d7a338
 	testl	$32,(%edx)
d7a338
-	jnz	.L030cfb_aligned
d7a338
+	jnz	.L032cfb_aligned
d7a338
 	testl	$15,%edi
d7a338
 	setz	%al
d7a338
 	testl	$15,%esi
d7a338
 	setz	%bl
d7a338
 	testl	%ebx,%eax
d7a338
-	jnz	.L030cfb_aligned
d7a338
+	jnz	.L032cfb_aligned
d7a338
 	negl	%eax
d7a338
 	movl	$512,%ebx
d7a338
 	notl	%eax
d7a338
@@ -574,9 +591,9 @@ padlock_cfb_encrypt:
d7a338
 	andl	$-16,%ebp
d7a338
 	andl	$-16,%esp
d7a338
 	movl	%eax,16(%ebp)
d7a338
-	jmp	.L031cfb_loop
d7a338
+	jmp	.L033cfb_loop
d7a338
 .align	16
d7a338
-.L031cfb_loop:
d7a338
+.L033cfb_loop:
d7a338
 	movl	%edi,(%ebp)
d7a338
 	movl	%esi,4(%ebp)
d7a338
 	movl	%ecx,8(%ebp)
d7a338
@@ -585,13 +602,13 @@ padlock_cfb_encrypt:
d7a338
 	testl	$15,%edi
d7a338
 	cmovnzl	%esp,%edi
d7a338
 	testl	$15,%esi
d7a338
-	jz	.L032cfb_inp_aligned
d7a338
+	jz	.L034cfb_inp_aligned
d7a338
 	shrl	$2,%ecx
d7a338
 .byte	243,165
d7a338
 	subl	%ebx,%edi
d7a338
 	movl	%ebx,%ecx
d7a338
 	movl	%edi,%esi
d7a338
-.L032cfb_inp_aligned:
d7a338
+.L034cfb_inp_aligned:
d7a338
 	leal	-16(%edx),%eax
d7a338
 	leal	16(%edx),%ebx
d7a338
 	shrl	$4,%ecx
d7a338
@@ -601,45 +618,45 @@ padlock_cfb_encrypt:
d7a338
 	movl	(%ebp),%edi
d7a338
 	movl	12(%ebp),%ebx
d7a338
 	testl	$15,%edi
d7a338
-	jz	.L033cfb_out_aligned
d7a338
+	jz	.L035cfb_out_aligned
d7a338
 	movl	%ebx,%ecx
d7a338
 	leal	(%esp),%esi
d7a338
 	shrl	$2,%ecx
d7a338
 .byte	243,165
d7a338
 	subl	%ebx,%edi
d7a338
-.L033cfb_out_aligned:
d7a338
+.L035cfb_out_aligned:
d7a338
 	movl	4(%ebp),%esi
d7a338
 	movl	8(%ebp),%ecx
d7a338
 	addl	%ebx,%edi
d7a338
 	addl	%ebx,%esi
d7a338
 	subl	%ebx,%ecx
d7a338
 	movl	$512,%ebx
d7a338
-	jnz	.L031cfb_loop
d7a338
+	jnz	.L033cfb_loop
d7a338
 	cmpl	%ebp,%esp
d7a338
-	je	.L034cfb_done
d7a338
+	je	.L036cfb_done
d7a338
 	pxor	%xmm0,%xmm0
d7a338
 	leal	(%esp),%eax
d7a338
-.L035cfb_bzero:
d7a338
+.L037cfb_bzero:
d7a338
 	movaps	%xmm0,(%eax)
d7a338
 	leal	16(%eax),%eax
d7a338
 	cmpl	%eax,%ebp
d7a338
-	ja	.L035cfb_bzero
d7a338
-.L034cfb_done:
d7a338
+	ja	.L037cfb_bzero
d7a338
+.L036cfb_done:
d7a338
 	movl	16(%ebp),%ebp
d7a338
 	leal	24(%ebp),%esp
d7a338
-	jmp	.L036cfb_exit
d7a338
+	jmp	.L038cfb_exit
d7a338
 .align	16
d7a338
-.L030cfb_aligned:
d7a338
+.L032cfb_aligned:
d7a338
 	leal	-16(%edx),%eax
d7a338
 	leal	16(%edx),%ebx
d7a338
 	shrl	$4,%ecx
d7a338
 .byte	243,15,167,224
d7a338
 	movaps	(%eax),%xmm0
d7a338
 	movaps	%xmm0,-16(%edx)
d7a338
-.L036cfb_exit:
d7a338
+.L038cfb_exit:
d7a338
 	movl	$1,%eax
d7a338
 	leal	4(%esp),%esp
d7a338
-.L028cfb_abort:
d7a338
+.L030cfb_abort:
d7a338
 	popl	%edi
d7a338
 	popl	%esi
d7a338
 	popl	%ebx
d7a338
@@ -651,6 +668,7 @@ padlock_cfb_encrypt:
d7a338
 .align	16
d7a338
 padlock_ofb_encrypt:
d7a338
 .L_padlock_ofb_encrypt_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -660,25 +678,25 @@ padlock_ofb_encrypt:
d7a338
 	movl	28(%esp),%edx
d7a338
 	movl	32(%esp),%ecx
d7a338
 	testl	$15,%edx
d7a338
-	jnz	.L037ofb_abort
d7a338
+	jnz	.L039ofb_abort
d7a338
 	testl	$15,%ecx
d7a338
-	jnz	.L037ofb_abort
d7a338
-	leal	.Lpadlock_saved_context-.L038ofb_pic_point,%eax
d7a338
+	jnz	.L039ofb_abort
d7a338
+	leal	.Lpadlock_saved_context-.L040ofb_pic_point,%eax
d7a338
 	pushfl
d7a338
 	cld
d7a338
 	call	_padlock_verify_ctx
d7a338
-.L038ofb_pic_point:
d7a338
+.L040ofb_pic_point:
d7a338
 	leal	16(%edx),%edx
d7a338
 	xorl	%eax,%eax
d7a338
 	xorl	%ebx,%ebx
d7a338
 	testl	$32,(%edx)
d7a338
-	jnz	.L039ofb_aligned
d7a338
+	jnz	.L041ofb_aligned
d7a338
 	testl	$15,%edi
d7a338
 	setz	%al
d7a338
 	testl	$15,%esi
d7a338
 	setz	%bl
d7a338
 	testl	%ebx,%eax
d7a338
-	jnz	.L039ofb_aligned
d7a338
+	jnz	.L041ofb_aligned
d7a338
 	negl	%eax
d7a338
 	movl	$512,%ebx
d7a338
 	notl	%eax
d7a338
@@ -696,9 +714,9 @@ padlock_ofb_encrypt:
d7a338
 	andl	$-16,%ebp
d7a338
 	andl	$-16,%esp
d7a338
 	movl	%eax,16(%ebp)
d7a338
-	jmp	.L040ofb_loop
d7a338
+	jmp	.L042ofb_loop
d7a338
 .align	16
d7a338
-.L040ofb_loop:
d7a338
+.L042ofb_loop:
d7a338
 	movl	%edi,(%ebp)
d7a338
 	movl	%esi,4(%ebp)
d7a338
 	movl	%ecx,8(%ebp)
d7a338
@@ -707,13 +725,13 @@ padlock_ofb_encrypt:
d7a338
 	testl	$15,%edi
d7a338
 	cmovnzl	%esp,%edi
d7a338
 	testl	$15,%esi
d7a338
-	jz	.L041ofb_inp_aligned
d7a338
+	jz	.L043ofb_inp_aligned
d7a338
 	shrl	$2,%ecx
d7a338
 .byte	243,165
d7a338
 	subl	%ebx,%edi
d7a338
 	movl	%ebx,%ecx
d7a338
 	movl	%edi,%esi
d7a338
-.L041ofb_inp_aligned:
d7a338
+.L043ofb_inp_aligned:
d7a338
 	leal	-16(%edx),%eax
d7a338
 	leal	16(%edx),%ebx
d7a338
 	shrl	$4,%ecx
d7a338
@@ -723,45 +741,45 @@ padlock_ofb_encrypt:
d7a338
 	movl	(%ebp),%edi
d7a338
 	movl	12(%ebp),%ebx
d7a338
 	testl	$15,%edi
d7a338
-	jz	.L042ofb_out_aligned
d7a338
+	jz	.L044ofb_out_aligned
d7a338
 	movl	%ebx,%ecx
d7a338
 	leal	(%esp),%esi
d7a338
 	shrl	$2,%ecx
d7a338
 .byte	243,165
d7a338
 	subl	%ebx,%edi
d7a338
-.L042ofb_out_aligned:
d7a338
+.L044ofb_out_aligned:
d7a338
 	movl	4(%ebp),%esi
d7a338
 	movl	8(%ebp),%ecx
d7a338
 	addl	%ebx,%edi
d7a338
 	addl	%ebx,%esi
d7a338
 	subl	%ebx,%ecx
d7a338
 	movl	$512,%ebx
d7a338
-	jnz	.L040ofb_loop
d7a338
+	jnz	.L042ofb_loop
d7a338
 	cmpl	%ebp,%esp
d7a338
-	je	.L043ofb_done
d7a338
+	je	.L045ofb_done
d7a338
 	pxor	%xmm0,%xmm0
d7a338
 	leal	(%esp),%eax
d7a338
-.L044ofb_bzero:
d7a338
+.L046ofb_bzero:
d7a338
 	movaps	%xmm0,(%eax)
d7a338
 	leal	16(%eax),%eax
d7a338
 	cmpl	%eax,%ebp
d7a338
-	ja	.L044ofb_bzero
d7a338
-.L043ofb_done:
d7a338
+	ja	.L046ofb_bzero
d7a338
+.L045ofb_done:
d7a338
 	movl	16(%ebp),%ebp
d7a338
 	leal	24(%ebp),%esp
d7a338
-	jmp	.L045ofb_exit
d7a338
+	jmp	.L047ofb_exit
d7a338
 .align	16
d7a338
-.L039ofb_aligned:
d7a338
+.L041ofb_aligned:
d7a338
 	leal	-16(%edx),%eax
d7a338
 	leal	16(%edx),%ebx
d7a338
 	shrl	$4,%ecx
d7a338
 .byte	243,15,167,232
d7a338
 	movaps	(%eax),%xmm0
d7a338
 	movaps	%xmm0,-16(%edx)
d7a338
-.L045ofb_exit:
d7a338
+.L047ofb_exit:
d7a338
 	movl	$1,%eax
d7a338
 	leal	4(%esp),%esp
d7a338
-.L037ofb_abort:
d7a338
+.L039ofb_abort:
d7a338
 	popl	%edi
d7a338
 	popl	%esi
d7a338
 	popl	%ebx
d7a338
@@ -773,6 +791,7 @@ padlock_ofb_encrypt:
d7a338
 .align	16
d7a338
 padlock_ctr32_encrypt:
d7a338
 .L_padlock_ctr32_encrypt_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%ebp
d7a338
 	pushl	%ebx
d7a338
 	pushl	%esi
d7a338
@@ -782,14 +801,14 @@ padlock_ctr32_encrypt:
d7a338
 	movl	28(%esp),%edx
d7a338
 	movl	32(%esp),%ecx
d7a338
 	testl	$15,%edx
d7a338
-	jnz	.L046ctr32_abort
d7a338
+	jnz	.L048ctr32_abort
d7a338
 	testl	$15,%ecx
d7a338
-	jnz	.L046ctr32_abort
d7a338
-	leal	.Lpadlock_saved_context-.L047ctr32_pic_point,%eax
d7a338
+	jnz	.L048ctr32_abort
d7a338
+	leal	.Lpadlock_saved_context-.L049ctr32_pic_point,%eax
d7a338
 	pushfl
d7a338
 	cld
d7a338
 	call	_padlock_verify_ctx
d7a338
-.L047ctr32_pic_point:
d7a338
+.L049ctr32_pic_point:
d7a338
 	leal	16(%edx),%edx
d7a338
 	xorl	%eax,%eax
d7a338
 	movq	-16(%edx),%mm0
d7a338
@@ -809,9 +828,9 @@ padlock_ctr32_encrypt:
d7a338
 	andl	$-16,%ebp
d7a338
 	andl	$-16,%esp
d7a338
 	movl	%eax,16(%ebp)
d7a338
-	jmp	.L048ctr32_loop
d7a338
+	jmp	.L050ctr32_loop
d7a338
 .align	16
d7a338
-.L048ctr32_loop:
d7a338
+.L050ctr32_loop:
d7a338
 	movl	%edi,(%ebp)
d7a338
 	movl	%esi,4(%ebp)
d7a338
 	movl	%ecx,8(%ebp)
d7a338
@@ -820,7 +839,7 @@ padlock_ctr32_encrypt:
d7a338
 	movl	-4(%edx),%ecx
d7a338
 	xorl	%edi,%edi
d7a338
 	movl	-8(%edx),%eax
d7a338
-.L049ctr32_prepare:
d7a338
+.L051ctr32_prepare:
d7a338
 	movl	%ecx,12(%esp,%edi,1)
d7a338
 	bswap	%ecx
d7a338
 	movq	%mm0,(%esp,%edi,1)
d7a338
@@ -829,7 +848,7 @@ padlock_ctr32_encrypt:
d7a338
 	bswap	%ecx
d7a338
 	leal	16(%edi),%edi
d7a338
 	cmpl	%ebx,%edi
d7a338
-	jb	.L049ctr32_prepare
d7a338
+	jb	.L051ctr32_prepare
d7a338
 	movl	%ecx,-4(%edx)
d7a338
 	leal	(%esp),%esi
d7a338
 	leal	(%esp),%edi
d7a338
@@ -842,33 +861,33 @@ padlock_ctr32_encrypt:
d7a338
 	movl	12(%ebp),%ebx
d7a338
 	movl	4(%ebp),%esi
d7a338
 	xorl	%ecx,%ecx
d7a338
-.L050ctr32_xor:
d7a338
+.L052ctr32_xor:
d7a338
 	movups	(%esi,%ecx,1),%xmm1
d7a338
 	leal	16(%ecx),%ecx
d7a338
 	pxor	-16(%esp,%ecx,1),%xmm1
d7a338
 	movups	%xmm1,-16(%edi,%ecx,1)
d7a338
 	cmpl	%ebx,%ecx
d7a338
-	jb	.L050ctr32_xor
d7a338
+	jb	.L052ctr32_xor
d7a338
 	movl	8(%ebp),%ecx
d7a338
 	addl	%ebx,%edi
d7a338
 	addl	%ebx,%esi
d7a338
 	subl	%ebx,%ecx
d7a338
 	movl	$512,%ebx
d7a338
-	jnz	.L048ctr32_loop
d7a338
+	jnz	.L050ctr32_loop
d7a338
 	pxor	%xmm0,%xmm0
d7a338
 	leal	(%esp),%eax
d7a338
-.L051ctr32_bzero:
d7a338
+.L053ctr32_bzero:
d7a338
 	movaps	%xmm0,(%eax)
d7a338
 	leal	16(%eax),%eax
d7a338
 	cmpl	%eax,%ebp
d7a338
-	ja	.L051ctr32_bzero
d7a338
-.L052ctr32_done:
d7a338
+	ja	.L053ctr32_bzero
d7a338
+.L054ctr32_done:
d7a338
 	movl	16(%ebp),%ebp
d7a338
 	leal	24(%ebp),%esp
d7a338
 	movl	$1,%eax
d7a338
 	leal	4(%esp),%esp
d7a338
 	emms
d7a338
-.L046ctr32_abort:
d7a338
+.L048ctr32_abort:
d7a338
 	popl	%edi
d7a338
 	popl	%esi
d7a338
 	popl	%ebx
d7a338
@@ -880,6 +899,7 @@ padlock_ctr32_encrypt:
d7a338
 .align	16
d7a338
 padlock_xstore:
d7a338
 .L_padlock_xstore_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%edi
d7a338
 	movl	8(%esp),%edi
d7a338
 	movl	12(%esp),%edx
d7a338
@@ -890,14 +910,15 @@ padlock_xstore:
d7a338
 .type	_win32_segv_handler,@function
d7a338
 .align	16
d7a338
 _win32_segv_handler:
d7a338
+.byte	243,15,30,251
d7a338
 	movl	$1,%eax
d7a338
 	movl	4(%esp),%edx
d7a338
 	movl	12(%esp),%ecx
d7a338
 	cmpl	$3221225477,(%edx)
d7a338
-	jne	.L053ret
d7a338
+	jne	.L055ret
d7a338
 	addl	$4,184(%ecx)
d7a338
 	movl	$0,%eax
d7a338
-.L053ret:
d7a338
+.L055ret:
d7a338
 	ret
d7a338
 .size	_win32_segv_handler,.-_win32_segv_handler
d7a338
 .globl	padlock_sha1_oneshot
d7a338
@@ -905,6 +926,7 @@ _win32_segv_handler:
d7a338
 .align	16
d7a338
 padlock_sha1_oneshot:
d7a338
 .L_padlock_sha1_oneshot_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%edi
d7a338
 	pushl	%esi
d7a338
 	xorl	%eax,%eax
d7a338
@@ -936,6 +958,7 @@ padlock_sha1_oneshot:
d7a338
 .align	16
d7a338
 padlock_sha1_blocks:
d7a338
 .L_padlock_sha1_blocks_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%edi
d7a338
 	pushl	%esi
d7a338
 	movl	12(%esp),%edi
d7a338
@@ -966,6 +989,7 @@ padlock_sha1_blocks:
d7a338
 .align	16
d7a338
 padlock_sha256_oneshot:
d7a338
 .L_padlock_sha256_oneshot_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%edi
d7a338
 	pushl	%esi
d7a338
 	xorl	%eax,%eax
d7a338
@@ -997,6 +1021,7 @@ padlock_sha256_oneshot:
d7a338
 .align	16
d7a338
 padlock_sha256_blocks:
d7a338
 .L_padlock_sha256_blocks_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%edi
d7a338
 	pushl	%esi
d7a338
 	movl	12(%esp),%edi
d7a338
@@ -1027,6 +1052,7 @@ padlock_sha256_blocks:
d7a338
 .align	16
d7a338
 padlock_sha512_blocks:
d7a338
 .L_padlock_sha512_blocks_begin:
d7a338
+.byte	243,15,30,251
d7a338
 	pushl	%edi
d7a338
 	pushl	%esi
d7a338
 	movl	12(%esp),%edi
d7a338
@@ -1069,7 +1095,21 @@ padlock_sha512_blocks:
d7a338
 .Lpadlock_saved_context:
d7a338
 .long	0
d7a338
 
d7a338
+	.section ".note.gnu.property", "a"
d7a338
+	.p2align 2
d7a338
+	.long 1f - 0f
d7a338
+	.long 4f - 1f
d7a338
+	.long 5
d7a338
+0:
d7a338
+	.asciz "GNU"
d7a338
+1:
d7a338
+	.p2align 2
d7a338
+	.long 0xc0000002
d7a338
+	.long 3f - 2f
d7a338
+2:
d7a338
+	.long 3
d7a338
+3:
d7a338
+	.p2align 2
d7a338
+4:
d7a338
 
d7a338
 .section .note.GNU-stack,"",%progbits
d7a338
-
d7a338
-
d7a338
diff --git a/lib/accelerated/x86/elf/e_padlock-x86_64.s b/lib/accelerated/x86/elf/e_padlock-x86_64.s
d7a338
index c161f0a73..f92da756c 100644
d7a338
--- a/lib/accelerated/x86/elf/e_padlock-x86_64.s
d7a338
+++ b/lib/accelerated/x86/elf/e_padlock-x86_64.s
d7a338
@@ -1,4 +1,4 @@
d7a338
-# Copyright (c) 2011-2013, Andy Polyakov <appro@openssl.org>
d7a338
+# Copyright (c) 2011-2016, Andy Polyakov <appro@openssl.org>
d7a338
 # All rights reserved.
d7a338
 #
d7a338
 # Redistribution and use in source and binary forms, with or without
d7a338
@@ -42,36 +42,50 @@
d7a338
 .type	padlock_capability,@function
d7a338
 .align	16
d7a338
 padlock_capability:
d7a338
+.cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	movq	%rbx,%r8
d7a338
 	xorl	%eax,%eax
d7a338
 	cpuid
d7a338
 	xorl	%eax,%eax
d7a338
-	cmpl	$1953391939,%ebx
d7a338
+	cmpl	$0x746e6543,%ebx
d7a338
+	jne	.Lzhaoxin
d7a338
+	cmpl	$0x48727561,%edx
d7a338
 	jne	.Lnoluck
d7a338
-	cmpl	$1215460705,%edx
d7a338
+	cmpl	$0x736c7561,%ecx
d7a338
 	jne	.Lnoluck
d7a338
-	cmpl	$1936487777,%ecx
d7a338
+	jmp	.LzhaoxinEnd
d7a338
+.Lzhaoxin:
d7a338
+	cmpl	$0x68532020,%ebx
d7a338
 	jne	.Lnoluck
d7a338
-	movl	$3221225472,%eax
d7a338
+	cmpl	$0x68676e61,%edx
d7a338
+	jne	.Lnoluck
d7a338
+	cmpl	$0x20206961,%ecx
d7a338
+	jne	.Lnoluck
d7a338
+.LzhaoxinEnd:
d7a338
+	movl	$0xC0000000,%eax
d7a338
 	cpuid
d7a338
 	movl	%eax,%edx
d7a338
 	xorl	%eax,%eax
d7a338
-	cmpl	$3221225473,%edx
d7a338
+	cmpl	$0xC0000001,%edx
d7a338
 	jb	.Lnoluck
d7a338
-	movl	$3221225473,%eax
d7a338
+	movl	$0xC0000001,%eax
d7a338
 	cpuid
d7a338
 	movl	%edx,%eax
d7a338
-	andl	$4294967279,%eax
d7a338
-	orl	$16,%eax
d7a338
+	andl	$0xffffffef,%eax
d7a338
+	orl	$0x10,%eax
d7a338
 .Lnoluck:
d7a338
 	movq	%r8,%rbx
d7a338
 	.byte	0xf3,0xc3
d7a338
+.cfi_endproc	
d7a338
 .size	padlock_capability,.-padlock_capability
d7a338
 
d7a338
 .globl	padlock_key_bswap
d7a338
 .type	padlock_key_bswap,@function
d7a338
 .align	16
d7a338
 padlock_key_bswap:
d7a338
+.cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	movl	240(%rdi),%edx
d7a338
 .Lbswap_loop:
d7a338
 	movl	(%rdi),%eax
d7a338
@@ -81,23 +95,29 @@ padlock_key_bswap:
d7a338
 	subl	$1,%edx
d7a338
 	jnz	.Lbswap_loop
d7a338
 	.byte	0xf3,0xc3
d7a338
+.cfi_endproc	
d7a338
 .size	padlock_key_bswap,.-padlock_key_bswap
d7a338
 
d7a338
 .globl	padlock_verify_context
d7a338
 .type	padlock_verify_context,@function
d7a338
 .align	16
d7a338
 padlock_verify_context:
d7a338
+.cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	movq	%rdi,%rdx
d7a338
 	pushf
d7a338
 	leaq	.Lpadlock_saved_context(%rip),%rax
d7a338
 	call	_padlock_verify_ctx
d7a338
 	leaq	8(%rsp),%rsp
d7a338
 	.byte	0xf3,0xc3
d7a338
+.cfi_endproc	
d7a338
 .size	padlock_verify_context,.-padlock_verify_context
d7a338
 
d7a338
 .type	_padlock_verify_ctx,@function
d7a338
 .align	16
d7a338
 _padlock_verify_ctx:
d7a338
+.cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	movq	8(%rsp),%r8
d7a338
 	btq	$30,%r8
d7a338
 	jnc	.Lverified
d7a338
@@ -108,43 +128,55 @@ _padlock_verify_ctx:
d7a338
 .Lverified:
d7a338
 	movq	%rdx,(%rax)
d7a338
 	.byte	0xf3,0xc3
d7a338
+.cfi_endproc	
d7a338
 .size	_padlock_verify_ctx,.-_padlock_verify_ctx
d7a338
 
d7a338
 .globl	padlock_reload_key
d7a338
 .type	padlock_reload_key,@function
d7a338
 .align	16
d7a338
 padlock_reload_key:
d7a338
+.cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	pushf
d7a338
 	popf
d7a338
 	.byte	0xf3,0xc3
d7a338
+.cfi_endproc	
d7a338
 .size	padlock_reload_key,.-padlock_reload_key
d7a338
 
d7a338
 .globl	padlock_aes_block
d7a338
 .type	padlock_aes_block,@function
d7a338
 .align	16
d7a338
 padlock_aes_block:
d7a338
+.cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	movq	%rbx,%r8
d7a338
 	movq	$1,%rcx
d7a338
 	leaq	32(%rdx),%rbx
d7a338
 	leaq	16(%rdx),%rdx
d7a338
-.byte	0xf3,0x0f,0xa7,0xc8	
d7a338
+.byte	0xf3,0x0f,0xa7,0xc8
d7a338
 	movq	%r8,%rbx
d7a338
 	.byte	0xf3,0xc3
d7a338
+.cfi_endproc	
d7a338
 .size	padlock_aes_block,.-padlock_aes_block
d7a338
 
d7a338
 .globl	padlock_xstore
d7a338
 .type	padlock_xstore,@function
d7a338
 .align	16
d7a338
 padlock_xstore:
d7a338
+.cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	movl	%esi,%edx
d7a338
-.byte	0x0f,0xa7,0xc0		
d7a338
+.byte	0x0f,0xa7,0xc0
d7a338
 	.byte	0xf3,0xc3
d7a338
+.cfi_endproc	
d7a338
 .size	padlock_xstore,.-padlock_xstore
d7a338
 
d7a338
 .globl	padlock_sha1_oneshot
d7a338
 .type	padlock_sha1_oneshot,@function
d7a338
 .align	16
d7a338
 padlock_sha1_oneshot:
d7a338
+.cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	movq	%rdx,%rcx
d7a338
 	movq	%rdi,%rdx
d7a338
 	movups	(%rdi),%xmm0
d7a338
@@ -154,19 +186,22 @@ padlock_sha1_oneshot:
d7a338
 	movq	%rsp,%rdi
d7a338
 	movl	%eax,16(%rsp)
d7a338
 	xorq	%rax,%rax
d7a338
-.byte	0xf3,0x0f,0xa6,0xc8	
d7a338
+.byte	0xf3,0x0f,0xa6,0xc8
d7a338
 	movaps	(%rsp),%xmm0
d7a338
 	movl	16(%rsp),%eax
d7a338
 	addq	$128+8,%rsp
d7a338
 	movups	%xmm0,(%rdx)
d7a338
 	movl	%eax,16(%rdx)
d7a338
 	.byte	0xf3,0xc3
d7a338
+.cfi_endproc	
d7a338
 .size	padlock_sha1_oneshot,.-padlock_sha1_oneshot
d7a338
 
d7a338
 .globl	padlock_sha1_blocks
d7a338
 .type	padlock_sha1_blocks,@function
d7a338
 .align	16
d7a338
 padlock_sha1_blocks:
d7a338
+.cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	movq	%rdx,%rcx
d7a338
 	movq	%rdi,%rdx
d7a338
 	movups	(%rdi),%xmm0
d7a338
@@ -176,19 +211,22 @@ padlock_sha1_blocks:
d7a338
 	movq	%rsp,%rdi
d7a338
 	movl	%eax,16(%rsp)
d7a338
 	movq	$-1,%rax
d7a338
-.byte	0xf3,0x0f,0xa6,0xc8	
d7a338
+.byte	0xf3,0x0f,0xa6,0xc8
d7a338
 	movaps	(%rsp),%xmm0
d7a338
 	movl	16(%rsp),%eax
d7a338
 	addq	$128+8,%rsp
d7a338
 	movups	%xmm0,(%rdx)
d7a338
 	movl	%eax,16(%rdx)
d7a338
 	.byte	0xf3,0xc3
d7a338
+.cfi_endproc	
d7a338
 .size	padlock_sha1_blocks,.-padlock_sha1_blocks
d7a338
 
d7a338
 .globl	padlock_sha256_oneshot
d7a338
 .type	padlock_sha256_oneshot,@function
d7a338
 .align	16
d7a338
 padlock_sha256_oneshot:
d7a338
+.cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	movq	%rdx,%rcx
d7a338
 	movq	%rdi,%rdx
d7a338
 	movups	(%rdi),%xmm0
d7a338
@@ -198,19 +236,22 @@ padlock_sha256_oneshot:
d7a338
 	movq	%rsp,%rdi
d7a338
 	movaps	%xmm1,16(%rsp)
d7a338
 	xorq	%rax,%rax
d7a338
-.byte	0xf3,0x0f,0xa6,0xd0	
d7a338
+.byte	0xf3,0x0f,0xa6,0xd0
d7a338
 	movaps	(%rsp),%xmm0
d7a338
 	movaps	16(%rsp),%xmm1
d7a338
 	addq	$128+8,%rsp
d7a338
 	movups	%xmm0,(%rdx)
d7a338
 	movups	%xmm1,16(%rdx)
d7a338
 	.byte	0xf3,0xc3
d7a338
+.cfi_endproc	
d7a338
 .size	padlock_sha256_oneshot,.-padlock_sha256_oneshot
d7a338
 
d7a338
 .globl	padlock_sha256_blocks
d7a338
 .type	padlock_sha256_blocks,@function
d7a338
 .align	16
d7a338
 padlock_sha256_blocks:
d7a338
+.cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	movq	%rdx,%rcx
d7a338
 	movq	%rdi,%rdx
d7a338
 	movups	(%rdi),%xmm0
d7a338
@@ -220,19 +261,22 @@ padlock_sha256_blocks:
d7a338
 	movq	%rsp,%rdi
d7a338
 	movaps	%xmm1,16(%rsp)
d7a338
 	movq	$-1,%rax
d7a338
-.byte	0xf3,0x0f,0xa6,0xd0	
d7a338
+.byte	0xf3,0x0f,0xa6,0xd0
d7a338
 	movaps	(%rsp),%xmm0
d7a338
 	movaps	16(%rsp),%xmm1
d7a338
 	addq	$128+8,%rsp
d7a338
 	movups	%xmm0,(%rdx)
d7a338
 	movups	%xmm1,16(%rdx)
d7a338
 	.byte	0xf3,0xc3
d7a338
+.cfi_endproc	
d7a338
 .size	padlock_sha256_blocks,.-padlock_sha256_blocks
d7a338
 
d7a338
 .globl	padlock_sha512_blocks
d7a338
 .type	padlock_sha512_blocks,@function
d7a338
 .align	16
d7a338
 padlock_sha512_blocks:
d7a338
+.cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	movq	%rdx,%rcx
d7a338
 	movq	%rdi,%rdx
d7a338
 	movups	(%rdi),%xmm0
d7a338
@@ -245,7 +289,7 @@ padlock_sha512_blocks:
d7a338
 	movaps	%xmm1,16(%rsp)
d7a338
 	movaps	%xmm2,32(%rsp)
d7a338
 	movaps	%xmm3,48(%rsp)
d7a338
-.byte	0xf3,0x0f,0xa6,0xe0	
d7a338
+.byte	0xf3,0x0f,0xa6,0xe0
d7a338
 	movaps	(%rsp),%xmm0
d7a338
 	movaps	16(%rsp),%xmm1
d7a338
 	movaps	32(%rsp),%xmm2
d7a338
@@ -256,11 +300,14 @@ padlock_sha512_blocks:
d7a338
 	movups	%xmm2,32(%rdx)
d7a338
 	movups	%xmm3,48(%rdx)
d7a338
 	.byte	0xf3,0xc3
d7a338
+.cfi_endproc	
d7a338
 .size	padlock_sha512_blocks,.-padlock_sha512_blocks
d7a338
 .globl	padlock_ecb_encrypt
d7a338
 .type	padlock_ecb_encrypt,@function
d7a338
 .align	16
d7a338
 padlock_ecb_encrypt:
d7a338
+.cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	pushq	%rbp
d7a338
 	pushq	%rbx
d7a338
 
d7a338
@@ -278,9 +325,9 @@ padlock_ecb_encrypt:
d7a338
 	xorl	%ebx,%ebx
d7a338
 	testl	$32,(%rdx)
d7a338
 	jnz	.Lecb_aligned
d7a338
-	testq	$15,%rdi
d7a338
+	testq	$0x0f,%rdi
d7a338
 	setz	%al
d7a338
-	testq	$15,%rsi
d7a338
+	testq	$0x0f,%rsi
d7a338
 	setz	%bl
d7a338
 	testl	%ebx,%eax
d7a338
 	jnz	.Lecb_aligned
d7a338
@@ -304,7 +351,7 @@ padlock_ecb_encrypt:
d7a338
 	cmoveq	%rdi,%rax
d7a338
 	addq	%rcx,%rax
d7a338
 	negq	%rax
d7a338
-	andq	$4095,%rax
d7a338
+	andq	$0xfff,%rax
d7a338
 	cmpq	$128,%rax
d7a338
 	movq	$-128,%rax
d7a338
 	cmovaeq	%rbx,%rax
d7a338
@@ -320,12 +367,12 @@ padlock_ecb_encrypt:
d7a338
 	movq	%rcx,%r10
d7a338
 	movq	%rbx,%rcx
d7a338
 	movq	%rbx,%r11
d7a338
-	testq	$15,%rdi
d7a338
+	testq	$0x0f,%rdi
d7a338
 	cmovnzq	%rsp,%rdi
d7a338
-	testq	$15,%rsi
d7a338
+	testq	$0x0f,%rsi
d7a338
 	jz	.Lecb_inp_aligned
d7a338
 	shrq	$3,%rcx
d7a338
-.byte	0xf3,0x48,0xa5		
d7a338
+.byte	0xf3,0x48,0xa5
d7a338
 	subq	%rbx,%rdi
d7a338
 	movq	%rbx,%rcx
d7a338
 	movq	%rdi,%rsi
d7a338
@@ -333,15 +380,15 @@ padlock_ecb_encrypt:
d7a338
 	leaq	-16(%rdx),%rax
d7a338
 	leaq	16(%rdx),%rbx
d7a338
 	shrq	$4,%rcx
d7a338
-.byte	0xf3,0x0f,0xa7,200	
d7a338
+.byte	0xf3,0x0f,0xa7,200
d7a338
 	movq	%r8,%rdi
d7a338
 	movq	%r11,%rbx
d7a338
-	testq	$15,%rdi
d7a338
+	testq	$0x0f,%rdi
d7a338
 	jz	.Lecb_out_aligned
d7a338
 	movq	%rbx,%rcx
d7a338
 	leaq	(%rsp),%rsi
d7a338
 	shrq	$3,%rcx
d7a338
-.byte	0xf3,0x48,0xa5		
d7a338
+.byte	0xf3,0x48,0xa5
d7a338
 	subq	%rbx,%rdi
d7a338
 .Lecb_out_aligned:
d7a338
 	movq	%r9,%rsi
d7a338
@@ -362,7 +409,7 @@ padlock_ecb_encrypt:
d7a338
 	subq	%rax,%rsp
d7a338
 	shrq	$3,%rcx
d7a338
 	leaq	(%rsp),%rdi
d7a338
-.byte	0xf3,0x48,0xa5		
d7a338
+.byte	0xf3,0x48,0xa5
d7a338
 	movq	%rsp,%rsi
d7a338
 	movq	%r8,%rdi
d7a338
 	movq	%rbx,%rcx
d7a338
@@ -388,7 +435,7 @@ padlock_ecb_encrypt:
d7a338
 .Lecb_aligned:
d7a338
 	leaq	(%rsi,%rcx,1),%rbp
d7a338
 	negq	%rbp
d7a338
-	andq	$4095,%rbp
d7a338
+	andq	$0xfff,%rbp
d7a338
 	xorl	%eax,%eax
d7a338
 	cmpq	$128,%rbp
d7a338
 	movq	$128-1,%rbp
d7a338
@@ -399,7 +446,7 @@ padlock_ecb_encrypt:
d7a338
 	leaq	-16(%rdx),%rax
d7a338
 	leaq	16(%rdx),%rbx
d7a338
 	shrq	$4,%rcx
d7a338
-.byte	0xf3,0x0f,0xa7,200	
d7a338
+.byte	0xf3,0x0f,0xa7,200
d7a338
 	testq	%rbp,%rbp
d7a338
 	jz	.Lecb_exit
d7a338
 
d7a338
@@ -411,7 +458,7 @@ padlock_ecb_encrypt:
d7a338
 	subq	%rcx,%rsp
d7a338
 	shrq	$3,%rcx
d7a338
 	leaq	(%rsp),%rdi
d7a338
-.byte	0xf3,0x48,0xa5		
d7a338
+.byte	0xf3,0x48,0xa5
d7a338
 	leaq	(%r8),%rdi
d7a338
 	leaq	(%rsp),%rsi
d7a338
 	movq	%rbx,%rcx
d7a338
@@ -423,11 +470,14 @@ padlock_ecb_encrypt:
d7a338
 	popq	%rbx
d7a338
 	popq	%rbp
d7a338
 	.byte	0xf3,0xc3
d7a338
+.cfi_endproc	
d7a338
 .size	padlock_ecb_encrypt,.-padlock_ecb_encrypt
d7a338
 .globl	padlock_cbc_encrypt
d7a338
 .type	padlock_cbc_encrypt,@function
d7a338
 .align	16
d7a338
 padlock_cbc_encrypt:
d7a338
+.cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	pushq	%rbp
d7a338
 	pushq	%rbx
d7a338
 
d7a338
@@ -445,9 +495,9 @@ padlock_cbc_encrypt:
d7a338
 	xorl	%ebx,%ebx
d7a338
 	testl	$32,(%rdx)
d7a338
 	jnz	.Lcbc_aligned
d7a338
-	testq	$15,%rdi
d7a338
+	testq	$0x0f,%rdi
d7a338
 	setz	%al
d7a338
-	testq	$15,%rsi
d7a338
+	testq	$0x0f,%rsi
d7a338
 	setz	%bl
d7a338
 	testl	%ebx,%eax
d7a338
 	jnz	.Lcbc_aligned
d7a338
@@ -471,7 +521,7 @@ padlock_cbc_encrypt:
d7a338
 	cmoveq	%rdi,%rax
d7a338
 	addq	%rcx,%rax
d7a338
 	negq	%rax
d7a338
-	andq	$4095,%rax
d7a338
+	andq	$0xfff,%rax
d7a338
 	cmpq	$64,%rax
d7a338
 	movq	$-64,%rax
d7a338
 	cmovaeq	%rbx,%rax
d7a338
@@ -487,12 +537,12 @@ padlock_cbc_encrypt:
d7a338
 	movq	%rcx,%r10
d7a338
 	movq	%rbx,%rcx
d7a338
 	movq	%rbx,%r11
d7a338
-	testq	$15,%rdi
d7a338
+	testq	$0x0f,%rdi
d7a338
 	cmovnzq	%rsp,%rdi
d7a338
-	testq	$15,%rsi
d7a338
+	testq	$0x0f,%rsi
d7a338
 	jz	.Lcbc_inp_aligned
d7a338
 	shrq	$3,%rcx
d7a338
-.byte	0xf3,0x48,0xa5		
d7a338
+.byte	0xf3,0x48,0xa5
d7a338
 	subq	%rbx,%rdi
d7a338
 	movq	%rbx,%rcx
d7a338
 	movq	%rdi,%rsi
d7a338
@@ -500,17 +550,17 @@ padlock_cbc_encrypt:
d7a338
 	leaq	-16(%rdx),%rax
d7a338
 	leaq	16(%rdx),%rbx
d7a338
 	shrq	$4,%rcx
d7a338
-.byte	0xf3,0x0f,0xa7,208	
d7a338
+.byte	0xf3,0x0f,0xa7,208
d7a338
 	movdqa	(%rax),%xmm0
d7a338
 	movdqa	%xmm0,-16(%rdx)
d7a338
 	movq	%r8,%rdi
d7a338
 	movq	%r11,%rbx
d7a338
-	testq	$15,%rdi
d7a338
+	testq	$0x0f,%rdi
d7a338
 	jz	.Lcbc_out_aligned
d7a338
 	movq	%rbx,%rcx
d7a338
 	leaq	(%rsp),%rsi
d7a338
 	shrq	$3,%rcx
d7a338
-.byte	0xf3,0x48,0xa5		
d7a338
+.byte	0xf3,0x48,0xa5
d7a338
 	subq	%rbx,%rdi
d7a338
 .Lcbc_out_aligned:
d7a338
 	movq	%r9,%rsi
d7a338
@@ -531,7 +581,7 @@ padlock_cbc_encrypt:
d7a338
 	subq	%rax,%rsp
d7a338
 	shrq	$3,%rcx
d7a338
 	leaq	(%rsp),%rdi
d7a338
-.byte	0xf3,0x48,0xa5		
d7a338
+.byte	0xf3,0x48,0xa5
d7a338
 	movq	%rsp,%rsi
d7a338
 	movq	%r8,%rdi
d7a338
 	movq	%rbx,%rcx
d7a338
@@ -557,7 +607,7 @@ padlock_cbc_encrypt:
d7a338
 .Lcbc_aligned:
d7a338
 	leaq	(%rsi,%rcx,1),%rbp
d7a338
 	negq	%rbp
d7a338
-	andq	$4095,%rbp
d7a338
+	andq	$0xfff,%rbp
d7a338
 	xorl	%eax,%eax
d7a338
 	cmpq	$64,%rbp
d7a338
 	movq	$64-1,%rbp
d7a338
@@ -568,7 +618,7 @@ padlock_cbc_encrypt:
d7a338
 	leaq	-16(%rdx),%rax
d7a338
 	leaq	16(%rdx),%rbx
d7a338
 	shrq	$4,%rcx
d7a338
-.byte	0xf3,0x0f,0xa7,208	
d7a338
+.byte	0xf3,0x0f,0xa7,208
d7a338
 	movdqa	(%rax),%xmm0
d7a338
 	movdqa	%xmm0,-16(%rdx)
d7a338
 	testq	%rbp,%rbp
d7a338
@@ -582,7 +632,7 @@ padlock_cbc_encrypt:
d7a338
 	subq	%rcx,%rsp
d7a338
 	shrq	$3,%rcx
d7a338
 	leaq	(%rsp),%rdi
d7a338
-.byte	0xf3,0x48,0xa5		
d7a338
+.byte	0xf3,0x48,0xa5
d7a338
 	leaq	(%r8),%rdi
d7a338
 	leaq	(%rsp),%rsi
d7a338
 	movq	%rbx,%rcx
d7a338
@@ -594,11 +644,14 @@ padlock_cbc_encrypt:
d7a338
 	popq	%rbx
d7a338
 	popq	%rbp
d7a338
 	.byte	0xf3,0xc3
d7a338
+.cfi_endproc	
d7a338
 .size	padlock_cbc_encrypt,.-padlock_cbc_encrypt
d7a338
 .globl	padlock_cfb_encrypt
d7a338
 .type	padlock_cfb_encrypt,@function
d7a338
 .align	16
d7a338
 padlock_cfb_encrypt:
d7a338
+.cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	pushq	%rbp
d7a338
 	pushq	%rbx
d7a338
 
d7a338
@@ -616,9 +669,9 @@ padlock_cfb_encrypt:
d7a338
 	xorl	%ebx,%ebx
d7a338
 	testl	$32,(%rdx)
d7a338
 	jnz	.Lcfb_aligned
d7a338
-	testq	$15,%rdi
d7a338
+	testq	$0x0f,%rdi
d7a338
 	setz	%al
d7a338
-	testq	$15,%rsi
d7a338
+	testq	$0x0f,%rsi
d7a338
 	setz	%bl
d7a338
 	testl	%ebx,%eax
d7a338
 	jnz	.Lcfb_aligned
d7a338
@@ -645,12 +698,12 @@ padlock_cfb_encrypt:
d7a338
 	movq	%rcx,%r10
d7a338
 	movq	%rbx,%rcx
d7a338
 	movq	%rbx,%r11
d7a338
-	testq	$15,%rdi
d7a338
+	testq	$0x0f,%rdi
d7a338
 	cmovnzq	%rsp,%rdi
d7a338
-	testq	$15,%rsi
d7a338
+	testq	$0x0f,%rsi
d7a338
 	jz	.Lcfb_inp_aligned
d7a338
 	shrq	$3,%rcx
d7a338
-.byte	0xf3,0x48,0xa5		
d7a338
+.byte	0xf3,0x48,0xa5
d7a338
 	subq	%rbx,%rdi
d7a338
 	movq	%rbx,%rcx
d7a338
 	movq	%rdi,%rsi
d7a338
@@ -658,17 +711,17 @@ padlock_cfb_encrypt:
d7a338
 	leaq	-16(%rdx),%rax
d7a338
 	leaq	16(%rdx),%rbx
d7a338
 	shrq	$4,%rcx
d7a338
-.byte	0xf3,0x0f,0xa7,224	
d7a338
+.byte	0xf3,0x0f,0xa7,224
d7a338
 	movdqa	(%rax),%xmm0
d7a338
 	movdqa	%xmm0,-16(%rdx)
d7a338
 	movq	%r8,%rdi
d7a338
 	movq	%r11,%rbx
d7a338
-	testq	$15,%rdi
d7a338
+	testq	$0x0f,%rdi
d7a338
 	jz	.Lcfb_out_aligned
d7a338
 	movq	%rbx,%rcx
d7a338
 	leaq	(%rsp),%rsi
d7a338
 	shrq	$3,%rcx
d7a338
-.byte	0xf3,0x48,0xa5		
d7a338
+.byte	0xf3,0x48,0xa5
d7a338
 	subq	%rbx,%rdi
d7a338
 .Lcfb_out_aligned:
d7a338
 	movq	%r9,%rsi
d7a338
@@ -698,7 +751,7 @@ padlock_cfb_encrypt:
d7a338
 	leaq	-16(%rdx),%rax
d7a338
 	leaq	16(%rdx),%rbx
d7a338
 	shrq	$4,%rcx
d7a338
-.byte	0xf3,0x0f,0xa7,224	
d7a338
+.byte	0xf3,0x0f,0xa7,224
d7a338
 	movdqa	(%rax),%xmm0
d7a338
 	movdqa	%xmm0,-16(%rdx)
d7a338
 .Lcfb_exit:
d7a338
@@ -708,11 +761,14 @@ padlock_cfb_encrypt:
d7a338
 	popq	%rbx
d7a338
 	popq	%rbp
d7a338
 	.byte	0xf3,0xc3
d7a338
+.cfi_endproc	
d7a338
 .size	padlock_cfb_encrypt,.-padlock_cfb_encrypt
d7a338
 .globl	padlock_ofb_encrypt
d7a338
 .type	padlock_ofb_encrypt,@function
d7a338
 .align	16
d7a338
 padlock_ofb_encrypt:
d7a338
+.cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	pushq	%rbp
d7a338
 	pushq	%rbx
d7a338
 
d7a338
@@ -730,9 +786,9 @@ padlock_ofb_encrypt:
d7a338
 	xorl	%ebx,%ebx
d7a338
 	testl	$32,(%rdx)
d7a338
 	jnz	.Lofb_aligned
d7a338
-	testq	$15,%rdi
d7a338
+	testq	$0x0f,%rdi
d7a338
 	setz	%al
d7a338
-	testq	$15,%rsi
d7a338
+	testq	$0x0f,%rsi
d7a338
 	setz	%bl
d7a338
 	testl	%ebx,%eax
d7a338
 	jnz	.Lofb_aligned
d7a338
@@ -759,12 +815,12 @@ padlock_ofb_encrypt:
d7a338
 	movq	%rcx,%r10
d7a338
 	movq	%rbx,%rcx
d7a338
 	movq	%rbx,%r11
d7a338
-	testq	$15,%rdi
d7a338
+	testq	$0x0f,%rdi
d7a338
 	cmovnzq	%rsp,%rdi
d7a338
-	testq	$15,%rsi
d7a338
+	testq	$0x0f,%rsi
d7a338
 	jz	.Lofb_inp_aligned
d7a338
 	shrq	$3,%rcx
d7a338
-.byte	0xf3,0x48,0xa5		
d7a338
+.byte	0xf3,0x48,0xa5
d7a338
 	subq	%rbx,%rdi
d7a338
 	movq	%rbx,%rcx
d7a338
 	movq	%rdi,%rsi
d7a338
@@ -772,17 +828,17 @@ padlock_ofb_encrypt:
d7a338
 	leaq	-16(%rdx),%rax
d7a338
 	leaq	16(%rdx),%rbx
d7a338
 	shrq	$4,%rcx
d7a338
-.byte	0xf3,0x0f,0xa7,232	
d7a338
+.byte	0xf3,0x0f,0xa7,232
d7a338
 	movdqa	(%rax),%xmm0
d7a338
 	movdqa	%xmm0,-16(%rdx)
d7a338
 	movq	%r8,%rdi
d7a338
 	movq	%r11,%rbx
d7a338
-	testq	$15,%rdi
d7a338
+	testq	$0x0f,%rdi
d7a338
 	jz	.Lofb_out_aligned
d7a338
 	movq	%rbx,%rcx
d7a338
 	leaq	(%rsp),%rsi
d7a338
 	shrq	$3,%rcx
d7a338
-.byte	0xf3,0x48,0xa5		
d7a338
+.byte	0xf3,0x48,0xa5
d7a338
 	subq	%rbx,%rdi
d7a338
 .Lofb_out_aligned:
d7a338
 	movq	%r9,%rsi
d7a338
@@ -812,7 +868,7 @@ padlock_ofb_encrypt:
d7a338
 	leaq	-16(%rdx),%rax
d7a338
 	leaq	16(%rdx),%rbx
d7a338
 	shrq	$4,%rcx
d7a338
-.byte	0xf3,0x0f,0xa7,232	
d7a338
+.byte	0xf3,0x0f,0xa7,232
d7a338
 	movdqa	(%rax),%xmm0
d7a338
 	movdqa	%xmm0,-16(%rdx)
d7a338
 .Lofb_exit:
d7a338
@@ -822,11 +878,14 @@ padlock_ofb_encrypt:
d7a338
 	popq	%rbx
d7a338
 	popq	%rbp
d7a338
 	.byte	0xf3,0xc3
d7a338
+.cfi_endproc	
d7a338
 .size	padlock_ofb_encrypt,.-padlock_ofb_encrypt
d7a338
 .globl	padlock_ctr32_encrypt
d7a338
 .type	padlock_ctr32_encrypt,@function
d7a338
 .align	16
d7a338
 padlock_ctr32_encrypt:
d7a338
+.cfi_startproc	
d7a338
+.byte	243,15,30,250
d7a338
 	pushq	%rbp
d7a338
 	pushq	%rbx
d7a338
 
d7a338
@@ -844,9 +903,9 @@ padlock_ctr32_encrypt:
d7a338
 	xorl	%ebx,%ebx
d7a338
 	testl	$32,(%rdx)
d7a338
 	jnz	.Lctr32_aligned
d7a338
-	testq	$15,%rdi
d7a338
+	testq	$0x0f,%rdi
d7a338
 	setz	%al
d7a338
-	testq	$15,%rsi
d7a338
+	testq	$0x0f,%rsi
d7a338
 	setz	%bl
d7a338
 	testl	%ebx,%eax
d7a338
 	jnz	.Lctr32_aligned
d7a338
@@ -881,7 +940,7 @@ padlock_ctr32_encrypt:
d7a338
 	cmoveq	%rdi,%rax
d7a338
 	addq	%rcx,%rax
d7a338
 	negq	%rax
d7a338
-	andq	$4095,%rax
d7a338
+	andq	$0xfff,%rax
d7a338
 	cmpq	$32,%rax
d7a338
 	movq	$-32,%rax
d7a338
 	cmovaeq	%rbx,%rax
d7a338
@@ -897,12 +956,12 @@ padlock_ctr32_encrypt:
d7a338
 	movq	%rcx,%r10
d7a338
 	movq	%rbx,%rcx
d7a338
 	movq	%rbx,%r11
d7a338
-	testq	$15,%rdi
d7a338
+	testq	$0x0f,%rdi
d7a338
 	cmovnzq	%rsp,%rdi
d7a338
-	testq	$15,%rsi
d7a338
+	testq	$0x0f,%rsi
d7a338
 	jz	.Lctr32_inp_aligned
d7a338
 	shrq	$3,%rcx
d7a338
-.byte	0xf3,0x48,0xa5		
d7a338
+.byte	0xf3,0x48,0xa5
d7a338
 	subq	%rbx,%rdi
d7a338
 	movq	%rbx,%rcx
d7a338
 	movq	%rdi,%rsi
d7a338
@@ -910,23 +969,23 @@ padlock_ctr32_encrypt:
d7a338
 	leaq	-16(%rdx),%rax
d7a338
 	leaq	16(%rdx),%rbx
d7a338
 	shrq	$4,%rcx
d7a338
-.byte	0xf3,0x0f,0xa7,216	
d7a338
+.byte	0xf3,0x0f,0xa7,216
d7a338
 	movl	-4(%rdx),%eax
d7a338
-	testl	$4294901760,%eax
d7a338
+	testl	$0xffff0000,%eax
d7a338
 	jnz	.Lctr32_no_carry
d7a338
 	bswapl	%eax
d7a338
-	addl	$65536,%eax
d7a338
+	addl	$0x10000,%eax
d7a338
 	bswapl	%eax
d7a338
 	movl	%eax,-4(%rdx)
d7a338
 .Lctr32_no_carry:
d7a338
 	movq	%r8,%rdi
d7a338
 	movq	%r11,%rbx
d7a338
-	testq	$15,%rdi
d7a338
+	testq	$0x0f,%rdi
d7a338
 	jz	.Lctr32_out_aligned
d7a338
 	movq	%rbx,%rcx
d7a338
 	leaq	(%rsp),%rsi
d7a338
 	shrq	$3,%rcx
d7a338
-.byte	0xf3,0x48,0xa5		
d7a338
+.byte	0xf3,0x48,0xa5
d7a338
 	subq	%rbx,%rdi
d7a338
 .Lctr32_out_aligned:
d7a338
 	movq	%r9,%rsi
d7a338
@@ -944,7 +1003,7 @@ padlock_ctr32_encrypt:
d7a338
 	cmoveq	%rdi,%rax
d7a338
 	addq	%rcx,%rax
d7a338
 	negq	%rax
d7a338
-	andq	$4095,%rax
d7a338
+	andq	$0xfff,%rax
d7a338
 	cmpq	$32,%rax
d7a338
 	movq	$-32,%rax
d7a338
 	cmovaeq	%rbx,%rax
d7a338
@@ -959,7 +1018,7 @@ padlock_ctr32_encrypt:
d7a338
 	subq	%rax,%rsp
d7a338
 	shrq	$3,%rcx
d7a338
 	leaq	(%rsp),%rdi
d7a338
-.byte	0xf3,0x48,0xa5		
d7a338
+.byte	0xf3,0x48,0xa5
d7a338
 	movq	%rsp,%rsi
d7a338
 	movq	%r8,%rdi
d7a338
 	movq	%rbx,%rcx
d7a338
@@ -986,7 +1045,7 @@ padlock_ctr32_encrypt:
d7a338
 	movl	-4(%rdx),%eax
d7a338
 	bswapl	%eax
d7a338
 	negl	%eax
d7a338
-	andl	$65535,%eax
d7a338
+	andl	$0xffff,%eax
d7a338
 	movq	$1048576,%rbx
d7a338
 	shll	$4,%eax
d7a338
 	cmovzq	%rbx,%rax
d7a338
@@ -1003,11 +1062,11 @@ padlock_ctr32_encrypt:
d7a338
 	leaq	-16(%rdx),%rax
d7a338
 	leaq	16(%rdx),%rbx
d7a338
 	shrq	$4,%rcx
d7a338
-.byte	0xf3,0x0f,0xa7,216	
d7a338
+.byte	0xf3,0x0f,0xa7,216
d7a338
 
d7a338
 	movl	-4(%rdx),%eax
d7a338
 	bswapl	%eax
d7a338
-	addl	$65536,%eax
d7a338
+	addl	$0x10000,%eax
d7a338
 	bswapl	%eax
d7a338
 	movl	%eax,-4(%rdx)
d7a338
 
d7a338
@@ -1021,7 +1080,7 @@ padlock_ctr32_encrypt:
d7a338
 .Lctr32_aligned_skip:
d7a338
 	leaq	(%rsi,%rcx,1),%rbp
d7a338
 	negq	%rbp
d7a338
-	andq	$4095,%rbp
d7a338
+	andq	$0xfff,%rbp
d7a338
 	xorl	%eax,%eax
d7a338
 	cmpq	$32,%rbp
d7a338
 	movq	$32-1,%rbp
d7a338
@@ -1032,7 +1091,7 @@ padlock_ctr32_encrypt:
d7a338
 	leaq	-16(%rdx),%rax
d7a338
 	leaq	16(%rdx),%rbx
d7a338
 	shrq	$4,%rcx
d7a338
-.byte	0xf3,0x0f,0xa7,216	
d7a338
+.byte	0xf3,0x0f,0xa7,216
d7a338
 	testq	%rbp,%rbp
d7a338
 	jz	.Lctr32_exit
d7a338
 
d7a338
@@ -1044,7 +1103,7 @@ padlock_ctr32_encrypt:
d7a338
 	subq	%rcx,%rsp
d7a338
 	shrq	$3,%rcx
d7a338
 	leaq	(%rsp),%rdi
d7a338
-.byte	0xf3,0x48,0xa5		
d7a338
+.byte	0xf3,0x48,0xa5
d7a338
 	leaq	(%r8),%rdi
d7a338
 	leaq	(%rsp),%rsi
d7a338
 	movq	%rbx,%rcx
d7a338
@@ -1056,6 +1115,7 @@ padlock_ctr32_encrypt:
d7a338
 	popq	%rbx
d7a338
 	popq	%rbp
d7a338
 	.byte	0xf3,0xc3
d7a338
+.cfi_endproc	
d7a338
 .size	padlock_ctr32_encrypt,.-padlock_ctr32_encrypt
d7a338
 .byte	86,73,65,32,80,97,100,108,111,99,107,32,120,56,54,95,54,52,32,109,111,100,117,108,101,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
d7a338
 .align	16
d7a338
@@ -1063,8 +1123,26 @@ padlock_ctr32_encrypt:
d7a338
 .align	8
d7a338
 .Lpadlock_saved_context:
d7a338
 .quad	0
d7a338
-
d7a338
+	.section ".note.gnu.property", "a"
d7a338
+	.p2align 3
d7a338
+	.long 1f - 0f
d7a338
+	.long 4f - 1f
d7a338
+	.long 5
d7a338
+0:
d7a338
+	# "GNU" encoded with .byte, since .asciz isn't supported
d7a338
+	# on Solaris.
d7a338
+	.byte 0x47
d7a338
+	.byte 0x4e
d7a338
+	.byte 0x55
d7a338
+	.byte 0
d7a338
+1:
d7a338
+	.p2align 3
d7a338
+	.long 0xc0000002
d7a338
+	.long 3f - 2f
d7a338
+2:
d7a338
+	.long 3
d7a338
+3:
d7a338
+	.p2align 3
d7a338
+4:
d7a338
 
d7a338
 .section .note.GNU-stack,"",%progbits
d7a338
-
d7a338
-