tnintemann / rpms / boost

Forked from rpms/boost 4 years ago
Clone

Blame SOURCES/boost-1.66-optimize-s390x-instructions.patch

4aabfd
From 519786ec38bb5123547071021e4bd7678d7673ad Mon Sep 17 00:00:00 2001
4aabfd
From: Andreas Krebbel <krebbel@linux.ibm.com>
4aabfd
Date: Mon, 23 Mar 2020 09:08:27 +0100
4aabfd
Subject: [PATCH 3/4] Optimize s390x instructions
4aabfd
4aabfd
---
4aabfd
 src/asm/jump_s390x_sysv_elf_gas.S  | 84 ++++++++++--------------------
4aabfd
 src/asm/make_s390x_sysv_elf_gas.S  | 27 ++++++----
4aabfd
 src/asm/ontop_s390x_sysv_elf_gas.S | 81 +++++++++-------------------
4aabfd
 3 files changed, 70 insertions(+), 122 deletions(-)
4aabfd
4aabfd
diff --git a/src/asm/jump_s390x_sysv_elf_gas.S b/src/asm/jump_s390x_sysv_elf_gas.S
4aabfd
index c011d53..b2163cc 100644
4aabfd
--- a/libs/context/src/asm/jump_s390x_sysv_elf_gas.S
4aabfd
+++ b/libs/context/src/asm/jump_s390x_sysv_elf_gas.S
4aabfd
@@ -36,47 +36,34 @@
4aabfd
 .global jump_fcontext
4aabfd
 .type   jump_fcontext, @function
4aabfd
 
4aabfd
+#define GR_OFFSET	0
4aabfd
+#define LR_OFFSET	64
4aabfd
+#define SP_OFFSET	72
4aabfd
+#define FP_OFFSET	80
4aabfd
+#define PC_OFFSET	112
4aabfd
+#define L_CTX		120
4aabfd
+#define L_STACKFRAME	120
4aabfd
+
4aabfd
 jump_fcontext:
4aabfd
     
4aabfd
     # Reserved the space for stack to store the data of current context
4aabfd
     # before we jump to the new context.
4aabfd
-    lay 15,-120(15)
4aabfd
+    aghi %r15,-L_STACKFRAME
4aabfd
 
4aabfd
     # save the registers to the stack
4aabfd
-    stg 6,  0(15)       # save R6     
4aabfd
-    stg 7,  8(15)       # save R7     
4aabfd
-    stg 8,  16(15)      # save R8
4aabfd
-    stg 9,  24(15)      # save R9
4aabfd
-    stg 10, 32(15)      # save R10
4aabfd
-    stg 11, 40(15)      # save R11
4aabfd
-    stg 12, 48(15)      # save R12
4aabfd
-    stg 13, 56(15)      # save R13
4aabfd
-    stg 14, 64(15)      # save R14
4aabfd
-    stg 15, 72(15)      # save R15
4aabfd
+    stmg %r6, %r15, GR_OFFSET(%r15)
4aabfd
 
4aabfd
     # save the floating point registers
4aabfd
-    # Load the FPR into R0 then save it to the stack
4aabfd
-    # Load F1 into R0
4aabfd
-    lgdr 0,1 
4aabfd
-    stg  0,80(15)       # save F1
4aabfd
-
4aabfd
-    # Load F3 into R0
4aabfd
-    lgdr 0,3
4aabfd
-    stg  0,88(15)       # save F3
4aabfd
-
4aabfd
-    # Load F5 into R0
4aabfd
-    lgdr 0,5
4aabfd
-    stg  0,96(15)       # save F5
4aabfd
-
4aabfd
-    # Load F7 into R0
4aabfd
-    lgdr 0,7
4aabfd
-    stg  0,104(15)      # save F7
4aabfd
+    std  %f1,FP_OFFSET(%r15)
4aabfd
+    std  %f3,FP_OFFSET+8(%r15)
4aabfd
+    std  %f5,FP_OFFSET+16(%r15)
4aabfd
+    std  %f7,FP_OFFSET+24(%r15)
4aabfd
 
4aabfd
     # Save LR as PC
4aabfd
-    stg 14,112(15)
4aabfd
+    stg  %r14,PC_OFFSET(%r15)
4aabfd
 
4aabfd
     # Store the SP pointing to the old context-data into R0
4aabfd
-    lgr 0,15
4aabfd
+    lgr	 %r0,%r15
4aabfd
 
4aabfd
     # Get the SP pointing to the new context-data
4aabfd
     # Note: Since the return type of the jump_fcontext is struct whose
4aabfd
@@ -88,46 +75,31 @@ jump_fcontext:
4aabfd
     # R2 --> Address of the return transfer_t struct
4aabfd
     # R3 --> Context we want to switch to
4aabfd
     # R4 --> Data
4aabfd
-    lgr 15,3
4aabfd
+    lgr	%r15,%r3
4aabfd
 
4aabfd
     # Load the registers with the data present in context-data of the
4aabfd
     # context we are going to switch to
4aabfd
-    lg 6,  0(15)       # restore R6     
4aabfd
-    lg 7,  8(15)       # restore R7     
4aabfd
-    lg 8,  16(15)      # restore R8
4aabfd
-    lg 9,  24(15)      # restore R9
4aabfd
-    lg 10, 32(15)      # restore R10
4aabfd
-    lg 11, 40(15)      # restore R11
4aabfd
-    lg 12, 48(15)      # restore R12
4aabfd
-    lg 13, 56(15)      # restore R13
4aabfd
-    lg 14, 64(15)      # restore R14
4aabfd
+    lmg	%r6, %r14, GR_OFFSET(%r15)
4aabfd
 
4aabfd
     # Restore Floating point registers
4aabfd
-    lg   1,80(15)
4aabfd
-    ldgr 1,1            # restore F1
4aabfd
-
4aabfd
-    lg   1,88(15)
4aabfd
-    ldgr 1,3            # restore F3
4aabfd
-
4aabfd
-    lg   1,96(15)
4aabfd
-    ldgr 1,5            # restore F5
4aabfd
-
4aabfd
-    lg   1,104(15)
4aabfd
-    ldgr 1,7            # restore F7
4aabfd
+    ld	 %f1,FP_OFFSET(%r15)
4aabfd
+    ld	 %f3,FP_OFFSET+8(%r15)
4aabfd
+    ld	 %f5,FP_OFFSET+16(%r15)
4aabfd
+    ld	 %f7,FP_OFFSET+24(%r15)
4aabfd
 
4aabfd
     # Load PC
4aabfd
-    lg  1,112(15)
4aabfd
+    lg   %r1,PC_OFFSET(%r15)
4aabfd
 
4aabfd
-    # Adjust the stack
4aabfd
-    lay 15, 120(15)
4aabfd
+    # Adjust the stack 
4aabfd
+    aghi %r15,L_STACKFRAME
4aabfd
 
4aabfd
     # R2 --> Address where the return transfer_t is stored
4aabfd
     # R0 --> FCTX
4aabfd
     # R4 --> DATA
4aabfd
 
4aabfd
     # Store the elements to return transfer_t
4aabfd
-    stg 15, 0(2)
4aabfd
-    stg 4, 8(2)
4aabfd
+    stg %r15, 0(%r2)
4aabfd
+    stg %r4, 8(%r2)
4aabfd
 
4aabfd
     # Note: The address in R2 points to the place where the return
4aabfd
     # transfer_t is stored. Since context_function take transfer_t
4aabfd
@@ -135,7 +107,7 @@ jump_fcontext:
4aabfd
     # first parameter value.
4aabfd
 
4aabfd
     #jump to context
4aabfd
-    br 1
4aabfd
+    br  %r1
4aabfd
 
4aabfd
 .size   jump_fcontext,.-jump_fcontext
4aabfd
 # Mark that we don't need executable stack.
4aabfd
diff --git a/src/asm/make_s390x_sysv_elf_gas.S b/src/asm/make_s390x_sysv_elf_gas.S
4aabfd
index f566533..d02856c 100644
4aabfd
--- a/libs/context/src/asm/make_s390x_sysv_elf_gas.S
4aabfd
+++ b/libs/context/src/asm/make_s390x_sysv_elf_gas.S
4aabfd
@@ -36,6 +36,14 @@
4aabfd
 .global make_fcontext
4aabfd
 .type 	 make_fcontext, @function
4aabfd
 
4aabfd
+#define GR_OFFSET	0
4aabfd
+#define LR_OFFSET	64
4aabfd
+#define SP_OFFSET	72
4aabfd
+#define FP_OFFSET	80
4aabfd
+#define PC_OFFSET	112
4aabfd
+#define L_CTX		120
4aabfd
+#define L_STACKFRAME	120
4aabfd
+
4aabfd
 make_fcontext:
4aabfd
 
4aabfd
 		# make_fcontext takes in 3 arguments
4aabfd
@@ -56,40 +64,39 @@ make_fcontext:
4aabfd
 		# address is zero or not. If not AND it with `-8`. 
4aabfd
 
4aabfd
 		# Here we AND the lower 16 bits of the memory address present in the 
4aabfd
-		# R2 with the bits 1111 1111 1111 1000 which when converted into
4aabfd
-		# decimal is 65528
4aabfd
-		nill    2,65528
4aabfd
+		# R2 with the bits 1111 1111 1111 0000 (0xfff0), i.e. 16-byte alignment
4aabfd
+		nill    %r2,0xfff0
4aabfd
 
4aabfd
 		# Reserve space for context-data on context-stack.
4aabfd
 		# This is done by shifting the SP/address by 112 bytes.
4aabfd
-		lay 2,-120(2)
4aabfd
+		aghi	%r2,-L_CTX
4aabfd
 
4aabfd
 		# third arg of make_fcontext() == address of the context-function
4aabfd
 		# Store the address as a PC to jump in, whenever we call the 
4aabfd
 		# make_fcontext.
4aabfd
-		stg 4,112(2)
4aabfd
+		stg 	%r4,PC_OFFSET(%r2)
4aabfd
 
4aabfd
 		# Save the address of finish as return-address for context-function
4aabfd
 		# This will be entered after context-function return
4aabfd
 		# The address of finish will be saved in Link register, this register
4aabfd
 		# specifies where we need to jump after the function executes
4aabfd
 		# completely.
4aabfd
-		larl 1,finish
4aabfd
-		stg  1,64(2)
4aabfd
+		larl 	%r1,finish
4aabfd
+		stg  	%r1,LR_OFFSET(%r2)
4aabfd
 
4aabfd
 		# Return pointer to context data
4aabfd
 		# R14 acts as the link register
4aabfd
 		# R2 holds the address of the context stack. When we return from the
4aabfd
 		# make_fcontext, R2 is passed back.
4aabfd
-		br 14 
4aabfd
+		br 	%r14 
4aabfd
 
4aabfd
 	finish:
4aabfd
 
4aabfd
 		# In finish tasks, you load the exit code and exit the make_fcontext
4aabfd
 		# This is called when the context-function is entirely executed
4aabfd
 
4aabfd
-		lghi 2,0
4aabfd
-		brasl 14,_exit
4aabfd
+		lghi 	%r2,0
4aabfd
+		brasl 	%r14,_exit@PLT
4aabfd
 
4aabfd
 .size   make_fcontext,.-make_fcontext
4aabfd
 # Mark that we don't need executable stack.
4aabfd
diff --git a/src/asm/ontop_s390x_sysv_elf_gas.S b/src/asm/ontop_s390x_sysv_elf_gas.S
4aabfd
index 7ab2cf5..4488654 100644
4aabfd
--- a/libs/context/src/asm/ontop_s390x_sysv_elf_gas.S
4aabfd
+++ b/libs/context/src/asm/ontop_s390x_sysv_elf_gas.S
4aabfd
@@ -36,47 +36,32 @@
4aabfd
 .global ontop_fcontext
4aabfd
 .type   ontop_fcontext, @function
4aabfd
 
4aabfd
+#define GR_OFFSET	0
4aabfd
+#define LR_OFFSET	64
4aabfd
+#define SP_OFFSET	72
4aabfd
+#define FP_OFFSET	80
4aabfd
+#define PC_OFFSET	112
4aabfd
+#define L_CTX		120
4aabfd
+
4aabfd
 ontop_fcontext:
4aabfd
     
4aabfd
     # Reserved the space for stack to store the data of current context
4aabfd
     # before we jump to the new context.
4aabfd
-    lay 15,-120(15)
4aabfd
+    aghi %r15,-L_CTX
4aabfd
 
4aabfd
     # save the registers to the stack
4aabfd
-    stg 6,  0(15)       # save R6     
4aabfd
-    stg 7,  8(15)       # save R7     
4aabfd
-    stg 8,  16(15)      # save R8
4aabfd
-    stg 9,  24(15)      # save R9
4aabfd
-    stg 10, 32(15)      # save R10
4aabfd
-    stg 11, 40(15)      # save R11
4aabfd
-    stg 12, 48(15)      # save R12
4aabfd
-    stg 13, 56(15)      # save R13
4aabfd
-    stg 14, 64(15)      # save R14
4aabfd
-    stg 15, 72(15)      # save R15
4aabfd
+    stmg %r6, %r15, GR_OFFSET(%r15)
4aabfd
 
4aabfd
     # save the floating point registers
4aabfd
-    # Load the FPR into R0 then save it to the stack
4aabfd
-    # Load F1 into R0
4aabfd
-    lgdr 0,1 
4aabfd
-    stg  0,80(15)       # save F1
4aabfd
-
4aabfd
-    # Load F3 into R0
4aabfd
-    lgdr 0,3
4aabfd
-    stg  0,88(15)       # save F3
4aabfd
-
4aabfd
-    # Load F5 into R0
4aabfd
-    lgdr 0,5
4aabfd
-    stg  0,96(15)       # save F5
4aabfd
-
4aabfd
-    # Load F7 into R0
4aabfd
-    lgdr 0,7
4aabfd
-    stg  0,104(15)      # save F7
4aabfd
-
4aabfd
+    std  %f1,FP_OFFSET(%r15)
4aabfd
+    std  %f3,FP_OFFSET+8(%r15)
4aabfd
+    std  %f5,FP_OFFSET+16(%r15)
4aabfd
+    std  %f7,FP_OFFSET+24(%r15)
4aabfd
     # Save LR as PC
4aabfd
-    stg 14,112(15)
4aabfd
+    stg  %r14,PC_OFFSET(%r15)
4aabfd
 
4aabfd
     # Store the SP pointing to the old context-data into R0
4aabfd
-    lgr 0,15
4aabfd
+    lgr  %r0,%r15
4aabfd
 
4aabfd
     # Get the SP pointing to the new context-data
4aabfd
     # Note: Since the return type of the jump_fcontext is struct whose
4aabfd
@@ -88,38 +73,22 @@ ontop_fcontext:
4aabfd
     # R2 --> Address of the return transfer_t struct
4aabfd
     # R3 --> Context we want to switch to
4aabfd
     # R4 --> Data
4aabfd
-    lgr 15,3
4aabfd
+    lgr  %r15,%r3
4aabfd
 
4aabfd
     # Load the registers with the data present in context-data of the
4aabfd
     # context we are going to switch to
4aabfd
-    lg 6,  0(15)       # restore R6     
4aabfd
-    lg 7,  8(15)       # restore R7     
4aabfd
-    lg 8,  16(15)      # restore R8
4aabfd
-    lg 9,  24(15)      # restore R9
4aabfd
-    lg 10, 32(15)      # restore R10
4aabfd
-    lg 11, 40(15)      # restore R11
4aabfd
-    lg 12, 48(15)      # restore R12
4aabfd
-    lg 13, 56(15)      # restore R13
4aabfd
-    lg 14, 64(15)      # restore R14
4aabfd
-    lg 15, 72(15)      # restore R15
4aabfd
+    lmg  %r6,%r15,GR_OFFSET(%r15)
4aabfd
 
4aabfd
     # Restore Floating point registers
4aabfd
-    lg   1,80(15)
4aabfd
-    ldgr 1,1            # restore F1
4aabfd
-
4aabfd
-    lg   1,88(15)
4aabfd
-    ldgr 1,3            # restore F3
4aabfd
-
4aabfd
-    lg   1,96(15)
4aabfd
-    ldgr 1,5            # restore F5
4aabfd
-
4aabfd
-    lg   1,104(15)
4aabfd
-    ldgr 1,7            # restore F7
4aabfd
+    ld	 %f1,FP_OFFSET(%r15)
4aabfd
+    ld	 %f3,FP_OFFSET+8(%r15)
4aabfd
+    ld	 %f5,FP_OFFSET+16(%r15)
4aabfd
+    ld	 %f7,FP_OFFSET+24(%r15)
4aabfd
 
4aabfd
     # Skip PC
4aabfd
 
4aabfd
     # Adjust the stack
4aabfd
-    lay 15, 120(15)
4aabfd
+    aghi %r15,L_CTX
4aabfd
 
4aabfd
     # R2 --> Address where the return transfer_t is stored
4aabfd
     # R0 --> FCTX
4aabfd
@@ -127,8 +96,8 @@ ontop_fcontext:
4aabfd
     # R5 --> Context function
4aabfd
 
4aabfd
     # Store the elements to return transfer_t
4aabfd
-    stg 15, 0(2)
4aabfd
-    stg 4, 8(2)
4aabfd
+    stg  %r15, 0(%r2)
4aabfd
+    stg  %r4, 8(%r2)
4aabfd
 
4aabfd
     # Note: The address in R2 points to the place where the return
4aabfd
     # transfer_t is stored. Since context_function take transfer_t
4aabfd
@@ -136,7 +105,7 @@ ontop_fcontext:
4aabfd
     # first parameter value.
4aabfd
 
4aabfd
     #jump to context function
4aabfd
-    br 5
4aabfd
+    br 	%r5
4aabfd
 
4aabfd
 .size   ontop_fcontext,.-ontop_fcontext
4aabfd
 # Mark that we don't need executable stack.
4aabfd
-- 
4aabfd
2.18.1
4aabfd