Blame SOURCES/0071-aarch64-better-float-to-unsigned-int-conversion.patch

006bc1
From 70e65633d892765bcbaad3493e5b690abd5402f2 Mon Sep 17 00:00:00 2001
006bc1
From: Siddhesh Poyarekar <siddhesh@sourceware.org>
006bc1
Date: Thu, 28 Mar 2019 09:19:34 +0530
006bc1
Subject: [PATCH 71/72] aarch64: better float to unsigned int conversion
006bc1
006bc1
A straight float to unsigned conversion has a limited range of (-1.0,
006bc1
UTYPE_MAX), which should be fine in general, but for the sake of
006bc1
consistency across the interpreter and the JIT compiler, it is
006bc1
necessary to work a wee bit harder to expand this range to (TYPE_MIN,
006bc1
UTYPE_MAX), which can be done with a simple range check.  This adds a
006bc1
couple of branches, but only one of them should have a
006bc1
noticeable performance impact on most processors with branch
006bc1
predictors, and even then only if the input number varies wildly in
006bc1
range.
006bc1
006bc1
This currently works only for 64-bit conversions; 32-bit is still WIP.
006bc1
---
006bc1
 src/lj_asm_arm64.h    | 30 ++++++++++++++++++++++--------
006bc1
 src/lj_target_arm64.h |  1 +
006bc1
 2 files changed, 23 insertions(+), 8 deletions(-)
006bc1
006bc1
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
006bc1
index 42a4fae..c72144a 100644
006bc1
--- a/src/lj_asm_arm64.h
006bc1
+++ b/src/lj_asm_arm64.h
006bc1
@@ -594,14 +594,28 @@ static void asm_conv(ASMState *as, IRIns *ir)
006bc1
     } else {
006bc1
       Reg left = ra_alloc1(as, lref, RSET_FPR);
006bc1
       Reg dest = ra_dest(as, ir, RSET_GPR);
006bc1
-      A64Ins ai = irt_is64(ir->t) ?
006bc1
-	(st == IRT_NUM ?
006bc1
-	 (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) :
006bc1
-	 (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) :
006bc1
-	(st == IRT_NUM ?
006bc1
-	 (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) :
006bc1
-	 (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32));
006bc1
-      emit_dn(as, ai, dest, (left & 31));
006bc1
+
006bc1
+      A64Ins ai_signed = st == IRT_NUM ?
006bc1
+        (irt_is64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_S32_F64) :
006bc1
+        (irt_is64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_S32_F32);
006bc1
+
006bc1
+      if (irt_isi64(ir->t) || irt_isint(ir->t))
006bc1
+        emit_dn(as, ai_signed, dest, (left & 31));
006bc1
+      else {
006bc1
+        A64Ins ai_unsigned = st == IRT_NUM ?
006bc1
+          (irt_is64(ir->t) ? A64I_FCVT_U64_F64 : A64I_FCVT_U32_F64) :
006bc1
+          (irt_is64(ir->t) ? A64I_FCVT_U64_F32 : A64I_FCVT_U32_F32);
006bc1
+
006bc1
+        MCLabel l_done = emit_label(as);
006bc1
+        emit_dn(as, ai_unsigned, dest, (left & 31));
006bc1
+        MCLabel l_signed = emit_label(as);
006bc1
+	emit_jmp(as, l_done);
006bc1
+        emit_dn(as, ai_signed, dest, (left & 31));
006bc1
+        /* The valid range for float to unsigned int conversion is (-1.0,
006bc1
+           UINT{,64}_MAX-1), but we just compare with 0 to save a load. */
006bc1
+        emit_cond_branch(as, CC_PL, l_signed);
006bc1
+        emit_nm(as, st == IRT_NUM ? A64I_FCMPZd : A64I_FCMPZs, left & 31, 0);
006bc1
+      }
006bc1
     }
006bc1
   } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
006bc1
     Reg dest = ra_dest(as, ir, RSET_GPR);
006bc1
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
006bc1
index a207a2b..2f8357f 100644
006bc1
--- a/src/lj_target_arm64.h
006bc1
+++ b/src/lj_target_arm64.h
006bc1
@@ -279,6 +279,7 @@ typedef enum A64Ins {
006bc1
   A64I_STPs = 0x2d000000,
006bc1
   A64I_STPd = 0x6d000000,
006bc1
   A64I_FCMPd = 0x1e602000,
006bc1
+  A64I_FCMPZs = 0x1e202008,
006bc1
   A64I_FCMPZd = 0x1e602008,
006bc1
   A64I_FCSELd = 0x1e600c00,
006bc1
   A64I_FRINTMd = 0x1e654000,
006bc1
-- 
006bc1
2.20.1
006bc1