|
 |
006bc1 |
From 70e65633d892765bcbaad3493e5b690abd5402f2 Mon Sep 17 00:00:00 2001
|
|
 |
006bc1 |
From: Siddhesh Poyarekar <siddhesh@sourceware.org>
|
|
 |
006bc1 |
Date: Thu, 28 Mar 2019 09:19:34 +0530
|
|
 |
006bc1 |
Subject: [PATCH 71/72] aarch64: better float to unsigned int conversion
|
|
 |
006bc1 |
|
|
 |
006bc1 |
A straight float to unsigned conversion has a limited range of (-1.0,
|
|
 |
006bc1 |
UTYPE_MAX), which should be fine in general, but for the sake of
|
|
 |
006bc1 |
consistency across the interpreter and the JIT compiler, it is
|
|
 |
006bc1 |
necessary to work a wee bit harder to expand this range to (TYPE_MIN,
|
|
 |
006bc1 |
UTYPE_MAX), which can be done with a simple range check. This adds a
|
|
 |
006bc1 |
couple of branches but only one of the branches should have a
|
|
 |
006bc1 |
noticeable performance impact on most processors with branch
|
|
 |
006bc1 |
predictors, and even then only if the input number varies wildly in
|
|
 |
006bc1 |
range.
|
|
 |
006bc1 |
|
|
 |
006bc1 |
This currently works only for 64-bit conversions; 32-bit is still WIP.
|
|
 |
006bc1 |
---
|
|
 |
006bc1 |
src/lj_asm_arm64.h | 30 ++++++++++++++++++++++--------
|
|
 |
006bc1 |
src/lj_target_arm64.h | 1 +
|
|
 |
006bc1 |
2 files changed, 23 insertions(+), 8 deletions(-)
|
|
 |
006bc1 |
|
|
 |
006bc1 |
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
|
|
 |
006bc1 |
index 42a4fae..c72144a 100644
|
|
 |
006bc1 |
--- a/src/lj_asm_arm64.h
|
|
 |
006bc1 |
+++ b/src/lj_asm_arm64.h
|
|
 |
006bc1 |
@@ -594,14 +594,28 @@ static void asm_conv(ASMState *as, IRIns *ir)
|
|
 |
006bc1 |
} else {
|
|
 |
006bc1 |
Reg left = ra_alloc1(as, lref, RSET_FPR);
|
|
 |
006bc1 |
Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
 |
006bc1 |
- A64Ins ai = irt_is64(ir->t) ?
|
|
 |
006bc1 |
- (st == IRT_NUM ?
|
|
 |
006bc1 |
- (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) :
|
|
 |
006bc1 |
- (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) :
|
|
 |
006bc1 |
- (st == IRT_NUM ?
|
|
 |
006bc1 |
- (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) :
|
|
 |
006bc1 |
- (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32));
|
|
 |
006bc1 |
- emit_dn(as, ai, dest, (left & 31));
|
|
 |
006bc1 |
+
|
|
 |
006bc1 |
+ A64Ins ai_signed = st == IRT_NUM ?
|
|
 |
006bc1 |
+ (irt_is64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_S32_F64) :
|
|
 |
006bc1 |
+ (irt_is64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_S32_F32);
|
|
 |
006bc1 |
+
|
|
 |
006bc1 |
+ if (irt_isi64(ir->t) || irt_isint(ir->t))
|
|
 |
006bc1 |
+ emit_dn(as, ai_signed, dest, (left & 31));
|
|
 |
006bc1 |
+ else {
|
|
 |
006bc1 |
+ A64Ins ai_unsigned = st == IRT_NUM ?
|
|
 |
006bc1 |
+ (irt_is64(ir->t) ? A64I_FCVT_U64_F64 : A64I_FCVT_U32_F64) :
|
|
 |
006bc1 |
+ (irt_is64(ir->t) ? A64I_FCVT_U64_F32 : A64I_FCVT_U32_F32);
|
|
 |
006bc1 |
+
|
|
 |
006bc1 |
+ MCLabel l_done = emit_label(as);
|
|
 |
006bc1 |
+ emit_dn(as, ai_unsigned, dest, (left & 31));
|
|
 |
006bc1 |
+ MCLabel l_signed = emit_label(as);
|
|
 |
006bc1 |
+ emit_jmp(as, l_done);
|
|
 |
006bc1 |
+ emit_dn(as, ai_signed, dest, (left & 31));
|
|
 |
006bc1 |
+ /* The valid range for float to unsigned int conversion is (-1.0,
|
|
 |
006bc1 |
+ UINT{,64}_MAX-1), but we just compare with 0 to save a load. */
|
|
 |
006bc1 |
+ emit_cond_branch(as, CC_PL, l_signed);
|
|
 |
006bc1 |
+ emit_nm(as, st == IRT_NUM ? A64I_FCMPZd : A64I_FCMPZs, left & 31, 0);
|
|
 |
006bc1 |
+ }
|
|
 |
006bc1 |
}
|
|
 |
006bc1 |
} else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
|
|
 |
006bc1 |
Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
 |
006bc1 |
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
|
|
 |
006bc1 |
index a207a2b..2f8357f 100644
|
|
 |
006bc1 |
--- a/src/lj_target_arm64.h
|
|
 |
006bc1 |
+++ b/src/lj_target_arm64.h
|
|
 |
006bc1 |
@@ -279,6 +279,7 @@ typedef enum A64Ins {
|
|
 |
006bc1 |
A64I_STPs = 0x2d000000,
|
|
 |
006bc1 |
A64I_STPd = 0x6d000000,
|
|
 |
006bc1 |
A64I_FCMPd = 0x1e602000,
|
|
 |
006bc1 |
+ A64I_FCMPZs = 0x1e202008,
|
|
 |
006bc1 |
A64I_FCMPZd = 0x1e602008,
|
|
 |
006bc1 |
A64I_FCSELd = 0x1e600c00,
|
|
 |
006bc1 |
A64I_FRINTMd = 0x1e654000,
|
|
 |
006bc1 |
--
|
|
 |
006bc1 |
2.20.1
|
|
 |
006bc1 |
|