From 8fc4ce1c981967fccd5366ace6add6d14cfcde89 Mon Sep 17 00:00:00 2001
From: Siddhesh Poyarekar <siddhesh@gotplt.org>
Date: Mon, 25 Feb 2019 14:40:39 +0000
Subject: [PATCH 63/72] aarch64: Use the xzr register whenever possible
Using the xzr register for store inputs and the second operand of
arithmetic operations frees up a register for use elsewhere.
---
src/lj_asm_arm64.h | 31 ++++++++++++++++++++++++++++---
1 file changed, 28 insertions(+), 3 deletions(-)
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index c214e10..a826687 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -1007,10 +1007,30 @@ static void asm_xload(ASMState *as, IRIns *ir)
asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
}
+static int maybe_zero_val(ASMState *as, IRRef ref)
+{
+ IRIns *ir = IR(ref);
+ /* Return 1 if ref is IR_KNULL, or an IR_KINT/IR_KINT64 whose value is 0. */
+ switch(ir->o) {
+ case IR_KNULL:
+ return 1;
+ case IR_KINT:
+ return 0 == ir->i;
+ case IR_KINT64:
+ return 0 == ir_kint64(ir)->u64;
+ }
+ /* Every other opcode is reported as not zero. */
+ return 0;
+}
+
static void asm_xstore(ASMState *as, IRIns *ir)
{
if (ir->r != RID_SINK) {
- Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+ Reg src;
+ if (irref_isk(ir->op2) && maybe_zero_val(as, ir->op2))
+ src = RID_ZERO;
+ else
+ src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
rset_exclude(RSET_GPR, src));
}
@@ -1198,7 +1218,12 @@ static void asm_cnew(ASMState *as, IRIns *ir)
/* Initialize immutable cdata object. */
if (ir->o == IR_CNEWI) {
int32_t ofs = sizeof(GCcdata);
- Reg r = ra_alloc1(as, ir->op2, allow);
+ Reg r;
+ if (irref_isk(ir->op2) && maybe_zero_val(as, ir->op2))
+ r = RID_ZERO;
+ else
+ r = ra_alloc1(as, ir->op2, allow);
+
lua_assert(sz == 4 || sz == 8);
emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs);
} else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
@@ -1214,7 +1239,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
/* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
{
- Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
+ Reg r = id == 0 ? RID_ZERO : (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP);
--
2.20.1