From 006bc10efd578fa26c78cb6976818db7c9876cb5 Mon Sep 17 00:00:00 2001 From: Alfredo Moralejo Date: Sep 08 2021 12:48:27 +0000 Subject: Import luajit-2.1.0-0.19beta3.el9 in CloudSIG xena --- diff --git a/.luajit.metadata b/.luajit.metadata new file mode 100644 index 0000000..f323241 --- /dev/null +++ b/.luajit.metadata @@ -0,0 +1 @@ +e945cee354484db45576c3760a5cfd70e7a314bf SOURCES/LuaJIT-2.1.0-beta3.tar.gz diff --git a/SOURCES/0001-Fix-LJ_MAX_JSLOTS-assertion-in-rec_check_slots.patch b/SOURCES/0001-Fix-LJ_MAX_JSLOTS-assertion-in-rec_check_slots.patch new file mode 100644 index 0000000..16aca3b --- /dev/null +++ b/SOURCES/0001-Fix-LJ_MAX_JSLOTS-assertion-in-rec_check_slots.patch @@ -0,0 +1,31 @@ +commit 31afda31814ec02f82ffb0519bee496c87eeaa89 +Merge: 8271c64 1c89933 +Author: Mike Pall +Date: Tue May 9 21:01:23 2017 +0200 + + Merge branch 'master' into v2.1 + +commit 1c89933f129dde76944336c6bfd05297b8d67730 +Author: Mike Pall +Date: Tue May 9 20:59:37 2017 +0200 + + Fix LJ_MAX_JSLOTS assertion in rec_check_slots(). + + Thanks to Yichun Zhang. 
+ +diff --git a/src/lj_record.c b/src/lj_record.c +index 9d0469c..c2d0274 100644 +--- a/src/lj_record.c ++++ b/src/lj_record.c +@@ -87,9 +87,9 @@ static void rec_check_slots(jit_State *J) + BCReg s, nslots = J->baseslot + J->maxslot; + int32_t depth = 0; + cTValue *base = J->L->base - J->baseslot; +- lua_assert(J->baseslot >= 1+LJ_FR2 && J->baseslot < LJ_MAX_JSLOTS); ++ lua_assert(J->baseslot >= 1+LJ_FR2); + lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME)); +- lua_assert(nslots < LJ_MAX_JSLOTS); ++ lua_assert(nslots <= LJ_MAX_JSLOTS); + for (s = 0; s < nslots; s++) { + TRef tr = J->slot[s]; + if (tr) { diff --git a/SOURCES/0002-Add-missing-LJ_MAX_JSLOTS-check.patch b/SOURCES/0002-Add-missing-LJ_MAX_JSLOTS-check.patch new file mode 100644 index 0000000..70ccfd5 --- /dev/null +++ b/SOURCES/0002-Add-missing-LJ_MAX_JSLOTS-check.patch @@ -0,0 +1,40 @@ +commit 6259c0b909a8c00fabe3c7e6bd81150ee08cbf9f +Merge: 31afda3 630ff31 +Author: Mike Pall +Date: Wed May 17 17:38:53 2017 +0200 + + Merge branch 'master' into v2.1 + +commit 630ff3196a06353c6a7ccd1e9ac3958f4a8ca13c +Author: Mike Pall +Date: Wed May 17 17:37:35 2017 +0200 + + Add missing LJ_MAX_JSLOTS check. + + Thanks to Yichun Zhang. + +From 630ff3196a06353c6a7ccd1e9ac3958f4a8ca13c Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Wed, 17 May 2017 17:37:35 +0200 +Subject: [PATCH 02/72] Add missing LJ_MAX_JSLOTS check. + +Thanks to Yichun Zhang. +--- + src/lj_record.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/lj_record.c b/src/lj_record.c +index cecacd2..bc4e8a6 100644 +--- a/src/lj_record.c ++++ b/src/lj_record.c +@@ -633,6 +633,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs) + J->framedepth++; + J->base += func+1+LJ_FR2; + J->baseslot += func+1+LJ_FR2; ++ if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) ++ lj_trace_err(J, LJ_TRERR_STACKOV); + } + + /* Record tail call. 
*/ +-- +2.20.1 diff --git a/SOURCES/0003-MIPS-Use-precise-search-for-exit-jump-patching.patch b/SOURCES/0003-MIPS-Use-precise-search-for-exit-jump-patching.patch new file mode 100644 index 0000000..9d8300f --- /dev/null +++ b/SOURCES/0003-MIPS-Use-precise-search-for-exit-jump-patching.patch @@ -0,0 +1,30 @@ +From 7381b620358c2561e8690149f1d25828fdad6675 Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Wed, 7 Jun 2017 19:16:22 +0200 +Subject: [PATCH 03/72] MIPS: Use precise search for exit jump patching. + +Contributed by Djordje Kovacevic and Stefan Pejic. +--- + src/lj_asm_mips.h | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h +index 03270cc..d0a1ca5 100644 +--- a/src/lj_asm_mips.h ++++ b/src/lj_asm_mips.h +@@ -1933,7 +1933,11 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) + MCode tjump = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); + for (p++; p < pe; p++) { + if (*p == exitload) { /* Look for load of exit number. */ +- if (((p[-1] ^ (px-p)) & 0xffffu) == 0) { /* Look for exitstub branch. */ ++ /* Look for exitstub branch. Yes, this covers all used branch variants. */ ++ if (((p[-1] ^ (px-p)) & 0xffffu) == 0 && ++ ((p[-1] & 0xf0000000u) == MIPSI_BEQ || ++ (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ || ++ (p[-1] & 0xffe00000u) == MIPSI_BC1F)) { + ptrdiff_t delta = target - p; + if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */ + patchbranch: +-- +2.20.1 + diff --git a/SOURCES/0004-MIPS-Fix-handling-of-spare-long-range-jump-slots.patch b/SOURCES/0004-MIPS-Fix-handling-of-spare-long-range-jump-slots.patch new file mode 100644 index 0000000..4da6b4d --- /dev/null +++ b/SOURCES/0004-MIPS-Fix-handling-of-spare-long-range-jump-slots.patch @@ -0,0 +1,77 @@ +From c7c3c4da432ddb543d4b0a9abbb245f11b26afd0 Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Wed, 7 Jun 2017 19:36:46 +0200 +Subject: [PATCH 04/72] MIPS: Fix handling of spare long-range jump slots. 
+ +Contributed by Djordje Kovacevic and Stefan Pejic. +--- + src/lj_asm_mips.h | 9 +++++---- + src/lj_jit.h | 6 ++++++ + src/lj_mcode.c | 6 ------ + 3 files changed, 11 insertions(+), 10 deletions(-) + +diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h +index d0a1ca5..7631190 100644 +--- a/src/lj_asm_mips.h ++++ b/src/lj_asm_mips.h +@@ -65,10 +65,9 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) + static void asm_sparejump_setup(ASMState *as) + { + MCode *mxp = as->mcbot; +- /* Assumes sizeof(MCLink) == 8. */ +- if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == 8) { ++ if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == sizeof(MCLink)) { + lua_assert(MIPSI_NOP == 0); +- memset(mxp+2, 0, MIPS_SPAREJUMP*8); ++ memset(mxp, 0, MIPS_SPAREJUMP*2*sizeof(MCode)); + mxp += MIPS_SPAREJUMP*2; + lua_assert(mxp < as->mctop); + lj_mcode_sync(as->mcbot, mxp); +@@ -1947,7 +1946,9 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) + if (!cstart) cstart = p-1; + } else { /* Branch out of range. Use spare jump slot in mcarea. */ + int i; +- for (i = 2; i < 2+MIPS_SPAREJUMP*2; i += 2) { ++ for (i = (int)(sizeof(MCLink)/sizeof(MCode)); ++ i < (int)(sizeof(MCLink)/sizeof(MCode)+MIPS_SPAREJUMP*2); ++ i += 2) { + if (mcarea[i] == tjump) { + delta = mcarea+i - p; + goto patchbranch; +diff --git a/src/lj_jit.h b/src/lj_jit.h +index a2e8fd9..3f38d28 100644 +--- a/src/lj_jit.h ++++ b/src/lj_jit.h +@@ -155,6 +155,12 @@ typedef uint8_t MCode; + typedef uint32_t MCode; + #endif + ++/* Linked list of MCode areas. */ ++typedef struct MCLink { ++ MCode *next; /* Next area. */ ++ size_t size; /* Size of current area. */ ++} MCLink; ++ + /* Stack snapshot header. */ + typedef struct SnapShot { + uint16_t mapofs; /* Offset into snapshot map. 
*/ +diff --git a/src/lj_mcode.c b/src/lj_mcode.c +index f0a1f69..5ea89f6 100644 +--- a/src/lj_mcode.c ++++ b/src/lj_mcode.c +@@ -272,12 +272,6 @@ static void *mcode_alloc(jit_State *J, size_t sz) + + /* -- MCode area management ----------------------------------------------- */ + +-/* Linked list of MCode areas. */ +-typedef struct MCLink { +- MCode *next; /* Next area. */ +- size_t size; /* Size of current area. */ +-} MCLink; +- + /* Allocate a new MCode area. */ + static void mcode_allocarea(jit_State *J) + { +-- +2.20.1 + diff --git a/SOURCES/0005-MIPS64-Add-soft-float-support-to-JIT-compiler-backen.patch b/SOURCES/0005-MIPS64-Add-soft-float-support-to-JIT-compiler-backen.patch new file mode 100644 index 0000000..dda4ae2 --- /dev/null +++ b/SOURCES/0005-MIPS64-Add-soft-float-support-to-JIT-compiler-backen.patch @@ -0,0 +1,982 @@ +From a057a07ab702e225e21848d4f918886c5b0ac06b Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Wed, 7 Jun 2017 23:56:54 +0200 +Subject: [PATCH 05/72] MIPS64: Add soft-float support to JIT compiler backend. + +Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. +Sponsored by Cisco Systems, Inc. 
+--- + src/lj_arch.h | 4 +- + src/lj_asm.c | 8 +- + src/lj_asm_mips.h | 217 +++++++++++++++++++++++++++++++++++++-------- + src/lj_crecord.c | 4 +- + src/lj_emit_mips.h | 2 + + src/lj_ffrecord.c | 2 +- + src/lj_ircall.h | 43 ++++++--- + src/lj_iropt.h | 2 +- + src/lj_jit.h | 4 +- + src/lj_obj.h | 3 + + src/lj_opt_split.c | 2 +- + src/lj_snap.c | 21 +++-- + src/vm_mips64.dasc | 49 ++++++++++ + 13 files changed, 286 insertions(+), 75 deletions(-) + +diff --git a/src/lj_arch.h b/src/lj_arch.h +index c8d7138..b770564 100644 +--- a/src/lj_arch.h ++++ b/src/lj_arch.h +@@ -337,9 +337,6 @@ + #define LJ_ARCH_BITS 32 + #define LJ_TARGET_MIPS32 1 + #else +-#if LJ_ABI_SOFTFP || !LJ_ARCH_HASFPU +-#define LJ_ARCH_NOJIT 1 /* NYI */ +-#endif + #define LJ_ARCH_BITS 64 + #define LJ_TARGET_MIPS64 1 + #define LJ_TARGET_GC64 1 +@@ -512,6 +509,7 @@ + #define LJ_ABI_SOFTFP 0 + #endif + #define LJ_SOFTFP (!LJ_ARCH_HASFPU) ++#define LJ_SOFTFP32 (LJ_SOFTFP && LJ_32) + + #if LJ_ARCH_ENDIAN == LUAJIT_BE + #define LJ_LE 0 +diff --git a/src/lj_asm.c b/src/lj_asm.c +index c2cf5a9..bed2268 100644 +--- a/src/lj_asm.c ++++ b/src/lj_asm.c +@@ -338,7 +338,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref) + ra_modified(as, r); + ir->r = RID_INIT; /* Do not keep any hint. 
*/ + RA_DBGX((as, "remat $i $r", ir, r)); +-#if !LJ_SOFTFP ++#if !LJ_SOFTFP32 + if (ir->o == IR_KNUM) { + emit_loadk64(as, r, ir); + } else +@@ -1305,7 +1305,7 @@ static void asm_call(ASMState *as, IRIns *ir) + asm_gencall(as, ci, args); + } + +-#if !LJ_SOFTFP ++#if !LJ_SOFTFP32 + static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref) + { + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; +@@ -1652,10 +1652,10 @@ static void asm_ir(ASMState *as, IRIns *ir) + case IR_MUL: asm_mul(as, ir); break; + case IR_MOD: asm_mod(as, ir); break; + case IR_NEG: asm_neg(as, ir); break; +-#if LJ_SOFTFP ++#if LJ_SOFTFP32 + case IR_DIV: case IR_POW: case IR_ABS: + case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: +- lua_assert(0); /* Unused for LJ_SOFTFP. */ ++ lua_assert(0); /* Unused for LJ_SOFTFP32. */ + break; + #else + case IR_DIV: asm_div(as, ir); break; +diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h +index 05af3d0..1406a87 100644 +--- a/src/lj_asm_mips.h ++++ b/src/lj_asm_mips.h +@@ -290,7 +290,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + { + ra_leftov(as, gpr, ref); + gpr++; +-#if LJ_64 ++#if LJ_64 && !LJ_SOFTFP + fpr++; + #endif + } +@@ -301,7 +301,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + emit_spstore(as, ir, r, ofs); + ofs += irt_isnum(ir->t) ? 8 : 4; + #else +- emit_spstore(as, ir, r, ofs + ((LJ_BE && (LJ_SOFTFP || r < RID_MAX_GPR) && !irt_is64(ir->t)) ? 4 : 0)); ++ emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isfp(ir->t) && !irt_is64(ir->t)) ? 
4 : 0)); + ofs += 8; + #endif + } +@@ -312,7 +312,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + #endif + if (gpr <= REGARG_LASTGPR) { + gpr++; +-#if LJ_64 ++#if LJ_64 && !LJ_SOFTFP + fpr++; + #endif + } else { +@@ -461,12 +461,36 @@ static void asm_tobit(ASMState *as, IRIns *ir) + emit_tg(as, MIPSI_MFC1, dest, tmp); + emit_fgh(as, MIPSI_ADD_D, tmp, left, right); + } ++#elif LJ_64 /* && LJ_SOFTFP */ ++static void asm_tointg(ASMState *as, IRIns *ir, Reg r) ++{ ++ /* The modified regs must match with the *.dasc implementation. */ ++ RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)| ++ RID2RSET(RID_R1)|RID2RSET(RID_R12); ++ if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); ++ ra_evictset(as, drop); ++ /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */ ++ ra_destreg(as, ir, RID_RET); ++ asm_guard(as, MIPSI_BNE, RID_RET+1, RID_ZERO); ++ emit_call(as, (void *)lj_ir_callinfo[IRCALL_lj_vm_tointg].func, 0); ++ if (r == RID_NONE) ++ ra_leftov(as, REGARG_FIRSTGPR, ir->op1); ++ else if (r != REGARG_FIRSTGPR) ++ emit_move(as, REGARG_FIRSTGPR, r); ++} ++ ++static void asm_tobit(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ emit_dta(as, MIPSI_SLL, dest, dest, 0); ++ asm_callid(as, ir, IRCALL_lj_vm_tobit); ++} + #endif + + static void asm_conv(ASMState *as, IRIns *ir) + { + IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); +-#if !LJ_SOFTFP ++#if !LJ_SOFTFP32 + int stfp = (st == IRT_NUM || st == IRT_FLOAT); + #endif + #if LJ_64 +@@ -477,12 +501,13 @@ static void asm_conv(ASMState *as, IRIns *ir) + lua_assert(!(irt_isint64(ir->t) || + (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ + #endif +-#if LJ_32 && LJ_SOFTFP ++#if LJ_SOFTFP32 + /* FP conversions are handled by SPLIT. */ + lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); + /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. 
*/ + #else + lua_assert(irt_type(ir->t) != st); ++#if !LJ_SOFTFP + if (irt_isfp(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + if (stfp) { /* FP to FP conversion. */ +@@ -608,6 +633,42 @@ static void asm_conv(ASMState *as, IRIns *ir) + } + } + } else ++#else ++ if (irt_isfp(ir->t)) { ++#if LJ_64 && LJ_HASFFI ++ if (stfp) { /* FP to FP conversion. */ ++ asm_callid(as, ir, irt_isnum(ir->t) ? IRCALL_softfp_f2d : ++ IRCALL_softfp_d2f); ++ } else { /* Integer to FP conversion. */ ++ IRCallID cid = ((IRT_IS64 >> st) & 1) ? ++ (irt_isnum(ir->t) ? ++ (st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d) : ++ (st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f)) : ++ (irt_isnum(ir->t) ? ++ (st == IRT_INT ? IRCALL_softfp_i2d : IRCALL_softfp_ui2d) : ++ (st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f)); ++ asm_callid(as, ir, cid); ++ } ++#else ++ asm_callid(as, ir, IRCALL_softfp_i2d); ++#endif ++ } else if (stfp) { /* FP to integer conversion. */ ++ if (irt_isguard(ir->t)) { ++ /* Checked conversions are only supported from number to int. */ ++ lua_assert(irt_isint(ir->t) && st == IRT_NUM); ++ asm_tointg(as, ir, RID_NONE); ++ } else { ++ IRCallID cid = irt_is64(ir->t) ? ++ ((st == IRT_NUM) ? ++ (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) : ++ (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) : ++ ((st == IRT_NUM) ? ++ (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : ++ (irt_isint(ir->t) ? 
IRCALL_softfp_f2i : IRCALL_softfp_f2ui)); ++ asm_callid(as, ir, cid); ++ } ++ } else ++#endif + #endif + { + Reg dest = ra_dest(as, ir, RSET_GPR); +@@ -665,7 +726,7 @@ static void asm_strto(ASMState *as, IRIns *ir) + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; + IRRef args[2]; + int32_t ofs = 0; +-#if LJ_SOFTFP ++#if LJ_SOFTFP32 + ra_evictset(as, RSET_SCRATCH); + if (ra_used(ir)) { + if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && +@@ -806,7 +867,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + MCLabel l_end, l_loop, l_next; + + rset_clear(allow, tab); +-#if LJ_32 && LJ_SOFTFP ++#if LJ_SOFTFP32 + if (!isk) { + key = ra_alloc1(as, refkey, allow); + rset_clear(allow, key); +@@ -826,7 +887,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + } + } + #else +- if (irt_isnum(kt)) { ++ if (!LJ_SOFTFP && irt_isnum(kt)) { + key = ra_alloc1(as, refkey, RSET_FPR); + tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); + } else if (!irt_ispri(kt)) { +@@ -882,6 +943,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15); + emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum); + emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); ++ } else if (LJ_SOFTFP && irt_isnum(kt)) { ++ emit_branch(as, MIPSI_BEQ, tmp1, key, l_end); ++ emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); + } else if (irt_isaddr(kt)) { + Reg refk = tmp2; + if (isk) { +@@ -960,7 +1024,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); + if (irt_isnum(kt)) { + emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); +- emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0); ++ emit_dta(as, MIPSI_DSRA32, tmp1, LJ_SOFTFP ? key : tmp1, 0); + emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? 
key : tmp1, 0); + #if !LJ_SOFTFP + emit_tg(as, MIPSI_DMFC1, tmp1, key); +@@ -1123,7 +1187,7 @@ static MIPSIns asm_fxloadins(IRIns *ir) + case IRT_U8: return MIPSI_LBU; + case IRT_I16: return MIPSI_LH; + case IRT_U16: return MIPSI_LHU; +- case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_LDC1; ++ case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_LDC1; + case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1; + default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW; + } +@@ -1134,7 +1198,7 @@ static MIPSIns asm_fxstoreins(IRIns *ir) + switch (irt_type(ir->t)) { + case IRT_I8: case IRT_U8: return MIPSI_SB; + case IRT_I16: case IRT_U16: return MIPSI_SH; +- case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_SDC1; ++ case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_SDC1; + case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1; + default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW; + } +@@ -1199,7 +1263,7 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) + + static void asm_ahuvload(ASMState *as, IRIns *ir) + { +- int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP); ++ int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP); + Reg dest = RID_NONE, type = RID_TMP, idx; + RegSet allow = RSET_GPR; + int32_t ofs = 0; +@@ -1212,7 +1276,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) + } + } + if (ra_used(ir)) { +- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || ++ lua_assert((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t)); + dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); + rset_clear(allow, dest); +@@ -1261,10 +1325,10 @@ static void asm_ahustore(ASMState *as, IRIns *ir) + int32_t ofs = 0; + if (ir->r == RID_SINK) + return; +- if (!LJ_SOFTFP && irt_isnum(ir->t)) { +- src = ra_alloc1(as, ir->op2, RSET_FPR); ++ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { ++ src = ra_alloc1(as, ir->op2, LJ_SOFTFP ? 
RSET_GPR : RSET_FPR); + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); +- emit_hsi(as, MIPSI_SDC1, src, idx, ofs); ++ emit_hsi(as, LJ_SOFTFP ? MIPSI_SD : MIPSI_SDC1, src, idx, ofs); + } else { + #if LJ_32 + if (!irt_ispri(ir->t)) { +@@ -1312,7 +1376,7 @@ static void asm_sload(ASMState *as, IRIns *ir) + IRType1 t = ir->t; + #if LJ_32 + int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); +- int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP); ++ int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP); + if (hiop) + t.irt = IRT_NUM; + #else +@@ -1320,7 +1384,7 @@ static void asm_sload(ASMState *as, IRIns *ir) + #endif + lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ + lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); +-#if LJ_32 && LJ_SOFTFP ++#if LJ_SOFTFP32 + lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ + if (hiop && ra_used(ir+1)) { + type = ra_dest(as, ir+1, allow); +@@ -1328,29 +1392,44 @@ static void asm_sload(ASMState *as, IRIns *ir) + } + #else + if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { +- dest = ra_scratch(as, RSET_FPR); ++ dest = ra_scratch(as, LJ_SOFTFP ? allow : RSET_FPR); + asm_tointg(as, ir, dest); + t.irt = IRT_NUM; /* Continue with a regular number type check. */ + } else + #endif + if (ra_used(ir)) { +- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || ++ lua_assert((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t)); + dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); + rset_clear(allow, dest); + base = ra_alloc1(as, REF_BASE, allow); + rset_clear(allow, base); +- if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { ++ if (!LJ_SOFTFP32 && (ir->op2 & IRSLOAD_CONVERT)) { + if (irt_isint(t)) { +- Reg tmp = ra_scratch(as, RSET_FPR); ++ Reg tmp = ra_scratch(as, LJ_SOFTFP ? 
RSET_GPR : RSET_FPR); ++#if LJ_SOFTFP ++ ra_evictset(as, rset_exclude(RSET_SCRATCH, dest)); ++ ra_destreg(as, ir, RID_RET); ++ emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_d2i].func, 0); ++ if (tmp != REGARG_FIRSTGPR) ++ emit_move(as, REGARG_FIRSTGPR, tmp); ++#else + emit_tg(as, MIPSI_MFC1, dest, tmp); + emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp); ++#endif + dest = tmp; + t.irt = IRT_NUM; /* Check for original type. */ + } else { + Reg tmp = ra_scratch(as, RSET_GPR); ++#if LJ_SOFTFP ++ ra_evictset(as, rset_exclude(RSET_SCRATCH, dest)); ++ ra_destreg(as, ir, RID_RET); ++ emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_i2d].func, 0); ++ emit_dta(as, MIPSI_SLL, REGARG_FIRSTGPR, tmp, 0); ++#else + emit_fg(as, MIPSI_CVT_D_W, dest, dest); + emit_tg(as, MIPSI_MTC1, tmp, dest); ++#endif + dest = tmp; + t.irt = IRT_INT; /* Check for original type. */ + } +@@ -1399,7 +1478,7 @@ dotypecheck: + if (irt_isnum(t)) { + asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); + emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM); +- if (ra_hasreg(dest)) ++ if (!LJ_SOFTFP && ra_hasreg(dest)) + emit_hsi(as, MIPSI_LDC1, dest, base, ofs); + } else { + asm_guard(as, MIPSI_BNE, RID_TMP, +@@ -1409,7 +1488,7 @@ dotypecheck: + } + emit_tsi(as, MIPSI_LD, type, base, ofs); + } else if (ra_hasreg(dest)) { +- if (irt_isnum(t)) ++ if (!LJ_SOFTFP && irt_isnum(t)) + emit_hsi(as, MIPSI_LDC1, dest, base, ofs); + else + emit_tsi(as, irt_isint(t) ? 
MIPSI_LW : MIPSI_LD, dest, base, +@@ -1548,26 +1627,40 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi) + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); + emit_fg(as, mi, dest, left); + } ++#endif + ++#if !LJ_SOFTFP32 + static void asm_fpmath(ASMState *as, IRIns *ir) + { + if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) + return; ++#if !LJ_SOFTFP + if (ir->op2 <= IRFPM_TRUNC) + asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2); + else if (ir->op2 == IRFPM_SQRT) + asm_fpunary(as, ir, MIPSI_SQRT_D); + else ++#endif + asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); + } + #endif + ++#if !LJ_SOFTFP ++#define asm_fpadd(as, ir) asm_fparith(as, ir, MIPSI_ADD_D) ++#define asm_fpsub(as, ir) asm_fparith(as, ir, MIPSI_SUB_D) ++#define asm_fpmul(as, ir) asm_fparith(as, ir, MIPSI_MUL_D) ++#elif LJ_64 /* && LJ_SOFTFP */ ++#define asm_fpadd(as, ir) asm_callid(as, ir, IRCALL_softfp_add) ++#define asm_fpsub(as, ir) asm_callid(as, ir, IRCALL_softfp_sub) ++#define asm_fpmul(as, ir) asm_callid(as, ir, IRCALL_softfp_mul) ++#endif ++ + static void asm_add(ASMState *as, IRIns *ir) + { + IRType1 t = ir->t; +-#if !LJ_SOFTFP ++#if !LJ_SOFTFP32 + if (irt_isnum(t)) { +- asm_fparith(as, ir, MIPSI_ADD_D); ++ asm_fpadd(as, ir); + } else + #endif + { +@@ -1589,9 +1682,9 @@ static void asm_add(ASMState *as, IRIns *ir) + + static void asm_sub(ASMState *as, IRIns *ir) + { +-#if !LJ_SOFTFP ++#if !LJ_SOFTFP32 + if (irt_isnum(ir->t)) { +- asm_fparith(as, ir, MIPSI_SUB_D); ++ asm_fpsub(as, ir); + } else + #endif + { +@@ -1605,9 +1698,9 @@ static void asm_sub(ASMState *as, IRIns *ir) + + static void asm_mul(ASMState *as, IRIns *ir) + { +-#if !LJ_SOFTFP ++#if !LJ_SOFTFP32 + if (irt_isnum(ir->t)) { +- asm_fparith(as, ir, MIPSI_MUL_D); ++ asm_fpmul(as, ir); + } else + #endif + { +@@ -1634,7 +1727,7 @@ static void asm_mod(ASMState *as, IRIns *ir) + asm_callid(as, ir, IRCALL_lj_vm_modi); + } + +-#if !LJ_SOFTFP ++#if !LJ_SOFTFP32 + static void asm_pow(ASMState *as, IRIns *ir) + { 
+ #if LJ_64 && LJ_HASFFI +@@ -1654,7 +1747,11 @@ static void asm_div(ASMState *as, IRIns *ir) + IRCALL_lj_carith_divu64); + else + #endif ++#if !LJ_SOFTFP + asm_fparith(as, ir, MIPSI_DIV_D); ++#else ++ asm_callid(as, ir, IRCALL_softfp_div); ++#endif + } + #endif + +@@ -1664,6 +1761,13 @@ static void asm_neg(ASMState *as, IRIns *ir) + if (irt_isnum(ir->t)) { + asm_fpunary(as, ir, MIPSI_NEG_D); + } else ++#elif LJ_64 /* && LJ_SOFTFP */ ++ if (irt_isnum(ir->t)) { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ emit_dst(as, MIPSI_XOR, dest, left, ++ ra_allock(as, 0x8000000000000000ll, rset_exclude(RSET_GPR, dest))); ++ } else + #endif + { + Reg dest = ra_dest(as, ir, RSET_GPR); +@@ -1673,7 +1777,17 @@ static void asm_neg(ASMState *as, IRIns *ir) + } + } + ++#if !LJ_SOFTFP + #define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D) ++#elif LJ_64 /* && LJ_SOFTFP */ ++static void asm_abs(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); ++ emit_tsml(as, MIPSI_DEXTM, dest, left, 30, 0); ++} ++#endif ++ + #define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) + #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) + +@@ -1918,15 +2032,21 @@ static void asm_bror(ASMState *as, IRIns *ir) + } + } + +-#if LJ_32 && LJ_SOFTFP ++#if LJ_SOFTFP + static void asm_sfpmin_max(ASMState *as, IRIns *ir) + { + CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? 
IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax]; ++#if LJ_64 ++ IRRef args[2]; ++ args[0] = ir->op1; ++ args[1] = ir->op2; ++#else + IRRef args[4]; + args[0^LJ_BE] = ir->op1; + args[1^LJ_BE] = (ir+1)->op1; + args[2^LJ_BE] = ir->op2; + args[3^LJ_BE] = (ir+1)->op2; ++#endif + asm_setupresult(as, ir, &ci); + emit_call(as, (void *)ci.func, 0); + ci.func = NULL; +@@ -1936,7 +2056,10 @@ static void asm_sfpmin_max(ASMState *as, IRIns *ir) + + static void asm_min_max(ASMState *as, IRIns *ir, int ismax) + { +- if (!LJ_SOFTFP && irt_isnum(ir->t)) { ++ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { ++#if LJ_SOFTFP ++ asm_sfpmin_max(as, ir); ++#else + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; +@@ -1947,6 +2070,7 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) + if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right); + } + emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left); ++#endif + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_alloc2(as, ir, RSET_GPR); +@@ -1967,18 +2091,24 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) + + /* -- Comparisons --------------------------------------------------------- */ + +-#if LJ_32 && LJ_SOFTFP ++#if LJ_SOFTFP + /* SFP comparisons. */ + static void asm_sfpcomp(ASMState *as, IRIns *ir) + { + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; + RegSet drop = RSET_SCRATCH; + Reg r; ++#if LJ_64 ++ IRRef args[2]; ++ args[0] = ir->op1; ++ args[1] = ir->op2; ++#else + IRRef args[4]; + args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1; + args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 
3 : 2] = (ir+1)->op2; ++#endif + +- for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { ++ for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+(LJ_64?1:3); r++) { + if (!rset_test(as->freeset, r) && + regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) + rset_clear(drop, r); +@@ -2032,11 +2162,15 @@ static void asm_comp(ASMState *as, IRIns *ir) + { + /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ + IROp op = ir->o; +- if (!LJ_SOFTFP && irt_isnum(ir->t)) { ++ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { ++#if LJ_SOFTFP ++ asm_sfpcomp(as, ir); ++#else + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); + emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right); ++#endif + } else { + Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); + if (op == IR_ABC) op = IR_UGT; +@@ -2068,9 +2202,13 @@ static void asm_equal(ASMState *as, IRIns *ir) + Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ? + RSET_FPR : RSET_GPR); + right = (left >> 8); left &= 255; +- if (!LJ_SOFTFP && irt_isnum(ir->t)) { ++ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { ++#if LJ_SOFTFP ++ asm_sfpcomp(as, ir); ++#else + asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); + emit_fgh(as, MIPSI_C_EQ_D, 0, left, right); ++#endif + } else { + asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right); + } +@@ -2263,7 +2401,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) + if ((sn & SNAP_NORESTORE)) + continue; + if (irt_isnum(ir->t)) { +-#if LJ_SOFTFP ++#if LJ_SOFTFP32 + Reg tmp; + RegSet allow = rset_exclude(RSET_GPR, RID_BASE); + lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. 
*/ +@@ -2272,6 +2410,9 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) + if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); + tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); + emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); ++#elif LJ_SOFTFP /* && LJ_64 */ ++ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); ++ emit_tsi(as, MIPSI_SD, src, RID_BASE, ofs); + #else + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs); +diff --git a/src/lj_crecord.c b/src/lj_crecord.c +index e32ae23..fd59e28 100644 +--- a/src/lj_crecord.c ++++ b/src/lj_crecord.c +@@ -212,7 +212,7 @@ static void crec_copy_emit(jit_State *J, CRecMemList *ml, MSize mlp, + ml[i].trval = emitir(IRT(IR_XLOAD, ml[i].tp), trsptr, 0); + ml[i].trofs = trofs; + i++; +- rwin += (LJ_SOFTFP && ml[i].tp == IRT_NUM) ? 2 : 1; ++ rwin += (LJ_SOFTFP32 && ml[i].tp == IRT_NUM) ? 2 : 1; + if (rwin >= CREC_COPY_REGWIN || i >= mlp) { /* Flush buffered stores. */ + rwin = 0; + for ( ; j < i; j++) { +@@ -1130,7 +1130,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, + else + tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT); + } +- } else if (LJ_SOFTFP && ctype_isfp(d->info) && d->size > 4) { ++ } else if (LJ_SOFTFP32 && ctype_isfp(d->info) && d->size > 4) { + lj_needsplit(J); + } + #if LJ_TARGET_X86 +diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h +index 8a9ee24..bb6593a 100644 +--- a/src/lj_emit_mips.h ++++ b/src/lj_emit_mips.h +@@ -12,6 +12,8 @@ static intptr_t get_k64val(IRIns *ir) + return (intptr_t)ir_kgc(ir); + } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { + return (intptr_t)ir_kptr(ir); ++ } else if (LJ_SOFTFP && ir->o == IR_KNUM) { ++ return (intptr_t)ir_knum(ir)->u64; + } else { + lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); + return ir->i; /* Sign-extended. 
*/ +diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c +index dfdee2d..849d7a2 100644 +--- a/src/lj_ffrecord.c ++++ b/src/lj_ffrecord.c +@@ -1012,7 +1012,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) + handle_num: + tra = lj_ir_tonum(J, tra); + tr = lj_ir_call(J, id, tr, trsf, tra); +- if (LJ_SOFTFP) lj_needsplit(J); ++ if (LJ_SOFTFP32) lj_needsplit(J); + break; + case STRFMT_STR: + if (!tref_isstr(tra)) { +diff --git a/src/lj_ircall.h b/src/lj_ircall.h +index 973c36e..7312006 100644 +--- a/src/lj_ircall.h ++++ b/src/lj_ircall.h +@@ -51,7 +51,7 @@ typedef struct CCallInfo { + #define CCI_XARGS(ci) (((ci)->flags >> CCI_XARGS_SHIFT) & 3) + #define CCI_XA (1u << CCI_XARGS_SHIFT) + +-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) ++#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) + #define CCI_XNARGS(ci) (CCI_NARGS((ci)) + CCI_XARGS((ci))) + #else + #define CCI_XNARGS(ci) CCI_NARGS((ci)) +@@ -78,13 +78,19 @@ typedef struct CCallInfo { + #define IRCALLCOND_SOFTFP_FFI(x) NULL + #endif + +-#if LJ_SOFTFP && LJ_TARGET_MIPS32 ++#if LJ_SOFTFP && LJ_TARGET_MIPS + #define IRCALLCOND_SOFTFP_MIPS(x) x + #else + #define IRCALLCOND_SOFTFP_MIPS(x) NULL + #endif + +-#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS32) ++#if LJ_SOFTFP && LJ_TARGET_MIPS64 ++#define IRCALLCOND_SOFTFP_MIPS64(x) x ++#else ++#define IRCALLCOND_SOFTFP_MIPS64(x) NULL ++#endif ++ ++#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS) + + #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) + #define IRCALLCOND_FP64_FFI(x) x +@@ -112,6 +118,14 @@ typedef struct CCallInfo { + #define XA2_FP 0 + #endif + ++#if LJ_SOFTFP32 ++#define XA_FP32 CCI_XA ++#define XA2_FP32 (CCI_XA+CCI_XA) ++#else ++#define XA_FP32 0 ++#define XA2_FP32 0 ++#endif ++ + #if LJ_32 + #define XA_64 CCI_XA + #define XA2_64 (CCI_XA+CCI_XA) +@@ -181,20 +195,21 @@ typedef struct CCallInfo { + _(ANY, pow, 2, N, NUM, XA2_FP) \ + _(ANY, atan2, 2, N, NUM, XA2_FP) \ + _(ANY, ldexp, 2, N, NUM, XA_FP) \ 
+- _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \ +- _(SOFTFP, softfp_add, 4, N, NUM, 0) \ +- _(SOFTFP, softfp_sub, 4, N, NUM, 0) \ +- _(SOFTFP, softfp_mul, 4, N, NUM, 0) \ +- _(SOFTFP, softfp_div, 4, N, NUM, 0) \ +- _(SOFTFP, softfp_cmp, 4, N, NIL, 0) \ ++ _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \ ++ _(SOFTFP, softfp_add, 2, N, NUM, XA2_FP32) \ ++ _(SOFTFP, softfp_sub, 2, N, NUM, XA2_FP32) \ ++ _(SOFTFP, softfp_mul, 2, N, NUM, XA2_FP32) \ ++ _(SOFTFP, softfp_div, 2, N, NUM, XA2_FP32) \ ++ _(SOFTFP, softfp_cmp, 2, N, NIL, XA2_FP32) \ + _(SOFTFP, softfp_i2d, 1, N, NUM, 0) \ +- _(SOFTFP, softfp_d2i, 2, N, INT, 0) \ +- _(SOFTFP_MIPS, lj_vm_sfmin, 4, N, NUM, 0) \ +- _(SOFTFP_MIPS, lj_vm_sfmax, 4, N, NUM, 0) \ ++ _(SOFTFP, softfp_d2i, 1, N, INT, XA_FP32) \ ++ _(SOFTFP_MIPS, lj_vm_sfmin, 2, N, NUM, XA2_FP32) \ ++ _(SOFTFP_MIPS, lj_vm_sfmax, 2, N, NUM, XA2_FP32) \ ++ _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \ + _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ + _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ +- _(SOFTFP_FFI, softfp_d2ui, 2, N, INT, 0) \ +- _(SOFTFP_FFI, softfp_d2f, 2, N, FLOAT, 0) \ ++ _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \ ++ _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \ + _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \ + _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ + _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ +diff --git a/src/lj_iropt.h b/src/lj_iropt.h +index 73aef0e..a59ba3f 100644 +--- a/src/lj_iropt.h ++++ b/src/lj_iropt.h +@@ -150,7 +150,7 @@ LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase); + /* Optimization passes. 
*/ + LJ_FUNC void lj_opt_dce(jit_State *J); + LJ_FUNC int lj_opt_loop(jit_State *J); +-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) ++#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) + LJ_FUNC void lj_opt_split(jit_State *J); + #else + #define lj_opt_split(J) UNUSED(J) +diff --git a/src/lj_jit.h b/src/lj_jit.h +index 2fa8efc..f37e792 100644 +--- a/src/lj_jit.h ++++ b/src/lj_jit.h +@@ -374,7 +374,7 @@ enum { + ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) + + /* Set/reset flag to activate the SPLIT pass for the current trace. */ +-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) ++#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) + #define lj_needsplit(J) (J->needsplit = 1) + #define lj_resetsplit(J) (J->needsplit = 0) + #else +@@ -437,7 +437,7 @@ typedef struct jit_State { + MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ + + PostProc postproc; /* Required post-processing after execution. */ +-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) ++#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) + uint8_t needsplit; /* Need SPLIT pass. */ + #endif + uint8_t retryrec; /* Retry recording. 
*/ +diff --git a/src/lj_obj.h b/src/lj_obj.h +index 52372c3..c7e4742 100644 +--- a/src/lj_obj.h ++++ b/src/lj_obj.h +@@ -924,6 +924,9 @@ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2) + + #if LJ_SOFTFP + LJ_ASMF int32_t lj_vm_tobit(double x); ++#if LJ_TARGET_MIPS64 ++LJ_ASMF int32_t lj_vm_tointg(double x); ++#endif + #endif + + static LJ_AINLINE int32_t lj_num2bit(lua_Number n) +diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c +index fc93520..79ac3cc 100644 +--- a/src/lj_opt_split.c ++++ b/src/lj_opt_split.c +@@ -8,7 +8,7 @@ + + #include "lj_obj.h" + +-#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) ++#if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) + + #include "lj_err.h" + #include "lj_buf.h" +diff --git a/src/lj_snap.c b/src/lj_snap.c +index bb063c2..44fa379 100644 +--- a/src/lj_snap.c ++++ b/src/lj_snap.c +@@ -93,7 +93,7 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) + (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) + sn |= SNAP_NORESTORE; + } +- if (LJ_SOFTFP && irt_isnum(ir->t)) ++ if (LJ_SOFTFP32 && irt_isnum(ir->t)) + sn |= SNAP_SOFTFPNUM; + map[n++] = sn; + } +@@ -374,7 +374,7 @@ IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir) + break; + } + } +- } else if (LJ_SOFTFP && ir->o == IR_HIOP) { ++ } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) { + ref++; + } else if (ir->o == IR_PVAL) { + ref = ir->op1 + REF_BIAS; +@@ -486,7 +486,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T) + } else { + IRType t = irt_type(ir->t); + uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; +- if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; ++ if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; + if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); + tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); + } +@@ -520,7 +520,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T) + if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { + if (snap_pref(J, T, map, nent, seen, irs->op2) 
== 0) + snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); +- else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && ++ else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) && + irs+1 < irlast && (irs+1)->o == IR_HIOP) + snap_pref(J, T, map, nent, seen, (irs+1)->op2); + } +@@ -579,10 +579,10 @@ void lj_snap_replay(jit_State *J, GCtrace *T) + lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT); + val = snap_pref(J, T, map, nent, seen, irc->op1); + val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); +- } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && ++ } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) && + irs+1 < irlast && (irs+1)->o == IR_HIOP) { + IRType t = IRT_I64; +- if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP) ++ if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP) + t = IRT_NUM; + lj_needsplit(J); + if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { +@@ -635,7 +635,7 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, + int32_t *sps = &ex->spill[regsp_spill(rs)]; + if (irt_isinteger(t)) { + setintV(o, *sps); +-#if !LJ_SOFTFP ++#if !LJ_SOFTFP32 + } else if (irt_isnum(t)) { + o->u64 = *(uint64_t *)sps; + #endif +@@ -660,6 +660,9 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, + #if !LJ_SOFTFP + } else if (irt_isnum(t)) { + setnumV(o, ex->fpr[r-RID_MIN_FPR]); ++#elif LJ_64 /* && LJ_SOFTFP */ ++ } else if (irt_isnum(t)) { ++ o->u64 = ex->gpr[r-RID_MIN_GPR]; + #endif + #if LJ_64 && !LJ_GC64 + } else if (irt_is64(t)) { +@@ -813,7 +816,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, + val = lj_tab_set(J->L, t, &tmp); + /* NOBARRIER: The table is new (marked white). 
*/ + snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val); +- if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { ++ if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { + snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp); + val->u32.hi = tmp.u32.lo; + } +@@ -874,7 +877,7 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) + continue; + } + snap_restoreval(J, T, ex, snapno, rfilt, ref, o); +- if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { ++ if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { + TValue tmp; + snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); + o->u32.hi = tmp.u32.lo; +diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc +index c06270a..75b38de 100644 +--- a/src/vm_mips64.dasc ++++ b/src/vm_mips64.dasc +@@ -1980,6 +1980,38 @@ static void build_subroutines(BuildCtx *ctx) + |1: + | jr ra + |. move CRET1, r0 ++ | ++ |// FP number to int conversion with a check for soft-float. ++ |// Modifies CARG1, CRET1, CRET2, TMP0, AT. ++ |->vm_tointg: ++ |.if JIT ++ | dsll CRET2, CARG1, 1 ++ | beqz CRET2, >2 ++ |. li TMP0, 1076 ++ | dsrl AT, CRET2, 53 ++ | dsubu TMP0, TMP0, AT ++ | sltiu AT, TMP0, 54 ++ | beqz AT, >1 ++ |. dextm CRET2, CRET2, 0, 20 ++ | dinsu CRET2, AT, 21, 21 ++ | slt AT, CARG1, r0 ++ | dsrlv CRET1, CRET2, TMP0 ++ | dsubu CARG1, r0, CRET1 ++ | movn CRET1, CARG1, AT ++ | li CARG1, 64 ++ | subu TMP0, CARG1, TMP0 ++ | dsllv CRET2, CRET2, TMP0 // Integer check. ++ | sextw AT, CRET1 ++ | xor AT, CRET1, AT // Range check. ++ | jr ra ++ |. movz CRET2, AT, CRET2 ++ |1: ++ | jr ra ++ |. li CRET2, 1 ++ |2: ++ | jr ra ++ |. move CRET1, r0 ++ |.endif + |.endif + | + |.macro .ffunc_bit, name +@@ -2665,6 +2697,23 @@ static void build_subroutines(BuildCtx *ctx) + |. li CRET1, 0 + |.endif + | ++ |.macro sfmin_max, name, intins ++ |->vm_sf .. name: ++ |.if JIT and not FPU ++ | move TMP2, ra ++ | bal ->vm_sfcmpolt ++ |. 
nop ++ | move ra, TMP2 ++ | move TMP0, CRET1 ++ | move CRET1, CARG1 ++ | jr ra ++ |. intins CRET1, CARG2, TMP0 ++ |.endif ++ |.endmacro ++ | ++ | sfmin_max min, movz ++ | sfmin_max max, movn ++ | + |//----------------------------------------------------------------------- + |//-- Miscellaneous functions -------------------------------------------- + |//----------------------------------------------------------------------- +-- +2.20.1 + diff --git a/SOURCES/0006-FreeBSD-x64-Avoid-changing-resource-limits-if-not-ne.patch b/SOURCES/0006-FreeBSD-x64-Avoid-changing-resource-limits-if-not-ne.patch new file mode 100644 index 0000000..133018d --- /dev/null +++ b/SOURCES/0006-FreeBSD-x64-Avoid-changing-resource-limits-if-not-ne.patch @@ -0,0 +1,26 @@ +From b0ecc6dd65a0b40e1868f20719c4f7c4880dc32d Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Thu, 8 Jun 2017 00:15:15 +0200 +Subject: [PATCH 06/72] FreeBSD/x64: Avoid changing resource limits, if not + needed. + +--- + src/lj_alloc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/lj_alloc.c b/src/lj_alloc.c +index 95d15d0..9fc761c 100644 +--- a/src/lj_alloc.c ++++ b/src/lj_alloc.c +@@ -343,7 +343,7 @@ static void *CALL_MMAP(size_t size) + } + #endif + +-#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 ++#if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 + + #include + +-- +2.20.1 + diff --git a/SOURCES/0007-Remove-unused-define.patch b/SOURCES/0007-Remove-unused-define.patch new file mode 100644 index 0000000..c4729e1 --- /dev/null +++ b/SOURCES/0007-Remove-unused-define.patch @@ -0,0 +1,28 @@ +From 6a71e71c1430e5a8f794a52cb2da66e2693db796 Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Sun, 11 Jun 2017 10:02:08 +0200 +Subject: [PATCH 07/72] Remove unused define. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Suggested by 罗泽轩. 
+--- + src/lj_def.h | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/src/lj_def.h b/src/lj_def.h +index 2d8fff6..e67bb24 100644 +--- a/src/lj_def.h ++++ b/src/lj_def.h +@@ -80,7 +80,6 @@ typedef unsigned int uintptr_t; + #define LJ_MIN_SBUF 32 /* Min. string buffer length. */ + #define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */ + #define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */ +-#define LJ_MIN_K64SZ 16 /* Min. size for chained K64Array. */ + + /* JIT compiler limits. */ + #define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */ +-- +2.20.1 + diff --git a/SOURCES/0008-Modify-fix-for-warning-from-ar.patch b/SOURCES/0008-Modify-fix-for-warning-from-ar.patch new file mode 100644 index 0000000..4d9b0e4 --- /dev/null +++ b/SOURCES/0008-Modify-fix-for-warning-from-ar.patch @@ -0,0 +1,33 @@ +From 82151a4514e6538086f3f5e01cb8d4b22287b14f Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Mon, 12 Jun 2017 09:24:00 +0200 +Subject: [PATCH 08/72] Modify fix for warning from 'ar'. 
+ +--- + src/Makefile | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/src/Makefile b/src/Makefile +index f7f81a4..24e8c0e 100644 +--- a/src/Makefile ++++ b/src/Makefile +@@ -208,7 +208,7 @@ TARGET_CC= $(STATIC_CC) + TARGET_STCC= $(STATIC_CC) + TARGET_DYNCC= $(DYNAMIC_CC) + TARGET_LD= $(CROSS)$(CC) +-TARGET_AR= $(CROSS)ar rcus 2>/dev/null ++TARGET_AR= $(CROSS)ar rcus + TARGET_STRIP= $(CROSS)strip + + TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib) +@@ -293,6 +293,7 @@ ifeq (Windows,$(TARGET_SYS)) + TARGET_XSHLDFLAGS= -shared + TARGET_DYNXLDOPTS= + else ++ TARGET_AR+= 2>/dev/null + ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1)) + TARGET_XCFLAGS+= -fno-stack-protector + endif +-- +2.20.1 + diff --git a/SOURCES/0009-x64-LJ_GC64-Fix-emit_rma.patch b/SOURCES/0009-x64-LJ_GC64-Fix-emit_rma.patch new file mode 100644 index 0000000..ff59f09 --- /dev/null +++ b/SOURCES/0009-x64-LJ_GC64-Fix-emit_rma.patch @@ -0,0 +1,47 @@ +From 7e662e4f87134f1e84f7bea80933e033c5bf53a3 Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Wed, 26 Jul 2017 09:52:53 +0200 +Subject: [PATCH 09/72] x64/LJ_GC64: Fix emit_rma(). 
+ +--- + src/lj_emit_x86.h | 24 +++++++++++++++++++++--- + 1 file changed, 21 insertions(+), 3 deletions(-) + +diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h +index 5207f9d..5b139bd 100644 +--- a/src/lj_emit_x86.h ++++ b/src/lj_emit_x86.h +@@ -343,9 +343,27 @@ static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) + emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr)); + } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) { + emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr)); +- } else if (!checki32((intptr_t)addr) && (xo == XO_MOV || xo == XO_MOVSD)) { +- emit_rmro(as, xo, rr, rr, 0); +- emit_loadu64(as, rr, (uintptr_t)addr); ++ } else if (!checki32((intptr_t)addr)) { ++ Reg ra = (rr & 15); ++ if (xo != XO_MOV) { ++ /* We can't allocate a register here. Use and restore DISPATCH. Ugly. */ ++ uint64_t dispaddr = (uintptr_t)J2GG(as->J)->dispatch; ++ uint8_t i8 = xo == XO_GROUP3b ? *as->mcp++ : 0; ++ ra = RID_DISPATCH; ++ if (checku32(dispaddr)) { ++ emit_loadi(as, ra, (int32_t)dispaddr); ++ } else { /* Full-size 64 bit load. */ ++ MCode *p = as->mcp; ++ *(uint64_t *)(p-8) = dispaddr; ++ p[-9] = (MCode)(XI_MOVri+(ra&7)); ++ p[-10] = 0x48 + ((ra>>3)&1); ++ p -= 10; ++ as->mcp = p; ++ } ++ if (xo == XO_GROUP3b) emit_i8(as, i8); ++ } ++ emit_rmro(as, xo, rr, ra, 0); ++ emit_loadu64(as, ra, (uintptr_t)addr); + } else + #endif + { +-- +2.20.1 + diff --git a/SOURCES/0010-PPC-Add-soft-float-support-to-interpreter.patch b/SOURCES/0010-PPC-Add-soft-float-support-to-interpreter.patch new file mode 100644 index 0000000..52d3638 --- /dev/null +++ b/SOURCES/0010-PPC-Add-soft-float-support-to-interpreter.patch @@ -0,0 +1,2761 @@ +From fd37da0d586c331b0008fbfd653a9659344fe76f Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Wed, 26 Jul 2017 09:52:19 +0200 +Subject: [PATCH 10/72] PPC: Add soft-float support to interpreter. + +Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. +Sponsored by Cisco Systems, Inc. 
+--- + src/host/buildvm_asm.c | 2 +- + src/lj_arch.h | 29 +- + src/lj_ccall.c | 38 +- + src/lj_ccall.h | 4 +- + src/lj_ccallback.c | 30 +- + src/lj_frame.h | 2 +- + src/lj_ircall.h | 2 +- + src/vm_ppc.dasc | 1249 +++++++++++++++++++++++++++++++++------- + 8 files changed, 1101 insertions(+), 255 deletions(-) + +diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c +index ffd1490..43595b3 100644 +--- a/src/host/buildvm_asm.c ++++ b/src/host/buildvm_asm.c +@@ -338,7 +338,7 @@ void emit_asm(BuildCtx *ctx) + #if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA) + fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n"); + #endif +-#if LJ_TARGET_PPC && !LJ_TARGET_PS3 ++#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP + /* Hard-float ABI. */ + fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n"); + #endif +diff --git a/src/lj_arch.h b/src/lj_arch.h +index b770564..0145a7c 100644 +--- a/src/lj_arch.h ++++ b/src/lj_arch.h +@@ -254,6 +254,29 @@ + #else + #define LJ_ARCH_BITS 32 + #define LJ_ARCH_NAME "ppc" ++ ++#if !defined(LJ_ARCH_HASFPU) ++#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) ++#define LJ_ARCH_HASFPU 0 ++#else ++#define LJ_ARCH_HASFPU 1 ++#endif ++#endif ++ ++#if !defined(LJ_ABI_SOFTFP) ++#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) ++#define LJ_ABI_SOFTFP 1 ++#else ++#define LJ_ABI_SOFTFP 0 ++#endif ++#endif ++#endif ++ ++#if LJ_ABI_SOFTFP ++#define LJ_ARCH_NOJIT 1 /* NYI */ ++#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL ++#else ++#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE + #endif + + #define LJ_TARGET_PPC 1 +@@ -262,7 +285,6 @@ + #define LJ_TARGET_MASKSHIFT 0 + #define LJ_TARGET_MASKROT 1 + #define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. 
*/ +-#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE + + #if LJ_TARGET_CONSOLE + #define LJ_ARCH_PPC32ON64 1 +@@ -415,16 +437,13 @@ + #error "No support for ILP32 model on ARM64" + #endif + #elif LJ_TARGET_PPC +-#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) +-#error "No support for PowerPC CPUs without double-precision FPU" +-#endif + #if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE + #error "No support for little-endian PPC32" + #endif + #if LJ_ARCH_PPC64 + #error "No support for PowerPC 64 bit mode (yet)" + #endif +-#ifdef __NO_FPRS__ ++#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT) + #error "No support for PPC/e500 anymore (use LuaJIT 2.0)" + #endif + #elif LJ_TARGET_MIPS32 +diff --git a/src/lj_ccall.c b/src/lj_ccall.c +index 5c252e5..799be48 100644 +--- a/src/lj_ccall.c ++++ b/src/lj_ccall.c +@@ -387,6 +387,24 @@ + #define CCALL_HANDLE_COMPLEXARG \ + /* Pass complex by value in 2 or 4 GPRs. */ + ++#define CCALL_HANDLE_GPR \ ++ /* Try to pass argument in GPRs. */ \ ++ if (n > 1) { \ ++ lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \ ++ if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \ ++ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ ++ else if (ngpr + n > maxgpr) \ ++ ngpr = maxgpr; /* Prevent reordering. */ \ ++ } \ ++ if (ngpr + n <= maxgpr) { \ ++ dp = &cc->gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } \ ++ ++#if LJ_ABI_SOFTFP ++#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR ++#else + #define CCALL_HANDLE_REGARG \ + if (isfp) { /* Try to pass argument in FPRs. */ \ + if (nfpr + 1 <= CCALL_NARG_FPR) { \ +@@ -395,24 +413,16 @@ + d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ + goto done; \ + } \ +- } else { /* Try to pass argument in GPRs. */ \ +- if (n > 1) { \ +- lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \ +- if (ctype_isinteger(d->info)) \ +- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. 
*/ \ +- else if (ngpr + n > maxgpr) \ +- ngpr = maxgpr; /* Prevent reordering. */ \ +- } \ +- if (ngpr + n <= maxgpr) { \ +- dp = &cc->gpr[ngpr]; \ +- ngpr += n; \ +- goto done; \ +- } \ ++ } else { \ ++ CCALL_HANDLE_GPR \ + } ++#endif + ++#if !LJ_ABI_SOFTFP + #define CCALL_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ ++#endif + + #elif LJ_TARGET_MIPS32 + /* -- MIPS o32 calling conventions ---------------------------------------- */ +@@ -1080,7 +1090,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + } + if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ + +-#if LJ_TARGET_X64 || LJ_TARGET_PPC ++#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) + cc->nfpr = nfpr; /* Required for vararg functions. */ + #endif + cc->nsp = nsp; +diff --git a/src/lj_ccall.h b/src/lj_ccall.h +index 59f6648..6efa48c 100644 +--- a/src/lj_ccall.h ++++ b/src/lj_ccall.h +@@ -86,9 +86,9 @@ typedef union FPRArg { + #elif LJ_TARGET_PPC + + #define CCALL_NARG_GPR 8 +-#define CCALL_NARG_FPR 8 ++#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8) + #define CCALL_NRET_GPR 4 /* For complex double. */ +-#define CCALL_NRET_FPR 1 ++#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1) + #define CCALL_SPS_EXTRA 4 + #define CCALL_SPS_FREE 0 + +diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c +index 846827b..03494a7 100644 +--- a/src/lj_ccallback.c ++++ b/src/lj_ccallback.c +@@ -419,6 +419,23 @@ void lj_ccallback_mcode_free(CTState *cts) + + #elif LJ_TARGET_PPC + ++#define CALLBACK_HANDLE_GPR \ ++ if (n > 1) { \ ++ lua_assert(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \ ++ ctype_isinteger(cta->info)) && n == 2); /* int64_t. */ \ ++ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. 
*/ \ ++ } \ ++ if (ngpr + n <= maxgpr) { \ ++ sp = &cts->cb.gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } ++ ++#if LJ_ABI_SOFTFP ++#define CALLBACK_HANDLE_REGARG \ ++ CALLBACK_HANDLE_GPR \ ++ UNUSED(isfp); ++#else + #define CALLBACK_HANDLE_REGARG \ + if (isfp) { \ + if (nfpr + 1 <= CCALL_NARG_FPR) { \ +@@ -427,20 +444,15 @@ void lj_ccallback_mcode_free(CTState *cts) + goto done; \ + } \ + } else { /* Try to pass argument in GPRs. */ \ +- if (n > 1) { \ +- lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \ +- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ +- } \ +- if (ngpr + n <= maxgpr) { \ +- sp = &cts->cb.gpr[ngpr]; \ +- ngpr += n; \ +- goto done; \ +- } \ ++ CALLBACK_HANDLE_GPR \ + } ++#endif + ++#if !LJ_ABI_SOFTFP + #define CALLBACK_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */ ++#endif + + #elif LJ_TARGET_MIPS32 + +diff --git a/src/lj_frame.h b/src/lj_frame.h +index 19c49a4..04cb5a3 100644 +--- a/src/lj_frame.h ++++ b/src/lj_frame.h +@@ -226,7 +226,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ + #define CFRAME_OFS_L 36 + #define CFRAME_OFS_PC 32 + #define CFRAME_OFS_MULTRES 28 +-#define CFRAME_SIZE 272 ++#define CFRAME_SIZE (LJ_ARCH_HASFPU ? 
272 : 128) + #define CFRAME_SHIFT_MULTRES 3 + #endif + #elif LJ_TARGET_MIPS32 +diff --git a/src/lj_ircall.h b/src/lj_ircall.h +index 7312006..9b3883b 100644 +--- a/src/lj_ircall.h ++++ b/src/lj_ircall.h +@@ -287,7 +287,7 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; + #define fp64_f2l __aeabi_f2lz + #define fp64_f2ul __aeabi_f2ulz + #endif +-#elif LJ_TARGET_MIPS ++#elif LJ_TARGET_MIPS || LJ_TARGET_PPC + #define softfp_add __adddf3 + #define softfp_sub __subdf3 + #define softfp_mul __muldf3 +diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc +index b4260eb..0839668 100644 +--- a/src/vm_ppc.dasc ++++ b/src/vm_ppc.dasc +@@ -103,6 +103,18 @@ + |// Fixed register assignments for the interpreter. + |// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) + | ++|.macro .FPU, a, b ++|.if FPU ++| a, b ++|.endif ++|.endmacro ++| ++|.macro .FPU, a, b, c ++|.if FPU ++| a, b, c ++|.endif ++|.endmacro ++| + |// The following must be C callee-save (but BASE is often refetched). + |.define BASE, r14 // Base of current Lua stack frame. + |.define KBASE, r15 // Constants of current Lua function. +@@ -116,8 +128,10 @@ + |.define TISNUM, r22 + |.define TISNIL, r23 + |.define ZERO, r24 ++|.if FPU + |.define TOBIT, f30 // 2^52 + 2^51. + |.define TONUM, f31 // 2^52 + 2^51 + 2^31. ++|.endif + | + |// The following temporaries are not saved across C calls, except for RA. + |.define RA, r20 // Callee-save. +@@ -133,6 +147,7 @@ + | + |// Saved temporaries. + |.define SAVE0, r21 ++|.define SAVE1, r25 + | + |// Calling conventions. + |.define CARG1, r3 +@@ -141,8 +156,10 @@ + |.define CARG4, r6 // Overlaps TMP3. + |.define CARG5, r7 // Overlaps INS. + | ++|.if FPU + |.define FARG1, f1 + |.define FARG2, f2 ++|.endif + | + |.define CRET1, r3 + |.define CRET2, r4 +@@ -213,10 +230,16 @@ + |.endif + |.else + | ++|.if FPU + |.define SAVE_LR, 276(sp) + |.define CFRAME_SPACE, 272 // Delta for sp. 
+ |// Back chain for sp: 272(sp) <-- sp entering interpreter + |.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves. ++|.else ++|.define SAVE_LR, 132(sp) ++|.define CFRAME_SPACE, 128 // Delta for sp. ++|// Back chain for sp: 128(sp) <-- sp entering interpreter ++|.endif + |.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves. + |.define SAVE_CR, 52(sp) // 32 bit CR save. + |.define SAVE_ERRF, 48(sp) // 32 bit C frame info. +@@ -226,16 +249,25 @@ + |.define SAVE_PC, 32(sp) + |.define SAVE_MULTRES, 28(sp) + |.define UNUSED1, 24(sp) ++|.if FPU + |.define TMPD_LO, 20(sp) + |.define TMPD_HI, 16(sp) + |.define TONUM_LO, 12(sp) + |.define TONUM_HI, 8(sp) ++|.else ++|.define SFSAVE_4, 20(sp) ++|.define SFSAVE_3, 16(sp) ++|.define SFSAVE_2, 12(sp) ++|.define SFSAVE_1, 8(sp) ++|.endif + |// Next frame lr: 4(sp) + |// Back chain for sp: 0(sp) <-- sp while in interpreter + | ++|.if FPU + |.define TMPD_BLO, 23(sp) + |.define TMPD, TMPD_HI + |.define TONUM_D, TONUM_HI ++|.endif + | + |.endif + | +@@ -245,7 +277,7 @@ + |.else + | stw r..reg, SAVE_GPR_+(reg-14)*4(sp) + |.endif +-| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) ++| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) + |.endmacro + |.macro rest_, reg + |.if GPR64 +@@ -253,7 +285,7 @@ + |.else + | lwz r..reg, SAVE_GPR_+(reg-14)*4(sp) + |.endif +-| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) ++| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) + |.endmacro + | + |.macro saveregs +@@ -323,6 +355,7 @@ + |// Trap for not-yet-implemented parts. + |.macro NYI; tw 4, sp, sp; .endmacro + | ++|.if FPU + |// int/FP conversions. + |.macro tonum_i, freg, reg + | xoris reg, reg, 0x8000 +@@ -346,6 +379,7 @@ + |.macro toint, reg, freg + | toint reg, freg, freg + |.endmacro ++|.endif + | + |//----------------------------------------------------------------------- + | +@@ -533,9 +567,19 @@ static void build_subroutines(BuildCtx *ctx) + | beq >2 + |1: + | addic. 
TMP1, TMP1, -8 ++ |.if FPU + | lfd f0, 0(RA) ++ |.else ++ | lwz CARG1, 0(RA) ++ | lwz CARG2, 4(RA) ++ |.endif + | addi RA, RA, 8 ++ |.if FPU + | stfd f0, 0(BASE) ++ |.else ++ | stw CARG1, 0(BASE) ++ | stw CARG2, 4(BASE) ++ |.endif + | addi BASE, BASE, 8 + | bney <1 + | +@@ -613,23 +657,23 @@ static void build_subroutines(BuildCtx *ctx) + | .toc ld TOCREG, SAVE_TOC + | li TISNUM, LJ_TISNUM // Setup type comparison constants. + | lp BASE, L->base +- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | lwz DISPATCH, L->glref // Setup pointer to dispatch table. + | li ZERO, 0 +- | stw TMP3, TMPD ++ | .FPU stw TMP3, TMPD + | li TMP1, LJ_TFALSE +- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). ++ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). + | li TISNIL, LJ_TNIL + | li_vmstate INTERP +- | lfs TOBIT, TMPD ++ | .FPU lfs TOBIT, TMPD + | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. + | la RA, -8(BASE) // Results start at BASE-8. +- | stw TMP3, TMPD ++ | .FPU stw TMP3, TMPD + | addi DISPATCH, DISPATCH, GG_G2DISP + | stw TMP1, 0(RA) // Prepend false to error message. + | li RD, 16 // 2 results: false + error message. + | st_vmstate +- | lfs TONUM, TMPD ++ | .FPU lfs TONUM, TMPD + | b ->vm_returnc + | + |//----------------------------------------------------------------------- +@@ -690,22 +734,22 @@ static void build_subroutines(BuildCtx *ctx) + | li TISNUM, LJ_TISNUM // Setup type comparison constants. + | lp TMP1, L->top + | lwz PC, FRAME_PC(BASE) +- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | stb CARG3, L->status +- | stw TMP3, TMPD +- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). +- | lfs TOBIT, TMPD ++ | .FPU stw TMP3, TMPD ++ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 
++ | .FPU lfs TOBIT, TMPD + | sub RD, TMP1, BASE +- | stw TMP3, TMPD +- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) ++ | .FPU stw TMP3, TMPD ++ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) + | addi RD, RD, 8 +- | stw TMP0, TONUM_HI ++ | .FPU stw TMP0, TONUM_HI + | li_vmstate INTERP + | li ZERO, 0 + | st_vmstate + | andix. TMP0, PC, FRAME_TYPE + | mr MULTRES, RD +- | lfs TONUM, TMPD ++ | .FPU lfs TONUM, TMPD + | li TISNIL, LJ_TNIL + | beq ->BC_RET_Z + | b ->vm_return +@@ -739,19 +783,19 @@ static void build_subroutines(BuildCtx *ctx) + | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). + | li TISNUM, LJ_TISNUM // Setup type comparison constants. + | lp TMP1, L->top +- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | add PC, PC, BASE +- | stw TMP3, TMPD ++ | .FPU stw TMP3, TMPD + | li ZERO, 0 +- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). +- | lfs TOBIT, TMPD ++ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 
++ | .FPU lfs TOBIT, TMPD + | sub PC, PC, TMP2 // PC = frame delta + frame type +- | stw TMP3, TMPD +- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) ++ | .FPU stw TMP3, TMPD ++ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) + | sub NARGS8:RC, TMP1, BASE +- | stw TMP0, TONUM_HI ++ | .FPU stw TMP0, TONUM_HI + | li_vmstate INTERP +- | lfs TONUM, TMPD ++ | .FPU lfs TONUM, TMPD + | li TISNIL, LJ_TNIL + | st_vmstate + | +@@ -839,15 +883,30 @@ static void build_subroutines(BuildCtx *ctx) + | lwz INS, -4(PC) + | subi CARG2, RB, 16 + | decode_RB8 SAVE0, INS ++ |.if FPU + | lfd f0, 0(RA) ++ |.else ++ | lwz TMP2, 0(RA) ++ | lwz TMP3, 4(RA) ++ |.endif + | add TMP1, BASE, SAVE0 + | stp BASE, L->base + | cmplw TMP1, CARG2 + | sub CARG3, CARG2, TMP1 + | decode_RA8 RA, INS ++ |.if FPU + | stfd f0, 0(CARG2) ++ |.else ++ | stw TMP2, 0(CARG2) ++ | stw TMP3, 4(CARG2) ++ |.endif + | bney ->BC_CAT_Z ++ |.if FPU + | stfdx f0, BASE, RA ++ |.else ++ | stwux TMP2, RA, BASE ++ | stw TMP3, 4(RA) ++ |.endif + | b ->cont_nop + | + |//-- Table indexing metamethods ----------------------------------------- +@@ -900,9 +959,19 @@ static void build_subroutines(BuildCtx *ctx) + | // Returns TValue * (finished) or NULL (metamethod). + | cmplwi CRET1, 0 + | beq >3 ++ |.if FPU + | lfd f0, 0(CRET1) ++ |.else ++ | lwz TMP0, 0(CRET1) ++ | lwz TMP1, 4(CRET1) ++ |.endif + | ins_next1 ++ |.if FPU + | stfdx f0, BASE, RA ++ |.else ++ | stwux TMP0, RA, BASE ++ | stw TMP1, 4(RA) ++ |.endif + | ins_next2 + | + |3: // Call __index metamethod. +@@ -920,7 +989,12 @@ static void build_subroutines(BuildCtx *ctx) + | // Returns cTValue * or NULL. 
+ | cmplwi CRET1, 0 + | beq >1 ++ |.if FPU + | lfd f14, 0(CRET1) ++ |.else ++ | lwz SAVE0, 0(CRET1) ++ | lwz SAVE1, 4(CRET1) ++ |.endif + | b ->BC_TGETR_Z + |1: + | stwx TISNIL, BASE, RA +@@ -975,11 +1049,21 @@ static void build_subroutines(BuildCtx *ctx) + | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) + | // Returns TValue * (finished) or NULL (metamethod). + | cmplwi CRET1, 0 ++ |.if FPU + | lfdx f0, BASE, RA ++ |.else ++ | lwzux TMP2, RA, BASE ++ | lwz TMP3, 4(RA) ++ |.endif + | beq >3 + | // NOBARRIER: lj_meta_tset ensures the table is not black. + | ins_next1 ++ |.if FPU + | stfd f0, 0(CRET1) ++ |.else ++ | stw TMP2, 0(CRET1) ++ | stw TMP3, 4(CRET1) ++ |.endif + | ins_next2 + | + |3: // Call __newindex metamethod. +@@ -990,7 +1074,12 @@ static void build_subroutines(BuildCtx *ctx) + | add PC, TMP1, BASE + | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | li NARGS8:RC, 24 // 3 args for func(t, k, v) ++ |.if FPU + | stfd f0, 16(BASE) // Copy value to third argument. ++ |.else ++ | stw TMP2, 16(BASE) ++ | stw TMP3, 20(BASE) ++ |.endif + | b ->vm_call_dispatch_f + | + |->vmeta_tsetr: +@@ -998,7 +1087,12 @@ static void build_subroutines(BuildCtx *ctx) + | stw PC, SAVE_PC + | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + | // Returns TValue *. 
++ |.if FPU + | stfd f14, 0(CRET1) ++ |.else ++ | stw SAVE0, 0(CRET1) ++ | stw SAVE1, 4(CRET1) ++ |.endif + | b ->cont_nop + | + |//-- Comparison metamethods --------------------------------------------- +@@ -1037,9 +1131,19 @@ static void build_subroutines(BuildCtx *ctx) + | + |->cont_ra: // RA = resultptr + | lwz INS, -4(PC) ++ |.if FPU + | lfd f0, 0(RA) ++ |.else ++ | lwz CARG1, 0(RA) ++ | lwz CARG2, 4(RA) ++ |.endif + | decode_RA8 TMP1, INS ++ |.if FPU + | stfdx f0, BASE, TMP1 ++ |.else ++ | stwux CARG1, TMP1, BASE ++ | stw CARG2, 4(TMP1) ++ |.endif + | b ->cont_nop + | + |->cont_condt: // RA = resultptr +@@ -1245,22 +1349,32 @@ static void build_subroutines(BuildCtx *ctx) + |.macro .ffunc_n, name + |->ff_ .. name: + | cmplwi NARGS8:RC, 8 +- | lwz CARG3, 0(BASE) ++ | lwz CARG1, 0(BASE) ++ |.if FPU + | lfd FARG1, 0(BASE) ++ |.else ++ | lwz CARG2, 4(BASE) ++ |.endif + | blt ->fff_fallback +- | checknum CARG3; bge ->fff_fallback ++ | checknum CARG1; bge ->fff_fallback + |.endmacro + | + |.macro .ffunc_nn, name + |->ff_ .. name: + | cmplwi NARGS8:RC, 16 +- | lwz CARG3, 0(BASE) ++ | lwz CARG1, 0(BASE) ++ |.if FPU + | lfd FARG1, 0(BASE) +- | lwz CARG4, 8(BASE) ++ | lwz CARG3, 8(BASE) + | lfd FARG2, 8(BASE) ++ |.else ++ | lwz CARG2, 4(BASE) ++ | lwz CARG3, 8(BASE) ++ | lwz CARG4, 12(BASE) ++ |.endif + | blt ->fff_fallback ++ | checknum CARG1; bge ->fff_fallback + | checknum CARG3; bge ->fff_fallback +- | checknum CARG4; bge ->fff_fallback + |.endmacro + | + |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. +@@ -1281,14 +1395,21 @@ static void build_subroutines(BuildCtx *ctx) + | bge cr1, ->fff_fallback + | stw CARG3, 0(RA) + | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. ++ | addi TMP1, BASE, 8 ++ | add TMP2, RA, NARGS8:RC + | stw CARG1, 4(RA) + | beq ->fff_res // Done if exactly 1 argument. 
+- | li TMP1, 8 +- | subi RC, RC, 8 + |1: +- | cmplw TMP1, RC +- | lfdx f0, BASE, TMP1 +- | stfdx f0, RA, TMP1 ++ | cmplw TMP1, TMP2 ++ |.if FPU ++ | lfd f0, 0(TMP1) ++ | stfd f0, -8(TMP1) // Copy down one slot, same as the soft-float path. ++ |.else ++ | lwz CARG1, 0(TMP1) ++ | lwz CARG2, 4(TMP1) ++ | stw CARG1, -8(TMP1) ++ | stw CARG2, -4(TMP1) ++ |.endif + | addi TMP1, TMP1, 8 + | bney <1 + | b ->fff_res +@@ -1303,8 +1424,14 @@ static void build_subroutines(BuildCtx *ctx) + | orc TMP1, TMP2, TMP0 + | addi TMP1, TMP1, ~LJ_TISNUM+1 + | slwi TMP1, TMP1, 3 ++ |.if FPU + | la TMP2, CFUNC:RB->upvalue + | lfdx FARG1, TMP2, TMP1 ++ |.else ++ | add TMP1, CFUNC:RB, TMP1 ++ | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi ++ | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo ++ |.endif + | b ->fff_resn + | + |//-- Base library: getters and setters --------------------------------- +@@ -1382,7 +1509,12 @@ static void build_subroutines(BuildCtx *ctx) + | mr CARG1, L + | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) + | // Returns cTValue *. ++ |.if FPU + | lfd FARG1, 0(CRET1) ++ |.else ++ | lwz CARG2, 4(CRET1) ++ | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1. ++ |.endif + | b ->fff_resn + | + |//-- Base library: conversions ------------------------------------------ +@@ -1391,7 +1523,11 @@ static void build_subroutines(BuildCtx *ctx) + | // Only handles the number case inline (without a base argument). + | cmplwi NARGS8:RC, 8 + | lwz CARG1, 0(BASE) ++ |.if FPU + | lfd FARG1, 0(BASE) ++ |.else ++ | lwz CARG2, 4(BASE) ++ |.endif + | bne ->fff_fallback // Exactly one argument. + | checknum CARG1; bgt ->fff_fallback + | b ->fff_resn +@@ -1442,12 +1578,23 @@ static void build_subroutines(BuildCtx *ctx) + | cmplwi CRET1, 0 + | li CARG3, LJ_TNIL + | beq ->fff_restv // End of traversal: return nil. +- | lfd f0, 8(BASE) // Copy key and value to results. + | la RA, -8(BASE) ++ |.if FPU ++ | lfd f0, 8(BASE) // Copy key and value to results.
+ | lfd f1, 16(BASE) + | stfd f0, 0(RA) +- | li RD, (2+1)*8 + | stfd f1, 8(RA) ++ |.else ++ | lwz CARG1, 8(BASE) ++ | lwz CARG2, 12(BASE) ++ | lwz CARG3, 16(BASE) ++ | lwz CARG4, 20(BASE) ++ | stw CARG1, 0(RA) ++ | stw CARG2, 4(RA) ++ | stw CARG3, 8(RA) ++ | stw CARG4, 12(RA) ++ |.endif ++ | li RD, (2+1)*8 + | b ->fff_res + | + |.ffunc_1 pairs +@@ -1456,17 +1603,32 @@ static void build_subroutines(BuildCtx *ctx) + | bne ->fff_fallback + #if LJ_52 + | lwz TAB:TMP2, TAB:CARG1->metatable ++ |.if FPU + | lfd f0, CFUNC:RB->upvalue[0] ++ |.else ++ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi ++ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo ++ |.endif + | cmplwi TAB:TMP2, 0 + | la RA, -8(BASE) + | bne ->fff_fallback + #else ++ |.if FPU + | lfd f0, CFUNC:RB->upvalue[0] ++ |.else ++ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi ++ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo ++ |.endif + | la RA, -8(BASE) + #endif + | stw TISNIL, 8(BASE) + | li RD, (3+1)*8 ++ |.if FPU + | stfd f0, 0(RA) ++ |.else ++ | stw TMP0, 0(RA) ++ | stw TMP1, 4(RA) ++ |.endif + | b ->fff_res + | + |.ffunc ipairs_aux +@@ -1512,14 +1674,24 @@ static void build_subroutines(BuildCtx *ctx) + | stfd FARG2, 0(RA) + |.endif + | ble >2 // Not in array part? ++ |.if FPU + | lwzx TMP2, TMP1, TMP3 + | lfdx f0, TMP1, TMP3 ++ |.else ++ | lwzux TMP2, TMP1, TMP3 ++ | lwz TMP3, 4(TMP1) ++ |.endif + |1: + | checknil TMP2 + | li RD, (0+1)*8 + | beq ->fff_res // End of iteration, return 0 results. + | li RD, (2+1)*8 ++ |.if FPU + | stfd f0, 8(RA) ++ |.else ++ | stw TMP2, 8(RA) ++ | stw TMP3, 12(RA) ++ |.endif + | b ->fff_res + |2: // Check for empty hash part first. Otherwise call C function. 
+ | lwz TMP0, TAB:CARG1->hmask +@@ -1533,7 +1705,11 @@ static void build_subroutines(BuildCtx *ctx) + | li RD, (0+1)*8 + | beq ->fff_res + | lwz TMP2, 0(CRET1) ++ |.if FPU + | lfd f0, 0(CRET1) ++ |.else ++ | lwz TMP3, 4(CRET1) ++ |.endif + | b <1 + | + |.ffunc_1 ipairs +@@ -1542,12 +1718,22 @@ static void build_subroutines(BuildCtx *ctx) + | bne ->fff_fallback + #if LJ_52 + | lwz TAB:TMP2, TAB:CARG1->metatable ++ |.if FPU + | lfd f0, CFUNC:RB->upvalue[0] ++ |.else ++ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi ++ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo ++ |.endif + | cmplwi TAB:TMP2, 0 + | la RA, -8(BASE) + | bne ->fff_fallback + #else ++ |.if FPU + | lfd f0, CFUNC:RB->upvalue[0] ++ |.else ++ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi ++ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo ++ |.endif + | la RA, -8(BASE) + #endif + |.if DUALNUM +@@ -1557,7 +1743,12 @@ static void build_subroutines(BuildCtx *ctx) + |.endif + | stw ZERO, 12(BASE) + | li RD, (3+1)*8 ++ |.if FPU + | stfd f0, 0(RA) ++ |.else ++ | stw TMP0, 0(RA) ++ | stw TMP1, 4(RA) ++ |.endif + | b ->fff_res + | + |//-- Base library: catch errors ---------------------------------------- +@@ -1576,19 +1767,32 @@ static void build_subroutines(BuildCtx *ctx) + | + |.ffunc xpcall + | cmplwi NARGS8:RC, 16 +- | lwz CARG4, 8(BASE) ++ | lwz CARG3, 8(BASE) ++ |.if FPU + | lfd FARG2, 8(BASE) + | lfd FARG1, 0(BASE) ++ |.else ++ | lwz CARG1, 0(BASE) ++ | lwz CARG2, 4(BASE) ++ | lwz CARG4, 12(BASE) ++ |.endif + | blt ->fff_fallback + | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) + | mr TMP2, BASE +- | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function. ++ | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function. + | la BASE, 16(BASE) + | // Remember active hook before pcall. + | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 ++ |.if FPU + | stfd FARG2, 0(TMP2) // Swap function and traceback. 
+- | subi NARGS8:RC, NARGS8:RC, 16 + | stfd FARG1, 8(TMP2) ++ |.else ++ | stw CARG3, 0(TMP2) ++ | stw CARG4, 4(TMP2) ++ | stw CARG1, 8(TMP2) ++ | stw CARG2, 12(TMP2) ++ |.endif ++ | subi NARGS8:RC, NARGS8:RC, 16 + | addi PC, TMP1, 16+FRAME_PCALL + | b ->vm_call_dispatch + | +@@ -1631,9 +1835,21 @@ static void build_subroutines(BuildCtx *ctx) + | stp BASE, L->top + |2: // Move args to coroutine. + | cmpw TMP1, NARGS8:RC ++ |.if FPU + | lfdx f0, BASE, TMP1 ++ |.else ++ | add CARG3, BASE, TMP1 ++ | lwz TMP2, 0(CARG3) ++ | lwz TMP3, 4(CARG3) ++ |.endif + | beq >3 ++ |.if FPU + | stfdx f0, CARG2, TMP1 ++ |.else ++ | add CARG3, CARG2, TMP1 ++ | stw TMP2, 0(CARG3) ++ | stw TMP3, 4(CARG3) ++ |.endif + | addi TMP1, TMP1, 8 + | b <2 + |3: +@@ -1664,8 +1880,17 @@ static void build_subroutines(BuildCtx *ctx) + | stp TMP2, L:SAVE0->top // Clear coroutine stack. + |5: // Move results from coroutine. + | cmplw TMP1, TMP3 ++ |.if FPU + | lfdx f0, TMP2, TMP1 + | stfdx f0, BASE, TMP1 ++ |.else ++ | add CARG3, TMP2, TMP1 ++ | lwz CARG1, 0(CARG3) ++ | lwz CARG2, 4(CARG3) ++ | add CARG3, BASE, TMP1 ++ | stw CARG1, 0(CARG3) ++ | stw CARG2, 4(CARG3) ++ |.endif + | addi TMP1, TMP1, 8 + | bne <5 + |6: +@@ -1690,12 +1915,22 @@ static void build_subroutines(BuildCtx *ctx) + | andix. TMP0, PC, FRAME_TYPE + | la TMP3, -8(TMP3) + | li TMP1, LJ_TFALSE ++ |.if FPU + | lfd f0, 0(TMP3) ++ |.else ++ | lwz CARG1, 0(TMP3) ++ | lwz CARG2, 4(TMP3) ++ |.endif + | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. + | li RD, (2+1)*8 + | stw TMP1, -8(BASE) // Prepend false to results. + | la RA, -8(BASE) ++ |.if FPU + | stfd f0, 0(BASE) // Copy error message. ++ |.else ++ | stw CARG1, 0(BASE) // Copy error message. ++ | stw CARG2, 4(BASE) ++ |.endif + | b <7 + |.else + | mr CARG1, L +@@ -1874,7 +2109,12 @@ static void build_subroutines(BuildCtx *ctx) + | lus CARG1, 0x8000 // -(2^31). 
+ | beqy ->fff_resi + |5: ++ |.if FPU + | lfd FARG1, 0(BASE) ++ |.else ++ | lwz CARG1, 0(BASE) ++ | lwz CARG2, 4(BASE) ++ |.endif + | blex func + | b ->fff_resn + |.endmacro +@@ -1898,10 +2138,14 @@ static void build_subroutines(BuildCtx *ctx) + | + |.ffunc math_log + | cmplwi NARGS8:RC, 8 +- | lwz CARG3, 0(BASE) +- | lfd FARG1, 0(BASE) ++ | lwz CARG1, 0(BASE) + | bne ->fff_fallback // Need exactly 1 argument. +- | checknum CARG3; bge ->fff_fallback ++ | checknum CARG1; bge ->fff_fallback ++ |.if FPU ++ | lfd FARG1, 0(BASE) ++ |.else ++ | lwz CARG2, 4(BASE) ++ |.endif + | blex log + | b ->fff_resn + | +@@ -1923,17 +2167,24 @@ static void build_subroutines(BuildCtx *ctx) + |.if DUALNUM + |.ffunc math_ldexp + | cmplwi NARGS8:RC, 16 +- | lwz CARG3, 0(BASE) ++ | lwz TMP0, 0(BASE) ++ |.if FPU + | lfd FARG1, 0(BASE) +- | lwz CARG4, 8(BASE) ++ |.else ++ | lwz CARG1, 0(BASE) ++ | lwz CARG2, 4(BASE) ++ |.endif ++ | lwz TMP1, 8(BASE) + |.if GPR64 + | lwz CARG2, 12(BASE) +- |.else ++ |.elif FPU + | lwz CARG1, 12(BASE) ++ |.else ++ | lwz CARG3, 12(BASE) + |.endif + | blt ->fff_fallback +- | checknum CARG3; bge ->fff_fallback +- | checknum CARG4; bne ->fff_fallback ++ | checknum TMP0; bge ->fff_fallback ++ | checknum TMP1; bne ->fff_fallback + |.else + |.ffunc_nn math_ldexp + |.if GPR64 +@@ -1948,8 +2199,10 @@ static void build_subroutines(BuildCtx *ctx) + |.ffunc_n math_frexp + |.if GPR64 + | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) +- |.else ++ |.elif FPU + | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) ++ |.else ++ | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) + |.endif + | lwz PC, FRAME_PC(BASE) + | blex frexp +@@ -1958,7 +2211,12 @@ static void build_subroutines(BuildCtx *ctx) + |.if not DUALNUM + | tonum_i FARG2, TMP1 + |.endif ++ |.if FPU + | stfd FARG1, 0(RA) ++ |.else ++ | stw CRET1, 0(RA) ++ | stw CRET2, 4(RA) ++ |.endif + | li RD, (2+1)*8 + |.if DUALNUM + | stw TISNUM, 8(RA) +@@ -1971,13 +2229,20 @@ static void build_subroutines(BuildCtx *ctx) + |.ffunc_n math_modf + |.if GPR64 + 
| la CARG2, -8(BASE) +- |.else ++ |.elif FPU + | la CARG1, -8(BASE) ++ |.else ++ | la CARG3, -8(BASE) + |.endif + | lwz PC, FRAME_PC(BASE) + | blex modf + | la RA, -8(BASE) ++ |.if FPU + | stfd FARG1, 0(BASE) ++ |.else ++ | stw CRET1, 0(BASE) ++ | stw CRET2, 4(BASE) ++ |.endif + | li RD, (2+1)*8 + | b ->fff_res + | +@@ -1985,13 +2250,13 @@ static void build_subroutines(BuildCtx *ctx) + |.if DUALNUM + | .ffunc_1 name + | checknum CARG3 +- | addi TMP1, BASE, 8 +- | add TMP2, BASE, NARGS8:RC ++ | addi SAVE0, BASE, 8 ++ | add SAVE1, BASE, NARGS8:RC + | bne >4 + |1: // Handle integers. +- | lwz CARG4, 0(TMP1) +- | cmplw cr1, TMP1, TMP2 +- | lwz CARG2, 4(TMP1) ++ | lwz CARG4, 0(SAVE0) ++ | cmplw cr1, SAVE0, SAVE1 ++ | lwz CARG2, 4(SAVE0) + | bge cr1, ->fff_resi + | checknum CARG4 + | xoris TMP0, CARG1, 0x8000 +@@ -2008,36 +2273,76 @@ static void build_subroutines(BuildCtx *ctx) + |.if GPR64 + | rldicl CARG1, CARG1, 0, 32 + |.endif +- | addi TMP1, TMP1, 8 ++ | addi SAVE0, SAVE0, 8 + | b <1 + |3: + | bge ->fff_fallback + | // Convert intermediate result to number and continue below. ++ |.if FPU + | tonum_i FARG1, CARG1 +- | lfd FARG2, 0(TMP1) ++ | lfd FARG2, 0(SAVE0) ++ |.else ++ | mr CARG2, CARG1 ++ | bl ->vm_sfi2d_1 ++ | lwz CARG3, 0(SAVE0) ++ | lwz CARG4, 4(SAVE0) ++ |.endif + | b >6 + |4: ++ |.if FPU + | lfd FARG1, 0(BASE) ++ |.else ++ | lwz CARG1, 0(BASE) ++ | lwz CARG2, 4(BASE) ++ |.endif + | bge ->fff_fallback + |5: // Handle numbers. 
+- | lwz CARG4, 0(TMP1) +- | cmplw cr1, TMP1, TMP2 +- | lfd FARG2, 0(TMP1) ++ | lwz CARG3, 0(SAVE0) ++ | cmplw cr1, SAVE0, SAVE1 ++ |.if FPU ++ | lfd FARG2, 0(SAVE0) ++ |.else ++ | lwz CARG4, 4(SAVE0) ++ |.endif + | bge cr1, ->fff_resn +- | checknum CARG4; bge >7 ++ | checknum CARG3; bge >7 + |6: ++ | addi SAVE0, SAVE0, 8 ++ |.if FPU + | fsub f0, FARG1, FARG2 +- | addi TMP1, TMP1, 8 + |.if ismax + | fsel FARG1, f0, FARG1, FARG2 + |.else + | fsel FARG1, f0, FARG2, FARG1 + |.endif ++ |.else ++ | stw CARG1, SFSAVE_1 ++ | stw CARG2, SFSAVE_2 ++ | stw CARG3, SFSAVE_3 ++ | stw CARG4, SFSAVE_4 ++ | blex __ledf2 ++ | cmpwi CRET1, 0 ++ |.if ismax ++ | blt >8 ++ |.else ++ | bge >8 ++ |.endif ++ | lwz CARG1, SFSAVE_1 ++ | lwz CARG2, SFSAVE_2 ++ | b <5 ++ |8: ++ | lwz CARG1, SFSAVE_3 ++ | lwz CARG2, SFSAVE_4 ++ |.endif + | b <5 + |7: // Convert integer to number and continue above. +- | lwz CARG2, 4(TMP1) ++ | lwz CARG3, 4(SAVE0) + | bne ->fff_fallback +- | tonum_i FARG2, CARG2 ++ |.if FPU ++ | tonum_i FARG2, CARG3 ++ |.else ++ | bl ->vm_sfi2d_2 ++ |.endif + | b <6 + |.else + | .ffunc_n name +@@ -2237,28 +2542,37 @@ static void build_subroutines(BuildCtx *ctx) + | + |.macro .ffunc_bit_op, name, ins + | .ffunc_bit name +- | addi TMP1, BASE, 8 +- | add TMP2, BASE, NARGS8:RC ++ | addi SAVE0, BASE, 8 ++ | add SAVE1, BASE, NARGS8:RC + |1: +- | lwz CARG4, 0(TMP1) +- | cmplw cr1, TMP1, TMP2 ++ | lwz CARG4, 0(SAVE0) ++ | cmplw cr1, SAVE0, SAVE1 + |.if DUALNUM +- | lwz CARG2, 4(TMP1) ++ | lwz CARG2, 4(SAVE0) + |.else +- | lfd FARG1, 0(TMP1) ++ | lfd FARG1, 0(SAVE0) + |.endif + | bgey cr1, ->fff_resi + | checknum CARG4 + |.if DUALNUM ++ |.if FPU + | bnel ->fff_bitop_fb + |.else ++ | beq >3 ++ | stw CARG1, SFSAVE_1 ++ | bl ->fff_bitop_fb ++ | mr CARG2, CARG1 ++ | lwz CARG1, SFSAVE_1 ++ |3: ++ |.endif ++ |.else + | fadd FARG1, FARG1, TOBIT + | bge ->fff_fallback + | stfd FARG1, TMPD + | lwz CARG2, TMPD_LO + |.endif + | ins CARG1, CARG1, CARG2 +- | addi TMP1, TMP1, 8 ++ | addi SAVE0, 
SAVE0, 8 + | b <1 + |.endmacro + | +@@ -2280,7 +2594,14 @@ static void build_subroutines(BuildCtx *ctx) + |.macro .ffunc_bit_sh, name, ins, shmod + |.if DUALNUM + | .ffunc_2 bit_..name ++ |.if FPU + | checknum CARG3; bnel ->fff_tobit_fb ++ |.else ++ | checknum CARG3; beq >1 ++ | bl ->fff_tobit_fb ++ | lwz CARG2, 12(BASE) // Conversion polluted CARG2. ++ |1: ++ |.endif + | // Note: no inline conversion from number for 2nd argument! + | checknum CARG4; bne ->fff_fallback + |.else +@@ -2317,27 +2638,77 @@ static void build_subroutines(BuildCtx *ctx) + |->fff_resn: + | lwz PC, FRAME_PC(BASE) + | la RA, -8(BASE) ++ |.if FPU + | stfd FARG1, -8(BASE) ++ |.else ++ | stw CARG1, -8(BASE) ++ | stw CARG2, -4(BASE) ++ |.endif + | b ->fff_res1 + | + |// Fallback FP number to bit conversion. + |->fff_tobit_fb: + |.if DUALNUM ++ |.if FPU + | lfd FARG1, 0(BASE) + | bgt ->fff_fallback + | fadd FARG1, FARG1, TOBIT + | stfd FARG1, TMPD + | lwz CARG1, TMPD_LO + | blr ++ |.else ++ | bgt ->fff_fallback ++ | mr CARG2, CARG1 ++ | mr CARG1, CARG3 ++ |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2. ++ |->vm_tobit: ++ | slwi TMP2, CARG1, 1 ++ | addis TMP2, TMP2, 0x0020 ++ | cmpwi TMP2, 0 ++ | bge >2 ++ | li TMP1, 0x3e0 ++ | srawi TMP2, TMP2, 21 ++ | not TMP1, TMP1 ++ | sub. TMP2, TMP1, TMP2 ++ | cmpwi cr7, CARG1, 0 ++ | blt >1 ++ | slwi TMP1, CARG1, 11 ++ | srwi TMP0, CARG2, 21 ++ | oris TMP1, TMP1, 0x8000 ++ | or TMP1, TMP1, TMP0 ++ | srw CARG1, TMP1, TMP2 ++ | bclr 4, 28 // Return if cr7[lt] == 0, no hint. ++ | neg CARG1, CARG1 ++ | blr ++ |1: ++ | addi TMP2, TMP2, 21 ++ | srw TMP1, CARG2, TMP2 ++ | slwi CARG2, CARG1, 12 ++ | subfic TMP2, TMP2, 20 ++ | slw TMP0, CARG2, TMP2 ++ | or CARG1, TMP1, TMP0 ++ | bclr 4, 28 // Return if cr7[lt] == 0, no hint. 
++ | neg CARG1, CARG1 ++ | blr ++ |2: ++ | li CARG1, 0 ++ | blr ++ |.endif + |.endif + |->fff_bitop_fb: + |.if DUALNUM +- | lfd FARG1, 0(TMP1) ++ |.if FPU ++ | lfd FARG1, 0(SAVE0) + | bgt ->fff_fallback + | fadd FARG1, FARG1, TOBIT + | stfd FARG1, TMPD + | lwz CARG2, TMPD_LO + | blr ++ |.else ++ | bgt ->fff_fallback ++ | mr CARG1, CARG4 ++ | b ->vm_tobit ++ |.endif + |.endif + | + |//----------------------------------------------------------------------- +@@ -2530,10 +2901,21 @@ static void build_subroutines(BuildCtx *ctx) + | decode_RA8 RC, INS // Call base. + | beq >2 + |1: // Move results down. ++ |.if FPU + | lfd f0, 0(RA) ++ |.else ++ | lwz CARG1, 0(RA) ++ | lwz CARG2, 4(RA) ++ |.endif + | addic. TMP1, TMP1, -8 + | addi RA, RA, 8 ++ |.if FPU + | stfdx f0, BASE, RC ++ |.else ++ | add CARG3, BASE, RC ++ | stw CARG1, 0(CARG3) ++ | stw CARG2, 4(CARG3) ++ |.endif + | addi RC, RC, 8 + | bne <1 + |2: +@@ -2586,10 +2968,12 @@ static void build_subroutines(BuildCtx *ctx) + |//----------------------------------------------------------------------- + | + |.macro savex_, a, b, c, d ++ |.if FPU + | stfd f..a, 16+a*8(sp) + | stfd f..b, 16+b*8(sp) + | stfd f..c, 16+c*8(sp) + | stfd f..d, 16+d*8(sp) ++ |.endif + |.endmacro + | + |->vm_exit_handler: +@@ -2661,16 +3045,16 @@ static void build_subroutines(BuildCtx *ctx) + | lwz KBASE, PC2PROTO(k)(TMP1) + | // Setup type comparison constants. + | li TISNUM, LJ_TISNUM +- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). +- | stw TMP3, TMPD ++ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | .FPU stw TMP3, TMPD + | li ZERO, 0 +- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). +- | lfs TOBIT, TMPD +- | stw TMP3, TMPD +- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) ++ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 
++ | .FPU lfs TOBIT, TMPD ++ | .FPU stw TMP3, TMPD ++ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) + | li TISNIL, LJ_TNIL +- | stw TMP0, TONUM_HI +- | lfs TONUM, TMPD ++ | .FPU stw TMP0, TONUM_HI ++ | .FPU lfs TONUM, TMPD + | // Modified copy of ins_next which handles function header dispatch, too. + | lwz INS, 0(PC) + | addi PC, PC, 4 +@@ -2715,7 +3099,35 @@ static void build_subroutines(BuildCtx *ctx) + |//-- Math helper functions ---------------------------------------------- + |//----------------------------------------------------------------------- + | +- |// NYI: Use internal implementations of floor, ceil, trunc. ++ |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp. ++ | ++ |.macro sfi2d, AHI, ALO ++ |.if not FPU ++ | mr. AHI, ALO ++ | bclr 12, 2 // Handle zero first. ++ | srawi TMP0, ALO, 31 ++ | xor TMP1, ALO, TMP0 ++ | sub TMP1, TMP1, TMP0 // Absolute value in TMP1. ++ | cntlzw AHI, TMP1 ++ | andix. TMP0, TMP0, 0x800 // Mask sign bit. ++ | slw TMP1, TMP1, AHI // Align mantissa left with leading 1. ++ | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI. ++ | slwi ALO, TMP1, 21 ++ | or AHI, AHI, TMP0 // Sign | Exponent. ++ | srwi TMP1, TMP1, 11 ++ | slwi AHI, AHI, 20 // Align left. ++ | add AHI, AHI, TMP1 // Add mantissa, increment exponent. ++ | blr ++ |.endif ++ |.endmacro ++ | ++ |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1. ++ |->vm_sfi2d_1: ++ | sfi2d CARG1, CARG2 ++ | ++ |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1. ++ |->vm_sfi2d_2: ++ | sfi2d CARG3, CARG4 + | + |->vm_modi: + | divwo. 
TMP0, CARG1, CARG2 +@@ -2783,21 +3195,21 @@ static void build_subroutines(BuildCtx *ctx) + | addi DISPATCH, r12, GG_G2DISP + | stw r11, CTSTATE->cb.slot + | stw r3, CTSTATE->cb.gpr[0] +- | stfd f1, CTSTATE->cb.fpr[0] ++ | .FPU stfd f1, CTSTATE->cb.fpr[0] + | stw r4, CTSTATE->cb.gpr[1] +- | stfd f2, CTSTATE->cb.fpr[1] ++ | .FPU stfd f2, CTSTATE->cb.fpr[1] + | stw r5, CTSTATE->cb.gpr[2] +- | stfd f3, CTSTATE->cb.fpr[2] ++ | .FPU stfd f3, CTSTATE->cb.fpr[2] + | stw r6, CTSTATE->cb.gpr[3] +- | stfd f4, CTSTATE->cb.fpr[3] ++ | .FPU stfd f4, CTSTATE->cb.fpr[3] + | stw r7, CTSTATE->cb.gpr[4] +- | stfd f5, CTSTATE->cb.fpr[4] ++ | .FPU stfd f5, CTSTATE->cb.fpr[4] + | stw r8, CTSTATE->cb.gpr[5] +- | stfd f6, CTSTATE->cb.fpr[5] ++ | .FPU stfd f6, CTSTATE->cb.fpr[5] + | stw r9, CTSTATE->cb.gpr[6] +- | stfd f7, CTSTATE->cb.fpr[6] ++ | .FPU stfd f7, CTSTATE->cb.fpr[6] + | stw r10, CTSTATE->cb.gpr[7] +- | stfd f8, CTSTATE->cb.fpr[7] ++ | .FPU stfd f8, CTSTATE->cb.fpr[7] + | addi TMP0, sp, CFRAME_SPACE+8 + | stw TMP0, CTSTATE->cb.stack + | mr CARG1, CTSTATE +@@ -2808,21 +3220,21 @@ static void build_subroutines(BuildCtx *ctx) + | lp BASE, L:CRET1->base + | li TISNUM, LJ_TISNUM // Setup type comparison constants. + | lp RC, L:CRET1->top +- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | li ZERO, 0 + | mr L, CRET1 +- | stw TMP3, TMPD +- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) ++ | .FPU stw TMP3, TMPD ++ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) + | lwz LFUNC:RB, FRAME_FUNC(BASE) +- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). +- | stw TMP0, TONUM_HI ++ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 
++ | .FPU stw TMP0, TONUM_HI + | li TISNIL, LJ_TNIL + | li_vmstate INTERP +- | lfs TOBIT, TMPD +- | stw TMP3, TMPD ++ | .FPU lfs TOBIT, TMPD ++ | .FPU stw TMP3, TMPD + | sub RC, RC, BASE + | st_vmstate +- | lfs TONUM, TMPD ++ | .FPU lfs TONUM, TMPD + | ins_callt + |.endif + | +@@ -2836,7 +3248,7 @@ static void build_subroutines(BuildCtx *ctx) + | mr CARG2, RA + | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) + | lwz CRET1, CTSTATE->cb.gpr[0] +- | lfd FARG1, CTSTATE->cb.fpr[0] ++ | .FPU lfd FARG1, CTSTATE->cb.fpr[0] + | lwz CRET2, CTSTATE->cb.gpr[1] + | b ->vm_leave_unw + |.endif +@@ -2870,14 +3282,14 @@ static void build_subroutines(BuildCtx *ctx) + | bge <1 + |2: + | bney cr1, >3 +- | lfd f1, CCSTATE->fpr[0] +- | lfd f2, CCSTATE->fpr[1] +- | lfd f3, CCSTATE->fpr[2] +- | lfd f4, CCSTATE->fpr[3] +- | lfd f5, CCSTATE->fpr[4] +- | lfd f6, CCSTATE->fpr[5] +- | lfd f7, CCSTATE->fpr[6] +- | lfd f8, CCSTATE->fpr[7] ++ | .FPU lfd f1, CCSTATE->fpr[0] ++ | .FPU lfd f2, CCSTATE->fpr[1] ++ | .FPU lfd f3, CCSTATE->fpr[2] ++ | .FPU lfd f4, CCSTATE->fpr[3] ++ | .FPU lfd f5, CCSTATE->fpr[4] ++ | .FPU lfd f6, CCSTATE->fpr[5] ++ | .FPU lfd f7, CCSTATE->fpr[6] ++ | .FPU lfd f8, CCSTATE->fpr[7] + |3: + | lp TMP0, CCSTATE->func + | lwz CARG2, CCSTATE->gpr[1] +@@ -2894,7 +3306,7 @@ static void build_subroutines(BuildCtx *ctx) + | lwz TMP2, -4(r14) + | lwz TMP0, 4(r14) + | stw CARG1, CCSTATE:TMP1->gpr[0] +- | stfd FARG1, CCSTATE:TMP1->fpr[0] ++ | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0] + | stw CARG2, CCSTATE:TMP1->gpr[1] + | mtlr TMP0 + | stw CARG3, CCSTATE:TMP1->gpr[2] +@@ -2923,19 +3335,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: + | // RA = src1*8, RD = src2*8, JMP with RD = target + |.if DUALNUM +- | lwzux TMP0, RA, BASE ++ | lwzux CARG1, RA, BASE + | addi PC, PC, 4 + | lwz CARG2, 4(RA) +- | lwzux TMP1, RD, BASE ++ | lwzux CARG3, RD, BASE + | lwz TMP2, -4(PC) +- | checknum cr0, TMP0 +- | lwz CARG3, 
4(RD) ++ | checknum cr0, CARG1 ++ | lwz CARG4, 4(RD) + | decode_RD4 TMP2, TMP2 +- | checknum cr1, TMP1 +- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) ++ | checknum cr1, CARG3 ++ | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16) + | bne cr0, >7 + | bne cr1, >8 +- | cmpw CARG2, CARG3 ++ | cmpw CARG2, CARG4 + if (op == BC_ISLT) { + | bge >2 + } else if (op == BC_ISGE) { +@@ -2946,28 +3358,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | ble >2 + } + |1: +- | add PC, PC, TMP2 ++ | add PC, PC, SAVE0 + |2: + | ins_next + | + |7: // RA is not an integer. + | bgt cr0, ->vmeta_comp + | // RA is a number. +- | lfd f0, 0(RA) ++ | .FPU lfd f0, 0(RA) + | bgt cr1, ->vmeta_comp + | blt cr1, >4 + | // RA is a number, RD is an integer. +- | tonum_i f1, CARG3 ++ |.if FPU ++ | tonum_i f1, CARG4 ++ |.else ++ | bl ->vm_sfi2d_2 ++ |.endif + | b >5 + | + |8: // RA is an integer, RD is not an integer. + | bgt cr1, ->vmeta_comp + | // RA is an integer, RD is a number. ++ |.if FPU + | tonum_i f0, CARG2 ++ |.else ++ | bl ->vm_sfi2d_1 ++ |.endif + |4: +- | lfd f1, 0(RD) ++ | .FPU lfd f1, 0(RD) + |5: ++ |.if FPU + | fcmpu cr0, f0, f1 ++ |.else ++ | blex __ledf2 ++ | cmpwi CRET1, 0 ++ |.endif + if (op == BC_ISLT) { + | bge <2 + } else if (op == BC_ISGE) { +@@ -3015,42 +3440,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + vk = op == BC_ISEQV; + | // RA = src1*8, RD = src2*8, JMP with RD = target + |.if DUALNUM +- | lwzux TMP0, RA, BASE ++ | lwzux CARG1, RA, BASE + | addi PC, PC, 4 + | lwz CARG2, 4(RA) +- | lwzux TMP1, RD, BASE +- | checknum cr0, TMP0 +- | lwz TMP2, -4(PC) +- | checknum cr1, TMP1 +- | decode_RD4 TMP2, TMP2 +- | lwz CARG3, 4(RD) ++ | lwzux CARG3, RD, BASE ++ | checknum cr0, CARG1 ++ | lwz SAVE0, -4(PC) ++ | checknum cr1, CARG3 ++ | decode_RD4 SAVE0, SAVE0 ++ | lwz CARG4, 4(RD) + | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt +- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) ++ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) + if (vk) { + | ble cr7, ->BC_ISEQN_Z + } else { + | ble 
cr7, ->BC_ISNEN_Z + } + |.else +- | lwzux TMP0, RA, BASE +- | lwz TMP2, 0(PC) ++ | lwzux CARG1, RA, BASE ++ | lwz SAVE0, 0(PC) + | lfd f0, 0(RA) + | addi PC, PC, 4 +- | lwzux TMP1, RD, BASE +- | checknum cr0, TMP0 +- | decode_RD4 TMP2, TMP2 ++ | lwzux CARG3, RD, BASE ++ | checknum cr0, CARG1 ++ | decode_RD4 SAVE0, SAVE0 + | lfd f1, 0(RD) +- | checknum cr1, TMP1 +- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) ++ | checknum cr1, CARG3 ++ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) + | bge cr0, >5 + | bge cr1, >5 + | fcmpu cr0, f0, f1 + if (vk) { + | bne >1 +- | add PC, PC, TMP2 ++ | add PC, PC, SAVE0 + } else { + | beq >1 +- | add PC, PC, TMP2 ++ | add PC, PC, SAVE0 + } + |1: + | ins_next +@@ -3058,36 +3483,36 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |5: // Either or both types are not numbers. + |.if not DUALNUM + | lwz CARG2, 4(RA) +- | lwz CARG3, 4(RD) ++ | lwz CARG4, 4(RD) + |.endif + |.if FFI +- | cmpwi cr7, TMP0, LJ_TCDATA +- | cmpwi cr5, TMP1, LJ_TCDATA ++ | cmpwi cr7, CARG1, LJ_TCDATA ++ | cmpwi cr5, CARG3, LJ_TCDATA + |.endif +- | not TMP3, TMP0 +- | cmplw TMP0, TMP1 +- | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? ++ | not TMP2, CARG1 ++ | cmplw CARG1, CARG3 ++ | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive? + |.if FFI + | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq + |.endif +- | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? ++ | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata? + |.if FFI + | beq cr7, ->vmeta_equal_cd + |.endif +- | cmplw cr5, CARG2, CARG3 ++ | cmplw cr5, CARG2, CARG4 + | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. + | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. + | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. +- | mr SAVE0, PC ++ | mr SAVE1, PC + | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. + | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. 
+ if (vk) { + | bne cr0, >6 +- | add PC, PC, TMP2 ++ | add PC, PC, SAVE0 + |6: + } else { + | beq cr0, >6 +- | add PC, PC, TMP2 ++ | add PC, PC, SAVE0 + |6: + } + |.if DUALNUM +@@ -3102,6 +3527,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | + | // Different tables or userdatas. Need to check __eq metamethod. + | // Field metatable must be at same offset for GCtab and GCudata! ++ | mr CARG3, CARG4 + | lwz TAB:TMP2, TAB:CARG2->metatable + | li CARG4, 1-vk // ne = 0 or 1. + | cmplwi TAB:TMP2, 0 +@@ -3109,7 +3535,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lbz TMP2, TAB:TMP2->nomm + | andix. TMP2, TMP2, 1<vmeta_equal // Handle __eq metamethod. + break; + +@@ -3150,16 +3576,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + vk = op == BC_ISEQN; + | // RA = src*8, RD = num_const*8, JMP with RD = target + |.if DUALNUM +- | lwzux TMP0, RA, BASE ++ | lwzux CARG1, RA, BASE + | addi PC, PC, 4 + | lwz CARG2, 4(RA) +- | lwzux TMP1, RD, KBASE +- | checknum cr0, TMP0 +- | lwz TMP2, -4(PC) +- | checknum cr1, TMP1 +- | decode_RD4 TMP2, TMP2 +- | lwz CARG3, 4(RD) +- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) ++ | lwzux CARG3, RD, KBASE ++ | checknum cr0, CARG1 ++ | lwz SAVE0, -4(PC) ++ | checknum cr1, CARG3 ++ | decode_RD4 SAVE0, SAVE0 ++ | lwz CARG4, 4(RD) ++ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) + if (vk) { + |->BC_ISEQN_Z: + } else { +@@ -3167,7 +3593,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + } + | bne cr0, >7 + | bne cr1, >8 +- | cmpw CARG2, CARG3 ++ | cmpw CARG2, CARG4 + |4: + |.else + if (vk) { +@@ -3175,20 +3601,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + } else { + |->BC_ISNEN_Z: // Dummy label. 
+ } +- | lwzx TMP0, BASE, RA ++ | lwzx CARG1, BASE, RA + | addi PC, PC, 4 + | lfdx f0, BASE, RA +- | lwz TMP2, -4(PC) ++ | lwz SAVE0, -4(PC) + | lfdx f1, KBASE, RD +- | decode_RD4 TMP2, TMP2 +- | checknum TMP0 +- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) ++ | decode_RD4 SAVE0, SAVE0 ++ | checknum CARG1 ++ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) + | bge >3 + | fcmpu cr0, f0, f1 + |.endif + if (vk) { + | bne >1 +- | add PC, PC, TMP2 ++ | add PC, PC, SAVE0 + |1: + |.if not FFI + |3: +@@ -3199,13 +3625,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |.if not FFI + |3: + |.endif +- | add PC, PC, TMP2 ++ | add PC, PC, SAVE0 + |2: + } + | ins_next + |.if FFI + |3: +- | cmpwi TMP0, LJ_TCDATA ++ | cmpwi CARG1, LJ_TCDATA + | beq ->vmeta_equal_cd + | b <1 + |.endif +@@ -3213,18 +3639,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |7: // RA is not an integer. + | bge cr0, <3 + | // RA is a number. +- | lfd f0, 0(RA) ++ | .FPU lfd f0, 0(RA) + | blt cr1, >1 + | // RA is a number, RD is an integer. +- | tonum_i f1, CARG3 ++ |.if FPU ++ | tonum_i f1, CARG4 ++ |.else ++ | bl ->vm_sfi2d_2 ++ |.endif + | b >2 + | + |8: // RA is an integer, RD is a number. 
++ |.if FPU + | tonum_i f0, CARG2 ++ |.else ++ | bl ->vm_sfi2d_1 ++ |.endif + |1: +- | lfd f1, 0(RD) ++ | .FPU lfd f1, 0(RD) + |2: ++ |.if FPU + | fcmpu cr0, f0, f1 ++ |.else ++ | blex __ledf2 ++ | cmpwi CRET1, 0 ++ |.endif + | b <4 + |.endif + break; +@@ -3279,7 +3718,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | add PC, PC, TMP2 + } else { + | li TMP1, LJ_TFALSE ++ |.if FPU + | lfdx f0, BASE, RD ++ |.else ++ | lwzux CARG1, RD, BASE ++ | lwz CARG2, 4(RD) ++ |.endif + | cmplw TMP0, TMP1 + if (op == BC_ISTC) { + | bge >1 +@@ -3288,7 +3732,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + } + | addis PC, PC, -(BCBIAS_J*4 >> 16) + | decode_RD4 TMP2, INS ++ |.if FPU + | stfdx f0, BASE, RA ++ |.else ++ | stwux CARG1, RA, BASE ++ | stw CARG2, 4(RA) ++ |.endif + | add PC, PC, TMP2 + |1: + } +@@ -3323,8 +3772,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + case BC_MOV: + | // RA = dst*8, RD = src*8 + | ins_next1 ++ |.if FPU + | lfdx f0, BASE, RD + | stfdx f0, BASE, RA ++ |.else ++ | lwzux TMP0, RD, BASE ++ | lwz TMP1, 4(RD) ++ | stwux TMP0, RA, BASE ++ | stw TMP1, 4(RA) ++ |.endif + | ins_next2 + break; + case BC_NOT: +@@ -3426,44 +3882,65 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + ||switch (vk) { + ||case 0: +- | lwzx TMP1, BASE, RB ++ | lwzx CARG1, BASE, RB + | .if DUALNUM +- | lwzx TMP2, KBASE, RC ++ | lwzx CARG3, KBASE, RC + | .endif ++ | .if FPU + | lfdx f14, BASE, RB + | lfdx f15, KBASE, RC ++ | .else ++ | add TMP1, BASE, RB ++ | add TMP2, KBASE, RC ++ | lwz CARG2, 4(TMP1) ++ | lwz CARG4, 4(TMP2) ++ | .endif + | .if DUALNUM +- | checknum cr0, TMP1 +- | checknum cr1, TMP2 ++ | checknum cr0, CARG1 ++ | checknum cr1, CARG3 + | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt + | bge ->vmeta_arith_vn + | .else +- | checknum TMP1; bge ->vmeta_arith_vn ++ | checknum CARG1; bge ->vmeta_arith_vn + | .endif + || break; + ||case 1: +- | lwzx TMP1, BASE, RB ++ | lwzx 
CARG1, BASE, RB + | .if DUALNUM +- | lwzx TMP2, KBASE, RC ++ | lwzx CARG3, KBASE, RC + | .endif ++ | .if FPU + | lfdx f15, BASE, RB + | lfdx f14, KBASE, RC ++ | .else ++ | add TMP1, BASE, RB ++ | add TMP2, KBASE, RC ++ | lwz CARG2, 4(TMP1) ++ | lwz CARG4, 4(TMP2) ++ | .endif + | .if DUALNUM +- | checknum cr0, TMP1 +- | checknum cr1, TMP2 ++ | checknum cr0, CARG1 ++ | checknum cr1, CARG3 + | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt + | bge ->vmeta_arith_nv + | .else +- | checknum TMP1; bge ->vmeta_arith_nv ++ | checknum CARG1; bge ->vmeta_arith_nv + | .endif + || break; + ||default: +- | lwzx TMP1, BASE, RB +- | lwzx TMP2, BASE, RC ++ | lwzx CARG1, BASE, RB ++ | lwzx CARG3, BASE, RC ++ | .if FPU + | lfdx f14, BASE, RB + | lfdx f15, BASE, RC +- | checknum cr0, TMP1 +- | checknum cr1, TMP2 ++ | .else ++ | add TMP1, BASE, RB ++ | add TMP2, BASE, RC ++ | lwz CARG2, 4(TMP1) ++ | lwz CARG4, 4(TMP2) ++ | .endif ++ | checknum cr0, CARG1 ++ | checknum cr1, CARG3 + | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt + | bge ->vmeta_arith_vv + || break; +@@ -3497,48 +3974,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | fsub a, b, a // b - floor(b/c)*c + |.endmacro + | ++ |.macro sfpmod ++ |->BC_MODVN_Z: ++ | stw CARG1, SFSAVE_1 ++ | stw CARG2, SFSAVE_2 ++ | mr SAVE0, CARG3 ++ | mr SAVE1, CARG4 ++ | blex __divdf3 ++ | blex floor ++ | mr CARG3, SAVE0 ++ | mr CARG4, SAVE1 ++ | blex __muldf3 ++ | mr CARG3, CRET1 ++ | mr CARG4, CRET2 ++ | lwz CARG1, SFSAVE_1 ++ | lwz CARG2, SFSAVE_2 ++ | blex __subdf3 ++ |.endmacro ++ | + |.macro ins_arithfp, fpins + | ins_arithpre + |.if "fpins" == "fpmod_" + | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. +- |.else ++ |.elif FPU + | fpins f0, f14, f15 + | ins_next1 + | stfdx f0, BASE, RA + | ins_next2 ++ |.else ++ | blex __divdf3 // Only soft-float div uses this macro. 
++ | ins_next1 ++ | stwux CRET1, RA, BASE ++ | stw CRET2, 4(RA) ++ | ins_next2 + |.endif + |.endmacro + | +- |.macro ins_arithdn, intins, fpins ++ |.macro ins_arithdn, intins, fpins, fpcall + | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + ||switch (vk) { + ||case 0: +- | lwzux TMP1, RB, BASE +- | lwzux TMP2, RC, KBASE +- | lwz CARG1, 4(RB) +- | checknum cr0, TMP1 +- | lwz CARG2, 4(RC) ++ | lwzux CARG1, RB, BASE ++ | lwzux CARG3, RC, KBASE ++ | lwz CARG2, 4(RB) ++ | checknum cr0, CARG1 ++ | lwz CARG4, 4(RC) ++ | checknum cr1, CARG3 + || break; + ||case 1: +- | lwzux TMP1, RB, BASE +- | lwzux TMP2, RC, KBASE +- | lwz CARG2, 4(RB) +- | checknum cr0, TMP1 +- | lwz CARG1, 4(RC) ++ | lwzux CARG3, RB, BASE ++ | lwzux CARG1, RC, KBASE ++ | lwz CARG4, 4(RB) ++ | checknum cr0, CARG3 ++ | lwz CARG2, 4(RC) ++ | checknum cr1, CARG1 + || break; + ||default: +- | lwzux TMP1, RB, BASE +- | lwzux TMP2, RC, BASE +- | lwz CARG1, 4(RB) +- | checknum cr0, TMP1 +- | lwz CARG2, 4(RC) ++ | lwzux CARG1, RB, BASE ++ | lwzux CARG3, RC, BASE ++ | lwz CARG2, 4(RB) ++ | checknum cr0, CARG1 ++ | lwz CARG4, 4(RC) ++ | checknum cr1, CARG3 + || break; + ||} +- | checknum cr1, TMP2 + | bne >5 + | bne cr1, >5 +- | intins CARG1, CARG1, CARG2 ++ |.if "intins" == "intmod" ++ | mr CARG1, CARG2 ++ | mr CARG2, CARG4 ++ |.endif ++ | intins CARG1, CARG2, CARG4 + | bso >4 + |1: + | ins_next1 +@@ -3550,29 +4057,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | checkov TMP0, <1 // Ignore unrelated overflow. + | ins_arithfallback b + |5: // FP variant. ++ |.if FPU + ||if (vk == 1) { + | lfd f15, 0(RB) +- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt + | lfd f14, 0(RC) + ||} else { + | lfd f14, 0(RB) +- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt + | lfd f15, 0(RC) + ||} ++ |.endif ++ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt + | ins_arithfallback bge + |.if "fpins" == "fpmod_" + | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 
+ |.else ++ |.if FPU + | fpins f0, f14, f15 +- | ins_next1 + | stfdx f0, BASE, RA ++ |.else ++ |.if "fpcall" == "sfpmod" ++ | sfpmod ++ |.else ++ | blex fpcall ++ |.endif ++ | stwux CRET1, RA, BASE ++ | stw CRET2, 4(RA) ++ |.endif ++ | ins_next1 + | b <2 + |.endif + |.endmacro + | +- |.macro ins_arith, intins, fpins ++ |.macro ins_arith, intins, fpins, fpcall + |.if DUALNUM +- | ins_arithdn intins, fpins ++ | ins_arithdn intins, fpins, fpcall + |.else + | ins_arithfp fpins + |.endif +@@ -3587,9 +4105,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | addo. TMP0, TMP0, TMP3 + | add y, a, b + |.endmacro +- | ins_arith addo32., fadd ++ | ins_arith addo32., fadd, __adddf3 + |.else +- | ins_arith addo., fadd ++ | ins_arith addo., fadd, __adddf3 + |.endif + break; + case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: +@@ -3601,36 +4119,48 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | subo. TMP0, TMP0, TMP3 + | sub y, a, b + |.endmacro +- | ins_arith subo32., fsub ++ | ins_arith subo32., fsub, __subdf3 + |.else +- | ins_arith subo., fsub ++ | ins_arith subo., fsub, __subdf3 + |.endif + break; + case BC_MULVN: case BC_MULNV: case BC_MULVV: +- | ins_arith mullwo., fmul ++ | ins_arith mullwo., fmul, __muldf3 + break; + case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: + | ins_arithfp fdiv + break; + case BC_MODVN: +- | ins_arith intmod, fpmod ++ | ins_arith intmod, fpmod, sfpmod + break; + case BC_MODNV: case BC_MODVV: +- | ins_arith intmod, fpmod_ ++ | ins_arith intmod, fpmod_, sfpmod + break; + case BC_POW: + | // NYI: (partial) integer arithmetic. 
+- | lwzx TMP1, BASE, RB ++ | lwzx CARG1, BASE, RB ++ | lwzx CARG3, BASE, RC ++ |.if FPU + | lfdx FARG1, BASE, RB +- | lwzx TMP2, BASE, RC + | lfdx FARG2, BASE, RC +- | checknum cr0, TMP1 +- | checknum cr1, TMP2 ++ |.else ++ | add TMP1, BASE, RB ++ | add TMP2, BASE, RC ++ | lwz CARG2, 4(TMP1) ++ | lwz CARG4, 4(TMP2) ++ |.endif ++ | checknum cr0, CARG1 ++ | checknum cr1, CARG3 + | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt + | bge ->vmeta_arith_vv + | blex pow + | ins_next1 ++ |.if FPU + | stfdx FARG1, BASE, RA ++ |.else ++ | stwux CARG1, RA, BASE ++ | stw CARG2, 4(RA) ++ |.endif + | ins_next2 + break; + +@@ -3650,8 +4180,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lp BASE, L->base + | bne ->vmeta_binop + | ins_next1 ++ |.if FPU + | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. + | stfdx f0, BASE, RA ++ |.else ++ | lwzux TMP0, SAVE0, BASE ++ | lwz TMP1, 4(SAVE0) ++ | stwux TMP0, RA, BASE ++ | stw TMP1, 4(RA) ++ |.endif + | ins_next2 + break; + +@@ -3714,8 +4251,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + case BC_KNUM: + | // RA = dst*8, RD = num_const*8 + | ins_next1 ++ |.if FPU + | lfdx f0, KBASE, RD + | stfdx f0, BASE, RA ++ |.else ++ | lwzux TMP0, RD, KBASE ++ | lwz TMP1, 4(RD) ++ | stwux TMP0, RA, BASE ++ | stw TMP1, 4(RA) ++ |.endif + | ins_next2 + break; + case BC_KPRI: +@@ -3748,8 +4292,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lwzx UPVAL:RB, LFUNC:RB, RD + | ins_next1 + | lwz TMP1, UPVAL:RB->v ++ |.if FPU + | lfd f0, 0(TMP1) + | stfdx f0, BASE, RA ++ |.else ++ | lwz TMP2, 0(TMP1) ++ | lwz TMP3, 4(TMP1) ++ | stwux TMP2, RA, BASE ++ | stw TMP3, 4(RA) ++ |.endif + | ins_next2 + break; + case BC_USETV: +@@ -3757,14 +4308,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lwz LFUNC:RB, FRAME_FUNC(BASE) + | srwi RA, RA, 1 + | addi RA, RA, offsetof(GCfuncL, uvptr) ++ |.if FPU + | lfdux f0, RD, BASE ++ |.else ++ | lwzux CARG1, RD, BASE ++ | lwz CARG3, 4(RD) ++ |.endif + | lwzx 
UPVAL:RB, LFUNC:RB, RA + | lbz TMP3, UPVAL:RB->marked + | lwz CARG2, UPVAL:RB->v + | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) + | lbz TMP0, UPVAL:RB->closed + | lwz TMP2, 0(RD) ++ |.if FPU + | stfd f0, 0(CARG2) ++ |.else ++ | stw CARG1, 0(CARG2) ++ | stw CARG3, 4(CARG2) ++ |.endif + | cmplwi cr1, TMP0, 0 + | lwz TMP1, 4(RD) + | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq +@@ -3820,11 +4381,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lwz LFUNC:RB, FRAME_FUNC(BASE) + | srwi RA, RA, 1 + | addi RA, RA, offsetof(GCfuncL, uvptr) ++ |.if FPU + | lfdx f0, KBASE, RD ++ |.else ++ | lwzux TMP2, RD, KBASE ++ | lwz TMP3, 4(RD) ++ |.endif + | lwzx UPVAL:RB, LFUNC:RB, RA + | ins_next1 + | lwz TMP1, UPVAL:RB->v ++ |.if FPU + | stfd f0, 0(TMP1) ++ |.else ++ | stw TMP2, 0(TMP1) ++ | stw TMP3, 4(TMP1) ++ |.endif + | ins_next2 + break; + case BC_USETP: +@@ -3972,11 +4543,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |.endif + | ble ->vmeta_tgetv // Integer key and in array part? + | lwzx TMP0, TMP1, TMP2 ++ |.if FPU + | lfdx f14, TMP1, TMP2 ++ |.else ++ | lwzux SAVE0, TMP1, TMP2 ++ | lwz SAVE1, 4(TMP1) ++ |.endif + | checknil TMP0; beq >2 + |1: + | ins_next1 ++ |.if FPU + | stfdx f14, BASE, RA ++ |.else ++ | stwux SAVE0, RA, BASE ++ | stw SAVE1, 4(RA) ++ |.endif + | ins_next2 + | + |2: // Check for __index if table value is nil. +@@ -4052,12 +4633,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lwz TMP1, TAB:RB->asize + | lwz TMP2, TAB:RB->array + | cmplw TMP0, TMP1; bge ->vmeta_tgetb ++ |.if FPU + | lwzx TMP1, TMP2, RC + | lfdx f0, TMP2, RC ++ |.else ++ | lwzux TMP1, TMP2, RC ++ | lwz TMP3, 4(TMP2) ++ |.endif + | checknil TMP1; beq >5 + |1: + | ins_next1 ++ |.if FPU + | stfdx f0, BASE, RA ++ |.else ++ | stwux TMP1, RA, BASE ++ | stw TMP3, 4(RA) ++ |.endif + | ins_next2 + | + |5: // Check for __index if table value is nil. 
+@@ -4087,10 +4678,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | cmplw TMP0, CARG2 + | slwi TMP2, CARG2, 3 + | ble ->vmeta_tgetr // In array part? ++ |.if FPU + | lfdx f14, TMP1, TMP2 ++ |.else ++ | lwzux SAVE0, TMP2, TMP1 ++ | lwz SAVE1, 4(TMP2) ++ |.endif + |->BC_TGETR_Z: + | ins_next1 ++ |.if FPU + | stfdx f14, BASE, RA ++ |.else ++ | stwux SAVE0, RA, BASE ++ | stw SAVE1, 4(RA) ++ |.endif + | ins_next2 + break; + +@@ -4131,11 +4732,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | ble ->vmeta_tsetv // Integer key and in array part? + | lwzx TMP2, TMP1, TMP0 + | lbz TMP3, TAB:RB->marked ++ |.if FPU + | lfdx f14, BASE, RA ++ |.else ++ | add SAVE1, BASE, RA ++ | lwz SAVE0, 0(SAVE1) ++ | lwz SAVE1, 4(SAVE1) ++ |.endif + | checknil TMP2; beq >3 + |1: + | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) ++ |.if FPU + | stfdx f14, TMP1, TMP0 ++ |.else ++ | stwux SAVE0, TMP1, TMP0 ++ | stw SAVE1, 4(TMP1) ++ |.endif + | bne >7 + |2: + | ins_next +@@ -4176,7 +4788,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lwz NODE:TMP2, TAB:RB->node + | stb ZERO, TAB:RB->nomm // Clear metamethod cache. + | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask ++ |.if FPU + | lfdx f14, BASE, RA ++ |.else ++ | add CARG2, BASE, RA ++ | lwz SAVE0, 0(CARG2) ++ | lwz SAVE1, 4(CARG2) ++ |.endif + | slwi TMP0, TMP1, 5 + | slwi TMP1, TMP1, 3 + | sub TMP1, TMP0, TMP1 +@@ -4192,7 +4810,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | checknil CARG2; beq >4 // Key found, but nil value? + |2: + | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) ++ |.if FPU + | stfd f14, NODE:TMP2->val ++ |.else ++ | stw SAVE0, NODE:TMP2->val.u32.hi ++ | stw SAVE1, NODE:TMP2->val.u32.lo ++ |.endif + | bne >7 + |3: + | ins_next +@@ -4231,7 +4854,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) + | // Returns TValue *. 
+ | lp BASE, L->base ++ |.if FPU + | stfd f14, 0(CRET1) ++ |.else ++ | stw SAVE0, 0(CRET1) ++ | stw SAVE1, 4(CRET1) ++ |.endif + | b <3 // No 2nd write barrier needed. + | + |7: // Possible table write barrier for the value. Skip valiswhite check. +@@ -4248,13 +4876,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lwz TMP2, TAB:RB->array + | lbz TMP3, TAB:RB->marked + | cmplw TMP0, TMP1 ++ |.if FPU + | lfdx f14, BASE, RA ++ |.else ++ | add CARG2, BASE, RA ++ | lwz SAVE0, 0(CARG2) ++ | lwz SAVE1, 4(CARG2) ++ |.endif + | bge ->vmeta_tsetb + | lwzx TMP1, TMP2, RC + | checknil TMP1; beq >5 + |1: + | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) ++ |.if FPU + | stfdx f14, TMP2, RC ++ |.else ++ | stwux SAVE0, RC, TMP2 ++ | stw SAVE1, 4(RC) ++ |.endif + | bne >7 + |2: + | ins_next +@@ -4294,10 +4933,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |2: + | cmplw TMP0, CARG3 + | slwi TMP2, CARG3, 3 ++ |.if FPU + | lfdx f14, BASE, RA ++ |.else ++ | lwzux SAVE0, RA, BASE ++ | lwz SAVE1, 4(RA) ++ |.endif + | ble ->vmeta_tsetr // In array part? + | ins_next1 ++ |.if FPU + | stfdx f14, TMP1, TMP2 ++ |.else ++ | stwux SAVE0, TMP1, TMP2 ++ | stw SAVE1, 4(TMP1) ++ |.endif + | ins_next2 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. +@@ -4327,10 +4976,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | add TMP1, TMP1, TMP0 + | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) + |3: // Copy result slots to table. 
++ |.if FPU + | lfd f0, 0(RA) ++ |.else ++ | lwz SAVE0, 0(RA) ++ | lwz SAVE1, 4(RA) ++ |.endif + | addi RA, RA, 8 + | cmpw cr1, RA, TMP2 ++ |.if FPU + | stfd f0, 0(TMP1) ++ |.else ++ | stw SAVE0, 0(TMP1) ++ | stw SAVE1, 4(TMP1) ++ |.endif + | addi TMP1, TMP1, 8 + | blt cr1, <3 + | bne >7 +@@ -4397,9 +5056,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | beq cr1, >3 + |2: + | addi TMP3, TMP2, 8 ++ |.if FPU + | lfdx f0, RA, TMP2 ++ |.else ++ | add CARG3, RA, TMP2 ++ | lwz CARG1, 0(CARG3) ++ | lwz CARG2, 4(CARG3) ++ |.endif + | cmplw cr1, TMP3, NARGS8:RC ++ |.if FPU + | stfdx f0, BASE, TMP2 ++ |.else ++ | stwux CARG1, TMP2, BASE ++ | stw CARG2, 4(TMP2) ++ |.endif + | mr TMP2, TMP3 + | bne cr1, <2 + |3: +@@ -4432,14 +5102,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | add BASE, BASE, RA + | lwz TMP1, -24(BASE) + | lwz LFUNC:RB, -20(BASE) ++ |.if FPU + | lfd f1, -8(BASE) + | lfd f0, -16(BASE) ++ |.else ++ | lwz CARG1, -8(BASE) ++ | lwz CARG2, -4(BASE) ++ | lwz CARG3, -16(BASE) ++ | lwz CARG4, -12(BASE) ++ |.endif + | stw TMP1, 0(BASE) // Copy callable. + | stw LFUNC:RB, 4(BASE) + | checkfunc TMP1 +- | stfd f1, 16(BASE) // Copy control var. + | li NARGS8:RC, 16 // Iterators get 2 arguments. ++ |.if FPU ++ | stfd f1, 16(BASE) // Copy control var. + | stfdu f0, 8(BASE) // Copy state. ++ |.else ++ | stw CARG1, 16(BASE) // Copy control var. ++ | stw CARG2, 20(BASE) ++ | stwu CARG3, 8(BASE) // Copy state. ++ | stw CARG4, 4(BASE) ++ |.endif + | bne ->vmeta_call + | ins_call + break; +@@ -4460,7 +5144,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | slwi TMP3, RC, 3 + | bge >5 // Index points after array part? 
+ | lwzx TMP2, TMP1, TMP3 ++ |.if FPU + | lfdx f0, TMP1, TMP3 ++ |.else ++ | lwzux CARG1, TMP3, TMP1 ++ | lwz CARG2, 4(TMP3) ++ |.endif + | checknil TMP2 + | lwz INS, -4(PC) + | beq >4 +@@ -4472,7 +5161,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |.endif + | addi RC, RC, 1 + | addis TMP3, PC, -(BCBIAS_J*4 >> 16) ++ |.if FPU + | stfd f0, 8(RA) ++ |.else ++ | stw CARG1, 8(RA) ++ | stw CARG2, 12(RA) ++ |.endif + | decode_RD4 TMP1, INS + | stw RC, -4(RA) // Update control var. + | add PC, TMP1, TMP3 +@@ -4497,17 +5191,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | slwi RB, RC, 3 + | sub TMP3, TMP3, RB + | lwzx RB, TMP2, TMP3 ++ |.if FPU + | lfdx f0, TMP2, TMP3 ++ |.else ++ | add CARG3, TMP2, TMP3 ++ | lwz CARG1, 0(CARG3) ++ | lwz CARG2, 4(CARG3) ++ |.endif + | add NODE:TMP3, TMP2, TMP3 + | checknil RB + | lwz INS, -4(PC) + | beq >7 ++ |.if FPU + | lfd f1, NODE:TMP3->key ++ |.else ++ | lwz CARG3, NODE:TMP3->key.u32.hi ++ | lwz CARG4, NODE:TMP3->key.u32.lo ++ |.endif + | addis TMP2, PC, -(BCBIAS_J*4 >> 16) ++ |.if FPU + | stfd f0, 8(RA) ++ |.else ++ | stw CARG1, 8(RA) ++ | stw CARG2, 12(RA) ++ |.endif + | add RC, RC, TMP0 + | decode_RD4 TMP1, INS ++ |.if FPU + | stfd f1, 0(RA) ++ |.else ++ | stw CARG3, 0(RA) ++ | stw CARG4, 4(RA) ++ |.endif + | addi RC, RC, 1 + | add PC, TMP1, TMP2 + | stw RC, -4(RA) // Update control var. +@@ -4573,9 +5288,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | subi TMP2, TMP2, 16 + | ble >2 // No vararg slots? + |1: // Copy vararg slots to destination slots. ++ |.if FPU + | lfd f0, 0(RC) ++ |.else ++ | lwz CARG1, 0(RC) ++ | lwz CARG2, 4(RC) ++ |.endif + | addi RC, RC, 8 ++ |.if FPU + | stfd f0, 0(RA) ++ |.else ++ | stw CARG1, 0(RA) ++ | stw CARG2, 4(RA) ++ |.endif + | cmplw RA, TMP2 + | cmplw cr1, RC, TMP3 + | bge >3 // All destination slots filled? 
+@@ -4598,9 +5323,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | addi MULTRES, TMP1, 8 + | bgt >7 + |6: ++ |.if FPU + | lfd f0, 0(RC) ++ |.else ++ | lwz CARG1, 0(RC) ++ | lwz CARG2, 4(RC) ++ |.endif + | addi RC, RC, 8 ++ |.if FPU + | stfd f0, 0(RA) ++ |.else ++ | stw CARG1, 0(RA) ++ | stw CARG2, 4(RA) ++ |.endif + | cmplw RC, TMP3 + | addi RA, RA, 8 + | blt <6 // More vararg slots? +@@ -4651,14 +5386,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | li TMP1, 0 + |2: + | addi TMP3, TMP1, 8 ++ |.if FPU + | lfdx f0, RA, TMP1 ++ |.else ++ | add CARG3, RA, TMP1 ++ | lwz CARG1, 0(CARG3) ++ | lwz CARG2, 4(CARG3) ++ |.endif + | cmpw TMP3, RC ++ |.if FPU + | stfdx f0, TMP2, TMP1 ++ |.else ++ | add CARG3, TMP2, TMP1 ++ | stw CARG1, 0(CARG3) ++ | stw CARG2, 4(CARG3) ++ |.endif + | beq >3 + | addi TMP1, TMP3, 8 ++ |.if FPU + | lfdx f1, RA, TMP3 ++ |.else ++ | add CARG3, RA, TMP3 ++ | lwz CARG1, 0(CARG3) ++ | lwz CARG2, 4(CARG3) ++ |.endif + | cmpw TMP1, RC ++ |.if FPU + | stfdx f1, TMP2, TMP3 ++ |.else ++ | add CARG3, TMP2, TMP3 ++ | stw CARG1, 0(CARG3) ++ | stw CARG2, 4(CARG3) ++ |.endif + | bne <2 + |3: + |5: +@@ -4700,8 +5459,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | subi TMP2, BASE, 8 + | decode_RB8 RB, INS + if (op == BC_RET1) { ++ |.if FPU + | lfd f0, 0(RA) + | stfd f0, 0(TMP2) ++ |.else ++ | lwz CARG1, 0(RA) ++ | lwz CARG2, 4(RA) ++ | stw CARG1, 0(TMP2) ++ | stw CARG2, 4(TMP2) ++ |.endif + } + |5: + | cmplw RB, RD +@@ -4762,11 +5528,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |4: + | stw CARG1, FORL_IDX*8+4(RA) + } else { +- | lwz TMP3, FORL_STEP*8(RA) ++ | lwz SAVE0, FORL_STEP*8(RA) + | lwz CARG3, FORL_STEP*8+4(RA) + | lwz TMP2, FORL_STOP*8(RA) + | lwz CARG2, FORL_STOP*8+4(RA) +- | cmplw cr7, TMP3, TISNUM ++ | cmplw cr7, SAVE0, TISNUM + | cmplw cr1, TMP2, TISNUM + | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq + | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq +@@ -4809,41 +5575,80 @@ static void 
build_ins(BuildCtx *ctx, BCOp op, int defop) + if (vk) { + |.if DUALNUM + |9: // FP loop. ++ |.if FPU + | lfd f1, FORL_IDX*8(RA) + |.else ++ | lwz CARG1, FORL_IDX*8(RA) ++ | lwz CARG2, FORL_IDX*8+4(RA) ++ |.endif ++ |.else + | lfdux f1, RA, BASE + |.endif ++ |.if FPU + | lfd f3, FORL_STEP*8(RA) + | lfd f2, FORL_STOP*8(RA) +- | lwz TMP3, FORL_STEP*8(RA) + | fadd f1, f1, f3 + | stfd f1, FORL_IDX*8(RA) ++ |.else ++ | lwz CARG3, FORL_STEP*8(RA) ++ | lwz CARG4, FORL_STEP*8+4(RA) ++ | mr SAVE1, RD ++ | blex __adddf3 ++ | mr RD, SAVE1 ++ | stw CRET1, FORL_IDX*8(RA) ++ | stw CRET2, FORL_IDX*8+4(RA) ++ | lwz CARG3, FORL_STOP*8(RA) ++ | lwz CARG4, FORL_STOP*8+4(RA) ++ |.endif ++ | lwz SAVE0, FORL_STEP*8(RA) + } else { + |.if DUALNUM + |9: // FP loop. + |.else + | lwzux TMP1, RA, BASE +- | lwz TMP3, FORL_STEP*8(RA) ++ | lwz SAVE0, FORL_STEP*8(RA) + | lwz TMP2, FORL_STOP*8(RA) + | cmplw cr0, TMP1, TISNUM +- | cmplw cr7, TMP3, TISNUM ++ | cmplw cr7, SAVE0, TISNUM + | cmplw cr1, TMP2, TISNUM + |.endif ++ |.if FPU + | lfd f1, FORL_IDX*8(RA) ++ |.else ++ | lwz CARG1, FORL_IDX*8(RA) ++ | lwz CARG2, FORL_IDX*8+4(RA) ++ |.endif + | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt + | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt ++ |.if FPU + | lfd f2, FORL_STOP*8(RA) ++ |.else ++ | lwz CARG3, FORL_STOP*8(RA) ++ | lwz CARG4, FORL_STOP*8+4(RA) ++ |.endif + | bge ->vmeta_for + } +- | cmpwi cr6, TMP3, 0 ++ | cmpwi cr6, SAVE0, 0 + if (op != BC_JFORL) { + | srwi RD, RD, 1 + } ++ |.if FPU + | stfd f1, FORL_EXT*8(RA) ++ |.else ++ | stw CARG1, FORL_EXT*8(RA) ++ | stw CARG2, FORL_EXT*8+4(RA) ++ |.endif + if (op != BC_JFORL) { + | add RD, PC, RD + } ++ |.if FPU + | fcmpu cr0, f1, f2 ++ |.else ++ | mr SAVE1, RD ++ | blex __ledf2 ++ | cmpwi CRET1, 0 ++ | mr RD, SAVE1 ++ |.endif + if (op == BC_JFORI) { + | addis PC, RD, -(BCBIAS_J*4 >> 16) + } +-- +2.20.1 + diff --git a/SOURCES/0011-Use-https-for-freelists.org-links.patch b/SOURCES/0011-Use-https-for-freelists.org-links.patch new file mode 100644 index 0000000..c0c2a19 
--- /dev/null +++ b/SOURCES/0011-Use-https-for-freelists.org-links.patch @@ -0,0 +1,25 @@ +From f3d75075ed91137699c6071abe49e2252e794a9c Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Fri, 18 Aug 2017 12:52:14 +0200 +Subject: [PATCH 11/72] Use https for freelists.org links. + +--- + doc/ext_ffi_semantics.html | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html +index 899640c..ae3c037 100644 +--- a/doc/ext_ffi_semantics.html ++++ b/doc/ext_ffi_semantics.html +@@ -844,7 +844,7 @@ place of a type, you'd need to use ffi.typeof("int") instead. +

+ The main use for parameterized types are libraries implementing abstract + data types +-(» example), ++(example), + similar to what can be achieved with C++ template metaprogramming. + Another use case are derived types of anonymous structs, which avoids + pollution of the global struct namespace. +-- +2.20.1 + diff --git a/SOURCES/0012-x64-LJ_GC64-Fix-fallback-case-of-asm_fuseloadk64.patch b/SOURCES/0012-x64-LJ_GC64-Fix-fallback-case-of-asm_fuseloadk64.patch new file mode 100644 index 0000000..80ca5b0 --- /dev/null +++ b/SOURCES/0012-x64-LJ_GC64-Fix-fallback-case-of-asm_fuseloadk64.patch @@ -0,0 +1,25 @@ +From 6b0824852677cc12570c20a3211fbfe0e4f0ce14 Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Mon, 28 Aug 2017 10:43:37 +0200 +Subject: [PATCH 12/72] x64/LJ_GC64: Fix fallback case of asm_fuseloadk64(). + +Contributed by Peter Cawley. +--- + src/lj_asm_x86.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h +index 3e189b1..55c02d2 100644 +--- a/src/lj_asm_x86.h ++++ b/src/lj_asm_x86.h +@@ -387,6 +387,7 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) + ir->i = (int32_t)(as->mctop - as->mcbot); + as->mcbot += 8; + as->mclim = as->mcbot + MCLIM_REDZONE; ++ lj_mcode_commitbot(as->J, as->mcbot); + } + as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i); + as->mrm.base = RID_RIP; +-- +2.20.1 + diff --git a/SOURCES/0013-PPC-Add-soft-float-support-to-JIT-compiler-backend.patch b/SOURCES/0013-PPC-Add-soft-float-support-to-JIT-compiler-backend.patch new file mode 100644 index 0000000..faaa94a --- /dev/null +++ b/SOURCES/0013-PPC-Add-soft-float-support-to-JIT-compiler-backend.patch @@ -0,0 +1,751 @@ +From 71b7bc88341945f13f3951e2bb5fd247b639ff7a Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Sun, 3 Sep 2017 23:20:53 +0200 +Subject: [PATCH 13/72] PPC: Add soft-float support to JIT compiler backend. + +Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. +Sponsored by Cisco Systems, Inc. 
+--- + src/lj_arch.h | 1 - + src/lj_asm_ppc.h | 321 ++++++++++++++++++++++++++++++++++++++++------- + 2 files changed, 278 insertions(+), 44 deletions(-) + +diff --git a/src/lj_arch.h b/src/lj_arch.h +index 0145a7c..5962f3a 100644 +--- a/src/lj_arch.h ++++ b/src/lj_arch.h +@@ -273,7 +273,6 @@ + #endif + + #if LJ_ABI_SOFTFP +-#define LJ_ARCH_NOJIT 1 /* NYI */ + #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL + #else + #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE +diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h +index 6daa861..1955429 100644 +--- a/src/lj_asm_ppc.h ++++ b/src/lj_asm_ppc.h +@@ -226,6 +226,7 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref, + emit_tab(as, pi, rt, left, right); + } + ++#if !LJ_SOFTFP + /* Fuse to multiply-add/sub instruction. */ + static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) + { +@@ -245,6 +246,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) + } + return 0; + } ++#endif + + /* -- Calls --------------------------------------------------------------- */ + +@@ -253,13 +255,17 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + { + uint32_t n, nargs = CCI_XNARGS(ci); + int32_t ofs = 8; +- Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; ++ Reg gpr = REGARG_FIRSTGPR; ++#if !LJ_SOFTFP ++ Reg fpr = REGARG_FIRSTFPR; ++#endif + if ((void *)ci->func) + emit_call(as, (void *)ci->func); + for (n = 0; n < nargs; n++) { /* Setup args. */ + IRRef ref = args[n]; + if (ref) { + IRIns *ir = IR(ref); ++#if !LJ_SOFTFP + if (irt_isfp(ir->t)) { + if (fpr <= REGARG_LASTFPR) { + lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ +@@ -271,7 +277,9 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + emit_spstore(as, ir, r, ofs); + ofs += irt_isnum(ir->t) ? 8 : 4; + } +- } else { ++ } else ++#endif ++ { + if (gpr <= REGARG_LASTGPR) { + lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. 
*/ + ra_leftov(as, gpr, ref); +@@ -290,8 +298,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + } + checkmclim(as); + } ++#if !LJ_SOFTFP + if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ + emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); ++#endif + } + + /* Setup result reg/sp for call. Evict scratch regs. */ +@@ -299,8 +309,10 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) + { + RegSet drop = RSET_SCRATCH; + int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); ++#if !LJ_SOFTFP + if ((ci->flags & CCI_NOFPRCLOBBER)) + drop &= ~RSET_FPR; ++#endif + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ + if (hiop && ra_hasreg((ir+1)->r)) +@@ -308,7 +320,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) + ra_evictset(as, drop); /* Evictions must be performed first. */ + if (ra_used(ir)) { + lua_assert(!irt_ispri(ir->t)); +- if (irt_isfp(ir->t)) { ++ if (!LJ_SOFTFP && irt_isfp(ir->t)) { + if ((ci->flags & CCI_CASTU64)) { + /* Use spill slot or temp slots. */ + int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; +@@ -377,6 +389,7 @@ static void asm_retf(ASMState *as, IRIns *ir) + + /* -- Type conversions ---------------------------------------------------- */ + ++#if !LJ_SOFTFP + static void asm_tointg(ASMState *as, IRIns *ir, Reg left) + { + RegSet allow = RSET_FPR; +@@ -409,15 +422,23 @@ static void asm_tobit(ASMState *as, IRIns *ir) + emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); + emit_fab(as, PPCI_FADD, tmp, left, right); + } ++#endif + + static void asm_conv(ASMState *as, IRIns *ir) + { + IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); ++#if !LJ_SOFTFP + int stfp = (st == IRT_NUM || st == IRT_FLOAT); ++#endif + IRRef lref = ir->op1; +- lua_assert(irt_type(ir->t) != st); + lua_assert(!(irt_isint64(ir->t) || + (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. 
*/ ++#if LJ_SOFTFP ++ /* FP conversions are handled by SPLIT. */ ++ lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); ++ /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ ++#else ++ lua_assert(irt_type(ir->t) != st); + if (irt_isfp(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + if (stfp) { /* FP to FP conversion. */ +@@ -476,7 +497,9 @@ static void asm_conv(ASMState *as, IRIns *ir) + emit_fb(as, PPCI_FCTIWZ, tmp, left); + } + } +- } else { ++ } else ++#endif ++ { + Reg dest = ra_dest(as, ir, RSET_GPR); + if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); +@@ -496,17 +519,41 @@ static void asm_strto(ASMState *as, IRIns *ir) + { + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; + IRRef args[2]; +- int32_t ofs; ++ int32_t ofs = SPOFS_TMP; ++#if LJ_SOFTFP ++ ra_evictset(as, RSET_SCRATCH); ++ if (ra_used(ir)) { ++ if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && ++ (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) { ++ int i; ++ for (i = 0; i < 2; i++) { ++ Reg r = (ir+i)->r; ++ if (ra_hasreg(r)) { ++ ra_free(as, r); ++ ra_modified(as, r); ++ emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); ++ } ++ } ++ ofs = sps_scale(ir->s & ~1); ++ } else { ++ Reg rhi = ra_dest(as, ir+1, RSET_GPR); ++ Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); ++ emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs); ++ emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4); ++ } ++ } ++#else + RegSet drop = RSET_SCRATCH; + if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ + ra_evictset(as, drop); ++ if (ir->s) ofs = sps_scale(ir->s); ++#endif + asm_guardcc(as, CC_EQ); + emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */ + args[0] = ir->op1; /* GCstr *str */ + args[1] = ASMREF_TMP1; /* TValue *n */ + asm_gencall(as, ci, args); + /* Store the result to the spill slot or temp slots. */ +- ofs = ir->s ? 
sps_scale(ir->s) : SPOFS_TMP; + emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); + } + +@@ -530,7 +577,10 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) + Reg src = ra_alloc1(as, ref, allow); + emit_setgl(as, src, tmptv.gcr); + } +- type = ra_allock(as, irt_toitype(ir->t), allow); ++ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) ++ type = ra_alloc1(as, ref+1, allow); ++ else ++ type = ra_allock(as, irt_toitype(ir->t), allow); + emit_setgl(as, type, tmptv.it); + } + } +@@ -574,11 +624,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + Reg tisnum = RID_NONE, tmpnum = RID_NONE; + IRRef refkey = ir->op2; + IRIns *irkey = IR(refkey); ++ int isk = irref_isk(refkey); + IRType1 kt = irkey->t; + uint32_t khash; + MCLabel l_end, l_loop, l_next; + + rset_clear(allow, tab); ++#if LJ_SOFTFP ++ if (!isk) { ++ key = ra_alloc1(as, refkey, allow); ++ rset_clear(allow, key); ++ if (irkey[1].o == IR_HIOP) { ++ if (ra_hasreg((irkey+1)->r)) { ++ tmpnum = (irkey+1)->r; ++ ra_noweak(as, tmpnum); ++ } else { ++ tmpnum = ra_allocref(as, refkey+1, allow); ++ } ++ rset_clear(allow, tmpnum); ++ } ++ } ++#else + if (irt_isnum(kt)) { + key = ra_alloc1(as, refkey, RSET_FPR); + tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); +@@ -588,6 +654,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + key = ra_alloc1(as, refkey, allow); + rset_clear(allow, key); + } ++#endif + tmp2 = ra_scratch(as, allow); + rset_clear(allow, tmp2); + +@@ -610,7 +677,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + asm_guardcc(as, CC_EQ); + else + emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); +- if (irt_isnum(kt)) { ++ if (!LJ_SOFTFP && irt_isnum(kt)) { + emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); + emit_condbranch(as, PPCI_BC, CC_GE, l_next); + emit_ab(as, PPCI_CMPLW, tmp1, tisnum); +@@ -620,7 +687,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + emit_ab(as, PPCI_CMPW, tmp2, key); + emit_condbranch(as, PPCI_BC, CC_NE, 
l_next); + } +- emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); ++ if (LJ_SOFTFP && ra_hasreg(tmpnum)) ++ emit_ab(as, PPCI_CMPW, tmp1, tmpnum); ++ else ++ emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); + if (!irt_ispri(kt)) + emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); + } +@@ -629,19 +699,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + (((char *)as->mcp-(char *)l_loop) & 0xffffu); + + /* Load main position relative to tab->node into dest. */ +- khash = irref_isk(refkey) ? ir_khash(irkey) : 1; ++ khash = isk ? ir_khash(irkey) : 1; + if (khash == 0) { + emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); + } else { + Reg tmphash = tmp1; +- if (irref_isk(refkey)) ++ if (isk) + tmphash = ra_allock(as, khash, allow); + emit_tab(as, PPCI_ADD, dest, dest, tmp1); + emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); + emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); + emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); + emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); +- if (irref_isk(refkey)) { ++ if (isk) { + /* Nothing to do. */ + } else if (irt_isstr(kt)) { + emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash)); +@@ -651,13 +721,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); + emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); + emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); +- if (irt_isnum(kt)) { ++ if (LJ_SOFTFP ? 
(irkey[1].o == IR_HIOP) : irt_isnum(kt)) { ++#if LJ_SOFTFP ++ emit_asb(as, PPCI_XOR, tmp2, key, tmp1); ++ emit_rotlwi(as, dest, tmp1, HASH_ROT1); ++ emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum); ++#else + int32_t ofs = ra_spill(as, irkey); + emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); + emit_rotlwi(as, dest, tmp1, HASH_ROT1); + emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); + emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); + emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); ++#endif + } else { + emit_asb(as, PPCI_XOR, tmp2, key, tmp1); + emit_rotlwi(as, dest, tmp1, HASH_ROT1); +@@ -784,8 +860,8 @@ static PPCIns asm_fxloadins(IRIns *ir) + case IRT_U8: return PPCI_LBZ; + case IRT_I16: return PPCI_LHA; + case IRT_U16: return PPCI_LHZ; +- case IRT_NUM: return PPCI_LFD; +- case IRT_FLOAT: return PPCI_LFS; ++ case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_LFD; ++ case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS; + default: return PPCI_LWZ; + } + } +@@ -795,8 +871,8 @@ static PPCIns asm_fxstoreins(IRIns *ir) + switch (irt_type(ir->t)) { + case IRT_I8: case IRT_U8: return PPCI_STB; + case IRT_I16: case IRT_U16: return PPCI_STH; +- case IRT_NUM: return PPCI_STFD; +- case IRT_FLOAT: return PPCI_STFS; ++ case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_STFD; ++ case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS; + default: return PPCI_STW; + } + } +@@ -839,7 +915,8 @@ static void asm_fstore(ASMState *as, IRIns *ir) + + static void asm_xload(ASMState *as, IRIns *ir) + { +- Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); ++ Reg dest = ra_dest(as, ir, ++ (!LJ_SOFTFP && irt_isfp(ir->t)) ? 
RSET_FPR : RSET_GPR); + lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); + if (irt_isi8(ir->t)) + emit_as(as, PPCI_EXTSB, dest, dest); +@@ -857,7 +934,8 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) + Reg src = ra_alloc1(as, irb->op1, RSET_GPR); + asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); + } else { +- Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); ++ Reg src = ra_alloc1(as, ir->op2, ++ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); + asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, + rset_exclude(RSET_GPR, src), ofs); + } +@@ -871,10 +949,19 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) + Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; + RegSet allow = RSET_GPR; + int32_t ofs = AHUREF_LSX; ++ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) { ++ t.irt = IRT_NUM; ++ if (ra_used(ir+1)) { ++ type = ra_dest(as, ir+1, allow); ++ rset_clear(allow, type); ++ } ++ ofs = 0; ++ } + if (ra_used(ir)) { +- lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); +- if (!irt_isnum(t)) ofs = 0; +- dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); ++ lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || ++ irt_isint(ir->t) || irt_isaddr(ir->t)); ++ if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0; ++ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); + rset_clear(allow, dest); + } + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); +@@ -883,12 +970,13 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) + asm_guardcc(as, CC_GE); + emit_ab(as, PPCI_CMPLW, type, tisnum); + if (ra_hasreg(dest)) { +- if (ofs == AHUREF_LSX) { ++ if (!LJ_SOFTFP && ofs == AHUREF_LSX) { + tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, + (idx&255)), (idx>>8))); + emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); + } else { +- emit_fai(as, PPCI_LFD, dest, idx, ofs); ++ emit_fai(as, LJ_SOFTFP ? 
PPCI_LWZ : PPCI_LFD, dest, idx, ++ ofs+4*LJ_SOFTFP); + } + } + } else { +@@ -911,7 +999,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir) + int32_t ofs = AHUREF_LSX; + if (ir->r == RID_SINK) + return; +- if (irt_isnum(ir->t)) { ++ if (!LJ_SOFTFP && irt_isnum(ir->t)) { + src = ra_alloc1(as, ir->op2, RSET_FPR); + } else { + if (!irt_ispri(ir->t)) { +@@ -919,11 +1007,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir) + rset_clear(allow, src); + ofs = 0; + } +- type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); ++ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) ++ type = ra_alloc1(as, (ir+1)->op2, allow); ++ else ++ type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); + rset_clear(allow, type); + } + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); +- if (irt_isnum(ir->t)) { ++ if (!LJ_SOFTFP && irt_isnum(ir->t)) { + if (ofs == AHUREF_LSX) { + emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); + emit_slwi(as, RID_TMP, (idx>>8), 3); +@@ -948,21 +1039,33 @@ static void asm_sload(ASMState *as, IRIns *ir) + IRType1 t = ir->t; + Reg dest = RID_NONE, type = RID_NONE, base; + RegSet allow = RSET_GPR; ++ int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); ++ if (hiop) ++ t.irt = IRT_NUM; + lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ +- lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); ++ lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); + lua_assert(LJ_DUALNUM || + !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); ++#if LJ_SOFTFP ++ lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ ++ if (hiop && ra_used(ir+1)) { ++ type = ra_dest(as, ir+1, allow); ++ rset_clear(allow, type); ++ } ++#else + if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { + dest = ra_scratch(as, RSET_FPR); + asm_tointg(as, ir, dest); + t.irt = IRT_NUM; /* Continue with a regular number type check. 
*/ +- } else if (ra_used(ir)) { ++ } else ++#endif ++ if (ra_used(ir)) { + lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); +- dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); ++ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); + rset_clear(allow, dest); + base = ra_alloc1(as, REF_BASE, allow); + rset_clear(allow, base); +- if ((ir->op2 & IRSLOAD_CONVERT)) { ++ if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { + if (irt_isint(t)) { + emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); + dest = ra_scratch(as, RSET_FPR); +@@ -994,10 +1097,13 @@ dotypecheck: + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); + asm_guardcc(as, CC_GE); +- emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum); ++#if !LJ_SOFTFP + type = RID_TMP; ++#endif ++ emit_ab(as, PPCI_CMPLW, type, tisnum); + } +- if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4); ++ if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, ++ base, ofs-(LJ_SOFTFP?0:4)); + } else { + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + asm_guardcc(as, CC_NE); +@@ -1119,6 +1225,7 @@ static void asm_obar(ASMState *as, IRIns *ir) + + /* -- Arithmetic and logic operations ------------------------------------- */ + ++#if !LJ_SOFTFP + static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) + { + Reg dest = ra_dest(as, ir, RSET_FPR); +@@ -1146,13 +1253,17 @@ static void asm_fpmath(ASMState *as, IRIns *ir) + else + asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); + } ++#endif + + static void asm_add(ASMState *as, IRIns *ir) + { ++#if !LJ_SOFTFP + if (irt_isnum(ir->t)) { + if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) + asm_fparith(as, ir, PPCI_FADD); +- } else { ++ } else ++#endif ++ { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + PPCIns pi; +@@ -1191,10 +1302,13 @@ static void asm_add(ASMState *as, IRIns *ir) + + static void asm_sub(ASMState *as, IRIns *ir) + { ++#if 
!LJ_SOFTFP + if (irt_isnum(ir->t)) { + if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) + asm_fparith(as, ir, PPCI_FSUB); +- } else { ++ } else ++#endif ++ { + PPCIns pi = PPCI_SUBF; + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left, right; +@@ -1220,9 +1334,12 @@ static void asm_sub(ASMState *as, IRIns *ir) + + static void asm_mul(ASMState *as, IRIns *ir) + { ++#if !LJ_SOFTFP + if (irt_isnum(ir->t)) { + asm_fparith(as, ir, PPCI_FMUL); +- } else { ++ } else ++#endif ++ { + PPCIns pi = PPCI_MULLW; + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); +@@ -1250,9 +1367,12 @@ static void asm_mul(ASMState *as, IRIns *ir) + + static void asm_neg(ASMState *as, IRIns *ir) + { ++#if !LJ_SOFTFP + if (irt_isnum(ir->t)) { + asm_fpunary(as, ir, PPCI_FNEG); +- } else { ++ } else ++#endif ++ { + Reg dest, left; + PPCIns pi = PPCI_NEG; + if (as->flagmcp == as->mcp) { +@@ -1563,9 +1683,40 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) + PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)) + #define asm_bror(as, ir) lua_assert(0) + ++#if LJ_SOFTFP ++static void asm_sfpmin_max(ASMState *as, IRIns *ir) ++{ ++ CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp]; ++ IRRef args[4]; ++ MCLabel l_right, l_end; ++ Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR); ++ Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR); ++ Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR); ++ PPCCC cond = (IROp)ir->o == IR_MIN ? 
CC_EQ : CC_NE; ++ righthi = (lefthi >> 8); lefthi &= 255; ++ rightlo = (leftlo >> 8); leftlo &= 255; ++ args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; ++ args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; ++ l_end = emit_label(as); ++ if (desthi != righthi) emit_mr(as, desthi, righthi); ++ if (destlo != rightlo) emit_mr(as, destlo, rightlo); ++ l_right = emit_label(as); ++ if (l_end != l_right) emit_jmp(as, l_end); ++ if (desthi != lefthi) emit_mr(as, desthi, lefthi); ++ if (destlo != leftlo) emit_mr(as, destlo, leftlo); ++ if (l_right == as->mcp+1) { ++ cond ^= 4; l_right = l_end; ++as->mcp; ++ } ++ emit_condbranch(as, PPCI_BC, cond, l_right); ++ ra_evictset(as, RSET_SCRATCH); ++ emit_cmpi(as, RID_RET, 1); ++ asm_gencall(as, &ci, args); ++} ++#endif ++ + static void asm_min_max(ASMState *as, IRIns *ir, int ismax) + { +- if (irt_isnum(ir->t)) { ++ if (!LJ_SOFTFP && irt_isnum(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg tmp = dest; + Reg right, left = ra_alloc2(as, ir, RSET_FPR); +@@ -1653,7 +1804,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc) + static void asm_comp(ASMState *as, IRIns *ir) + { + PPCCC cc = asm_compmap[ir->o]; +- if (irt_isnum(ir->t)) { ++ if (!LJ_SOFTFP && irt_isnum(ir->t)) { + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + asm_guardcc(as, (cc >> 4)); +@@ -1674,6 +1825,44 @@ static void asm_comp(ASMState *as, IRIns *ir) + + #define asm_equal(as, ir) asm_comp(as, ir) + ++#if LJ_SOFTFP ++/* SFP comparisons. 
*/ ++static void asm_sfpcomp(ASMState *as, IRIns *ir) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; ++ RegSet drop = RSET_SCRATCH; ++ Reg r; ++ IRRef args[4]; ++ args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; ++ args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; ++ ++ for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { ++ if (!rset_test(as->freeset, r) && ++ regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) ++ rset_clear(drop, r); ++ } ++ ra_evictset(as, drop); ++ asm_setupresult(as, ir, ci); ++ switch ((IROp)ir->o) { ++ case IR_ULT: ++ asm_guardcc(as, CC_EQ); ++ emit_ai(as, PPCI_CMPWI, RID_RET, 0); ++ case IR_ULE: ++ asm_guardcc(as, CC_EQ); ++ emit_ai(as, PPCI_CMPWI, RID_RET, 1); ++ break; ++ case IR_GE: case IR_GT: ++ asm_guardcc(as, CC_EQ); ++ emit_ai(as, PPCI_CMPWI, RID_RET, 2); ++ default: ++ asm_guardcc(as, (asm_compmap[ir->o] & 0xf)); ++ emit_ai(as, PPCI_CMPWI, RID_RET, 0); ++ break; ++ } ++ asm_gencall(as, ci, args); ++} ++#endif ++ + #if LJ_HASFFI + /* 64 bit integer comparisons. */ + static void asm_comp64(ASMState *as, IRIns *ir) +@@ -1703,19 +1892,36 @@ static void asm_comp64(ASMState *as, IRIns *ir) + /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ + static void asm_hiop(ASMState *as, IRIns *ir) + { +-#if LJ_HASFFI ++#if LJ_HASFFI || LJ_SOFTFP + /* HIOP is marked as a store because it needs its own DCE logic. */ + int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ + if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; + if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ + as->curins--; /* Always skip the CONV. */ ++#if LJ_HASFFI && !LJ_SOFTFP + if (usehi || uselo) + asm_conv64(as, ir); + return; ++#endif + } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ + as->curins--; /* Always skip the loword comparison. 
*/ ++#if LJ_SOFTFP ++ if (!irt_isint(ir->t)) { ++ asm_sfpcomp(as, ir-1); ++ return; ++ } ++#endif ++#if LJ_HASFFI + asm_comp64(as, ir); ++#endif ++ return; ++#if LJ_SOFTFP ++ } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { ++ as->curins--; /* Always skip the loword min/max. */ ++ if (uselo || usehi) ++ asm_sfpmin_max(as, ir-1); + return; ++#endif + } else if ((ir-1)->o == IR_XSTORE) { + as->curins--; /* Handle both stores here. */ + if ((ir-1)->r != RID_SINK) { +@@ -1726,14 +1932,27 @@ static void asm_hiop(ASMState *as, IRIns *ir) + } + if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ + switch ((ir-1)->o) { ++#if LJ_HASFFI + case IR_ADD: as->curins--; asm_add64(as, ir); break; + case IR_SUB: as->curins--; asm_sub64(as, ir); break; + case IR_NEG: as->curins--; asm_neg64(as, ir); break; ++#endif ++#if LJ_SOFTFP ++ case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: ++ case IR_STRTO: ++ if (!uselo) ++ ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ ++ break; ++#endif + case IR_CALLN: ++ case IR_CALLS: + case IR_CALLXS: + if (!uselo) + ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ + break; ++#if LJ_SOFTFP ++ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: ++#endif + case IR_CNEWI: + /* Nothing to do here. Handled by lo op itself. */ + break; +@@ -1797,8 +2016,19 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) + if ((sn & SNAP_NORESTORE)) + continue; + if (irt_isnum(ir->t)) { ++#if LJ_SOFTFP ++ Reg tmp; ++ RegSet allow = rset_exclude(RSET_GPR, RID_BASE); ++ lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. 
*/ ++ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); ++ emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); ++ if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); ++ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); ++ emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); ++#else + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); ++#endif + } else { + Reg type; + RegSet allow = rset_exclude(RSET_GPR, RID_BASE); +@@ -1811,6 +2041,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) + if ((sn & (SNAP_CONT|SNAP_FRAME))) { + if (s == 0) continue; /* Do not overwrite link to previous frame. */ + type = ra_allock(as, (int32_t)(*flinks--), allow); ++#if LJ_SOFTFP ++ } else if ((sn & SNAP_SOFTFPNUM)) { ++ type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); ++#endif + } else { + type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); + } +@@ -1947,14 +2181,15 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) + int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; + asm_collectargs(as, ir, ci, args); + for (i = 0; i < nargs; i++) +- if (args[i] && irt_isfp(IR(args[i])->t)) { ++ if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) { + if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; + } else { + if (ngpr > 0) ngpr--; else nslots++; + } + if (nslots > as->evenspill) /* Leave room for args in stack slots. */ + as->evenspill = nslots; +- return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); ++ return (!LJ_SOFTFP && irt_isfp(ir->t)) ? 
REGSP_HINT(RID_FPRET) : ++ REGSP_HINT(RID_RET); + } + + static void asm_setup_target(ASMState *as) +-- +2.20.1 + diff --git a/SOURCES/0014-x64-LJ_GC64-Fix-type-check-only-variant-of-SLOAD.patch b/SOURCES/0014-x64-LJ_GC64-Fix-type-check-only-variant-of-SLOAD.patch new file mode 100644 index 0000000..7e9dd8a --- /dev/null +++ b/SOURCES/0014-x64-LJ_GC64-Fix-type-check-only-variant-of-SLOAD.patch @@ -0,0 +1,26 @@ +From 05fbdf565c700365d22e38f11478101a0d92a23e Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Sun, 10 Sep 2017 14:05:30 +0200 +Subject: [PATCH 14/72] x64/LJ_GC64: Fix type-check-only variant of SLOAD. + +Thanks to Peter Cawley. +--- + src/lj_asm_x86.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h +index 55c02d2..af54dc7 100644 +--- a/src/lj_asm_x86.h ++++ b/src/lj_asm_x86.h +@@ -1759,7 +1759,7 @@ static void asm_sload(ASMState *as, IRIns *ir) + emit_i8(as, irt_toitype(t)); + emit_rr(as, XO_ARITHi8, XOg_CMP, tmp); + emit_shifti(as, XOg_SAR|REX_64, tmp, 47); +- emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs+4); ++ emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs); + #else + } else { + emit_i8(as, irt_toitype(t)); +-- +2.20.1 + diff --git a/SOURCES/0015-MIPS64-Hide-internal-function.patch b/SOURCES/0015-MIPS64-Hide-internal-function.patch new file mode 100644 index 0000000..0e2f4fd --- /dev/null +++ b/SOURCES/0015-MIPS64-Hide-internal-function.patch @@ -0,0 +1,26 @@ +From bf12f1dafb157008b963f829b57b2472b6993cc8 Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Mon, 18 Sep 2017 09:50:22 +0200 +Subject: [PATCH 15/72] MIPS64: Hide internal function. + +--- + src/lj_ccall.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/src/lj_ccall.c b/src/lj_ccall.c +index 799be48..25e938c 100644 +--- a/src/lj_ccall.c ++++ b/src/lj_ccall.c +@@ -848,7 +848,8 @@ noth: /* Not a homogeneous float/double aggregate. */ + return 0; /* Struct is in GPRs. 
*/ + } + +-void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, int ft) ++static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, ++ int ft) + { + if (LJ_ABI_SOFTFP ? ft : + ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) { +-- +2.20.1 + diff --git a/SOURCES/0016-DynASM-x86-Fix-potential-REL_A-overflow.patch b/SOURCES/0016-DynASM-x86-Fix-potential-REL_A-overflow.patch new file mode 100644 index 0000000..66f5bf0 --- /dev/null +++ b/SOURCES/0016-DynASM-x86-Fix-potential-REL_A-overflow.patch @@ -0,0 +1,34 @@ +commit 6a2d8b0b4d49eb5aac600c219e5903420806e56e +Merge: bf12f1d 0c0e7b1 +Author: Mike Pall +Date: Wed Sep 20 19:42:34 2017 +0200 + + Merge branch 'master' into v2.1 + +From 0c0e7b168ea147866835954267c151ef789f64fb Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Wed, 20 Sep 2017 19:39:50 +0200 +Subject: [PATCH 16/72] DynASM/x86: Fix potential REL_A overflow. + +Thanks to Joshua Haberman. +--- + dynasm/dasm_x86.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h +index 90dc5d1..f9260b0 100644 +--- a/dynasm/dasm_x86.h ++++ b/dynasm/dasm_x86.h +@@ -395,7 +395,8 @@ int dasm_encode(Dst_DECL, void *buffer) + } + case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; + b++; n = (int)(ptrdiff_t)D->globals[-n]; +- case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ ++ case DASM_REL_A: rel_a: ++ n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ + case DASM_REL_PC: rel_pc: { + int shrink = *b++; + int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; } +-- +2.20.1 + diff --git a/SOURCES/0017-LJ_GC64-Fix-ir_khash-for-non-string-GCobj.patch b/SOURCES/0017-LJ_GC64-Fix-ir_khash-for-non-string-GCobj.patch new file mode 100644 index 0000000..aff6f20 --- /dev/null +++ b/SOURCES/0017-LJ_GC64-Fix-ir_khash-for-non-string-GCobj.patch @@ -0,0 +1,29 @@ +From b4ed3219a1a98dd9fe7d1e3eeea3b82f5a780948 Mon Sep 17 00:00:00 2001 +From: Mike Pall 
+Date: Mon, 2 Oct 2017 09:22:46 +0200 +Subject: [PATCH 17/72] LJ_GC64: Fix ir_khash for non-string GCobj. + +Contributed by Peter Cawley. +--- + src/lj_asm.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/src/lj_asm.c b/src/lj_asm.c +index bed2268..d961927 100644 +--- a/src/lj_asm.c ++++ b/src/lj_asm.c +@@ -1017,7 +1017,11 @@ static uint32_t ir_khash(IRIns *ir) + } else { + lua_assert(irt_isgcv(ir->t)); + lo = u32ptr(ir_kgc(ir)); ++#if LJ_GC64 ++ hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15); ++#else + hi = lo + HASH_BIAS; ++#endif + } + return hashrot(lo, hi); + } +-- +2.20.1 + diff --git a/SOURCES/0018-LJ_GC64-Make-ASMREF_L-references-64-bit.patch b/SOURCES/0018-LJ_GC64-Make-ASMREF_L-references-64-bit.patch new file mode 100644 index 0000000..d604876 --- /dev/null +++ b/SOURCES/0018-LJ_GC64-Make-ASMREF_L-references-64-bit.patch @@ -0,0 +1,57 @@ +From 850f8c59d3d04a9847f21f32a6c36d8269b5b6b1 Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Mon, 2 Oct 2017 23:10:56 +0200 +Subject: [PATCH 18/72] LJ_GC64: Make ASMREF_L references 64 bit. + +Reported by Yichun Zhang. +--- + src/lj_asm.c | 1 + + src/lj_ir.h | 4 +++- + src/lj_opt_sink.c | 1 + + 3 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/src/lj_asm.c b/src/lj_asm.c +index d961927..753fe6b 100644 +--- a/src/lj_asm.c ++++ b/src/lj_asm.c +@@ -2015,6 +2015,7 @@ static void asm_setup_regsp(ASMState *as) + ir->prev = REGSP_INIT; + if (irt_is64(ir->t) && ir->o != IR_KNULL) { + #if LJ_GC64 ++ /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */ + ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */ + #else + /* Make life easier for backends by putting address of constant in i. 
*/ +diff --git a/src/lj_ir.h b/src/lj_ir.h +index 34c2785..8057a75 100644 +--- a/src/lj_ir.h ++++ b/src/lj_ir.h +@@ -377,10 +377,12 @@ typedef struct IRType1 { uint8_t irt; } IRType1; + #define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64)) + + #if LJ_GC64 ++/* Include IRT_NIL, so IR(ASMREF_L) (aka REF_NIL) is considered 64 bit. */ + #define IRT_IS64 \ + ((1u<cur.nk); ir < irbase; ir++) { + irt_clearmark(ir->t); + ir->prev = REGSP_INIT; ++ /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */ + if (irt_is64(ir->t) && ir->o != IR_KNULL) + ir++; + } +-- +2.20.1 + diff --git a/SOURCES/0019-Fix-FOLD-rule-for-strength-reduction-of-widening.patch b/SOURCES/0019-Fix-FOLD-rule-for-strength-reduction-of-widening.patch new file mode 100644 index 0000000..c999ce8 --- /dev/null +++ b/SOURCES/0019-Fix-FOLD-rule-for-strength-reduction-of-widening.patch @@ -0,0 +1,26 @@ +From 9f0caad0e43f97a4613850b3874b851cb1bc301d Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Wed, 8 Nov 2017 12:53:05 +0100 +Subject: [PATCH 19/72] Fix FOLD rule for strength reduction of widening. + +Reported by Matthew Burk. +--- + src/lj_opt_fold.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c +index 3d0e35a..5dc7ae3 100644 +--- a/src/lj_opt_fold.c ++++ b/src/lj_opt_fold.c +@@ -1052,7 +1052,7 @@ LJFOLDF(simplify_conv_sext) + if (ref == J->scev.idx) { + IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; + lua_assert(irt_isint(J->scev.t)); +- if (lo && IR(lo)->i + ofs >= 0) { ++ if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) { + ok_reduce: + #if LJ_TARGET_X64 + /* Eliminate widening. All 32 bit ops do an implicit zero-extension. 
*/ +-- +2.20.1 + diff --git a/SOURCES/0020-ARM64-Fix-assembly-of-HREFK.patch b/SOURCES/0020-ARM64-Fix-assembly-of-HREFK.patch new file mode 100644 index 0000000..3200304 --- /dev/null +++ b/SOURCES/0020-ARM64-Fix-assembly-of-HREFK.patch @@ -0,0 +1,45 @@ +From 06cd9fce7df440323647174f1ca4a01281ec8acd Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Wed, 8 Nov 2017 12:53:48 +0100 +Subject: [PATCH 20/72] ARM64: Fix assembly of HREFK. + +Reported by Jason Teplitz. +--- + src/lj_asm_arm64.h | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h +index 8fd92e7..cbb186d 100644 +--- a/src/lj_asm_arm64.h ++++ b/src/lj_asm_arm64.h +@@ -869,14 +869,12 @@ static void asm_hrefk(ASMState *as, IRIns *ir) + int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); + int32_t kofs = ofs + (int32_t)offsetof(Node, key); + int bigofs = !emit_checkofs(A64I_LDRx, ofs); +- RegSet allow = RSET_GPR; + Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; +- Reg node = ra_alloc1(as, ir->op1, allow); +- Reg key = ra_scratch(as, rset_clear(allow, node)); +- Reg idx = node; ++ Reg node = ra_alloc1(as, ir->op1, RSET_GPR); ++ Reg key, idx = node; ++ RegSet allow = rset_exclude(RSET_GPR, node); + uint64_t k; + lua_assert(ofs % sizeof(Node) == 0); +- rset_clear(allow, key); + if (bigofs) { + idx = dest; + rset_clear(allow, dest); +@@ -892,7 +890,8 @@ static void asm_hrefk(ASMState *as, IRIns *ir) + } else { + k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey); + } +- emit_nm(as, A64I_CMPx, key, ra_allock(as, k, allow)); ++ key = ra_scratch(as, allow); ++ emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key))); + emit_lso(as, A64I_LDRx, key, idx, kofs); + if (bigofs) + emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR); +-- +2.20.1 + diff --git a/SOURCES/0021-MIPS64-Fix-register-allocation-in-assembly-of-HREF.patch 
b/SOURCES/0021-MIPS64-Fix-register-allocation-in-assembly-of-HREF.patch new file mode 100644 index 0000000..80fad2f --- /dev/null +++ b/SOURCES/0021-MIPS64-Fix-register-allocation-in-assembly-of-HREF.patch @@ -0,0 +1,81 @@ +From 99cdfbf6a1e8856f64908072ef10443a7eab14f2 Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Wed, 8 Nov 2017 12:54:03 +0100 +Subject: [PATCH 21/72] MIPS64: Fix register allocation in assembly of HREF. + +Contributed by James Cowgill. +--- + src/lj_asm_mips.h | 42 +++++++++++++++++++++++++----------------- + 1 file changed, 25 insertions(+), 17 deletions(-) + +diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h +index 1406a87..3a4679b 100644 +--- a/src/lj_asm_mips.h ++++ b/src/lj_asm_mips.h +@@ -859,6 +859,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + Reg dest = ra_dest(as, ir, allow); + Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); + Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; ++#if LJ_64 ++ Reg cmp64 = RID_NONE; ++#endif + IRRef refkey = ir->op2; + IRIns *irkey = IR(refkey); + int isk = irref_isk(refkey); +@@ -901,6 +904,26 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + #endif + tmp2 = ra_scratch(as, allow); + rset_clear(allow, tmp2); ++#if LJ_64 ++ if (LJ_SOFTFP || !irt_isnum(kt)) { ++ /* Allocate cmp64 register used for 64-bit comparisons */ ++ if (LJ_SOFTFP && irt_isnum(kt)) { ++ cmp64 = key; ++ } else if (!isk && irt_isaddr(kt)) { ++ cmp64 = tmp2; ++ } else { ++ int64_t k; ++ if (isk && irt_isaddr(kt)) { ++ k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; ++ } else { ++ lua_assert(irt_ispri(kt) && !irt_isnil(kt)); ++ k = ~((int64_t)~irt_toitype(ir->t) << 47); ++ } ++ cmp64 = ra_allock(as, k, allow); ++ rset_clear(allow, cmp64); ++ } ++ } ++#endif + + /* Key not found in chain: jump to exit (if merged) or load niltv. 
*/ + l_end = emit_label(as); +@@ -943,24 +966,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15); + emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum); + emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); +- } else if (LJ_SOFTFP && irt_isnum(kt)) { +- emit_branch(as, MIPSI_BEQ, tmp1, key, l_end); +- emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); +- } else if (irt_isaddr(kt)) { +- Reg refk = tmp2; +- if (isk) { +- int64_t k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; +- refk = ra_allock(as, k, allow); +- rset_clear(allow, refk); +- } +- emit_branch(as, MIPSI_BEQ, tmp1, refk, l_end); +- emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); + } else { +- Reg pri = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); +- rset_clear(allow, pri); +- lua_assert(irt_ispri(kt) && !irt_isnil(kt)); +- emit_branch(as, MIPSI_BEQ, tmp1, pri, l_end); +- emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); ++ emit_branch(as, MIPSI_BEQ, tmp1, cmp64, l_end); ++ emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); + } + *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); + if (!isk && irt_isaddr(kt)) { +-- +2.20.1 + diff --git a/SOURCES/0022-ARM64-Fix-xpcall-error-case.patch b/SOURCES/0022-ARM64-Fix-xpcall-error-case.patch new file mode 100644 index 0000000..ec05a7c --- /dev/null +++ b/SOURCES/0022-ARM64-Fix-xpcall-error-case.patch @@ -0,0 +1,31 @@ +From 33082a6f4778aa152f6a4a684a7fe79436f1ecb6 Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Thu, 16 Nov 2017 12:53:34 +0100 +Subject: [PATCH 22/72] ARM64: Fix xpcall() error case. + +Thanks to Stefan Pejic. 
+--- + src/vm_arm64.dasc | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc +index 3eaf376..241c58a 100644 +--- a/src/vm_arm64.dasc ++++ b/src/vm_arm64.dasc +@@ -1185,12 +1185,12 @@ static void build_subroutines(BuildCtx *ctx) + | subs NARGS8:RC, NARGS8:RC, #16 + | blo ->fff_fallback + | mov RB, BASE +- | add BASE, BASE, #24 + | asr ITYPE, CARG2, #47 + | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 + | cmn ITYPE, #-LJ_TFUNC + | add PC, TMP0, #24+FRAME_PCALL + | bne ->fff_fallback // Traceback must be a function. ++ | add BASE, BASE, #24 + | stp CARG2, CARG1, [RB] // Swap function and traceback. + | cbz NARGS8:RC, ->vm_call_dispatch + | b <1 +-- +2.20.1 + diff --git a/SOURCES/0023-Fix-saved-bytecode-encapsulated-in-ELF-objects.patch b/SOURCES/0023-Fix-saved-bytecode-encapsulated-in-ELF-objects.patch new file mode 100644 index 0000000..740a5a7 --- /dev/null +++ b/SOURCES/0023-Fix-saved-bytecode-encapsulated-in-ELF-objects.patch @@ -0,0 +1,26 @@ +From 7dbf0b05f1228c1c719866db5e5f3d58f87f74c8 Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Thu, 16 Nov 2017 12:58:12 +0100 +Subject: [PATCH 23/72] Fix saved bytecode encapsulated in ELF objects. + +Thanks to Dimitry Andric. 
+--- + src/jit/bcsave.lua | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua +index aa677df..c94064e 100644 +--- a/src/jit/bcsave.lua ++++ b/src/jit/bcsave.lua +@@ -275,7 +275,7 @@ typedef struct { + o.sect[2].size = fofs(ofs) + o.sect[3].type = f32(3) -- .strtab + o.sect[3].ofs = fofs(sofs + ofs) +- o.sect[3].size = fofs(#symname+1) ++ o.sect[3].size = fofs(#symname+2) + ffi.copy(o.space+ofs+1, symname) + ofs = ofs + #symname + 2 + o.sect[4].type = f32(1) -- .rodata +-- +2.20.1 + diff --git a/SOURCES/0024-ARM64-Fix-xpcall-error-case-really.patch b/SOURCES/0024-ARM64-Fix-xpcall-error-case-really.patch new file mode 100644 index 0000000..ab518e1 --- /dev/null +++ b/SOURCES/0024-ARM64-Fix-xpcall-error-case-really.patch @@ -0,0 +1,37 @@ +From d417ded17945b4211608d497d50b509e0274f5e0 Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Sat, 18 Nov 2017 12:23:57 +0100 +Subject: [PATCH 24/72] ARM64: Fix xpcall() error case (really). +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Thanks to François Perrad and Stefan Pejic. +--- + src/vm_arm64.dasc | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc +index 241c58a..c55794a 100644 +--- a/src/vm_arm64.dasc ++++ b/src/vm_arm64.dasc +@@ -1182,7 +1182,7 @@ static void build_subroutines(BuildCtx *ctx) + |.ffunc xpcall + | ldp CARG1, CARG2, [BASE] + | ldrb TMP0w, GL->hookmask +- | subs NARGS8:RC, NARGS8:RC, #16 ++ | subs NARGS8:TMP1, NARGS8:RC, #16 + | blo ->fff_fallback + | mov RB, BASE + | asr ITYPE, CARG2, #47 +@@ -1190,6 +1190,7 @@ static void build_subroutines(BuildCtx *ctx) + | cmn ITYPE, #-LJ_TFUNC + | add PC, TMP0, #24+FRAME_PCALL + | bne ->fff_fallback // Traceback must be a function. ++ | mov NARGS8:RC, NARGS8:TMP1 + | add BASE, BASE, #24 + | stp CARG2, CARG1, [RB] // Swap function and traceback. 
+ | cbz NARGS8:RC, ->vm_call_dispatch +-- +2.20.1 + diff --git a/SOURCES/0025-MIPS64-Fix-xpcall-error-case.patch b/SOURCES/0025-MIPS64-Fix-xpcall-error-case.patch new file mode 100644 index 0000000..5b17e81 --- /dev/null +++ b/SOURCES/0025-MIPS64-Fix-xpcall-error-case.patch @@ -0,0 +1,39 @@ +From ea7071d3c30b6432bfe6f8a9d263e0285cec25e3 Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Sat, 18 Nov 2017 12:25:35 +0100 +Subject: [PATCH 25/72] MIPS64: Fix xpcall() error case. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Thanks to François Perrad and Stefan Pejic. +--- + src/vm_mips64.dasc | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc +index 75b38de..a78cd25 100644 +--- a/src/vm_mips64.dasc ++++ b/src/vm_mips64.dasc +@@ -1399,15 +1399,16 @@ static void build_subroutines(BuildCtx *ctx) + |. nop + | + |.ffunc xpcall +- | daddiu NARGS8:RC, NARGS8:RC, -16 ++ | daddiu NARGS8:TMP0, NARGS8:RC, -16 + | ld CARG1, 0(BASE) + | ld CARG2, 8(BASE) +- | bltz NARGS8:RC, ->fff_fallback ++ | bltz NARGS8:TMP0, ->fff_fallback + |. lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) + | gettp AT, CARG2 + | daddiu AT, AT, -LJ_TFUNC + | bnez AT, ->fff_fallback // Traceback must be a function. + |. move TMP2, BASE ++ | move NARGS8:RC, NARGS8:TMP0 + | daddiu BASE, BASE, 24 + | // Remember active hook before pcall. + | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT +-- +2.20.1 + diff --git a/SOURCES/0026-Fix-IR_BUFPUT-assembly.patch b/SOURCES/0026-Fix-IR_BUFPUT-assembly.patch new file mode 100644 index 0000000..c942467 --- /dev/null +++ b/SOURCES/0026-Fix-IR_BUFPUT-assembly.patch @@ -0,0 +1,44 @@ +From 58d0dde0a2df49abc991decbabff15230010829a Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Sun, 14 Jan 2018 13:57:00 +0100 +Subject: [PATCH 26/72] Fix IR_BUFPUT assembly. + +Thanks to Peter Cawley. 
+--- + src/lj_asm.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/src/lj_asm.c b/src/lj_asm.c +index 753fe6b..5f83779 100644 +--- a/src/lj_asm.c ++++ b/src/lj_asm.c +@@ -1119,7 +1119,7 @@ static void asm_bufput(ASMState *as, IRIns *ir) + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr]; + IRRef args[3]; + IRIns *irs; +- int kchar = -1; ++ int kchar = -129; + args[0] = ir->op1; /* SBuf * */ + args[1] = ir->op2; /* GCstr * */ + irs = IR(ir->op2); +@@ -1127,7 +1127,7 @@ static void asm_bufput(ASMState *as, IRIns *ir) + if (irs->o == IR_KGC) { + GCstr *s = ir_kstr(irs); + if (s->len == 1) { /* Optimize put of single-char string constant. */ +- kchar = strdata(s)[0]; ++ kchar = (int8_t)strdata(s)[0]; /* Signed! */ + args[1] = ASMREF_TMP1; /* int, truncated to char */ + ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar]; + } +@@ -1154,7 +1154,7 @@ static void asm_bufput(ASMState *as, IRIns *ir) + asm_gencall(as, ci, args); + if (args[1] == ASMREF_TMP1) { + Reg tmp = ra_releasetmp(as, ASMREF_TMP1); +- if (kchar == -1) ++ if (kchar == -129) + asm_tvptr(as, tmp, irs->op1); + else + ra_allockreg(as, kchar, tmp); +-- +2.20.1 + diff --git a/SOURCES/0027-Fix-string.format-c-0.patch b/SOURCES/0027-Fix-string.format-c-0.patch new file mode 100644 index 0000000..caece09 --- /dev/null +++ b/SOURCES/0027-Fix-string.format-c-0.patch @@ -0,0 +1,15 @@ +commit 4660dbfa8a4f9eea5218b739075d04faadfeeef6 +Merge: 58d0dde 430d9f8 +Author: Mike Pall +Date: Sun Jan 14 14:26:10 2018 +0100 + + Merge branch 'master' into v2.1 + +From 430d9f8f7ebb779948dbd43944b876b1a3f58551 Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Sun, 14 Jan 2018 14:11:59 +0100 +Subject: [PATCH 27/72] Fix string.format("%c", 0). 
+ +--- + src/lib_string.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/SOURCES/0028-Fix-ARMv8-32-bit-subset-detection.patch b/SOURCES/0028-Fix-ARMv8-32-bit-subset-detection.patch new file mode 100644 index 0000000..00687af --- /dev/null +++ b/SOURCES/0028-Fix-ARMv8-32-bit-subset-detection.patch @@ -0,0 +1,26 @@ +From 9eaad8574f5b2271b981cd31966b1e832cd8de12 Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Thu, 18 Jan 2018 12:24:36 +0100 +Subject: [PATCH 28/72] Fix ARMv8 (32 bit subset) detection. + +Thanks to Markus Oberhumber. +--- + src/lj_arch.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/lj_arch.h b/src/lj_arch.h +index 5962f3a..fcebd84 100644 +--- a/src/lj_arch.h ++++ b/src/lj_arch.h +@@ -201,7 +201,7 @@ + #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ + #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL + +-#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__ ++#if __ARM_ARCH_8__ || __ARM_ARCH_8A__ + #define LJ_ARCH_VERSION 80 + #elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ + #define LJ_ARCH_VERSION 70 +-- +2.20.1 + diff --git a/SOURCES/0029-Fix-LuaJIT-API-docs-for-LUAJIT_MODE_.patch b/SOURCES/0029-Fix-LuaJIT-API-docs-for-LUAJIT_MODE_.patch new file mode 100644 index 0000000..70ae35a --- /dev/null +++ b/SOURCES/0029-Fix-LuaJIT-API-docs-for-LUAJIT_MODE_.patch @@ -0,0 +1,28 @@ +From c88602f080dcafea6ba222a2f7cc1ea0e41ef3cc Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Thu, 18 Jan 2018 12:29:39 +0100 +Subject: [PATCH 29/72] Fix LuaJIT API docs for LUAJIT_MODE_*. + +Thanks to sunfishgao. +--- + doc/ext_c_api.html | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html +index 041a722..4bb8251 100644 +--- a/doc/ext_c_api.html ++++ b/doc/ext_c_api.html +@@ -89,8 +89,8 @@ other Lua/C API functions). +

+

+ The third argument specifies the mode, which is 'or'ed with a flag. +-The flag can be LUAJIT_MODE_OFF to turn a feature on, +-LUAJIT_MODE_ON to turn a feature off, or ++The flag can be LUAJIT_MODE_OFF to turn a feature off, ++LUAJIT_MODE_ON to turn a feature on, or + LUAJIT_MODE_FLUSH to flush cached code. +

+

+-- +2.20.1 + diff --git a/SOURCES/0030-MIPS64-Fix-soft-float-0.0-vs.-0.0-comparison.patch b/SOURCES/0030-MIPS64-Fix-soft-float-0.0-vs.-0.0-comparison.patch new file mode 100644 index 0000000..8ee3a17 --- /dev/null +++ b/SOURCES/0030-MIPS64-Fix-soft-float-0.0-vs.-0.0-comparison.patch @@ -0,0 +1,26 @@ +From 8071aa4ad65cf09e3b7adda4a7787d8897e5314c Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Mon, 29 Jan 2018 12:12:29 +0100 +Subject: [PATCH 30/72] MIPS64: Fix soft-float +-0.0 vs. +-0.0 comparison. + +Thanks to Stefan Pejic. +--- + src/vm_mips64.dasc | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc +index a78cd25..0a3f8e5 100644 +--- a/src/vm_mips64.dasc ++++ b/src/vm_mips64.dasc +@@ -2661,7 +2661,7 @@ static void build_subroutines(BuildCtx *ctx) + |. slt CRET1, CARG2, CARG1 + |8: + | jr ra +- |. nop ++ |. li CRET1, 0 + |9: + | jr ra + |. move CRET1, CRET2 +-- +2.20.1 + diff --git a/SOURCES/0031-FFI-Don-t-assert-on-1LL-5.2-compatibility-mode-only.patch b/SOURCES/0031-FFI-Don-t-assert-on-1LL-5.2-compatibility-mode-only.patch new file mode 100644 index 0000000..b95ca0c --- /dev/null +++ b/SOURCES/0031-FFI-Don-t-assert-on-1LL-5.2-compatibility-mode-only.patch @@ -0,0 +1,69 @@ +commit 74c544d68c07bcd416225598cdf15f88e62fd457 +Merge: 8071aa4 b03a56f +Author: Mike Pall +Date: Mon Jan 29 12:53:42 2018 +0100 + + Merge branch 'master' into v2.1 + +From b03a56f28ec360bbcf43091afd0607890a4a33c7 Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Mon, 29 Jan 2018 12:47:08 +0100 +Subject: [PATCH 31/72] FFI: Don't assert on #1LL (5.2 compatibility mode + only). + +Reported by Denis Golovan. 
+--- + src/lib_ffi.c | 2 +- + src/lj_carith.c | 9 +++++++++ + src/lj_carith.h | 1 + + 3 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/src/lib_ffi.c b/src/lib_ffi.c +index f2f2ede..83483d9 100644 +--- a/src/lib_ffi.c ++++ b/src/lib_ffi.c +@@ -193,7 +193,7 @@ LJLIB_CF(ffi_meta___eq) LJLIB_REC(cdata_arith MM_eq) + + LJLIB_CF(ffi_meta___len) LJLIB_REC(cdata_arith MM_len) + { +- return ffi_arith(L); ++ return lj_carith_len(L); + } + + LJLIB_CF(ffi_meta___lt) LJLIB_REC(cdata_arith MM_lt) +diff --git a/src/lj_carith.c b/src/lj_carith.c +index 6224dee..c34596c 100644 +--- a/src/lj_carith.c ++++ b/src/lj_carith.c +@@ -272,6 +272,15 @@ int lj_carith_op(lua_State *L, MMS mm) + return lj_carith_meta(L, cts, &ca, mm); + } + ++/* No built-in functionality for length of cdata. */ ++int lj_carith_len(lua_State *L) ++{ ++ CTState *cts = ctype_cts(L); ++ CDArith ca; ++ carith_checkarg(L, cts, &ca); ++ return lj_carith_meta(L, cts, &ca, MM_len); ++} ++ + /* -- 64 bit bit operations helpers --------------------------------------- */ + + #if LJ_64 +diff --git a/src/lj_carith.h b/src/lj_carith.h +index 3c15591..82fc824 100644 +--- a/src/lj_carith.h ++++ b/src/lj_carith.h +@@ -11,6 +11,7 @@ + #if LJ_HASFFI + + LJ_FUNC int lj_carith_op(lua_State *L, MMS mm); ++LJ_FUNC int lj_carith_len(lua_State *L); + + #if LJ_32 + LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh); + +-- +2.20.1 + diff --git a/SOURCES/0032-Fix-GCC-7-Wimplicit-fallthrough-warnings.patch b/SOURCES/0032-Fix-GCC-7-Wimplicit-fallthrough-warnings.patch new file mode 100644 index 0000000..192f271 --- /dev/null +++ b/SOURCES/0032-Fix-GCC-7-Wimplicit-fallthrough-warnings.patch @@ -0,0 +1,291 @@ +commit 0bf46e1edf94c43795b5e491efe682ab70974ce7 +Merge: 74c544d d4ee803 +Author: Mike Pall +Date: Mon Jan 29 13:19:30 2018 +0100 + + Merge branch 'master' into v2.1 + +From d4ee80342770d1281e2ce877f8ae8ab1d99e6528 Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Mon, 29 Jan 2018 13:06:13 +0100 +Subject: [PATCH 
32/72] Fix GCC 7 -Wimplicit-fallthrough warnings. + +--- + dynasm/dasm_arm.h | 2 ++ + dynasm/dasm_mips.h | 1 + + dynasm/dasm_ppc.h | 1 + + dynasm/dasm_x86.h | 14 ++++++++++++-- + src/lj_asm.c | 3 ++- + src/lj_cparse.c | 10 ++++++++++ + src/lj_err.c | 1 + + src/lj_opt_sink.c | 2 +- + src/lj_parse.c | 3 ++- + src/luajit.c | 1 + + 10 files changed, 33 insertions(+), 5 deletions(-) + +diff --git a/dynasm/dasm_arm.h b/dynasm/dasm_arm.h +index a43f7c6..1d404cc 100644 +--- a/dynasm/dasm_arm.h ++++ b/dynasm/dasm_arm.h +@@ -254,6 +254,7 @@ void dasm_put(Dst_DECL, int start, ...) + case DASM_IMMV8: + CK((n & 3) == 0, RANGE_I); + n >>= 2; ++ /* fallthrough */ + case DASM_IMML8: + case DASM_IMML12: + CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) : +@@ -371,6 +372,7 @@ int dasm_encode(Dst_DECL, void *buffer) + break; + case DASM_REL_LG: + CK(n >= 0, UNDEF_LG); ++ /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) - 4; +diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h +index 7eac669..46af034 100644 +--- a/dynasm/dasm_mips.h ++++ b/dynasm/dasm_mips.h +@@ -350,6 +350,7 @@ int dasm_encode(Dst_DECL, void *buffer) + break; + case DASM_REL_LG: + CK(n >= 0, UNDEF_LG); ++ /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n); +diff --git a/dynasm/dasm_ppc.h b/dynasm/dasm_ppc.h +index 6110361..81b9a76 100644 +--- a/dynasm/dasm_ppc.h ++++ b/dynasm/dasm_ppc.h +@@ -350,6 +350,7 @@ int dasm_encode(Dst_DECL, void *buffer) + break; + case DASM_REL_LG: + CK(n >= 0, UNDEF_LG); ++ /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base); +diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h +index f9260b0..8ae911d 100644 +--- a/dynasm/dasm_x86.h ++++ b/dynasm/dasm_x86.h +@@ -194,12 +194,13 @@ void dasm_put(Dst_DECL, int start, ...) 
+ switch (action) { + case DASM_DISP: + if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; } +- case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; ++ /* fallthrough */ ++ case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */ + case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */ + case DASM_IMM_D: ofs += 4; break; + case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob; + case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break; +- case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; ++ case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */ + case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; + case DASM_SPACE: p++; ofs += n; break; + case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ +@@ -323,11 +324,14 @@ int dasm_link(Dst_DECL, size_t *szp) + pos += 2; + break; + } ++ /* fallthrough */ + case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++; ++ /* fallthrough */ + case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W: + case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB: + case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break; + case DASM_LABEL_LG: p++; ++ /* fallthrough */ + case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */ + case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. 
*/ + case DASM_EXTERN: p += 2; break; +@@ -385,12 +389,15 @@ int dasm_encode(Dst_DECL, void *buffer) + if (mrm != 5) { mm[-1] -= 0x80; break; } } + if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40; + } ++ /* fallthrough */ + case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break; + case DASM_IMM_DB: if (((n+128)&-256) == 0) { + db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb; + } else mark = NULL; ++ /* fallthrough */ + case DASM_IMM_D: wd: dasmd(n); break; + case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL; ++ /* fallthrough */ + case DASM_IMM_W: dasmw(n); break; + case DASM_VREG: { + int t = *p++; +@@ -397,6 +404,7 @@ + } + case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; + b++; n = (int)(ptrdiff_t)D->globals[-n]; ++ /* fallthrough */ + case DASM_REL_A: rel_a: + n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ + case DASM_REL_PC: rel_pc: { +@@ -407,6 +415,7 @@ int dasm_encode(Dst_DECL, void *buffer) + } + case DASM_IMM_LG: + p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; } ++ /* fallthrough */ + case DASM_IMM_PC: { + int *pb = DASM_POS2PTR(D, n); + n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base); +@@ -427,6 +436,7 @@ int dasm_encode(Dst_DECL, void *buffer) + case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd; + case DASM_MARK: mark = cp; break; + case DASM_ESC: action = *p++; ++ /* fallthrough */ + default: *cp++ = action; break; + case DASM_SECTION: case DASM_STOP: goto stop; + } +diff --git a/src/lj_asm.c b/src/lj_asm.c +index 02714d4..dd7186f 100644 +--- a/src/lj_asm.c ++++ b/src/lj_asm.c +@@ -2136,6 +2136,7 @@ static void asm_setup_regsp(ASMState *as) + case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT: + if (REGARG_NUMGPR < 3 && as->evenspill < 3) + as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. 
*/ ++ /* fallthrough */ + #if LJ_TARGET_X86 && LJ_HASFFI + if (0) { + case IR_CNEW: +@@ -2176,7 +2177,7 @@ static void asm_setup_regsp(ASMState *as) + continue; + #endif + } +- /* fallthrough for integer POW */ ++ /* fallthrough */ /* for integer POW */ + case IR_DIV: case IR_MOD: + if (!irt_isnum(ir->t)) { + ir->prev = REGSP_HINT(RID_RET); +diff --git a/src/lj_cparse.c b/src/lj_cparse.c +index 2ba50a7..f111537 100644 +--- a/src/lj_cparse.c ++++ b/src/lj_cparse.c +@@ -590,28 +590,34 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) + k->id = k2.id > k3.id ? k2.id : k3.id; + continue; + } ++ /* fallthrough */ + case 1: + if (cp_opt(cp, CTOK_OROR)) { + cp_expr_sub(cp, &k2, 2); k->i32 = k->u32 || k2.u32; k->id = CTID_INT32; + continue; + } ++ /* fallthrough */ + case 2: + if (cp_opt(cp, CTOK_ANDAND)) { + cp_expr_sub(cp, &k2, 3); k->i32 = k->u32 && k2.u32; k->id = CTID_INT32; + continue; + } ++ /* fallthrough */ + case 3: + if (cp_opt(cp, '|')) { + cp_expr_sub(cp, &k2, 4); k->u32 = k->u32 | k2.u32; goto arith_result; + } ++ /* fallthrough */ + case 4: + if (cp_opt(cp, '^')) { + cp_expr_sub(cp, &k2, 5); k->u32 = k->u32 ^ k2.u32; goto arith_result; + } ++ /* fallthrough */ + case 5: + if (cp_opt(cp, '&')) { + cp_expr_sub(cp, &k2, 6); k->u32 = k->u32 & k2.u32; goto arith_result; + } ++ /* fallthrough */ + case 6: + if (cp_opt(cp, CTOK_EQ)) { + cp_expr_sub(cp, &k2, 7); k->i32 = k->u32 == k2.u32; k->id = CTID_INT32; +@@ -620,6 +626,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) + cp_expr_sub(cp, &k2, 7); k->i32 = k->u32 != k2.u32; k->id = CTID_INT32; + continue; + } ++ /* fallthrough */ + case 7: + if (cp_opt(cp, '<')) { + cp_expr_sub(cp, &k2, 8); +@@ -654,6 +661,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) + k->id = CTID_INT32; + continue; + } ++ /* fallthrough */ + case 8: + if (cp_opt(cp, CTOK_SHL)) { + cp_expr_sub(cp, &k2, 9); k->u32 = k->u32 << k2.u32; +@@ -666,6 +674,7 @@ static void cp_expr_infix(CPState *cp, CPValue 
*k, int pri) + k->u32 = k->u32 >> k2.u32; + continue; + } ++ /* fallthrough */ + case 9: + if (cp_opt(cp, '+')) { + cp_expr_sub(cp, &k2, 10); k->u32 = k->u32 + k2.u32; +@@ -675,6 +684,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) + } else if (cp_opt(cp, '-')) { + cp_expr_sub(cp, &k2, 10); k->u32 = k->u32 - k2.u32; goto arith_result; + } ++ /* fallthrough */ + case 10: + if (cp_opt(cp, '*')) { + cp_expr_unary(cp, &k2); k->u32 = k->u32 * k2.u32; goto arith_result; +diff --git a/src/lj_err.c b/src/lj_err.c +index 54f42c3..13a1ded 100644 +--- a/src/lj_err.c ++++ b/src/lj_err.c +@@ -153,6 +153,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) + case FRAME_CONT: /* Continuation frame. */ + if (frame_iscont_fficb(frame)) + goto unwind_c; ++ /* fallthrough */ + case FRAME_VARG: /* Vararg frame. */ + frame = frame_prevd(frame); + break; +diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c +index 6a00d04..4efe395 100644 +--- a/src/lj_opt_sink.c ++++ b/src/lj_opt_sink.c +@@ -100,8 +100,8 @@ static void sink_mark_ins(jit_State *J) + (LJ_32 && ir+1 < irlast && (ir+1)->o == IR_HIOP && + !sink_checkphi(J, ir, (ir+1)->op2)))) + irt_setmark(ir->t); /* Mark ineligible allocation. */ +- /* fallthrough */ + #endif ++ /* fallthrough */ + case IR_USTORE: + irt_setmark(IR(ir->op2)->t); /* Mark stored value. 
*/ + break; +diff --git a/src/lj_parse.c b/src/lj_parse.c +index 9e5976f..6785495 100644 +--- a/src/lj_parse.c ++++ b/src/lj_parse.c +@@ -2696,7 +2696,8 @@ static int parse_stmt(LexState *ls) + lj_lex_next(ls); + parse_goto(ls); + break; +- } /* else: fallthrough */ ++ } ++ /* fallthrough */ + default: + parse_call_assign(ls); + break; +diff --git a/src/luajit.c b/src/luajit.c +index 9e15b26..0e18dc5 100644 +--- a/src/luajit.c ++++ b/src/luajit.c +@@ -419,6 +419,7 @@ static int collectargs(char **argv, int *flags) + break; + case 'e': + *flags |= FLAGS_EXEC; ++ /* fallthrough */ + case 'j': /* LuaJIT extension */ + case 'l': + *flags |= FLAGS_OPTION; +-- +2.20.1 + diff --git a/SOURCES/0033-Clear-stack-after-print_jit_status-in-CLI.patch b/SOURCES/0033-Clear-stack-after-print_jit_status-in-CLI.patch new file mode 100644 index 0000000..53a4acf --- /dev/null +++ b/SOURCES/0033-Clear-stack-after-print_jit_status-in-CLI.patch @@ -0,0 +1,32 @@ +commit fddef924097f28c46a0a5b45483a6086b33cab81 +Merge: 0bf46e1 03cd5aa +Author: Mike Pall +Date: Mon Jan 29 13:28:53 2018 +0100 + + Merge branch 'master' into v2.1 + +From 03cd5aa749c1bc3bb4b7d4289236b6096cb3dc85 Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Mon, 29 Jan 2018 13:25:51 +0100 +Subject: [PATCH 33/72] Clear stack after print_jit_status() in CLI. + +Suggested by Hydroque. 
+--- + src/luajit.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/luajit.c b/src/luajit.c +index 0e18dc5..9ede59c 100644 +--- a/src/luajit.c ++++ b/src/luajit.c +@@ -151,6 +151,7 @@ static void print_jit_status(lua_State *L) + fputs(s, stdout); + } + putc('\n', stdout); ++ lua_settop(L, 0); /* clear stack */ + } + + static void createargtable(lua_State *L, char **argv, int argc, int argf) +-- +2.20.1 + diff --git a/SOURCES/0034-Fix-rechaining-of-pseudo-resurrected-string-keys.patch b/SOURCES/0034-Fix-rechaining-of-pseudo-resurrected-string-keys.patch new file mode 100644 index 0000000..1b90fb3 --- /dev/null +++ b/SOURCES/0034-Fix-rechaining-of-pseudo-resurrected-string-keys.patch @@ -0,0 +1,52 @@ +From 046129dbdda5261c1b17469a2895a113d14c070a Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Tue, 27 Feb 2018 23:02:23 +0100 +Subject: [PATCH 34/72] Fix rechaining of pseudo-resurrected string keys. + +This is a serious bug. But extremely hard to reproduce, so it went +undetected for 8 years. One needs two resurrections with different +main nodes, which are both in a hash chain which gets relinked on +key insertion where the colliding node is in a non-main position. Phew. + +Thanks to lbeiming. +--- + src/lj_tab.c | 23 +++++++++++++++++++++++ + 1 file changed, 23 insertions(+) + +diff --git a/src/lj_tab.c b/src/lj_tab.c +index 50f447e..f2f3c0b 100644 +--- a/src/lj_tab.c ++++ b/src/lj_tab.c +@@ -457,6 +457,29 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) + freenode->next = nn->next; + nn->next = n->next; + setmref(n->next, nn); ++ /* ++ ** Rechaining a resurrected string key creates a new dilemma: ++ ** Another string key may have originally been resurrected via ++ ** _any_ of the previous nodes as a chain anchor. Including ++ ** a node that had to be moved, which makes them unreachable. ++ ** It's not feasible to check for all previous nodes, so rechain ++ ** any string key that's currently in a non-main positions. 
++ */ ++ while ((nn = nextnode(freenode))) { ++ if (tvisstr(&nn->key) && !tvisnil(&nn->val)) { ++ Node *mn = hashstr(t, strV(&nn->key)); ++ if (mn != freenode) { ++ freenode->next = nn->next; ++ nn->next = mn->next; ++ setmref(mn->next, nn); ++ } else { ++ freenode = nn; ++ } ++ } else { ++ freenode = nn; ++ } ++ } ++ break; + } else { + freenode = nn; + } +-- +2.20.1 + diff --git a/SOURCES/0035-DynASM-x86-Add-BMI1-and-BMI2-instructions.patch b/SOURCES/0035-DynASM-x86-Add-BMI1-and-BMI2-instructions.patch new file mode 100644 index 0000000..832809e --- /dev/null +++ b/SOURCES/0035-DynASM-x86-Add-BMI1-and-BMI2-instructions.patch @@ -0,0 +1,50 @@ +From fe651bf6e2b4d02b624be3c289378c08bab2fa9b Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Tue, 27 Feb 2018 23:22:40 +0100 +Subject: [PATCH 35/72] DynASM/x86: Add BMI1 and BMI2 instructions. + +Thanks to Peter Cawley. +--- + dynasm/dasm_x86.lua | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua +index 4c031e2..c1d267a 100644 +--- a/dynasm/dasm_x86.lua ++++ b/dynasm/dasm_x86.lua +@@ -955,6 +955,7 @@ end + -- "u" Use VEX encoding, vvvv unused. + -- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is + -- removed from the list used by future characters). ++-- "w" Use VEX encoding, vvvv from 3rd operand. 
+ -- "L" Force VEX.L + -- + -- All of the following characters force a flush of the opcode: +@@ -1677,6 +1678,24 @@ local map_op = { + -- Intel ADX + adcx_2 = "rmqd:660F38F6rM", + adox_2 = "rmqd:F30F38F6rM", ++ ++ -- BMI1 ++ andn_3 = "rrmqd:0F38VF2rM", ++ bextr_3 = "rmrqd:0F38wF7rM", ++ blsi_2 = "rmqd:0F38vF33m", ++ blsmsk_2 = "rmqd:0F38vF32m", ++ blsr_2 = "rmqd:0F38vF31m", ++ tzcnt_2 = "rmqdw:F30FBCrM", ++ ++ -- BMI2 ++ bzhi_3 = "rmrqd:0F38wF5rM", ++ mulx_3 = "rrmqd:F20F38VF6rM", ++ pdep_3 = "rrmqd:F20F38VF5rM", ++ pext_3 = "rrmqd:F30F38VF5rM", ++ rorx_3 = "rmSqd:F20F3AuF0rMS", ++ sarx_3 = "rmrqd:F30F38wF7rM", ++ shrx_3 = "rmrqd:F20F38wF7rM", ++ shlx_3 = "rmrqd:660F38wF7rM", + } + + ------------------------------------------------------------------------------ +-- +2.20.1 + diff --git a/SOURCES/0036-Give-expected-results-for-negative-non-base-10-numbe.patch b/SOURCES/0036-Give-expected-results-for-negative-non-base-10-numbe.patch new file mode 100644 index 0000000..3279dfe --- /dev/null +++ b/SOURCES/0036-Give-expected-results-for-negative-non-base-10-numbe.patch @@ -0,0 +1,55 @@ +From f3cf0d6e15240098147437fed7bd436ff55fdf8c Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Sun, 22 Apr 2018 13:14:28 +0200 +Subject: [PATCH 36/72] Give expected results for negative non-base-10 numbers + in tonumber(). + +This was undefined in Lua 5.1, but it's defined in 5.2. +--- + src/lib_base.c | 27 ++++++++++++++++++--------- + 1 file changed, 18 insertions(+), 9 deletions(-) + +diff --git a/src/lib_base.c b/src/lib_base.c +index 3a75787..d61e876 100644 +--- a/src/lib_base.c ++++ b/src/lib_base.c +@@ -287,18 +287,27 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) 
+ } else { + const char *p = strdata(lj_lib_checkstr(L, 1)); + char *ep; ++ unsigned int neg = 0; + unsigned long ul; + if (base < 2 || base > 36) + lj_err_arg(L, 2, LJ_ERR_BASERNG); +- ul = strtoul(p, &ep, base); +- if (p != ep) { +- while (lj_char_isspace((unsigned char)(*ep))) ep++; +- if (*ep == '\0') { +- if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u)) +- setintV(L->base-1-LJ_FR2, (int32_t)ul); +- else +- setnumV(L->base-1-LJ_FR2, (lua_Number)ul); +- return FFH_RES(1); ++ while (lj_char_isspace((unsigned char)(*p))) p++; ++ if (*p == '-') { p++; neg = 1; } else if (*p == '+') { p++; } ++ if (lj_char_isalnum((unsigned char)(*p))) { ++ ul = strtoul(p, &ep, base); ++ if (p != ep) { ++ while (lj_char_isspace((unsigned char)(*ep))) ep++; ++ if (*ep == '\0') { ++ if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) { ++ if (neg) ul = -ul; ++ setintV(L->base-1-LJ_FR2, (int32_t)ul); ++ } else { ++ lua_Number n = (lua_Number)ul; ++ if (neg) n = -n; ++ setnumV(L->base-1-LJ_FR2, n); ++ } ++ return FFH_RES(1); ++ } + } + } + } +-- +2.20.1 + diff --git a/SOURCES/0037-FFI-Add-tonumber-specialization-for-failed-conversio.patch b/SOURCES/0037-FFI-Add-tonumber-specialization-for-failed-conversio.patch new file mode 100644 index 0000000..c0406a5 --- /dev/null +++ b/SOURCES/0037-FFI-Add-tonumber-specialization-for-failed-conversio.patch @@ -0,0 +1,27 @@ +From 02b521981a1ab919ff2cd4d9bcaee80baf77dce2 Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Sun, 22 Apr 2018 13:27:25 +0200 +Subject: [PATCH 37/72] FFI: Add tonumber() specialization for failed + conversions. + +Contributed by Javier Guerra Giraldez. 
+--- + src/lj_crecord.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/lj_crecord.c b/src/lj_crecord.c +index 84fc49e..bc88d63 100644 +--- a/src/lj_crecord.c ++++ b/src/lj_crecord.c +@@ -1661,6 +1661,8 @@ void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd) + d = ctype_get(cts, CTID_DOUBLE); + J->base[0] = crec_ct_tv(J, d, 0, J->base[0], &rd->argv[0]); + } else { ++ /* Specialize to the ctype that couldn't be converted. */ ++ argv2cdata(J, J->base[0], &rd->argv[0]); + J->base[0] = TREF_NIL; + } + } +-- +2.20.1 + diff --git a/SOURCES/0038-Bump-copyright-date-to-2018.patch b/SOURCES/0038-Bump-copyright-date-to-2018.patch new file mode 100644 index 0000000..1f9e5eb --- /dev/null +++ b/SOURCES/0038-Bump-copyright-date-to-2018.patch @@ -0,0 +1,387 @@ +From cf7a0540a3a9f80fc729211eb21d1e9b72acc89c Mon Sep 17 00:00:00 2001 +From: Mike Pall +Date: Wed, 25 Apr 2018 12:07:08 +0200 +Subject: [PATCH 38/72] Bump copyright date to 2018. + +--- + doc/bluequad-print.css | 2 +- + doc/bluequad.css | 2 +- + doc/changes.html | 5 ++--- + doc/contact.html | 7 +++---- + doc/ext_c_api.html | 5 ++--- + doc/ext_ffi.html | 5 ++--- + doc/ext_ffi_api.html | 5 ++--- + doc/ext_ffi_semantics.html | 5 ++--- + doc/ext_ffi_tutorial.html | 5 ++--- + doc/ext_jit.html | 5 ++--- + doc/extensions.html | 5 ++--- + doc/faq.html | 5 ++--- + doc/install.html | 5 ++--- + doc/luajit.html | 7 +++---- + doc/running.html | 5 ++--- + doc/status.html | 5 ++--- + 16 files changed, 32 insertions(+), 46 deletions(-) + +diff --git a/doc/bluequad-print.css b/doc/bluequad-print.css +index 62e1c16..d5a3ea3 100644 +--- a/doc/bluequad-print.css ++++ b/doc/bluequad-print.css +@@ -1,4 +1,4 @@ +-/* Copyright (C) 2004-2017 Mike Pall. ++/* Copyright (C) 2004-2018 Mike Pall. + * + * You are welcome to use the general ideas of this design for your own sites. + * But please do not steal the stylesheet, the layout or the color scheme. 
+diff --git a/doc/bluequad.css b/doc/bluequad.css +index be2c4bf..cfc889a 100644 +--- a/doc/bluequad.css ++++ b/doc/bluequad.css +@@ -1,4 +1,4 @@ +-/* Copyright (C) 2004-2017 Mike Pall. ++/* Copyright (C) 2004-2018 Mike Pall. + * + * You are welcome to use the general ideas of this design for your own sites. + * But please do not steal the stylesheet, the layout or the color scheme. +diff --git a/doc/changes.html b/doc/changes.html +index 4a4d4fb..c1848e8 100644 +--- a/doc/changes.html ++++ b/doc/changes.html +@@ -3,8 +3,7 @@ + + LuaJIT Change History + +- +- ++ + + + +@@ -1010,7 +1009,7 @@ This is the initial non-public release of LuaJIT. + +

+ + + + + + + + + + + + +