You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

2742 lines
72 KiB

From fd37da0d586c331b0008fbfd653a9659344fe76f Mon Sep 17 00:00:00 2001
From: Mike Pall <mike>
Date: Wed, 26 Jul 2017 09:52:19 +0200
Subject: [PATCH] PPC: Add soft-float support to interpreter.
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
Sponsored by Cisco Systems, Inc.
---
src/host/buildvm_asm.c | 2 +-
src/lj_arch.h | 29 +-
src/lj_ccall.c | 38 +-
src/lj_ccall.h | 4 +-
src/lj_ccallback.c | 30 +-
src/lj_frame.h | 2 +-
src/lj_ircall.h | 2 +-
src/vm_ppc.dasc | 1249 +++++++++++++++++++++++++++++++++-------
8 files changed, 1101 insertions(+), 255 deletions(-)
--- a/src/host/buildvm_asm.c
+++ b/src/host/buildvm_asm.c
@@ -338,7 +338,7 @@ void emit_asm(BuildCtx *ctx)
#if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
#endif
-#if LJ_TARGET_PPC && !LJ_TARGET_PS3
+#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP
/* Hard-float ABI. */
fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
#endif
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -254,6 +254,29 @@
#else
#define LJ_ARCH_BITS 32
#define LJ_ARCH_NAME "ppc"
+
+#if !defined(LJ_ARCH_HASFPU)
+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
+#define LJ_ARCH_HASFPU 0
+#else
+#define LJ_ARCH_HASFPU 1
+#endif
+#endif
+
+#if !defined(LJ_ABI_SOFTFP)
+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
+#define LJ_ABI_SOFTFP 1
+#else
+#define LJ_ABI_SOFTFP 0
+#endif
+#endif
+#endif
+
+#if LJ_ABI_SOFTFP
+#define LJ_ARCH_NOJIT 1 /* NYI */
+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
+#else
+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
#endif
#define LJ_TARGET_PPC 1
@@ -262,7 +285,6 @@
#define LJ_TARGET_MASKSHIFT 0
#define LJ_TARGET_MASKROT 1
#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
-#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
#if LJ_TARGET_CONSOLE
#define LJ_ARCH_PPC32ON64 1
@@ -415,16 +437,13 @@
#error "No support for ILP32 model on ARM64"
#endif
#elif LJ_TARGET_PPC
-#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
-#error "No support for PowerPC CPUs without double-precision FPU"
-#endif
#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
#error "No support for little-endian PPC32"
#endif
#if LJ_ARCH_PPC64
#error "No support for PowerPC 64 bit mode (yet)"
#endif
-#ifdef __NO_FPRS__
+#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
#endif
#elif LJ_TARGET_MIPS32
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -387,6 +387,24 @@
#define CCALL_HANDLE_COMPLEXARG \
/* Pass complex by value in 2 or 4 GPRs. */
+#define CCALL_HANDLE_GPR \
+ /* Try to pass argument in GPRs. */ \
+ if (n > 1) { \
+ lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \
+ if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \
+ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
+ else if (ngpr + n > maxgpr) \
+ ngpr = maxgpr; /* Prevent reordering. */ \
+ } \
+ if (ngpr + n <= maxgpr) { \
+ dp = &cc->gpr[ngpr]; \
+ ngpr += n; \
+ goto done; \
+ } \
+
+#if LJ_ABI_SOFTFP
+#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR
+#else
#define CCALL_HANDLE_REGARG \
if (isfp) { /* Try to pass argument in FPRs. */ \
if (nfpr + 1 <= CCALL_NARG_FPR) { \
@@ -395,24 +413,16 @@
d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
goto done; \
} \
- } else { /* Try to pass argument in GPRs. */ \
- if (n > 1) { \
- lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \
- if (ctype_isinteger(d->info)) \
- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
- else if (ngpr + n > maxgpr) \
- ngpr = maxgpr; /* Prevent reordering. */ \
- } \
- if (ngpr + n <= maxgpr) { \
- dp = &cc->gpr[ngpr]; \
- ngpr += n; \
- goto done; \
- } \
+ } else { \
+ CCALL_HANDLE_GPR \
}
+#endif
+#if !LJ_ABI_SOFTFP
#define CCALL_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
+#endif
#elif LJ_TARGET_MIPS32
/* -- MIPS o32 calling conventions ---------------------------------------- */
@@ -1080,7 +1090,7 @@ static int ccall_set_args(lua_State *L,
}
if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
-#if LJ_TARGET_X64 || LJ_TARGET_PPC
+#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
cc->nfpr = nfpr; /* Required for vararg functions. */
#endif
cc->nsp = nsp;
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -86,9 +86,9 @@ typedef union FPRArg {
#elif LJ_TARGET_PPC
#define CCALL_NARG_GPR 8
-#define CCALL_NARG_FPR 8
+#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8)
#define CCALL_NRET_GPR 4 /* For complex double. */
-#define CCALL_NRET_FPR 1
+#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1)
#define CCALL_SPS_EXTRA 4
#define CCALL_SPS_FREE 0
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -419,6 +419,23 @@ void lj_ccallback_mcode_free(CTState *ct
#elif LJ_TARGET_PPC
+#define CALLBACK_HANDLE_GPR \
+ if (n > 1) { \
+ lua_assert(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \
+ ctype_isinteger(cta->info)) && n == 2); /* int64_t. */ \
+ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
+ } \
+ if (ngpr + n <= maxgpr) { \
+ sp = &cts->cb.gpr[ngpr]; \
+ ngpr += n; \
+ goto done; \
+ }
+
+#if LJ_ABI_SOFTFP
+#define CALLBACK_HANDLE_REGARG \
+ CALLBACK_HANDLE_GPR \
+ UNUSED(isfp);
+#else
#define CALLBACK_HANDLE_REGARG \
if (isfp) { \
if (nfpr + 1 <= CCALL_NARG_FPR) { \
@@ -427,20 +444,15 @@ void lj_ccallback_mcode_free(CTState *ct
goto done; \
} \
} else { /* Try to pass argument in GPRs. */ \
- if (n > 1) { \
- lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \
- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
- } \
- if (ngpr + n <= maxgpr) { \
- sp = &cts->cb.gpr[ngpr]; \
- ngpr += n; \
- goto done; \
- } \
+ CALLBACK_HANDLE_GPR \
}
+#endif
+#if !LJ_ABI_SOFTFP
#define CALLBACK_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
*(double *)dp = *(float *)dp; /* FPRs always hold doubles. */
+#endif
#elif LJ_TARGET_MIPS32
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -226,7 +226,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CAL
#define CFRAME_OFS_L 36
#define CFRAME_OFS_PC 32
#define CFRAME_OFS_MULTRES 28
-#define CFRAME_SIZE 272
+#define CFRAME_SIZE (LJ_ARCH_HASFPU ? 272 : 128)
#define CFRAME_SHIFT_MULTRES 3
#endif
#elif LJ_TARGET_MIPS32
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -272,7 +272,7 @@ LJ_DATA const CCallInfo lj_ir_callinfo[I
#define fp64_f2l __aeabi_f2lz
#define fp64_f2ul __aeabi_f2ulz
#endif
-#elif LJ_TARGET_MIPS
+#elif LJ_TARGET_MIPS || LJ_TARGET_PPC
#define softfp_add __adddf3
#define softfp_sub __subdf3
#define softfp_mul __muldf3
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -103,6 +103,18 @@
|// Fixed register assignments for the interpreter.
|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA)
|
+|.macro .FPU, a, b
+|.if FPU
+| a, b
+|.endif
+|.endmacro
+|
+|.macro .FPU, a, b, c
+|.if FPU
+| a, b, c
+|.endif
+|.endmacro
+|
|// The following must be C callee-save (but BASE is often refetched).
|.define BASE, r14 // Base of current Lua stack frame.
|.define KBASE, r15 // Constants of current Lua function.
@@ -116,8 +128,10 @@
|.define TISNUM, r22
|.define TISNIL, r23
|.define ZERO, r24
+|.if FPU
|.define TOBIT, f30 // 2^52 + 2^51.
|.define TONUM, f31 // 2^52 + 2^51 + 2^31.
+|.endif
|
|// The following temporaries are not saved across C calls, except for RA.
|.define RA, r20 // Callee-save.
@@ -133,6 +147,7 @@
|
|// Saved temporaries.
|.define SAVE0, r21
+|.define SAVE1, r25
|
|// Calling conventions.
|.define CARG1, r3
@@ -141,8 +156,10 @@
|.define CARG4, r6 // Overlaps TMP3.
|.define CARG5, r7 // Overlaps INS.
|
+|.if FPU
|.define FARG1, f1
|.define FARG2, f2
+|.endif
|
|.define CRET1, r3
|.define CRET2, r4
@@ -213,10 +230,16 @@
|.endif
|.else
|
+|.if FPU
|.define SAVE_LR, 276(sp)
|.define CFRAME_SPACE, 272 // Delta for sp.
|// Back chain for sp: 272(sp) <-- sp entering interpreter
|.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves.
+|.else
+|.define SAVE_LR, 132(sp)
+|.define CFRAME_SPACE, 128 // Delta for sp.
+|// Back chain for sp: 128(sp) <-- sp entering interpreter
+|.endif
|.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves.
|.define SAVE_CR, 52(sp) // 32 bit CR save.
|.define SAVE_ERRF, 48(sp) // 32 bit C frame info.
@@ -226,16 +249,25 @@
|.define SAVE_PC, 32(sp)
|.define SAVE_MULTRES, 28(sp)
|.define UNUSED1, 24(sp)
+|.if FPU
|.define TMPD_LO, 20(sp)
|.define TMPD_HI, 16(sp)
|.define TONUM_LO, 12(sp)
|.define TONUM_HI, 8(sp)
+|.else
+|.define SFSAVE_4, 20(sp)
+|.define SFSAVE_3, 16(sp)
+|.define SFSAVE_2, 12(sp)
+|.define SFSAVE_1, 8(sp)
+|.endif
|// Next frame lr: 4(sp)
|// Back chain for sp: 0(sp) <-- sp while in interpreter
|
+|.if FPU
|.define TMPD_BLO, 23(sp)
|.define TMPD, TMPD_HI
|.define TONUM_D, TONUM_HI
+|.endif
|
|.endif
|
@@ -245,7 +277,7 @@
|.else
| stw r..reg, SAVE_GPR_+(reg-14)*4(sp)
|.endif
-| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
+| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
|.endmacro
|.macro rest_, reg
|.if GPR64
@@ -253,7 +285,7 @@
|.else
| lwz r..reg, SAVE_GPR_+(reg-14)*4(sp)
|.endif
-| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
+| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
|.endmacro
|
|.macro saveregs
@@ -323,6 +355,7 @@
|// Trap for not-yet-implemented parts.
|.macro NYI; tw 4, sp, sp; .endmacro
|
+|.if FPU
|// int/FP conversions.
|.macro tonum_i, freg, reg
| xoris reg, reg, 0x8000
@@ -346,6 +379,7 @@
|.macro toint, reg, freg
| toint reg, freg, freg
|.endmacro
+|.endif
|
|//-----------------------------------------------------------------------
|
@@ -533,9 +567,19 @@ static void build_subroutines(BuildCtx *
| beq >2
|1:
| addic. TMP1, TMP1, -8
+ |.if FPU
| lfd f0, 0(RA)
+ |.else
+ | lwz CARG1, 0(RA)
+ | lwz CARG2, 4(RA)
+ |.endif
| addi RA, RA, 8
+ |.if FPU
| stfd f0, 0(BASE)
+ |.else
+ | stw CARG1, 0(BASE)
+ | stw CARG2, 4(BASE)
+ |.endif
| addi BASE, BASE, 8
| bney <1
|
@@ -613,23 +657,23 @@ static void build_subroutines(BuildCtx *
| .toc ld TOCREG, SAVE_TOC
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
| lp BASE, L->base
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| lwz DISPATCH, L->glref // Setup pointer to dispatch table.
| li ZERO, 0
- | stw TMP3, TMPD
+ | .FPU stw TMP3, TMPD
| li TMP1, LJ_TFALSE
- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
| li TISNIL, LJ_TNIL
| li_vmstate INTERP
- | lfs TOBIT, TMPD
+ | .FPU lfs TOBIT, TMPD
| lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
| la RA, -8(BASE) // Results start at BASE-8.
- | stw TMP3, TMPD
+ | .FPU stw TMP3, TMPD
| addi DISPATCH, DISPATCH, GG_G2DISP
| stw TMP1, 0(RA) // Prepend false to error message.
| li RD, 16 // 2 results: false + error message.
| st_vmstate
- | lfs TONUM, TMPD
+ | .FPU lfs TONUM, TMPD
| b ->vm_returnc
|
|//-----------------------------------------------------------------------
@@ -690,22 +734,22 @@ static void build_subroutines(BuildCtx *
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
| lp TMP1, L->top
| lwz PC, FRAME_PC(BASE)
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| stb CARG3, L->status
- | stw TMP3, TMPD
- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- | lfs TOBIT, TMPD
+ | .FPU stw TMP3, TMPD
+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
+ | .FPU lfs TOBIT, TMPD
| sub RD, TMP1, BASE
- | stw TMP3, TMPD
- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
+ | .FPU stw TMP3, TMPD
+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
| addi RD, RD, 8
- | stw TMP0, TONUM_HI
+ | .FPU stw TMP0, TONUM_HI
| li_vmstate INTERP
| li ZERO, 0
| st_vmstate
| andix. TMP0, PC, FRAME_TYPE
| mr MULTRES, RD
- | lfs TONUM, TMPD
+ | .FPU lfs TONUM, TMPD
| li TISNIL, LJ_TNIL
| beq ->BC_RET_Z
| b ->vm_return
@@ -739,19 +783,19 @@ static void build_subroutines(BuildCtx *
| lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
| lp TMP1, L->top
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| add PC, PC, BASE
- | stw TMP3, TMPD
+ | .FPU stw TMP3, TMPD
| li ZERO, 0
- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- | lfs TOBIT, TMPD
+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
+ | .FPU lfs TOBIT, TMPD
| sub PC, PC, TMP2 // PC = frame delta + frame type
- | stw TMP3, TMPD
- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
+ | .FPU stw TMP3, TMPD
+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
| sub NARGS8:RC, TMP1, BASE
- | stw TMP0, TONUM_HI
+ | .FPU stw TMP0, TONUM_HI
| li_vmstate INTERP
- | lfs TONUM, TMPD
+ | .FPU lfs TONUM, TMPD
| li TISNIL, LJ_TNIL
| st_vmstate
|
@@ -839,15 +883,30 @@ static void build_subroutines(BuildCtx *
| lwz INS, -4(PC)
| subi CARG2, RB, 16
| decode_RB8 SAVE0, INS
+ |.if FPU
| lfd f0, 0(RA)
+ |.else
+ | lwz TMP2, 0(RA)
+ | lwz TMP3, 4(RA)
+ |.endif
| add TMP1, BASE, SAVE0
| stp BASE, L->base
| cmplw TMP1, CARG2
| sub CARG3, CARG2, TMP1
| decode_RA8 RA, INS
+ |.if FPU
| stfd f0, 0(CARG2)
+ |.else
+ | stw TMP2, 0(CARG2)
+ | stw TMP3, 4(CARG2)
+ |.endif
| bney ->BC_CAT_Z
+ |.if FPU
| stfdx f0, BASE, RA
+ |.else
+ | stwux TMP2, RA, BASE
+ | stw TMP3, 4(RA)
+ |.endif
| b ->cont_nop
|
|//-- Table indexing metamethods -----------------------------------------
@@ -900,9 +959,19 @@ static void build_subroutines(BuildCtx *
| // Returns TValue * (finished) or NULL (metamethod).
| cmplwi CRET1, 0
| beq >3
+ |.if FPU
| lfd f0, 0(CRET1)
+ |.else
+ | lwz TMP0, 0(CRET1)
+ | lwz TMP1, 4(CRET1)
+ |.endif
| ins_next1
+ |.if FPU
| stfdx f0, BASE, RA
+ |.else
+ | stwux TMP0, RA, BASE
+ | stw TMP1, 4(RA)
+ |.endif
| ins_next2
|
|3: // Call __index metamethod.
@@ -920,7 +989,12 @@ static void build_subroutines(BuildCtx *
| // Returns cTValue * or NULL.
| cmplwi CRET1, 0
| beq >1
+ |.if FPU
| lfd f14, 0(CRET1)
+ |.else
+ | lwz SAVE0, 0(CRET1)
+ | lwz SAVE1, 4(CRET1)
+ |.endif
| b ->BC_TGETR_Z
|1:
| stwx TISNIL, BASE, RA
@@ -975,11 +1049,21 @@ static void build_subroutines(BuildCtx *
| bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
| // Returns TValue * (finished) or NULL (metamethod).
| cmplwi CRET1, 0
+ |.if FPU
| lfdx f0, BASE, RA
+ |.else
+ | lwzux TMP2, RA, BASE
+ | lwz TMP3, 4(RA)
+ |.endif
| beq >3
| // NOBARRIER: lj_meta_tset ensures the table is not black.
| ins_next1
+ |.if FPU
| stfd f0, 0(CRET1)
+ |.else
+ | stw TMP2, 0(CRET1)
+ | stw TMP3, 4(CRET1)
+ |.endif
| ins_next2
|
|3: // Call __newindex metamethod.
@@ -990,7 +1074,12 @@ static void build_subroutines(BuildCtx *
| add PC, TMP1, BASE
| lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
| li NARGS8:RC, 24 // 3 args for func(t, k, v)
+ |.if FPU
| stfd f0, 16(BASE) // Copy value to third argument.
+ |.else
+ | stw TMP2, 16(BASE)
+ | stw TMP3, 20(BASE)
+ |.endif
| b ->vm_call_dispatch_f
|
|->vmeta_tsetr:
@@ -998,7 +1087,12 @@ static void build_subroutines(BuildCtx *
| stw PC, SAVE_PC
| bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
| // Returns TValue *.
+ |.if FPU
| stfd f14, 0(CRET1)
+ |.else
+ | stw SAVE0, 0(CRET1)
+ | stw SAVE1, 4(CRET1)
+ |.endif
| b ->cont_nop
|
|//-- Comparison metamethods ---------------------------------------------
@@ -1037,9 +1131,19 @@ static void build_subroutines(BuildCtx *
|
|->cont_ra: // RA = resultptr
| lwz INS, -4(PC)
+ |.if FPU
| lfd f0, 0(RA)
+ |.else
+ | lwz CARG1, 0(RA)
+ | lwz CARG2, 4(RA)
+ |.endif
| decode_RA8 TMP1, INS
+ |.if FPU
| stfdx f0, BASE, TMP1
+ |.else
+ | stwux CARG1, TMP1, BASE
+ | stw CARG2, 4(TMP1)
+ |.endif
| b ->cont_nop
|
|->cont_condt: // RA = resultptr
@@ -1245,22 +1349,32 @@ static void build_subroutines(BuildCtx *
|.macro .ffunc_n, name
|->ff_ .. name:
| cmplwi NARGS8:RC, 8
- | lwz CARG3, 0(BASE)
+ | lwz CARG1, 0(BASE)
+ |.if FPU
| lfd FARG1, 0(BASE)
+ |.else
+ | lwz CARG2, 4(BASE)
+ |.endif
| blt ->fff_fallback
- | checknum CARG3; bge ->fff_fallback
+ | checknum CARG1; bge ->fff_fallback
|.endmacro
|
|.macro .ffunc_nn, name
|->ff_ .. name:
| cmplwi NARGS8:RC, 16
- | lwz CARG3, 0(BASE)
+ | lwz CARG1, 0(BASE)
+ |.if FPU
| lfd FARG1, 0(BASE)
- | lwz CARG4, 8(BASE)
+ | lwz CARG3, 8(BASE)
| lfd FARG2, 8(BASE)
+ |.else
+ | lwz CARG2, 4(BASE)
+ | lwz CARG3, 8(BASE)
+ | lwz CARG4, 12(BASE)
+ |.endif
| blt ->fff_fallback
+ | checknum CARG1; bge ->fff_fallback
| checknum CARG3; bge ->fff_fallback
- | checknum CARG4; bge ->fff_fallback
|.endmacro
|
|// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
@@ -1281,14 +1395,21 @@ static void build_subroutines(BuildCtx *
| bge cr1, ->fff_fallback
| stw CARG3, 0(RA)
| addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
+ | addi TMP1, BASE, 8
+ | add TMP2, RA, NARGS8:RC
| stw CARG1, 4(RA)
| beq ->fff_res // Done if exactly 1 argument.
- | li TMP1, 8
- | subi RC, RC, 8
|1:
- | cmplw TMP1, RC
- | lfdx f0, BASE, TMP1
- | stfdx f0, RA, TMP1
+ | cmplw TMP1, TMP2
+ |.if FPU
+ | lfd f0, 0(TMP1)
+ | stfd f0, -8(TMP1) // Store one slot below the source, same as the soft-float path.
+ |.else
+ | lwz CARG1, 0(TMP1)
+ | lwz CARG2, 4(TMP1)
+ | stw CARG1, -8(TMP1)
+ | stw CARG2, -4(TMP1)
+ |.endif
| addi TMP1, TMP1, 8
| bney <1
| b ->fff_res
@@ -1303,8 +1424,14 @@ static void build_subroutines(BuildCtx *
| orc TMP1, TMP2, TMP0
| addi TMP1, TMP1, ~LJ_TISNUM+1
| slwi TMP1, TMP1, 3
+ |.if FPU
| la TMP2, CFUNC:RB->upvalue
| lfdx FARG1, TMP2, TMP1
+ |.else
+ | add TMP1, CFUNC:RB, TMP1
+ | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi
+ | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo
+ |.endif
| b ->fff_resn
|
|//-- Base library: getters and setters ---------------------------------
@@ -1382,7 +1509,12 @@ static void build_subroutines(BuildCtx *
| mr CARG1, L
| bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
| // Returns cTValue *.
+ |.if FPU
| lfd FARG1, 0(CRET1)
+ |.else
+ | lwz CARG2, 4(CRET1)
+ | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1.
+ |.endif
| b ->fff_resn
|
|//-- Base library: conversions ------------------------------------------
@@ -1391,7 +1523,11 @@ static void build_subroutines(BuildCtx *
| // Only handles the number case inline (without a base argument).
| cmplwi NARGS8:RC, 8
| lwz CARG1, 0(BASE)
+ |.if FPU
| lfd FARG1, 0(BASE)
+ |.else
+ | lwz CARG2, 4(BASE)
+ |.endif
| bne ->fff_fallback // Exactly one argument.
| checknum CARG1; bgt ->fff_fallback
| b ->fff_resn
@@ -1442,12 +1578,23 @@ static void build_subroutines(BuildCtx *
| cmplwi CRET1, 0
| li CARG3, LJ_TNIL
| beq ->fff_restv // End of traversal: return nil.
- | lfd f0, 8(BASE) // Copy key and value to results.
| la RA, -8(BASE)
+ |.if FPU
+ | lfd f0, 8(BASE) // Copy key and value to results.
| lfd f1, 16(BASE)
| stfd f0, 0(RA)
- | li RD, (2+1)*8
| stfd f1, 8(RA)
+ |.else
+ | lwz CARG1, 8(BASE)
+ | lwz CARG2, 12(BASE)
+ | lwz CARG3, 16(BASE)
+ | lwz CARG4, 20(BASE)
+ | stw CARG1, 0(RA)
+ | stw CARG2, 4(RA)
+ | stw CARG3, 8(RA)
+ | stw CARG4, 12(RA)
+ |.endif
+ | li RD, (2+1)*8
| b ->fff_res
|
|.ffunc_1 pairs
@@ -1456,17 +1603,32 @@ static void build_subroutines(BuildCtx *
| bne ->fff_fallback
#if LJ_52
| lwz TAB:TMP2, TAB:CARG1->metatable
+ |.if FPU
| lfd f0, CFUNC:RB->upvalue[0]
+ |.else
+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
+ |.endif
| cmplwi TAB:TMP2, 0
| la RA, -8(BASE)
| bne ->fff_fallback
#else
+ |.if FPU
| lfd f0, CFUNC:RB->upvalue[0]
+ |.else
+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
+ |.endif
| la RA, -8(BASE)
#endif
| stw TISNIL, 8(BASE)
| li RD, (3+1)*8
+ |.if FPU
| stfd f0, 0(RA)
+ |.else
+ | stw TMP0, 0(RA)
+ | stw TMP1, 4(RA)
+ |.endif
| b ->fff_res
|
|.ffunc ipairs_aux
@@ -1512,14 +1674,24 @@ static void build_subroutines(BuildCtx *
| stfd FARG2, 0(RA)
|.endif
| ble >2 // Not in array part?
+ |.if FPU
| lwzx TMP2, TMP1, TMP3
| lfdx f0, TMP1, TMP3
+ |.else
+ | lwzux TMP2, TMP1, TMP3
+ | lwz TMP3, 4(TMP1)
+ |.endif
|1:
| checknil TMP2
| li RD, (0+1)*8
| beq ->fff_res // End of iteration, return 0 results.
| li RD, (2+1)*8
+ |.if FPU
| stfd f0, 8(RA)
+ |.else
+ | stw TMP2, 8(RA)
+ | stw TMP3, 12(RA)
+ |.endif
| b ->fff_res
|2: // Check for empty hash part first. Otherwise call C function.
| lwz TMP0, TAB:CARG1->hmask
@@ -1533,7 +1705,11 @@ static void build_subroutines(BuildCtx *
| li RD, (0+1)*8
| beq ->fff_res
| lwz TMP2, 0(CRET1)
+ |.if FPU
| lfd f0, 0(CRET1)
+ |.else
+ | lwz TMP3, 4(CRET1)
+ |.endif
| b <1
|
|.ffunc_1 ipairs
@@ -1542,12 +1718,22 @@ static void build_subroutines(BuildCtx *
| bne ->fff_fallback
#if LJ_52
| lwz TAB:TMP2, TAB:CARG1->metatable
+ |.if FPU
| lfd f0, CFUNC:RB->upvalue[0]
+ |.else
+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
+ |.endif
| cmplwi TAB:TMP2, 0
| la RA, -8(BASE)
| bne ->fff_fallback
#else
+ |.if FPU
| lfd f0, CFUNC:RB->upvalue[0]
+ |.else
+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
+ |.endif
| la RA, -8(BASE)
#endif
|.if DUALNUM
@@ -1557,7 +1743,12 @@ static void build_subroutines(BuildCtx *
|.endif
| stw ZERO, 12(BASE)
| li RD, (3+1)*8
+ |.if FPU
| stfd f0, 0(RA)
+ |.else
+ | stw TMP0, 0(RA)
+ | stw TMP1, 4(RA)
+ |.endif
| b ->fff_res
|
|//-- Base library: catch errors ----------------------------------------
@@ -1576,19 +1767,32 @@ static void build_subroutines(BuildCtx *
|
|.ffunc xpcall
| cmplwi NARGS8:RC, 16
- | lwz CARG4, 8(BASE)
+ | lwz CARG3, 8(BASE)
+ |.if FPU
| lfd FARG2, 8(BASE)
| lfd FARG1, 0(BASE)
+ |.else
+ | lwz CARG1, 0(BASE)
+ | lwz CARG2, 4(BASE)
+ | lwz CARG4, 12(BASE)
+ |.endif
| blt ->fff_fallback
| lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH)
| mr TMP2, BASE
- | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function.
+ | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function.
| la BASE, 16(BASE)
| // Remember active hook before pcall.
| rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31
+ |.if FPU
| stfd FARG2, 0(TMP2) // Swap function and traceback.
- | subi NARGS8:RC, NARGS8:RC, 16
| stfd FARG1, 8(TMP2)
+ |.else
+ | stw CARG3, 0(TMP2)
+ | stw CARG4, 4(TMP2)
+ | stw CARG1, 8(TMP2)
+ | stw CARG2, 12(TMP2)
+ |.endif
+ | subi NARGS8:RC, NARGS8:RC, 16
| addi PC, TMP1, 16+FRAME_PCALL
| b ->vm_call_dispatch
|
@@ -1631,9 +1835,21 @@ static void build_subroutines(BuildCtx *
| stp BASE, L->top
|2: // Move args to coroutine.
| cmpw TMP1, NARGS8:RC
+ |.if FPU
| lfdx f0, BASE, TMP1
+ |.else
+ | add CARG3, BASE, TMP1
+ | lwz TMP2, 0(CARG3)
+ | lwz TMP3, 4(CARG3)
+ |.endif
| beq >3
+ |.if FPU
| stfdx f0, CARG2, TMP1
+ |.else
+ | add CARG3, CARG2, TMP1
+ | stw TMP2, 0(CARG3)
+ | stw TMP3, 4(CARG3)
+ |.endif
| addi TMP1, TMP1, 8
| b <2
|3:
@@ -1664,8 +1880,17 @@ static void build_subroutines(BuildCtx *
| stp TMP2, L:SAVE0->top // Clear coroutine stack.
|5: // Move results from coroutine.
| cmplw TMP1, TMP3
+ |.if FPU
| lfdx f0, TMP2, TMP1
| stfdx f0, BASE, TMP1
+ |.else
+ | add CARG3, TMP2, TMP1
+ | lwz CARG1, 0(CARG3)
+ | lwz CARG2, 4(CARG3)
+ | add CARG3, BASE, TMP1
+ | stw CARG1, 0(CARG3)
+ | stw CARG2, 4(CARG3)
+ |.endif
| addi TMP1, TMP1, 8
| bne <5
|6:
@@ -1690,12 +1915,22 @@ static void build_subroutines(BuildCtx *
| andix. TMP0, PC, FRAME_TYPE
| la TMP3, -8(TMP3)
| li TMP1, LJ_TFALSE
+ |.if FPU
| lfd f0, 0(TMP3)
+ |.else
+ | lwz CARG1, 0(TMP3)
+ | lwz CARG2, 4(TMP3)
+ |.endif
| stp TMP3, L:SAVE0->top // Remove error from coroutine stack.
| li RD, (2+1)*8
| stw TMP1, -8(BASE) // Prepend false to results.
| la RA, -8(BASE)
+ |.if FPU
| stfd f0, 0(BASE) // Copy error message.
+ |.else
+ | stw CARG1, 0(BASE) // Copy error message.
+ | stw CARG2, 4(BASE)
+ |.endif
| b <7
|.else
| mr CARG1, L
@@ -1874,7 +2109,12 @@ static void build_subroutines(BuildCtx *
| lus CARG1, 0x8000 // -(2^31).
| beqy ->fff_resi
|5:
+ |.if FPU
| lfd FARG1, 0(BASE)
+ |.else
+ | lwz CARG1, 0(BASE)
+ | lwz CARG2, 4(BASE)
+ |.endif
| blex func
| b ->fff_resn
|.endmacro
@@ -1898,10 +2138,14 @@ static void build_subroutines(BuildCtx *
|
|.ffunc math_log
| cmplwi NARGS8:RC, 8
- | lwz CARG3, 0(BASE)
- | lfd FARG1, 0(BASE)
+ | lwz CARG1, 0(BASE)
| bne ->fff_fallback // Need exactly 1 argument.
- | checknum CARG3; bge ->fff_fallback
+ | checknum CARG1; bge ->fff_fallback
+ |.if FPU
+ | lfd FARG1, 0(BASE)
+ |.else
+ | lwz CARG2, 4(BASE)
+ |.endif
| blex log
| b ->fff_resn
|
@@ -1923,17 +2167,24 @@ static void build_subroutines(BuildCtx *
|.if DUALNUM
|.ffunc math_ldexp
| cmplwi NARGS8:RC, 16
- | lwz CARG3, 0(BASE)
+ | lwz TMP0, 0(BASE)
+ |.if FPU
| lfd FARG1, 0(BASE)
- | lwz CARG4, 8(BASE)
+ |.else
+ | lwz CARG1, 0(BASE)
+ | lwz CARG2, 4(BASE)
+ |.endif
+ | lwz TMP1, 8(BASE)
|.if GPR64
| lwz CARG2, 12(BASE)
- |.else
+ |.elif FPU
| lwz CARG1, 12(BASE)
+ |.else
+ | lwz CARG3, 12(BASE)
|.endif
| blt ->fff_fallback
- | checknum CARG3; bge ->fff_fallback
- | checknum CARG4; bne ->fff_fallback
+ | checknum TMP0; bge ->fff_fallback
+ | checknum TMP1; bne ->fff_fallback
|.else
|.ffunc_nn math_ldexp
|.if GPR64
@@ -1948,8 +2199,10 @@ static void build_subroutines(BuildCtx *
|.ffunc_n math_frexp
|.if GPR64
| la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
- |.else
+ |.elif FPU
| la CARG1, DISPATCH_GL(tmptv)(DISPATCH)
+ |.else
+ | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
|.endif
| lwz PC, FRAME_PC(BASE)
| blex frexp
@@ -1958,7 +2211,12 @@ static void build_subroutines(BuildCtx *
|.if not DUALNUM
| tonum_i FARG2, TMP1
|.endif
+ |.if FPU
| stfd FARG1, 0(RA)
+ |.else
+ | stw CRET1, 0(RA)
+ | stw CRET2, 4(RA)
+ |.endif
| li RD, (2+1)*8
|.if DUALNUM
| stw TISNUM, 8(RA)
@@ -1971,13 +2229,20 @@ static void build_subroutines(BuildCtx *
|.ffunc_n math_modf
|.if GPR64
| la CARG2, -8(BASE)
- |.else
+ |.elif FPU
| la CARG1, -8(BASE)
+ |.else
+ | la CARG3, -8(BASE)
|.endif
| lwz PC, FRAME_PC(BASE)
| blex modf
| la RA, -8(BASE)
+ |.if FPU
| stfd FARG1, 0(BASE)
+ |.else
+ | stw CRET1, 0(BASE)
+ | stw CRET2, 4(BASE)
+ |.endif
| li RD, (2+1)*8
| b ->fff_res
|
@@ -1985,13 +2250,13 @@ static void build_subroutines(BuildCtx *
|.if DUALNUM
| .ffunc_1 name
| checknum CARG3
- | addi TMP1, BASE, 8
- | add TMP2, BASE, NARGS8:RC
+ | addi SAVE0, BASE, 8
+ | add SAVE1, BASE, NARGS8:RC
| bne >4
|1: // Handle integers.
- | lwz CARG4, 0(TMP1)
- | cmplw cr1, TMP1, TMP2
- | lwz CARG2, 4(TMP1)
+ | lwz CARG4, 0(SAVE0)
+ | cmplw cr1, SAVE0, SAVE1
+ | lwz CARG2, 4(SAVE0)
| bge cr1, ->fff_resi
| checknum CARG4
| xoris TMP0, CARG1, 0x8000
@@ -2008,36 +2273,76 @@ static void build_subroutines(BuildCtx *
|.if GPR64
| rldicl CARG1, CARG1, 0, 32
|.endif
- | addi TMP1, TMP1, 8
+ | addi SAVE0, SAVE0, 8
| b <1
|3:
| bge ->fff_fallback
| // Convert intermediate result to number and continue below.
+ |.if FPU
| tonum_i FARG1, CARG1
- | lfd FARG2, 0(TMP1)
+ | lfd FARG2, 0(SAVE0)
+ |.else
+ | mr CARG2, CARG1
+ | bl ->vm_sfi2d_1
+ | lwz CARG3, 0(SAVE0)
+ | lwz CARG4, 4(SAVE0)
+ |.endif
| b >6
|4:
+ |.if FPU
| lfd FARG1, 0(BASE)
+ |.else
+ | lwz CARG1, 0(BASE)
+ | lwz CARG2, 4(BASE)
+ |.endif
| bge ->fff_fallback
|5: // Handle numbers.
- | lwz CARG4, 0(TMP1)
- | cmplw cr1, TMP1, TMP2
- | lfd FARG2, 0(TMP1)
+ | lwz CARG3, 0(SAVE0)
+ | cmplw cr1, SAVE0, SAVE1
+ |.if FPU
+ | lfd FARG2, 0(SAVE0)
+ |.else
+ | lwz CARG4, 4(SAVE0)
+ |.endif
| bge cr1, ->fff_resn
- | checknum CARG4; bge >7
+ | checknum CARG3; bge >7
|6:
+ | addi SAVE0, SAVE0, 8
+ |.if FPU
| fsub f0, FARG1, FARG2
- | addi TMP1, TMP1, 8
|.if ismax
| fsel FARG1, f0, FARG1, FARG2
|.else
| fsel FARG1, f0, FARG2, FARG1
|.endif
+ |.else
+ | stw CARG1, SFSAVE_1
+ | stw CARG2, SFSAVE_2
+ | stw CARG3, SFSAVE_3
+ | stw CARG4, SFSAVE_4
+ | blex __ledf2
+ | cmpwi CRET1, 0
+ |.if ismax
+ | blt >8
+ |.else
+ | bge >8
+ |.endif
+ | lwz CARG1, SFSAVE_1
+ | lwz CARG2, SFSAVE_2
+ | b <5
+ |8:
+ | lwz CARG1, SFSAVE_3
+ | lwz CARG2, SFSAVE_4
+ |.endif
| b <5
|7: // Convert integer to number and continue above.
- | lwz CARG2, 4(TMP1)
+ | lwz CARG3, 4(SAVE0)
| bne ->fff_fallback
- | tonum_i FARG2, CARG2
+ |.if FPU
+ | tonum_i FARG2, CARG3
+ |.else
+ | bl ->vm_sfi2d_2
+ |.endif
| b <6
|.else
| .ffunc_n name
@@ -2237,28 +2542,37 @@ static void build_subroutines(BuildCtx *
|
|.macro .ffunc_bit_op, name, ins
| .ffunc_bit name
- | addi TMP1, BASE, 8
- | add TMP2, BASE, NARGS8:RC
+ | addi SAVE0, BASE, 8
+ | add SAVE1, BASE, NARGS8:RC
|1:
- | lwz CARG4, 0(TMP1)
- | cmplw cr1, TMP1, TMP2
+ | lwz CARG4, 0(SAVE0)
+ | cmplw cr1, SAVE0, SAVE1
|.if DUALNUM
- | lwz CARG2, 4(TMP1)
+ | lwz CARG2, 4(SAVE0)
|.else
- | lfd FARG1, 0(TMP1)
+ | lfd FARG1, 0(SAVE0)
|.endif
| bgey cr1, ->fff_resi
| checknum CARG4
|.if DUALNUM
+ |.if FPU
| bnel ->fff_bitop_fb
|.else
+ | beq >3
+ | stw CARG1, SFSAVE_1
+ | bl ->fff_bitop_fb
+ | mr CARG2, CARG1
+ | lwz CARG1, SFSAVE_1
+ |3:
+ |.endif
+ |.else
| fadd FARG1, FARG1, TOBIT
| bge ->fff_fallback
| stfd FARG1, TMPD
| lwz CARG2, TMPD_LO
|.endif
| ins CARG1, CARG1, CARG2
- | addi TMP1, TMP1, 8
+ | addi SAVE0, SAVE0, 8
| b <1
|.endmacro
|
@@ -2280,7 +2594,14 @@ static void build_subroutines(BuildCtx *
|.macro .ffunc_bit_sh, name, ins, shmod
|.if DUALNUM
| .ffunc_2 bit_..name
+ |.if FPU
| checknum CARG3; bnel ->fff_tobit_fb
+ |.else
+ | checknum CARG3; beq >1
+ | bl ->fff_tobit_fb
+ | lwz CARG2, 12(BASE) // Conversion polluted CARG2.
+ |1:
+ |.endif
| // Note: no inline conversion from number for 2nd argument!
| checknum CARG4; bne ->fff_fallback
|.else
@@ -2317,27 +2638,77 @@ static void build_subroutines(BuildCtx *
|->fff_resn:
| lwz PC, FRAME_PC(BASE)
| la RA, -8(BASE)
+ |.if FPU
| stfd FARG1, -8(BASE)
+ |.else
+ | stw CARG1, -8(BASE)
+ | stw CARG2, -4(BASE)
+ |.endif
| b ->fff_res1
|
|// Fallback FP number to bit conversion.
|->fff_tobit_fb:
|.if DUALNUM
+ |.if FPU
| lfd FARG1, 0(BASE)
| bgt ->fff_fallback
| fadd FARG1, FARG1, TOBIT
| stfd FARG1, TMPD
| lwz CARG1, TMPD_LO
| blr
+ |.else
+ | bgt ->fff_fallback
+ | mr CARG2, CARG1
+ | mr CARG1, CARG3
+ |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2.
+ |->vm_tobit:
+ | slwi TMP2, CARG1, 1
+ | addis TMP2, TMP2, 0x0020
+ | cmpwi TMP2, 0
+ | bge >2
+ | li TMP1, 0x3e0
+ | srawi TMP2, TMP2, 21
+ | not TMP1, TMP1
+ | sub. TMP2, TMP1, TMP2
+ | cmpwi cr7, CARG1, 0
+ | blt >1
+ | slwi TMP1, CARG1, 11
+ | srwi TMP0, CARG2, 21
+ | oris TMP1, TMP1, 0x8000
+ | or TMP1, TMP1, TMP0
+ | srw CARG1, TMP1, TMP2
+ | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
+ | neg CARG1, CARG1
+ | blr
+ |1:
+ | addi TMP2, TMP2, 21
+ | srw TMP1, CARG2, TMP2
+ | slwi CARG2, CARG1, 12
+ | subfic TMP2, TMP2, 20
+ | slw TMP0, CARG2, TMP2
+ | or CARG1, TMP1, TMP0
+ | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
+ | neg CARG1, CARG1
+ | blr
+ |2:
+ | li CARG1, 0
+ | blr
+ |.endif
|.endif
|->fff_bitop_fb:
|.if DUALNUM
- | lfd FARG1, 0(TMP1)
+ |.if FPU
+ | lfd FARG1, 0(SAVE0)
| bgt ->fff_fallback
| fadd FARG1, FARG1, TOBIT
| stfd FARG1, TMPD
| lwz CARG2, TMPD_LO
| blr
+ |.else
+ | bgt ->fff_fallback
+ | mr CARG1, CARG4
+ | b ->vm_tobit
+ |.endif
|.endif
|
|//-----------------------------------------------------------------------
@@ -2530,10 +2901,21 @@ static void build_subroutines(BuildCtx *
| decode_RA8 RC, INS // Call base.
| beq >2
|1: // Move results down.
+ |.if FPU
| lfd f0, 0(RA)
+ |.else
+ | lwz CARG1, 0(RA)
+ | lwz CARG2, 4(RA)
+ |.endif
| addic. TMP1, TMP1, -8
| addi RA, RA, 8
+ |.if FPU
| stfdx f0, BASE, RC
+ |.else
+ | add CARG3, BASE, RC
+ | stw CARG1, 0(CARG3)
+ | stw CARG2, 4(CARG3)
+ |.endif
| addi RC, RC, 8
| bne <1
|2:
@@ -2586,10 +2968,12 @@ static void build_subroutines(BuildCtx *
|//-----------------------------------------------------------------------
|
|.macro savex_, a, b, c, d
+ |.if FPU
| stfd f..a, 16+a*8(sp)
| stfd f..b, 16+b*8(sp)
| stfd f..c, 16+c*8(sp)
| stfd f..d, 16+d*8(sp)
+ |.endif
|.endmacro
|
|->vm_exit_handler:
@@ -2661,16 +3045,16 @@ static void build_subroutines(BuildCtx *
| lwz KBASE, PC2PROTO(k)(TMP1)
| // Setup type comparison constants.
| li TISNUM, LJ_TISNUM
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | stw TMP3, TMPD
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU stw TMP3, TMPD
| li ZERO, 0
- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- | lfs TOBIT, TMPD
- | stw TMP3, TMPD
- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
+ | .FPU lfs TOBIT, TMPD
+ | .FPU stw TMP3, TMPD
+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
| li TISNIL, LJ_TNIL
- | stw TMP0, TONUM_HI
- | lfs TONUM, TMPD
+ | .FPU stw TMP0, TONUM_HI
+ | .FPU lfs TONUM, TMPD
| // Modified copy of ins_next which handles function header dispatch, too.
| lwz INS, 0(PC)
| addi PC, PC, 4
@@ -2715,7 +3099,35 @@ static void build_subroutines(BuildCtx *
|//-- Math helper functions ----------------------------------------------
|//-----------------------------------------------------------------------
|
- |// NYI: Use internal implementations of floor, ceil, trunc.
+ |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp.
+ |
+ |.macro sfi2d, AHI, ALO // Soft-float only: int32 in ALO -> double in AHI (hi) / ALO (lo), then return.
+ |.if not FPU
+ | mr. AHI, ALO // AHI = ALO; cr0 reflects the input value.
+ | bclr 12, 2 // Handle zero first.
+ | srawi TMP0, ALO, 31 // TMP0 = 0 or -1 (sign mask).
+ | xor TMP1, ALO, TMP0
+ | sub TMP1, TMP1, TMP0 // Absolute value in TMP1.
+ | cntlzw AHI, TMP1 // Leading zero count = left shift needed to normalize.
+ | andix. TMP0, TMP0, 0x800 // Mask sign bit.
+ | slw TMP1, TMP1, AHI // Align mantissa left with leading 1.
+ | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI.
+ | slwi ALO, TMP1, 21 // Low 11 mantissa bits go to the top of the low word.
+ | or AHI, AHI, TMP0 // Sign | Exponent.
+ | srwi TMP1, TMP1, 11 // Leading 1 plus upper 20 mantissa bits.
+ | slwi AHI, AHI, 20 // Align left.
+ | add AHI, AHI, TMP1 // Add mantissa, increment exponent.
+ | blr // The macro body ends with a return to the caller.
+ |.endif
+ |.endmacro
+ |
+ |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1.
+ |->vm_sfi2d_1:
+ | sfi2d CARG1, CARG2
+ |
+ |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1.
+ |->vm_sfi2d_2:
+ | sfi2d CARG3, CARG4
|
|->vm_modi:
| divwo. TMP0, CARG1, CARG2
@@ -2783,21 +3195,21 @@ static void build_subroutines(BuildCtx *
| addi DISPATCH, r12, GG_G2DISP
| stw r11, CTSTATE->cb.slot
| stw r3, CTSTATE->cb.gpr[0]
- | stfd f1, CTSTATE->cb.fpr[0]
+ | .FPU stfd f1, CTSTATE->cb.fpr[0]
| stw r4, CTSTATE->cb.gpr[1]
- | stfd f2, CTSTATE->cb.fpr[1]
+ | .FPU stfd f2, CTSTATE->cb.fpr[1]
| stw r5, CTSTATE->cb.gpr[2]
- | stfd f3, CTSTATE->cb.fpr[2]
+ | .FPU stfd f3, CTSTATE->cb.fpr[2]
| stw r6, CTSTATE->cb.gpr[3]
- | stfd f4, CTSTATE->cb.fpr[3]
+ | .FPU stfd f4, CTSTATE->cb.fpr[3]
| stw r7, CTSTATE->cb.gpr[4]
- | stfd f5, CTSTATE->cb.fpr[4]
+ | .FPU stfd f5, CTSTATE->cb.fpr[4]
| stw r8, CTSTATE->cb.gpr[5]
- | stfd f6, CTSTATE->cb.fpr[5]
+ | .FPU stfd f6, CTSTATE->cb.fpr[5]
| stw r9, CTSTATE->cb.gpr[6]
- | stfd f7, CTSTATE->cb.fpr[6]
+ | .FPU stfd f7, CTSTATE->cb.fpr[6]
| stw r10, CTSTATE->cb.gpr[7]
- | stfd f8, CTSTATE->cb.fpr[7]
+ | .FPU stfd f8, CTSTATE->cb.fpr[7]
| addi TMP0, sp, CFRAME_SPACE+8
| stw TMP0, CTSTATE->cb.stack
| mr CARG1, CTSTATE
@@ -2808,21 +3220,21 @@ static void build_subroutines(BuildCtx *
| lp BASE, L:CRET1->base
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
| lp RC, L:CRET1->top
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| li ZERO, 0
| mr L, CRET1
- | stw TMP3, TMPD
- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
+ | .FPU stw TMP3, TMPD
+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
| lwz LFUNC:RB, FRAME_FUNC(BASE)
- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- | stw TMP0, TONUM_HI
+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
+ | .FPU stw TMP0, TONUM_HI
| li TISNIL, LJ_TNIL
| li_vmstate INTERP
- | lfs TOBIT, TMPD
- | stw TMP3, TMPD
+ | .FPU lfs TOBIT, TMPD
+ | .FPU stw TMP3, TMPD
| sub RC, RC, BASE
| st_vmstate
- | lfs TONUM, TMPD
+ | .FPU lfs TONUM, TMPD
| ins_callt
|.endif
|
@@ -2836,7 +3248,7 @@ static void build_subroutines(BuildCtx *
| mr CARG2, RA
| bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
| lwz CRET1, CTSTATE->cb.gpr[0]
- | lfd FARG1, CTSTATE->cb.fpr[0]
+ | .FPU lfd FARG1, CTSTATE->cb.fpr[0]
| lwz CRET2, CTSTATE->cb.gpr[1]
| b ->vm_leave_unw
|.endif
@@ -2870,14 +3282,14 @@ static void build_subroutines(BuildCtx *
| bge <1
|2:
| bney cr1, >3
- | lfd f1, CCSTATE->fpr[0]
- | lfd f2, CCSTATE->fpr[1]
- | lfd f3, CCSTATE->fpr[2]
- | lfd f4, CCSTATE->fpr[3]
- | lfd f5, CCSTATE->fpr[4]
- | lfd f6, CCSTATE->fpr[5]
- | lfd f7, CCSTATE->fpr[6]
- | lfd f8, CCSTATE->fpr[7]
+ | .FPU lfd f1, CCSTATE->fpr[0]
+ | .FPU lfd f2, CCSTATE->fpr[1]
+ | .FPU lfd f3, CCSTATE->fpr[2]
+ | .FPU lfd f4, CCSTATE->fpr[3]
+ | .FPU lfd f5, CCSTATE->fpr[4]
+ | .FPU lfd f6, CCSTATE->fpr[5]
+ | .FPU lfd f7, CCSTATE->fpr[6]
+ | .FPU lfd f8, CCSTATE->fpr[7]
|3:
| lp TMP0, CCSTATE->func
| lwz CARG2, CCSTATE->gpr[1]
@@ -2894,7 +3306,7 @@ static void build_subroutines(BuildCtx *
| lwz TMP2, -4(r14)
| lwz TMP0, 4(r14)
| stw CARG1, CCSTATE:TMP1->gpr[0]
- | stfd FARG1, CCSTATE:TMP1->fpr[0]
+ | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0]
| stw CARG2, CCSTATE:TMP1->gpr[1]
| mtlr TMP0
| stw CARG3, CCSTATE:TMP1->gpr[2]
@@ -2923,19 +3335,19 @@ static void build_ins(BuildCtx *ctx, BCO
case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
| // RA = src1*8, RD = src2*8, JMP with RD = target
|.if DUALNUM
- | lwzux TMP0, RA, BASE
+ | lwzux CARG1, RA, BASE
| addi PC, PC, 4
| lwz CARG2, 4(RA)
- | lwzux TMP1, RD, BASE
+ | lwzux CARG3, RD, BASE
| lwz TMP2, -4(PC)
- | checknum cr0, TMP0
- | lwz CARG3, 4(RD)
+ | checknum cr0, CARG1
+ | lwz CARG4, 4(RD)
| decode_RD4 TMP2, TMP2
- | checknum cr1, TMP1
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
+ | checknum cr1, CARG3
+ | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16)
| bne cr0, >7
| bne cr1, >8
- | cmpw CARG2, CARG3
+ | cmpw CARG2, CARG4
if (op == BC_ISLT) {
| bge >2
} else if (op == BC_ISGE) {
@@ -2946,28 +3358,41 @@ static void build_ins(BuildCtx *ctx, BCO
| ble >2
}
|1:
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
|2:
| ins_next
|
|7: // RA is not an integer.
| bgt cr0, ->vmeta_comp
| // RA is a number.
- | lfd f0, 0(RA)
+ | .FPU lfd f0, 0(RA)
| bgt cr1, ->vmeta_comp
| blt cr1, >4
| // RA is a number, RD is an integer.
- | tonum_i f1, CARG3
+ |.if FPU
+ | tonum_i f1, CARG4
+ |.else
+ | bl ->vm_sfi2d_2
+ |.endif
| b >5
|
|8: // RA is an integer, RD is not an integer.
| bgt cr1, ->vmeta_comp
| // RA is an integer, RD is a number.
+ |.if FPU
| tonum_i f0, CARG2
+ |.else
+ | bl ->vm_sfi2d_1
+ |.endif
|4:
- | lfd f1, 0(RD)
+ | .FPU lfd f1, 0(RD)
|5:
+ |.if FPU
| fcmpu cr0, f0, f1
+ |.else
+ | blex __ledf2
+ | cmpwi CRET1, 0
+ |.endif
if (op == BC_ISLT) {
| bge <2
} else if (op == BC_ISGE) {
@@ -3015,42 +3440,42 @@ static void build_ins(BuildCtx *ctx, BCO
vk = op == BC_ISEQV;
| // RA = src1*8, RD = src2*8, JMP with RD = target
|.if DUALNUM
- | lwzux TMP0, RA, BASE
+ | lwzux CARG1, RA, BASE
| addi PC, PC, 4
| lwz CARG2, 4(RA)
- | lwzux TMP1, RD, BASE
- | checknum cr0, TMP0
- | lwz TMP2, -4(PC)
- | checknum cr1, TMP1
- | decode_RD4 TMP2, TMP2
- | lwz CARG3, 4(RD)
+ | lwzux CARG3, RD, BASE
+ | checknum cr0, CARG1
+ | lwz SAVE0, -4(PC)
+ | checknum cr1, CARG3
+ | decode_RD4 SAVE0, SAVE0
+ | lwz CARG4, 4(RD)
| cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
if (vk) {
| ble cr7, ->BC_ISEQN_Z
} else {
| ble cr7, ->BC_ISNEN_Z
}
|.else
- | lwzux TMP0, RA, BASE
- | lwz TMP2, 0(PC)
+ | lwzux CARG1, RA, BASE
+ | lwz SAVE0, 0(PC)
| lfd f0, 0(RA)
| addi PC, PC, 4
- | lwzux TMP1, RD, BASE
- | checknum cr0, TMP0
- | decode_RD4 TMP2, TMP2
+ | lwzux CARG3, RD, BASE
+ | checknum cr0, CARG1
+ | decode_RD4 SAVE0, SAVE0
| lfd f1, 0(RD)
- | checknum cr1, TMP1
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
+ | checknum cr1, CARG3
+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
| bge cr0, >5
| bge cr1, >5
| fcmpu cr0, f0, f1
if (vk) {
| bne >1
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
} else {
| beq >1
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
}
|1:
| ins_next
@@ -3058,36 +3483,36 @@ static void build_ins(BuildCtx *ctx, BCO
|5: // Either or both types are not numbers.
|.if not DUALNUM
| lwz CARG2, 4(RA)
- | lwz CARG3, 4(RD)
+ | lwz CARG4, 4(RD)
|.endif
|.if FFI
- | cmpwi cr7, TMP0, LJ_TCDATA
- | cmpwi cr5, TMP1, LJ_TCDATA
+ | cmpwi cr7, CARG1, LJ_TCDATA
+ | cmpwi cr5, CARG3, LJ_TCDATA
|.endif
- | not TMP3, TMP0
- | cmplw TMP0, TMP1
- | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive?
+ | not TMP2, CARG1
+ | cmplw CARG1, CARG3
+ | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive?
|.if FFI
| cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq
|.endif
- | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata?
+ | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata?
|.if FFI
| beq cr7, ->vmeta_equal_cd
|.endif
- | cmplw cr5, CARG2, CARG3
+ | cmplw cr5, CARG2, CARG4
| crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive.
| crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type.
| crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv.
- | mr SAVE0, PC
+ | mr SAVE1, PC
| cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2.
| cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2.
if (vk) {
| bne cr0, >6
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
|6:
} else {
| beq cr0, >6
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
|6:
}
|.if DUALNUM
@@ -3102,6 +3527,7 @@ static void build_ins(BuildCtx *ctx, BCO
|
| // Different tables or userdatas. Need to check __eq metamethod.
| // Field metatable must be at same offset for GCtab and GCudata!
+ | mr CARG3, CARG4
| lwz TAB:TMP2, TAB:CARG2->metatable
| li CARG4, 1-vk // ne = 0 or 1.
| cmplwi TAB:TMP2, 0
@@ -3109,7 +3535,7 @@ static void build_ins(BuildCtx *ctx, BCO
| lbz TMP2, TAB:TMP2->nomm
| andix. TMP2, TMP2, 1<<MM_eq
| bne <1 // Or 'no __eq' flag set?
- | mr PC, SAVE0 // Restore old PC.
+ | mr PC, SAVE1 // Restore old PC.
| b ->vmeta_equal // Handle __eq metamethod.
break;
@@ -3150,16 +3576,16 @@ static void build_ins(BuildCtx *ctx, BCO
vk = op == BC_ISEQN;
| // RA = src*8, RD = num_const*8, JMP with RD = target
|.if DUALNUM
- | lwzux TMP0, RA, BASE
+ | lwzux CARG1, RA, BASE
| addi PC, PC, 4
| lwz CARG2, 4(RA)
- | lwzux TMP1, RD, KBASE
- | checknum cr0, TMP0
- | lwz TMP2, -4(PC)
- | checknum cr1, TMP1
- | decode_RD4 TMP2, TMP2
- | lwz CARG3, 4(RD)
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
+ | lwzux CARG3, RD, KBASE
+ | checknum cr0, CARG1
+ | lwz SAVE0, -4(PC)
+ | checknum cr1, CARG3
+ | decode_RD4 SAVE0, SAVE0
+ | lwz CARG4, 4(RD)
+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
if (vk) {
|->BC_ISEQN_Z:
} else {
@@ -3167,7 +3593,7 @@ static void build_ins(BuildCtx *ctx, BCO
}
| bne cr0, >7
| bne cr1, >8
- | cmpw CARG2, CARG3
+ | cmpw CARG2, CARG4
|4:
|.else
if (vk) {
@@ -3175,20 +3601,20 @@ static void build_ins(BuildCtx *ctx, BCO
} else {
|->BC_ISNEN_Z: // Dummy label.
}
- | lwzx TMP0, BASE, RA
+ | lwzx CARG1, BASE, RA
| addi PC, PC, 4
| lfdx f0, BASE, RA
- | lwz TMP2, -4(PC)
+ | lwz SAVE0, -4(PC)
| lfdx f1, KBASE, RD
- | decode_RD4 TMP2, TMP2
- | checknum TMP0
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
+ | decode_RD4 SAVE0, SAVE0
+ | checknum CARG1
+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
| bge >3
| fcmpu cr0, f0, f1
|.endif
if (vk) {
| bne >1
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
|1:
|.if not FFI
|3:
@@ -3199,13 +3625,13 @@ static void build_ins(BuildCtx *ctx, BCO
|.if not FFI
|3:
|.endif
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
|2:
}
| ins_next
|.if FFI
|3:
- | cmpwi TMP0, LJ_TCDATA
+ | cmpwi CARG1, LJ_TCDATA
| beq ->vmeta_equal_cd
| b <1
|.endif
@@ -3213,18 +3639,31 @@ static void build_ins(BuildCtx *ctx, BCO
|7: // RA is not an integer.
| bge cr0, <3
| // RA is a number.
- | lfd f0, 0(RA)
+ | .FPU lfd f0, 0(RA)
| blt cr1, >1
| // RA is a number, RD is an integer.
- | tonum_i f1, CARG3
+ |.if FPU
+ | tonum_i f1, CARG4
+ |.else
+ | bl ->vm_sfi2d_2
+ |.endif
| b >2
|
|8: // RA is an integer, RD is a number.
+ |.if FPU
| tonum_i f0, CARG2
+ |.else
+ | bl ->vm_sfi2d_1
+ |.endif
|1:
- | lfd f1, 0(RD)
+ | .FPU lfd f1, 0(RD)
|2:
+ |.if FPU
| fcmpu cr0, f0, f1
+ |.else
+ | blex __ledf2
+ | cmpwi CRET1, 0
+ |.endif
| b <4
|.endif
break;
@@ -3279,7 +3718,12 @@ static void build_ins(BuildCtx *ctx, BCO
| add PC, PC, TMP2
} else {
| li TMP1, LJ_TFALSE
+ |.if FPU
| lfdx f0, BASE, RD
+ |.else
+ | lwzux CARG1, RD, BASE
+ | lwz CARG2, 4(RD)
+ |.endif
| cmplw TMP0, TMP1
if (op == BC_ISTC) {
| bge >1
@@ -3288,7 +3732,12 @@ static void build_ins(BuildCtx *ctx, BCO
}
| addis PC, PC, -(BCBIAS_J*4 >> 16)
| decode_RD4 TMP2, INS
+ |.if FPU
| stfdx f0, BASE, RA
+ |.else
+ | stwux CARG1, RA, BASE
+ | stw CARG2, 4(RA)
+ |.endif
| add PC, PC, TMP2
|1:
}
@@ -3323,8 +3772,15 @@ static void build_ins(BuildCtx *ctx, BCO
case BC_MOV:
| // RA = dst*8, RD = src*8
| ins_next1
+ |.if FPU
| lfdx f0, BASE, RD
| stfdx f0, BASE, RA
+ |.else
+ | lwzux TMP0, RD, BASE
+ | lwz TMP1, 4(RD)
+ | stwux TMP0, RA, BASE
+ | stw TMP1, 4(RA)
+ |.endif
| ins_next2
break;
case BC_NOT:
@@ -3426,44 +3882,65 @@ static void build_ins(BuildCtx *ctx, BCO
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
||switch (vk) {
||case 0:
- | lwzx TMP1, BASE, RB
+ | lwzx CARG1, BASE, RB
| .if DUALNUM
- | lwzx TMP2, KBASE, RC
+ | lwzx CARG3, KBASE, RC
| .endif
+ | .if FPU
| lfdx f14, BASE, RB
| lfdx f15, KBASE, RC
+ | .else
+ | add TMP1, BASE, RB
+ | add TMP2, KBASE, RC
+ | lwz CARG2, 4(TMP1)
+ | lwz CARG4, 4(TMP2)
+ | .endif
| .if DUALNUM
- | checknum cr0, TMP1
- | checknum cr1, TMP2
+ | checknum cr0, CARG1
+ | checknum cr1, CARG3
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| bge ->vmeta_arith_vn
| .else
- | checknum TMP1; bge ->vmeta_arith_vn
+ | checknum CARG1; bge ->vmeta_arith_vn
| .endif
|| break;
||case 1:
- | lwzx TMP1, BASE, RB
+ | lwzx CARG1, BASE, RB
| .if DUALNUM
- | lwzx TMP2, KBASE, RC
+ | lwzx CARG3, KBASE, RC
| .endif
+ | .if FPU
| lfdx f15, BASE, RB
| lfdx f14, KBASE, RC
+ | .else
+ | add TMP1, BASE, RB
+ | add TMP2, KBASE, RC
+ | lwz CARG2, 4(TMP1)
+ | lwz CARG4, 4(TMP2)
+ | .endif
| .if DUALNUM
- | checknum cr0, TMP1
- | checknum cr1, TMP2
+ | checknum cr0, CARG1
+ | checknum cr1, CARG3
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| bge ->vmeta_arith_nv
| .else
- | checknum TMP1; bge ->vmeta_arith_nv
+ | checknum CARG1; bge ->vmeta_arith_nv
| .endif
|| break;
||default:
- | lwzx TMP1, BASE, RB
- | lwzx TMP2, BASE, RC
+ | lwzx CARG1, BASE, RB
+ | lwzx CARG3, BASE, RC
+ | .if FPU
| lfdx f14, BASE, RB
| lfdx f15, BASE, RC
- | checknum cr0, TMP1
- | checknum cr1, TMP2
+ | .else
+ | add TMP1, BASE, RB
+ | add TMP2, BASE, RC
+ | lwz CARG2, 4(TMP1)
+ | lwz CARG4, 4(TMP2)
+ | .endif
+ | checknum cr0, CARG1
+ | checknum cr1, CARG3
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| bge ->vmeta_arith_vv
|| break;
@@ -3497,48 +3974,78 @@ static void build_ins(BuildCtx *ctx, BCO
| fsub a, b, a // b - floor(b/c)*c
|.endmacro
|
+ |.macro sfpmod // Soft-float modulo: a - floor(a/b)*b with a = CARG1/CARG2, b = CARG3/CARG4.
+ |->BC_MODVN_Z:
+ | stw CARG1, SFSAVE_1 // Save a; reloaded below for the final subtraction.
+ | stw CARG2, SFSAVE_2
+ | mr SAVE0, CARG3 // Keep b in SAVE0/SAVE1 across the calls.
+ | mr SAVE1, CARG4
+ | blex __divdf3 // a / b
+ | blex floor // floor(a / b); result is used directly as the first operand below.
+ | mr CARG3, SAVE0 // Second operand = b.
+ | mr CARG4, SAVE1
+ | blex __muldf3 // floor(a / b) * b
+ | mr CARG3, CRET1 // Product becomes the second operand.
+ | mr CARG4, CRET2
+ | lwz CARG1, SFSAVE_1 // First operand = a.
+ | lwz CARG2, SFSAVE_2
+ | blex __subdf3 // a - floor(a / b) * b
+ |.endmacro
+ |
|.macro ins_arithfp, fpins
| ins_arithpre
|.if "fpins" == "fpmod_"
| b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
- |.else
+ |.elif FPU
| fpins f0, f14, f15
| ins_next1
| stfdx f0, BASE, RA
| ins_next2
+ |.else
+ | blex __divdf3 // Only soft-float div uses this macro.
+ | ins_next1
+ | stwux CRET1, RA, BASE
+ | stw CRET2, 4(RA)
+ | ins_next2
|.endif
|.endmacro
|
- |.macro ins_arithdn, intins, fpins
+ |.macro ins_arithdn, intins, fpins, fpcall
| // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
||switch (vk) {
||case 0:
- | lwzux TMP1, RB, BASE
- | lwzux TMP2, RC, KBASE
- | lwz CARG1, 4(RB)
- | checknum cr0, TMP1
- | lwz CARG2, 4(RC)
+ | lwzux CARG1, RB, BASE
+ | lwzux CARG3, RC, KBASE
+ | lwz CARG2, 4(RB)
+ | checknum cr0, CARG1
+ | lwz CARG4, 4(RC)
+ | checknum cr1, CARG3
|| break;
||case 1:
- | lwzux TMP1, RB, BASE
- | lwzux TMP2, RC, KBASE
- | lwz CARG2, 4(RB)
- | checknum cr0, TMP1
- | lwz CARG1, 4(RC)
+ | lwzux CARG3, RB, BASE
+ | lwzux CARG1, RC, KBASE
+ | lwz CARG4, 4(RB)
+ | checknum cr0, CARG3
+ | lwz CARG2, 4(RC)
+ | checknum cr1, CARG1
|| break;
||default:
- | lwzux TMP1, RB, BASE
- | lwzux TMP2, RC, BASE
- | lwz CARG1, 4(RB)
- | checknum cr0, TMP1
- | lwz CARG2, 4(RC)
+ | lwzux CARG1, RB, BASE
+ | lwzux CARG3, RC, BASE
+ | lwz CARG2, 4(RB)
+ | checknum cr0, CARG1
+ | lwz CARG4, 4(RC)
+ | checknum cr1, CARG3
|| break;
||}
- | checknum cr1, TMP2
| bne >5
| bne cr1, >5
- | intins CARG1, CARG1, CARG2
+ |.if "intins" == "intmod"
+ | mr CARG1, CARG2
+ | mr CARG2, CARG4
+ |.endif
+ | intins CARG1, CARG2, CARG4
| bso >4
|1:
| ins_next1
@@ -3550,29 +4057,40 @@ static void build_ins(BuildCtx *ctx, BCO
| checkov TMP0, <1 // Ignore unrelated overflow.
| ins_arithfallback b
|5: // FP variant.
+ |.if FPU
||if (vk == 1) {
| lfd f15, 0(RB)
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| lfd f14, 0(RC)
||} else {
| lfd f14, 0(RB)
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| lfd f15, 0(RC)
||}
+ |.endif
+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| ins_arithfallback bge
|.if "fpins" == "fpmod_"
| b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
|.else
+ |.if FPU
| fpins f0, f14, f15
- | ins_next1
| stfdx f0, BASE, RA
+ |.else
+ |.if "fpcall" == "sfpmod"
+ | sfpmod
+ |.else
+ | blex fpcall
+ |.endif
+ | stwux CRET1, RA, BASE
+ | stw CRET2, 4(RA)
+ |.endif
+ | ins_next1
| b <2
|.endif
|.endmacro
|
- |.macro ins_arith, intins, fpins
+ |.macro ins_arith, intins, fpins, fpcall
|.if DUALNUM
- | ins_arithdn intins, fpins
+ | ins_arithdn intins, fpins, fpcall
|.else
| ins_arithfp fpins
|.endif
@@ -3587,9 +4105,9 @@ static void build_ins(BuildCtx *ctx, BCO
| addo. TMP0, TMP0, TMP3
| add y, a, b
|.endmacro
- | ins_arith addo32., fadd
+ | ins_arith addo32., fadd, __adddf3
|.else
- | ins_arith addo., fadd
+ | ins_arith addo., fadd, __adddf3
|.endif
break;
case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
@@ -3601,36 +4119,48 @@ static void build_ins(BuildCtx *ctx, BCO
| subo. TMP0, TMP0, TMP3
| sub y, a, b
|.endmacro
- | ins_arith subo32., fsub
+ | ins_arith subo32., fsub, __subdf3
|.else
- | ins_arith subo., fsub
+ | ins_arith subo., fsub, __subdf3
|.endif
break;
case BC_MULVN: case BC_MULNV: case BC_MULVV:
- | ins_arith mullwo., fmul
+ | ins_arith mullwo., fmul, __muldf3
break;
case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
| ins_arithfp fdiv
break;
case BC_MODVN:
- | ins_arith intmod, fpmod
+ | ins_arith intmod, fpmod, sfpmod
break;
case BC_MODNV: case BC_MODVV:
- | ins_arith intmod, fpmod_
+ | ins_arith intmod, fpmod_, sfpmod
break;
case BC_POW:
| // NYI: (partial) integer arithmetic.
- | lwzx TMP1, BASE, RB
+ | lwzx CARG1, BASE, RB
+ | lwzx CARG3, BASE, RC
+ |.if FPU
| lfdx FARG1, BASE, RB
- | lwzx TMP2, BASE, RC
| lfdx FARG2, BASE, RC
- | checknum cr0, TMP1
- | checknum cr1, TMP2
+ |.else
+ | add TMP1, BASE, RB
+ | add TMP2, BASE, RC
+ | lwz CARG2, 4(TMP1)
+ | lwz CARG4, 4(TMP2)
+ |.endif
+ | checknum cr0, CARG1
+ | checknum cr1, CARG3
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| bge ->vmeta_arith_vv
| blex pow
| ins_next1
+ |.if FPU
| stfdx FARG1, BASE, RA
+ |.else
+ | stwux CARG1, RA, BASE
+ | stw CARG2, 4(RA)
+ |.endif
| ins_next2
break;
@@ -3650,8 +4180,15 @@ static void build_ins(BuildCtx *ctx, BCO
| lp BASE, L->base
| bne ->vmeta_binop
| ins_next1
+ |.if FPU
| lfdx f0, BASE, SAVE0 // Copy result from RB to RA.
| stfdx f0, BASE, RA
+ |.else
+ | lwzux TMP0, SAVE0, BASE
+ | lwz TMP1, 4(SAVE0)
+ | stwux TMP0, RA, BASE
+ | stw TMP1, 4(RA)
+ |.endif
| ins_next2
break;
@@ -3714,8 +4251,15 @@ static void build_ins(BuildCtx *ctx, BCO
case BC_KNUM:
| // RA = dst*8, RD = num_const*8
| ins_next1
+ |.if FPU
| lfdx f0, KBASE, RD
| stfdx f0, BASE, RA
+ |.else
+ | lwzux TMP0, RD, KBASE
+ | lwz TMP1, 4(RD)
+ | stwux TMP0, RA, BASE
+ | stw TMP1, 4(RA)
+ |.endif
| ins_next2
break;
case BC_KPRI:
@@ -3748,8 +4292,15 @@ static void build_ins(BuildCtx *ctx, BCO
| lwzx UPVAL:RB, LFUNC:RB, RD
| ins_next1
| lwz TMP1, UPVAL:RB->v
+ |.if FPU
| lfd f0, 0(TMP1)
| stfdx f0, BASE, RA
+ |.else
+ | lwz TMP2, 0(TMP1)
+ | lwz TMP3, 4(TMP1)
+ | stwux TMP2, RA, BASE
+ | stw TMP3, 4(RA)
+ |.endif
| ins_next2
break;
case BC_USETV:
@@ -3757,14 +4308,24 @@ static void build_ins(BuildCtx *ctx, BCO
| lwz LFUNC:RB, FRAME_FUNC(BASE)
| srwi RA, RA, 1
| addi RA, RA, offsetof(GCfuncL, uvptr)
+ |.if FPU
| lfdux f0, RD, BASE
+ |.else
+ | lwzux CARG1, RD, BASE
+ | lwz CARG3, 4(RD)
+ |.endif
| lwzx UPVAL:RB, LFUNC:RB, RA
| lbz TMP3, UPVAL:RB->marked
| lwz CARG2, UPVAL:RB->v
| andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
| lbz TMP0, UPVAL:RB->closed
| lwz TMP2, 0(RD)
+ |.if FPU
| stfd f0, 0(CARG2)
+ |.else
+ | stw CARG1, 0(CARG2)
+ | stw CARG3, 4(CARG2)
+ |.endif
| cmplwi cr1, TMP0, 0
| lwz TMP1, 4(RD)
| cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
@@ -3820,11 +4381,21 @@ static void build_ins(BuildCtx *ctx, BCO
| lwz LFUNC:RB, FRAME_FUNC(BASE)
| srwi RA, RA, 1
| addi RA, RA, offsetof(GCfuncL, uvptr)
+ |.if FPU
| lfdx f0, KBASE, RD
+ |.else
+ | lwzux TMP2, RD, KBASE
+ | lwz TMP3, 4(RD)
+ |.endif
| lwzx UPVAL:RB, LFUNC:RB, RA
| ins_next1
| lwz TMP1, UPVAL:RB->v
+ |.if FPU
| stfd f0, 0(TMP1)
+ |.else
+ | stw TMP2, 0(TMP1)
+ | stw TMP3, 4(TMP1)
+ |.endif
| ins_next2
break;
case BC_USETP:
@@ -3972,11 +4543,21 @@ static void build_ins(BuildCtx *ctx, BCO
|.endif
| ble ->vmeta_tgetv // Integer key and in array part?
| lwzx TMP0, TMP1, TMP2
+ |.if FPU
| lfdx f14, TMP1, TMP2
+ |.else
+ | lwzux SAVE0, TMP1, TMP2
+ | lwz SAVE1, 4(TMP1)
+ |.endif
| checknil TMP0; beq >2
|1:
| ins_next1
+ |.if FPU
| stfdx f14, BASE, RA
+ |.else
+ | stwux SAVE0, RA, BASE
+ | stw SAVE1, 4(RA)
+ |.endif
| ins_next2
|
|2: // Check for __index if table value is nil.
@@ -4052,12 +4633,22 @@ static void build_ins(BuildCtx *ctx, BCO
| lwz TMP1, TAB:RB->asize
| lwz TMP2, TAB:RB->array
| cmplw TMP0, TMP1; bge ->vmeta_tgetb
+ |.if FPU
| lwzx TMP1, TMP2, RC
| lfdx f0, TMP2, RC
+ |.else
+ | lwzux TMP1, TMP2, RC
+ | lwz TMP3, 4(TMP2)
+ |.endif
| checknil TMP1; beq >5
|1:
| ins_next1
+ |.if FPU
| stfdx f0, BASE, RA
+ |.else
+ | stwux TMP1, RA, BASE
+ | stw TMP3, 4(RA)
+ |.endif
| ins_next2
|
|5: // Check for __index if table value is nil.
@@ -4087,10 +4678,20 @@ static void build_ins(BuildCtx *ctx, BCO
| cmplw TMP0, CARG2
| slwi TMP2, CARG2, 3
| ble ->vmeta_tgetr // In array part?
+ |.if FPU
| lfdx f14, TMP1, TMP2
+ |.else
+ | lwzux SAVE0, TMP2, TMP1
+ | lwz SAVE1, 4(TMP2)
+ |.endif
|->BC_TGETR_Z:
| ins_next1
+ |.if FPU
| stfdx f14, BASE, RA
+ |.else
+ | stwux SAVE0, RA, BASE
+ | stw SAVE1, 4(RA)
+ |.endif
| ins_next2
break;
@@ -4131,11 +4732,22 @@ static void build_ins(BuildCtx *ctx, BCO
| ble ->vmeta_tsetv // Integer key and in array part?
| lwzx TMP2, TMP1, TMP0
| lbz TMP3, TAB:RB->marked
+ |.if FPU
| lfdx f14, BASE, RA
+ |.else
+ | add SAVE1, BASE, RA
+ | lwz SAVE0, 0(SAVE1)
+ | lwz SAVE1, 4(SAVE1)
+ |.endif
| checknil TMP2; beq >3
|1:
| andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
+ |.if FPU
| stfdx f14, TMP1, TMP0
+ |.else
+ | stwux SAVE0, TMP1, TMP0
+ | stw SAVE1, 4(TMP1)
+ |.endif
| bne >7
|2:
| ins_next
@@ -4176,7 +4788,13 @@ static void build_ins(BuildCtx *ctx, BCO
| lwz NODE:TMP2, TAB:RB->node
| stb ZERO, TAB:RB->nomm // Clear metamethod cache.
| and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
+ |.if FPU
| lfdx f14, BASE, RA
+ |.else
+ | add CARG2, BASE, RA
+ | lwz SAVE0, 0(CARG2)
+ | lwz SAVE1, 4(CARG2)
+ |.endif
| slwi TMP0, TMP1, 5
| slwi TMP1, TMP1, 3
| sub TMP1, TMP0, TMP1
@@ -4192,7 +4810,12 @@ static void build_ins(BuildCtx *ctx, BCO
| checknil CARG2; beq >4 // Key found, but nil value?
|2:
| andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
+ |.if FPU
| stfd f14, NODE:TMP2->val
+ |.else
+ | stw SAVE0, NODE:TMP2->val.u32.hi
+ | stw SAVE1, NODE:TMP2->val.u32.lo
+ |.endif
| bne >7
|3:
| ins_next
@@ -4231,7 +4854,12 @@ static void build_ins(BuildCtx *ctx, BCO
| bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
| // Returns TValue *.
| lp BASE, L->base
+ |.if FPU
| stfd f14, 0(CRET1)
+ |.else
+ | stw SAVE0, 0(CRET1)
+ | stw SAVE1, 4(CRET1)
+ |.endif
| b <3 // No 2nd write barrier needed.
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -4248,13 +4876,24 @@ static void build_ins(BuildCtx *ctx, BCO
| lwz TMP2, TAB:RB->array
| lbz TMP3, TAB:RB->marked
| cmplw TMP0, TMP1
+ |.if FPU
| lfdx f14, BASE, RA
+ |.else
+ | add CARG2, BASE, RA
+ | lwz SAVE0, 0(CARG2)
+ | lwz SAVE1, 4(CARG2)
+ |.endif
| bge ->vmeta_tsetb
| lwzx TMP1, TMP2, RC
| checknil TMP1; beq >5
|1:
| andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
+ |.if FPU
| stfdx f14, TMP2, RC
+ |.else
+ | stwux SAVE0, RC, TMP2
+ | stw SAVE1, 4(RC)
+ |.endif
| bne >7
|2:
| ins_next
@@ -4294,10 +4933,20 @@ static void build_ins(BuildCtx *ctx, BCO
|2:
| cmplw TMP0, CARG3
| slwi TMP2, CARG3, 3
+ |.if FPU
| lfdx f14, BASE, RA
+ |.else
+ | lwzux SAVE0, RA, BASE
+ | lwz SAVE1, 4(RA)
+ |.endif
| ble ->vmeta_tsetr // In array part?
| ins_next1
+ |.if FPU
| stfdx f14, TMP1, TMP2
+ |.else
+ | stwux SAVE0, TMP1, TMP2
+ | stw SAVE1, 4(TMP1)
+ |.endif
| ins_next2
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -4327,10 +4976,20 @@ static void build_ins(BuildCtx *ctx, BCO
| add TMP1, TMP1, TMP0
| andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
|3: // Copy result slots to table.
+ |.if FPU
| lfd f0, 0(RA)
+ |.else
+ | lwz SAVE0, 0(RA)
+ | lwz SAVE1, 4(RA)
+ |.endif
| addi RA, RA, 8
| cmpw cr1, RA, TMP2
+ |.if FPU
| stfd f0, 0(TMP1)
+ |.else
+ | stw SAVE0, 0(TMP1)
+ | stw SAVE1, 4(TMP1)
+ |.endif
| addi TMP1, TMP1, 8
| blt cr1, <3
| bne >7
@@ -4397,9 +5056,20 @@ static void build_ins(BuildCtx *ctx, BCO
| beq cr1, >3
|2:
| addi TMP3, TMP2, 8
+ |.if FPU
| lfdx f0, RA, TMP2
+ |.else
+ | add CARG3, RA, TMP2
+ | lwz CARG1, 0(CARG3)
+ | lwz CARG2, 4(CARG3)
+ |.endif
| cmplw cr1, TMP3, NARGS8:RC
+ |.if FPU
| stfdx f0, BASE, TMP2
+ |.else
+ | stwux CARG1, TMP2, BASE
+ | stw CARG2, 4(TMP2)
+ |.endif
| mr TMP2, TMP3
| bne cr1, <2
|3:
@@ -4432,14 +5102,28 @@ static void build_ins(BuildCtx *ctx, BCO
| add BASE, BASE, RA
| lwz TMP1, -24(BASE)
| lwz LFUNC:RB, -20(BASE)
+ |.if FPU
| lfd f1, -8(BASE)
| lfd f0, -16(BASE)
+ |.else
+ | lwz CARG1, -8(BASE)
+ | lwz CARG2, -4(BASE)
+ | lwz CARG3, -16(BASE)
+ | lwz CARG4, -12(BASE)
+ |.endif
| stw TMP1, 0(BASE) // Copy callable.
| stw LFUNC:RB, 4(BASE)
| checkfunc TMP1
- | stfd f1, 16(BASE) // Copy control var.
| li NARGS8:RC, 16 // Iterators get 2 arguments.
+ |.if FPU
+ | stfd f1, 16(BASE) // Copy control var.
| stfdu f0, 8(BASE) // Copy state.
+ |.else
+ | stw CARG1, 16(BASE) // Copy control var.
+ | stw CARG2, 20(BASE)
+ | stwu CARG3, 8(BASE) // Copy state.
+ | stw CARG4, 4(BASE)
+ |.endif
| bne ->vmeta_call
| ins_call
break;
@@ -4460,7 +5144,12 @@ static void build_ins(BuildCtx *ctx, BCO
| slwi TMP3, RC, 3
| bge >5 // Index points after array part?
| lwzx TMP2, TMP1, TMP3
+ |.if FPU
| lfdx f0, TMP1, TMP3
+ |.else
+ | lwzux CARG1, TMP3, TMP1
+ | lwz CARG2, 4(TMP3)
+ |.endif
| checknil TMP2
| lwz INS, -4(PC)
| beq >4
@@ -4472,7 +5161,12 @@ static void build_ins(BuildCtx *ctx, BCO
|.endif
| addi RC, RC, 1
| addis TMP3, PC, -(BCBIAS_J*4 >> 16)
+ |.if FPU
| stfd f0, 8(RA)
+ |.else
+ | stw CARG1, 8(RA)
+ | stw CARG2, 12(RA)
+ |.endif
| decode_RD4 TMP1, INS
| stw RC, -4(RA) // Update control var.
| add PC, TMP1, TMP3
@@ -4497,17 +5191,38 @@ static void build_ins(BuildCtx *ctx, BCO
| slwi RB, RC, 3
| sub TMP3, TMP3, RB
| lwzx RB, TMP2, TMP3
+ |.if FPU
| lfdx f0, TMP2, TMP3
+ |.else
+ | add CARG3, TMP2, TMP3
+ | lwz CARG1, 0(CARG3)
+ | lwz CARG2, 4(CARG3)
+ |.endif
| add NODE:TMP3, TMP2, TMP3
| checknil RB
| lwz INS, -4(PC)
| beq >7
+ |.if FPU
| lfd f1, NODE:TMP3->key
+ |.else
+ | lwz CARG3, NODE:TMP3->key.u32.hi
+ | lwz CARG4, NODE:TMP3->key.u32.lo
+ |.endif
| addis TMP2, PC, -(BCBIAS_J*4 >> 16)
+ |.if FPU
| stfd f0, 8(RA)
+ |.else
+ | stw CARG1, 8(RA)
+ | stw CARG2, 12(RA)
+ |.endif
| add RC, RC, TMP0
| decode_RD4 TMP1, INS
+ |.if FPU
| stfd f1, 0(RA)
+ |.else
+ | stw CARG3, 0(RA)
+ | stw CARG4, 4(RA)
+ |.endif
| addi RC, RC, 1
| add PC, TMP1, TMP2
| stw RC, -4(RA) // Update control var.
@@ -4573,9 +5288,19 @@ static void build_ins(BuildCtx *ctx, BCO
| subi TMP2, TMP2, 16
| ble >2 // No vararg slots?
|1: // Copy vararg slots to destination slots.
+ |.if FPU
| lfd f0, 0(RC)
+ |.else
+ | lwz CARG1, 0(RC)
+ | lwz CARG2, 4(RC)
+ |.endif
| addi RC, RC, 8
+ |.if FPU
| stfd f0, 0(RA)
+ |.else
+ | stw CARG1, 0(RA)
+ | stw CARG2, 4(RA)
+ |.endif
| cmplw RA, TMP2
| cmplw cr1, RC, TMP3
| bge >3 // All destination slots filled?
@@ -4598,9 +5323,19 @@ static void build_ins(BuildCtx *ctx, BCO
| addi MULTRES, TMP1, 8
| bgt >7
|6:
+ |.if FPU
| lfd f0, 0(RC)
+ |.else
+ | lwz CARG1, 0(RC)
+ | lwz CARG2, 4(RC)
+ |.endif
| addi RC, RC, 8
+ |.if FPU
| stfd f0, 0(RA)
+ |.else
+ | stw CARG1, 0(RA)
+ | stw CARG2, 4(RA)
+ |.endif
| cmplw RC, TMP3
| addi RA, RA, 8
| blt <6 // More vararg slots?
@@ -4651,14 +5386,38 @@ static void build_ins(BuildCtx *ctx, BCO
| li TMP1, 0
|2:
| addi TMP3, TMP1, 8
+ |.if FPU
| lfdx f0, RA, TMP1
+ |.else
+ | add CARG3, RA, TMP1
+ | lwz CARG1, 0(CARG3)
+ | lwz CARG2, 4(CARG3)
+ |.endif
| cmpw TMP3, RC
+ |.if FPU
| stfdx f0, TMP2, TMP1
+ |.else
+ | add CARG3, TMP2, TMP1
+ | stw CARG1, 0(CARG3)
+ | stw CARG2, 4(CARG3)
+ |.endif
| beq >3
| addi TMP1, TMP3, 8
+ |.if FPU
| lfdx f1, RA, TMP3
+ |.else
+ | add CARG3, RA, TMP3
+ | lwz CARG1, 0(CARG3)
+ | lwz CARG2, 4(CARG3)
+ |.endif
| cmpw TMP1, RC
+ |.if FPU
| stfdx f1, TMP2, TMP3
+ |.else
+ | add CARG3, TMP2, TMP3
+ | stw CARG1, 0(CARG3)
+ | stw CARG2, 4(CARG3)
+ |.endif
| bne <2
|3:
|5:
@@ -4700,8 +5459,15 @@ static void build_ins(BuildCtx *ctx, BCO
| subi TMP2, BASE, 8
| decode_RB8 RB, INS
if (op == BC_RET1) {
+ |.if FPU
| lfd f0, 0(RA)
| stfd f0, 0(TMP2)
+ |.else
+ | lwz CARG1, 0(RA)
+ | lwz CARG2, 4(RA)
+ | stw CARG1, 0(TMP2)
+ | stw CARG2, 4(TMP2)
+ |.endif
}
|5:
| cmplw RB, RD
@@ -4762,11 +5528,11 @@ static void build_ins(BuildCtx *ctx, BCO
|4:
| stw CARG1, FORL_IDX*8+4(RA)
} else {
- | lwz TMP3, FORL_STEP*8(RA)
+ | lwz SAVE0, FORL_STEP*8(RA)
| lwz CARG3, FORL_STEP*8+4(RA)
| lwz TMP2, FORL_STOP*8(RA)
| lwz CARG2, FORL_STOP*8+4(RA)
- | cmplw cr7, TMP3, TISNUM
+ | cmplw cr7, SAVE0, TISNUM
| cmplw cr1, TMP2, TISNUM
| crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
| crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
@@ -4809,41 +5575,80 @@ static void build_ins(BuildCtx *ctx, BCO
if (vk) {
|.if DUALNUM
|9: // FP loop.
+ |.if FPU
| lfd f1, FORL_IDX*8(RA)
|.else
+ | lwz CARG1, FORL_IDX*8(RA)
+ | lwz CARG2, FORL_IDX*8+4(RA)
+ |.endif
+ |.else
| lfdux f1, RA, BASE
|.endif
+ |.if FPU
| lfd f3, FORL_STEP*8(RA)
| lfd f2, FORL_STOP*8(RA)
- | lwz TMP3, FORL_STEP*8(RA)
| fadd f1, f1, f3
| stfd f1, FORL_IDX*8(RA)
+ |.else
+ | lwz CARG3, FORL_STEP*8(RA)
+ | lwz CARG4, FORL_STEP*8+4(RA)
+ | mr SAVE1, RD
+ | blex __adddf3
+ | mr RD, SAVE1
+ | stw CRET1, FORL_IDX*8(RA)
+ | stw CRET2, FORL_IDX*8+4(RA)
+ | lwz CARG3, FORL_STOP*8(RA)
+ | lwz CARG4, FORL_STOP*8+4(RA)
+ |.endif
+ | lwz SAVE0, FORL_STEP*8(RA)
} else {
|.if DUALNUM
|9: // FP loop.
|.else
| lwzux TMP1, RA, BASE
- | lwz TMP3, FORL_STEP*8(RA)
+ | lwz SAVE0, FORL_STEP*8(RA)
| lwz TMP2, FORL_STOP*8(RA)
| cmplw cr0, TMP1, TISNUM
- | cmplw cr7, TMP3, TISNUM
+ | cmplw cr7, SAVE0, TISNUM
| cmplw cr1, TMP2, TISNUM
|.endif
+ |.if FPU
| lfd f1, FORL_IDX*8(RA)
+ |.else
+ | lwz CARG1, FORL_IDX*8(RA)
+ | lwz CARG2, FORL_IDX*8+4(RA)
+ |.endif
| crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
+ |.if FPU
| lfd f2, FORL_STOP*8(RA)
+ |.else
+ | lwz CARG3, FORL_STOP*8(RA)
+ | lwz CARG4, FORL_STOP*8+4(RA)
+ |.endif
| bge ->vmeta_for
}
- | cmpwi cr6, TMP3, 0
+ | cmpwi cr6, SAVE0, 0
if (op != BC_JFORL) {
| srwi RD, RD, 1
}
+ |.if FPU
| stfd f1, FORL_EXT*8(RA)
+ |.else
+ | stw CARG1, FORL_EXT*8(RA)
+ | stw CARG2, FORL_EXT*8+4(RA)
+ |.endif
if (op != BC_JFORL) {
| add RD, PC, RD
}
+ |.if FPU
| fcmpu cr0, f1, f2
+ |.else
+ | mr SAVE1, RD
+ | blex __ledf2
+ | cmpwi CRET1, 0
+ | mr RD, SAVE1
+ |.endif
if (op == BC_JFORI) {
| addis PC, RD, -(BCBIAS_J*4 >> 16)
}