|
|
- From fd37da0d586c331b0008fbfd653a9659344fe76f Mon Sep 17 00:00:00 2001
- From: Mike Pall <mike>
- Date: Wed, 26 Jul 2017 09:52:19 +0200
- Subject: [PATCH] PPC: Add soft-float support to interpreter.
-
- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
- Sponsored by Cisco Systems, Inc.
- ---
- src/host/buildvm_asm.c | 2 +-
- src/lj_arch.h | 29 +-
- src/lj_ccall.c | 38 +-
- src/lj_ccall.h | 4 +-
- src/lj_ccallback.c | 30 +-
- src/lj_frame.h | 2 +-
- src/lj_ircall.h | 2 +-
- src/vm_ppc.dasc | 1249 +++++++++++++++++++++++++++++++++-------
- 8 files changed, 1101 insertions(+), 255 deletions(-)
-
- --- a/src/host/buildvm_asm.c
- +++ b/src/host/buildvm_asm.c
- @@ -338,7 +338,7 @@ void emit_asm(BuildCtx *ctx)
- #if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
- fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
- #endif
- -#if LJ_TARGET_PPC && !LJ_TARGET_PS3
- +#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP
- /* Hard-float ABI. */
- fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
- #endif
- --- a/src/lj_arch.h
- +++ b/src/lj_arch.h
- @@ -254,6 +254,29 @@
- #else
- #define LJ_ARCH_BITS 32
- #define LJ_ARCH_NAME "ppc"
- +
- +#if !defined(LJ_ARCH_HASFPU)
- +#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
- +#define LJ_ARCH_HASFPU 0
- +#else
- +#define LJ_ARCH_HASFPU 1
- +#endif
- +#endif
- +
- +#if !defined(LJ_ABI_SOFTFP)
- +#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
- +#define LJ_ABI_SOFTFP 1
- +#else
- +#define LJ_ABI_SOFTFP 0
- +#endif
- +#endif
- +#endif
- +
- +#if LJ_ABI_SOFTFP
- +#define LJ_ARCH_NOJIT 1 /* NYI */
- +#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
- +#else
- +#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
- #endif
-
- #define LJ_TARGET_PPC 1
- @@ -262,7 +285,6 @@
- #define LJ_TARGET_MASKSHIFT 0
- #define LJ_TARGET_MASKROT 1
- #define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
- -#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
-
- #if LJ_TARGET_CONSOLE
- #define LJ_ARCH_PPC32ON64 1
- @@ -415,16 +437,13 @@
- #error "No support for ILP32 model on ARM64"
- #endif
- #elif LJ_TARGET_PPC
- -#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
- -#error "No support for PowerPC CPUs without double-precision FPU"
- -#endif
- #if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
- #error "No support for little-endian PPC32"
- #endif
- #if LJ_ARCH_PPC64
- #error "No support for PowerPC 64 bit mode (yet)"
- #endif
- -#ifdef __NO_FPRS__
- +#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
- #error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
- #endif
- #elif LJ_TARGET_MIPS32
- --- a/src/lj_ccall.c
- +++ b/src/lj_ccall.c
- @@ -387,6 +387,24 @@
- #define CCALL_HANDLE_COMPLEXARG \
- /* Pass complex by value in 2 or 4 GPRs. */
-
- +#define CCALL_HANDLE_GPR \
- + /* Try to pass argument in GPRs. Jumps to done if it fits, else falls through. */ \
- + if (n > 1) { \
- + lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \
- + if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \
- + ngpr = (ngpr + 1u) & ~1u; /* Align integer/FP multi-word values to regpair. */ \
- + else if (ngpr + n > maxgpr) \
- + ngpr = maxgpr; /* Prevent reordering. */ \
- + } \
- + if (ngpr + n <= maxgpr) { \
- + dp = &cc->gpr[ngpr]; \
- + ngpr += n; \
- + goto done; \
- + } \
- +
- +#if LJ_ABI_SOFTFP
- +#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR
- +#else
- #define CCALL_HANDLE_REGARG \
- if (isfp) { /* Try to pass argument in FPRs. */ \
- if (nfpr + 1 <= CCALL_NARG_FPR) { \
- @@ -395,24 +413,16 @@
- d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
- goto done; \
- } \
- - } else { /* Try to pass argument in GPRs. */ \
- - if (n > 1) { \
- - lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \
- - if (ctype_isinteger(d->info)) \
- - ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
- - else if (ngpr + n > maxgpr) \
- - ngpr = maxgpr; /* Prevent reordering. */ \
- - } \
- - if (ngpr + n <= maxgpr) { \
- - dp = &cc->gpr[ngpr]; \
- - ngpr += n; \
- - goto done; \
- - } \
- + } else { \
- + CCALL_HANDLE_GPR \
- }
- +#endif
-
- +#if !LJ_ABI_SOFTFP
- #define CCALL_HANDLE_RET \
- if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
- ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
- +#endif
-
- #elif LJ_TARGET_MIPS32
- /* -- MIPS o32 calling conventions ---------------------------------------- */
- @@ -1080,7 +1090,7 @@ static int ccall_set_args(lua_State *L,
- }
- if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
-
- -#if LJ_TARGET_X64 || LJ_TARGET_PPC
- +#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
- cc->nfpr = nfpr; /* Required for vararg functions. */
- #endif
- cc->nsp = nsp;
- --- a/src/lj_ccall.h
- +++ b/src/lj_ccall.h
- @@ -86,9 +86,9 @@ typedef union FPRArg {
- #elif LJ_TARGET_PPC
-
- #define CCALL_NARG_GPR 8
- -#define CCALL_NARG_FPR 8
- +#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8)
- #define CCALL_NRET_GPR 4 /* For complex double. */
- -#define CCALL_NRET_FPR 1
- +#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1)
- #define CCALL_SPS_EXTRA 4
- #define CCALL_SPS_FREE 0
-
- --- a/src/lj_ccallback.c
- +++ b/src/lj_ccallback.c
- @@ -419,6 +419,23 @@ void lj_ccallback_mcode_free(CTState *ct
-
- #elif LJ_TARGET_PPC
-
- +#define CALLBACK_HANDLE_GPR /* Try to fetch argument from GPRs; jumps to done if it fits. */ \
- + if (n > 1) { \
- + lua_assert(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \
- + ctype_isinteger(cta->info)) && n == 2); /* int64_t. */ \
- + ngpr = (ngpr + 1u) & ~1u; /* Align int64_t (or soft-float double) to regpair. */ \
- + } \
- + if (ngpr + n <= maxgpr) { \
- + sp = &cts->cb.gpr[ngpr]; \
- + ngpr += n; \
- + goto done; \
- + }
- +
- +#if LJ_ABI_SOFTFP
- +#define CALLBACK_HANDLE_REGARG \
- + CALLBACK_HANDLE_GPR \
- + UNUSED(isfp);
- +#else
- #define CALLBACK_HANDLE_REGARG \
- if (isfp) { \
- if (nfpr + 1 <= CCALL_NARG_FPR) { \
- @@ -427,20 +444,15 @@ void lj_ccallback_mcode_free(CTState *ct
- goto done; \
- } \
- } else { /* Try to pass argument in GPRs. */ \
- - if (n > 1) { \
- - lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \
- - ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
- - } \
- - if (ngpr + n <= maxgpr) { \
- - sp = &cts->cb.gpr[ngpr]; \
- - ngpr += n; \
- - goto done; \
- - } \
- + CALLBACK_HANDLE_GPR \
- }
- +#endif
-
- +#if !LJ_ABI_SOFTFP
- #define CALLBACK_HANDLE_RET \
- if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
- *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */
- +#endif
-
- #elif LJ_TARGET_MIPS32
-
- --- a/src/lj_frame.h
- +++ b/src/lj_frame.h
- @@ -226,7 +226,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CAL
- #define CFRAME_OFS_L 36
- #define CFRAME_OFS_PC 32
- #define CFRAME_OFS_MULTRES 28
- -#define CFRAME_SIZE 272
- +#define CFRAME_SIZE (LJ_ARCH_HASFPU ? 272 : 128)
- #define CFRAME_SHIFT_MULTRES 3
- #endif
- #elif LJ_TARGET_MIPS32
- --- a/src/lj_ircall.h
- +++ b/src/lj_ircall.h
- @@ -272,7 +272,7 @@ LJ_DATA const CCallInfo lj_ir_callinfo[I
- #define fp64_f2l __aeabi_f2lz
- #define fp64_f2ul __aeabi_f2ulz
- #endif
- -#elif LJ_TARGET_MIPS
- +#elif LJ_TARGET_MIPS || LJ_TARGET_PPC
- #define softfp_add __adddf3
- #define softfp_sub __subdf3
- #define softfp_mul __muldf3
- --- a/src/vm_ppc.dasc
- +++ b/src/vm_ppc.dasc
- @@ -103,6 +103,18 @@
- |// Fixed register assignments for the interpreter.
- |// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA)
- |
- +|.macro .FPU, a, b // Emit "a, b" only when FPU is defined (two-argument form).
- +|.if FPU
- +| a, b
- +|.endif
- +|.endmacro
- +|
- +|.macro .FPU, a, b, c // Same guard, three-argument form.
- +|.if FPU
- +| a, b, c
- +|.endif
- +|.endmacro
- +|
- |// The following must be C callee-save (but BASE is often refetched).
- |.define BASE, r14 // Base of current Lua stack frame.
- |.define KBASE, r15 // Constants of current Lua function.
- @@ -116,8 +128,10 @@
- |.define TISNUM, r22
- |.define TISNIL, r23
- |.define ZERO, r24
- +|.if FPU
- |.define TOBIT, f30 // 2^52 + 2^51.
- |.define TONUM, f31 // 2^52 + 2^51 + 2^31.
- +|.endif
- |
- |// The following temporaries are not saved across C calls, except for RA.
- |.define RA, r20 // Callee-save.
- @@ -133,6 +147,7 @@
- |
- |// Saved temporaries.
- |.define SAVE0, r21
- +|.define SAVE1, r25
- |
- |// Calling conventions.
- |.define CARG1, r3
- @@ -141,8 +156,10 @@
- |.define CARG4, r6 // Overlaps TMP3.
- |.define CARG5, r7 // Overlaps INS.
- |
- +|.if FPU
- |.define FARG1, f1
- |.define FARG2, f2
- +|.endif
- |
- |.define CRET1, r3
- |.define CRET2, r4
- @@ -213,10 +230,16 @@
- |.endif
- |.else
- |
- +|.if FPU
- |.define SAVE_LR, 276(sp)
- |.define CFRAME_SPACE, 272 // Delta for sp.
- |// Back chain for sp: 272(sp) <-- sp entering interpreter
- |.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves.
- +|.else
- +|.define SAVE_LR, 132(sp)
- +|.define CFRAME_SPACE, 128 // Delta for sp.
- +|// Back chain for sp: 128(sp) <-- sp entering interpreter
- +|.endif
- |.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves.
- |.define SAVE_CR, 52(sp) // 32 bit CR save.
- |.define SAVE_ERRF, 48(sp) // 32 bit C frame info.
- @@ -226,16 +249,25 @@
- |.define SAVE_PC, 32(sp)
- |.define SAVE_MULTRES, 28(sp)
- |.define UNUSED1, 24(sp)
- +|.if FPU
- |.define TMPD_LO, 20(sp)
- |.define TMPD_HI, 16(sp)
- |.define TONUM_LO, 12(sp)
- |.define TONUM_HI, 8(sp)
- +|.else
- +|.define SFSAVE_4, 20(sp)
- +|.define SFSAVE_3, 16(sp)
- +|.define SFSAVE_2, 12(sp)
- +|.define SFSAVE_1, 8(sp)
- +|.endif
- |// Next frame lr: 4(sp)
- |// Back chain for sp: 0(sp) <-- sp while in interpreter
- |
- +|.if FPU
- |.define TMPD_BLO, 23(sp)
- |.define TMPD, TMPD_HI
- |.define TONUM_D, TONUM_HI
- +|.endif
- |
- |.endif
- |
- @@ -245,7 +277,7 @@
- |.else
- | stw r..reg, SAVE_GPR_+(reg-14)*4(sp)
- |.endif
- -| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
- +| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
- |.endmacro
- |.macro rest_, reg
- |.if GPR64
- @@ -253,7 +285,7 @@
- |.else
- | lwz r..reg, SAVE_GPR_+(reg-14)*4(sp)
- |.endif
- -| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
- +| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
- |.endmacro
- |
- |.macro saveregs
- @@ -323,6 +355,7 @@
- |// Trap for not-yet-implemented parts.
- |.macro NYI; tw 4, sp, sp; .endmacro
- |
- +|.if FPU
- |// int/FP conversions.
- |.macro tonum_i, freg, reg
- | xoris reg, reg, 0x8000
- @@ -346,6 +379,7 @@
- |.macro toint, reg, freg
- | toint reg, freg, freg
- |.endmacro
- +|.endif
- |
- |//-----------------------------------------------------------------------
- |
- @@ -533,9 +567,19 @@ static void build_subroutines(BuildCtx *
- | beq >2
- |1:
- | addic. TMP1, TMP1, -8
- + |.if FPU
- | lfd f0, 0(RA)
- + |.else
- + | lwz CARG1, 0(RA)
- + | lwz CARG2, 4(RA)
- + |.endif
- | addi RA, RA, 8
- + |.if FPU
- | stfd f0, 0(BASE)
- + |.else
- + | stw CARG1, 0(BASE)
- + | stw CARG2, 4(BASE)
- + |.endif
- | addi BASE, BASE, 8
- | bney <1
- |
- @@ -613,23 +657,23 @@ static void build_subroutines(BuildCtx *
- | .toc ld TOCREG, SAVE_TOC
- | li TISNUM, LJ_TISNUM // Setup type comparison constants.
- | lp BASE, L->base
- - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- + | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
- | li ZERO, 0
- - | stw TMP3, TMPD
- + | .FPU stw TMP3, TMPD
- | li TMP1, LJ_TFALSE
- - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- + | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- | li TISNIL, LJ_TNIL
- | li_vmstate INTERP
- - | lfs TOBIT, TMPD
- + | .FPU lfs TOBIT, TMPD
- | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
- | la RA, -8(BASE) // Results start at BASE-8.
- - | stw TMP3, TMPD
- + | .FPU stw TMP3, TMPD
- | addi DISPATCH, DISPATCH, GG_G2DISP
- | stw TMP1, 0(RA) // Prepend false to error message.
- | li RD, 16 // 2 results: false + error message.
- | st_vmstate
- - | lfs TONUM, TMPD
- + | .FPU lfs TONUM, TMPD
- | b ->vm_returnc
- |
- |//-----------------------------------------------------------------------
- @@ -690,22 +734,22 @@ static void build_subroutines(BuildCtx *
- | li TISNUM, LJ_TISNUM // Setup type comparison constants.
- | lp TMP1, L->top
- | lwz PC, FRAME_PC(BASE)
- - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- + | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | stb CARG3, L->status
- - | stw TMP3, TMPD
- - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- - | lfs TOBIT, TMPD
- + | .FPU stw TMP3, TMPD
- + | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- + | .FPU lfs TOBIT, TMPD
- | sub RD, TMP1, BASE
- - | stw TMP3, TMPD
- - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
- + | .FPU stw TMP3, TMPD
- + | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
- | addi RD, RD, 8
- - | stw TMP0, TONUM_HI
- + | .FPU stw TMP0, TONUM_HI
- | li_vmstate INTERP
- | li ZERO, 0
- | st_vmstate
- | andix. TMP0, PC, FRAME_TYPE
- | mr MULTRES, RD
- - | lfs TONUM, TMPD
- + | .FPU lfs TONUM, TMPD
- | li TISNIL, LJ_TNIL
- | beq ->BC_RET_Z
- | b ->vm_return
- @@ -739,19 +783,19 @@ static void build_subroutines(BuildCtx *
- | lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
- | li TISNUM, LJ_TISNUM // Setup type comparison constants.
- | lp TMP1, L->top
- - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- + | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | add PC, PC, BASE
- - | stw TMP3, TMPD
- + | .FPU stw TMP3, TMPD
- | li ZERO, 0
- - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- - | lfs TOBIT, TMPD
- + | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- + | .FPU lfs TOBIT, TMPD
- | sub PC, PC, TMP2 // PC = frame delta + frame type
- - | stw TMP3, TMPD
- - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
- + | .FPU stw TMP3, TMPD
- + | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
- | sub NARGS8:RC, TMP1, BASE
- - | stw TMP0, TONUM_HI
- + | .FPU stw TMP0, TONUM_HI
- | li_vmstate INTERP
- - | lfs TONUM, TMPD
- + | .FPU lfs TONUM, TMPD
- | li TISNIL, LJ_TNIL
- | st_vmstate
- |
- @@ -839,15 +883,30 @@ static void build_subroutines(BuildCtx *
- | lwz INS, -4(PC)
- | subi CARG2, RB, 16
- | decode_RB8 SAVE0, INS
- + |.if FPU
- | lfd f0, 0(RA)
- + |.else
- + | lwz TMP2, 0(RA)
- + | lwz TMP3, 4(RA)
- + |.endif
- | add TMP1, BASE, SAVE0
- | stp BASE, L->base
- | cmplw TMP1, CARG2
- | sub CARG3, CARG2, TMP1
- | decode_RA8 RA, INS
- + |.if FPU
- | stfd f0, 0(CARG2)
- + |.else
- + | stw TMP2, 0(CARG2)
- + | stw TMP3, 4(CARG2)
- + |.endif
- | bney ->BC_CAT_Z
- + |.if FPU
- | stfdx f0, BASE, RA
- + |.else
- + | stwux TMP2, RA, BASE
- + | stw TMP3, 4(RA)
- + |.endif
- | b ->cont_nop
- |
- |//-- Table indexing metamethods -----------------------------------------
- @@ -900,9 +959,19 @@ static void build_subroutines(BuildCtx *
- | // Returns TValue * (finished) or NULL (metamethod).
- | cmplwi CRET1, 0
- | beq >3
- + |.if FPU
- | lfd f0, 0(CRET1)
- + |.else
- + | lwz TMP0, 0(CRET1)
- + | lwz TMP1, 4(CRET1)
- + |.endif
- | ins_next1
- + |.if FPU
- | stfdx f0, BASE, RA
- + |.else
- + | stwux TMP0, RA, BASE
- + | stw TMP1, 4(RA)
- + |.endif
- | ins_next2
- |
- |3: // Call __index metamethod.
- @@ -920,7 +989,12 @@ static void build_subroutines(BuildCtx *
- | // Returns cTValue * or NULL.
- | cmplwi CRET1, 0
- | beq >1
- + |.if FPU
- | lfd f14, 0(CRET1)
- + |.else
- + | lwz SAVE0, 0(CRET1)
- + | lwz SAVE1, 4(CRET1)
- + |.endif
- | b ->BC_TGETR_Z
- |1:
- | stwx TISNIL, BASE, RA
- @@ -975,11 +1049,21 @@ static void build_subroutines(BuildCtx *
- | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
- | // Returns TValue * (finished) or NULL (metamethod).
- | cmplwi CRET1, 0
- + |.if FPU
- | lfdx f0, BASE, RA
- + |.else
- + | lwzux TMP2, RA, BASE
- + | lwz TMP3, 4(RA)
- + |.endif
- | beq >3
- | // NOBARRIER: lj_meta_tset ensures the table is not black.
- | ins_next1
- + |.if FPU
- | stfd f0, 0(CRET1)
- + |.else
- + | stw TMP2, 0(CRET1)
- + | stw TMP3, 4(CRET1)
- + |.endif
- | ins_next2
- |
- |3: // Call __newindex metamethod.
- @@ -990,7 +1074,12 @@ static void build_subroutines(BuildCtx *
- | add PC, TMP1, BASE
- | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
- | li NARGS8:RC, 24 // 3 args for func(t, k, v)
- + |.if FPU
- | stfd f0, 16(BASE) // Copy value to third argument.
- + |.else
- + | stw TMP2, 16(BASE)
- + | stw TMP3, 20(BASE)
- + |.endif
- | b ->vm_call_dispatch_f
- |
- |->vmeta_tsetr:
- @@ -998,7 +1087,12 @@ static void build_subroutines(BuildCtx *
- | stw PC, SAVE_PC
- | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
- | // Returns TValue *.
- + |.if FPU
- | stfd f14, 0(CRET1)
- + |.else
- + | stw SAVE0, 0(CRET1)
- + | stw SAVE1, 4(CRET1)
- + |.endif
- | b ->cont_nop
- |
- |//-- Comparison metamethods ---------------------------------------------
- @@ -1037,9 +1131,19 @@ static void build_subroutines(BuildCtx *
- |
- |->cont_ra: // RA = resultptr
- | lwz INS, -4(PC)
- + |.if FPU
- | lfd f0, 0(RA)
- + |.else
- + | lwz CARG1, 0(RA)
- + | lwz CARG2, 4(RA)
- + |.endif
- | decode_RA8 TMP1, INS
- + |.if FPU
- | stfdx f0, BASE, TMP1
- + |.else
- + | stwux CARG1, TMP1, BASE
- + | stw CARG2, 4(TMP1)
- + |.endif
- | b ->cont_nop
- |
- |->cont_condt: // RA = resultptr
- @@ -1245,22 +1349,32 @@ static void build_subroutines(BuildCtx *
- |.macro .ffunc_n, name
- |->ff_ .. name:
- | cmplwi NARGS8:RC, 8
- - | lwz CARG3, 0(BASE)
- + | lwz CARG1, 0(BASE)
- + |.if FPU
- | lfd FARG1, 0(BASE)
- + |.else
- + | lwz CARG2, 4(BASE)
- + |.endif
- | blt ->fff_fallback
- - | checknum CARG3; bge ->fff_fallback
- + | checknum CARG1; bge ->fff_fallback
- |.endmacro
- |
- |.macro .ffunc_nn, name
- |->ff_ .. name:
- | cmplwi NARGS8:RC, 16
- - | lwz CARG3, 0(BASE)
- + | lwz CARG1, 0(BASE)
- + |.if FPU
- | lfd FARG1, 0(BASE)
- - | lwz CARG4, 8(BASE)
- + | lwz CARG3, 8(BASE)
- | lfd FARG2, 8(BASE)
- + |.else
- + | lwz CARG2, 4(BASE)
- + | lwz CARG3, 8(BASE)
- + | lwz CARG4, 12(BASE)
- + |.endif
- | blt ->fff_fallback
- + | checknum CARG1; bge ->fff_fallback
- | checknum CARG3; bge ->fff_fallback
- - | checknum CARG4; bge ->fff_fallback
- |.endmacro
- |
- |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
- @@ -1281,14 +1395,21 @@ static void build_subroutines(BuildCtx *
- | bge cr1, ->fff_fallback
- | stw CARG3, 0(RA)
- | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
- + | addi TMP1, BASE, 8
- + | add TMP2, RA, NARGS8:RC
- | stw CARG1, 4(RA)
- | beq ->fff_res // Done if exactly 1 argument.
- - | li TMP1, 8
- - | subi RC, RC, 8
- |1:
- - | cmplw TMP1, RC
- - | lfdx f0, BASE, TMP1
- - | stfdx f0, RA, TMP1
- + | cmplw TMP1, TMP2 // Result is consumed by bney below, after the copy.
- + |.if FPU
- + | lfd f0, 0(TMP1) // Load the slot TMP1 points at.
- + | stfd f0, -8(TMP1) // Store one slot lower, matching the soft-float path below.
- + |.else
- + | lwz CARG1, 0(TMP1)
- + | lwz CARG2, 4(TMP1)
- + | stw CARG1, -8(TMP1)
- + | stw CARG2, -4(TMP1)
- + |.endif
- | addi TMP1, TMP1, 8
- | bney <1
- | b ->fff_res
- @@ -1303,8 +1424,14 @@ static void build_subroutines(BuildCtx *
- | orc TMP1, TMP2, TMP0
- | addi TMP1, TMP1, ~LJ_TISNUM+1
- | slwi TMP1, TMP1, 3
- + |.if FPU
- | la TMP2, CFUNC:RB->upvalue
- | lfdx FARG1, TMP2, TMP1
- + |.else
- + | add TMP1, CFUNC:RB, TMP1
- + | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi
- + | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo
- + |.endif
- | b ->fff_resn
- |
- |//-- Base library: getters and setters ---------------------------------
- @@ -1382,7 +1509,12 @@ static void build_subroutines(BuildCtx *
- | mr CARG1, L
- | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
- | // Returns cTValue *.
- + |.if FPU
- | lfd FARG1, 0(CRET1)
- + |.else
- + | lwz CARG2, 4(CRET1)
- + | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1.
- + |.endif
- | b ->fff_resn
- |
- |//-- Base library: conversions ------------------------------------------
- @@ -1391,7 +1523,11 @@ static void build_subroutines(BuildCtx *
- | // Only handles the number case inline (without a base argument).
- | cmplwi NARGS8:RC, 8
- | lwz CARG1, 0(BASE)
- + |.if FPU
- | lfd FARG1, 0(BASE)
- + |.else
- + | lwz CARG2, 4(BASE)
- + |.endif
- | bne ->fff_fallback // Exactly one argument.
- | checknum CARG1; bgt ->fff_fallback
- | b ->fff_resn
- @@ -1442,12 +1578,23 @@ static void build_subroutines(BuildCtx *
- | cmplwi CRET1, 0
- | li CARG3, LJ_TNIL
- | beq ->fff_restv // End of traversal: return nil.
- - | lfd f0, 8(BASE) // Copy key and value to results.
- | la RA, -8(BASE)
- + |.if FPU
- + | lfd f0, 8(BASE) // Copy key and value to results.
- | lfd f1, 16(BASE)
- | stfd f0, 0(RA)
- - | li RD, (2+1)*8
- | stfd f1, 8(RA)
- + |.else
- + | lwz CARG1, 8(BASE)
- + | lwz CARG2, 12(BASE)
- + | lwz CARG3, 16(BASE)
- + | lwz CARG4, 20(BASE)
- + | stw CARG1, 0(RA)
- + | stw CARG2, 4(RA)
- + | stw CARG3, 8(RA)
- + | stw CARG4, 12(RA)
- + |.endif
- + | li RD, (2+1)*8
- | b ->fff_res
- |
- |.ffunc_1 pairs
- @@ -1456,17 +1603,32 @@ static void build_subroutines(BuildCtx *
- | bne ->fff_fallback
- #if LJ_52
- | lwz TAB:TMP2, TAB:CARG1->metatable
- + |.if FPU
- | lfd f0, CFUNC:RB->upvalue[0]
- + |.else
- + | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
- + | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
- + |.endif
- | cmplwi TAB:TMP2, 0
- | la RA, -8(BASE)
- | bne ->fff_fallback
- #else
- + |.if FPU
- | lfd f0, CFUNC:RB->upvalue[0]
- + |.else
- + | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
- + | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
- + |.endif
- | la RA, -8(BASE)
- #endif
- | stw TISNIL, 8(BASE)
- | li RD, (3+1)*8
- + |.if FPU
- | stfd f0, 0(RA)
- + |.else
- + | stw TMP0, 0(RA)
- + | stw TMP1, 4(RA)
- + |.endif
- | b ->fff_res
- |
- |.ffunc ipairs_aux
- @@ -1512,14 +1674,24 @@ static void build_subroutines(BuildCtx *
- | stfd FARG2, 0(RA)
- |.endif
- | ble >2 // Not in array part?
- + |.if FPU
- | lwzx TMP2, TMP1, TMP3
- | lfdx f0, TMP1, TMP3
- + |.else
- + | lwzux TMP2, TMP1, TMP3
- + | lwz TMP3, 4(TMP1)
- + |.endif
- |1:
- | checknil TMP2
- | li RD, (0+1)*8
- | beq ->fff_res // End of iteration, return 0 results.
- | li RD, (2+1)*8
- + |.if FPU
- | stfd f0, 8(RA)
- + |.else
- + | stw TMP2, 8(RA)
- + | stw TMP3, 12(RA)
- + |.endif
- | b ->fff_res
- |2: // Check for empty hash part first. Otherwise call C function.
- | lwz TMP0, TAB:CARG1->hmask
- @@ -1533,7 +1705,11 @@ static void build_subroutines(BuildCtx *
- | li RD, (0+1)*8
- | beq ->fff_res
- | lwz TMP2, 0(CRET1)
- + |.if FPU
- | lfd f0, 0(CRET1)
- + |.else
- + | lwz TMP3, 4(CRET1)
- + |.endif
- | b <1
- |
- |.ffunc_1 ipairs
- @@ -1542,12 +1718,22 @@ static void build_subroutines(BuildCtx *
- | bne ->fff_fallback
- #if LJ_52
- | lwz TAB:TMP2, TAB:CARG1->metatable
- + |.if FPU
- | lfd f0, CFUNC:RB->upvalue[0]
- + |.else
- + | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
- + | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
- + |.endif
- | cmplwi TAB:TMP2, 0
- | la RA, -8(BASE)
- | bne ->fff_fallback
- #else
- + |.if FPU
- | lfd f0, CFUNC:RB->upvalue[0]
- + |.else
- + | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
- + | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
- + |.endif
- | la RA, -8(BASE)
- #endif
- |.if DUALNUM
- @@ -1557,7 +1743,12 @@ static void build_subroutines(BuildCtx *
- |.endif
- | stw ZERO, 12(BASE)
- | li RD, (3+1)*8
- + |.if FPU
- | stfd f0, 0(RA)
- + |.else
- + | stw TMP0, 0(RA)
- + | stw TMP1, 4(RA)
- + |.endif
- | b ->fff_res
- |
- |//-- Base library: catch errors ----------------------------------------
- @@ -1576,19 +1767,32 @@ static void build_subroutines(BuildCtx *
- |
- |.ffunc xpcall
- | cmplwi NARGS8:RC, 16
- - | lwz CARG4, 8(BASE)
- + | lwz CARG3, 8(BASE)
- + |.if FPU
- | lfd FARG2, 8(BASE)
- | lfd FARG1, 0(BASE)
- + |.else
- + | lwz CARG1, 0(BASE)
- + | lwz CARG2, 4(BASE)
- + | lwz CARG4, 12(BASE)
- + |.endif
- | blt ->fff_fallback
- | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH)
- | mr TMP2, BASE
- - | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function.
- + | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function.
- | la BASE, 16(BASE)
- | // Remember active hook before pcall.
- | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31
- + |.if FPU
- | stfd FARG2, 0(TMP2) // Swap function and traceback.
- - | subi NARGS8:RC, NARGS8:RC, 16
- | stfd FARG1, 8(TMP2)
- + |.else
- + | stw CARG3, 0(TMP2)
- + | stw CARG4, 4(TMP2)
- + | stw CARG1, 8(TMP2)
- + | stw CARG2, 12(TMP2)
- + |.endif
- + | subi NARGS8:RC, NARGS8:RC, 16
- | addi PC, TMP1, 16+FRAME_PCALL
- | b ->vm_call_dispatch
- |
- @@ -1631,9 +1835,21 @@ static void build_subroutines(BuildCtx *
- | stp BASE, L->top
- |2: // Move args to coroutine.
- | cmpw TMP1, NARGS8:RC
- + |.if FPU
- | lfdx f0, BASE, TMP1
- + |.else
- + | add CARG3, BASE, TMP1
- + | lwz TMP2, 0(CARG3)
- + | lwz TMP3, 4(CARG3)
- + |.endif
- | beq >3
- + |.if FPU
- | stfdx f0, CARG2, TMP1
- + |.else
- + | add CARG3, CARG2, TMP1
- + | stw TMP2, 0(CARG3)
- + | stw TMP3, 4(CARG3)
- + |.endif
- | addi TMP1, TMP1, 8
- | b <2
- |3:
- @@ -1664,8 +1880,17 @@ static void build_subroutines(BuildCtx *
- | stp TMP2, L:SAVE0->top // Clear coroutine stack.
- |5: // Move results from coroutine.
- | cmplw TMP1, TMP3
- + |.if FPU
- | lfdx f0, TMP2, TMP1
- | stfdx f0, BASE, TMP1
- + |.else
- + | add CARG3, TMP2, TMP1
- + | lwz CARG1, 0(CARG3)
- + | lwz CARG2, 4(CARG3)
- + | add CARG3, BASE, TMP1
- + | stw CARG1, 0(CARG3)
- + | stw CARG2, 4(CARG3)
- + |.endif
- | addi TMP1, TMP1, 8
- | bne <5
- |6:
- @@ -1690,12 +1915,22 @@ static void build_subroutines(BuildCtx *
- | andix. TMP0, PC, FRAME_TYPE
- | la TMP3, -8(TMP3)
- | li TMP1, LJ_TFALSE
- + |.if FPU
- | lfd f0, 0(TMP3)
- + |.else
- + | lwz CARG1, 0(TMP3)
- + | lwz CARG2, 4(TMP3)
- + |.endif
- | stp TMP3, L:SAVE0->top // Remove error from coroutine stack.
- | li RD, (2+1)*8
- | stw TMP1, -8(BASE) // Prepend false to results.
- | la RA, -8(BASE)
- + |.if FPU
- | stfd f0, 0(BASE) // Copy error message.
- + |.else
- + | stw CARG1, 0(BASE) // Copy error message.
- + | stw CARG2, 4(BASE)
- + |.endif
- | b <7
- |.else
- | mr CARG1, L
- @@ -1874,7 +2109,12 @@ static void build_subroutines(BuildCtx *
- | lus CARG1, 0x8000 // -(2^31).
- | beqy ->fff_resi
- |5:
- + |.if FPU
- | lfd FARG1, 0(BASE)
- + |.else
- + | lwz CARG1, 0(BASE)
- + | lwz CARG2, 4(BASE)
- + |.endif
- | blex func
- | b ->fff_resn
- |.endmacro
- @@ -1898,10 +2138,14 @@ static void build_subroutines(BuildCtx *
- |
- |.ffunc math_log
- | cmplwi NARGS8:RC, 8
- - | lwz CARG3, 0(BASE)
- - | lfd FARG1, 0(BASE)
- + | lwz CARG1, 0(BASE)
- | bne ->fff_fallback // Need exactly 1 argument.
- - | checknum CARG3; bge ->fff_fallback
- + | checknum CARG1; bge ->fff_fallback
- + |.if FPU
- + | lfd FARG1, 0(BASE)
- + |.else
- + | lwz CARG2, 4(BASE)
- + |.endif
- | blex log
- | b ->fff_resn
- |
- @@ -1923,17 +2167,24 @@ static void build_subroutines(BuildCtx *
- |.if DUALNUM
- |.ffunc math_ldexp
- | cmplwi NARGS8:RC, 16
- - | lwz CARG3, 0(BASE)
- + | lwz TMP0, 0(BASE)
- + |.if FPU
- | lfd FARG1, 0(BASE)
- - | lwz CARG4, 8(BASE)
- + |.else
- + | lwz CARG1, 0(BASE)
- + | lwz CARG2, 4(BASE)
- + |.endif
- + | lwz TMP1, 8(BASE)
- |.if GPR64
- | lwz CARG2, 12(BASE)
- - |.else
- + |.elif FPU
- | lwz CARG1, 12(BASE)
- + |.else
- + | lwz CARG3, 12(BASE)
- |.endif
- | blt ->fff_fallback
- - | checknum CARG3; bge ->fff_fallback
- - | checknum CARG4; bne ->fff_fallback
- + | checknum TMP0; bge ->fff_fallback
- + | checknum TMP1; bne ->fff_fallback
- |.else
- |.ffunc_nn math_ldexp
- |.if GPR64
- @@ -1948,8 +2199,10 @@ static void build_subroutines(BuildCtx *
- |.ffunc_n math_frexp
- |.if GPR64
- | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
- - |.else
- + |.elif FPU
- | la CARG1, DISPATCH_GL(tmptv)(DISPATCH)
- + |.else
- + | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- |.endif
- | lwz PC, FRAME_PC(BASE)
- | blex frexp
- @@ -1958,7 +2211,12 @@ static void build_subroutines(BuildCtx *
- |.if not DUALNUM
- | tonum_i FARG2, TMP1
- |.endif
- + |.if FPU
- | stfd FARG1, 0(RA)
- + |.else
- + | stw CRET1, 0(RA)
- + | stw CRET2, 4(RA)
- + |.endif
- | li RD, (2+1)*8
- |.if DUALNUM
- | stw TISNUM, 8(RA)
- @@ -1971,13 +2229,20 @@ static void build_subroutines(BuildCtx *
- |.ffunc_n math_modf
- |.if GPR64
- | la CARG2, -8(BASE)
- - |.else
- + |.elif FPU
- | la CARG1, -8(BASE)
- + |.else
- + | la CARG3, -8(BASE)
- |.endif
- | lwz PC, FRAME_PC(BASE)
- | blex modf
- | la RA, -8(BASE)
- + |.if FPU
- | stfd FARG1, 0(BASE)
- + |.else
- + | stw CRET1, 0(BASE)
- + | stw CRET2, 4(BASE)
- + |.endif
- | li RD, (2+1)*8
- | b ->fff_res
- |
- @@ -1985,13 +2250,13 @@ static void build_subroutines(BuildCtx *
- |.if DUALNUM
- | .ffunc_1 name
- | checknum CARG3
- - | addi TMP1, BASE, 8
- - | add TMP2, BASE, NARGS8:RC
- + | addi SAVE0, BASE, 8
- + | add SAVE1, BASE, NARGS8:RC
- | bne >4
- |1: // Handle integers.
- - | lwz CARG4, 0(TMP1)
- - | cmplw cr1, TMP1, TMP2
- - | lwz CARG2, 4(TMP1)
- + | lwz CARG4, 0(SAVE0)
- + | cmplw cr1, SAVE0, SAVE1
- + | lwz CARG2, 4(SAVE0)
- | bge cr1, ->fff_resi
- | checknum CARG4
- | xoris TMP0, CARG1, 0x8000
- @@ -2008,36 +2273,76 @@ static void build_subroutines(BuildCtx *
- |.if GPR64
- | rldicl CARG1, CARG1, 0, 32
- |.endif
- - | addi TMP1, TMP1, 8
- + | addi SAVE0, SAVE0, 8
- | b <1
- |3:
- | bge ->fff_fallback
- | // Convert intermediate result to number and continue below.
- + |.if FPU
- | tonum_i FARG1, CARG1
- - | lfd FARG2, 0(TMP1)
- + | lfd FARG2, 0(SAVE0)
- + |.else
- + | mr CARG2, CARG1
- + | bl ->vm_sfi2d_1
- + | lwz CARG3, 0(SAVE0)
- + | lwz CARG4, 4(SAVE0)
- + |.endif
- | b >6
- |4:
- + |.if FPU
- | lfd FARG1, 0(BASE)
- + |.else
- + | lwz CARG1, 0(BASE)
- + | lwz CARG2, 4(BASE)
- + |.endif
- | bge ->fff_fallback
- |5: // Handle numbers.
- - | lwz CARG4, 0(TMP1)
- - | cmplw cr1, TMP1, TMP2
- - | lfd FARG2, 0(TMP1)
- + | lwz CARG3, 0(SAVE0)
- + | cmplw cr1, SAVE0, SAVE1
- + |.if FPU
- + | lfd FARG2, 0(SAVE0)
- + |.else
- + | lwz CARG4, 4(SAVE0)
- + |.endif
- | bge cr1, ->fff_resn
- - | checknum CARG4; bge >7
- + | checknum CARG3; bge >7
- |6:
- + | addi SAVE0, SAVE0, 8
- + |.if FPU
- | fsub f0, FARG1, FARG2
- - | addi TMP1, TMP1, 8
- |.if ismax
- | fsel FARG1, f0, FARG1, FARG2
- |.else
- | fsel FARG1, f0, FARG2, FARG1
- |.endif
- + |.else
- + | stw CARG1, SFSAVE_1
- + | stw CARG2, SFSAVE_2
- + | stw CARG3, SFSAVE_3
- + | stw CARG4, SFSAVE_4
- + | blex __ledf2
- + | cmpwi CRET1, 0
- + |.if ismax
- + | blt >8
- + |.else
- + | bge >8
- + |.endif
- + | lwz CARG1, SFSAVE_1
- + | lwz CARG2, SFSAVE_2
- + | b <5
- + |8:
- + | lwz CARG1, SFSAVE_3
- + | lwz CARG2, SFSAVE_4
- + |.endif
- | b <5
- |7: // Convert integer to number and continue above.
- - | lwz CARG2, 4(TMP1)
- + | lwz CARG3, 4(SAVE0)
- | bne ->fff_fallback
- - | tonum_i FARG2, CARG2
- + |.if FPU
- + | tonum_i FARG2, CARG3
- + |.else
- + | bl ->vm_sfi2d_2
- + |.endif
- | b <6
- |.else
- | .ffunc_n name
- @@ -2237,28 +2542,37 @@ static void build_subroutines(BuildCtx *
- |
- |.macro .ffunc_bit_op, name, ins
- | .ffunc_bit name
- - | addi TMP1, BASE, 8
- - | add TMP2, BASE, NARGS8:RC
- + | addi SAVE0, BASE, 8
- + | add SAVE1, BASE, NARGS8:RC
- |1:
- - | lwz CARG4, 0(TMP1)
- - | cmplw cr1, TMP1, TMP2
- + | lwz CARG4, 0(SAVE0)
- + | cmplw cr1, SAVE0, SAVE1
- |.if DUALNUM
- - | lwz CARG2, 4(TMP1)
- + | lwz CARG2, 4(SAVE0)
- |.else
- - | lfd FARG1, 0(TMP1)
- + | lfd FARG1, 0(SAVE0)
- |.endif
- | bgey cr1, ->fff_resi
- | checknum CARG4
- |.if DUALNUM
- + |.if FPU
- | bnel ->fff_bitop_fb
- |.else
- + | beq >3
- + | stw CARG1, SFSAVE_1
- + | bl ->fff_bitop_fb
- + | mr CARG2, CARG1
- + | lwz CARG1, SFSAVE_1
- + |3:
- + |.endif
- + |.else
- | fadd FARG1, FARG1, TOBIT
- | bge ->fff_fallback
- | stfd FARG1, TMPD
- | lwz CARG2, TMPD_LO
- |.endif
- | ins CARG1, CARG1, CARG2
- - | addi TMP1, TMP1, 8
- + | addi SAVE0, SAVE0, 8
- | b <1
- |.endmacro
- |
- @@ -2280,7 +2594,14 @@ static void build_subroutines(BuildCtx *
- |.macro .ffunc_bit_sh, name, ins, shmod
- |.if DUALNUM
- | .ffunc_2 bit_..name
- + |.if FPU
- | checknum CARG3; bnel ->fff_tobit_fb
- + |.else
- + | checknum CARG3; beq >1
- + | bl ->fff_tobit_fb
- + | lwz CARG2, 12(BASE) // Conversion polluted CARG2.
- + |1:
- + |.endif
- | // Note: no inline conversion from number for 2nd argument!
- | checknum CARG4; bne ->fff_fallback
- |.else
- @@ -2317,27 +2638,77 @@ static void build_subroutines(BuildCtx *
- |->fff_resn:
- | lwz PC, FRAME_PC(BASE)
- | la RA, -8(BASE)
- + |.if FPU
- | stfd FARG1, -8(BASE)
- + |.else
- + | stw CARG1, -8(BASE)
- + | stw CARG2, -4(BASE)
- + |.endif
- | b ->fff_res1
- |
- |// Fallback FP number to bit conversion.
- |->fff_tobit_fb:
- |.if DUALNUM
- + |.if FPU
- | lfd FARG1, 0(BASE)
- | bgt ->fff_fallback
- | fadd FARG1, FARG1, TOBIT
- | stfd FARG1, TMPD
- | lwz CARG1, TMPD_LO
- | blr
- + |.else
- + | bgt ->fff_fallback
- + | mr CARG2, CARG1
- + | mr CARG1, CARG3
- + |// Soft-float double (CARG1 = sign/exponent word, CARG2 = low word) to int32 in CARG1. Modifies: CARG1, CARG2, TMP0, TMP1, TMP2.
- + |->vm_tobit:
- + | slwi TMP2, CARG1, 1 // Drop the sign bit; exponent field is now in the top 11 bits.
- + | addis TMP2, TMP2, 0x0020 // Add 1 to the exponent field.
- + | cmpwi TMP2, 0
- + | bge >2 // Top bit clear: biased exponent < 0x3ff, or 0x7ff which wrapped around.
- + | li TMP1, 0x3e0
- + | srawi TMP2, TMP2, 21 // TMP2 = biased exponent - 0x7ff.
- + | not TMP1, TMP1 // TMP1 = ~0x3e0.
- + | sub. TMP2, TMP1, TMP2 // TMP2 = 31 - unbiased exponent; sets cr0.
- + | cmpwi cr7, CARG1, 0 // cr7[lt] = input is negative.
- + | blt >1 // Unbiased exponent > 31.
- + | slwi TMP1, CARG1, 11 // Upper 20 mantissa bits, placed just below the top bit.
- + | srwi TMP0, CARG2, 21 // Next 11 mantissa bits from the low word.
- + | oris TMP1, TMP1, 0x8000 // Set the implicit leading 1 in the top bit.
- + | or TMP1, TMP1, TMP0
- + | srw CARG1, TMP1, TMP2 // Shift out the fraction bits (truncates).
- + | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
- + | neg CARG1, CARG1 // Negative input: negate the magnitude.
- + | blr
- + |1: // Unbiased exponent > 31. NOTE(review): shift counts look derived for exponents 32..52 -- confirm behavior above that.
- + | addi TMP2, TMP2, 21 // TMP2 = 52 - unbiased exponent.
- + | srw TMP1, CARG2, TMP2 // Contribution of the low word.
- + | slwi CARG2, CARG1, 12 // Strip sign and exponent from the high word.
- + | subfic TMP2, TMP2, 20 // TMP2 = unbiased exponent - 32.
- + | slw TMP0, CARG2, TMP2 // Contribution of the high word.
- + | or CARG1, TMP1, TMP0
- + | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
- + | neg CARG1, CARG1
- + | blr
- + |2: // Result is 0 for these inputs.
- + | li CARG1, 0
- + | blr
- + |.endif
- |.endif
- |->fff_bitop_fb:
- |.if DUALNUM
- - | lfd FARG1, 0(TMP1)
- + |.if FPU
- + | lfd FARG1, 0(SAVE0)
- | bgt ->fff_fallback
- | fadd FARG1, FARG1, TOBIT
- | stfd FARG1, TMPD
- | lwz CARG2, TMPD_LO
- | blr
- + |.else
- + | bgt ->fff_fallback
- + | mr CARG1, CARG4
- + | b ->vm_tobit
- + |.endif
- |.endif
- |
- |//-----------------------------------------------------------------------
- @@ -2530,10 +2901,21 @@ static void build_subroutines(BuildCtx *
- | decode_RA8 RC, INS // Call base.
- | beq >2
- |1: // Move results down.
- + |.if FPU
- | lfd f0, 0(RA)
- + |.else
- + | lwz CARG1, 0(RA)
- + | lwz CARG2, 4(RA)
- + |.endif
- | addic. TMP1, TMP1, -8
- | addi RA, RA, 8
- + |.if FPU
- | stfdx f0, BASE, RC
- + |.else
- + | add CARG3, BASE, RC
- + | stw CARG1, 0(CARG3)
- + | stw CARG2, 4(CARG3)
- + |.endif
- | addi RC, RC, 8
- | bne <1
- |2:
- @@ -2586,10 +2968,12 @@ static void build_subroutines(BuildCtx *
- |//-----------------------------------------------------------------------
- |
- |.macro savex_, a, b, c, d
- + |.if FPU // Without an FPU this macro emits no FPR stores.
- | stfd f..a, 16+a*8(sp)
- | stfd f..b, 16+b*8(sp)
- | stfd f..c, 16+c*8(sp)
- | stfd f..d, 16+d*8(sp)
- + |.endif
- |.endmacro
- |
- |->vm_exit_handler:
- @@ -2661,16 +3045,16 @@ static void build_subroutines(BuildCtx *
- | lwz KBASE, PC2PROTO(k)(TMP1)
- | // Setup type comparison constants.
- | li TISNUM, LJ_TISNUM
- - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- - | stw TMP3, TMPD
- + | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- + | .FPU stw TMP3, TMPD
- | li ZERO, 0
- - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- - | lfs TOBIT, TMPD
- - | stw TMP3, TMPD
- - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
- + | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- + | .FPU lfs TOBIT, TMPD
- + | .FPU stw TMP3, TMPD
- + | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
- | li TISNIL, LJ_TNIL
- - | stw TMP0, TONUM_HI
- - | lfs TONUM, TMPD
- + | .FPU stw TMP0, TONUM_HI
- + | .FPU lfs TONUM, TMPD
- | // Modified copy of ins_next which handles function header dispatch, too.
- | lwz INS, 0(PC)
- | addi PC, PC, 4
- @@ -2715,7 +3099,35 @@ static void build_subroutines(BuildCtx *
- |//-- Math helper functions ----------------------------------------------
- |//-----------------------------------------------------------------------
- |
- - |// NYI: Use internal implementations of floor, ceil, trunc.
- + |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp.
- + |
- + |.macro sfi2d, AHI, ALO
- + |.if not FPU
- + | mr. AHI, ALO // Signed int32 in ALO -> double in AHI:ALO. Copy and set cr0.
- + | bclr 12, 2 // Handle zero first.
- + | srawi TMP0, ALO, 31 // TMP0 = 0 or -1 depending on the sign.
- + | xor TMP1, ALO, TMP0
- + | sub TMP1, TMP1, TMP0 // Absolute value in TMP1.
- + | cntlzw AHI, TMP1 // Number of leading zero bits.
- + | andix. TMP0, TMP0, 0x800 // Mask sign bit.
- + | slw TMP1, TMP1, AHI // Align mantissa left with leading 1.
- + | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI.
- + | slwi ALO, TMP1, 21 // Low word: the lowest 11 bits of the aligned mantissa.
- + | or AHI, AHI, TMP0 // Sign | Exponent.
- + | srwi TMP1, TMP1, 11 // Remaining mantissa bits; leading 1 lands on the exponent LSB.
- + | slwi AHI, AHI, 20 // Align left.
- + | add AHI, AHI, TMP1 // Add mantissa, increment exponent.
- + | blr
- + |.endif
- + |.endmacro
- + |
- + |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1.
- + |->vm_sfi2d_1:
- + | sfi2d CARG1, CARG2
- + |
- + |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1.
- + |->vm_sfi2d_2:
- + | sfi2d CARG3, CARG4
- |
- |->vm_modi:
- | divwo. TMP0, CARG1, CARG2
- @@ -2783,21 +3195,21 @@ static void build_subroutines(BuildCtx *
- | addi DISPATCH, r12, GG_G2DISP
- | stw r11, CTSTATE->cb.slot
- | stw r3, CTSTATE->cb.gpr[0]
- - | stfd f1, CTSTATE->cb.fpr[0]
- + | .FPU stfd f1, CTSTATE->cb.fpr[0]
- | stw r4, CTSTATE->cb.gpr[1]
- - | stfd f2, CTSTATE->cb.fpr[1]
- + | .FPU stfd f2, CTSTATE->cb.fpr[1]
- | stw r5, CTSTATE->cb.gpr[2]
- - | stfd f3, CTSTATE->cb.fpr[2]
- + | .FPU stfd f3, CTSTATE->cb.fpr[2]
- | stw r6, CTSTATE->cb.gpr[3]
- - | stfd f4, CTSTATE->cb.fpr[3]
- + | .FPU stfd f4, CTSTATE->cb.fpr[3]
- | stw r7, CTSTATE->cb.gpr[4]
- - | stfd f5, CTSTATE->cb.fpr[4]
- + | .FPU stfd f5, CTSTATE->cb.fpr[4]
- | stw r8, CTSTATE->cb.gpr[5]
- - | stfd f6, CTSTATE->cb.fpr[5]
- + | .FPU stfd f6, CTSTATE->cb.fpr[5]
- | stw r9, CTSTATE->cb.gpr[6]
- - | stfd f7, CTSTATE->cb.fpr[6]
- + | .FPU stfd f7, CTSTATE->cb.fpr[6]
- | stw r10, CTSTATE->cb.gpr[7]
- - | stfd f8, CTSTATE->cb.fpr[7]
- + | .FPU stfd f8, CTSTATE->cb.fpr[7]
- | addi TMP0, sp, CFRAME_SPACE+8
- | stw TMP0, CTSTATE->cb.stack
- | mr CARG1, CTSTATE
- @@ -2808,21 +3220,21 @@ static void build_subroutines(BuildCtx *
- | lp BASE, L:CRET1->base
- | li TISNUM, LJ_TISNUM // Setup type comparison constants.
- | lp RC, L:CRET1->top
- - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- + | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | li ZERO, 0
- | mr L, CRET1
- - | stw TMP3, TMPD
- - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
- + | .FPU stw TMP3, TMPD
- + | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- - | stw TMP0, TONUM_HI
- + | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- + | .FPU stw TMP0, TONUM_HI
- | li TISNIL, LJ_TNIL
- | li_vmstate INTERP
- - | lfs TOBIT, TMPD
- - | stw TMP3, TMPD
- + | .FPU lfs TOBIT, TMPD
- + | .FPU stw TMP3, TMPD
- | sub RC, RC, BASE
- | st_vmstate
- - | lfs TONUM, TMPD
- + | .FPU lfs TONUM, TMPD
- | ins_callt
- |.endif
- |
- @@ -2836,7 +3248,7 @@ static void build_subroutines(BuildCtx *
- | mr CARG2, RA
- | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
- | lwz CRET1, CTSTATE->cb.gpr[0]
- - | lfd FARG1, CTSTATE->cb.fpr[0]
- + | .FPU lfd FARG1, CTSTATE->cb.fpr[0]
- | lwz CRET2, CTSTATE->cb.gpr[1]
- | b ->vm_leave_unw
- |.endif
- @@ -2870,14 +3282,14 @@ static void build_subroutines(BuildCtx *
- | bge <1
- |2:
- | bney cr1, >3
- - | lfd f1, CCSTATE->fpr[0]
- - | lfd f2, CCSTATE->fpr[1]
- - | lfd f3, CCSTATE->fpr[2]
- - | lfd f4, CCSTATE->fpr[3]
- - | lfd f5, CCSTATE->fpr[4]
- - | lfd f6, CCSTATE->fpr[5]
- - | lfd f7, CCSTATE->fpr[6]
- - | lfd f8, CCSTATE->fpr[7]
- + | .FPU lfd f1, CCSTATE->fpr[0]
- + | .FPU lfd f2, CCSTATE->fpr[1]
- + | .FPU lfd f3, CCSTATE->fpr[2]
- + | .FPU lfd f4, CCSTATE->fpr[3]
- + | .FPU lfd f5, CCSTATE->fpr[4]
- + | .FPU lfd f6, CCSTATE->fpr[5]
- + | .FPU lfd f7, CCSTATE->fpr[6]
- + | .FPU lfd f8, CCSTATE->fpr[7]
- |3:
- | lp TMP0, CCSTATE->func
- | lwz CARG2, CCSTATE->gpr[1]
- @@ -2894,7 +3306,7 @@ static void build_subroutines(BuildCtx *
- | lwz TMP2, -4(r14)
- | lwz TMP0, 4(r14)
- | stw CARG1, CCSTATE:TMP1->gpr[0]
- - | stfd FARG1, CCSTATE:TMP1->fpr[0]
- + | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0]
- | stw CARG2, CCSTATE:TMP1->gpr[1]
- | mtlr TMP0
- | stw CARG3, CCSTATE:TMP1->gpr[2]
- @@ -2923,19 +3335,19 @@ static void build_ins(BuildCtx *ctx, BCO
- case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
- | // RA = src1*8, RD = src2*8, JMP with RD = target
- |.if DUALNUM
- - | lwzux TMP0, RA, BASE
- + | lwzux CARG1, RA, BASE
- | addi PC, PC, 4
- | lwz CARG2, 4(RA)
- - | lwzux TMP1, RD, BASE
- + | lwzux CARG3, RD, BASE
- | lwz TMP2, -4(PC)
- - | checknum cr0, TMP0
- - | lwz CARG3, 4(RD)
- + | checknum cr0, CARG1
- + | lwz CARG4, 4(RD)
- | decode_RD4 TMP2, TMP2
- - | checknum cr1, TMP1
- - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
- + | checknum cr1, CARG3
- + | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16)
- | bne cr0, >7
- | bne cr1, >8
- - | cmpw CARG2, CARG3
- + | cmpw CARG2, CARG4
- if (op == BC_ISLT) {
- | bge >2
- } else if (op == BC_ISGE) {
- @@ -2946,28 +3358,41 @@ static void build_ins(BuildCtx *ctx, BCO
- | ble >2
- }
- |1:
- - | add PC, PC, TMP2
- + | add PC, PC, SAVE0
- |2:
- | ins_next
- |
- |7: // RA is not an integer.
- | bgt cr0, ->vmeta_comp
- | // RA is a number.
- - | lfd f0, 0(RA)
- + | .FPU lfd f0, 0(RA)
- | bgt cr1, ->vmeta_comp
- | blt cr1, >4
- | // RA is a number, RD is an integer.
- - | tonum_i f1, CARG3
- + |.if FPU
- + | tonum_i f1, CARG4
- + |.else
- + | bl ->vm_sfi2d_2
- + |.endif
- | b >5
- |
- |8: // RA is an integer, RD is not an integer.
- | bgt cr1, ->vmeta_comp
- | // RA is an integer, RD is a number.
- + |.if FPU
- | tonum_i f0, CARG2
- + |.else
- + | bl ->vm_sfi2d_1
- + |.endif
- |4:
- - | lfd f1, 0(RD)
- + | .FPU lfd f1, 0(RD)
- |5:
- + |.if FPU
- | fcmpu cr0, f0, f1
- + |.else
- + | blex __ledf2
- + | cmpwi CRET1, 0
- + |.endif
- if (op == BC_ISLT) {
- | bge <2
- } else if (op == BC_ISGE) {
- @@ -3015,42 +3440,42 @@ static void build_ins(BuildCtx *ctx, BCO
- vk = op == BC_ISEQV;
- | // RA = src1*8, RD = src2*8, JMP with RD = target
- |.if DUALNUM
- - | lwzux TMP0, RA, BASE
- + | lwzux CARG1, RA, BASE
- | addi PC, PC, 4
- | lwz CARG2, 4(RA)
- - | lwzux TMP1, RD, BASE
- - | checknum cr0, TMP0
- - | lwz TMP2, -4(PC)
- - | checknum cr1, TMP1
- - | decode_RD4 TMP2, TMP2
- - | lwz CARG3, 4(RD)
- + | lwzux CARG3, RD, BASE
- + | checknum cr0, CARG1
- + | lwz SAVE0, -4(PC)
- + | checknum cr1, CARG3
- + | decode_RD4 SAVE0, SAVE0
- + | lwz CARG4, 4(RD)
- | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
- - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
- + | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
- if (vk) {
- | ble cr7, ->BC_ISEQN_Z
- } else {
- | ble cr7, ->BC_ISNEN_Z
- }
- |.else
- - | lwzux TMP0, RA, BASE
- - | lwz TMP2, 0(PC)
- + | lwzux CARG1, RA, BASE
- + | lwz SAVE0, 0(PC)
- | lfd f0, 0(RA)
- | addi PC, PC, 4
- - | lwzux TMP1, RD, BASE
- - | checknum cr0, TMP0
- - | decode_RD4 TMP2, TMP2
- + | lwzux CARG3, RD, BASE
- + | checknum cr0, CARG1
- + | decode_RD4 SAVE0, SAVE0
- | lfd f1, 0(RD)
- - | checknum cr1, TMP1
- - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
- + | checknum cr1, CARG3
- + | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
- | bge cr0, >5
- | bge cr1, >5
- | fcmpu cr0, f0, f1
- if (vk) {
- | bne >1
- - | add PC, PC, TMP2
- + | add PC, PC, SAVE0
- } else {
- | beq >1
- - | add PC, PC, TMP2
- + | add PC, PC, SAVE0
- }
- |1:
- | ins_next
- @@ -3058,36 +3483,36 @@ static void build_ins(BuildCtx *ctx, BCO
- |5: // Either or both types are not numbers.
- |.if not DUALNUM
- | lwz CARG2, 4(RA)
- - | lwz CARG3, 4(RD)
- + | lwz CARG4, 4(RD)
- |.endif
- |.if FFI
- - | cmpwi cr7, TMP0, LJ_TCDATA
- - | cmpwi cr5, TMP1, LJ_TCDATA
- + | cmpwi cr7, CARG1, LJ_TCDATA
- + | cmpwi cr5, CARG3, LJ_TCDATA
- |.endif
- - | not TMP3, TMP0
- - | cmplw TMP0, TMP1
- - | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive?
- + | not TMP2, CARG1
- + | cmplw CARG1, CARG3
- + | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive?
- |.if FFI
- | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq
- |.endif
- - | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata?
- + | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata?
- |.if FFI
- | beq cr7, ->vmeta_equal_cd
- |.endif
- - | cmplw cr5, CARG2, CARG3
- + | cmplw cr5, CARG2, CARG4
- | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive.
- | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type.
- | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv.
- - | mr SAVE0, PC
- + | mr SAVE1, PC
- | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2.
- | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2.
- if (vk) {
- | bne cr0, >6
- - | add PC, PC, TMP2
- + | add PC, PC, SAVE0
- |6:
- } else {
- | beq cr0, >6
- - | add PC, PC, TMP2
- + | add PC, PC, SAVE0
- |6:
- }
- |.if DUALNUM
- @@ -3102,6 +3527,7 @@ static void build_ins(BuildCtx *ctx, BCO
- |
- | // Different tables or userdatas. Need to check __eq metamethod.
- | // Field metatable must be at same offset for GCtab and GCudata!
- + | mr CARG3, CARG4
- | lwz TAB:TMP2, TAB:CARG2->metatable
- | li CARG4, 1-vk // ne = 0 or 1.
- | cmplwi TAB:TMP2, 0
- @@ -3109,7 +3535,7 @@ static void build_ins(BuildCtx *ctx, BCO
- | lbz TMP2, TAB:TMP2->nomm
- | andix. TMP2, TMP2, 1<<MM_eq
- | bne <1 // Or 'no __eq' flag set?
- - | mr PC, SAVE0 // Restore old PC.
- + | mr PC, SAVE1 // Restore old PC.
- | b ->vmeta_equal // Handle __eq metamethod.
- break;
-
- @@ -3150,16 +3576,16 @@ static void build_ins(BuildCtx *ctx, BCO
- vk = op == BC_ISEQN;
- | // RA = src*8, RD = num_const*8, JMP with RD = target
- |.if DUALNUM
- - | lwzux TMP0, RA, BASE
- + | lwzux CARG1, RA, BASE
- | addi PC, PC, 4
- | lwz CARG2, 4(RA)
- - | lwzux TMP1, RD, KBASE
- - | checknum cr0, TMP0
- - | lwz TMP2, -4(PC)
- - | checknum cr1, TMP1
- - | decode_RD4 TMP2, TMP2
- - | lwz CARG3, 4(RD)
- - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
- + | lwzux CARG3, RD, KBASE
- + | checknum cr0, CARG1
- + | lwz SAVE0, -4(PC)
- + | checknum cr1, CARG3
- + | decode_RD4 SAVE0, SAVE0
- + | lwz CARG4, 4(RD)
- + | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
- if (vk) {
- |->BC_ISEQN_Z:
- } else {
- @@ -3167,7 +3593,7 @@ static void build_ins(BuildCtx *ctx, BCO
- }
- | bne cr0, >7
- | bne cr1, >8
- - | cmpw CARG2, CARG3
- + | cmpw CARG2, CARG4
- |4:
- |.else
- if (vk) {
- @@ -3175,20 +3601,20 @@ static void build_ins(BuildCtx *ctx, BCO
- } else {
- |->BC_ISNEN_Z: // Dummy label.
- }
- - | lwzx TMP0, BASE, RA
- + | lwzx CARG1, BASE, RA
- | addi PC, PC, 4
- | lfdx f0, BASE, RA
- - | lwz TMP2, -4(PC)
- + | lwz SAVE0, -4(PC)
- | lfdx f1, KBASE, RD
- - | decode_RD4 TMP2, TMP2
- - | checknum TMP0
- - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
- + | decode_RD4 SAVE0, SAVE0
- + | checknum CARG1
- + | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
- | bge >3
- | fcmpu cr0, f0, f1
- |.endif
- if (vk) {
- | bne >1
- - | add PC, PC, TMP2
- + | add PC, PC, SAVE0
- |1:
- |.if not FFI
- |3:
- @@ -3199,13 +3625,13 @@ static void build_ins(BuildCtx *ctx, BCO
- |.if not FFI
- |3:
- |.endif
- - | add PC, PC, TMP2
- + | add PC, PC, SAVE0
- |2:
- }
- | ins_next
- |.if FFI
- |3:
- - | cmpwi TMP0, LJ_TCDATA
- + | cmpwi CARG1, LJ_TCDATA
- | beq ->vmeta_equal_cd
- | b <1
- |.endif
- @@ -3213,18 +3639,31 @@ static void build_ins(BuildCtx *ctx, BCO
- |7: // RA is not an integer.
- | bge cr0, <3
- | // RA is a number.
- - | lfd f0, 0(RA)
- + | .FPU lfd f0, 0(RA)
- | blt cr1, >1
- | // RA is a number, RD is an integer.
- - | tonum_i f1, CARG3
- + |.if FPU
- + | tonum_i f1, CARG4
- + |.else
- + | bl ->vm_sfi2d_2
- + |.endif
- | b >2
- |
- |8: // RA is an integer, RD is a number.
- + |.if FPU
- | tonum_i f0, CARG2
- + |.else
- + | bl ->vm_sfi2d_1
- + |.endif
- |1:
- - | lfd f1, 0(RD)
- + | .FPU lfd f1, 0(RD)
- |2:
- + |.if FPU
- | fcmpu cr0, f0, f1
- + |.else
- + | blex __ledf2
- + | cmpwi CRET1, 0
- + |.endif
- | b <4
- |.endif
- break;
- @@ -3279,7 +3718,12 @@ static void build_ins(BuildCtx *ctx, BCO
- | add PC, PC, TMP2
- } else {
- | li TMP1, LJ_TFALSE
- + |.if FPU
- | lfdx f0, BASE, RD
- + |.else
- + | lwzux CARG1, RD, BASE
- + | lwz CARG2, 4(RD)
- + |.endif
- | cmplw TMP0, TMP1
- if (op == BC_ISTC) {
- | bge >1
- @@ -3288,7 +3732,12 @@ static void build_ins(BuildCtx *ctx, BCO
- }
- | addis PC, PC, -(BCBIAS_J*4 >> 16)
- | decode_RD4 TMP2, INS
- + |.if FPU
- | stfdx f0, BASE, RA
- + |.else
- + | stwux CARG1, RA, BASE
- + | stw CARG2, 4(RA)
- + |.endif
- | add PC, PC, TMP2
- |1:
- }
- @@ -3323,8 +3772,15 @@ static void build_ins(BuildCtx *ctx, BCO
- case BC_MOV:
- | // RA = dst*8, RD = src*8
- | ins_next1
- + |.if FPU
- | lfdx f0, BASE, RD
- | stfdx f0, BASE, RA
- + |.else
- + | lwzux TMP0, RD, BASE
- + | lwz TMP1, 4(RD)
- + | stwux TMP0, RA, BASE
- + | stw TMP1, 4(RA)
- + |.endif
- | ins_next2
- break;
- case BC_NOT:
- @@ -3426,44 +3882,65 @@ static void build_ins(BuildCtx *ctx, BCO
- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
- ||switch (vk) {
- ||case 0:
- - | lwzx TMP1, BASE, RB
- + | lwzx CARG1, BASE, RB
- | .if DUALNUM
- - | lwzx TMP2, KBASE, RC
- + | lwzx CARG3, KBASE, RC
- | .endif
- + | .if FPU
- | lfdx f14, BASE, RB
- | lfdx f15, KBASE, RC
- + | .else
- + | add TMP1, BASE, RB
- + | add TMP2, KBASE, RC
- + | lwz CARG2, 4(TMP1)
- + | lwz CARG4, 4(TMP2)
- + | .endif
- | .if DUALNUM
- - | checknum cr0, TMP1
- - | checknum cr1, TMP2
- + | checknum cr0, CARG1
- + | checknum cr1, CARG3
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | bge ->vmeta_arith_vn
- | .else
- - | checknum TMP1; bge ->vmeta_arith_vn
- + | checknum CARG1; bge ->vmeta_arith_vn
- | .endif
- || break;
- ||case 1:
- - | lwzx TMP1, BASE, RB
- + | lwzx CARG1, BASE, RB
- | .if DUALNUM
- - | lwzx TMP2, KBASE, RC
- + | lwzx CARG3, KBASE, RC
- | .endif
- + | .if FPU
- | lfdx f15, BASE, RB
- | lfdx f14, KBASE, RC
- + | .else
- + | add TMP1, BASE, RB
- + | add TMP2, KBASE, RC
- + | lwz CARG2, 4(TMP1)
- + | lwz CARG4, 4(TMP2)
- + | .endif
- | .if DUALNUM
- - | checknum cr0, TMP1
- - | checknum cr1, TMP2
- + | checknum cr0, CARG1
- + | checknum cr1, CARG3
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | bge ->vmeta_arith_nv
- | .else
- - | checknum TMP1; bge ->vmeta_arith_nv
- + | checknum CARG1; bge ->vmeta_arith_nv
- | .endif
- || break;
- ||default:
- - | lwzx TMP1, BASE, RB
- - | lwzx TMP2, BASE, RC
- + | lwzx CARG1, BASE, RB
- + | lwzx CARG3, BASE, RC
- + | .if FPU
- | lfdx f14, BASE, RB
- | lfdx f15, BASE, RC
- - | checknum cr0, TMP1
- - | checknum cr1, TMP2
- + | .else
- + | add TMP1, BASE, RB
- + | add TMP2, BASE, RC
- + | lwz CARG2, 4(TMP1)
- + | lwz CARG4, 4(TMP2)
- + | .endif
- + | checknum cr0, CARG1
- + | checknum cr1, CARG3
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | bge ->vmeta_arith_vv
- || break;
- @@ -3497,48 +3974,78 @@ static void build_ins(BuildCtx *ctx, BCO
- | fsub a, b, a // b - floor(b/c)*c
- |.endmacro
- |
- + |.macro sfpmod
- + |->BC_MODVN_Z: // Soft-float modulo: a - floor(a/b)*b with a in CARG1/CARG2 and b in CARG3/CARG4.
- + | stw CARG1, SFSAVE_1 // Save a; it is needed again for the final subtraction.
- + | stw CARG2, SFSAVE_2
- + | mr SAVE0, CARG3 // Save b; it is needed again for the multiplication.
- + | mr SAVE1, CARG4
- + | blex __divdf3 // a / b
- + | blex floor // floor(a / b)
- + | mr CARG3, SAVE0 // Reload b as the 2nd operand.
- + | mr CARG4, SAVE1
- + | blex __muldf3 // floor(a / b) * b
- + | mr CARG3, CRET1 // Product becomes the 2nd operand.
- + | mr CARG4, CRET2
- + | lwz CARG1, SFSAVE_1 // Reload a as the 1st operand.
- + | lwz CARG2, SFSAVE_2
- + | blex __subdf3 // a - floor(a / b) * b
- + |.endmacro
- + |
- |.macro ins_arithfp, fpins
- | ins_arithpre
- |.if "fpins" == "fpmod_"
- | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
- - |.else
- + |.elif FPU
- | fpins f0, f14, f15
- | ins_next1
- | stfdx f0, BASE, RA
- | ins_next2
- + |.else
- + | blex __divdf3 // Only soft-float div uses this macro.
- + | ins_next1
- + | stwux CRET1, RA, BASE // Store 1st result word at BASE+RA; RA is updated to that address.
- + | stw CRET2, 4(RA) // Store 2nd result word right after it.
- + | ins_next2
- |.endif
- |.endmacro
- |
- - |.macro ins_arithdn, intins, fpins
- + |.macro ins_arithdn, intins, fpins, fpcall
- | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
- ||switch (vk) {
- ||case 0:
- - | lwzux TMP1, RB, BASE
- - | lwzux TMP2, RC, KBASE
- - | lwz CARG1, 4(RB)
- - | checknum cr0, TMP1
- - | lwz CARG2, 4(RC)
- + | lwzux CARG1, RB, BASE
- + | lwzux CARG3, RC, KBASE
- + | lwz CARG2, 4(RB)
- + | checknum cr0, CARG1
- + | lwz CARG4, 4(RC)
- + | checknum cr1, CARG3
- || break;
- ||case 1:
- - | lwzux TMP1, RB, BASE
- - | lwzux TMP2, RC, KBASE
- - | lwz CARG2, 4(RB)
- - | checknum cr0, TMP1
- - | lwz CARG1, 4(RC)
- + | lwzux CARG3, RB, BASE
- + | lwzux CARG1, RC, KBASE
- + | lwz CARG4, 4(RB)
- + | checknum cr0, CARG3
- + | lwz CARG2, 4(RC)
- + | checknum cr1, CARG1
- || break;
- ||default:
- - | lwzux TMP1, RB, BASE
- - | lwzux TMP2, RC, BASE
- - | lwz CARG1, 4(RB)
- - | checknum cr0, TMP1
- - | lwz CARG2, 4(RC)
- + | lwzux CARG1, RB, BASE
- + | lwzux CARG3, RC, BASE
- + | lwz CARG2, 4(RB)
- + | checknum cr0, CARG1
- + | lwz CARG4, 4(RC)
- + | checknum cr1, CARG3
- || break;
- ||}
- - | checknum cr1, TMP2
- | bne >5
- | bne cr1, >5
- - | intins CARG1, CARG1, CARG2
- + |.if "intins" == "intmod"
- + | mr CARG1, CARG2
- + | mr CARG2, CARG4
- + |.endif
- + | intins CARG1, CARG2, CARG4
- | bso >4
- |1:
- | ins_next1
- @@ -3550,29 +4057,40 @@ static void build_ins(BuildCtx *ctx, BCO
- | checkov TMP0, <1 // Ignore unrelated overflow.
- | ins_arithfallback b
- |5: // FP variant.
- + |.if FPU
- ||if (vk == 1) {
- | lfd f15, 0(RB)
- - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | lfd f14, 0(RC)
- ||} else {
- | lfd f14, 0(RB)
- - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | lfd f15, 0(RC)
- ||}
- + |.endif
- + | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | ins_arithfallback bge
- |.if "fpins" == "fpmod_"
- | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
- |.else
- + |.if FPU
- | fpins f0, f14, f15
- - | ins_next1
- | stfdx f0, BASE, RA
- + |.else
- + |.if "fpcall" == "sfpmod"
- + | sfpmod
- + |.else
- + | blex fpcall
- + |.endif
- + | stwux CRET1, RA, BASE
- + | stw CRET2, 4(RA)
- + |.endif
- + | ins_next1
- | b <2
- |.endif
- |.endmacro
- |
- - |.macro ins_arith, intins, fpins
- + |.macro ins_arith, intins, fpins, fpcall
- |.if DUALNUM
- - | ins_arithdn intins, fpins
- + | ins_arithdn intins, fpins, fpcall
- |.else
- | ins_arithfp fpins
- |.endif
- @@ -3587,9 +4105,9 @@ static void build_ins(BuildCtx *ctx, BCO
- | addo. TMP0, TMP0, TMP3
- | add y, a, b
- |.endmacro
- - | ins_arith addo32., fadd
- + | ins_arith addo32., fadd, __adddf3
- |.else
- - | ins_arith addo., fadd
- + | ins_arith addo., fadd, __adddf3
- |.endif
- break;
- case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
- @@ -3601,36 +4119,48 @@ static void build_ins(BuildCtx *ctx, BCO
- | subo. TMP0, TMP0, TMP3
- | sub y, a, b
- |.endmacro
- - | ins_arith subo32., fsub
- + | ins_arith subo32., fsub, __subdf3
- |.else
- - | ins_arith subo., fsub
- + | ins_arith subo., fsub, __subdf3
- |.endif
- break;
- case BC_MULVN: case BC_MULNV: case BC_MULVV:
- - | ins_arith mullwo., fmul
- + | ins_arith mullwo., fmul, __muldf3
- break;
- case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
- | ins_arithfp fdiv
- break;
- case BC_MODVN:
- - | ins_arith intmod, fpmod
- + | ins_arith intmod, fpmod, sfpmod
- break;
- case BC_MODNV: case BC_MODVV:
- - | ins_arith intmod, fpmod_
- + | ins_arith intmod, fpmod_, sfpmod
- break;
- case BC_POW:
- | // NYI: (partial) integer arithmetic.
- - | lwzx TMP1, BASE, RB
- + | lwzx CARG1, BASE, RB
- + | lwzx CARG3, BASE, RC
- + |.if FPU
- | lfdx FARG1, BASE, RB
- - | lwzx TMP2, BASE, RC
- | lfdx FARG2, BASE, RC
- - | checknum cr0, TMP1
- - | checknum cr1, TMP2
- + |.else
- + | add TMP1, BASE, RB
- + | add TMP2, BASE, RC
- + | lwz CARG2, 4(TMP1)
- + | lwz CARG4, 4(TMP2)
- + |.endif
- + | checknum cr0, CARG1
- + | checknum cr1, CARG3
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | bge ->vmeta_arith_vv
- | blex pow
- | ins_next1
- + |.if FPU
- | stfdx FARG1, BASE, RA
- + |.else
- + | stwux CARG1, RA, BASE
- + | stw CARG2, 4(RA)
- + |.endif
- | ins_next2
- break;
-
- @@ -3650,8 +4180,15 @@ static void build_ins(BuildCtx *ctx, BCO
- | lp BASE, L->base
- | bne ->vmeta_binop
- | ins_next1
- + |.if FPU
- | lfdx f0, BASE, SAVE0 // Copy result from RB to RA.
- | stfdx f0, BASE, RA
- + |.else
- + | lwzux TMP0, SAVE0, BASE
- + | lwz TMP1, 4(SAVE0)
- + | stwux TMP0, RA, BASE
- + | stw TMP1, 4(RA)
- + |.endif
- | ins_next2
- break;
-
- @@ -3714,8 +4251,15 @@ static void build_ins(BuildCtx *ctx, BCO
- case BC_KNUM:
- | // RA = dst*8, RD = num_const*8
- | ins_next1
- + |.if FPU
- | lfdx f0, KBASE, RD
- | stfdx f0, BASE, RA
- + |.else
- + | lwzux TMP0, RD, KBASE
- + | lwz TMP1, 4(RD)
- + | stwux TMP0, RA, BASE
- + | stw TMP1, 4(RA)
- + |.endif
- | ins_next2
- break;
- case BC_KPRI:
- @@ -3748,8 +4292,15 @@ static void build_ins(BuildCtx *ctx, BCO
- | lwzx UPVAL:RB, LFUNC:RB, RD
- | ins_next1
- | lwz TMP1, UPVAL:RB->v
- + |.if FPU
- | lfd f0, 0(TMP1)
- | stfdx f0, BASE, RA
- + |.else
- + | lwz TMP2, 0(TMP1)
- + | lwz TMP3, 4(TMP1)
- + | stwux TMP2, RA, BASE
- + | stw TMP3, 4(RA)
- + |.endif
- | ins_next2
- break;
- case BC_USETV:
- @@ -3757,14 +4308,24 @@ static void build_ins(BuildCtx *ctx, BCO
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | srwi RA, RA, 1
- | addi RA, RA, offsetof(GCfuncL, uvptr)
- + |.if FPU
- | lfdux f0, RD, BASE
- + |.else
- + | lwzux CARG1, RD, BASE
- + | lwz CARG3, 4(RD)
- + |.endif
- | lwzx UPVAL:RB, LFUNC:RB, RA
- | lbz TMP3, UPVAL:RB->marked
- | lwz CARG2, UPVAL:RB->v
- | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
- | lbz TMP0, UPVAL:RB->closed
- | lwz TMP2, 0(RD)
- + |.if FPU
- | stfd f0, 0(CARG2)
- + |.else
- + | stw CARG1, 0(CARG2)
- + | stw CARG3, 4(CARG2)
- + |.endif
- | cmplwi cr1, TMP0, 0
- | lwz TMP1, 4(RD)
- | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
- @@ -3820,11 +4381,21 @@ static void build_ins(BuildCtx *ctx, BCO
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | srwi RA, RA, 1
- | addi RA, RA, offsetof(GCfuncL, uvptr)
- + |.if FPU
- | lfdx f0, KBASE, RD
- + |.else
- + | lwzux TMP2, RD, KBASE
- + | lwz TMP3, 4(RD)
- + |.endif
- | lwzx UPVAL:RB, LFUNC:RB, RA
- | ins_next1
- | lwz TMP1, UPVAL:RB->v
- + |.if FPU
- | stfd f0, 0(TMP1)
- + |.else
- + | stw TMP2, 0(TMP1)
- + | stw TMP3, 4(TMP1)
- + |.endif
- | ins_next2
- break;
- case BC_USETP:
- @@ -3972,11 +4543,21 @@ static void build_ins(BuildCtx *ctx, BCO
- |.endif
- | ble ->vmeta_tgetv // Integer key and in array part?
- | lwzx TMP0, TMP1, TMP2
- + |.if FPU
- | lfdx f14, TMP1, TMP2
- + |.else
- + | lwzux SAVE0, TMP1, TMP2
- + | lwz SAVE1, 4(TMP1)
- + |.endif
- | checknil TMP0; beq >2
- |1:
- | ins_next1
- + |.if FPU
- | stfdx f14, BASE, RA
- + |.else
- + | stwux SAVE0, RA, BASE
- + | stw SAVE1, 4(RA)
- + |.endif
- | ins_next2
- |
- |2: // Check for __index if table value is nil.
- @@ -4052,12 +4633,22 @@ static void build_ins(BuildCtx *ctx, BCO
- | lwz TMP1, TAB:RB->asize
- | lwz TMP2, TAB:RB->array
- | cmplw TMP0, TMP1; bge ->vmeta_tgetb
- + |.if FPU
- | lwzx TMP1, TMP2, RC
- | lfdx f0, TMP2, RC
- + |.else
- + | lwzux TMP1, TMP2, RC
- + | lwz TMP3, 4(TMP2)
- + |.endif
- | checknil TMP1; beq >5
- |1:
- | ins_next1
- + |.if FPU
- | stfdx f0, BASE, RA
- + |.else
- + | stwux TMP1, RA, BASE
- + | stw TMP3, 4(RA)
- + |.endif
- | ins_next2
- |
- |5: // Check for __index if table value is nil.
- @@ -4087,10 +4678,20 @@ static void build_ins(BuildCtx *ctx, BCO
- | cmplw TMP0, CARG2
- | slwi TMP2, CARG2, 3
- | ble ->vmeta_tgetr // In array part?
- + |.if FPU
- | lfdx f14, TMP1, TMP2
- + |.else
- + | lwzux SAVE0, TMP2, TMP1
- + | lwz SAVE1, 4(TMP2)
- + |.endif
- |->BC_TGETR_Z:
- | ins_next1
- + |.if FPU
- | stfdx f14, BASE, RA
- + |.else
- + | stwux SAVE0, RA, BASE
- + | stw SAVE1, 4(RA)
- + |.endif
- | ins_next2
- break;
-
- @@ -4131,11 +4732,22 @@ static void build_ins(BuildCtx *ctx, BCO
- | ble ->vmeta_tsetv // Integer key and in array part?
- | lwzx TMP2, TMP1, TMP0
- | lbz TMP3, TAB:RB->marked
- + |.if FPU
- | lfdx f14, BASE, RA
- + |.else
- + | add SAVE1, BASE, RA
- + | lwz SAVE0, 0(SAVE1)
- + | lwz SAVE1, 4(SAVE1)
- + |.endif
- | checknil TMP2; beq >3
- |1:
- | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
- + |.if FPU
- | stfdx f14, TMP1, TMP0
- + |.else
- + | stwux SAVE0, TMP1, TMP0
- + | stw SAVE1, 4(TMP1)
- + |.endif
- | bne >7
- |2:
- | ins_next
- @@ -4176,7 +4788,13 @@ static void build_ins(BuildCtx *ctx, BCO
- | lwz NODE:TMP2, TAB:RB->node
- | stb ZERO, TAB:RB->nomm // Clear metamethod cache.
- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
- + |.if FPU
- | lfdx f14, BASE, RA
- + |.else
- + | add CARG2, BASE, RA
- + | lwz SAVE0, 0(CARG2)
- + | lwz SAVE1, 4(CARG2)
- + |.endif
- | slwi TMP0, TMP1, 5
- | slwi TMP1, TMP1, 3
- | sub TMP1, TMP0, TMP1
- @@ -4192,7 +4810,12 @@ static void build_ins(BuildCtx *ctx, BCO
- | checknil CARG2; beq >4 // Key found, but nil value?
- |2:
- | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
- + |.if FPU
- | stfd f14, NODE:TMP2->val
- + |.else
- + | stw SAVE0, NODE:TMP2->val.u32.hi
- + | stw SAVE1, NODE:TMP2->val.u32.lo
- + |.endif
- | bne >7
- |3:
- | ins_next
- @@ -4231,7 +4854,12 @@ static void build_ins(BuildCtx *ctx, BCO
- | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
- | // Returns TValue *.
- | lp BASE, L->base
- + |.if FPU
- | stfd f14, 0(CRET1)
- + |.else
- + | stw SAVE0, 0(CRET1)
- + | stw SAVE1, 4(CRET1)
- + |.endif
- | b <3 // No 2nd write barrier needed.
- |
- |7: // Possible table write barrier for the value. Skip valiswhite check.
- @@ -4248,13 +4876,24 @@ static void build_ins(BuildCtx *ctx, BCO
- | lwz TMP2, TAB:RB->array
- | lbz TMP3, TAB:RB->marked
- | cmplw TMP0, TMP1
- + |.if FPU
- | lfdx f14, BASE, RA
- + |.else
- + | add CARG2, BASE, RA
- + | lwz SAVE0, 0(CARG2)
- + | lwz SAVE1, 4(CARG2)
- + |.endif
- | bge ->vmeta_tsetb
- | lwzx TMP1, TMP2, RC
- | checknil TMP1; beq >5
- |1:
- | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
- + |.if FPU
- | stfdx f14, TMP2, RC
- + |.else
- + | stwux SAVE0, RC, TMP2
- + | stw SAVE1, 4(RC)
- + |.endif
- | bne >7
- |2:
- | ins_next
- @@ -4294,10 +4933,20 @@ static void build_ins(BuildCtx *ctx, BCO
- |2:
- | cmplw TMP0, CARG3
- | slwi TMP2, CARG3, 3
- + |.if FPU
- | lfdx f14, BASE, RA
- + |.else
- + | lwzux SAVE0, RA, BASE
- + | lwz SAVE1, 4(RA)
- + |.endif
- | ble ->vmeta_tsetr // In array part?
- | ins_next1
- + |.if FPU
- | stfdx f14, TMP1, TMP2
- + |.else
- + | stwux SAVE0, TMP1, TMP2
- + | stw SAVE1, 4(TMP1)
- + |.endif
- | ins_next2
- |
- |7: // Possible table write barrier for the value. Skip valiswhite check.
- @@ -4327,10 +4976,20 @@ static void build_ins(BuildCtx *ctx, BCO
- | add TMP1, TMP1, TMP0
- | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
- |3: // Copy result slots to table.
- + |.if FPU
- | lfd f0, 0(RA)
- + |.else
- + | lwz SAVE0, 0(RA)
- + | lwz SAVE1, 4(RA)
- + |.endif
- | addi RA, RA, 8
- | cmpw cr1, RA, TMP2
- + |.if FPU
- | stfd f0, 0(TMP1)
- + |.else
- + | stw SAVE0, 0(TMP1)
- + | stw SAVE1, 4(TMP1)
- + |.endif
- | addi TMP1, TMP1, 8
- | blt cr1, <3
- | bne >7
- @@ -4397,9 +5056,20 @@ static void build_ins(BuildCtx *ctx, BCO
- | beq cr1, >3
- |2:
- | addi TMP3, TMP2, 8
- + |.if FPU
- | lfdx f0, RA, TMP2
- + |.else
- + | add CARG3, RA, TMP2
- + | lwz CARG1, 0(CARG3)
- + | lwz CARG2, 4(CARG3)
- + |.endif
- | cmplw cr1, TMP3, NARGS8:RC
- + |.if FPU
- | stfdx f0, BASE, TMP2
- + |.else
- + | stwux CARG1, TMP2, BASE
- + | stw CARG2, 4(TMP2)
- + |.endif
- | mr TMP2, TMP3
- | bne cr1, <2
- |3:
- @@ -4432,14 +5102,28 @@ static void build_ins(BuildCtx *ctx, BCO
- | add BASE, BASE, RA
- | lwz TMP1, -24(BASE)
- | lwz LFUNC:RB, -20(BASE)
- + |.if FPU
- | lfd f1, -8(BASE)
- | lfd f0, -16(BASE)
- + |.else
- + | lwz CARG1, -8(BASE)
- + | lwz CARG2, -4(BASE)
- + | lwz CARG3, -16(BASE)
- + | lwz CARG4, -12(BASE)
- + |.endif
- | stw TMP1, 0(BASE) // Copy callable.
- | stw LFUNC:RB, 4(BASE)
- | checkfunc TMP1
- - | stfd f1, 16(BASE) // Copy control var.
- | li NARGS8:RC, 16 // Iterators get 2 arguments.
- + |.if FPU
- + | stfd f1, 16(BASE) // Copy control var.
- | stfdu f0, 8(BASE) // Copy state.
- + |.else
- + | stw CARG1, 16(BASE) // Copy control var.
- + | stw CARG2, 20(BASE)
- + | stwu CARG3, 8(BASE) // Copy state.
- + | stw CARG4, 4(BASE)
- + |.endif
- | bne ->vmeta_call
- | ins_call
- break;
- @@ -4460,7 +5144,12 @@ static void build_ins(BuildCtx *ctx, BCO
- | slwi TMP3, RC, 3
- | bge >5 // Index points after array part?
- | lwzx TMP2, TMP1, TMP3
- + |.if FPU
- | lfdx f0, TMP1, TMP3
- + |.else
- + | lwzux CARG1, TMP3, TMP1
- + | lwz CARG2, 4(TMP3)
- + |.endif
- | checknil TMP2
- | lwz INS, -4(PC)
- | beq >4
- @@ -4472,7 +5161,12 @@ static void build_ins(BuildCtx *ctx, BCO
- |.endif
- | addi RC, RC, 1
- | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
- + |.if FPU
- | stfd f0, 8(RA)
- + |.else
- + | stw CARG1, 8(RA)
- + | stw CARG2, 12(RA)
- + |.endif
- | decode_RD4 TMP1, INS
- | stw RC, -4(RA) // Update control var.
- | add PC, TMP1, TMP3
- @@ -4497,17 +5191,38 @@ static void build_ins(BuildCtx *ctx, BCO
- | slwi RB, RC, 3
- | sub TMP3, TMP3, RB
- | lwzx RB, TMP2, TMP3
- + |.if FPU
- | lfdx f0, TMP2, TMP3
- + |.else
- + | add CARG3, TMP2, TMP3
- + | lwz CARG1, 0(CARG3)
- + | lwz CARG2, 4(CARG3)
- + |.endif
- | add NODE:TMP3, TMP2, TMP3
- | checknil RB
- | lwz INS, -4(PC)
- | beq >7
- + |.if FPU
- | lfd f1, NODE:TMP3->key
- + |.else
- + | lwz CARG3, NODE:TMP3->key.u32.hi
- + | lwz CARG4, NODE:TMP3->key.u32.lo
- + |.endif
- | addis TMP2, PC, -(BCBIAS_J*4 >> 16)
- + |.if FPU
- | stfd f0, 8(RA)
- + |.else
- + | stw CARG1, 8(RA)
- + | stw CARG2, 12(RA)
- + |.endif
- | add RC, RC, TMP0
- | decode_RD4 TMP1, INS
- + |.if FPU
- | stfd f1, 0(RA)
- + |.else
- + | stw CARG3, 0(RA)
- + | stw CARG4, 4(RA)
- + |.endif
- | addi RC, RC, 1
- | add PC, TMP1, TMP2
- | stw RC, -4(RA) // Update control var.
- @@ -4573,9 +5288,19 @@ static void build_ins(BuildCtx *ctx, BCO
- | subi TMP2, TMP2, 16
- | ble >2 // No vararg slots?
- |1: // Copy vararg slots to destination slots.
- + |.if FPU
- | lfd f0, 0(RC)
- + |.else
- + | lwz CARG1, 0(RC)
- + | lwz CARG2, 4(RC)
- + |.endif
- | addi RC, RC, 8
- + |.if FPU
- | stfd f0, 0(RA)
- + |.else
- + | stw CARG1, 0(RA)
- + | stw CARG2, 4(RA)
- + |.endif
- | cmplw RA, TMP2
- | cmplw cr1, RC, TMP3
- | bge >3 // All destination slots filled?
- @@ -4598,9 +5323,19 @@ static void build_ins(BuildCtx *ctx, BCO
- | addi MULTRES, TMP1, 8
- | bgt >7
- |6:
- + |.if FPU
- | lfd f0, 0(RC)
- + |.else
- + | lwz CARG1, 0(RC)
- + | lwz CARG2, 4(RC)
- + |.endif
- | addi RC, RC, 8
- + |.if FPU
- | stfd f0, 0(RA)
- + |.else
- + | stw CARG1, 0(RA)
- + | stw CARG2, 4(RA)
- + |.endif
- | cmplw RC, TMP3
- | addi RA, RA, 8
- | blt <6 // More vararg slots?
- @@ -4651,14 +5386,38 @@ static void build_ins(BuildCtx *ctx, BCO
- | li TMP1, 0
- |2:
- | addi TMP3, TMP1, 8
- + |.if FPU
- | lfdx f0, RA, TMP1
- + |.else
- + | add CARG3, RA, TMP1
- + | lwz CARG1, 0(CARG3)
- + | lwz CARG2, 4(CARG3)
- + |.endif
- | cmpw TMP3, RC
- + |.if FPU
- | stfdx f0, TMP2, TMP1
- + |.else
- + | add CARG3, TMP2, TMP1
- + | stw CARG1, 0(CARG3)
- + | stw CARG2, 4(CARG3)
- + |.endif
- | beq >3
- | addi TMP1, TMP3, 8
- + |.if FPU
- | lfdx f1, RA, TMP3
- + |.else
- + | add CARG3, RA, TMP3
- + | lwz CARG1, 0(CARG3)
- + | lwz CARG2, 4(CARG3)
- + |.endif
- | cmpw TMP1, RC
- + |.if FPU
- | stfdx f1, TMP2, TMP3
- + |.else
- + | add CARG3, TMP2, TMP3
- + | stw CARG1, 0(CARG3)
- + | stw CARG2, 4(CARG3)
- + |.endif
- | bne <2
- |3:
- |5:
- @@ -4700,8 +5459,15 @@ static void build_ins(BuildCtx *ctx, BCO
- | subi TMP2, BASE, 8
- | decode_RB8 RB, INS
- if (op == BC_RET1) {
- + |.if FPU
- | lfd f0, 0(RA)
- | stfd f0, 0(TMP2)
- + |.else
- + | lwz CARG1, 0(RA)
- + | lwz CARG2, 4(RA)
- + | stw CARG1, 0(TMP2)
- + | stw CARG2, 4(TMP2)
- + |.endif
- }
- |5:
- | cmplw RB, RD
- @@ -4762,11 +5528,11 @@ static void build_ins(BuildCtx *ctx, BCO
- |4:
- | stw CARG1, FORL_IDX*8+4(RA)
- } else {
- - | lwz TMP3, FORL_STEP*8(RA)
- + | lwz SAVE0, FORL_STEP*8(RA)
- | lwz CARG3, FORL_STEP*8+4(RA)
- | lwz TMP2, FORL_STOP*8(RA)
- | lwz CARG2, FORL_STOP*8+4(RA)
- - | cmplw cr7, TMP3, TISNUM
- + | cmplw cr7, SAVE0, TISNUM
- | cmplw cr1, TMP2, TISNUM
- | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
- | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
- @@ -4809,41 +5575,80 @@ static void build_ins(BuildCtx *ctx, BCO
- if (vk) {
- |.if DUALNUM
- |9: // FP loop.
- + |.if FPU
- | lfd f1, FORL_IDX*8(RA)
- |.else
- + | lwz CARG1, FORL_IDX*8(RA)
- + | lwz CARG2, FORL_IDX*8+4(RA)
- + |.endif
- + |.else
- | lfdux f1, RA, BASE
- |.endif
- + |.if FPU
- | lfd f3, FORL_STEP*8(RA)
- | lfd f2, FORL_STOP*8(RA)
- - | lwz TMP3, FORL_STEP*8(RA)
- | fadd f1, f1, f3
- | stfd f1, FORL_IDX*8(RA)
- + |.else
- + | lwz CARG3, FORL_STEP*8(RA)
- + | lwz CARG4, FORL_STEP*8+4(RA)
- + | mr SAVE1, RD
- + | blex __adddf3
- + | mr RD, SAVE1
- + | stw CRET1, FORL_IDX*8(RA)
- + | stw CRET2, FORL_IDX*8+4(RA)
- + | lwz CARG3, FORL_STOP*8(RA)
- + | lwz CARG4, FORL_STOP*8+4(RA)
- + |.endif
- + | lwz SAVE0, FORL_STEP*8(RA)
- } else {
- |.if DUALNUM
- |9: // FP loop.
- |.else
- | lwzux TMP1, RA, BASE
- - | lwz TMP3, FORL_STEP*8(RA)
- + | lwz SAVE0, FORL_STEP*8(RA)
- | lwz TMP2, FORL_STOP*8(RA)
- | cmplw cr0, TMP1, TISNUM
- - | cmplw cr7, TMP3, TISNUM
- + | cmplw cr7, SAVE0, TISNUM
- | cmplw cr1, TMP2, TISNUM
- |.endif
- + |.if FPU
- | lfd f1, FORL_IDX*8(RA)
- + |.else
- + | lwz CARG1, FORL_IDX*8(RA)
- + | lwz CARG2, FORL_IDX*8+4(RA)
- + |.endif
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- + |.if FPU
- | lfd f2, FORL_STOP*8(RA)
- + |.else
- + | lwz CARG3, FORL_STOP*8(RA)
- + | lwz CARG4, FORL_STOP*8+4(RA)
- + |.endif
- | bge ->vmeta_for
- }
- - | cmpwi cr6, TMP3, 0
- + | cmpwi cr6, SAVE0, 0
- if (op != BC_JFORL) {
- | srwi RD, RD, 1
- }
- + |.if FPU
- | stfd f1, FORL_EXT*8(RA)
- + |.else
- + | stw CARG1, FORL_EXT*8(RA)
- + | stw CARG2, FORL_EXT*8+4(RA)
- + |.endif
- if (op != BC_JFORL) {
- | add RD, PC, RD
- }
- + |.if FPU
- | fcmpu cr0, f1, f2
- + |.else
- + | mr SAVE1, RD
- + | blex __ledf2
- + | cmpwi CRET1, 0
- + | mr RD, SAVE1
- + |.endif
- if (op == BC_JFORI) {
- | addis PC, RD, -(BCBIAS_J*4 >> 16)
- }
|