From fd37da0d586c331b0008fbfd653a9659344fe76f Mon Sep 17 00:00:00 2001
|
|
From: Mike Pall <mike>
|
|
Date: Wed, 26 Jul 2017 09:52:19 +0200
|
|
Subject: [PATCH] PPC: Add soft-float support to interpreter.
|
|
|
|
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
|
|
Sponsored by Cisco Systems, Inc.
|
|
---
|
|
src/host/buildvm_asm.c | 2 +-
|
|
src/lj_arch.h | 29 +-
|
|
src/lj_ccall.c | 38 +-
|
|
src/lj_ccall.h | 4 +-
|
|
src/lj_ccallback.c | 30 +-
|
|
src/lj_frame.h | 2 +-
|
|
src/lj_ircall.h | 2 +-
|
|
src/vm_ppc.dasc | 1249 +++++++++++++++++++++++++++++++++-------
|
|
8 files changed, 1101 insertions(+), 255 deletions(-)
|
|
|
|
--- a/src/host/buildvm_asm.c
|
|
+++ b/src/host/buildvm_asm.c
|
|
@@ -338,7 +338,7 @@ void emit_asm(BuildCtx *ctx)
|
|
#if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
|
|
fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
|
|
#endif
|
|
-#if LJ_TARGET_PPC && !LJ_TARGET_PS3
|
|
+#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP
|
|
/* Hard-float ABI. */
|
|
fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
|
|
#endif
|
|
--- a/src/lj_arch.h
|
|
+++ b/src/lj_arch.h
|
|
@@ -254,6 +254,29 @@
|
|
#else
|
|
#define LJ_ARCH_BITS 32
|
|
#define LJ_ARCH_NAME "ppc"
|
|
+
|
|
+#if !defined(LJ_ARCH_HASFPU)
|
|
+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
|
|
+#define LJ_ARCH_HASFPU 0
|
|
+#else
|
|
+#define LJ_ARCH_HASFPU 1
|
|
+#endif
|
|
+#endif
|
|
+
|
|
+#if !defined(LJ_ABI_SOFTFP)
|
|
+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
|
|
+#define LJ_ABI_SOFTFP 1
|
|
+#else
|
|
+#define LJ_ABI_SOFTFP 0
|
|
+#endif
|
|
+#endif
|
|
+#endif
|
|
+
|
|
+#if LJ_ABI_SOFTFP
|
|
+#define LJ_ARCH_NOJIT 1 /* NYI */
|
|
+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
|
|
+#else
|
|
+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
|
|
#endif
|
|
|
|
#define LJ_TARGET_PPC 1
|
|
@@ -262,7 +285,6 @@
|
|
#define LJ_TARGET_MASKSHIFT 0
|
|
#define LJ_TARGET_MASKROT 1
|
|
#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
|
|
-#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
|
|
|
|
#if LJ_TARGET_CONSOLE
|
|
#define LJ_ARCH_PPC32ON64 1
|
|
@@ -415,16 +437,13 @@
|
|
#error "No support for ILP32 model on ARM64"
|
|
#endif
|
|
#elif LJ_TARGET_PPC
|
|
-#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
|
|
-#error "No support for PowerPC CPUs without double-precision FPU"
|
|
-#endif
|
|
#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
|
|
#error "No support for little-endian PPC32"
|
|
#endif
|
|
#if LJ_ARCH_PPC64
|
|
#error "No support for PowerPC 64 bit mode (yet)"
|
|
#endif
|
|
-#ifdef __NO_FPRS__
|
|
+#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
|
|
#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
|
|
#endif
|
|
#elif LJ_TARGET_MIPS32
|
|
--- a/src/lj_ccall.c
|
|
+++ b/src/lj_ccall.c
|
|
@@ -387,6 +387,24 @@
|
|
#define CCALL_HANDLE_COMPLEXARG \
|
|
/* Pass complex by value in 2 or 4 GPRs. */
|
|
|
|
+#define CCALL_HANDLE_GPR \
|
|
+ /* Try to pass argument in GPRs. */ \
|
|
+ if (n > 1) { \
|
|
+ lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \
|
|
+ if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \
|
|
+ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t (and soft-float FP args) to regpair. */ \
|
|
+ else if (ngpr + n > maxgpr) \
|
|
+ ngpr = maxgpr; /* Prevent reordering. */ \
|
|
+ } \
|
|
+ if (ngpr + n <= maxgpr) { \
|
|
+ dp = &cc->gpr[ngpr]; \
|
|
+ ngpr += n; \
|
|
+ goto done; \
|
|
+ } \
|
|
+
|
|
+#if LJ_ABI_SOFTFP
|
|
+#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR
|
|
+#else
|
|
#define CCALL_HANDLE_REGARG \
|
|
if (isfp) { /* Try to pass argument in FPRs. */ \
|
|
if (nfpr + 1 <= CCALL_NARG_FPR) { \
|
|
@@ -395,24 +413,16 @@
|
|
d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
|
|
goto done; \
|
|
} \
|
|
- } else { /* Try to pass argument in GPRs. */ \
|
|
- if (n > 1) { \
|
|
- lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \
|
|
- if (ctype_isinteger(d->info)) \
|
|
- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
|
|
- else if (ngpr + n > maxgpr) \
|
|
- ngpr = maxgpr; /* Prevent reordering. */ \
|
|
- } \
|
|
- if (ngpr + n <= maxgpr) { \
|
|
- dp = &cc->gpr[ngpr]; \
|
|
- ngpr += n; \
|
|
- goto done; \
|
|
- } \
|
|
+ } else { \
|
|
+ CCALL_HANDLE_GPR \
|
|
}
|
|
+#endif
|
|
|
|
+#if !LJ_ABI_SOFTFP
|
|
#define CCALL_HANDLE_RET \
|
|
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
|
|
ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
|
|
+#endif
|
|
|
|
#elif LJ_TARGET_MIPS32
|
|
/* -- MIPS o32 calling conventions ---------------------------------------- */
|
|
@@ -1080,7 +1090,7 @@ static int ccall_set_args(lua_State *L,
|
|
}
|
|
if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
|
|
|
|
-#if LJ_TARGET_X64 || LJ_TARGET_PPC
|
|
+#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
|
|
cc->nfpr = nfpr; /* Required for vararg functions. */
|
|
#endif
|
|
cc->nsp = nsp;
|
|
--- a/src/lj_ccall.h
|
|
+++ b/src/lj_ccall.h
|
|
@@ -86,9 +86,9 @@ typedef union FPRArg {
|
|
#elif LJ_TARGET_PPC
|
|
|
|
#define CCALL_NARG_GPR 8
|
|
-#define CCALL_NARG_FPR 8
|
|
+#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8)
|
|
#define CCALL_NRET_GPR 4 /* For complex double. */
|
|
-#define CCALL_NRET_FPR 1
|
|
+#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1)
|
|
#define CCALL_SPS_EXTRA 4
|
|
#define CCALL_SPS_FREE 0
|
|
|
|
--- a/src/lj_ccallback.c
|
|
+++ b/src/lj_ccallback.c
|
|
@@ -419,6 +419,23 @@ void lj_ccallback_mcode_free(CTState *ct
|
|
|
|
#elif LJ_TARGET_PPC
|
|
|
|
+#define CALLBACK_HANDLE_GPR \
|
|
+ if (n > 1) { \
|
|
+ lua_assert(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \
|
|
+ ctype_isinteger(cta->info)) && n == 2); /* int64_t. */ \
|
|
+ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t (or soft-float double) to regpair. */ \
|
|
+ } \
|
|
+ if (ngpr + n <= maxgpr) { \
|
|
+ sp = &cts->cb.gpr[ngpr]; \
|
|
+ ngpr += n; \
|
|
+ goto done; \
|
|
+ }
|
|
+
|
|
+#if LJ_ABI_SOFTFP
|
|
+#define CALLBACK_HANDLE_REGARG \
|
|
+ CALLBACK_HANDLE_GPR \
|
|
+ UNUSED(isfp);
|
|
+#else
|
|
#define CALLBACK_HANDLE_REGARG \
|
|
if (isfp) { \
|
|
if (nfpr + 1 <= CCALL_NARG_FPR) { \
|
|
@@ -427,20 +444,15 @@ void lj_ccallback_mcode_free(CTState *ct
|
|
goto done; \
|
|
} \
|
|
} else { /* Try to pass argument in GPRs. */ \
|
|
- if (n > 1) { \
|
|
- lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \
|
|
- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
|
|
- } \
|
|
- if (ngpr + n <= maxgpr) { \
|
|
- sp = &cts->cb.gpr[ngpr]; \
|
|
- ngpr += n; \
|
|
- goto done; \
|
|
- } \
|
|
+ CALLBACK_HANDLE_GPR \
|
|
}
|
|
+#endif
|
|
|
|
+#if !LJ_ABI_SOFTFP
|
|
#define CALLBACK_HANDLE_RET \
|
|
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
|
|
*(double *)dp = *(float *)dp; /* FPRs always hold doubles. */
|
|
+#endif
|
|
|
|
#elif LJ_TARGET_MIPS32
|
|
|
|
--- a/src/lj_frame.h
|
|
+++ b/src/lj_frame.h
|
|
@@ -226,7 +226,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CAL
|
|
#define CFRAME_OFS_L 36
|
|
#define CFRAME_OFS_PC 32
|
|
#define CFRAME_OFS_MULTRES 28
|
|
-#define CFRAME_SIZE 272
|
|
+#define CFRAME_SIZE (LJ_ARCH_HASFPU ? 272 : 128)
|
|
#define CFRAME_SHIFT_MULTRES 3
|
|
#endif
|
|
#elif LJ_TARGET_MIPS32
|
|
--- a/src/lj_ircall.h
|
|
+++ b/src/lj_ircall.h
|
|
@@ -272,7 +272,7 @@ LJ_DATA const CCallInfo lj_ir_callinfo[I
|
|
#define fp64_f2l __aeabi_f2lz
|
|
#define fp64_f2ul __aeabi_f2ulz
|
|
#endif
|
|
-#elif LJ_TARGET_MIPS
|
|
+#elif LJ_TARGET_MIPS || LJ_TARGET_PPC
|
|
#define softfp_add __adddf3
|
|
#define softfp_sub __subdf3
|
|
#define softfp_mul __muldf3
|
|
--- a/src/vm_ppc.dasc
|
|
+++ b/src/vm_ppc.dasc
|
|
@@ -103,6 +103,18 @@
|
|
|// Fixed register assignments for the interpreter.
|
|
|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA)
|
|
|
|
|
+|.macro .FPU, a, b
|
|
+|.if FPU
|
|
+| a, b
|
|
+|.endif
|
|
+|.endmacro
|
|
+|
|
|
+|.macro .FPU, a, b, c
|
|
+|.if FPU
|
|
+| a, b, c
|
|
+|.endif
|
|
+|.endmacro
|
|
+|
|
|
|// The following must be C callee-save (but BASE is often refetched).
|
|
|.define BASE, r14 // Base of current Lua stack frame.
|
|
|.define KBASE, r15 // Constants of current Lua function.
|
|
@@ -116,8 +128,10 @@
|
|
|.define TISNUM, r22
|
|
|.define TISNIL, r23
|
|
|.define ZERO, r24
|
|
+|.if FPU
|
|
|.define TOBIT, f30 // 2^52 + 2^51.
|
|
|.define TONUM, f31 // 2^52 + 2^51 + 2^31.
|
|
+|.endif
|
|
|
|
|
|// The following temporaries are not saved across C calls, except for RA.
|
|
|.define RA, r20 // Callee-save.
|
|
@@ -133,6 +147,7 @@
|
|
|
|
|
|// Saved temporaries.
|
|
|.define SAVE0, r21
|
|
+|.define SAVE1, r25
|
|
|
|
|
|// Calling conventions.
|
|
|.define CARG1, r3
|
|
@@ -141,8 +156,10 @@
|
|
|.define CARG4, r6 // Overlaps TMP3.
|
|
|.define CARG5, r7 // Overlaps INS.
|
|
|
|
|
+|.if FPU
|
|
|.define FARG1, f1
|
|
|.define FARG2, f2
|
|
+|.endif
|
|
|
|
|
|.define CRET1, r3
|
|
|.define CRET2, r4
|
|
@@ -213,10 +230,16 @@
|
|
|.endif
|
|
|.else
|
|
|
|
|
+|.if FPU
|
|
|.define SAVE_LR, 276(sp)
|
|
|.define CFRAME_SPACE, 272 // Delta for sp.
|
|
|// Back chain for sp: 272(sp) <-- sp entering interpreter
|
|
|.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves.
|
|
+|.else
|
|
+|.define SAVE_LR, 132(sp)
|
|
+|.define CFRAME_SPACE, 128 // Delta for sp.
|
|
+|// Back chain for sp: 128(sp) <-- sp entering interpreter
|
|
+|.endif
|
|
|.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves.
|
|
|.define SAVE_CR, 52(sp) // 32 bit CR save.
|
|
|.define SAVE_ERRF, 48(sp) // 32 bit C frame info.
|
|
@@ -226,16 +249,25 @@
|
|
|.define SAVE_PC, 32(sp)
|
|
|.define SAVE_MULTRES, 28(sp)
|
|
|.define UNUSED1, 24(sp)
|
|
+|.if FPU
|
|
|.define TMPD_LO, 20(sp)
|
|
|.define TMPD_HI, 16(sp)
|
|
|.define TONUM_LO, 12(sp)
|
|
|.define TONUM_HI, 8(sp)
|
|
+|.else
|
|
+|.define SFSAVE_4, 20(sp)
|
|
+|.define SFSAVE_3, 16(sp)
|
|
+|.define SFSAVE_2, 12(sp)
|
|
+|.define SFSAVE_1, 8(sp)
|
|
+|.endif
|
|
|// Next frame lr: 4(sp)
|
|
|// Back chain for sp: 0(sp) <-- sp while in interpreter
|
|
|
|
|
+|.if FPU
|
|
|.define TMPD_BLO, 23(sp)
|
|
|.define TMPD, TMPD_HI
|
|
|.define TONUM_D, TONUM_HI
|
|
+|.endif
|
|
|
|
|
|.endif
|
|
|
|
|
@@ -245,7 +277,7 @@
|
|
|.else
|
|
| stw r..reg, SAVE_GPR_+(reg-14)*4(sp)
|
|
|.endif
|
|
-| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
|
|
+| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
|
|
|.endmacro
|
|
|.macro rest_, reg
|
|
|.if GPR64
|
|
@@ -253,7 +285,7 @@
|
|
|.else
|
|
| lwz r..reg, SAVE_GPR_+(reg-14)*4(sp)
|
|
|.endif
|
|
-| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
|
|
+| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
|
|
|.endmacro
|
|
|
|
|
|.macro saveregs
|
|
@@ -323,6 +355,7 @@
|
|
|// Trap for not-yet-implemented parts.
|
|
|.macro NYI; tw 4, sp, sp; .endmacro
|
|
|
|
|
+|.if FPU
|
|
|// int/FP conversions.
|
|
|.macro tonum_i, freg, reg
|
|
| xoris reg, reg, 0x8000
|
|
@@ -346,6 +379,7 @@
|
|
|.macro toint, reg, freg
|
|
| toint reg, freg, freg
|
|
|.endmacro
|
|
+|.endif
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
|
|
|
@@ -533,9 +567,19 @@ static void build_subroutines(BuildCtx *
|
|
| beq >2
|
|
|1:
|
|
| addic. TMP1, TMP1, -8
|
|
+ |.if FPU
|
|
| lfd f0, 0(RA)
|
|
+ |.else
|
|
+ | lwz CARG1, 0(RA)
|
|
+ | lwz CARG2, 4(RA)
|
|
+ |.endif
|
|
| addi RA, RA, 8
|
|
+ |.if FPU
|
|
| stfd f0, 0(BASE)
|
|
+ |.else
|
|
+ | stw CARG1, 0(BASE)
|
|
+ | stw CARG2, 4(BASE)
|
|
+ |.endif
|
|
| addi BASE, BASE, 8
|
|
| bney <1
|
|
|
|
|
@@ -613,23 +657,23 @@ static void build_subroutines(BuildCtx *
|
|
| .toc ld TOCREG, SAVE_TOC
|
|
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
|
|
| lp BASE, L->base
|
|
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
|
|
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
|
|
| lwz DISPATCH, L->glref // Setup pointer to dispatch table.
|
|
| li ZERO, 0
|
|
- | stw TMP3, TMPD
|
|
+ | .FPU stw TMP3, TMPD
|
|
| li TMP1, LJ_TFALSE
|
|
- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
|
|
+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
|
|
| li TISNIL, LJ_TNIL
|
|
| li_vmstate INTERP
|
|
- | lfs TOBIT, TMPD
|
|
+ | .FPU lfs TOBIT, TMPD
|
|
| lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
|
|
| la RA, -8(BASE) // Results start at BASE-8.
|
|
- | stw TMP3, TMPD
|
|
+ | .FPU stw TMP3, TMPD
|
|
| addi DISPATCH, DISPATCH, GG_G2DISP
|
|
| stw TMP1, 0(RA) // Prepend false to error message.
|
|
| li RD, 16 // 2 results: false + error message.
|
|
| st_vmstate
|
|
- | lfs TONUM, TMPD
|
|
+ | .FPU lfs TONUM, TMPD
|
|
| b ->vm_returnc
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
@@ -690,22 +734,22 @@ static void build_subroutines(BuildCtx *
|
|
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
|
|
| lp TMP1, L->top
|
|
| lwz PC, FRAME_PC(BASE)
|
|
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
|
|
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
|
|
| stb CARG3, L->status
|
|
- | stw TMP3, TMPD
|
|
- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
|
|
- | lfs TOBIT, TMPD
|
|
+ | .FPU stw TMP3, TMPD
|
|
+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
|
|
+ | .FPU lfs TOBIT, TMPD
|
|
| sub RD, TMP1, BASE
|
|
- | stw TMP3, TMPD
|
|
- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
|
|
+ | .FPU stw TMP3, TMPD
|
|
+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
|
|
| addi RD, RD, 8
|
|
- | stw TMP0, TONUM_HI
|
|
+ | .FPU stw TMP0, TONUM_HI
|
|
| li_vmstate INTERP
|
|
| li ZERO, 0
|
|
| st_vmstate
|
|
| andix. TMP0, PC, FRAME_TYPE
|
|
| mr MULTRES, RD
|
|
- | lfs TONUM, TMPD
|
|
+ | .FPU lfs TONUM, TMPD
|
|
| li TISNIL, LJ_TNIL
|
|
| beq ->BC_RET_Z
|
|
| b ->vm_return
|
|
@@ -739,19 +783,19 @@ static void build_subroutines(BuildCtx *
|
|
| lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
|
|
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
|
|
| lp TMP1, L->top
|
|
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
|
|
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
|
|
| add PC, PC, BASE
|
|
- | stw TMP3, TMPD
|
|
+ | .FPU stw TMP3, TMPD
|
|
| li ZERO, 0
|
|
- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
|
|
- | lfs TOBIT, TMPD
|
|
+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
|
|
+ | .FPU lfs TOBIT, TMPD
|
|
| sub PC, PC, TMP2 // PC = frame delta + frame type
|
|
- | stw TMP3, TMPD
|
|
- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
|
|
+ | .FPU stw TMP3, TMPD
|
|
+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
|
|
| sub NARGS8:RC, TMP1, BASE
|
|
- | stw TMP0, TONUM_HI
|
|
+ | .FPU stw TMP0, TONUM_HI
|
|
| li_vmstate INTERP
|
|
- | lfs TONUM, TMPD
|
|
+ | .FPU lfs TONUM, TMPD
|
|
| li TISNIL, LJ_TNIL
|
|
| st_vmstate
|
|
|
|
|
@@ -839,15 +883,30 @@ static void build_subroutines(BuildCtx *
|
|
| lwz INS, -4(PC)
|
|
| subi CARG2, RB, 16
|
|
| decode_RB8 SAVE0, INS
|
|
+ |.if FPU
|
|
| lfd f0, 0(RA)
|
|
+ |.else
|
|
+ | lwz TMP2, 0(RA)
|
|
+ | lwz TMP3, 4(RA)
|
|
+ |.endif
|
|
| add TMP1, BASE, SAVE0
|
|
| stp BASE, L->base
|
|
| cmplw TMP1, CARG2
|
|
| sub CARG3, CARG2, TMP1
|
|
| decode_RA8 RA, INS
|
|
+ |.if FPU
|
|
| stfd f0, 0(CARG2)
|
|
+ |.else
|
|
+ | stw TMP2, 0(CARG2)
|
|
+ | stw TMP3, 4(CARG2)
|
|
+ |.endif
|
|
| bney ->BC_CAT_Z
|
|
+ |.if FPU
|
|
| stfdx f0, BASE, RA
|
|
+ |.else
|
|
+ | stwux TMP2, RA, BASE
|
|
+ | stw TMP3, 4(RA)
|
|
+ |.endif
|
|
| b ->cont_nop
|
|
|
|
|
|//-- Table indexing metamethods -----------------------------------------
|
|
@@ -900,9 +959,19 @@ static void build_subroutines(BuildCtx *
|
|
| // Returns TValue * (finished) or NULL (metamethod).
|
|
| cmplwi CRET1, 0
|
|
| beq >3
|
|
+ |.if FPU
|
|
| lfd f0, 0(CRET1)
|
|
+ |.else
|
|
+ | lwz TMP0, 0(CRET1)
|
|
+ | lwz TMP1, 4(CRET1)
|
|
+ |.endif
|
|
| ins_next1
|
|
+ |.if FPU
|
|
| stfdx f0, BASE, RA
|
|
+ |.else
|
|
+ | stwux TMP0, RA, BASE
|
|
+ | stw TMP1, 4(RA)
|
|
+ |.endif
|
|
| ins_next2
|
|
|
|
|
|3: // Call __index metamethod.
|
|
@@ -920,7 +989,12 @@ static void build_subroutines(BuildCtx *
|
|
| // Returns cTValue * or NULL.
|
|
| cmplwi CRET1, 0
|
|
| beq >1
|
|
+ |.if FPU
|
|
| lfd f14, 0(CRET1)
|
|
+ |.else
|
|
+ | lwz SAVE0, 0(CRET1)
|
|
+ | lwz SAVE1, 4(CRET1)
|
|
+ |.endif
|
|
| b ->BC_TGETR_Z
|
|
|1:
|
|
| stwx TISNIL, BASE, RA
|
|
@@ -975,11 +1049,21 @@ static void build_subroutines(BuildCtx *
|
|
| bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
|
|
| // Returns TValue * (finished) or NULL (metamethod).
|
|
| cmplwi CRET1, 0
|
|
+ |.if FPU
|
|
| lfdx f0, BASE, RA
|
|
+ |.else
|
|
+ | lwzux TMP2, RA, BASE
|
|
+ | lwz TMP3, 4(RA)
|
|
+ |.endif
|
|
| beq >3
|
|
| // NOBARRIER: lj_meta_tset ensures the table is not black.
|
|
| ins_next1
|
|
+ |.if FPU
|
|
| stfd f0, 0(CRET1)
|
|
+ |.else
|
|
+ | stw TMP2, 0(CRET1)
|
|
+ | stw TMP3, 4(CRET1)
|
|
+ |.endif
|
|
| ins_next2
|
|
|
|
|
|3: // Call __newindex metamethod.
|
|
@@ -990,7 +1074,12 @@ static void build_subroutines(BuildCtx *
|
|
| add PC, TMP1, BASE
|
|
| lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
|
|
| li NARGS8:RC, 24 // 3 args for func(t, k, v)
|
|
+ |.if FPU
|
|
| stfd f0, 16(BASE) // Copy value to third argument.
|
|
+ |.else
|
|
+ | stw TMP2, 16(BASE)
|
|
+ | stw TMP3, 20(BASE)
|
|
+ |.endif
|
|
| b ->vm_call_dispatch_f
|
|
|
|
|
|->vmeta_tsetr:
|
|
@@ -998,7 +1087,12 @@ static void build_subroutines(BuildCtx *
|
|
| stw PC, SAVE_PC
|
|
| bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
|
|
| // Returns TValue *.
|
|
+ |.if FPU
|
|
| stfd f14, 0(CRET1)
|
|
+ |.else
|
|
+ | stw SAVE0, 0(CRET1)
|
|
+ | stw SAVE1, 4(CRET1)
|
|
+ |.endif
|
|
| b ->cont_nop
|
|
|
|
|
|//-- Comparison metamethods ---------------------------------------------
|
|
@@ -1037,9 +1131,19 @@ static void build_subroutines(BuildCtx *
|
|
|
|
|
|->cont_ra: // RA = resultptr
|
|
| lwz INS, -4(PC)
|
|
+ |.if FPU
|
|
| lfd f0, 0(RA)
|
|
+ |.else
|
|
+ | lwz CARG1, 0(RA)
|
|
+ | lwz CARG2, 4(RA)
|
|
+ |.endif
|
|
| decode_RA8 TMP1, INS
|
|
+ |.if FPU
|
|
| stfdx f0, BASE, TMP1
|
|
+ |.else
|
|
+ | stwux CARG1, TMP1, BASE
|
|
+ | stw CARG2, 4(TMP1)
|
|
+ |.endif
|
|
| b ->cont_nop
|
|
|
|
|
|->cont_condt: // RA = resultptr
|
|
@@ -1245,22 +1349,32 @@ static void build_subroutines(BuildCtx *
|
|
|.macro .ffunc_n, name
|
|
|->ff_ .. name:
|
|
| cmplwi NARGS8:RC, 8
|
|
- | lwz CARG3, 0(BASE)
|
|
+ | lwz CARG1, 0(BASE)
|
|
+ |.if FPU
|
|
| lfd FARG1, 0(BASE)
|
|
+ |.else
|
|
+ | lwz CARG2, 4(BASE)
|
|
+ |.endif
|
|
| blt ->fff_fallback
|
|
- | checknum CARG3; bge ->fff_fallback
|
|
+ | checknum CARG1; bge ->fff_fallback
|
|
|.endmacro
|
|
|
|
|
|.macro .ffunc_nn, name
|
|
|->ff_ .. name:
|
|
| cmplwi NARGS8:RC, 16
|
|
- | lwz CARG3, 0(BASE)
|
|
+ | lwz CARG1, 0(BASE)
|
|
+ |.if FPU
|
|
| lfd FARG1, 0(BASE)
|
|
- | lwz CARG4, 8(BASE)
|
|
+ | lwz CARG3, 8(BASE)
|
|
| lfd FARG2, 8(BASE)
|
|
+ |.else
|
|
+ | lwz CARG2, 4(BASE)
|
|
+ | lwz CARG3, 8(BASE)
|
|
+ | lwz CARG4, 12(BASE)
|
|
+ |.endif
|
|
| blt ->fff_fallback
|
|
+ | checknum CARG1; bge ->fff_fallback
|
|
| checknum CARG3; bge ->fff_fallback
|
|
- | checknum CARG4; bge ->fff_fallback
|
|
|.endmacro
|
|
|
|
|
|// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
|
|
@@ -1281,14 +1395,21 @@ static void build_subroutines(BuildCtx *
|
|
| bge cr1, ->fff_fallback
|
|
| stw CARG3, 0(RA)
|
|
| addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
|
|
+ | addi TMP1, BASE, 8
|
|
+ | add TMP2, RA, NARGS8:RC
|
|
| stw CARG1, 4(RA)
|
|
| beq ->fff_res // Done if exactly 1 argument.
|
|
- | li TMP1, 8
|
|
- | subi RC, RC, 8
|
|
|1:
|
|
- | cmplw TMP1, RC
|
|
- | lfdx f0, BASE, TMP1
|
|
- | stfdx f0, RA, TMP1
|
|
+ | cmplw TMP1, TMP2
|
|
+ |.if FPU
|
|
+ | lfd f0, 0(TMP1) // Load next argument slot.
|
|
+ | stfd f0, -8(TMP1) // Store one slot lower, same as the soft-float stores below.
|
|
+ |.else
|
|
+ | lwz CARG1, 0(TMP1)
|
|
+ | lwz CARG2, 4(TMP1)
|
|
+ | stw CARG1, -8(TMP1)
|
|
+ | stw CARG2, -4(TMP1)
|
|
+ |.endif
|
|
| addi TMP1, TMP1, 8
|
|
| bney <1
|
|
| b ->fff_res
|
|
@@ -1303,8 +1424,14 @@ static void build_subroutines(BuildCtx *
|
|
| orc TMP1, TMP2, TMP0
|
|
| addi TMP1, TMP1, ~LJ_TISNUM+1
|
|
| slwi TMP1, TMP1, 3
|
|
+ |.if FPU
|
|
| la TMP2, CFUNC:RB->upvalue
|
|
| lfdx FARG1, TMP2, TMP1
|
|
+ |.else
|
|
+ | add TMP1, CFUNC:RB, TMP1
|
|
+ | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi
|
|
+ | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo
|
|
+ |.endif
|
|
| b ->fff_resn
|
|
|
|
|
|//-- Base library: getters and setters ---------------------------------
|
|
@@ -1382,7 +1509,12 @@ static void build_subroutines(BuildCtx *
|
|
| mr CARG1, L
|
|
| bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
|
|
| // Returns cTValue *.
|
|
+ |.if FPU
|
|
| lfd FARG1, 0(CRET1)
|
|
+ |.else
|
|
+ | lwz CARG2, 4(CRET1)
|
|
+ | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1.
|
|
+ |.endif
|
|
| b ->fff_resn
|
|
|
|
|
|//-- Base library: conversions ------------------------------------------
|
|
@@ -1391,7 +1523,11 @@ static void build_subroutines(BuildCtx *
|
|
| // Only handles the number case inline (without a base argument).
|
|
| cmplwi NARGS8:RC, 8
|
|
| lwz CARG1, 0(BASE)
|
|
+ |.if FPU
|
|
| lfd FARG1, 0(BASE)
|
|
+ |.else
|
|
+ | lwz CARG2, 4(BASE)
|
|
+ |.endif
|
|
| bne ->fff_fallback // Exactly one argument.
|
|
| checknum CARG1; bgt ->fff_fallback
|
|
| b ->fff_resn
|
|
@@ -1442,12 +1578,23 @@ static void build_subroutines(BuildCtx *
|
|
| cmplwi CRET1, 0
|
|
| li CARG3, LJ_TNIL
|
|
| beq ->fff_restv // End of traversal: return nil.
|
|
- | lfd f0, 8(BASE) // Copy key and value to results.
|
|
| la RA, -8(BASE)
|
|
+ |.if FPU
|
|
+ | lfd f0, 8(BASE) // Copy key and value to results.
|
|
| lfd f1, 16(BASE)
|
|
| stfd f0, 0(RA)
|
|
- | li RD, (2+1)*8
|
|
| stfd f1, 8(RA)
|
|
+ |.else
|
|
+ | lwz CARG1, 8(BASE)
|
|
+ | lwz CARG2, 12(BASE)
|
|
+ | lwz CARG3, 16(BASE)
|
|
+ | lwz CARG4, 20(BASE)
|
|
+ | stw CARG1, 0(RA)
|
|
+ | stw CARG2, 4(RA)
|
|
+ | stw CARG3, 8(RA)
|
|
+ | stw CARG4, 12(RA)
|
|
+ |.endif
|
|
+ | li RD, (2+1)*8
|
|
| b ->fff_res
|
|
|
|
|
|.ffunc_1 pairs
|
|
@@ -1456,17 +1603,32 @@ static void build_subroutines(BuildCtx *
|
|
| bne ->fff_fallback
|
|
#if LJ_52
|
|
| lwz TAB:TMP2, TAB:CARG1->metatable
|
|
+ |.if FPU
|
|
| lfd f0, CFUNC:RB->upvalue[0]
|
|
+ |.else
|
|
+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
|
|
+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
|
|
+ |.endif
|
|
| cmplwi TAB:TMP2, 0
|
|
| la RA, -8(BASE)
|
|
| bne ->fff_fallback
|
|
#else
|
|
+ |.if FPU
|
|
| lfd f0, CFUNC:RB->upvalue[0]
|
|
+ |.else
|
|
+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
|
|
+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
|
|
+ |.endif
|
|
| la RA, -8(BASE)
|
|
#endif
|
|
| stw TISNIL, 8(BASE)
|
|
| li RD, (3+1)*8
|
|
+ |.if FPU
|
|
| stfd f0, 0(RA)
|
|
+ |.else
|
|
+ | stw TMP0, 0(RA)
|
|
+ | stw TMP1, 4(RA)
|
|
+ |.endif
|
|
| b ->fff_res
|
|
|
|
|
|.ffunc ipairs_aux
|
|
@@ -1512,14 +1674,24 @@ static void build_subroutines(BuildCtx *
|
|
| stfd FARG2, 0(RA)
|
|
|.endif
|
|
| ble >2 // Not in array part?
|
|
+ |.if FPU
|
|
| lwzx TMP2, TMP1, TMP3
|
|
| lfdx f0, TMP1, TMP3
|
|
+ |.else
|
|
+ | lwzux TMP2, TMP1, TMP3
|
|
+ | lwz TMP3, 4(TMP1)
|
|
+ |.endif
|
|
|1:
|
|
| checknil TMP2
|
|
| li RD, (0+1)*8
|
|
| beq ->fff_res // End of iteration, return 0 results.
|
|
| li RD, (2+1)*8
|
|
+ |.if FPU
|
|
| stfd f0, 8(RA)
|
|
+ |.else
|
|
+ | stw TMP2, 8(RA)
|
|
+ | stw TMP3, 12(RA)
|
|
+ |.endif
|
|
| b ->fff_res
|
|
|2: // Check for empty hash part first. Otherwise call C function.
|
|
| lwz TMP0, TAB:CARG1->hmask
|
|
@@ -1533,7 +1705,11 @@ static void build_subroutines(BuildCtx *
|
|
| li RD, (0+1)*8
|
|
| beq ->fff_res
|
|
| lwz TMP2, 0(CRET1)
|
|
+ |.if FPU
|
|
| lfd f0, 0(CRET1)
|
|
+ |.else
|
|
+ | lwz TMP3, 4(CRET1)
|
|
+ |.endif
|
|
| b <1
|
|
|
|
|
|.ffunc_1 ipairs
|
|
@@ -1542,12 +1718,22 @@ static void build_subroutines(BuildCtx *
|
|
| bne ->fff_fallback
|
|
#if LJ_52
|
|
| lwz TAB:TMP2, TAB:CARG1->metatable
|
|
+ |.if FPU
|
|
| lfd f0, CFUNC:RB->upvalue[0]
|
|
+ |.else
|
|
+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
|
|
+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
|
|
+ |.endif
|
|
| cmplwi TAB:TMP2, 0
|
|
| la RA, -8(BASE)
|
|
| bne ->fff_fallback
|
|
#else
|
|
+ |.if FPU
|
|
| lfd f0, CFUNC:RB->upvalue[0]
|
|
+ |.else
|
|
+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
|
|
+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
|
|
+ |.endif
|
|
| la RA, -8(BASE)
|
|
#endif
|
|
|.if DUALNUM
|
|
@@ -1557,7 +1743,12 @@ static void build_subroutines(BuildCtx *
|
|
|.endif
|
|
| stw ZERO, 12(BASE)
|
|
| li RD, (3+1)*8
|
|
+ |.if FPU
|
|
| stfd f0, 0(RA)
|
|
+ |.else
|
|
+ | stw TMP0, 0(RA)
|
|
+ | stw TMP1, 4(RA)
|
|
+ |.endif
|
|
| b ->fff_res
|
|
|
|
|
|//-- Base library: catch errors ----------------------------------------
|
|
@@ -1576,19 +1767,32 @@ static void build_subroutines(BuildCtx *
|
|
|
|
|
|.ffunc xpcall
|
|
| cmplwi NARGS8:RC, 16
|
|
- | lwz CARG4, 8(BASE)
|
|
+ | lwz CARG3, 8(BASE)
|
|
+ |.if FPU
|
|
| lfd FARG2, 8(BASE)
|
|
| lfd FARG1, 0(BASE)
|
|
+ |.else
|
|
+ | lwz CARG1, 0(BASE)
|
|
+ | lwz CARG2, 4(BASE)
|
|
+ | lwz CARG4, 12(BASE)
|
|
+ |.endif
|
|
| blt ->fff_fallback
|
|
| lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH)
|
|
| mr TMP2, BASE
|
|
- | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function.
|
|
+ | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function.
|
|
| la BASE, 16(BASE)
|
|
| // Remember active hook before pcall.
|
|
| rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31
|
|
+ |.if FPU
|
|
| stfd FARG2, 0(TMP2) // Swap function and traceback.
|
|
- | subi NARGS8:RC, NARGS8:RC, 16
|
|
| stfd FARG1, 8(TMP2)
|
|
+ |.else
|
|
+ | stw CARG3, 0(TMP2)
|
|
+ | stw CARG4, 4(TMP2)
|
|
+ | stw CARG1, 8(TMP2)
|
|
+ | stw CARG2, 12(TMP2)
|
|
+ |.endif
|
|
+ | subi NARGS8:RC, NARGS8:RC, 16
|
|
| addi PC, TMP1, 16+FRAME_PCALL
|
|
| b ->vm_call_dispatch
|
|
|
|
|
@@ -1631,9 +1835,21 @@ static void build_subroutines(BuildCtx *
|
|
| stp BASE, L->top
|
|
|2: // Move args to coroutine.
|
|
| cmpw TMP1, NARGS8:RC
|
|
+ |.if FPU
|
|
| lfdx f0, BASE, TMP1
|
|
+ |.else
|
|
+ | add CARG3, BASE, TMP1
|
|
+ | lwz TMP2, 0(CARG3)
|
|
+ | lwz TMP3, 4(CARG3)
|
|
+ |.endif
|
|
| beq >3
|
|
+ |.if FPU
|
|
| stfdx f0, CARG2, TMP1
|
|
+ |.else
|
|
+ | add CARG3, CARG2, TMP1
|
|
+ | stw TMP2, 0(CARG3)
|
|
+ | stw TMP3, 4(CARG3)
|
|
+ |.endif
|
|
| addi TMP1, TMP1, 8
|
|
| b <2
|
|
|3:
|
|
@@ -1664,8 +1880,17 @@ static void build_subroutines(BuildCtx *
|
|
| stp TMP2, L:SAVE0->top // Clear coroutine stack.
|
|
|5: // Move results from coroutine.
|
|
| cmplw TMP1, TMP3
|
|
+ |.if FPU
|
|
| lfdx f0, TMP2, TMP1
|
|
| stfdx f0, BASE, TMP1
|
|
+ |.else
|
|
+ | add CARG3, TMP2, TMP1
|
|
+ | lwz CARG1, 0(CARG3)
|
|
+ | lwz CARG2, 4(CARG3)
|
|
+ | add CARG3, BASE, TMP1
|
|
+ | stw CARG1, 0(CARG3)
|
|
+ | stw CARG2, 4(CARG3)
|
|
+ |.endif
|
|
| addi TMP1, TMP1, 8
|
|
| bne <5
|
|
|6:
|
|
@@ -1690,12 +1915,22 @@ static void build_subroutines(BuildCtx *
|
|
| andix. TMP0, PC, FRAME_TYPE
|
|
| la TMP3, -8(TMP3)
|
|
| li TMP1, LJ_TFALSE
|
|
+ |.if FPU
|
|
| lfd f0, 0(TMP3)
|
|
+ |.else
|
|
+ | lwz CARG1, 0(TMP3)
|
|
+ | lwz CARG2, 4(TMP3)
|
|
+ |.endif
|
|
| stp TMP3, L:SAVE0->top // Remove error from coroutine stack.
|
|
| li RD, (2+1)*8
|
|
| stw TMP1, -8(BASE) // Prepend false to results.
|
|
| la RA, -8(BASE)
|
|
+ |.if FPU
|
|
| stfd f0, 0(BASE) // Copy error message.
|
|
+ |.else
|
|
+ | stw CARG1, 0(BASE) // Copy error message.
|
|
+ | stw CARG2, 4(BASE)
|
|
+ |.endif
|
|
| b <7
|
|
|.else
|
|
| mr CARG1, L
|
|
@@ -1874,7 +2109,12 @@ static void build_subroutines(BuildCtx *
|
|
| lus CARG1, 0x8000 // -(2^31).
|
|
| beqy ->fff_resi
|
|
|5:
|
|
+ |.if FPU
|
|
| lfd FARG1, 0(BASE)
|
|
+ |.else
|
|
+ | lwz CARG1, 0(BASE)
|
|
+ | lwz CARG2, 4(BASE)
|
|
+ |.endif
|
|
| blex func
|
|
| b ->fff_resn
|
|
|.endmacro
|
|
@@ -1898,10 +2138,14 @@ static void build_subroutines(BuildCtx *
|
|
|
|
|
|.ffunc math_log
|
|
| cmplwi NARGS8:RC, 8
|
|
- | lwz CARG3, 0(BASE)
|
|
- | lfd FARG1, 0(BASE)
|
|
+ | lwz CARG1, 0(BASE)
|
|
| bne ->fff_fallback // Need exactly 1 argument.
|
|
- | checknum CARG3; bge ->fff_fallback
|
|
+ | checknum CARG1; bge ->fff_fallback
|
|
+ |.if FPU
|
|
+ | lfd FARG1, 0(BASE)
|
|
+ |.else
|
|
+ | lwz CARG2, 4(BASE)
|
|
+ |.endif
|
|
| blex log
|
|
| b ->fff_resn
|
|
|
|
|
@@ -1923,17 +2167,24 @@ static void build_subroutines(BuildCtx *
|
|
|.if DUALNUM
|
|
|.ffunc math_ldexp
|
|
| cmplwi NARGS8:RC, 16
|
|
- | lwz CARG3, 0(BASE)
|
|
+ | lwz TMP0, 0(BASE)
|
|
+ |.if FPU
|
|
| lfd FARG1, 0(BASE)
|
|
- | lwz CARG4, 8(BASE)
|
|
+ |.else
|
|
+ | lwz CARG1, 0(BASE)
|
|
+ | lwz CARG2, 4(BASE)
|
|
+ |.endif
|
|
+ | lwz TMP1, 8(BASE)
|
|
|.if GPR64
|
|
| lwz CARG2, 12(BASE)
|
|
- |.else
|
|
+ |.elif FPU
|
|
| lwz CARG1, 12(BASE)
|
|
+ |.else
|
|
+ | lwz CARG3, 12(BASE)
|
|
|.endif
|
|
| blt ->fff_fallback
|
|
- | checknum CARG3; bge ->fff_fallback
|
|
- | checknum CARG4; bne ->fff_fallback
|
|
+ | checknum TMP0; bge ->fff_fallback
|
|
+ | checknum TMP1; bne ->fff_fallback
|
|
|.else
|
|
|.ffunc_nn math_ldexp
|
|
|.if GPR64
|
|
@@ -1948,8 +2199,10 @@ static void build_subroutines(BuildCtx *
|
|
|.ffunc_n math_frexp
|
|
|.if GPR64
|
|
| la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
|
|
- |.else
|
|
+ |.elif FPU
|
|
| la CARG1, DISPATCH_GL(tmptv)(DISPATCH)
|
|
+ |.else
|
|
+ | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
|
|
|.endif
|
|
| lwz PC, FRAME_PC(BASE)
|
|
| blex frexp
|
|
@@ -1958,7 +2211,12 @@ static void build_subroutines(BuildCtx *
|
|
|.if not DUALNUM
|
|
| tonum_i FARG2, TMP1
|
|
|.endif
|
|
+ |.if FPU
|
|
| stfd FARG1, 0(RA)
|
|
+ |.else
|
|
+ | stw CRET1, 0(RA)
|
|
+ | stw CRET2, 4(RA)
|
|
+ |.endif
|
|
| li RD, (2+1)*8
|
|
|.if DUALNUM
|
|
| stw TISNUM, 8(RA)
|
|
@@ -1971,13 +2229,20 @@ static void build_subroutines(BuildCtx *
|
|
|.ffunc_n math_modf
|
|
|.if GPR64
|
|
| la CARG2, -8(BASE)
|
|
- |.else
|
|
+ |.elif FPU
|
|
| la CARG1, -8(BASE)
|
|
+ |.else
|
|
+ | la CARG3, -8(BASE)
|
|
|.endif
|
|
| lwz PC, FRAME_PC(BASE)
|
|
| blex modf
|
|
| la RA, -8(BASE)
|
|
+ |.if FPU
|
|
| stfd FARG1, 0(BASE)
|
|
+ |.else
|
|
+ | stw CRET1, 0(BASE)
|
|
+ | stw CRET2, 4(BASE)
|
|
+ |.endif
|
|
| li RD, (2+1)*8
|
|
| b ->fff_res
|
|
|
|
|
@@ -1985,13 +2250,13 @@ static void build_subroutines(BuildCtx *
|
|
|.if DUALNUM
|
|
| .ffunc_1 name
|
|
| checknum CARG3
|
|
- | addi TMP1, BASE, 8
|
|
- | add TMP2, BASE, NARGS8:RC
|
|
+ | addi SAVE0, BASE, 8
|
|
+ | add SAVE1, BASE, NARGS8:RC
|
|
| bne >4
|
|
|1: // Handle integers.
|
|
- | lwz CARG4, 0(TMP1)
|
|
- | cmplw cr1, TMP1, TMP2
|
|
- | lwz CARG2, 4(TMP1)
|
|
+ | lwz CARG4, 0(SAVE0)
|
|
+ | cmplw cr1, SAVE0, SAVE1
|
|
+ | lwz CARG2, 4(SAVE0)
|
|
| bge cr1, ->fff_resi
|
|
| checknum CARG4
|
|
| xoris TMP0, CARG1, 0x8000
|
|
@@ -2008,36 +2273,76 @@ static void build_subroutines(BuildCtx *
|
|
|.if GPR64
|
|
| rldicl CARG1, CARG1, 0, 32
|
|
|.endif
|
|
- | addi TMP1, TMP1, 8
|
|
+ | addi SAVE0, SAVE0, 8
|
|
| b <1
|
|
|3:
|
|
| bge ->fff_fallback
|
|
| // Convert intermediate result to number and continue below.
|
|
+ |.if FPU
|
|
| tonum_i FARG1, CARG1
|
|
- | lfd FARG2, 0(TMP1)
|
|
+ | lfd FARG2, 0(SAVE0)
|
|
+ |.else
|
|
+ | mr CARG2, CARG1
|
|
+ | bl ->vm_sfi2d_1
|
|
+ | lwz CARG3, 0(SAVE0)
|
|
+ | lwz CARG4, 4(SAVE0)
|
|
+ |.endif
|
|
| b >6
|
|
|4:
|
|
+ |.if FPU
|
|
| lfd FARG1, 0(BASE)
|
|
+ |.else
|
|
+ | lwz CARG1, 0(BASE)
|
|
+ | lwz CARG2, 4(BASE)
|
|
+ |.endif
|
|
| bge ->fff_fallback
|
|
|5: // Handle numbers.
|
|
- | lwz CARG4, 0(TMP1)
|
|
- | cmplw cr1, TMP1, TMP2
|
|
- | lfd FARG2, 0(TMP1)
|
|
+ | lwz CARG3, 0(SAVE0)
|
|
+ | cmplw cr1, SAVE0, SAVE1
|
|
+ |.if FPU
|
|
+ | lfd FARG2, 0(SAVE0)
|
|
+ |.else
|
|
+ | lwz CARG4, 4(SAVE0)
|
|
+ |.endif
|
|
| bge cr1, ->fff_resn
|
|
- | checknum CARG4; bge >7
|
|
+ | checknum CARG3; bge >7
|
|
|6:
|
|
+ | addi SAVE0, SAVE0, 8
|
|
+ |.if FPU
|
|
| fsub f0, FARG1, FARG2
|
|
- | addi TMP1, TMP1, 8
|
|
|.if ismax
|
|
| fsel FARG1, f0, FARG1, FARG2
|
|
|.else
|
|
| fsel FARG1, f0, FARG2, FARG1
|
|
|.endif
|
|
+ |.else
|
|
+ | stw CARG1, SFSAVE_1
|
|
+ | stw CARG2, SFSAVE_2
|
|
+ | stw CARG3, SFSAVE_3
|
|
+ | stw CARG4, SFSAVE_4
|
|
+ | blex __ledf2
|
|
+ | cmpwi CRET1, 0
|
|
+ |.if ismax
|
|
+ | blt >8
|
|
+ |.else
|
|
+ | bge >8
|
|
+ |.endif
|
|
+ | lwz CARG1, SFSAVE_1
|
|
+ | lwz CARG2, SFSAVE_2
|
|
+ | b <5
|
|
+ |8:
|
|
+ | lwz CARG1, SFSAVE_3
|
|
+ | lwz CARG2, SFSAVE_4
|
|
+ |.endif
|
|
| b <5
|
|
|7: // Convert integer to number and continue above.
|
|
- | lwz CARG2, 4(TMP1)
|
|
+ | lwz CARG3, 4(SAVE0)
|
|
| bne ->fff_fallback
|
|
- | tonum_i FARG2, CARG2
|
|
+ |.if FPU
|
|
+ | tonum_i FARG2, CARG3
|
|
+ |.else
|
|
+ | bl ->vm_sfi2d_2
|
|
+ |.endif
|
|
| b <6
|
|
|.else
|
|
| .ffunc_n name
|
|
@@ -2237,28 +2542,37 @@ static void build_subroutines(BuildCtx *
|
|
|
|
|
|.macro .ffunc_bit_op, name, ins
|
|
| .ffunc_bit name
|
|
- | addi TMP1, BASE, 8
|
|
- | add TMP2, BASE, NARGS8:RC
|
|
+ | addi SAVE0, BASE, 8
|
|
+ | add SAVE1, BASE, NARGS8:RC
|
|
|1:
|
|
- | lwz CARG4, 0(TMP1)
|
|
- | cmplw cr1, TMP1, TMP2
|
|
+ | lwz CARG4, 0(SAVE0)
|
|
+ | cmplw cr1, SAVE0, SAVE1
|
|
|.if DUALNUM
|
|
- | lwz CARG2, 4(TMP1)
|
|
+ | lwz CARG2, 4(SAVE0)
|
|
|.else
|
|
- | lfd FARG1, 0(TMP1)
|
|
+ | lfd FARG1, 0(SAVE0)
|
|
|.endif
|
|
| bgey cr1, ->fff_resi
|
|
| checknum CARG4
|
|
|.if DUALNUM
|
|
+ |.if FPU
|
|
| bnel ->fff_bitop_fb
|
|
|.else
|
|
+ | beq >3
|
|
+ | stw CARG1, SFSAVE_1
|
|
+ | bl ->fff_bitop_fb
|
|
+ | mr CARG2, CARG1
|
|
+ | lwz CARG1, SFSAVE_1
|
|
+ |3:
|
|
+ |.endif
|
|
+ |.else
|
|
| fadd FARG1, FARG1, TOBIT
|
|
| bge ->fff_fallback
|
|
| stfd FARG1, TMPD
|
|
| lwz CARG2, TMPD_LO
|
|
|.endif
|
|
| ins CARG1, CARG1, CARG2
|
|
- | addi TMP1, TMP1, 8
|
|
+ | addi SAVE0, SAVE0, 8
|
|
| b <1
|
|
|.endmacro
|
|
|
|
|
@@ -2280,7 +2594,14 @@ static void build_subroutines(BuildCtx *
|
|
|.macro .ffunc_bit_sh, name, ins, shmod
|
|
|.if DUALNUM
|
|
| .ffunc_2 bit_..name
|
|
+ |.if FPU
|
|
| checknum CARG3; bnel ->fff_tobit_fb
|
|
+ |.else
|
|
+ | checknum CARG3; beq >1
|
|
+ | bl ->fff_tobit_fb
|
|
+ | lwz CARG2, 12(BASE) // Conversion polluted CARG2.
|
|
+ |1:
|
|
+ |.endif
|
|
| // Note: no inline conversion from number for 2nd argument!
|
|
| checknum CARG4; bne ->fff_fallback
|
|
|.else
|
|
@@ -2317,27 +2638,77 @@ static void build_subroutines(BuildCtx *
|
|
|->fff_resn:
|
|
| lwz PC, FRAME_PC(BASE)
|
|
| la RA, -8(BASE)
|
|
+ |.if FPU
|
|
| stfd FARG1, -8(BASE)
|
|
+ |.else
|
|
+ | stw CARG1, -8(BASE)
|
|
+ | stw CARG2, -4(BASE)
|
|
+ |.endif
|
|
| b ->fff_res1
|
|
|
|
|
|// Fallback FP number to bit conversion.
|
|
|->fff_tobit_fb:
|
|
|.if DUALNUM
|
|
+ |.if FPU
|
|
| lfd FARG1, 0(BASE)
|
|
| bgt ->fff_fallback
|
|
| fadd FARG1, FARG1, TOBIT
|
|
| stfd FARG1, TMPD
|
|
| lwz CARG1, TMPD_LO
|
|
| blr
|
|
+ |.else
|
|
+ | bgt ->fff_fallback
|
|
+ | mr CARG2, CARG1
|
|
+ | mr CARG1, CARG3
|
|
+ |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2.
|
|
+ |->vm_tobit:
|
|
+ | slwi TMP2, CARG1, 1
|
|
+ | addis TMP2, TMP2, 0x0020
|
|
+ | cmpwi TMP2, 0
|
|
+ | bge >2
|
|
+ | li TMP1, 0x3e0
|
|
+ | srawi TMP2, TMP2, 21
|
|
+ | not TMP1, TMP1
|
|
+ | sub. TMP2, TMP1, TMP2
|
|
+ | cmpwi cr7, CARG1, 0
|
|
+ | blt >1
|
|
+ | slwi TMP1, CARG1, 11
|
|
+ | srwi TMP0, CARG2, 21
|
|
+ | oris TMP1, TMP1, 0x8000
|
|
+ | or TMP1, TMP1, TMP0
|
|
+ | srw CARG1, TMP1, TMP2
|
|
+ | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
|
|
+ | neg CARG1, CARG1
|
|
+ | blr
|
|
+ |1:
|
|
+ | addi TMP2, TMP2, 21
|
|
+ | srw TMP1, CARG2, TMP2
|
|
+ | slwi CARG2, CARG1, 12
|
|
+ | subfic TMP2, TMP2, 20
|
|
+ | slw TMP0, CARG2, TMP2
|
|
+ | or CARG1, TMP1, TMP0
|
|
+ | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
|
|
+ | neg CARG1, CARG1
|
|
+ | blr
|
|
+ |2:
|
|
+ | li CARG1, 0
|
|
+ | blr
|
|
+ |.endif
|
|
|.endif
|
|
|->fff_bitop_fb:
|
|
|.if DUALNUM
|
|
- | lfd FARG1, 0(TMP1)
|
|
+ |.if FPU
|
|
+ | lfd FARG1, 0(SAVE0)
|
|
| bgt ->fff_fallback
|
|
| fadd FARG1, FARG1, TOBIT
|
|
| stfd FARG1, TMPD
|
|
| lwz CARG2, TMPD_LO
|
|
| blr
|
|
+ |.else
|
|
+ | bgt ->fff_fallback
|
|
+ | mr CARG1, CARG4
|
|
+ | b ->vm_tobit
|
|
+ |.endif
|
|
|.endif
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
@@ -2530,10 +2901,21 @@ static void build_subroutines(BuildCtx *
|
|
| decode_RA8 RC, INS // Call base.
|
|
| beq >2
|
|
|1: // Move results down.
|
|
+ |.if FPU
|
|
| lfd f0, 0(RA)
|
|
+ |.else
|
|
+ | lwz CARG1, 0(RA)
|
|
+ | lwz CARG2, 4(RA)
|
|
+ |.endif
|
|
| addic. TMP1, TMP1, -8
|
|
| addi RA, RA, 8
|
|
+ |.if FPU
|
|
| stfdx f0, BASE, RC
|
|
+ |.else
|
|
+ | add CARG3, BASE, RC
|
|
+ | stw CARG1, 0(CARG3)
|
|
+ | stw CARG2, 4(CARG3)
|
|
+ |.endif
|
|
| addi RC, RC, 8
|
|
| bne <1
|
|
|2:
|
|
@@ -2586,10 +2968,12 @@ static void build_subroutines(BuildCtx *
|
|
|//-----------------------------------------------------------------------
|
|
|
|
|
|.macro savex_, a, b, c, d
|
|
+ |.if FPU
|
|
| stfd f..a, 16+a*8(sp)
|
|
| stfd f..b, 16+b*8(sp)
|
|
| stfd f..c, 16+c*8(sp)
|
|
| stfd f..d, 16+d*8(sp)
|
|
+ |.endif
|
|
|.endmacro
|
|
|
|
|
|->vm_exit_handler:
|
|
@@ -2661,16 +3045,16 @@ static void build_subroutines(BuildCtx *
|
|
| lwz KBASE, PC2PROTO(k)(TMP1)
|
|
| // Setup type comparison constants.
|
|
| li TISNUM, LJ_TISNUM
|
|
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
|
|
- | stw TMP3, TMPD
|
|
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
|
|
+ | .FPU stw TMP3, TMPD
|
|
| li ZERO, 0
|
|
- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
|
|
- | lfs TOBIT, TMPD
|
|
- | stw TMP3, TMPD
|
|
- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
|
|
+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
|
|
+ | .FPU lfs TOBIT, TMPD
|
|
+ | .FPU stw TMP3, TMPD
|
|
+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
|
|
| li TISNIL, LJ_TNIL
|
|
- | stw TMP0, TONUM_HI
|
|
- | lfs TONUM, TMPD
|
|
+ | .FPU stw TMP0, TONUM_HI
|
|
+ | .FPU lfs TONUM, TMPD
|
|
| // Modified copy of ins_next which handles function header dispatch, too.
|
|
| lwz INS, 0(PC)
|
|
| addi PC, PC, 4
|
|
@@ -2715,7 +3099,35 @@ static void build_subroutines(BuildCtx *
|
|
|//-- Math helper functions ----------------------------------------------
|
|
|//-----------------------------------------------------------------------
|
|
|
|
|
- |// NYI: Use internal implementations of floor, ceil, trunc.
|
|
+ |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp.
|
|
+ |
|
|
+ |.macro sfi2d, AHI, ALO // Soft-float int32 -> double: input in ALO, result in AHI:ALO, then returns via blr.
|
|
+ |.if not FPU // Body is only emitted for builds without an FPU.
|
|
+ | mr. AHI, ALO // Copy the input to AHI and set cr0 from it.
|
|
+ | bclr 12, 2 // Handle zero first.
|
|
+ | srawi TMP0, ALO, 31 // TMP0 = all ones if the input is negative, else 0.
|
|
+ | xor TMP1, ALO, TMP0 // Conditional complement; the sub below completes the negation.
|
|
+ | sub TMP1, TMP1, TMP0 // Absolute value in TMP1.
|
|
+ | cntlzw AHI, TMP1 // Leading zero count = shift needed to normalize.
|
|
+ | andix. TMP0, TMP0, 0x800 // Mask sign bit.
|
|
+ | slw TMP1, TMP1, AHI // Align mantissa left with leading 1.
|
|
+ | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI.
|
|
+ | slwi ALO, TMP1, 21 // Low result word: the 11 mantissa bits that do not fit in the high word.
|
|
+ | or AHI, AHI, TMP0 // Sign | Exponent.
|
|
+ | srwi TMP1, TMP1, 11 // Leading 1 lands at bit 20 (counting from the least significant bit).
|
|
+ | slwi AHI, AHI, 20 // Align left.
|
|
+ | add AHI, AHI, TMP1 // Add mantissa, increment exponent.
|
|
+ | blr // Return to the caller of the label this macro is expanded under.
|
|
+ |.endif
|
|
+ |.endmacro
|
|
+ |
|
|
+ |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1.
|
|
+ |->vm_sfi2d_1:
|
|
+ | sfi2d CARG1, CARG2
|
|
+ |
|
|
+ |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1.
|
|
+ |->vm_sfi2d_2:
|
|
+ | sfi2d CARG3, CARG4
|
|
|
|
|
|->vm_modi:
|
|
| divwo. TMP0, CARG1, CARG2
|
|
@@ -2783,21 +3195,21 @@ static void build_subroutines(BuildCtx *
|
|
| addi DISPATCH, r12, GG_G2DISP
|
|
| stw r11, CTSTATE->cb.slot
|
|
| stw r3, CTSTATE->cb.gpr[0]
|
|
- | stfd f1, CTSTATE->cb.fpr[0]
|
|
+ | .FPU stfd f1, CTSTATE->cb.fpr[0]
|
|
| stw r4, CTSTATE->cb.gpr[1]
|
|
- | stfd f2, CTSTATE->cb.fpr[1]
|
|
+ | .FPU stfd f2, CTSTATE->cb.fpr[1]
|
|
| stw r5, CTSTATE->cb.gpr[2]
|
|
- | stfd f3, CTSTATE->cb.fpr[2]
|
|
+ | .FPU stfd f3, CTSTATE->cb.fpr[2]
|
|
| stw r6, CTSTATE->cb.gpr[3]
|
|
- | stfd f4, CTSTATE->cb.fpr[3]
|
|
+ | .FPU stfd f4, CTSTATE->cb.fpr[3]
|
|
| stw r7, CTSTATE->cb.gpr[4]
|
|
- | stfd f5, CTSTATE->cb.fpr[4]
|
|
+ | .FPU stfd f5, CTSTATE->cb.fpr[4]
|
|
| stw r8, CTSTATE->cb.gpr[5]
|
|
- | stfd f6, CTSTATE->cb.fpr[5]
|
|
+ | .FPU stfd f6, CTSTATE->cb.fpr[5]
|
|
| stw r9, CTSTATE->cb.gpr[6]
|
|
- | stfd f7, CTSTATE->cb.fpr[6]
|
|
+ | .FPU stfd f7, CTSTATE->cb.fpr[6]
|
|
| stw r10, CTSTATE->cb.gpr[7]
|
|
- | stfd f8, CTSTATE->cb.fpr[7]
|
|
+ | .FPU stfd f8, CTSTATE->cb.fpr[7]
|
|
| addi TMP0, sp, CFRAME_SPACE+8
|
|
| stw TMP0, CTSTATE->cb.stack
|
|
| mr CARG1, CTSTATE
|
|
@@ -2808,21 +3220,21 @@ static void build_subroutines(BuildCtx *
|
|
| lp BASE, L:CRET1->base
|
|
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
|
|
| lp RC, L:CRET1->top
|
|
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
|
|
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
|
|
| li ZERO, 0
|
|
| mr L, CRET1
|
|
- | stw TMP3, TMPD
|
|
- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
|
|
+ | .FPU stw TMP3, TMPD
|
|
+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
|
|
| lwz LFUNC:RB, FRAME_FUNC(BASE)
|
|
- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
|
|
- | stw TMP0, TONUM_HI
|
|
+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
|
|
+ | .FPU stw TMP0, TONUM_HI
|
|
| li TISNIL, LJ_TNIL
|
|
| li_vmstate INTERP
|
|
- | lfs TOBIT, TMPD
|
|
- | stw TMP3, TMPD
|
|
+ | .FPU lfs TOBIT, TMPD
|
|
+ | .FPU stw TMP3, TMPD
|
|
| sub RC, RC, BASE
|
|
| st_vmstate
|
|
- | lfs TONUM, TMPD
|
|
+ | .FPU lfs TONUM, TMPD
|
|
| ins_callt
|
|
|.endif
|
|
|
|
|
@@ -2836,7 +3248,7 @@ static void build_subroutines(BuildCtx *
|
|
| mr CARG2, RA
|
|
| bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
|
|
| lwz CRET1, CTSTATE->cb.gpr[0]
|
|
- | lfd FARG1, CTSTATE->cb.fpr[0]
|
|
+ | .FPU lfd FARG1, CTSTATE->cb.fpr[0]
|
|
| lwz CRET2, CTSTATE->cb.gpr[1]
|
|
| b ->vm_leave_unw
|
|
|.endif
|
|
@@ -2870,14 +3282,14 @@ static void build_subroutines(BuildCtx *
|
|
| bge <1
|
|
|2:
|
|
| bney cr1, >3
|
|
- | lfd f1, CCSTATE->fpr[0]
|
|
- | lfd f2, CCSTATE->fpr[1]
|
|
- | lfd f3, CCSTATE->fpr[2]
|
|
- | lfd f4, CCSTATE->fpr[3]
|
|
- | lfd f5, CCSTATE->fpr[4]
|
|
- | lfd f6, CCSTATE->fpr[5]
|
|
- | lfd f7, CCSTATE->fpr[6]
|
|
- | lfd f8, CCSTATE->fpr[7]
|
|
+ | .FPU lfd f1, CCSTATE->fpr[0]
|
|
+ | .FPU lfd f2, CCSTATE->fpr[1]
|
|
+ | .FPU lfd f3, CCSTATE->fpr[2]
|
|
+ | .FPU lfd f4, CCSTATE->fpr[3]
|
|
+ | .FPU lfd f5, CCSTATE->fpr[4]
|
|
+ | .FPU lfd f6, CCSTATE->fpr[5]
|
|
+ | .FPU lfd f7, CCSTATE->fpr[6]
|
|
+ | .FPU lfd f8, CCSTATE->fpr[7]
|
|
|3:
|
|
| lp TMP0, CCSTATE->func
|
|
| lwz CARG2, CCSTATE->gpr[1]
|
|
@@ -2894,7 +3306,7 @@ static void build_subroutines(BuildCtx *
|
|
| lwz TMP2, -4(r14)
|
|
| lwz TMP0, 4(r14)
|
|
| stw CARG1, CCSTATE:TMP1->gpr[0]
|
|
- | stfd FARG1, CCSTATE:TMP1->fpr[0]
|
|
+ | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0]
|
|
| stw CARG2, CCSTATE:TMP1->gpr[1]
|
|
| mtlr TMP0
|
|
| stw CARG3, CCSTATE:TMP1->gpr[2]
|
|
@@ -2923,19 +3335,19 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
|
|
| // RA = src1*8, RD = src2*8, JMP with RD = target
|
|
|.if DUALNUM
|
|
- | lwzux TMP0, RA, BASE
|
|
+ | lwzux CARG1, RA, BASE
|
|
| addi PC, PC, 4
|
|
| lwz CARG2, 4(RA)
|
|
- | lwzux TMP1, RD, BASE
|
|
+ | lwzux CARG3, RD, BASE
|
|
| lwz TMP2, -4(PC)
|
|
- | checknum cr0, TMP0
|
|
- | lwz CARG3, 4(RD)
|
|
+ | checknum cr0, CARG1
|
|
+ | lwz CARG4, 4(RD)
|
|
| decode_RD4 TMP2, TMP2
|
|
- | checknum cr1, TMP1
|
|
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
|
|
+ | checknum cr1, CARG3
|
|
+ | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16)
|
|
| bne cr0, >7
|
|
| bne cr1, >8
|
|
- | cmpw CARG2, CARG3
|
|
+ | cmpw CARG2, CARG4
|
|
if (op == BC_ISLT) {
|
|
| bge >2
|
|
} else if (op == BC_ISGE) {
|
|
@@ -2946,28 +3358,41 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| ble >2
|
|
}
|
|
|1:
|
|
- | add PC, PC, TMP2
|
|
+ | add PC, PC, SAVE0
|
|
|2:
|
|
| ins_next
|
|
|
|
|
|7: // RA is not an integer.
|
|
| bgt cr0, ->vmeta_comp
|
|
| // RA is a number.
|
|
- | lfd f0, 0(RA)
|
|
+ | .FPU lfd f0, 0(RA)
|
|
| bgt cr1, ->vmeta_comp
|
|
| blt cr1, >4
|
|
| // RA is a number, RD is an integer.
|
|
- | tonum_i f1, CARG3
|
|
+ |.if FPU
|
|
+ | tonum_i f1, CARG4
|
|
+ |.else
|
|
+ | bl ->vm_sfi2d_2
|
|
+ |.endif
|
|
| b >5
|
|
|
|
|
|8: // RA is an integer, RD is not an integer.
|
|
| bgt cr1, ->vmeta_comp
|
|
| // RA is an integer, RD is a number.
|
|
+ |.if FPU
|
|
| tonum_i f0, CARG2
|
|
+ |.else
|
|
+ | bl ->vm_sfi2d_1
|
|
+ |.endif
|
|
|4:
|
|
- | lfd f1, 0(RD)
|
|
+ | .FPU lfd f1, 0(RD)
|
|
|5:
|
|
+ |.if FPU
|
|
| fcmpu cr0, f0, f1
|
|
+ |.else
|
|
+ | blex __ledf2
|
|
+ | cmpwi CRET1, 0
|
|
+ |.endif
|
|
if (op == BC_ISLT) {
|
|
| bge <2
|
|
} else if (op == BC_ISGE) {
|
|
@@ -3015,42 +3440,42 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
vk = op == BC_ISEQV;
|
|
| // RA = src1*8, RD = src2*8, JMP with RD = target
|
|
|.if DUALNUM
|
|
- | lwzux TMP0, RA, BASE
|
|
+ | lwzux CARG1, RA, BASE
|
|
| addi PC, PC, 4
|
|
| lwz CARG2, 4(RA)
|
|
- | lwzux TMP1, RD, BASE
|
|
- | checknum cr0, TMP0
|
|
- | lwz TMP2, -4(PC)
|
|
- | checknum cr1, TMP1
|
|
- | decode_RD4 TMP2, TMP2
|
|
- | lwz CARG3, 4(RD)
|
|
+ | lwzux CARG3, RD, BASE
|
|
+ | checknum cr0, CARG1
|
|
+ | lwz SAVE0, -4(PC)
|
|
+ | checknum cr1, CARG3
|
|
+ | decode_RD4 SAVE0, SAVE0
|
|
+ | lwz CARG4, 4(RD)
|
|
| cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
|
|
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
|
|
+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
|
|
if (vk) {
|
|
| ble cr7, ->BC_ISEQN_Z
|
|
} else {
|
|
| ble cr7, ->BC_ISNEN_Z
|
|
}
|
|
|.else
|
|
- | lwzux TMP0, RA, BASE
|
|
- | lwz TMP2, 0(PC)
|
|
+ | lwzux CARG1, RA, BASE
|
|
+ | lwz SAVE0, 0(PC)
|
|
| lfd f0, 0(RA)
|
|
| addi PC, PC, 4
|
|
- | lwzux TMP1, RD, BASE
|
|
- | checknum cr0, TMP0
|
|
- | decode_RD4 TMP2, TMP2
|
|
+ | lwzux CARG3, RD, BASE
|
|
+ | checknum cr0, CARG1
|
|
+ | decode_RD4 SAVE0, SAVE0
|
|
| lfd f1, 0(RD)
|
|
- | checknum cr1, TMP1
|
|
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
|
|
+ | checknum cr1, CARG3
|
|
+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
|
|
| bge cr0, >5
|
|
| bge cr1, >5
|
|
| fcmpu cr0, f0, f1
|
|
if (vk) {
|
|
| bne >1
|
|
- | add PC, PC, TMP2
|
|
+ | add PC, PC, SAVE0
|
|
} else {
|
|
| beq >1
|
|
- | add PC, PC, TMP2
|
|
+ | add PC, PC, SAVE0
|
|
}
|
|
|1:
|
|
| ins_next
|
|
@@ -3058,36 +3483,36 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
|5: // Either or both types are not numbers.
|
|
|.if not DUALNUM
|
|
| lwz CARG2, 4(RA)
|
|
- | lwz CARG3, 4(RD)
|
|
+ | lwz CARG4, 4(RD)
|
|
|.endif
|
|
|.if FFI
|
|
- | cmpwi cr7, TMP0, LJ_TCDATA
|
|
- | cmpwi cr5, TMP1, LJ_TCDATA
|
|
+ | cmpwi cr7, CARG1, LJ_TCDATA
|
|
+ | cmpwi cr5, CARG3, LJ_TCDATA
|
|
|.endif
|
|
- | not TMP3, TMP0
|
|
- | cmplw TMP0, TMP1
|
|
- | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive?
|
|
+ | not TMP2, CARG1
|
|
+ | cmplw CARG1, CARG3
|
|
+ | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive?
|
|
|.if FFI
|
|
| cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq
|
|
|.endif
|
|
- | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata?
|
|
+ | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata?
|
|
|.if FFI
|
|
| beq cr7, ->vmeta_equal_cd
|
|
|.endif
|
|
- | cmplw cr5, CARG2, CARG3
|
|
+ | cmplw cr5, CARG2, CARG4
|
|
| crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive.
|
|
| crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type.
|
|
| crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv.
|
|
- | mr SAVE0, PC
|
|
+ | mr SAVE1, PC
|
|
| cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2.
|
|
| cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2.
|
|
if (vk) {
|
|
| bne cr0, >6
|
|
- | add PC, PC, TMP2
|
|
+ | add PC, PC, SAVE0
|
|
|6:
|
|
} else {
|
|
| beq cr0, >6
|
|
- | add PC, PC, TMP2
|
|
+ | add PC, PC, SAVE0
|
|
|6:
|
|
}
|
|
|.if DUALNUM
|
|
@@ -3102,6 +3527,7 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
|
|
|
| // Different tables or userdatas. Need to check __eq metamethod.
|
|
| // Field metatable must be at same offset for GCtab and GCudata!
|
|
+ | mr CARG3, CARG4
|
|
| lwz TAB:TMP2, TAB:CARG2->metatable
|
|
| li CARG4, 1-vk // ne = 0 or 1.
|
|
| cmplwi TAB:TMP2, 0
|
|
@@ -3109,7 +3535,7 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| lbz TMP2, TAB:TMP2->nomm
|
|
| andix. TMP2, TMP2, 1<<MM_eq
|
|
| bne <1 // Or 'no __eq' flag set?
|
|
- | mr PC, SAVE0 // Restore old PC.
|
|
+ | mr PC, SAVE1 // Restore old PC.
|
|
| b ->vmeta_equal // Handle __eq metamethod.
|
|
break;
|
|
|
|
@@ -3150,16 +3576,16 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
vk = op == BC_ISEQN;
|
|
| // RA = src*8, RD = num_const*8, JMP with RD = target
|
|
|.if DUALNUM
|
|
- | lwzux TMP0, RA, BASE
|
|
+ | lwzux CARG1, RA, BASE
|
|
| addi PC, PC, 4
|
|
| lwz CARG2, 4(RA)
|
|
- | lwzux TMP1, RD, KBASE
|
|
- | checknum cr0, TMP0
|
|
- | lwz TMP2, -4(PC)
|
|
- | checknum cr1, TMP1
|
|
- | decode_RD4 TMP2, TMP2
|
|
- | lwz CARG3, 4(RD)
|
|
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
|
|
+ | lwzux CARG3, RD, KBASE
|
|
+ | checknum cr0, CARG1
|
|
+ | lwz SAVE0, -4(PC)
|
|
+ | checknum cr1, CARG3
|
|
+ | decode_RD4 SAVE0, SAVE0
|
|
+ | lwz CARG4, 4(RD)
|
|
+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
|
|
if (vk) {
|
|
|->BC_ISEQN_Z:
|
|
} else {
|
|
@@ -3167,7 +3593,7 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
}
|
|
| bne cr0, >7
|
|
| bne cr1, >8
|
|
- | cmpw CARG2, CARG3
|
|
+ | cmpw CARG2, CARG4
|
|
|4:
|
|
|.else
|
|
if (vk) {
|
|
@@ -3175,20 +3601,20 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
} else {
|
|
|->BC_ISNEN_Z: // Dummy label.
|
|
}
|
|
- | lwzx TMP0, BASE, RA
|
|
+ | lwzx CARG1, BASE, RA
|
|
| addi PC, PC, 4
|
|
| lfdx f0, BASE, RA
|
|
- | lwz TMP2, -4(PC)
|
|
+ | lwz SAVE0, -4(PC)
|
|
| lfdx f1, KBASE, RD
|
|
- | decode_RD4 TMP2, TMP2
|
|
- | checknum TMP0
|
|
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
|
|
+ | decode_RD4 SAVE0, SAVE0
|
|
+ | checknum CARG1
|
|
+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
|
|
| bge >3
|
|
| fcmpu cr0, f0, f1
|
|
|.endif
|
|
if (vk) {
|
|
| bne >1
|
|
- | add PC, PC, TMP2
|
|
+ | add PC, PC, SAVE0
|
|
|1:
|
|
|.if not FFI
|
|
|3:
|
|
@@ -3199,13 +3625,13 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
|.if not FFI
|
|
|3:
|
|
|.endif
|
|
- | add PC, PC, TMP2
|
|
+ | add PC, PC, SAVE0
|
|
|2:
|
|
}
|
|
| ins_next
|
|
|.if FFI
|
|
|3:
|
|
- | cmpwi TMP0, LJ_TCDATA
|
|
+ | cmpwi CARG1, LJ_TCDATA
|
|
| beq ->vmeta_equal_cd
|
|
| b <1
|
|
|.endif
|
|
@@ -3213,18 +3639,31 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
|7: // RA is not an integer.
|
|
| bge cr0, <3
|
|
| // RA is a number.
|
|
- | lfd f0, 0(RA)
|
|
+ | .FPU lfd f0, 0(RA)
|
|
| blt cr1, >1
|
|
| // RA is a number, RD is an integer.
|
|
- | tonum_i f1, CARG3
|
|
+ |.if FPU
|
|
+ | tonum_i f1, CARG4
|
|
+ |.else
|
|
+ | bl ->vm_sfi2d_2
|
|
+ |.endif
|
|
| b >2
|
|
|
|
|
|8: // RA is an integer, RD is a number.
|
|
+ |.if FPU
|
|
| tonum_i f0, CARG2
|
|
+ |.else
|
|
+ | bl ->vm_sfi2d_1
|
|
+ |.endif
|
|
|1:
|
|
- | lfd f1, 0(RD)
|
|
+ | .FPU lfd f1, 0(RD)
|
|
|2:
|
|
+ |.if FPU
|
|
| fcmpu cr0, f0, f1
|
|
+ |.else
|
|
+ | blex __ledf2
|
|
+ | cmpwi CRET1, 0
|
|
+ |.endif
|
|
| b <4
|
|
|.endif
|
|
break;
|
|
@@ -3279,7 +3718,12 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| add PC, PC, TMP2
|
|
} else {
|
|
| li TMP1, LJ_TFALSE
|
|
+ |.if FPU
|
|
| lfdx f0, BASE, RD
|
|
+ |.else
|
|
+ | lwzux CARG1, RD, BASE
|
|
+ | lwz CARG2, 4(RD)
|
|
+ |.endif
|
|
| cmplw TMP0, TMP1
|
|
if (op == BC_ISTC) {
|
|
| bge >1
|
|
@@ -3288,7 +3732,12 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
}
|
|
| addis PC, PC, -(BCBIAS_J*4 >> 16)
|
|
| decode_RD4 TMP2, INS
|
|
+ |.if FPU
|
|
| stfdx f0, BASE, RA
|
|
+ |.else
|
|
+ | stwux CARG1, RA, BASE
|
|
+ | stw CARG2, 4(RA)
|
|
+ |.endif
|
|
| add PC, PC, TMP2
|
|
|1:
|
|
}
|
|
@@ -3323,8 +3772,15 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
case BC_MOV:
|
|
| // RA = dst*8, RD = src*8
|
|
| ins_next1
|
|
+ |.if FPU
|
|
| lfdx f0, BASE, RD
|
|
| stfdx f0, BASE, RA
|
|
+ |.else
|
|
+ | lwzux TMP0, RD, BASE
|
|
+ | lwz TMP1, 4(RD)
|
|
+ | stwux TMP0, RA, BASE
|
|
+ | stw TMP1, 4(RA)
|
|
+ |.endif
|
|
| ins_next2
|
|
break;
|
|
case BC_NOT:
|
|
@@ -3426,44 +3882,65 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
|
|
||switch (vk) {
|
|
||case 0:
|
|
- | lwzx TMP1, BASE, RB
|
|
+ | lwzx CARG1, BASE, RB
|
|
| .if DUALNUM
|
|
- | lwzx TMP2, KBASE, RC
|
|
+ | lwzx CARG3, KBASE, RC
|
|
| .endif
|
|
+ | .if FPU
|
|
| lfdx f14, BASE, RB
|
|
| lfdx f15, KBASE, RC
|
|
+ | .else
|
|
+ | add TMP1, BASE, RB
|
|
+ | add TMP2, KBASE, RC
|
|
+ | lwz CARG2, 4(TMP1)
|
|
+ | lwz CARG4, 4(TMP2)
|
|
+ | .endif
|
|
| .if DUALNUM
|
|
- | checknum cr0, TMP1
|
|
- | checknum cr1, TMP2
|
|
+ | checknum cr0, CARG1
|
|
+ | checknum cr1, CARG3
|
|
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
|
|
| bge ->vmeta_arith_vn
|
|
| .else
|
|
- | checknum TMP1; bge ->vmeta_arith_vn
|
|
+ | checknum CARG1; bge ->vmeta_arith_vn
|
|
| .endif
|
|
|| break;
|
|
||case 1:
|
|
- | lwzx TMP1, BASE, RB
|
|
+ | lwzx CARG1, BASE, RB
|
|
| .if DUALNUM
|
|
- | lwzx TMP2, KBASE, RC
|
|
+ | lwzx CARG3, KBASE, RC
|
|
| .endif
|
|
+ | .if FPU
|
|
| lfdx f15, BASE, RB
|
|
| lfdx f14, KBASE, RC
|
|
+ | .else
|
|
+ | add TMP1, BASE, RB
|
|
+ | add TMP2, KBASE, RC
|
|
+ | lwz CARG2, 4(TMP1)
|
|
+ | lwz CARG4, 4(TMP2)
|
|
+ | .endif
|
|
| .if DUALNUM
|
|
- | checknum cr0, TMP1
|
|
- | checknum cr1, TMP2
|
|
+ | checknum cr0, CARG1
|
|
+ | checknum cr1, CARG3
|
|
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
|
|
| bge ->vmeta_arith_nv
|
|
| .else
|
|
- | checknum TMP1; bge ->vmeta_arith_nv
|
|
+ | checknum CARG1; bge ->vmeta_arith_nv
|
|
| .endif
|
|
|| break;
|
|
||default:
|
|
- | lwzx TMP1, BASE, RB
|
|
- | lwzx TMP2, BASE, RC
|
|
+ | lwzx CARG1, BASE, RB
|
|
+ | lwzx CARG3, BASE, RC
|
|
+ | .if FPU
|
|
| lfdx f14, BASE, RB
|
|
| lfdx f15, BASE, RC
|
|
- | checknum cr0, TMP1
|
|
- | checknum cr1, TMP2
|
|
+ | .else
|
|
+ | add TMP1, BASE, RB
|
|
+ | add TMP2, BASE, RC
|
|
+ | lwz CARG2, 4(TMP1)
|
|
+ | lwz CARG4, 4(TMP2)
|
|
+ | .endif
|
|
+ | checknum cr0, CARG1
|
|
+ | checknum cr1, CARG3
|
|
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
|
|
| bge ->vmeta_arith_vv
|
|
|| break;
|
|
@@ -3497,48 +3974,78 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| fsub a, b, a // b - floor(b/c)*c
|
|
|.endmacro
|
|
|
|
|
+ |.macro sfpmod // Soft-float modulo via library calls: a - floor(a/b)*b, with a in CARG1:CARG2 and b in CARG3:CARG4.
|
|
+ |->BC_MODVN_Z: // Shared entry: the fpmod_ variants branch here instead of expanding another copy.
|
|
+ | stw CARG1, SFSAVE_1 // Save first operand (a); it is needed again for the final subtraction.
|
|
+ | stw CARG2, SFSAVE_2
|
|
+ | mr SAVE0, CARG3 // Keep second operand (b) in SAVE0:SAVE1 across the calls.
|
|
+ | mr SAVE1, CARG4
|
|
+ | blex __divdf3 // a / b
|
|
+ | blex floor // floor(a / b); presumably the quotient is returned in the first-argument registers -- confirm against the ABI.
|
|
+ | mr CARG3, SAVE0 // Reload b as the second argument.
|
|
+ | mr CARG4, SAVE1
|
|
+ | blex __muldf3 // floor(a / b) * b
|
|
+ | mr CARG3, CRET1 // The product becomes the second argument of the subtraction.
|
|
+ | mr CARG4, CRET2
|
|
+ | lwz CARG1, SFSAVE_1 // Reload a as the first argument.
|
|
+ | lwz CARG2, SFSAVE_2
|
|
+ | blex __subdf3 // a - floor(a / b) * b; result is left in CRET1:CRET2 for the caller to store.
|
|
+ |.endmacro
|
|
+ |
|
|
|.macro ins_arithfp, fpins
|
|
| ins_arithpre
|
|
|.if "fpins" == "fpmod_"
|
|
| b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
|
|
- |.else
|
|
+ |.elif FPU
|
|
| fpins f0, f14, f15
|
|
| ins_next1
|
|
| stfdx f0, BASE, RA
|
|
| ins_next2
|
|
+ |.else
|
|
+ | blex __divdf3 // Only soft-float div uses this macro.
|
|
+ | ins_next1
|
|
+ | stwux CRET1, RA, BASE
|
|
+ | stw CRET2, 4(RA)
|
|
+ | ins_next2
|
|
|.endif
|
|
|.endmacro
|
|
|
|
|
- |.macro ins_arithdn, intins, fpins
|
|
+ |.macro ins_arithdn, intins, fpins, fpcall
|
|
| // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
|
|
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
|
|
||switch (vk) {
|
|
||case 0:
|
|
- | lwzux TMP1, RB, BASE
|
|
- | lwzux TMP2, RC, KBASE
|
|
- | lwz CARG1, 4(RB)
|
|
- | checknum cr0, TMP1
|
|
- | lwz CARG2, 4(RC)
|
|
+ | lwzux CARG1, RB, BASE
|
|
+ | lwzux CARG3, RC, KBASE
|
|
+ | lwz CARG2, 4(RB)
|
|
+ | checknum cr0, CARG1
|
|
+ | lwz CARG4, 4(RC)
|
|
+ | checknum cr1, CARG3
|
|
|| break;
|
|
||case 1:
|
|
- | lwzux TMP1, RB, BASE
|
|
- | lwzux TMP2, RC, KBASE
|
|
- | lwz CARG2, 4(RB)
|
|
- | checknum cr0, TMP1
|
|
- | lwz CARG1, 4(RC)
|
|
+ | lwzux CARG3, RB, BASE
|
|
+ | lwzux CARG1, RC, KBASE
|
|
+ | lwz CARG4, 4(RB)
|
|
+ | checknum cr0, CARG3
|
|
+ | lwz CARG2, 4(RC)
|
|
+ | checknum cr1, CARG1
|
|
|| break;
|
|
||default:
|
|
- | lwzux TMP1, RB, BASE
|
|
- | lwzux TMP2, RC, BASE
|
|
- | lwz CARG1, 4(RB)
|
|
- | checknum cr0, TMP1
|
|
- | lwz CARG2, 4(RC)
|
|
+ | lwzux CARG1, RB, BASE
|
|
+ | lwzux CARG3, RC, BASE
|
|
+ | lwz CARG2, 4(RB)
|
|
+ | checknum cr0, CARG1
|
|
+ | lwz CARG4, 4(RC)
|
|
+ | checknum cr1, CARG3
|
|
|| break;
|
|
||}
|
|
- | checknum cr1, TMP2
|
|
| bne >5
|
|
| bne cr1, >5
|
|
- | intins CARG1, CARG1, CARG2
|
|
+ |.if "intins" == "intmod"
|
|
+ | mr CARG1, CARG2
|
|
+ | mr CARG2, CARG4
|
|
+ |.endif
|
|
+ | intins CARG1, CARG2, CARG4
|
|
| bso >4
|
|
|1:
|
|
| ins_next1
|
|
@@ -3550,29 +4057,40 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| checkov TMP0, <1 // Ignore unrelated overflow.
|
|
| ins_arithfallback b
|
|
|5: // FP variant.
|
|
+ |.if FPU
|
|
||if (vk == 1) {
|
|
| lfd f15, 0(RB)
|
|
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
|
|
| lfd f14, 0(RC)
|
|
||} else {
|
|
| lfd f14, 0(RB)
|
|
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
|
|
| lfd f15, 0(RC)
|
|
||}
|
|
+ |.endif
|
|
+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
|
|
| ins_arithfallback bge
|
|
|.if "fpins" == "fpmod_"
|
|
| b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
|
|
|.else
|
|
+ |.if FPU
|
|
| fpins f0, f14, f15
|
|
- | ins_next1
|
|
| stfdx f0, BASE, RA
|
|
+ |.else
|
|
+ |.if "fpcall" == "sfpmod"
|
|
+ | sfpmod
|
|
+ |.else
|
|
+ | blex fpcall
|
|
+ |.endif
|
|
+ | stwux CRET1, RA, BASE
|
|
+ | stw CRET2, 4(RA)
|
|
+ |.endif
|
|
+ | ins_next1
|
|
| b <2
|
|
|.endif
|
|
|.endmacro
|
|
|
|
|
- |.macro ins_arith, intins, fpins
|
|
+ |.macro ins_arith, intins, fpins, fpcall
|
|
|.if DUALNUM
|
|
- | ins_arithdn intins, fpins
|
|
+ | ins_arithdn intins, fpins, fpcall
|
|
|.else
|
|
| ins_arithfp fpins
|
|
|.endif
|
|
@@ -3587,9 +4105,9 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| addo. TMP0, TMP0, TMP3
|
|
| add y, a, b
|
|
|.endmacro
|
|
- | ins_arith addo32., fadd
|
|
+ | ins_arith addo32., fadd, __adddf3
|
|
|.else
|
|
- | ins_arith addo., fadd
|
|
+ | ins_arith addo., fadd, __adddf3
|
|
|.endif
|
|
break;
|
|
case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
|
|
@@ -3601,36 +4119,48 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| subo. TMP0, TMP0, TMP3
|
|
| sub y, a, b
|
|
|.endmacro
|
|
- | ins_arith subo32., fsub
|
|
+ | ins_arith subo32., fsub, __subdf3
|
|
|.else
|
|
- | ins_arith subo., fsub
|
|
+ | ins_arith subo., fsub, __subdf3
|
|
|.endif
|
|
break;
|
|
case BC_MULVN: case BC_MULNV: case BC_MULVV:
|
|
- | ins_arith mullwo., fmul
|
|
+ | ins_arith mullwo., fmul, __muldf3
|
|
break;
|
|
case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
|
|
| ins_arithfp fdiv
|
|
break;
|
|
case BC_MODVN:
|
|
- | ins_arith intmod, fpmod
|
|
+ | ins_arith intmod, fpmod, sfpmod
|
|
break;
|
|
case BC_MODNV: case BC_MODVV:
|
|
- | ins_arith intmod, fpmod_
|
|
+ | ins_arith intmod, fpmod_, sfpmod
|
|
break;
|
|
case BC_POW:
|
|
| // NYI: (partial) integer arithmetic.
|
|
- | lwzx TMP1, BASE, RB
|
|
+ | lwzx CARG1, BASE, RB
|
|
+ | lwzx CARG3, BASE, RC
|
|
+ |.if FPU
|
|
| lfdx FARG1, BASE, RB
|
|
- | lwzx TMP2, BASE, RC
|
|
| lfdx FARG2, BASE, RC
|
|
- | checknum cr0, TMP1
|
|
- | checknum cr1, TMP2
|
|
+ |.else
|
|
+ | add TMP1, BASE, RB
|
|
+ | add TMP2, BASE, RC
|
|
+ | lwz CARG2, 4(TMP1)
|
|
+ | lwz CARG4, 4(TMP2)
|
|
+ |.endif
|
|
+ | checknum cr0, CARG1
|
|
+ | checknum cr1, CARG3
|
|
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
|
|
| bge ->vmeta_arith_vv
|
|
| blex pow
|
|
| ins_next1
|
|
+ |.if FPU
|
|
| stfdx FARG1, BASE, RA
|
|
+ |.else
|
|
+ | stwux CARG1, RA, BASE
|
|
+ | stw CARG2, 4(RA)
|
|
+ |.endif
|
|
| ins_next2
|
|
break;
|
|
|
|
@@ -3650,8 +4180,15 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| lp BASE, L->base
|
|
| bne ->vmeta_binop
|
|
| ins_next1
|
|
+ |.if FPU
|
|
| lfdx f0, BASE, SAVE0 // Copy result from RB to RA.
|
|
| stfdx f0, BASE, RA
|
|
+ |.else
|
|
+ | lwzux TMP0, SAVE0, BASE
|
|
+ | lwz TMP1, 4(SAVE0)
|
|
+ | stwux TMP0, RA, BASE
|
|
+ | stw TMP1, 4(RA)
|
|
+ |.endif
|
|
| ins_next2
|
|
break;
|
|
|
|
@@ -3714,8 +4251,15 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
case BC_KNUM:
|
|
| // RA = dst*8, RD = num_const*8
|
|
| ins_next1
|
|
+ |.if FPU
|
|
| lfdx f0, KBASE, RD
|
|
| stfdx f0, BASE, RA
|
|
+ |.else
|
|
+ | lwzux TMP0, RD, KBASE
|
|
+ | lwz TMP1, 4(RD)
|
|
+ | stwux TMP0, RA, BASE
|
|
+ | stw TMP1, 4(RA)
|
|
+ |.endif
|
|
| ins_next2
|
|
break;
|
|
case BC_KPRI:
|
|
@@ -3748,8 +4292,15 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| lwzx UPVAL:RB, LFUNC:RB, RD
|
|
| ins_next1
|
|
| lwz TMP1, UPVAL:RB->v
|
|
+ |.if FPU
|
|
| lfd f0, 0(TMP1)
|
|
| stfdx f0, BASE, RA
|
|
+ |.else
|
|
+ | lwz TMP2, 0(TMP1)
|
|
+ | lwz TMP3, 4(TMP1)
|
|
+ | stwux TMP2, RA, BASE
|
|
+ | stw TMP3, 4(RA)
|
|
+ |.endif
|
|
| ins_next2
|
|
break;
|
|
case BC_USETV:
|
|
@@ -3757,14 +4308,24 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| lwz LFUNC:RB, FRAME_FUNC(BASE)
|
|
| srwi RA, RA, 1
|
|
| addi RA, RA, offsetof(GCfuncL, uvptr)
|
|
+ |.if FPU
|
|
| lfdux f0, RD, BASE
|
|
+ |.else
|
|
+ | lwzux CARG1, RD, BASE
|
|
+ | lwz CARG3, 4(RD)
|
|
+ |.endif
|
|
| lwzx UPVAL:RB, LFUNC:RB, RA
|
|
| lbz TMP3, UPVAL:RB->marked
|
|
| lwz CARG2, UPVAL:RB->v
|
|
| andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
|
|
| lbz TMP0, UPVAL:RB->closed
|
|
| lwz TMP2, 0(RD)
|
|
+ |.if FPU
|
|
| stfd f0, 0(CARG2)
|
|
+ |.else
|
|
+ | stw CARG1, 0(CARG2)
|
|
+ | stw CARG3, 4(CARG2)
|
|
+ |.endif
|
|
| cmplwi cr1, TMP0, 0
|
|
| lwz TMP1, 4(RD)
|
|
| cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
|
|
@@ -3820,11 +4381,21 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| lwz LFUNC:RB, FRAME_FUNC(BASE)
|
|
| srwi RA, RA, 1
|
|
| addi RA, RA, offsetof(GCfuncL, uvptr)
|
|
+ |.if FPU
|
|
| lfdx f0, KBASE, RD
|
|
+ |.else
|
|
+ | lwzux TMP2, RD, KBASE
|
|
+ | lwz TMP3, 4(RD)
|
|
+ |.endif
|
|
| lwzx UPVAL:RB, LFUNC:RB, RA
|
|
| ins_next1
|
|
| lwz TMP1, UPVAL:RB->v
|
|
+ |.if FPU
|
|
| stfd f0, 0(TMP1)
|
|
+ |.else
|
|
+ | stw TMP2, 0(TMP1)
|
|
+ | stw TMP3, 4(TMP1)
|
|
+ |.endif
|
|
| ins_next2
|
|
break;
|
|
case BC_USETP:
|
|
@@ -3972,11 +4543,21 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
|.endif
|
|
| ble ->vmeta_tgetv // Integer key and in array part?
|
|
| lwzx TMP0, TMP1, TMP2
|
|
+ |.if FPU
|
|
| lfdx f14, TMP1, TMP2
|
|
+ |.else
|
|
+ | lwzux SAVE0, TMP1, TMP2
|
|
+ | lwz SAVE1, 4(TMP1)
|
|
+ |.endif
|
|
| checknil TMP0; beq >2
|
|
|1:
|
|
| ins_next1
|
|
+ |.if FPU
|
|
| stfdx f14, BASE, RA
|
|
+ |.else
|
|
+ | stwux SAVE0, RA, BASE
|
|
+ | stw SAVE1, 4(RA)
|
|
+ |.endif
|
|
| ins_next2
|
|
|
|
|
|2: // Check for __index if table value is nil.
|
|
@@ -4052,12 +4633,22 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| lwz TMP1, TAB:RB->asize
|
|
| lwz TMP2, TAB:RB->array
|
|
| cmplw TMP0, TMP1; bge ->vmeta_tgetb
|
|
+ |.if FPU
|
|
| lwzx TMP1, TMP2, RC
|
|
| lfdx f0, TMP2, RC
|
|
+ |.else
|
|
+ | lwzux TMP1, TMP2, RC
|
|
+ | lwz TMP3, 4(TMP2)
|
|
+ |.endif
|
|
| checknil TMP1; beq >5
|
|
|1:
|
|
| ins_next1
|
|
+ |.if FPU
|
|
| stfdx f0, BASE, RA
|
|
+ |.else
|
|
+ | stwux TMP1, RA, BASE
|
|
+ | stw TMP3, 4(RA)
|
|
+ |.endif
|
|
| ins_next2
|
|
|
|
|
|5: // Check for __index if table value is nil.
|
|
@@ -4087,10 +4678,20 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| cmplw TMP0, CARG2
|
|
| slwi TMP2, CARG2, 3
|
|
| ble ->vmeta_tgetr // In array part?
|
|
+ |.if FPU
|
|
| lfdx f14, TMP1, TMP2
|
|
+ |.else
|
|
+ | lwzux SAVE0, TMP2, TMP1
|
|
+ | lwz SAVE1, 4(TMP2)
|
|
+ |.endif
|
|
|->BC_TGETR_Z:
|
|
| ins_next1
|
|
+ |.if FPU
|
|
| stfdx f14, BASE, RA
|
|
+ |.else
|
|
+ | stwux SAVE0, RA, BASE
|
|
+ | stw SAVE1, 4(RA)
|
|
+ |.endif
|
|
| ins_next2
|
|
break;
|
|
|
|
@@ -4131,11 +4732,22 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| ble ->vmeta_tsetv // Integer key and in array part?
|
|
| lwzx TMP2, TMP1, TMP0
|
|
| lbz TMP3, TAB:RB->marked
|
|
+ |.if FPU
|
|
| lfdx f14, BASE, RA
|
|
+ |.else
|
|
+ | add SAVE1, BASE, RA
|
|
+ | lwz SAVE0, 0(SAVE1)
|
|
+ | lwz SAVE1, 4(SAVE1)
|
|
+ |.endif
|
|
| checknil TMP2; beq >3
|
|
|1:
|
|
| andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
|
|
+ |.if FPU
|
|
| stfdx f14, TMP1, TMP0
|
|
+ |.else
|
|
+ | stwux SAVE0, TMP1, TMP0
|
|
+ | stw SAVE1, 4(TMP1)
|
|
+ |.endif
|
|
| bne >7
|
|
|2:
|
|
| ins_next
|
|
@@ -4176,7 +4788,13 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| lwz NODE:TMP2, TAB:RB->node
|
|
| stb ZERO, TAB:RB->nomm // Clear metamethod cache.
|
|
| and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
|
|
+ |.if FPU
|
|
| lfdx f14, BASE, RA
|
|
+ |.else
|
|
+ | add CARG2, BASE, RA
|
|
+ | lwz SAVE0, 0(CARG2)
|
|
+ | lwz SAVE1, 4(CARG2)
|
|
+ |.endif
|
|
| slwi TMP0, TMP1, 5
|
|
| slwi TMP1, TMP1, 3
|
|
| sub TMP1, TMP0, TMP1
|
|
@@ -4192,7 +4810,12 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| checknil CARG2; beq >4 // Key found, but nil value?
|
|
|2:
|
|
| andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
|
|
+ |.if FPU
|
|
| stfd f14, NODE:TMP2->val
|
|
+ |.else
|
|
+ | stw SAVE0, NODE:TMP2->val.u32.hi
|
|
+ | stw SAVE1, NODE:TMP2->val.u32.lo
|
|
+ |.endif
|
|
| bne >7
|
|
|3:
|
|
| ins_next
|
|
@@ -4231,7 +4854,12 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
|
|
| // Returns TValue *.
|
|
| lp BASE, L->base
|
|
+ |.if FPU
|
|
| stfd f14, 0(CRET1)
|
|
+ |.else
|
|
+ | stw SAVE0, 0(CRET1)
|
|
+ | stw SAVE1, 4(CRET1)
|
|
+ |.endif
|
|
| b <3 // No 2nd write barrier needed.
|
|
|
|
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
|
|
@@ -4248,13 +4876,24 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| lwz TMP2, TAB:RB->array
|
|
| lbz TMP3, TAB:RB->marked
|
|
| cmplw TMP0, TMP1
|
|
+ |.if FPU
|
|
| lfdx f14, BASE, RA
|
|
+ |.else
|
|
+ | add CARG2, BASE, RA
|
|
+ | lwz SAVE0, 0(CARG2)
|
|
+ | lwz SAVE1, 4(CARG2)
|
|
+ |.endif
|
|
| bge ->vmeta_tsetb
|
|
| lwzx TMP1, TMP2, RC
|
|
| checknil TMP1; beq >5
|
|
|1:
|
|
| andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
|
|
+ |.if FPU
|
|
| stfdx f14, TMP2, RC
|
|
+ |.else
|
|
+ | stwux SAVE0, RC, TMP2
|
|
+ | stw SAVE1, 4(RC)
|
|
+ |.endif
|
|
| bne >7
|
|
|2:
|
|
| ins_next
|
|
@@ -4294,10 +4933,20 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
|2:
|
|
| cmplw TMP0, CARG3
|
|
| slwi TMP2, CARG3, 3
|
|
+ |.if FPU
|
|
| lfdx f14, BASE, RA
|
|
+ |.else
|
|
+ | lwzux SAVE0, RA, BASE
|
|
+ | lwz SAVE1, 4(RA)
|
|
+ |.endif
|
|
| ble ->vmeta_tsetr // In array part?
|
|
| ins_next1
|
|
+ |.if FPU
|
|
| stfdx f14, TMP1, TMP2
|
|
+ |.else
|
|
+ | stwux SAVE0, TMP1, TMP2
|
|
+ | stw SAVE1, 4(TMP1)
|
|
+ |.endif
|
|
| ins_next2
|
|
|
|
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
|
|
@@ -4327,10 +4976,20 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| add TMP1, TMP1, TMP0
|
|
| andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
|
|
|3: // Copy result slots to table.
|
|
+ |.if FPU
|
|
| lfd f0, 0(RA)
|
|
+ |.else
|
|
+ | lwz SAVE0, 0(RA)
|
|
+ | lwz SAVE1, 4(RA)
|
|
+ |.endif
|
|
| addi RA, RA, 8
|
|
| cmpw cr1, RA, TMP2
|
|
+ |.if FPU
|
|
| stfd f0, 0(TMP1)
|
|
+ |.else
|
|
+ | stw SAVE0, 0(TMP1)
|
|
+ | stw SAVE1, 4(TMP1)
|
|
+ |.endif
|
|
| addi TMP1, TMP1, 8
|
|
| blt cr1, <3
|
|
| bne >7
|
|
@@ -4397,9 +5056,20 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| beq cr1, >3
|
|
|2:
|
|
| addi TMP3, TMP2, 8
|
|
+ |.if FPU
|
|
| lfdx f0, RA, TMP2
|
|
+ |.else
|
|
+ | add CARG3, RA, TMP2
|
|
+ | lwz CARG1, 0(CARG3)
|
|
+ | lwz CARG2, 4(CARG3)
|
|
+ |.endif
|
|
| cmplw cr1, TMP3, NARGS8:RC
|
|
+ |.if FPU
|
|
| stfdx f0, BASE, TMP2
|
|
+ |.else
|
|
+ | stwux CARG1, TMP2, BASE
|
|
+ | stw CARG2, 4(TMP2)
|
|
+ |.endif
|
|
| mr TMP2, TMP3
|
|
| bne cr1, <2
|
|
|3:
|
|
@@ -4432,14 +5102,28 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| add BASE, BASE, RA
|
|
| lwz TMP1, -24(BASE)
|
|
| lwz LFUNC:RB, -20(BASE)
|
|
+ |.if FPU
|
|
| lfd f1, -8(BASE)
|
|
| lfd f0, -16(BASE)
|
|
+ |.else
|
|
+ | lwz CARG1, -8(BASE)
|
|
+ | lwz CARG2, -4(BASE)
|
|
+ | lwz CARG3, -16(BASE)
|
|
+ | lwz CARG4, -12(BASE)
|
|
+ |.endif
|
|
| stw TMP1, 0(BASE) // Copy callable.
|
|
| stw LFUNC:RB, 4(BASE)
|
|
| checkfunc TMP1
|
|
- | stfd f1, 16(BASE) // Copy control var.
|
|
| li NARGS8:RC, 16 // Iterators get 2 arguments.
|
|
+ |.if FPU
|
|
+ | stfd f1, 16(BASE) // Copy control var.
|
|
| stfdu f0, 8(BASE) // Copy state.
|
|
+ |.else
|
|
+ | stw CARG1, 16(BASE) // Copy control var.
|
|
+ | stw CARG2, 20(BASE)
|
|
+ | stwu CARG3, 8(BASE) // Copy state.
|
|
+ | stw CARG4, 4(BASE)
|
|
+ |.endif
|
|
| bne ->vmeta_call
|
|
| ins_call
|
|
break;
|
|
@@ -4460,7 +5144,12 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| slwi TMP3, RC, 3
|
|
| bge >5 // Index points after array part?
|
|
| lwzx TMP2, TMP1, TMP3
|
|
+ |.if FPU
|
|
| lfdx f0, TMP1, TMP3
|
|
+ |.else
|
|
+ | lwzux CARG1, TMP3, TMP1
|
|
+ | lwz CARG2, 4(TMP3)
|
|
+ |.endif
|
|
| checknil TMP2
|
|
| lwz INS, -4(PC)
|
|
| beq >4
|
|
@@ -4472,7 +5161,12 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
|.endif
|
|
| addi RC, RC, 1
|
|
| addis TMP3, PC, -(BCBIAS_J*4 >> 16)
|
|
+ |.if FPU
|
|
| stfd f0, 8(RA)
|
|
+ |.else
|
|
+ | stw CARG1, 8(RA)
|
|
+ | stw CARG2, 12(RA)
|
|
+ |.endif
|
|
| decode_RD4 TMP1, INS
|
|
| stw RC, -4(RA) // Update control var.
|
|
| add PC, TMP1, TMP3
|
|
@@ -4497,17 +5191,38 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| slwi RB, RC, 3
|
|
| sub TMP3, TMP3, RB
|
|
| lwzx RB, TMP2, TMP3
|
|
+ |.if FPU
|
|
| lfdx f0, TMP2, TMP3
|
|
+ |.else
|
|
+ | add CARG3, TMP2, TMP3
|
|
+ | lwz CARG1, 0(CARG3)
|
|
+ | lwz CARG2, 4(CARG3)
|
|
+ |.endif
|
|
| add NODE:TMP3, TMP2, TMP3
|
|
| checknil RB
|
|
| lwz INS, -4(PC)
|
|
| beq >7
|
|
+ |.if FPU
|
|
| lfd f1, NODE:TMP3->key
|
|
+ |.else
|
|
+ | lwz CARG3, NODE:TMP3->key.u32.hi
|
|
+ | lwz CARG4, NODE:TMP3->key.u32.lo
|
|
+ |.endif
|
|
| addis TMP2, PC, -(BCBIAS_J*4 >> 16)
|
|
+ |.if FPU
|
|
| stfd f0, 8(RA)
|
|
+ |.else
|
|
+ | stw CARG1, 8(RA)
|
|
+ | stw CARG2, 12(RA)
|
|
+ |.endif
|
|
| add RC, RC, TMP0
|
|
| decode_RD4 TMP1, INS
|
|
+ |.if FPU
|
|
| stfd f1, 0(RA)
|
|
+ |.else
|
|
+ | stw CARG3, 0(RA)
|
|
+ | stw CARG4, 4(RA)
|
|
+ |.endif
|
|
| addi RC, RC, 1
|
|
| add PC, TMP1, TMP2
|
|
| stw RC, -4(RA) // Update control var.
|
|
@@ -4573,9 +5288,19 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| subi TMP2, TMP2, 16
|
|
| ble >2 // No vararg slots?
|
|
|1: // Copy vararg slots to destination slots.
|
|
+ |.if FPU
|
|
| lfd f0, 0(RC)
|
|
+ |.else
|
|
+ | lwz CARG1, 0(RC)
|
|
+ | lwz CARG2, 4(RC)
|
|
+ |.endif
|
|
| addi RC, RC, 8
|
|
+ |.if FPU
|
|
| stfd f0, 0(RA)
|
|
+ |.else
|
|
+ | stw CARG1, 0(RA)
|
|
+ | stw CARG2, 4(RA)
|
|
+ |.endif
|
|
| cmplw RA, TMP2
|
|
| cmplw cr1, RC, TMP3
|
|
| bge >3 // All destination slots filled?
|
|
@@ -4598,9 +5323,19 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| addi MULTRES, TMP1, 8
|
|
| bgt >7
|
|
|6:
|
|
+ |.if FPU
|
|
| lfd f0, 0(RC)
|
|
+ |.else
|
|
+ | lwz CARG1, 0(RC)
|
|
+ | lwz CARG2, 4(RC)
|
|
+ |.endif
|
|
| addi RC, RC, 8
|
|
+ |.if FPU
|
|
| stfd f0, 0(RA)
|
|
+ |.else
|
|
+ | stw CARG1, 0(RA)
|
|
+ | stw CARG2, 4(RA)
|
|
+ |.endif
|
|
| cmplw RC, TMP3
|
|
| addi RA, RA, 8
|
|
| blt <6 // More vararg slots?
|
|
@@ -4651,14 +5386,38 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| li TMP1, 0
|
|
|2:
|
|
| addi TMP3, TMP1, 8
|
|
+ |.if FPU
|
|
| lfdx f0, RA, TMP1
|
|
+ |.else
|
|
+ | add CARG3, RA, TMP1
|
|
+ | lwz CARG1, 0(CARG3)
|
|
+ | lwz CARG2, 4(CARG3)
|
|
+ |.endif
|
|
| cmpw TMP3, RC
|
|
+ |.if FPU
|
|
| stfdx f0, TMP2, TMP1
|
|
+ |.else
|
|
+ | add CARG3, TMP2, TMP1
|
|
+ | stw CARG1, 0(CARG3)
|
|
+ | stw CARG2, 4(CARG3)
|
|
+ |.endif
|
|
| beq >3
|
|
| addi TMP1, TMP3, 8
|
|
+ |.if FPU
|
|
| lfdx f1, RA, TMP3
|
|
+ |.else
|
|
+ | add CARG3, RA, TMP3
|
|
+ | lwz CARG1, 0(CARG3)
|
|
+ | lwz CARG2, 4(CARG3)
|
|
+ |.endif
|
|
| cmpw TMP1, RC
|
|
+ |.if FPU
|
|
| stfdx f1, TMP2, TMP3
|
|
+ |.else
|
|
+ | add CARG3, TMP2, TMP3
|
|
+ | stw CARG1, 0(CARG3)
|
|
+ | stw CARG2, 4(CARG3)
|
|
+ |.endif
|
|
| bne <2
|
|
|3:
|
|
|5:
|
|
@@ -4700,8 +5459,15 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
| subi TMP2, BASE, 8
|
|
| decode_RB8 RB, INS
|
|
if (op == BC_RET1) {
|
|
+ |.if FPU
|
|
| lfd f0, 0(RA)
|
|
| stfd f0, 0(TMP2)
|
|
+ |.else
|
|
+ | lwz CARG1, 0(RA)
|
|
+ | lwz CARG2, 4(RA)
|
|
+ | stw CARG1, 0(TMP2)
|
|
+ | stw CARG2, 4(TMP2)
|
|
+ |.endif
|
|
}
|
|
|5:
|
|
| cmplw RB, RD
|
|
@@ -4762,11 +5528,11 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
|4:
|
|
| stw CARG1, FORL_IDX*8+4(RA)
|
|
} else {
|
|
- | lwz TMP3, FORL_STEP*8(RA)
|
|
+ | lwz SAVE0, FORL_STEP*8(RA)
|
|
| lwz CARG3, FORL_STEP*8+4(RA)
|
|
| lwz TMP2, FORL_STOP*8(RA)
|
|
| lwz CARG2, FORL_STOP*8+4(RA)
|
|
- | cmplw cr7, TMP3, TISNUM
|
|
+ | cmplw cr7, SAVE0, TISNUM
|
|
| cmplw cr1, TMP2, TISNUM
|
|
| crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
|
|
| crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
|
|
@@ -4809,41 +5575,80 @@ static void build_ins(BuildCtx *ctx, BCO
|
|
if (vk) {
|
|
|.if DUALNUM
|
|
|9: // FP loop.
|
|
+ |.if FPU
|
|
| lfd f1, FORL_IDX*8(RA)
|
|
|.else
|
|
+ | lwz CARG1, FORL_IDX*8(RA)
|
|
+ | lwz CARG2, FORL_IDX*8+4(RA)
|
|
+ |.endif
|
|
+ |.else
|
|
| lfdux f1, RA, BASE
|
|
|.endif
|
|
+ |.if FPU
|
|
| lfd f3, FORL_STEP*8(RA)
|
|
| lfd f2, FORL_STOP*8(RA)
|
|
- | lwz TMP3, FORL_STEP*8(RA)
|
|
| fadd f1, f1, f3
|
|
| stfd f1, FORL_IDX*8(RA)
|
|
+ |.else
|
|
+ | lwz CARG3, FORL_STEP*8(RA)
|
|
+ | lwz CARG4, FORL_STEP*8+4(RA)
|
|
+ | mr SAVE1, RD
|
|
+ | blex __adddf3
|
|
+ | mr RD, SAVE1
|
|
+ | stw CRET1, FORL_IDX*8(RA)
|
|
+ | stw CRET2, FORL_IDX*8+4(RA)
|
|
+ | lwz CARG3, FORL_STOP*8(RA)
|
|
+ | lwz CARG4, FORL_STOP*8+4(RA)
|
|
+ |.endif
|
|
+ | lwz SAVE0, FORL_STEP*8(RA)
|
|
} else {
|
|
|.if DUALNUM
|
|
|9: // FP loop.
|
|
|.else
|
|
| lwzux TMP1, RA, BASE
|
|
- | lwz TMP3, FORL_STEP*8(RA)
|
|
+ | lwz SAVE0, FORL_STEP*8(RA)
|
|
| lwz TMP2, FORL_STOP*8(RA)
|
|
| cmplw cr0, TMP1, TISNUM
|
|
- | cmplw cr7, TMP3, TISNUM
|
|
+ | cmplw cr7, SAVE0, TISNUM
|
|
| cmplw cr1, TMP2, TISNUM
|
|
|.endif
|
|
+ |.if FPU
|
|
| lfd f1, FORL_IDX*8(RA)
|
|
+ |.else
|
|
+ | lwz CARG1, FORL_IDX*8(RA)
|
|
+ | lwz CARG2, FORL_IDX*8+4(RA)
|
|
+ |.endif
|
|
| crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
|
|
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
|
|
+ |.if FPU
|
|
| lfd f2, FORL_STOP*8(RA)
|
|
+ |.else
|
|
+ | lwz CARG3, FORL_STOP*8(RA)
|
|
+ | lwz CARG4, FORL_STOP*8+4(RA)
|
|
+ |.endif
|
|
| bge ->vmeta_for
|
|
}
|
|
- | cmpwi cr6, TMP3, 0
|
|
+ | cmpwi cr6, SAVE0, 0
|
|
if (op != BC_JFORL) {
|
|
| srwi RD, RD, 1
|
|
}
|
|
+ |.if FPU
|
|
| stfd f1, FORL_EXT*8(RA)
|
|
+ |.else
|
|
+ | stw CARG1, FORL_EXT*8(RA)
|
|
+ | stw CARG2, FORL_EXT*8+4(RA)
|
|
+ |.endif
|
|
if (op != BC_JFORL) {
|
|
| add RD, PC, RD
|
|
}
|
|
+ |.if FPU
|
|
| fcmpu cr0, f1, f2
|
|
+ |.else
|
|
+ | mr SAVE1, RD
|
|
+ | blex __ledf2
|
|
+ | cmpwi CRET1, 0
|
|
+ | mr RD, SAVE1
|
|
+ |.endif
|
|
if (op == BC_JFORI) {
|
|
| addis PC, RD, -(BCBIAS_J*4 >> 16)
|
|
}
|