You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2742 lines
72 KiB

  1. From fd37da0d586c331b0008fbfd653a9659344fe76f Mon Sep 17 00:00:00 2001
  2. From: Mike Pall <mike>
  3. Date: Wed, 26 Jul 2017 09:52:19 +0200
  4. Subject: [PATCH] PPC: Add soft-float support to interpreter.
  5. Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
  6. Sponsored by Cisco Systems, Inc.
  7. ---
  8. src/host/buildvm_asm.c | 2 +-
  9. src/lj_arch.h | 29 +-
  10. src/lj_ccall.c | 38 +-
  11. src/lj_ccall.h | 4 +-
  12. src/lj_ccallback.c | 30 +-
  13. src/lj_frame.h | 2 +-
  14. src/lj_ircall.h | 2 +-
  15. src/vm_ppc.dasc | 1249 +++++++++++++++++++++++++++++++++-------
  16. 8 files changed, 1101 insertions(+), 255 deletions(-)
  17. --- a/src/host/buildvm_asm.c
  18. +++ b/src/host/buildvm_asm.c
  19. @@ -338,7 +338,7 @@ void emit_asm(BuildCtx *ctx)
  20. #if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
  21. fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
  22. #endif
  23. -#if LJ_TARGET_PPC && !LJ_TARGET_PS3
  24. +#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP
  25. /* Hard-float ABI. */
  26. fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
  27. #endif
  28. --- a/src/lj_arch.h
  29. +++ b/src/lj_arch.h
  30. @@ -254,6 +254,29 @@
  31. #else
  32. #define LJ_ARCH_BITS 32
  33. #define LJ_ARCH_NAME "ppc"
  34. +
  35. +#if !defined(LJ_ARCH_HASFPU)
  36. +#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
  37. +#define LJ_ARCH_HASFPU 0
  38. +#else
  39. +#define LJ_ARCH_HASFPU 1
  40. +#endif
  41. +#endif
  42. +
  43. +#if !defined(LJ_ABI_SOFTFP)
  44. +#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
  45. +#define LJ_ABI_SOFTFP 1
  46. +#else
  47. +#define LJ_ABI_SOFTFP 0
  48. +#endif
  49. +#endif
  50. +#endif
  51. +
  52. +#if LJ_ABI_SOFTFP
  53. +#define LJ_ARCH_NOJIT 1 /* NYI */
  54. +#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
  55. +#else
  56. +#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
  57. #endif
  58. #define LJ_TARGET_PPC 1
  59. @@ -262,7 +285,6 @@
  60. #define LJ_TARGET_MASKSHIFT 0
  61. #define LJ_TARGET_MASKROT 1
  62. #define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
  63. -#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
  64. #if LJ_TARGET_CONSOLE
  65. #define LJ_ARCH_PPC32ON64 1
  66. @@ -415,16 +437,13 @@
  67. #error "No support for ILP32 model on ARM64"
  68. #endif
  69. #elif LJ_TARGET_PPC
  70. -#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
  71. -#error "No support for PowerPC CPUs without double-precision FPU"
  72. -#endif
  73. #if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
  74. #error "No support for little-endian PPC32"
  75. #endif
  76. #if LJ_ARCH_PPC64
  77. #error "No support for PowerPC 64 bit mode (yet)"
  78. #endif
  79. -#ifdef __NO_FPRS__
  80. +#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
  81. #error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
  82. #endif
  83. #elif LJ_TARGET_MIPS32
  84. --- a/src/lj_ccall.c
  85. +++ b/src/lj_ccall.c
  86. @@ -387,6 +387,24 @@
  87. #define CCALL_HANDLE_COMPLEXARG \
  88. /* Pass complex by value in 2 or 4 GPRs. */
  89. +#define CCALL_HANDLE_GPR \
  90. + /* Try to pass argument in GPRs. */ \
  91. + if (n > 1) { \
  92. + lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \
  93. + if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \
  94. + ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
  95. + else if (ngpr + n > maxgpr) \
  96. + ngpr = maxgpr; /* Prevent reordering. */ \
  97. + } \
  98. + if (ngpr + n <= maxgpr) { \
  99. + dp = &cc->gpr[ngpr]; \
  100. + ngpr += n; \
  101. + goto done; \
  102. + } \
  103. +
  104. +#if LJ_ABI_SOFTFP
  105. +#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR
  106. +#else
  107. #define CCALL_HANDLE_REGARG \
  108. if (isfp) { /* Try to pass argument in FPRs. */ \
  109. if (nfpr + 1 <= CCALL_NARG_FPR) { \
  110. @@ -395,24 +413,16 @@
  111. d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
  112. goto done; \
  113. } \
  114. - } else { /* Try to pass argument in GPRs. */ \
  115. - if (n > 1) { \
  116. - lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \
  117. - if (ctype_isinteger(d->info)) \
  118. - ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
  119. - else if (ngpr + n > maxgpr) \
  120. - ngpr = maxgpr; /* Prevent reordering. */ \
  121. - } \
  122. - if (ngpr + n <= maxgpr) { \
  123. - dp = &cc->gpr[ngpr]; \
  124. - ngpr += n; \
  125. - goto done; \
  126. - } \
  127. + } else { \
  128. + CCALL_HANDLE_GPR \
  129. }
  130. +#endif
  131. +#if !LJ_ABI_SOFTFP
  132. #define CCALL_HANDLE_RET \
  133. if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
  134. ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
  135. +#endif
  136. #elif LJ_TARGET_MIPS32
  137. /* -- MIPS o32 calling conventions ---------------------------------------- */
  138. @@ -1080,7 +1090,7 @@ static int ccall_set_args(lua_State *L,
  139. }
  140. if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
  141. -#if LJ_TARGET_X64 || LJ_TARGET_PPC
  142. +#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
  143. cc->nfpr = nfpr; /* Required for vararg functions. */
  144. #endif
  145. cc->nsp = nsp;
  146. --- a/src/lj_ccall.h
  147. +++ b/src/lj_ccall.h
  148. @@ -86,9 +86,9 @@ typedef union FPRArg {
  149. #elif LJ_TARGET_PPC
  150. #define CCALL_NARG_GPR 8
  151. -#define CCALL_NARG_FPR 8
  152. +#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8)
  153. #define CCALL_NRET_GPR 4 /* For complex double. */
  154. -#define CCALL_NRET_FPR 1
  155. +#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1)
  156. #define CCALL_SPS_EXTRA 4
  157. #define CCALL_SPS_FREE 0
  158. --- a/src/lj_ccallback.c
  159. +++ b/src/lj_ccallback.c
  160. @@ -419,6 +419,23 @@ void lj_ccallback_mcode_free(CTState *ct
  161. #elif LJ_TARGET_PPC
  162. +#define CALLBACK_HANDLE_GPR \
  163. + if (n > 1) { \
  164. + lua_assert(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \
  165. + ctype_isinteger(cta->info)) && n == 2); /* int64_t. */ \
  166. + ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
  167. + } \
  168. + if (ngpr + n <= maxgpr) { \
  169. + sp = &cts->cb.gpr[ngpr]; \
  170. + ngpr += n; \
  171. + goto done; \
  172. + }
  173. +
  174. +#if LJ_ABI_SOFTFP
  175. +#define CALLBACK_HANDLE_REGARG \
  176. + CALLBACK_HANDLE_GPR \
  177. + UNUSED(isfp);
  178. +#else
  179. #define CALLBACK_HANDLE_REGARG \
  180. if (isfp) { \
  181. if (nfpr + 1 <= CCALL_NARG_FPR) { \
  182. @@ -427,20 +444,15 @@ void lj_ccallback_mcode_free(CTState *ct
  183. goto done; \
  184. } \
  185. } else { /* Try to pass argument in GPRs. */ \
  186. - if (n > 1) { \
  187. - lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \
  188. - ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
  189. - } \
  190. - if (ngpr + n <= maxgpr) { \
  191. - sp = &cts->cb.gpr[ngpr]; \
  192. - ngpr += n; \
  193. - goto done; \
  194. - } \
  195. + CALLBACK_HANDLE_GPR \
  196. }
  197. +#endif
  198. +#if !LJ_ABI_SOFTFP
  199. #define CALLBACK_HANDLE_RET \
  200. if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
  201. *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */
  202. +#endif
  203. #elif LJ_TARGET_MIPS32
  204. --- a/src/lj_frame.h
  205. +++ b/src/lj_frame.h
  206. @@ -226,7 +226,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CAL
  207. #define CFRAME_OFS_L 36
  208. #define CFRAME_OFS_PC 32
  209. #define CFRAME_OFS_MULTRES 28
  210. -#define CFRAME_SIZE 272
  211. +#define CFRAME_SIZE (LJ_ARCH_HASFPU ? 272 : 128)
  212. #define CFRAME_SHIFT_MULTRES 3
  213. #endif
  214. #elif LJ_TARGET_MIPS32
  215. --- a/src/lj_ircall.h
  216. +++ b/src/lj_ircall.h
  217. @@ -272,7 +272,7 @@ LJ_DATA const CCallInfo lj_ir_callinfo[I
  218. #define fp64_f2l __aeabi_f2lz
  219. #define fp64_f2ul __aeabi_f2ulz
  220. #endif
  221. -#elif LJ_TARGET_MIPS
  222. +#elif LJ_TARGET_MIPS || LJ_TARGET_PPC
  223. #define softfp_add __adddf3
  224. #define softfp_sub __subdf3
  225. #define softfp_mul __muldf3
  226. --- a/src/vm_ppc.dasc
  227. +++ b/src/vm_ppc.dasc
  228. @@ -103,6 +103,18 @@
  229. |// Fixed register assignments for the interpreter.
  230. |// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA)
  231. |
  232. +|.macro .FPU, a, b
  233. +|.if FPU
  234. +| a, b
  235. +|.endif
  236. +|.endmacro
  237. +|
  238. +|.macro .FPU, a, b, c
  239. +|.if FPU
  240. +| a, b, c
  241. +|.endif
  242. +|.endmacro
  243. +|
  244. |// The following must be C callee-save (but BASE is often refetched).
  245. |.define BASE, r14 // Base of current Lua stack frame.
  246. |.define KBASE, r15 // Constants of current Lua function.
  247. @@ -116,8 +128,10 @@
  248. |.define TISNUM, r22
  249. |.define TISNIL, r23
  250. |.define ZERO, r24
  251. +|.if FPU
  252. |.define TOBIT, f30 // 2^52 + 2^51.
  253. |.define TONUM, f31 // 2^52 + 2^51 + 2^31.
  254. +|.endif
  255. |
  256. |// The following temporaries are not saved across C calls, except for RA.
  257. |.define RA, r20 // Callee-save.
  258. @@ -133,6 +147,7 @@
  259. |
  260. |// Saved temporaries.
  261. |.define SAVE0, r21
  262. +|.define SAVE1, r25
  263. |
  264. |// Calling conventions.
  265. |.define CARG1, r3
  266. @@ -141,8 +156,10 @@
  267. |.define CARG4, r6 // Overlaps TMP3.
  268. |.define CARG5, r7 // Overlaps INS.
  269. |
  270. +|.if FPU
  271. |.define FARG1, f1
  272. |.define FARG2, f2
  273. +|.endif
  274. |
  275. |.define CRET1, r3
  276. |.define CRET2, r4
  277. @@ -213,10 +230,16 @@
  278. |.endif
  279. |.else
  280. |
  281. +|.if FPU
  282. |.define SAVE_LR, 276(sp)
  283. |.define CFRAME_SPACE, 272 // Delta for sp.
  284. |// Back chain for sp: 272(sp) <-- sp entering interpreter
  285. |.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves.
  286. +|.else
  287. +|.define SAVE_LR, 132(sp)
  288. +|.define CFRAME_SPACE, 128 // Delta for sp.
  289. +|// Back chain for sp: 128(sp) <-- sp entering interpreter
  290. +|.endif
  291. |.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves.
  292. |.define SAVE_CR, 52(sp) // 32 bit CR save.
  293. |.define SAVE_ERRF, 48(sp) // 32 bit C frame info.
  294. @@ -226,16 +249,25 @@
  295. |.define SAVE_PC, 32(sp)
  296. |.define SAVE_MULTRES, 28(sp)
  297. |.define UNUSED1, 24(sp)
  298. +|.if FPU
  299. |.define TMPD_LO, 20(sp)
  300. |.define TMPD_HI, 16(sp)
  301. |.define TONUM_LO, 12(sp)
  302. |.define TONUM_HI, 8(sp)
  303. +|.else
  304. +|.define SFSAVE_4, 20(sp)
  305. +|.define SFSAVE_3, 16(sp)
  306. +|.define SFSAVE_2, 12(sp)
  307. +|.define SFSAVE_1, 8(sp)
  308. +|.endif
  309. |// Next frame lr: 4(sp)
  310. |// Back chain for sp: 0(sp) <-- sp while in interpreter
  311. |
  312. +|.if FPU
  313. |.define TMPD_BLO, 23(sp)
  314. |.define TMPD, TMPD_HI
  315. |.define TONUM_D, TONUM_HI
  316. +|.endif
  317. |
  318. |.endif
  319. |
  320. @@ -245,7 +277,7 @@
  321. |.else
  322. | stw r..reg, SAVE_GPR_+(reg-14)*4(sp)
  323. |.endif
  324. -| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
  325. +| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
  326. |.endmacro
  327. |.macro rest_, reg
  328. |.if GPR64
  329. @@ -253,7 +285,7 @@
  330. |.else
  331. | lwz r..reg, SAVE_GPR_+(reg-14)*4(sp)
  332. |.endif
  333. -| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
  334. +| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
  335. |.endmacro
  336. |
  337. |.macro saveregs
  338. @@ -323,6 +355,7 @@
  339. |// Trap for not-yet-implemented parts.
  340. |.macro NYI; tw 4, sp, sp; .endmacro
  341. |
  342. +|.if FPU
  343. |// int/FP conversions.
  344. |.macro tonum_i, freg, reg
  345. | xoris reg, reg, 0x8000
  346. @@ -346,6 +379,7 @@
  347. |.macro toint, reg, freg
  348. | toint reg, freg, freg
  349. |.endmacro
  350. +|.endif
  351. |
  352. |//-----------------------------------------------------------------------
  353. |
  354. @@ -533,9 +567,19 @@ static void build_subroutines(BuildCtx *
  355. | beq >2
  356. |1:
  357. | addic. TMP1, TMP1, -8
  358. + |.if FPU
  359. | lfd f0, 0(RA)
  360. + |.else
  361. + | lwz CARG1, 0(RA)
  362. + | lwz CARG2, 4(RA)
  363. + |.endif
  364. | addi RA, RA, 8
  365. + |.if FPU
  366. | stfd f0, 0(BASE)
  367. + |.else
  368. + | stw CARG1, 0(BASE)
  369. + | stw CARG2, 4(BASE)
  370. + |.endif
  371. | addi BASE, BASE, 8
  372. | bney <1
  373. |
  374. @@ -613,23 +657,23 @@ static void build_subroutines(BuildCtx *
  375. | .toc ld TOCREG, SAVE_TOC
  376. | li TISNUM, LJ_TISNUM // Setup type comparison constants.
  377. | lp BASE, L->base
  378. - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
  379. + | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
  380. | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
  381. | li ZERO, 0
  382. - | stw TMP3, TMPD
  383. + | .FPU stw TMP3, TMPD
  384. | li TMP1, LJ_TFALSE
  385. - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
  386. + | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
  387. | li TISNIL, LJ_TNIL
  388. | li_vmstate INTERP
  389. - | lfs TOBIT, TMPD
  390. + | .FPU lfs TOBIT, TMPD
  391. | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
  392. | la RA, -8(BASE) // Results start at BASE-8.
  393. - | stw TMP3, TMPD
  394. + | .FPU stw TMP3, TMPD
  395. | addi DISPATCH, DISPATCH, GG_G2DISP
  396. | stw TMP1, 0(RA) // Prepend false to error message.
  397. | li RD, 16 // 2 results: false + error message.
  398. | st_vmstate
  399. - | lfs TONUM, TMPD
  400. + | .FPU lfs TONUM, TMPD
  401. | b ->vm_returnc
  402. |
  403. |//-----------------------------------------------------------------------
  404. @@ -690,22 +734,22 @@ static void build_subroutines(BuildCtx *
  405. | li TISNUM, LJ_TISNUM // Setup type comparison constants.
  406. | lp TMP1, L->top
  407. | lwz PC, FRAME_PC(BASE)
  408. - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
  409. + | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
  410. | stb CARG3, L->status
  411. - | stw TMP3, TMPD
  412. - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
  413. - | lfs TOBIT, TMPD
  414. + | .FPU stw TMP3, TMPD
  415. + | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
  416. + | .FPU lfs TOBIT, TMPD
  417. | sub RD, TMP1, BASE
  418. - | stw TMP3, TMPD
  419. - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
  420. + | .FPU stw TMP3, TMPD
  421. + | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
  422. | addi RD, RD, 8
  423. - | stw TMP0, TONUM_HI
  424. + | .FPU stw TMP0, TONUM_HI
  425. | li_vmstate INTERP
  426. | li ZERO, 0
  427. | st_vmstate
  428. | andix. TMP0, PC, FRAME_TYPE
  429. | mr MULTRES, RD
  430. - | lfs TONUM, TMPD
  431. + | .FPU lfs TONUM, TMPD
  432. | li TISNIL, LJ_TNIL
  433. | beq ->BC_RET_Z
  434. | b ->vm_return
  435. @@ -739,19 +783,19 @@ static void build_subroutines(BuildCtx *
  436. | lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
  437. | li TISNUM, LJ_TISNUM // Setup type comparison constants.
  438. | lp TMP1, L->top
  439. - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
  440. + | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
  441. | add PC, PC, BASE
  442. - | stw TMP3, TMPD
  443. + | .FPU stw TMP3, TMPD
  444. | li ZERO, 0
  445. - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
  446. - | lfs TOBIT, TMPD
  447. + | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
  448. + | .FPU lfs TOBIT, TMPD
  449. | sub PC, PC, TMP2 // PC = frame delta + frame type
  450. - | stw TMP3, TMPD
  451. - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
  452. + | .FPU stw TMP3, TMPD
  453. + | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
  454. | sub NARGS8:RC, TMP1, BASE
  455. - | stw TMP0, TONUM_HI
  456. + | .FPU stw TMP0, TONUM_HI
  457. | li_vmstate INTERP
  458. - | lfs TONUM, TMPD
  459. + | .FPU lfs TONUM, TMPD
  460. | li TISNIL, LJ_TNIL
  461. | st_vmstate
  462. |
  463. @@ -839,15 +883,30 @@ static void build_subroutines(BuildCtx *
  464. | lwz INS, -4(PC)
  465. | subi CARG2, RB, 16
  466. | decode_RB8 SAVE0, INS
  467. + |.if FPU
  468. | lfd f0, 0(RA)
  469. + |.else
  470. + | lwz TMP2, 0(RA)
  471. + | lwz TMP3, 4(RA)
  472. + |.endif
  473. | add TMP1, BASE, SAVE0
  474. | stp BASE, L->base
  475. | cmplw TMP1, CARG2
  476. | sub CARG3, CARG2, TMP1
  477. | decode_RA8 RA, INS
  478. + |.if FPU
  479. | stfd f0, 0(CARG2)
  480. + |.else
  481. + | stw TMP2, 0(CARG2)
  482. + | stw TMP3, 4(CARG2)
  483. + |.endif
  484. | bney ->BC_CAT_Z
  485. + |.if FPU
  486. | stfdx f0, BASE, RA
  487. + |.else
  488. + | stwux TMP2, RA, BASE
  489. + | stw TMP3, 4(RA)
  490. + |.endif
  491. | b ->cont_nop
  492. |
  493. |//-- Table indexing metamethods -----------------------------------------
  494. @@ -900,9 +959,19 @@ static void build_subroutines(BuildCtx *
  495. | // Returns TValue * (finished) or NULL (metamethod).
  496. | cmplwi CRET1, 0
  497. | beq >3
  498. + |.if FPU
  499. | lfd f0, 0(CRET1)
  500. + |.else
  501. + | lwz TMP0, 0(CRET1)
  502. + | lwz TMP1, 4(CRET1)
  503. + |.endif
  504. | ins_next1
  505. + |.if FPU
  506. | stfdx f0, BASE, RA
  507. + |.else
  508. + | stwux TMP0, RA, BASE
  509. + | stw TMP1, 4(RA)
  510. + |.endif
  511. | ins_next2
  512. |
  513. |3: // Call __index metamethod.
  514. @@ -920,7 +989,12 @@ static void build_subroutines(BuildCtx *
  515. | // Returns cTValue * or NULL.
  516. | cmplwi CRET1, 0
  517. | beq >1
  518. + |.if FPU
  519. | lfd f14, 0(CRET1)
  520. + |.else
  521. + | lwz SAVE0, 0(CRET1)
  522. + | lwz SAVE1, 4(CRET1)
  523. + |.endif
  524. | b ->BC_TGETR_Z
  525. |1:
  526. | stwx TISNIL, BASE, RA
  527. @@ -975,11 +1049,21 @@ static void build_subroutines(BuildCtx *
  528. | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
  529. | // Returns TValue * (finished) or NULL (metamethod).
  530. | cmplwi CRET1, 0
  531. + |.if FPU
  532. | lfdx f0, BASE, RA
  533. + |.else
  534. + | lwzux TMP2, RA, BASE
  535. + | lwz TMP3, 4(RA)
  536. + |.endif
  537. | beq >3
  538. | // NOBARRIER: lj_meta_tset ensures the table is not black.
  539. | ins_next1
  540. + |.if FPU
  541. | stfd f0, 0(CRET1)
  542. + |.else
  543. + | stw TMP2, 0(CRET1)
  544. + | stw TMP3, 4(CRET1)
  545. + |.endif
  546. | ins_next2
  547. |
  548. |3: // Call __newindex metamethod.
  549. @@ -990,7 +1074,12 @@ static void build_subroutines(BuildCtx *
  550. | add PC, TMP1, BASE
  551. | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
  552. | li NARGS8:RC, 24 // 3 args for func(t, k, v)
  553. + |.if FPU
  554. | stfd f0, 16(BASE) // Copy value to third argument.
  555. + |.else
  556. + | stw TMP2, 16(BASE)
  557. + | stw TMP3, 20(BASE)
  558. + |.endif
  559. | b ->vm_call_dispatch_f
  560. |
  561. |->vmeta_tsetr:
  562. @@ -998,7 +1087,12 @@ static void build_subroutines(BuildCtx *
  563. | stw PC, SAVE_PC
  564. | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
  565. | // Returns TValue *.
  566. + |.if FPU
  567. | stfd f14, 0(CRET1)
  568. + |.else
  569. + | stw SAVE0, 0(CRET1)
  570. + | stw SAVE1, 4(CRET1)
  571. + |.endif
  572. | b ->cont_nop
  573. |
  574. |//-- Comparison metamethods ---------------------------------------------
  575. @@ -1037,9 +1131,19 @@ static void build_subroutines(BuildCtx *
  576. |
  577. |->cont_ra: // RA = resultptr
  578. | lwz INS, -4(PC)
  579. + |.if FPU
  580. | lfd f0, 0(RA)
  581. + |.else
  582. + | lwz CARG1, 0(RA)
  583. + | lwz CARG2, 4(RA)
  584. + |.endif
  585. | decode_RA8 TMP1, INS
  586. + |.if FPU
  587. | stfdx f0, BASE, TMP1
  588. + |.else
  589. + | stwux CARG1, TMP1, BASE
  590. + | stw CARG2, 4(TMP1)
  591. + |.endif
  592. | b ->cont_nop
  593. |
  594. |->cont_condt: // RA = resultptr
  595. @@ -1245,22 +1349,32 @@ static void build_subroutines(BuildCtx *
  596. |.macro .ffunc_n, name
  597. |->ff_ .. name:
  598. | cmplwi NARGS8:RC, 8
  599. - | lwz CARG3, 0(BASE)
  600. + | lwz CARG1, 0(BASE)
  601. + |.if FPU
  602. | lfd FARG1, 0(BASE)
  603. + |.else
  604. + | lwz CARG2, 4(BASE)
  605. + |.endif
  606. | blt ->fff_fallback
  607. - | checknum CARG3; bge ->fff_fallback
  608. + | checknum CARG1; bge ->fff_fallback
  609. |.endmacro
  610. |
  611. |.macro .ffunc_nn, name
  612. |->ff_ .. name:
  613. | cmplwi NARGS8:RC, 16
  614. - | lwz CARG3, 0(BASE)
  615. + | lwz CARG1, 0(BASE)
  616. + |.if FPU
  617. | lfd FARG1, 0(BASE)
  618. - | lwz CARG4, 8(BASE)
  619. + | lwz CARG3, 8(BASE)
  620. | lfd FARG2, 8(BASE)
  621. + |.else
  622. + | lwz CARG2, 4(BASE)
  623. + | lwz CARG3, 8(BASE)
  624. + | lwz CARG4, 12(BASE)
  625. + |.endif
  626. | blt ->fff_fallback
  627. + | checknum CARG1; bge ->fff_fallback
  628. | checknum CARG3; bge ->fff_fallback
  629. - | checknum CARG4; bge ->fff_fallback
  630. |.endmacro
  631. |
  632. |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
  633. @@ -1281,14 +1395,21 @@ static void build_subroutines(BuildCtx *
  634. | bge cr1, ->fff_fallback
  635. | stw CARG3, 0(RA)
  636. | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
  637. + | addi TMP1, BASE, 8
  638. + | add TMP2, RA, NARGS8:RC
  639. | stw CARG1, 4(RA)
  640. | beq ->fff_res // Done if exactly 1 argument.
  641. - | li TMP1, 8
  642. - | subi RC, RC, 8
  643. |1:
  644. - | cmplw TMP1, RC
  645. - | lfdx f0, BASE, TMP1
  646. - | stfdx f0, RA, TMP1
  647. + | cmplw TMP1, TMP2
  648. + |.if FPU
  649. + | lfd f0, 0(TMP1)
  650. + | stfd f0, -8(TMP1) // Store one slot below the source, same as the soft-float path.
  651. + |.else
  652. + | lwz CARG1, 0(TMP1)
  653. + | lwz CARG2, 4(TMP1)
  654. + | stw CARG1, -8(TMP1)
  655. + | stw CARG2, -4(TMP1)
  656. + |.endif
  657. | addi TMP1, TMP1, 8
  658. | bney <1
  659. | b ->fff_res
  660. @@ -1303,8 +1424,14 @@ static void build_subroutines(BuildCtx *
  661. | orc TMP1, TMP2, TMP0
  662. | addi TMP1, TMP1, ~LJ_TISNUM+1
  663. | slwi TMP1, TMP1, 3
  664. + |.if FPU
  665. | la TMP2, CFUNC:RB->upvalue
  666. | lfdx FARG1, TMP2, TMP1
  667. + |.else
  668. + | add TMP1, CFUNC:RB, TMP1
  669. + | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi
  670. + | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo
  671. + |.endif
  672. | b ->fff_resn
  673. |
  674. |//-- Base library: getters and setters ---------------------------------
  675. @@ -1382,7 +1509,12 @@ static void build_subroutines(BuildCtx *
  676. | mr CARG1, L
  677. | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
  678. | // Returns cTValue *.
  679. + |.if FPU
  680. | lfd FARG1, 0(CRET1)
  681. + |.else
  682. + | lwz CARG2, 4(CRET1)
  683. + | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1.
  684. + |.endif
  685. | b ->fff_resn
  686. |
  687. |//-- Base library: conversions ------------------------------------------
  688. @@ -1391,7 +1523,11 @@ static void build_subroutines(BuildCtx *
  689. | // Only handles the number case inline (without a base argument).
  690. | cmplwi NARGS8:RC, 8
  691. | lwz CARG1, 0(BASE)
  692. + |.if FPU
  693. | lfd FARG1, 0(BASE)
  694. + |.else
  695. + | lwz CARG2, 4(BASE)
  696. + |.endif
  697. | bne ->fff_fallback // Exactly one argument.
  698. | checknum CARG1; bgt ->fff_fallback
  699. | b ->fff_resn
  700. @@ -1442,12 +1578,23 @@ static void build_subroutines(BuildCtx *
  701. | cmplwi CRET1, 0
  702. | li CARG3, LJ_TNIL
  703. | beq ->fff_restv // End of traversal: return nil.
  704. - | lfd f0, 8(BASE) // Copy key and value to results.
  705. | la RA, -8(BASE)
  706. + |.if FPU
  707. + | lfd f0, 8(BASE) // Copy key and value to results.
  708. | lfd f1, 16(BASE)
  709. | stfd f0, 0(RA)
  710. - | li RD, (2+1)*8
  711. | stfd f1, 8(RA)
  712. + |.else
  713. + | lwz CARG1, 8(BASE)
  714. + | lwz CARG2, 12(BASE)
  715. + | lwz CARG3, 16(BASE)
  716. + | lwz CARG4, 20(BASE)
  717. + | stw CARG1, 0(RA)
  718. + | stw CARG2, 4(RA)
  719. + | stw CARG3, 8(RA)
  720. + | stw CARG4, 12(RA)
  721. + |.endif
  722. + | li RD, (2+1)*8
  723. | b ->fff_res
  724. |
  725. |.ffunc_1 pairs
  726. @@ -1456,17 +1603,32 @@ static void build_subroutines(BuildCtx *
  727. | bne ->fff_fallback
  728. #if LJ_52
  729. | lwz TAB:TMP2, TAB:CARG1->metatable
  730. + |.if FPU
  731. | lfd f0, CFUNC:RB->upvalue[0]
  732. + |.else
  733. + | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
  734. + | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
  735. + |.endif
  736. | cmplwi TAB:TMP2, 0
  737. | la RA, -8(BASE)
  738. | bne ->fff_fallback
  739. #else
  740. + |.if FPU
  741. | lfd f0, CFUNC:RB->upvalue[0]
  742. + |.else
  743. + | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
  744. + | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
  745. + |.endif
  746. | la RA, -8(BASE)
  747. #endif
  748. | stw TISNIL, 8(BASE)
  749. | li RD, (3+1)*8
  750. + |.if FPU
  751. | stfd f0, 0(RA)
  752. + |.else
  753. + | stw TMP0, 0(RA)
  754. + | stw TMP1, 4(RA)
  755. + |.endif
  756. | b ->fff_res
  757. |
  758. |.ffunc ipairs_aux
  759. @@ -1512,14 +1674,24 @@ static void build_subroutines(BuildCtx *
  760. | stfd FARG2, 0(RA)
  761. |.endif
  762. | ble >2 // Not in array part?
  763. + |.if FPU
  764. | lwzx TMP2, TMP1, TMP3
  765. | lfdx f0, TMP1, TMP3
  766. + |.else
  767. + | lwzux TMP2, TMP1, TMP3
  768. + | lwz TMP3, 4(TMP1)
  769. + |.endif
  770. |1:
  771. | checknil TMP2
  772. | li RD, (0+1)*8
  773. | beq ->fff_res // End of iteration, return 0 results.
  774. | li RD, (2+1)*8
  775. + |.if FPU
  776. | stfd f0, 8(RA)
  777. + |.else
  778. + | stw TMP2, 8(RA)
  779. + | stw TMP3, 12(RA)
  780. + |.endif
  781. | b ->fff_res
  782. |2: // Check for empty hash part first. Otherwise call C function.
  783. | lwz TMP0, TAB:CARG1->hmask
  784. @@ -1533,7 +1705,11 @@ static void build_subroutines(BuildCtx *
  785. | li RD, (0+1)*8
  786. | beq ->fff_res
  787. | lwz TMP2, 0(CRET1)
  788. + |.if FPU
  789. | lfd f0, 0(CRET1)
  790. + |.else
  791. + | lwz TMP3, 4(CRET1)
  792. + |.endif
  793. | b <1
  794. |
  795. |.ffunc_1 ipairs
  796. @@ -1542,12 +1718,22 @@ static void build_subroutines(BuildCtx *
  797. | bne ->fff_fallback
  798. #if LJ_52
  799. | lwz TAB:TMP2, TAB:CARG1->metatable
  800. + |.if FPU
  801. | lfd f0, CFUNC:RB->upvalue[0]
  802. + |.else
  803. + | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
  804. + | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
  805. + |.endif
  806. | cmplwi TAB:TMP2, 0
  807. | la RA, -8(BASE)
  808. | bne ->fff_fallback
  809. #else
  810. + |.if FPU
  811. | lfd f0, CFUNC:RB->upvalue[0]
  812. + |.else
  813. + | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
  814. + | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
  815. + |.endif
  816. | la RA, -8(BASE)
  817. #endif
  818. |.if DUALNUM
  819. @@ -1557,7 +1743,12 @@ static void build_subroutines(BuildCtx *
  820. |.endif
  821. | stw ZERO, 12(BASE)
  822. | li RD, (3+1)*8
  823. + |.if FPU
  824. | stfd f0, 0(RA)
  825. + |.else
  826. + | stw TMP0, 0(RA)
  827. + | stw TMP1, 4(RA)
  828. + |.endif
  829. | b ->fff_res
  830. |
  831. |//-- Base library: catch errors ----------------------------------------
  832. @@ -1576,19 +1767,32 @@ static void build_subroutines(BuildCtx *
  833. |
  834. |.ffunc xpcall
  835. | cmplwi NARGS8:RC, 16
  836. - | lwz CARG4, 8(BASE)
  837. + | lwz CARG3, 8(BASE)
  838. + |.if FPU
  839. | lfd FARG2, 8(BASE)
  840. | lfd FARG1, 0(BASE)
  841. + |.else
  842. + | lwz CARG1, 0(BASE)
  843. + | lwz CARG2, 4(BASE)
  844. + | lwz CARG4, 12(BASE)
  845. + |.endif
  846. | blt ->fff_fallback
  847. | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH)
  848. | mr TMP2, BASE
  849. - | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function.
  850. + | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function.
  851. | la BASE, 16(BASE)
  852. | // Remember active hook before pcall.
  853. | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31
  854. + |.if FPU
  855. | stfd FARG2, 0(TMP2) // Swap function and traceback.
  856. - | subi NARGS8:RC, NARGS8:RC, 16
  857. | stfd FARG1, 8(TMP2)
  858. + |.else
  859. + | stw CARG3, 0(TMP2)
  860. + | stw CARG4, 4(TMP2)
  861. + | stw CARG1, 8(TMP2)
  862. + | stw CARG2, 12(TMP2)
  863. + |.endif
  864. + | subi NARGS8:RC, NARGS8:RC, 16
  865. | addi PC, TMP1, 16+FRAME_PCALL
  866. | b ->vm_call_dispatch
  867. |
  868. @@ -1631,9 +1835,21 @@ static void build_subroutines(BuildCtx *
  869. | stp BASE, L->top
  870. |2: // Move args to coroutine.
  871. | cmpw TMP1, NARGS8:RC
  872. + |.if FPU
  873. | lfdx f0, BASE, TMP1
  874. + |.else
  875. + | add CARG3, BASE, TMP1
  876. + | lwz TMP2, 0(CARG3)
  877. + | lwz TMP3, 4(CARG3)
  878. + |.endif
  879. | beq >3
  880. + |.if FPU
  881. | stfdx f0, CARG2, TMP1
  882. + |.else
  883. + | add CARG3, CARG2, TMP1
  884. + | stw TMP2, 0(CARG3)
  885. + | stw TMP3, 4(CARG3)
  886. + |.endif
  887. | addi TMP1, TMP1, 8
  888. | b <2
  889. |3:
  890. @@ -1664,8 +1880,17 @@ static void build_subroutines(BuildCtx *
  891. | stp TMP2, L:SAVE0->top // Clear coroutine stack.
  892. |5: // Move results from coroutine.
  893. | cmplw TMP1, TMP3
  894. + |.if FPU
  895. | lfdx f0, TMP2, TMP1
  896. | stfdx f0, BASE, TMP1
  897. + |.else
  898. + | add CARG3, TMP2, TMP1
  899. + | lwz CARG1, 0(CARG3)
  900. + | lwz CARG2, 4(CARG3)
  901. + | add CARG3, BASE, TMP1
  902. + | stw CARG1, 0(CARG3)
  903. + | stw CARG2, 4(CARG3)
  904. + |.endif
  905. | addi TMP1, TMP1, 8
  906. | bne <5
  907. |6:
  908. @@ -1690,12 +1915,22 @@ static void build_subroutines(BuildCtx *
  909. | andix. TMP0, PC, FRAME_TYPE
  910. | la TMP3, -8(TMP3)
  911. | li TMP1, LJ_TFALSE
  912. + |.if FPU
  913. | lfd f0, 0(TMP3)
  914. + |.else
  915. + | lwz CARG1, 0(TMP3)
  916. + | lwz CARG2, 4(TMP3)
  917. + |.endif
  918. | stp TMP3, L:SAVE0->top // Remove error from coroutine stack.
  919. | li RD, (2+1)*8
  920. | stw TMP1, -8(BASE) // Prepend false to results.
  921. | la RA, -8(BASE)
  922. + |.if FPU
  923. | stfd f0, 0(BASE) // Copy error message.
  924. + |.else
  925. + | stw CARG1, 0(BASE) // Copy error message.
  926. + | stw CARG2, 4(BASE)
  927. + |.endif
  928. | b <7
  929. |.else
  930. | mr CARG1, L
  931. @@ -1874,7 +2109,12 @@ static void build_subroutines(BuildCtx *
  932. | lus CARG1, 0x8000 // -(2^31).
  933. | beqy ->fff_resi
  934. |5:
  935. + |.if FPU
  936. | lfd FARG1, 0(BASE)
  937. + |.else
  938. + | lwz CARG1, 0(BASE)
  939. + | lwz CARG2, 4(BASE)
  940. + |.endif
  941. | blex func
  942. | b ->fff_resn
  943. |.endmacro
  944. @@ -1898,10 +2138,14 @@ static void build_subroutines(BuildCtx *
  945. |
  946. |.ffunc math_log
  947. | cmplwi NARGS8:RC, 8
  948. - | lwz CARG3, 0(BASE)
  949. - | lfd FARG1, 0(BASE)
  950. + | lwz CARG1, 0(BASE)
  951. | bne ->fff_fallback // Need exactly 1 argument.
  952. - | checknum CARG3; bge ->fff_fallback
  953. + | checknum CARG1; bge ->fff_fallback
  954. + |.if FPU
  955. + | lfd FARG1, 0(BASE)
  956. + |.else
  957. + | lwz CARG2, 4(BASE)
  958. + |.endif
  959. | blex log
  960. | b ->fff_resn
  961. |
  962. @@ -1923,17 +2167,24 @@ static void build_subroutines(BuildCtx *
  963. |.if DUALNUM
  964. |.ffunc math_ldexp
  965. | cmplwi NARGS8:RC, 16
  966. - | lwz CARG3, 0(BASE)
  967. + | lwz TMP0, 0(BASE)
  968. + |.if FPU
  969. | lfd FARG1, 0(BASE)
  970. - | lwz CARG4, 8(BASE)
  971. + |.else
  972. + | lwz CARG1, 0(BASE)
  973. + | lwz CARG2, 4(BASE)
  974. + |.endif
  975. + | lwz TMP1, 8(BASE)
  976. |.if GPR64
  977. | lwz CARG2, 12(BASE)
  978. - |.else
  979. + |.elif FPU
  980. | lwz CARG1, 12(BASE)
  981. + |.else
  982. + | lwz CARG3, 12(BASE)
  983. |.endif
  984. | blt ->fff_fallback
  985. - | checknum CARG3; bge ->fff_fallback
  986. - | checknum CARG4; bne ->fff_fallback
  987. + | checknum TMP0; bge ->fff_fallback
  988. + | checknum TMP1; bne ->fff_fallback
  989. |.else
  990. |.ffunc_nn math_ldexp
  991. |.if GPR64
  992. @@ -1948,8 +2199,10 @@ static void build_subroutines(BuildCtx *
  993. |.ffunc_n math_frexp
  994. |.if GPR64
  995. | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
  996. - |.else
  997. + |.elif FPU
  998. | la CARG1, DISPATCH_GL(tmptv)(DISPATCH)
  999. + |.else
  1000. + | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
  1001. |.endif
  1002. | lwz PC, FRAME_PC(BASE)
  1003. | blex frexp
  1004. @@ -1958,7 +2211,12 @@ static void build_subroutines(BuildCtx *
  1005. |.if not DUALNUM
  1006. | tonum_i FARG2, TMP1
  1007. |.endif
  1008. + |.if FPU
  1009. | stfd FARG1, 0(RA)
  1010. + |.else
  1011. + | stw CRET1, 0(RA)
  1012. + | stw CRET2, 4(RA)
  1013. + |.endif
  1014. | li RD, (2+1)*8
  1015. |.if DUALNUM
  1016. | stw TISNUM, 8(RA)
  1017. @@ -1971,13 +2229,20 @@ static void build_subroutines(BuildCtx *
  1018. |.ffunc_n math_modf
  1019. |.if GPR64
  1020. | la CARG2, -8(BASE)
  1021. - |.else
  1022. + |.elif FPU
  1023. | la CARG1, -8(BASE)
  1024. + |.else
  1025. + | la CARG3, -8(BASE)
  1026. |.endif
  1027. | lwz PC, FRAME_PC(BASE)
  1028. | blex modf
  1029. | la RA, -8(BASE)
  1030. + |.if FPU
  1031. | stfd FARG1, 0(BASE)
  1032. + |.else
  1033. + | stw CRET1, 0(BASE)
  1034. + | stw CRET2, 4(BASE)
  1035. + |.endif
  1036. | li RD, (2+1)*8
  1037. | b ->fff_res
  1038. |
  1039. @@ -1985,13 +2250,13 @@ static void build_subroutines(BuildCtx *
  1040. |.if DUALNUM
  1041. | .ffunc_1 name
  1042. | checknum CARG3
  1043. - | addi TMP1, BASE, 8
  1044. - | add TMP2, BASE, NARGS8:RC
  1045. + | addi SAVE0, BASE, 8
  1046. + | add SAVE1, BASE, NARGS8:RC
  1047. | bne >4
  1048. |1: // Handle integers.
  1049. - | lwz CARG4, 0(TMP1)
  1050. - | cmplw cr1, TMP1, TMP2
  1051. - | lwz CARG2, 4(TMP1)
  1052. + | lwz CARG4, 0(SAVE0)
  1053. + | cmplw cr1, SAVE0, SAVE1
  1054. + | lwz CARG2, 4(SAVE0)
  1055. | bge cr1, ->fff_resi
  1056. | checknum CARG4
  1057. | xoris TMP0, CARG1, 0x8000
  1058. @@ -2008,36 +2273,76 @@ static void build_subroutines(BuildCtx *
  1059. |.if GPR64
  1060. | rldicl CARG1, CARG1, 0, 32
  1061. |.endif
  1062. - | addi TMP1, TMP1, 8
  1063. + | addi SAVE0, SAVE0, 8
  1064. | b <1
  1065. |3:
  1066. | bge ->fff_fallback
  1067. | // Convert intermediate result to number and continue below.
  1068. + |.if FPU
  1069. | tonum_i FARG1, CARG1
  1070. - | lfd FARG2, 0(TMP1)
  1071. + | lfd FARG2, 0(SAVE0)
  1072. + |.else
  1073. + | mr CARG2, CARG1
  1074. + | bl ->vm_sfi2d_1
  1075. + | lwz CARG3, 0(SAVE0)
  1076. + | lwz CARG4, 4(SAVE0)
  1077. + |.endif
  1078. | b >6
  1079. |4:
  1080. + |.if FPU
  1081. | lfd FARG1, 0(BASE)
  1082. + |.else
  1083. + | lwz CARG1, 0(BASE)
  1084. + | lwz CARG2, 4(BASE)
  1085. + |.endif
  1086. | bge ->fff_fallback
  1087. |5: // Handle numbers.
  1088. - | lwz CARG4, 0(TMP1)
  1089. - | cmplw cr1, TMP1, TMP2
  1090. - | lfd FARG2, 0(TMP1)
  1091. + | lwz CARG3, 0(SAVE0)
  1092. + | cmplw cr1, SAVE0, SAVE1
  1093. + |.if FPU
  1094. + | lfd FARG2, 0(SAVE0)
  1095. + |.else
  1096. + | lwz CARG4, 4(SAVE0)
  1097. + |.endif
  1098. | bge cr1, ->fff_resn
  1099. - | checknum CARG4; bge >7
  1100. + | checknum CARG3; bge >7
  1101. |6:
  1102. + | addi SAVE0, SAVE0, 8
  1103. + |.if FPU
  1104. | fsub f0, FARG1, FARG2
  1105. - | addi TMP1, TMP1, 8
  1106. |.if ismax
  1107. | fsel FARG1, f0, FARG1, FARG2
  1108. |.else
  1109. | fsel FARG1, f0, FARG2, FARG1
  1110. |.endif
  1111. + |.else
  1112. + | stw CARG1, SFSAVE_1
  1113. + | stw CARG2, SFSAVE_2
  1114. + | stw CARG3, SFSAVE_3
  1115. + | stw CARG4, SFSAVE_4
  1116. + | blex __ledf2
  1117. + | cmpwi CRET1, 0
  1118. + |.if ismax
  1119. + | blt >8
  1120. + |.else
  1121. + | bge >8
  1122. + |.endif
  1123. + | lwz CARG1, SFSAVE_1
  1124. + | lwz CARG2, SFSAVE_2
  1125. + | b <5
  1126. + |8:
  1127. + | lwz CARG1, SFSAVE_3
  1128. + | lwz CARG2, SFSAVE_4
  1129. + |.endif
  1130. | b <5
  1131. |7: // Convert integer to number and continue above.
  1132. - | lwz CARG2, 4(TMP1)
  1133. + | lwz CARG3, 4(SAVE0)
  1134. | bne ->fff_fallback
  1135. - | tonum_i FARG2, CARG2
  1136. + |.if FPU
  1137. + | tonum_i FARG2, CARG3
  1138. + |.else
  1139. + | bl ->vm_sfi2d_2
  1140. + |.endif
  1141. | b <6
  1142. |.else
  1143. | .ffunc_n name
  1144. @@ -2237,28 +2542,37 @@ static void build_subroutines(BuildCtx *
  1145. |
  1146. |.macro .ffunc_bit_op, name, ins
  1147. | .ffunc_bit name
  1148. - | addi TMP1, BASE, 8
  1149. - | add TMP2, BASE, NARGS8:RC
  1150. + | addi SAVE0, BASE, 8
  1151. + | add SAVE1, BASE, NARGS8:RC
  1152. |1:
  1153. - | lwz CARG4, 0(TMP1)
  1154. - | cmplw cr1, TMP1, TMP2
  1155. + | lwz CARG4, 0(SAVE0)
  1156. + | cmplw cr1, SAVE0, SAVE1
  1157. |.if DUALNUM
  1158. - | lwz CARG2, 4(TMP1)
  1159. + | lwz CARG2, 4(SAVE0)
  1160. |.else
  1161. - | lfd FARG1, 0(TMP1)
  1162. + | lfd FARG1, 0(SAVE0)
  1163. |.endif
  1164. | bgey cr1, ->fff_resi
  1165. | checknum CARG4
  1166. |.if DUALNUM
  1167. + |.if FPU
  1168. | bnel ->fff_bitop_fb
  1169. |.else
  1170. + | beq >3
  1171. + | stw CARG1, SFSAVE_1
  1172. + | bl ->fff_bitop_fb
  1173. + | mr CARG2, CARG1
  1174. + | lwz CARG1, SFSAVE_1
  1175. + |3:
  1176. + |.endif
  1177. + |.else
  1178. | fadd FARG1, FARG1, TOBIT
  1179. | bge ->fff_fallback
  1180. | stfd FARG1, TMPD
  1181. | lwz CARG2, TMPD_LO
  1182. |.endif
  1183. | ins CARG1, CARG1, CARG2
  1184. - | addi TMP1, TMP1, 8
  1185. + | addi SAVE0, SAVE0, 8
  1186. | b <1
  1187. |.endmacro
  1188. |
  1189. @@ -2280,7 +2594,14 @@ static void build_subroutines(BuildCtx *
  1190. |.macro .ffunc_bit_sh, name, ins, shmod
  1191. |.if DUALNUM
  1192. | .ffunc_2 bit_..name
  1193. + |.if FPU
  1194. | checknum CARG3; bnel ->fff_tobit_fb
  1195. + |.else
  1196. + | checknum CARG3; beq >1
  1197. + | bl ->fff_tobit_fb
  1198. + | lwz CARG2, 12(BASE) // Conversion polluted CARG2.
  1199. + |1:
  1200. + |.endif
  1201. | // Note: no inline conversion from number for 2nd argument!
  1202. | checknum CARG4; bne ->fff_fallback
  1203. |.else
  1204. @@ -2317,27 +2638,77 @@ static void build_subroutines(BuildCtx *
  1205. |->fff_resn:
  1206. | lwz PC, FRAME_PC(BASE)
  1207. | la RA, -8(BASE)
  1208. + |.if FPU
  1209. | stfd FARG1, -8(BASE)
  1210. + |.else
  1211. + | stw CARG1, -8(BASE)
  1212. + | stw CARG2, -4(BASE)
  1213. + |.endif
  1214. | b ->fff_res1
  1215. |
  1216. |// Fallback FP number to bit conversion.
  1217. |->fff_tobit_fb:
  1218. |.if DUALNUM
  1219. + |.if FPU
  1220. | lfd FARG1, 0(BASE)
  1221. | bgt ->fff_fallback
  1222. | fadd FARG1, FARG1, TOBIT
  1223. | stfd FARG1, TMPD
  1224. | lwz CARG1, TMPD_LO
  1225. | blr
  1226. + |.else
  1227. + | bgt ->fff_fallback
  1228. + | mr CARG2, CARG1
  1229. + | mr CARG1, CARG3
  1230. + |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2.
  1231. + |->vm_tobit: // Soft-float number to 32 bit int. In: CARG1 = hiword, CARG2 = loword. Out: CARG1.
  1232. + | slwi TMP2, CARG1, 1 // Shift out the sign bit; exponent field is now topmost.
  1233. + | addis TMP2, TMP2, 0x0020 // Add 0x00200000, i.e. 1 in the exponent field.
  1234. + | cmpwi TMP2, 0
  1235. + | bge >2 // Top bit clear after the increment: result is 0.
  1236. + | li TMP1, 0x3e0
  1237. + | srawi TMP2, TMP2, 21 // Sign-extend the incremented exponent field.
  1238. + | not TMP1, TMP1 // TMP1 = ~0x3e0.
  1239. + | sub. TMP2, TMP1, TMP2 // Right-shift count in TMP2; cr0 reflects its sign.
  1240. + | cmpwi cr7, CARG1, 0 // cr7[lt] = sign of the input (kept until return).
  1241. + | blt >1 // Negative shift count: take the path at label 1.
  1242. + | slwi TMP1, CARG1, 11 // Hiword mantissa bits moved to the top.
  1243. + | srwi TMP0, CARG2, 21 // Top 11 bits of the loword.
  1244. + | oris TMP1, TMP1, 0x8000 // Set bit 31 (implicit leading 1).
  1245. + | or TMP1, TMP1, TMP0 // Upper 32 bits of the mantissa.
  1246. + | srw CARG1, TMP1, TMP2 // Shift down to get the magnitude.
  1247. + | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
  1248. + | neg CARG1, CARG1 // Negative input: negate the magnitude.
  1249. + | blr
  1250. + |1:
  1251. + | addi TMP2, TMP2, 21 // Rebase the shift count for the loword.
  1252. + | srw TMP1, CARG2, TMP2 // Bits contributed by the loword.
  1253. + | slwi CARG2, CARG1, 12 // Hiword with sign and exponent shifted out.
  1254. + | subfic TMP2, TMP2, 20 // TMP2 = 20 - TMP2: left-shift count for the hiword part.
  1255. + | slw TMP0, CARG2, TMP2 // Bits contributed by the hiword.
  1256. + | or CARG1, TMP1, TMP0 // Combine both parts.
  1257. + | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
  1258. + | neg CARG1, CARG1 // Negative input: negate the result.
  1259. + | blr
  1260. + |2:
  1261. + | li CARG1, 0 // Result is zero.
  1262. + | blr
  1263. + |.endif
  1264. |.endif
  1265. |->fff_bitop_fb:
  1266. |.if DUALNUM
  1267. - | lfd FARG1, 0(TMP1)
  1268. + |.if FPU
  1269. + | lfd FARG1, 0(SAVE0)
  1270. | bgt ->fff_fallback
  1271. | fadd FARG1, FARG1, TOBIT
  1272. | stfd FARG1, TMPD
  1273. | lwz CARG2, TMPD_LO
  1274. | blr
  1275. + |.else
  1276. + | bgt ->fff_fallback
  1277. + | mr CARG1, CARG4
  1278. + | b ->vm_tobit
  1279. + |.endif
  1280. |.endif
  1281. |
  1282. |//-----------------------------------------------------------------------
  1283. @@ -2530,10 +2901,21 @@ static void build_subroutines(BuildCtx *
  1284. | decode_RA8 RC, INS // Call base.
  1285. | beq >2
  1286. |1: // Move results down.
  1287. + |.if FPU
  1288. | lfd f0, 0(RA)
  1289. + |.else
  1290. + | lwz CARG1, 0(RA)
  1291. + | lwz CARG2, 4(RA)
  1292. + |.endif
  1293. | addic. TMP1, TMP1, -8
  1294. | addi RA, RA, 8
  1295. + |.if FPU
  1296. | stfdx f0, BASE, RC
  1297. + |.else
  1298. + | add CARG3, BASE, RC
  1299. + | stw CARG1, 0(CARG3)
  1300. + | stw CARG2, 4(CARG3)
  1301. + |.endif
  1302. | addi RC, RC, 8
  1303. | bne <1
  1304. |2:
  1305. @@ -2586,10 +2968,12 @@ static void build_subroutines(BuildCtx *
  1306. |//-----------------------------------------------------------------------
  1307. |
  1308. |.macro savex_, a, b, c, d
  1309. + |.if FPU
  1310. | stfd f..a, 16+a*8(sp)
  1311. | stfd f..b, 16+b*8(sp)
  1312. | stfd f..c, 16+c*8(sp)
  1313. | stfd f..d, 16+d*8(sp)
  1314. + |.endif
  1315. |.endmacro
  1316. |
  1317. |->vm_exit_handler:
  1318. @@ -2661,16 +3045,16 @@ static void build_subroutines(BuildCtx *
  1319. | lwz KBASE, PC2PROTO(k)(TMP1)
  1320. | // Setup type comparison constants.
  1321. | li TISNUM, LJ_TISNUM
  1322. - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
  1323. - | stw TMP3, TMPD
  1324. + | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
  1325. + | .FPU stw TMP3, TMPD
  1326. | li ZERO, 0
  1327. - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
  1328. - | lfs TOBIT, TMPD
  1329. - | stw TMP3, TMPD
  1330. - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
  1331. + | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
  1332. + | .FPU lfs TOBIT, TMPD
  1333. + | .FPU stw TMP3, TMPD
  1334. + | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
  1335. | li TISNIL, LJ_TNIL
  1336. - | stw TMP0, TONUM_HI
  1337. - | lfs TONUM, TMPD
  1338. + | .FPU stw TMP0, TONUM_HI
  1339. + | .FPU lfs TONUM, TMPD
  1340. | // Modified copy of ins_next which handles function header dispatch, too.
  1341. | lwz INS, 0(PC)
  1342. | addi PC, PC, 4
  1343. @@ -2715,7 +3099,35 @@ static void build_subroutines(BuildCtx *
  1344. |//-- Math helper functions ----------------------------------------------
  1345. |//-----------------------------------------------------------------------
  1346. |
  1347. - |// NYI: Use internal implementations of floor, ceil, trunc.
  1348. + |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp.
  1349. + |
  1350. + |.macro sfi2d, AHI, ALO // Soft-float int32 to double. In: ALO. Out: AHI (hiword), ALO (loword).
  1351. + |.if not FPU // Expands to nothing for FPU builds.
  1352. + | mr. AHI, ALO // Copy the input and set cr0 from it.
  1353. + | bclr 12, 2 // Handle zero first: return if cr0[eq], with AHI = ALO = 0.
  1354. + | srawi TMP0, ALO, 31 // TMP0 = all ones if the input is negative, else 0.
  1355. + | xor TMP1, ALO, TMP0
  1356. + | sub TMP1, TMP1, TMP0 // Absolute value in TMP1.
  1357. + | cntlzw AHI, TMP1 // AHI = number of leading zero bits.
  1358. + | andix. TMP0, TMP0, 0x800 // Mask sign bit.
  1359. + | slw TMP1, TMP1, AHI // Align mantissa left with leading 1.
  1360. + | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI.
  1361. + | slwi ALO, TMP1, 21 // Loword: low 11 bits of the aligned mantissa.
  1362. + | or AHI, AHI, TMP0 // Sign | Exponent.
  1363. + | srwi TMP1, TMP1, 11 // Upper 21 bits; the leading 1 lands in bit 20.
  1364. + | slwi AHI, AHI, 20 // Align left.
  1365. + | add AHI, AHI, TMP1 // Add mantissa, increment exponent.
  1366. + | blr // Each expansion is a subroutine body reached via bl.
  1367. + |.endif
  1368. + |.endmacro
  1369. + |
  1370. + |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1.
  1371. + |->vm_sfi2d_1:
  1372. + | sfi2d CARG1, CARG2
  1373. + |
  1374. + |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1.
  1375. + |->vm_sfi2d_2:
  1376. + | sfi2d CARG3, CARG4
  1377. |
  1378. |->vm_modi:
  1379. | divwo. TMP0, CARG1, CARG2
  1380. @@ -2783,21 +3195,21 @@ static void build_subroutines(BuildCtx *
  1381. | addi DISPATCH, r12, GG_G2DISP
  1382. | stw r11, CTSTATE->cb.slot
  1383. | stw r3, CTSTATE->cb.gpr[0]
  1384. - | stfd f1, CTSTATE->cb.fpr[0]
  1385. + | .FPU stfd f1, CTSTATE->cb.fpr[0]
  1386. | stw r4, CTSTATE->cb.gpr[1]
  1387. - | stfd f2, CTSTATE->cb.fpr[1]
  1388. + | .FPU stfd f2, CTSTATE->cb.fpr[1]
  1389. | stw r5, CTSTATE->cb.gpr[2]
  1390. - | stfd f3, CTSTATE->cb.fpr[2]
  1391. + | .FPU stfd f3, CTSTATE->cb.fpr[2]
  1392. | stw r6, CTSTATE->cb.gpr[3]
  1393. - | stfd f4, CTSTATE->cb.fpr[3]
  1394. + | .FPU stfd f4, CTSTATE->cb.fpr[3]
  1395. | stw r7, CTSTATE->cb.gpr[4]
  1396. - | stfd f5, CTSTATE->cb.fpr[4]
  1397. + | .FPU stfd f5, CTSTATE->cb.fpr[4]
  1398. | stw r8, CTSTATE->cb.gpr[5]
  1399. - | stfd f6, CTSTATE->cb.fpr[5]
  1400. + | .FPU stfd f6, CTSTATE->cb.fpr[5]
  1401. | stw r9, CTSTATE->cb.gpr[6]
  1402. - | stfd f7, CTSTATE->cb.fpr[6]
  1403. + | .FPU stfd f7, CTSTATE->cb.fpr[6]
  1404. | stw r10, CTSTATE->cb.gpr[7]
  1405. - | stfd f8, CTSTATE->cb.fpr[7]
  1406. + | .FPU stfd f8, CTSTATE->cb.fpr[7]
  1407. | addi TMP0, sp, CFRAME_SPACE+8
  1408. | stw TMP0, CTSTATE->cb.stack
  1409. | mr CARG1, CTSTATE
  1410. @@ -2808,21 +3220,21 @@ static void build_subroutines(BuildCtx *
  1411. | lp BASE, L:CRET1->base
  1412. | li TISNUM, LJ_TISNUM // Setup type comparison constants.
  1413. | lp RC, L:CRET1->top
  1414. - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
  1415. + | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
  1416. | li ZERO, 0
  1417. | mr L, CRET1
  1418. - | stw TMP3, TMPD
  1419. - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
  1420. + | .FPU stw TMP3, TMPD
  1421. + | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
  1422. | lwz LFUNC:RB, FRAME_FUNC(BASE)
  1423. - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
  1424. - | stw TMP0, TONUM_HI
  1425. + | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
  1426. + | .FPU stw TMP0, TONUM_HI
  1427. | li TISNIL, LJ_TNIL
  1428. | li_vmstate INTERP
  1429. - | lfs TOBIT, TMPD
  1430. - | stw TMP3, TMPD
  1431. + | .FPU lfs TOBIT, TMPD
  1432. + | .FPU stw TMP3, TMPD
  1433. | sub RC, RC, BASE
  1434. | st_vmstate
  1435. - | lfs TONUM, TMPD
  1436. + | .FPU lfs TONUM, TMPD
  1437. | ins_callt
  1438. |.endif
  1439. |
  1440. @@ -2836,7 +3248,7 @@ static void build_subroutines(BuildCtx *
  1441. | mr CARG2, RA
  1442. | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
  1443. | lwz CRET1, CTSTATE->cb.gpr[0]
  1444. - | lfd FARG1, CTSTATE->cb.fpr[0]
  1445. + | .FPU lfd FARG1, CTSTATE->cb.fpr[0]
  1446. | lwz CRET2, CTSTATE->cb.gpr[1]
  1447. | b ->vm_leave_unw
  1448. |.endif
  1449. @@ -2870,14 +3282,14 @@ static void build_subroutines(BuildCtx *
  1450. | bge <1
  1451. |2:
  1452. | bney cr1, >3
  1453. - | lfd f1, CCSTATE->fpr[0]
  1454. - | lfd f2, CCSTATE->fpr[1]
  1455. - | lfd f3, CCSTATE->fpr[2]
  1456. - | lfd f4, CCSTATE->fpr[3]
  1457. - | lfd f5, CCSTATE->fpr[4]
  1458. - | lfd f6, CCSTATE->fpr[5]
  1459. - | lfd f7, CCSTATE->fpr[6]
  1460. - | lfd f8, CCSTATE->fpr[7]
  1461. + | .FPU lfd f1, CCSTATE->fpr[0]
  1462. + | .FPU lfd f2, CCSTATE->fpr[1]
  1463. + | .FPU lfd f3, CCSTATE->fpr[2]
  1464. + | .FPU lfd f4, CCSTATE->fpr[3]
  1465. + | .FPU lfd f5, CCSTATE->fpr[4]
  1466. + | .FPU lfd f6, CCSTATE->fpr[5]
  1467. + | .FPU lfd f7, CCSTATE->fpr[6]
  1468. + | .FPU lfd f8, CCSTATE->fpr[7]
  1469. |3:
  1470. | lp TMP0, CCSTATE->func
  1471. | lwz CARG2, CCSTATE->gpr[1]
  1472. @@ -2894,7 +3306,7 @@ static void build_subroutines(BuildCtx *
  1473. | lwz TMP2, -4(r14)
  1474. | lwz TMP0, 4(r14)
  1475. | stw CARG1, CCSTATE:TMP1->gpr[0]
  1476. - | stfd FARG1, CCSTATE:TMP1->fpr[0]
  1477. + | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0]
  1478. | stw CARG2, CCSTATE:TMP1->gpr[1]
  1479. | mtlr TMP0
  1480. | stw CARG3, CCSTATE:TMP1->gpr[2]
  1481. @@ -2923,19 +3335,19 @@ static void build_ins(BuildCtx *ctx, BCO
  1482. case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
  1483. | // RA = src1*8, RD = src2*8, JMP with RD = target
  1484. |.if DUALNUM
  1485. - | lwzux TMP0, RA, BASE
  1486. + | lwzux CARG1, RA, BASE
  1487. | addi PC, PC, 4
  1488. | lwz CARG2, 4(RA)
  1489. - | lwzux TMP1, RD, BASE
  1490. + | lwzux CARG3, RD, BASE
  1491. | lwz TMP2, -4(PC)
  1492. - | checknum cr0, TMP0
  1493. - | lwz CARG3, 4(RD)
  1494. + | checknum cr0, CARG1
  1495. + | lwz CARG4, 4(RD)
  1496. | decode_RD4 TMP2, TMP2
  1497. - | checknum cr1, TMP1
  1498. - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
  1499. + | checknum cr1, CARG3
  1500. + | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16)
  1501. | bne cr0, >7
  1502. | bne cr1, >8
  1503. - | cmpw CARG2, CARG3
  1504. + | cmpw CARG2, CARG4
  1505. if (op == BC_ISLT) {
  1506. | bge >2
  1507. } else if (op == BC_ISGE) {
  1508. @@ -2946,28 +3358,41 @@ static void build_ins(BuildCtx *ctx, BCO
  1509. | ble >2
  1510. }
  1511. |1:
  1512. - | add PC, PC, TMP2
  1513. + | add PC, PC, SAVE0
  1514. |2:
  1515. | ins_next
  1516. |
  1517. |7: // RA is not an integer.
  1518. | bgt cr0, ->vmeta_comp
  1519. | // RA is a number.
  1520. - | lfd f0, 0(RA)
  1521. + | .FPU lfd f0, 0(RA)
  1522. | bgt cr1, ->vmeta_comp
  1523. | blt cr1, >4
  1524. | // RA is a number, RD is an integer.
  1525. - | tonum_i f1, CARG3
  1526. + |.if FPU
  1527. + | tonum_i f1, CARG4
  1528. + |.else
  1529. + | bl ->vm_sfi2d_2
  1530. + |.endif
  1531. | b >5
  1532. |
  1533. |8: // RA is an integer, RD is not an integer.
  1534. | bgt cr1, ->vmeta_comp
  1535. | // RA is an integer, RD is a number.
  1536. + |.if FPU
  1537. | tonum_i f0, CARG2
  1538. + |.else
  1539. + | bl ->vm_sfi2d_1
  1540. + |.endif
  1541. |4:
  1542. - | lfd f1, 0(RD)
  1543. + | .FPU lfd f1, 0(RD)
  1544. |5:
  1545. + |.if FPU
  1546. | fcmpu cr0, f0, f1
  1547. + |.else
  1548. + | blex __ledf2
  1549. + | cmpwi CRET1, 0
  1550. + |.endif
  1551. if (op == BC_ISLT) {
  1552. | bge <2
  1553. } else if (op == BC_ISGE) {
  1554. @@ -3015,42 +3440,42 @@ static void build_ins(BuildCtx *ctx, BCO
  1555. vk = op == BC_ISEQV;
  1556. | // RA = src1*8, RD = src2*8, JMP with RD = target
  1557. |.if DUALNUM
  1558. - | lwzux TMP0, RA, BASE
  1559. + | lwzux CARG1, RA, BASE
  1560. | addi PC, PC, 4
  1561. | lwz CARG2, 4(RA)
  1562. - | lwzux TMP1, RD, BASE
  1563. - | checknum cr0, TMP0
  1564. - | lwz TMP2, -4(PC)
  1565. - | checknum cr1, TMP1
  1566. - | decode_RD4 TMP2, TMP2
  1567. - | lwz CARG3, 4(RD)
  1568. + | lwzux CARG3, RD, BASE
  1569. + | checknum cr0, CARG1
  1570. + | lwz SAVE0, -4(PC)
  1571. + | checknum cr1, CARG3
  1572. + | decode_RD4 SAVE0, SAVE0
  1573. + | lwz CARG4, 4(RD)
  1574. | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
  1575. - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
  1576. + | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
  1577. if (vk) {
  1578. | ble cr7, ->BC_ISEQN_Z
  1579. } else {
  1580. | ble cr7, ->BC_ISNEN_Z
  1581. }
  1582. |.else
  1583. - | lwzux TMP0, RA, BASE
  1584. - | lwz TMP2, 0(PC)
  1585. + | lwzux CARG1, RA, BASE
  1586. + | lwz SAVE0, 0(PC)
  1587. | lfd f0, 0(RA)
  1588. | addi PC, PC, 4
  1589. - | lwzux TMP1, RD, BASE
  1590. - | checknum cr0, TMP0
  1591. - | decode_RD4 TMP2, TMP2
  1592. + | lwzux CARG3, RD, BASE
  1593. + | checknum cr0, CARG1
  1594. + | decode_RD4 SAVE0, SAVE0
  1595. | lfd f1, 0(RD)
  1596. - | checknum cr1, TMP1
  1597. - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
  1598. + | checknum cr1, CARG3
  1599. + | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
  1600. | bge cr0, >5
  1601. | bge cr1, >5
  1602. | fcmpu cr0, f0, f1
  1603. if (vk) {
  1604. | bne >1
  1605. - | add PC, PC, TMP2
  1606. + | add PC, PC, SAVE0
  1607. } else {
  1608. | beq >1
  1609. - | add PC, PC, TMP2
  1610. + | add PC, PC, SAVE0
  1611. }
  1612. |1:
  1613. | ins_next
  1614. @@ -3058,36 +3483,36 @@ static void build_ins(BuildCtx *ctx, BCO
  1615. |5: // Either or both types are not numbers.
  1616. |.if not DUALNUM
  1617. | lwz CARG2, 4(RA)
  1618. - | lwz CARG3, 4(RD)
  1619. + | lwz CARG4, 4(RD)
  1620. |.endif
  1621. |.if FFI
  1622. - | cmpwi cr7, TMP0, LJ_TCDATA
  1623. - | cmpwi cr5, TMP1, LJ_TCDATA
  1624. + | cmpwi cr7, CARG1, LJ_TCDATA
  1625. + | cmpwi cr5, CARG3, LJ_TCDATA
  1626. |.endif
  1627. - | not TMP3, TMP0
  1628. - | cmplw TMP0, TMP1
  1629. - | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive?
  1630. + | not TMP2, CARG1
  1631. + | cmplw CARG1, CARG3
  1632. + | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive?
  1633. |.if FFI
  1634. | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq
  1635. |.endif
  1636. - | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata?
  1637. + | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata?
  1638. |.if FFI
  1639. | beq cr7, ->vmeta_equal_cd
  1640. |.endif
  1641. - | cmplw cr5, CARG2, CARG3
  1642. + | cmplw cr5, CARG2, CARG4
  1643. | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive.
  1644. | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type.
  1645. | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv.
  1646. - | mr SAVE0, PC
  1647. + | mr SAVE1, PC
  1648. | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2.
  1649. | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2.
  1650. if (vk) {
  1651. | bne cr0, >6
  1652. - | add PC, PC, TMP2
  1653. + | add PC, PC, SAVE0
  1654. |6:
  1655. } else {
  1656. | beq cr0, >6
  1657. - | add PC, PC, TMP2
  1658. + | add PC, PC, SAVE0
  1659. |6:
  1660. }
  1661. |.if DUALNUM
  1662. @@ -3102,6 +3527,7 @@ static void build_ins(BuildCtx *ctx, BCO
  1663. |
  1664. | // Different tables or userdatas. Need to check __eq metamethod.
  1665. | // Field metatable must be at same offset for GCtab and GCudata!
  1666. + | mr CARG3, CARG4
  1667. | lwz TAB:TMP2, TAB:CARG2->metatable
  1668. | li CARG4, 1-vk // ne = 0 or 1.
  1669. | cmplwi TAB:TMP2, 0
  1670. @@ -3109,7 +3535,7 @@ static void build_ins(BuildCtx *ctx, BCO
  1671. | lbz TMP2, TAB:TMP2->nomm
  1672. | andix. TMP2, TMP2, 1<<MM_eq
  1673. | bne <1 // Or 'no __eq' flag set?
  1674. - | mr PC, SAVE0 // Restore old PC.
  1675. + | mr PC, SAVE1 // Restore old PC.
  1676. | b ->vmeta_equal // Handle __eq metamethod.
  1677. break;
  1678. @@ -3150,16 +3576,16 @@ static void build_ins(BuildCtx *ctx, BCO
  1679. vk = op == BC_ISEQN;
  1680. | // RA = src*8, RD = num_const*8, JMP with RD = target
  1681. |.if DUALNUM
  1682. - | lwzux TMP0, RA, BASE
  1683. + | lwzux CARG1, RA, BASE
  1684. | addi PC, PC, 4
  1685. | lwz CARG2, 4(RA)
  1686. - | lwzux TMP1, RD, KBASE
  1687. - | checknum cr0, TMP0
  1688. - | lwz TMP2, -4(PC)
  1689. - | checknum cr1, TMP1
  1690. - | decode_RD4 TMP2, TMP2
  1691. - | lwz CARG3, 4(RD)
  1692. - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
  1693. + | lwzux CARG3, RD, KBASE
  1694. + | checknum cr0, CARG1
  1695. + | lwz SAVE0, -4(PC)
  1696. + | checknum cr1, CARG3
  1697. + | decode_RD4 SAVE0, SAVE0
  1698. + | lwz CARG4, 4(RD)
  1699. + | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
  1700. if (vk) {
  1701. |->BC_ISEQN_Z:
  1702. } else {
  1703. @@ -3167,7 +3593,7 @@ static void build_ins(BuildCtx *ctx, BCO
  1704. }
  1705. | bne cr0, >7
  1706. | bne cr1, >8
  1707. - | cmpw CARG2, CARG3
  1708. + | cmpw CARG2, CARG4
  1709. |4:
  1710. |.else
  1711. if (vk) {
  1712. @@ -3175,20 +3601,20 @@ static void build_ins(BuildCtx *ctx, BCO
  1713. } else {
  1714. |->BC_ISNEN_Z: // Dummy label.
  1715. }
  1716. - | lwzx TMP0, BASE, RA
  1717. + | lwzx CARG1, BASE, RA
  1718. | addi PC, PC, 4
  1719. | lfdx f0, BASE, RA
  1720. - | lwz TMP2, -4(PC)
  1721. + | lwz SAVE0, -4(PC)
  1722. | lfdx f1, KBASE, RD
  1723. - | decode_RD4 TMP2, TMP2
  1724. - | checknum TMP0
  1725. - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
  1726. + | decode_RD4 SAVE0, SAVE0
  1727. + | checknum CARG1
  1728. + | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
  1729. | bge >3
  1730. | fcmpu cr0, f0, f1
  1731. |.endif
  1732. if (vk) {
  1733. | bne >1
  1734. - | add PC, PC, TMP2
  1735. + | add PC, PC, SAVE0
  1736. |1:
  1737. |.if not FFI
  1738. |3:
  1739. @@ -3199,13 +3625,13 @@ static void build_ins(BuildCtx *ctx, BCO
  1740. |.if not FFI
  1741. |3:
  1742. |.endif
  1743. - | add PC, PC, TMP2
  1744. + | add PC, PC, SAVE0
  1745. |2:
  1746. }
  1747. | ins_next
  1748. |.if FFI
  1749. |3:
  1750. - | cmpwi TMP0, LJ_TCDATA
  1751. + | cmpwi CARG1, LJ_TCDATA
  1752. | beq ->vmeta_equal_cd
  1753. | b <1
  1754. |.endif
  1755. @@ -3213,18 +3639,31 @@ static void build_ins(BuildCtx *ctx, BCO
  1756. |7: // RA is not an integer.
  1757. | bge cr0, <3
  1758. | // RA is a number.
  1759. - | lfd f0, 0(RA)
  1760. + | .FPU lfd f0, 0(RA)
  1761. | blt cr1, >1
  1762. | // RA is a number, RD is an integer.
  1763. - | tonum_i f1, CARG3
  1764. + |.if FPU
  1765. + | tonum_i f1, CARG4
  1766. + |.else
  1767. + | bl ->vm_sfi2d_2
  1768. + |.endif
  1769. | b >2
  1770. |
  1771. |8: // RA is an integer, RD is a number.
  1772. + |.if FPU
  1773. | tonum_i f0, CARG2
  1774. + |.else
  1775. + | bl ->vm_sfi2d_1
  1776. + |.endif
  1777. |1:
  1778. - | lfd f1, 0(RD)
  1779. + | .FPU lfd f1, 0(RD)
  1780. |2:
  1781. + |.if FPU
  1782. | fcmpu cr0, f0, f1
  1783. + |.else
  1784. + | blex __ledf2
  1785. + | cmpwi CRET1, 0
  1786. + |.endif
  1787. | b <4
  1788. |.endif
  1789. break;
  1790. @@ -3279,7 +3718,12 @@ static void build_ins(BuildCtx *ctx, BCO
  1791. | add PC, PC, TMP2
  1792. } else {
  1793. | li TMP1, LJ_TFALSE
  1794. + |.if FPU
  1795. | lfdx f0, BASE, RD
  1796. + |.else
  1797. + | lwzux CARG1, RD, BASE
  1798. + | lwz CARG2, 4(RD)
  1799. + |.endif
  1800. | cmplw TMP0, TMP1
  1801. if (op == BC_ISTC) {
  1802. | bge >1
  1803. @@ -3288,7 +3732,12 @@ static void build_ins(BuildCtx *ctx, BCO
  1804. }
  1805. | addis PC, PC, -(BCBIAS_J*4 >> 16)
  1806. | decode_RD4 TMP2, INS
  1807. + |.if FPU
  1808. | stfdx f0, BASE, RA
  1809. + |.else
  1810. + | stwux CARG1, RA, BASE
  1811. + | stw CARG2, 4(RA)
  1812. + |.endif
  1813. | add PC, PC, TMP2
  1814. |1:
  1815. }
  1816. @@ -3323,8 +3772,15 @@ static void build_ins(BuildCtx *ctx, BCO
  1817. case BC_MOV:
  1818. | // RA = dst*8, RD = src*8
  1819. | ins_next1
  1820. + |.if FPU
  1821. | lfdx f0, BASE, RD
  1822. | stfdx f0, BASE, RA
  1823. + |.else
  1824. + | lwzux TMP0, RD, BASE
  1825. + | lwz TMP1, 4(RD)
  1826. + | stwux TMP0, RA, BASE
  1827. + | stw TMP1, 4(RA)
  1828. + |.endif
  1829. | ins_next2
  1830. break;
  1831. case BC_NOT:
  1832. @@ -3426,44 +3882,65 @@ static void build_ins(BuildCtx *ctx, BCO
  1833. ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
  1834. ||switch (vk) {
  1835. ||case 0:
  1836. - | lwzx TMP1, BASE, RB
  1837. + | lwzx CARG1, BASE, RB
  1838. | .if DUALNUM
  1839. - | lwzx TMP2, KBASE, RC
  1840. + | lwzx CARG3, KBASE, RC
  1841. | .endif
  1842. + | .if FPU
  1843. | lfdx f14, BASE, RB
  1844. | lfdx f15, KBASE, RC
  1845. + | .else
  1846. + | add TMP1, BASE, RB
  1847. + | add TMP2, KBASE, RC
  1848. + | lwz CARG2, 4(TMP1)
  1849. + | lwz CARG4, 4(TMP2)
  1850. + | .endif
  1851. | .if DUALNUM
  1852. - | checknum cr0, TMP1
  1853. - | checknum cr1, TMP2
  1854. + | checknum cr0, CARG1
  1855. + | checknum cr1, CARG3
  1856. | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
  1857. | bge ->vmeta_arith_vn
  1858. | .else
  1859. - | checknum TMP1; bge ->vmeta_arith_vn
  1860. + | checknum CARG1; bge ->vmeta_arith_vn
  1861. | .endif
  1862. || break;
  1863. ||case 1:
  1864. - | lwzx TMP1, BASE, RB
  1865. + | lwzx CARG1, BASE, RB
  1866. | .if DUALNUM
  1867. - | lwzx TMP2, KBASE, RC
  1868. + | lwzx CARG3, KBASE, RC
  1869. | .endif
  1870. + | .if FPU
  1871. | lfdx f15, BASE, RB
  1872. | lfdx f14, KBASE, RC
  1873. + | .else
  1874. + | add TMP1, BASE, RB
  1875. + | add TMP2, KBASE, RC
  1876. + | lwz CARG2, 4(TMP1)
  1877. + | lwz CARG4, 4(TMP2)
  1878. + | .endif
  1879. | .if DUALNUM
  1880. - | checknum cr0, TMP1
  1881. - | checknum cr1, TMP2
  1882. + | checknum cr0, CARG1
  1883. + | checknum cr1, CARG3
  1884. | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
  1885. | bge ->vmeta_arith_nv
  1886. | .else
  1887. - | checknum TMP1; bge ->vmeta_arith_nv
  1888. + | checknum CARG1; bge ->vmeta_arith_nv
  1889. | .endif
  1890. || break;
  1891. ||default:
  1892. - | lwzx TMP1, BASE, RB
  1893. - | lwzx TMP2, BASE, RC
  1894. + | lwzx CARG1, BASE, RB
  1895. + | lwzx CARG3, BASE, RC
  1896. + | .if FPU
  1897. | lfdx f14, BASE, RB
  1898. | lfdx f15, BASE, RC
  1899. - | checknum cr0, TMP1
  1900. - | checknum cr1, TMP2
  1901. + | .else
  1902. + | add TMP1, BASE, RB
  1903. + | add TMP2, BASE, RC
  1904. + | lwz CARG2, 4(TMP1)
  1905. + | lwz CARG4, 4(TMP2)
  1906. + | .endif
  1907. + | checknum cr0, CARG1
  1908. + | checknum cr1, CARG3
  1909. | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
  1910. | bge ->vmeta_arith_vv
  1911. || break;
  1912. @@ -3497,48 +3974,78 @@ static void build_ins(BuildCtx *ctx, BCO
  1913. | fsub a, b, a // b - floor(b/c)*c
  1914. |.endmacro
  1915. |
  1916. + |.macro sfpmod // Soft-float modulo a - floor(a/b)*b. In: a = CARG1/CARG2, b = CARG3/CARG4.
  1917. + |->BC_MODVN_Z: // Global label: the fpmod_ variants branch here instead of duplicating the code.
  1918. + | stw CARG1, SFSAVE_1 // Save a across the calls.
  1919. + | stw CARG2, SFSAVE_2
  1920. + | mr SAVE0, CARG3 // Keep b in SAVE0/SAVE1 across the calls.
  1921. + | mr SAVE1, CARG4
  1922. + | blex __divdf3 // a / b
  1923. + | blex floor // floor(a / b); presumably the quotient is already in CARG1/CARG2 -- confirm CRET1/CRET2 alias them.
  1924. + | mr CARG3, SAVE0 // Reload b as second operand.
  1925. + | mr CARG4, SAVE1
  1926. + | blex __muldf3 // floor(a / b) * b
  1927. + | mr CARG3, CRET1 // Product becomes the second operand.
  1928. + | mr CARG4, CRET2
  1929. + | lwz CARG1, SFSAVE_1 // Reload a as first operand.
  1930. + | lwz CARG2, SFSAVE_2
  1931. + | blex __subdf3 // a - floor(a / b) * b
  1932. + |.endmacro
  1933. + |
  1934. |.macro ins_arithfp, fpins
  1935. | ins_arithpre
  1936. |.if "fpins" == "fpmod_"
  1937. | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
  1938. - |.else
  1939. + |.elif FPU
  1940. | fpins f0, f14, f15
  1941. | ins_next1
  1942. | stfdx f0, BASE, RA
  1943. | ins_next2
  1944. + |.else
  1945. + | blex __divdf3 // Only soft-float div uses this macro.
  1946. + | ins_next1
  1947. + | stwux CRET1, RA, BASE
  1948. + | stw CRET2, 4(RA)
  1949. + | ins_next2
  1950. |.endif
  1951. |.endmacro
  1952. |
  1953. - |.macro ins_arithdn, intins, fpins
  1954. + |.macro ins_arithdn, intins, fpins, fpcall
  1955. | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
  1956. ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
  1957. ||switch (vk) {
  1958. ||case 0:
  1959. - | lwzux TMP1, RB, BASE
  1960. - | lwzux TMP2, RC, KBASE
  1961. - | lwz CARG1, 4(RB)
  1962. - | checknum cr0, TMP1
  1963. - | lwz CARG2, 4(RC)
  1964. + | lwzux CARG1, RB, BASE
  1965. + | lwzux CARG3, RC, KBASE
  1966. + | lwz CARG2, 4(RB)
  1967. + | checknum cr0, CARG1
  1968. + | lwz CARG4, 4(RC)
  1969. + | checknum cr1, CARG3
  1970. || break;
  1971. ||case 1:
  1972. - | lwzux TMP1, RB, BASE
  1973. - | lwzux TMP2, RC, KBASE
  1974. - | lwz CARG2, 4(RB)
  1975. - | checknum cr0, TMP1
  1976. - | lwz CARG1, 4(RC)
  1977. + | lwzux CARG3, RB, BASE
  1978. + | lwzux CARG1, RC, KBASE
  1979. + | lwz CARG4, 4(RB)
  1980. + | checknum cr0, CARG3
  1981. + | lwz CARG2, 4(RC)
  1982. + | checknum cr1, CARG1
  1983. || break;
  1984. ||default:
  1985. - | lwzux TMP1, RB, BASE
  1986. - | lwzux TMP2, RC, BASE
  1987. - | lwz CARG1, 4(RB)
  1988. - | checknum cr0, TMP1
  1989. - | lwz CARG2, 4(RC)
  1990. + | lwzux CARG1, RB, BASE
  1991. + | lwzux CARG3, RC, BASE
  1992. + | lwz CARG2, 4(RB)
  1993. + | checknum cr0, CARG1
  1994. + | lwz CARG4, 4(RC)
  1995. + | checknum cr1, CARG3
  1996. || break;
  1997. ||}
  1998. - | checknum cr1, TMP2
  1999. | bne >5
  2000. | bne cr1, >5
  2001. - | intins CARG1, CARG1, CARG2
  2002. + |.if "intins" == "intmod"
  2003. + | mr CARG1, CARG2
  2004. + | mr CARG2, CARG4
  2005. + |.endif
  2006. + | intins CARG1, CARG2, CARG4
  2007. | bso >4
  2008. |1:
  2009. | ins_next1
  2010. @@ -3550,29 +4057,40 @@ static void build_ins(BuildCtx *ctx, BCO
  2011. | checkov TMP0, <1 // Ignore unrelated overflow.
  2012. | ins_arithfallback b
  2013. |5: // FP variant.
  2014. + |.if FPU
  2015. ||if (vk == 1) {
  2016. | lfd f15, 0(RB)
  2017. - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
  2018. | lfd f14, 0(RC)
  2019. ||} else {
  2020. | lfd f14, 0(RB)
  2021. - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
  2022. | lfd f15, 0(RC)
  2023. ||}
  2024. + |.endif
  2025. + | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
  2026. | ins_arithfallback bge
  2027. |.if "fpins" == "fpmod_"
  2028. | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
  2029. |.else
  2030. + |.if FPU
  2031. | fpins f0, f14, f15
  2032. - | ins_next1
  2033. | stfdx f0, BASE, RA
  2034. + |.else
  2035. + |.if "fpcall" == "sfpmod"
  2036. + | sfpmod
  2037. + |.else
  2038. + | blex fpcall
  2039. + |.endif
  2040. + | stwux CRET1, RA, BASE
  2041. + | stw CRET2, 4(RA)
  2042. + |.endif
  2043. + | ins_next1
  2044. | b <2
  2045. |.endif
  2046. |.endmacro
  2047. |
  2048. - |.macro ins_arith, intins, fpins
  2049. + |.macro ins_arith, intins, fpins, fpcall
  2050. |.if DUALNUM
  2051. - | ins_arithdn intins, fpins
  2052. + | ins_arithdn intins, fpins, fpcall
  2053. |.else
  2054. | ins_arithfp fpins
  2055. |.endif
  2056. @@ -3587,9 +4105,9 @@ static void build_ins(BuildCtx *ctx, BCO
  2057. | addo. TMP0, TMP0, TMP3
  2058. | add y, a, b
  2059. |.endmacro
  2060. - | ins_arith addo32., fadd
  2061. + | ins_arith addo32., fadd, __adddf3
  2062. |.else
  2063. - | ins_arith addo., fadd
  2064. + | ins_arith addo., fadd, __adddf3
  2065. |.endif
  2066. break;
  2067. case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
  2068. @@ -3601,36 +4119,48 @@ static void build_ins(BuildCtx *ctx, BCO
  2069. | subo. TMP0, TMP0, TMP3
  2070. | sub y, a, b
  2071. |.endmacro
  2072. - | ins_arith subo32., fsub
  2073. + | ins_arith subo32., fsub, __subdf3
  2074. |.else
  2075. - | ins_arith subo., fsub
  2076. + | ins_arith subo., fsub, __subdf3
  2077. |.endif
  2078. break;
  2079. case BC_MULVN: case BC_MULNV: case BC_MULVV:
  2080. - | ins_arith mullwo., fmul
  2081. + | ins_arith mullwo., fmul, __muldf3
  2082. break;
  2083. case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
  2084. | ins_arithfp fdiv
  2085. break;
  2086. case BC_MODVN:
  2087. - | ins_arith intmod, fpmod
  2088. + | ins_arith intmod, fpmod, sfpmod
  2089. break;
  2090. case BC_MODNV: case BC_MODVV:
  2091. - | ins_arith intmod, fpmod_
  2092. + | ins_arith intmod, fpmod_, sfpmod
  2093. break;
  2094. case BC_POW:
  2095. | // NYI: (partial) integer arithmetic.
  2096. - | lwzx TMP1, BASE, RB
  2097. + | lwzx CARG1, BASE, RB
  2098. + | lwzx CARG3, BASE, RC
  2099. + |.if FPU
  2100. | lfdx FARG1, BASE, RB
  2101. - | lwzx TMP2, BASE, RC
  2102. | lfdx FARG2, BASE, RC
  2103. - | checknum cr0, TMP1
  2104. - | checknum cr1, TMP2
  2105. + |.else
  2106. + | add TMP1, BASE, RB
  2107. + | add TMP2, BASE, RC
  2108. + | lwz CARG2, 4(TMP1)
  2109. + | lwz CARG4, 4(TMP2)
  2110. + |.endif
  2111. + | checknum cr0, CARG1
  2112. + | checknum cr1, CARG3
  2113. | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
  2114. | bge ->vmeta_arith_vv
  2115. | blex pow
  2116. | ins_next1
  2117. + |.if FPU
  2118. | stfdx FARG1, BASE, RA
  2119. + |.else
  2120. + | stwux CARG1, RA, BASE
  2121. + | stw CARG2, 4(RA)
  2122. + |.endif
  2123. | ins_next2
  2124. break;
  2125. @@ -3650,8 +4180,15 @@ static void build_ins(BuildCtx *ctx, BCO
  2126. | lp BASE, L->base
  2127. | bne ->vmeta_binop
  2128. | ins_next1
  2129. + |.if FPU
  2130. | lfdx f0, BASE, SAVE0 // Copy result from RB to RA.
  2131. | stfdx f0, BASE, RA
  2132. + |.else
  2133. + | lwzux TMP0, SAVE0, BASE
  2134. + | lwz TMP1, 4(SAVE0)
  2135. + | stwux TMP0, RA, BASE
  2136. + | stw TMP1, 4(RA)
  2137. + |.endif
  2138. | ins_next2
  2139. break;
  2140. @@ -3714,8 +4251,15 @@ static void build_ins(BuildCtx *ctx, BCO
  2141. case BC_KNUM:
  2142. | // RA = dst*8, RD = num_const*8
  2143. | ins_next1
  2144. + |.if FPU
  2145. | lfdx f0, KBASE, RD
  2146. | stfdx f0, BASE, RA
  2147. + |.else
  2148. + | lwzux TMP0, RD, KBASE
  2149. + | lwz TMP1, 4(RD)
  2150. + | stwux TMP0, RA, BASE
  2151. + | stw TMP1, 4(RA)
  2152. + |.endif
  2153. | ins_next2
  2154. break;
  2155. case BC_KPRI:
  2156. @@ -3748,8 +4292,15 @@ static void build_ins(BuildCtx *ctx, BCO
  2157. | lwzx UPVAL:RB, LFUNC:RB, RD
  2158. | ins_next1
  2159. | lwz TMP1, UPVAL:RB->v
  2160. + |.if FPU
  2161. | lfd f0, 0(TMP1)
  2162. | stfdx f0, BASE, RA
  2163. + |.else
  2164. + | lwz TMP2, 0(TMP1)
  2165. + | lwz TMP3, 4(TMP1)
  2166. + | stwux TMP2, RA, BASE
  2167. + | stw TMP3, 4(RA)
  2168. + |.endif
  2169. | ins_next2
  2170. break;
  2171. case BC_USETV:
  2172. @@ -3757,14 +4308,24 @@ static void build_ins(BuildCtx *ctx, BCO
  2173. | lwz LFUNC:RB, FRAME_FUNC(BASE)
  2174. | srwi RA, RA, 1
  2175. | addi RA, RA, offsetof(GCfuncL, uvptr)
  2176. + |.if FPU
  2177. | lfdux f0, RD, BASE
  2178. + |.else
  2179. + | lwzux CARG1, RD, BASE
  2180. + | lwz CARG3, 4(RD)
  2181. + |.endif
  2182. | lwzx UPVAL:RB, LFUNC:RB, RA
  2183. | lbz TMP3, UPVAL:RB->marked
  2184. | lwz CARG2, UPVAL:RB->v
  2185. | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
  2186. | lbz TMP0, UPVAL:RB->closed
  2187. | lwz TMP2, 0(RD)
  2188. + |.if FPU
  2189. | stfd f0, 0(CARG2)
  2190. + |.else
  2191. + | stw CARG1, 0(CARG2)
  2192. + | stw CARG3, 4(CARG2)
  2193. + |.endif
  2194. | cmplwi cr1, TMP0, 0
  2195. | lwz TMP1, 4(RD)
  2196. | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
  2197. @@ -3820,11 +4381,21 @@ static void build_ins(BuildCtx *ctx, BCO
  2198. | lwz LFUNC:RB, FRAME_FUNC(BASE)
  2199. | srwi RA, RA, 1
  2200. | addi RA, RA, offsetof(GCfuncL, uvptr)
  2201. + |.if FPU
  2202. | lfdx f0, KBASE, RD
  2203. + |.else
  2204. + | lwzux TMP2, RD, KBASE
  2205. + | lwz TMP3, 4(RD)
  2206. + |.endif
  2207. | lwzx UPVAL:RB, LFUNC:RB, RA
  2208. | ins_next1
  2209. | lwz TMP1, UPVAL:RB->v
  2210. + |.if FPU
  2211. | stfd f0, 0(TMP1)
  2212. + |.else
  2213. + | stw TMP2, 0(TMP1)
  2214. + | stw TMP3, 4(TMP1)
  2215. + |.endif
  2216. | ins_next2
  2217. break;
  2218. case BC_USETP:
  2219. @@ -3972,11 +4543,21 @@ static void build_ins(BuildCtx *ctx, BCO
  2220. |.endif
  2221. | ble ->vmeta_tgetv // Integer key and in array part?
  2222. | lwzx TMP0, TMP1, TMP2
  2223. + |.if FPU
  2224. | lfdx f14, TMP1, TMP2
  2225. + |.else
  2226. + | lwzux SAVE0, TMP1, TMP2
  2227. + | lwz SAVE1, 4(TMP1)
  2228. + |.endif
  2229. | checknil TMP0; beq >2
  2230. |1:
  2231. | ins_next1
  2232. + |.if FPU
  2233. | stfdx f14, BASE, RA
  2234. + |.else
  2235. + | stwux SAVE0, RA, BASE
  2236. + | stw SAVE1, 4(RA)
  2237. + |.endif
  2238. | ins_next2
  2239. |
  2240. |2: // Check for __index if table value is nil.
  2241. @@ -4052,12 +4633,22 @@ static void build_ins(BuildCtx *ctx, BCO
  2242. | lwz TMP1, TAB:RB->asize
  2243. | lwz TMP2, TAB:RB->array
  2244. | cmplw TMP0, TMP1; bge ->vmeta_tgetb
  2245. + |.if FPU
  2246. | lwzx TMP1, TMP2, RC
  2247. | lfdx f0, TMP2, RC
  2248. + |.else
  2249. + | lwzux TMP1, TMP2, RC
  2250. + | lwz TMP3, 4(TMP2)
  2251. + |.endif
  2252. | checknil TMP1; beq >5
  2253. |1:
  2254. | ins_next1
  2255. + |.if FPU
  2256. | stfdx f0, BASE, RA
  2257. + |.else
  2258. + | stwux TMP1, RA, BASE
  2259. + | stw TMP3, 4(RA)
  2260. + |.endif
  2261. | ins_next2
  2262. |
  2263. |5: // Check for __index if table value is nil.
  2264. @@ -4087,10 +4678,20 @@ static void build_ins(BuildCtx *ctx, BCO
  2265. | cmplw TMP0, CARG2
  2266. | slwi TMP2, CARG2, 3
  2267. | ble ->vmeta_tgetr // In array part?
  2268. + |.if FPU
  2269. | lfdx f14, TMP1, TMP2
  2270. + |.else
  2271. + | lwzux SAVE0, TMP2, TMP1
  2272. + | lwz SAVE1, 4(TMP2)
  2273. + |.endif
  2274. |->BC_TGETR_Z:
  2275. | ins_next1
  2276. + |.if FPU
  2277. | stfdx f14, BASE, RA
  2278. + |.else
  2279. + | stwux SAVE0, RA, BASE
  2280. + | stw SAVE1, 4(RA)
  2281. + |.endif
  2282. | ins_next2
  2283. break;
  2284. @@ -4131,11 +4732,22 @@ static void build_ins(BuildCtx *ctx, BCO
  2285. | ble ->vmeta_tsetv // Integer key and in array part?
  2286. | lwzx TMP2, TMP1, TMP0
  2287. | lbz TMP3, TAB:RB->marked
  2288. + |.if FPU
  2289. | lfdx f14, BASE, RA
  2290. + |.else
  2291. + | add SAVE1, BASE, RA
  2292. + | lwz SAVE0, 0(SAVE1)
  2293. + | lwz SAVE1, 4(SAVE1)
  2294. + |.endif
  2295. | checknil TMP2; beq >3
  2296. |1:
  2297. | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
  2298. + |.if FPU
  2299. | stfdx f14, TMP1, TMP0
  2300. + |.else
  2301. + | stwux SAVE0, TMP1, TMP0
  2302. + | stw SAVE1, 4(TMP1)
  2303. + |.endif
  2304. | bne >7
  2305. |2:
  2306. | ins_next
  2307. @@ -4176,7 +4788,13 @@ static void build_ins(BuildCtx *ctx, BCO
  2308. | lwz NODE:TMP2, TAB:RB->node
  2309. | stb ZERO, TAB:RB->nomm // Clear metamethod cache.
  2310. | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
  2311. + |.if FPU
  2312. | lfdx f14, BASE, RA
  2313. + |.else
  2314. + | add CARG2, BASE, RA
  2315. + | lwz SAVE0, 0(CARG2)
  2316. + | lwz SAVE1, 4(CARG2)
  2317. + |.endif
  2318. | slwi TMP0, TMP1, 5
  2319. | slwi TMP1, TMP1, 3
  2320. | sub TMP1, TMP0, TMP1
  2321. @@ -4192,7 +4810,12 @@ static void build_ins(BuildCtx *ctx, BCO
  2322. | checknil CARG2; beq >4 // Key found, but nil value?
  2323. |2:
  2324. | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
  2325. + |.if FPU
  2326. | stfd f14, NODE:TMP2->val
  2327. + |.else
  2328. + | stw SAVE0, NODE:TMP2->val.u32.hi
  2329. + | stw SAVE1, NODE:TMP2->val.u32.lo
  2330. + |.endif
  2331. | bne >7
  2332. |3:
  2333. | ins_next
  2334. @@ -4231,7 +4854,12 @@ static void build_ins(BuildCtx *ctx, BCO
  2335. | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
  2336. | // Returns TValue *.
  2337. | lp BASE, L->base
  2338. + |.if FPU
  2339. | stfd f14, 0(CRET1)
  2340. + |.else
  2341. + | stw SAVE0, 0(CRET1)
  2342. + | stw SAVE1, 4(CRET1)
  2343. + |.endif
  2344. | b <3 // No 2nd write barrier needed.
  2345. |
  2346. |7: // Possible table write barrier for the value. Skip valiswhite check.
  2347. @@ -4248,13 +4876,24 @@ static void build_ins(BuildCtx *ctx, BCO
  2348. | lwz TMP2, TAB:RB->array
  2349. | lbz TMP3, TAB:RB->marked
  2350. | cmplw TMP0, TMP1
  2351. + |.if FPU
  2352. | lfdx f14, BASE, RA
  2353. + |.else
  2354. + | add CARG2, BASE, RA
  2355. + | lwz SAVE0, 0(CARG2)
  2356. + | lwz SAVE1, 4(CARG2)
  2357. + |.endif
  2358. | bge ->vmeta_tsetb
  2359. | lwzx TMP1, TMP2, RC
  2360. | checknil TMP1; beq >5
  2361. |1:
  2362. | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
  2363. + |.if FPU
  2364. | stfdx f14, TMP2, RC
  2365. + |.else
  2366. + | stwux SAVE0, RC, TMP2
  2367. + | stw SAVE1, 4(RC)
  2368. + |.endif
  2369. | bne >7
  2370. |2:
  2371. | ins_next
  2372. @@ -4294,10 +4933,20 @@ static void build_ins(BuildCtx *ctx, BCO
  2373. |2:
  2374. | cmplw TMP0, CARG3
  2375. | slwi TMP2, CARG3, 3
  2376. + |.if FPU
  2377. | lfdx f14, BASE, RA
  2378. + |.else
  2379. + | lwzux SAVE0, RA, BASE
  2380. + | lwz SAVE1, 4(RA)
  2381. + |.endif
  2382. | ble ->vmeta_tsetr // In array part?
  2383. | ins_next1
  2384. + |.if FPU
  2385. | stfdx f14, TMP1, TMP2
  2386. + |.else
  2387. + | stwux SAVE0, TMP1, TMP2
  2388. + | stw SAVE1, 4(TMP1)
  2389. + |.endif
  2390. | ins_next2
  2391. |
  2392. |7: // Possible table write barrier for the value. Skip valiswhite check.
  2393. @@ -4327,10 +4976,20 @@ static void build_ins(BuildCtx *ctx, BCO
  2394. | add TMP1, TMP1, TMP0
  2395. | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
  2396. |3: // Copy result slots to table.
  2397. + |.if FPU
  2398. | lfd f0, 0(RA)
  2399. + |.else
  2400. + | lwz SAVE0, 0(RA)
  2401. + | lwz SAVE1, 4(RA)
  2402. + |.endif
  2403. | addi RA, RA, 8
  2404. | cmpw cr1, RA, TMP2
  2405. + |.if FPU
  2406. | stfd f0, 0(TMP1)
  2407. + |.else
  2408. + | stw SAVE0, 0(TMP1)
  2409. + | stw SAVE1, 4(TMP1)
  2410. + |.endif
  2411. | addi TMP1, TMP1, 8
  2412. | blt cr1, <3
  2413. | bne >7
  2414. @@ -4397,9 +5056,20 @@ static void build_ins(BuildCtx *ctx, BCO
  2415. | beq cr1, >3
  2416. |2:
  2417. | addi TMP3, TMP2, 8
  2418. + |.if FPU
  2419. | lfdx f0, RA, TMP2
  2420. + |.else
  2421. + | add CARG3, RA, TMP2
  2422. + | lwz CARG1, 0(CARG3)
  2423. + | lwz CARG2, 4(CARG3)
  2424. + |.endif
  2425. | cmplw cr1, TMP3, NARGS8:RC
  2426. + |.if FPU
  2427. | stfdx f0, BASE, TMP2
  2428. + |.else
  2429. + | stwux CARG1, TMP2, BASE
  2430. + | stw CARG2, 4(TMP2)
  2431. + |.endif
  2432. | mr TMP2, TMP3
  2433. | bne cr1, <2
  2434. |3:
  2435. @@ -4432,14 +5102,28 @@ static void build_ins(BuildCtx *ctx, BCO
  2436. | add BASE, BASE, RA
  2437. | lwz TMP1, -24(BASE)
  2438. | lwz LFUNC:RB, -20(BASE)
  2439. + |.if FPU
  2440. | lfd f1, -8(BASE)
  2441. | lfd f0, -16(BASE)
  2442. + |.else
  2443. + | lwz CARG1, -8(BASE)
  2444. + | lwz CARG2, -4(BASE)
  2445. + | lwz CARG3, -16(BASE)
  2446. + | lwz CARG4, -12(BASE)
  2447. + |.endif
  2448. | stw TMP1, 0(BASE) // Copy callable.
  2449. | stw LFUNC:RB, 4(BASE)
  2450. | checkfunc TMP1
  2451. - | stfd f1, 16(BASE) // Copy control var.
  2452. | li NARGS8:RC, 16 // Iterators get 2 arguments.
  2453. + |.if FPU
  2454. + | stfd f1, 16(BASE) // Copy control var.
  2455. | stfdu f0, 8(BASE) // Copy state.
  2456. + |.else
  2457. + | stw CARG1, 16(BASE) // Copy control var.
  2458. + | stw CARG2, 20(BASE)
  2459. + | stwu CARG3, 8(BASE) // Copy state.
  2460. + | stw CARG4, 4(BASE)
  2461. + |.endif
  2462. | bne ->vmeta_call
  2463. | ins_call
  2464. break;
  2465. @@ -4460,7 +5144,12 @@ static void build_ins(BuildCtx *ctx, BCO
  2466. | slwi TMP3, RC, 3
  2467. | bge >5 // Index points after array part?
  2468. | lwzx TMP2, TMP1, TMP3
  2469. + |.if FPU
  2470. | lfdx f0, TMP1, TMP3
  2471. + |.else
  2472. + | lwzux CARG1, TMP3, TMP1
  2473. + | lwz CARG2, 4(TMP3)
  2474. + |.endif
  2475. | checknil TMP2
  2476. | lwz INS, -4(PC)
  2477. | beq >4
  2478. @@ -4472,7 +5161,12 @@ static void build_ins(BuildCtx *ctx, BCO
  2479. |.endif
  2480. | addi RC, RC, 1
  2481. | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
  2482. + |.if FPU
  2483. | stfd f0, 8(RA)
  2484. + |.else
  2485. + | stw CARG1, 8(RA)
  2486. + | stw CARG2, 12(RA)
  2487. + |.endif
  2488. | decode_RD4 TMP1, INS
  2489. | stw RC, -4(RA) // Update control var.
  2490. | add PC, TMP1, TMP3
  2491. @@ -4497,17 +5191,38 @@ static void build_ins(BuildCtx *ctx, BCO
  2492. | slwi RB, RC, 3
  2493. | sub TMP3, TMP3, RB
  2494. | lwzx RB, TMP2, TMP3
  2495. + |.if FPU
  2496. | lfdx f0, TMP2, TMP3
  2497. + |.else
  2498. + | add CARG3, TMP2, TMP3
  2499. + | lwz CARG1, 0(CARG3)
  2500. + | lwz CARG2, 4(CARG3)
  2501. + |.endif
  2502. | add NODE:TMP3, TMP2, TMP3
  2503. | checknil RB
  2504. | lwz INS, -4(PC)
  2505. | beq >7
  2506. + |.if FPU
  2507. | lfd f1, NODE:TMP3->key
  2508. + |.else
  2509. + | lwz CARG3, NODE:TMP3->key.u32.hi
  2510. + | lwz CARG4, NODE:TMP3->key.u32.lo
  2511. + |.endif
  2512. | addis TMP2, PC, -(BCBIAS_J*4 >> 16)
  2513. + |.if FPU
  2514. | stfd f0, 8(RA)
  2515. + |.else
  2516. + | stw CARG1, 8(RA)
  2517. + | stw CARG2, 12(RA)
  2518. + |.endif
  2519. | add RC, RC, TMP0
  2520. | decode_RD4 TMP1, INS
  2521. + |.if FPU
  2522. | stfd f1, 0(RA)
  2523. + |.else
  2524. + | stw CARG3, 0(RA)
  2525. + | stw CARG4, 4(RA)
  2526. + |.endif
  2527. | addi RC, RC, 1
  2528. | add PC, TMP1, TMP2
  2529. | stw RC, -4(RA) // Update control var.
  2530. @@ -4573,9 +5288,19 @@ static void build_ins(BuildCtx *ctx, BCO
  2531. | subi TMP2, TMP2, 16
  2532. | ble >2 // No vararg slots?
  2533. |1: // Copy vararg slots to destination slots.
  2534. + |.if FPU
  2535. | lfd f0, 0(RC)
  2536. + |.else
  2537. + | lwz CARG1, 0(RC)
  2538. + | lwz CARG2, 4(RC)
  2539. + |.endif
  2540. | addi RC, RC, 8
  2541. + |.if FPU
  2542. | stfd f0, 0(RA)
  2543. + |.else
  2544. + | stw CARG1, 0(RA)
  2545. + | stw CARG2, 4(RA)
  2546. + |.endif
  2547. | cmplw RA, TMP2
  2548. | cmplw cr1, RC, TMP3
  2549. | bge >3 // All destination slots filled?
  2550. @@ -4598,9 +5323,19 @@ static void build_ins(BuildCtx *ctx, BCO
  2551. | addi MULTRES, TMP1, 8
  2552. | bgt >7
  2553. |6:
  2554. + |.if FPU
  2555. | lfd f0, 0(RC)
  2556. + |.else
  2557. + | lwz CARG1, 0(RC)
  2558. + | lwz CARG2, 4(RC)
  2559. + |.endif
  2560. | addi RC, RC, 8
  2561. + |.if FPU
  2562. | stfd f0, 0(RA)
  2563. + |.else
  2564. + | stw CARG1, 0(RA)
  2565. + | stw CARG2, 4(RA)
  2566. + |.endif
  2567. | cmplw RC, TMP3
  2568. | addi RA, RA, 8
  2569. | blt <6 // More vararg slots?
  2570. @@ -4651,14 +5386,38 @@ static void build_ins(BuildCtx *ctx, BCO
  2571. | li TMP1, 0
  2572. |2:
  2573. | addi TMP3, TMP1, 8
  2574. + |.if FPU
  2575. | lfdx f0, RA, TMP1
  2576. + |.else
  2577. + | add CARG3, RA, TMP1
  2578. + | lwz CARG1, 0(CARG3)
  2579. + | lwz CARG2, 4(CARG3)
  2580. + |.endif
  2581. | cmpw TMP3, RC
  2582. + |.if FPU
  2583. | stfdx f0, TMP2, TMP1
  2584. + |.else
  2585. + | add CARG3, TMP2, TMP1
  2586. + | stw CARG1, 0(CARG3)
  2587. + | stw CARG2, 4(CARG3)
  2588. + |.endif
  2589. | beq >3
  2590. | addi TMP1, TMP3, 8
  2591. + |.if FPU
  2592. | lfdx f1, RA, TMP3
  2593. + |.else
  2594. + | add CARG3, RA, TMP3
  2595. + | lwz CARG1, 0(CARG3)
  2596. + | lwz CARG2, 4(CARG3)
  2597. + |.endif
  2598. | cmpw TMP1, RC
  2599. + |.if FPU
  2600. | stfdx f1, TMP2, TMP3
  2601. + |.else
  2602. + | add CARG3, TMP2, TMP3
  2603. + | stw CARG1, 0(CARG3)
  2604. + | stw CARG2, 4(CARG3)
  2605. + |.endif
  2606. | bne <2
  2607. |3:
  2608. |5:
  2609. @@ -4700,8 +5459,15 @@ static void build_ins(BuildCtx *ctx, BCO
  2610. | subi TMP2, BASE, 8
  2611. | decode_RB8 RB, INS
  2612. if (op == BC_RET1) {
  2613. + |.if FPU
  2614. | lfd f0, 0(RA)
  2615. | stfd f0, 0(TMP2)
  2616. + |.else
  2617. + | lwz CARG1, 0(RA)
  2618. + | lwz CARG2, 4(RA)
  2619. + | stw CARG1, 0(TMP2)
  2620. + | stw CARG2, 4(TMP2)
  2621. + |.endif
  2622. }
  2623. |5:
  2624. | cmplw RB, RD
  2625. @@ -4762,11 +5528,11 @@ static void build_ins(BuildCtx *ctx, BCO
  2626. |4:
  2627. | stw CARG1, FORL_IDX*8+4(RA)
  2628. } else {
  2629. - | lwz TMP3, FORL_STEP*8(RA)
  2630. + | lwz SAVE0, FORL_STEP*8(RA)
  2631. | lwz CARG3, FORL_STEP*8+4(RA)
  2632. | lwz TMP2, FORL_STOP*8(RA)
  2633. | lwz CARG2, FORL_STOP*8+4(RA)
  2634. - | cmplw cr7, TMP3, TISNUM
  2635. + | cmplw cr7, SAVE0, TISNUM
  2636. | cmplw cr1, TMP2, TISNUM
  2637. | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
  2638. | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
  2639. @@ -4809,41 +5575,80 @@ static void build_ins(BuildCtx *ctx, BCO
  2640. if (vk) {
  2641. |.if DUALNUM
  2642. |9: // FP loop.
  2643. + |.if FPU
  2644. | lfd f1, FORL_IDX*8(RA)
  2645. |.else
  2646. + | lwz CARG1, FORL_IDX*8(RA)
  2647. + | lwz CARG2, FORL_IDX*8+4(RA)
  2648. + |.endif
  2649. + |.else
  2650. | lfdux f1, RA, BASE
  2651. |.endif
  2652. + |.if FPU
  2653. | lfd f3, FORL_STEP*8(RA)
  2654. | lfd f2, FORL_STOP*8(RA)
  2655. - | lwz TMP3, FORL_STEP*8(RA)
  2656. | fadd f1, f1, f3
  2657. | stfd f1, FORL_IDX*8(RA)
  2658. + |.else
  2659. + | lwz CARG3, FORL_STEP*8(RA)
  2660. + | lwz CARG4, FORL_STEP*8+4(RA)
  2661. + | mr SAVE1, RD
  2662. + | blex __adddf3
  2663. + | mr RD, SAVE1
  2664. + | stw CRET1, FORL_IDX*8(RA)
  2665. + | stw CRET2, FORL_IDX*8+4(RA)
  2666. + | lwz CARG3, FORL_STOP*8(RA)
  2667. + | lwz CARG4, FORL_STOP*8+4(RA)
  2668. + |.endif
  2669. + | lwz SAVE0, FORL_STEP*8(RA)
  2670. } else {
  2671. |.if DUALNUM
  2672. |9: // FP loop.
  2673. |.else
  2674. | lwzux TMP1, RA, BASE
  2675. - | lwz TMP3, FORL_STEP*8(RA)
  2676. + | lwz SAVE0, FORL_STEP*8(RA)
  2677. | lwz TMP2, FORL_STOP*8(RA)
  2678. | cmplw cr0, TMP1, TISNUM
  2679. - | cmplw cr7, TMP3, TISNUM
  2680. + | cmplw cr7, SAVE0, TISNUM
  2681. | cmplw cr1, TMP2, TISNUM
  2682. |.endif
  2683. + |.if FPU
  2684. | lfd f1, FORL_IDX*8(RA)
  2685. + |.else
  2686. + | lwz CARG1, FORL_IDX*8(RA)
  2687. + | lwz CARG2, FORL_IDX*8+4(RA)
  2688. + |.endif
  2689. | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
  2690. | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
  2691. + |.if FPU
  2692. | lfd f2, FORL_STOP*8(RA)
  2693. + |.else
  2694. + | lwz CARG3, FORL_STOP*8(RA)
  2695. + | lwz CARG4, FORL_STOP*8+4(RA)
  2696. + |.endif
  2697. | bge ->vmeta_for
  2698. }
  2699. - | cmpwi cr6, TMP3, 0
  2700. + | cmpwi cr6, SAVE0, 0
  2701. if (op != BC_JFORL) {
  2702. | srwi RD, RD, 1
  2703. }
  2704. + |.if FPU
  2705. | stfd f1, FORL_EXT*8(RA)
  2706. + |.else
  2707. + | stw CARG1, FORL_EXT*8(RA)
  2708. + | stw CARG2, FORL_EXT*8+4(RA)
  2709. + |.endif
  2710. if (op != BC_JFORL) {
  2711. | add RD, PC, RD
  2712. }
  2713. + |.if FPU
  2714. | fcmpu cr0, f1, f2
  2715. + |.else
  2716. + | mr SAVE1, RD
  2717. + | blex __ledf2
  2718. + | cmpwi CRET1, 0
  2719. + | mr RD, SAVE1
  2720. + |.endif
  2721. if (op == BC_JFORI) {
  2722. | addis PC, RD, -(BCBIAS_J*4 >> 16)
  2723. }