From 71b7bc88341945f13f3951e2bb5fd247b639ff7a Mon Sep 17 00:00:00 2001
From: Mike Pall <mike>
Date: Sun, 3 Sep 2017 23:20:53 +0200
Subject: [PATCH] PPC: Add soft-float support to JIT compiler backend.

Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
Sponsored by Cisco Systems, Inc.
---
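
A note on the shape of the change (the sketch below is illustrative, not
part of the patch): under LJ_SOFTFP an IRT_NUM value is split across two
GPRs. The IR instruction itself carries the low word and a trailing
IR_HIOP twin carries the high word, which the snapshot and store paths
also reuse as the type tag. FPR code paths are therefore compiled out
and replaced by GPR-pair variants. The helper name sketch_num_load is
invented for illustration; every other identifier appears in the patch
below.

static void sketch_num_load(ASMState *as, IRIns *ir)
{
#if !LJ_SOFTFP
  /* Hard-float build: the number lives in one FPR, one 64 bit load. */
  Reg dest = ra_dest(as, ir, RSET_FPR);
  emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
#else
  /* Soft-float build: lo word for ir, hi word for its IR_HIOP twin.
  ** On big-endian PPC the hi word sits at the lower stack offset,
  ** matching the rhi/rlo loads in asm_strto below.
  */
  Reg hi = ra_dest(as, ir+1, RSET_GPR);
  Reg lo = ra_dest(as, ir, rset_exclude(RSET_GPR, hi));
  emit_tai(as, PPCI_LWZ, hi, RID_SP, SPOFS_TMP);
  emit_tai(as, PPCI_LWZ, lo, RID_SP, SPOFS_TMP+4);
#endif
}
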
 src/lj_arch.h    |   1 -
 src/lj_asm_ppc.h | 321 ++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 278 insertions(+), 44 deletions(-)
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -273,7 +273,6 @@
 #endif
 #if LJ_ABI_SOFTFP
-#define LJ_ARCH_NOJIT 1 /* NYI */
 #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
 #else
 #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -226,6 +226,7 @@ static void asm_fusexrefx(ASMState *as,
 emit_tab(as, pi, rt, left, right);
 }
+#if !LJ_SOFTFP
 /* Fuse to multiply-add/sub instruction. */
 static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
 {
@@ -245,6 +246,7 @@ static int asm_fusemadd(ASMState *as, IR
 }
 return 0;
 }
+#endif
 /* -- Calls --------------------------------------------------------------- */
@@ -253,13 +255,17 @@ static void asm_gencall(ASMState *as, co
 {
 uint32_t n, nargs = CCI_XNARGS(ci);
 int32_t ofs = 8;
- Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
+ Reg gpr = REGARG_FIRSTGPR;
+#if !LJ_SOFTFP
+ Reg fpr = REGARG_FIRSTFPR;
+#endif
 if ((void *)ci->func)
 emit_call(as, (void *)ci->func);
 for (n = 0; n < nargs; n++) { /* Setup args. */
 IRRef ref = args[n];
 if (ref) {
 IRIns *ir = IR(ref);
+#if !LJ_SOFTFP
 if (irt_isfp(ir->t)) {
 if (fpr <= REGARG_LASTFPR) {
 lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */
@@ -271,7 +277,9 @@ static void asm_gencall(ASMState *as, co
 emit_spstore(as, ir, r, ofs);
 ofs += irt_isnum(ir->t) ? 8 : 4;
 }
- } else {
+ } else
+#endif
+ {
 if (gpr <= REGARG_LASTGPR) {
 lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */
 ra_leftov(as, gpr, ref);
@@ -290,8 +298,10 @@ static void asm_gencall(ASMState *as, co
 }
 checkmclim(as);
 }
+#if !LJ_SOFTFP
 if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */
 emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
+#endif
 }
 /* Setup result reg/sp for call. Evict scratch regs. */
@@ -299,8 +309,10 @@ static void asm_setupresult(ASMState *as
 {
 RegSet drop = RSET_SCRATCH;
 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
+#if !LJ_SOFTFP
 if ((ci->flags & CCI_NOFPRCLOBBER))
 drop &= ~RSET_FPR;
+#endif
 if (ra_hasreg(ir->r))
 rset_clear(drop, ir->r); /* Dest reg handled below. */
 if (hiop && ra_hasreg((ir+1)->r))
@@ -308,7 +320,7 @@ static void asm_setupresult(ASMState *as
 ra_evictset(as, drop); /* Evictions must be performed first. */
 if (ra_used(ir)) {
 lua_assert(!irt_ispri(ir->t));
- if (irt_isfp(ir->t)) {
+ if (!LJ_SOFTFP && irt_isfp(ir->t)) {
 if ((ci->flags & CCI_CASTU64)) {
 /* Use spill slot or temp slots. */
 int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
@@ -377,6 +389,7 @@ static void asm_retf(ASMState *as, IRIns
 /* -- Type conversions ---------------------------------------------------- */
+#if !LJ_SOFTFP
 static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
 {
 RegSet allow = RSET_FPR;
@@ -409,15 +422,23 @@ static void asm_tobit(ASMState *as, IRIn
 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
 emit_fab(as, PPCI_FADD, tmp, left, right);
 }
+#endif
 static void asm_conv(ASMState *as, IRIns *ir)
 {
 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+#if !LJ_SOFTFP
 int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+#endif
 IRRef lref = ir->op1;
- lua_assert(irt_type(ir->t) != st);
 lua_assert(!(irt_isint64(ir->t) ||
 (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */
+#if LJ_SOFTFP
+ /* FP conversions are handled by SPLIT. */
+ lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
+ /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
+#else
+ lua_assert(irt_type(ir->t) != st);
 if (irt_isfp(ir->t)) {
 Reg dest = ra_dest(as, ir, RSET_FPR);
 if (stfp) { /* FP to FP conversion. */
@@ -476,7 +497,9 @@ static void asm_conv(ASMState *as, IRIns
 emit_fb(as, PPCI_FCTIWZ, tmp, left);
 }
 }
- } else {
+ } else
+#endif
+ {
 Reg dest = ra_dest(as, ir, RSET_GPR);
 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -496,17 +519,41 @@ static void asm_strto(ASMState *as, IRIn
 {
 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
 IRRef args[2];
- int32_t ofs;
+ int32_t ofs = SPOFS_TMP;
+#if LJ_SOFTFP
+ ra_evictset(as, RSET_SCRATCH);
+ if (ra_used(ir)) {
+ if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
+ (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) {
+ int i;
+ for (i = 0; i < 2; i++) {
+ Reg r = (ir+i)->r;
+ if (ra_hasreg(r)) {
+ ra_free(as, r);
+ ra_modified(as, r);
+ emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
+ }
+ }
+ ofs = sps_scale(ir->s & ~1);
+ } else {
+ Reg rhi = ra_dest(as, ir+1, RSET_GPR);
+ Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
+ emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs);
+ emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4);
+ }
+ }
+#else
 RegSet drop = RSET_SCRATCH;
 if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */
 ra_evictset(as, drop);
+ if (ir->s) ofs = sps_scale(ir->s);
+#endif
 asm_guardcc(as, CC_EQ);
 emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */
 args[0] = ir->op1; /* GCstr *str */
 args[1] = ASMREF_TMP1; /* TValue *n */
 asm_gencall(as, ci, args);
 /* Store the result to the spill slot or temp slots. */
- ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
 emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
 }
@@ -530,7 +577,10 @@ static void asm_tvptr(ASMState *as, Reg
 Reg src = ra_alloc1(as, ref, allow);
 emit_setgl(as, src, tmptv.gcr);
 }
- type = ra_allock(as, irt_toitype(ir->t), allow);
+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
+ type = ra_alloc1(as, ref+1, allow);
+ else
+ type = ra_allock(as, irt_toitype(ir->t), allow);
 emit_setgl(as, type, tmptv.it);
 }
 }
@@ -574,11 +624,27 @@ static void asm_href(ASMState *as, IRIns
 Reg tisnum = RID_NONE, tmpnum = RID_NONE;
 IRRef refkey = ir->op2;
 IRIns *irkey = IR(refkey);
+ int isk = irref_isk(refkey);
 IRType1 kt = irkey->t;
 uint32_t khash;
 MCLabel l_end, l_loop, l_next;
 rset_clear(allow, tab);
+#if LJ_SOFTFP
+ if (!isk) {
+ key = ra_alloc1(as, refkey, allow);
+ rset_clear(allow, key);
+ if (irkey[1].o == IR_HIOP) {
+ if (ra_hasreg((irkey+1)->r)) {
+ tmpnum = (irkey+1)->r;
+ ra_noweak(as, tmpnum);
+ } else {
+ tmpnum = ra_allocref(as, refkey+1, allow);
+ }
+ rset_clear(allow, tmpnum);
+ }
+ }
+#else
 if (irt_isnum(kt)) {
 key = ra_alloc1(as, refkey, RSET_FPR);
 tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
@@ -588,6 +654,7 @@ static void asm_href(ASMState *as, IRIns
 key = ra_alloc1(as, refkey, allow);
 rset_clear(allow, key);
 }
+#endif
 tmp2 = ra_scratch(as, allow);
 rset_clear(allow, tmp2);
@@ -610,7 +677,7 @@ static void asm_href(ASMState *as, IRIns
 asm_guardcc(as, CC_EQ);
 else
 emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
- if (irt_isnum(kt)) {
+ if (!LJ_SOFTFP && irt_isnum(kt)) {
 emit_fab(as, PPCI_FCMPU, 0, tmpnum, key);
 emit_condbranch(as, PPCI_BC, CC_GE, l_next);
 emit_ab(as, PPCI_CMPLW, tmp1, tisnum);
@@ -620,7 +687,10 @@ static void asm_href(ASMState *as, IRIns
 emit_ab(as, PPCI_CMPW, tmp2, key);
 emit_condbranch(as, PPCI_BC, CC_NE, l_next);
 }
- emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
+ if (LJ_SOFTFP && ra_hasreg(tmpnum))
+ emit_ab(as, PPCI_CMPW, tmp1, tmpnum);
+ else
+ emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
 if (!irt_ispri(kt))
 emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
 }
@@ -629,19 +699,19 @@ static void asm_href(ASMState *as, IRIns
 (((char *)as->mcp-(char *)l_loop) & 0xffffu);
 /* Load main position relative to tab->node into dest. */
- khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
+ khash = isk ? ir_khash(irkey) : 1;
 if (khash == 0) {
 emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
 } else {
 Reg tmphash = tmp1;
- if (irref_isk(refkey))
+ if (isk)
 tmphash = ra_allock(as, khash, allow);
 emit_tab(as, PPCI_ADD, dest, dest, tmp1);
 emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node));
 emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash);
 emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
 emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
- if (irref_isk(refkey)) {
+ if (isk) {
 /* Nothing to do. */
 } else if (irt_isstr(kt)) {
 emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash));
@@ -651,13 +721,19 @@ static void asm_href(ASMState *as, IRIns
 emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2);
 emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31);
 emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2);
- if (irt_isnum(kt)) {
+ if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
+#if LJ_SOFTFP
+ emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
+ emit_rotlwi(as, dest, tmp1, HASH_ROT1);
+ emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum);
+#else
 int32_t ofs = ra_spill(as, irkey);
 emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1);
 emit_rotlwi(as, dest, tmp1, HASH_ROT1);
 emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1);
 emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4);
 emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs);
+#endif
 } else {
 emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
 emit_rotlwi(as, dest, tmp1, HASH_ROT1);
@@ -784,8 +860,8 @@ static PPCIns asm_fxloadins(IRIns *ir)
 case IRT_U8: return PPCI_LBZ;
 case IRT_I16: return PPCI_LHA;
 case IRT_U16: return PPCI_LHZ;
- case IRT_NUM: return PPCI_LFD;
- case IRT_FLOAT: return PPCI_LFS;
+ case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_LFD;
+ case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS;
 default: return PPCI_LWZ;
 }
 }
@@ -795,8 +871,8 @@ static PPCIns asm_fxstoreins(IRIns *ir)
 switch (irt_type(ir->t)) {
 case IRT_I8: case IRT_U8: return PPCI_STB;
 case IRT_I16: case IRT_U16: return PPCI_STH;
- case IRT_NUM: return PPCI_STFD;
- case IRT_FLOAT: return PPCI_STFS;
+ case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_STFD;
+ case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS;
 default: return PPCI_STW;
 }
 }
@@ -839,7 +915,8 @@ static void asm_fstore(ASMState *as, IRI
 static void asm_xload(ASMState *as, IRIns *ir)
 {
- Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+ Reg dest = ra_dest(as, ir,
+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
 lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
 if (irt_isi8(ir->t))
 emit_as(as, PPCI_EXTSB, dest, dest);
@@ -857,7 +934,8 @@ static void asm_xstore_(ASMState *as, IR
 Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
 asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src));
 } else {
- Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+ Reg src = ra_alloc1(as, ir->op2,
+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
 rset_exclude(RSET_GPR, src), ofs);
 }
@@ -871,10 +949,19 @@ static void asm_ahuvload(ASMState *as, I
 Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx;
 RegSet allow = RSET_GPR;
 int32_t ofs = AHUREF_LSX;
+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) {
+ t.irt = IRT_NUM;
+ if (ra_used(ir+1)) {
+ type = ra_dest(as, ir+1, allow);
+ rset_clear(allow, type);
+ }
+ ofs = 0;
+ }
 if (ra_used(ir)) {
- lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
- if (!irt_isnum(t)) ofs = 0;
- dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
+ lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+ irt_isint(ir->t) || irt_isaddr(ir->t));
+ if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0;
+ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
 rset_clear(allow, dest);
 }
 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
@@ -883,12 +970,13 @@ static void asm_ahuvload(ASMState *as, I
 asm_guardcc(as, CC_GE);
 emit_ab(as, PPCI_CMPLW, type, tisnum);
 if (ra_hasreg(dest)) {
- if (ofs == AHUREF_LSX) {
+ if (!LJ_SOFTFP && ofs == AHUREF_LSX) {
 tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR,
 (idx&255)), (idx>>8)));
 emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp);
 } else {
- emit_fai(as, PPCI_LFD, dest, idx, ofs);
+ emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx,
+ ofs+4*LJ_SOFTFP);
 }
 }
 } else {
@@ -911,7 +999,7 @@ static void asm_ahustore(ASMState *as, I
 int32_t ofs = AHUREF_LSX;
 if (ir->r == RID_SINK)
 return;
- if (irt_isnum(ir->t)) {
+ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
 src = ra_alloc1(as, ir->op2, RSET_FPR);
 } else {
 if (!irt_ispri(ir->t)) {
@@ -919,11 +1007,14 @@ static void asm_ahustore(ASMState *as, I
 rset_clear(allow, src);
 ofs = 0;
 }
- type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
+ type = ra_alloc1(as, (ir+1)->op2, allow);
+ else
+ type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
 rset_clear(allow, type);
 }
 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
- if (irt_isnum(ir->t)) {
+ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
 if (ofs == AHUREF_LSX) {
 emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP);
 emit_slwi(as, RID_TMP, (idx>>8), 3);
@@ -948,21 +1039,33 @@ static void asm_sload(ASMState *as, IRIn
 IRType1 t = ir->t;
 Reg dest = RID_NONE, type = RID_NONE, base;
 RegSet allow = RSET_GPR;
+ int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
+ if (hiop)
+ t.irt = IRT_NUM;
 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
- lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
+ lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
 lua_assert(LJ_DUALNUM ||
 !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
+#if LJ_SOFTFP
+ lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */
+ if (hiop && ra_used(ir+1)) {
+ type = ra_dest(as, ir+1, allow);
+ rset_clear(allow, type);
+ }
+#else
 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
 dest = ra_scratch(as, RSET_FPR);
 asm_tointg(as, ir, dest);
 t.irt = IRT_NUM; /* Continue with a regular number type check. */
- } else if (ra_used(ir)) {
+ } else
+#endif
+ if (ra_used(ir)) {
 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
- dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
+ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
 rset_clear(allow, dest);
 base = ra_alloc1(as, REF_BASE, allow);
 rset_clear(allow, base);
- if ((ir->op2 & IRSLOAD_CONVERT)) {
+ if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
 if (irt_isint(t)) {
 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
 dest = ra_scratch(as, RSET_FPR);
@@ -994,10 +1097,13 @@ dotypecheck:
 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
 Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
 asm_guardcc(as, CC_GE);
- emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum);
+#if !LJ_SOFTFP
 type = RID_TMP;
+#endif
+ emit_ab(as, PPCI_CMPLW, type, tisnum);
 }
- if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4);
+ if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest,
+ base, ofs-(LJ_SOFTFP?0:4));
 } else {
 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
 asm_guardcc(as, CC_NE);
@@ -1119,6 +1225,7 @@ static void asm_obar(ASMState *as, IRIns
 /* -- Arithmetic and logic operations ------------------------------------- */
+#if !LJ_SOFTFP
 static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi)
 {
 Reg dest = ra_dest(as, ir, RSET_FPR);
@@ -1146,13 +1253,17 @@ static void asm_fpmath(ASMState *as, IRI
 else
 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
 }
+#endif
 static void asm_add(ASMState *as, IRIns *ir)
 {
+#if !LJ_SOFTFP
 if (irt_isnum(ir->t)) {
 if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD))
 asm_fparith(as, ir, PPCI_FADD);
- } else {
+ } else
+#endif
+ {
 Reg dest = ra_dest(as, ir, RSET_GPR);
 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
 PPCIns pi;
@@ -1191,10 +1302,13 @@ static void asm_add(ASMState *as, IRIns
 static void asm_sub(ASMState *as, IRIns *ir)
 {
+#if !LJ_SOFTFP
 if (irt_isnum(ir->t)) {
 if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB))
 asm_fparith(as, ir, PPCI_FSUB);
- } else {
+ } else
+#endif
+ {
 PPCIns pi = PPCI_SUBF;
 Reg dest = ra_dest(as, ir, RSET_GPR);
 Reg left, right;
@@ -1220,9 +1334,12 @@ static void asm_sub(ASMState *as, IRIns
 static void asm_mul(ASMState *as, IRIns *ir)
 {
+#if !LJ_SOFTFP
 if (irt_isnum(ir->t)) {
 asm_fparith(as, ir, PPCI_FMUL);
- } else {
+ } else
+#endif
+ {
 PPCIns pi = PPCI_MULLW;
 Reg dest = ra_dest(as, ir, RSET_GPR);
 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
@@ -1250,9 +1367,12 @@ static void asm_mul(ASMState *as, IRIns
 static void asm_neg(ASMState *as, IRIns *ir)
 {
+#if !LJ_SOFTFP
 if (irt_isnum(ir->t)) {
 asm_fpunary(as, ir, PPCI_FNEG);
- } else {
+ } else
+#endif
+ {
 Reg dest, left;
 PPCIns pi = PPCI_NEG;
 if (as->flagmcp == as->mcp) {
@@ -1563,9 +1683,40 @@ static void asm_bitshift(ASMState *as, I
 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
 #define asm_bror(as, ir) lua_assert(0)
+#if LJ_SOFTFP
+static void asm_sfpmin_max(ASMState *as, IRIns *ir)
+{
+ CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp];
+ IRRef args[4];
+ MCLabel l_right, l_end;
+ Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR);
+ Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR);
+ Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR);
+ PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE;
+ righthi = (lefthi >> 8); lefthi &= 255;
+ rightlo = (leftlo >> 8); leftlo &= 255;
+ args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
+ args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
+ l_end = emit_label(as);
+ if (desthi != righthi) emit_mr(as, desthi, righthi);
+ if (destlo != rightlo) emit_mr(as, destlo, rightlo);
+ l_right = emit_label(as);
+ if (l_end != l_right) emit_jmp(as, l_end);
+ if (desthi != lefthi) emit_mr(as, desthi, lefthi);
+ if (destlo != leftlo) emit_mr(as, destlo, leftlo);
+ if (l_right == as->mcp+1) {
+ cond ^= 4; l_right = l_end; ++as->mcp;
+ }
+ emit_condbranch(as, PPCI_BC, cond, l_right);
+ ra_evictset(as, RSET_SCRATCH);
+ emit_cmpi(as, RID_RET, 1);
+ asm_gencall(as, &ci, args);
+}
+#endif
+
 static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
 {
- if (irt_isnum(ir->t)) {
+ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
 Reg dest = ra_dest(as, ir, RSET_FPR);
 Reg tmp = dest;
 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
@@ -1653,7 +1804,7 @@ static void asm_intcomp_(ASMState *as, I
 static void asm_comp(ASMState *as, IRIns *ir)
 {
 PPCCC cc = asm_compmap[ir->o];
- if (irt_isnum(ir->t)) {
+ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
 right = (left >> 8); left &= 255;
 asm_guardcc(as, (cc >> 4));
@@ -1674,6 +1825,44 @@ static void asm_comp(ASMState *as, IRIns
 #define asm_equal(as, ir) asm_comp(as, ir)
+#if LJ_SOFTFP
+/* SFP comparisons. */
+static void asm_sfpcomp(ASMState *as, IRIns *ir)
+{
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
+ RegSet drop = RSET_SCRATCH;
+ Reg r;
+ IRRef args[4];
+ args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
+ args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
+
+ for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) {
+ if (!rset_test(as->freeset, r) &&
+ regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
+ rset_clear(drop, r);
+ }
+ ra_evictset(as, drop);
+ asm_setupresult(as, ir, ci);
+ switch ((IROp)ir->o) {
+ case IR_ULT:
+ asm_guardcc(as, CC_EQ);
+ emit_ai(as, PPCI_CMPWI, RID_RET, 0);
+ case IR_ULE:
+ asm_guardcc(as, CC_EQ);
+ emit_ai(as, PPCI_CMPWI, RID_RET, 1);
+ break;
+ case IR_GE: case IR_GT:
+ asm_guardcc(as, CC_EQ);
+ emit_ai(as, PPCI_CMPWI, RID_RET, 2);
+ default:
+ asm_guardcc(as, (asm_compmap[ir->o] & 0xf));
+ emit_ai(as, PPCI_CMPWI, RID_RET, 0);
+ break;
+ }
+ asm_gencall(as, ci, args);
+}
+#endif
+
 #if LJ_HASFFI
 /* 64 bit integer comparisons. */
 static void asm_comp64(ASMState *as, IRIns *ir)
@@ -1703,19 +1892,36 @@ static void asm_comp64(ASMState *as, IRI
 /* Hiword op of a split 64 bit op. Previous op must be the loword op. */
 static void asm_hiop(ASMState *as, IRIns *ir)
 {
-#if LJ_HASFFI
+#if LJ_HASFFI || LJ_SOFTFP
 /* HIOP is marked as a store because it needs its own DCE logic. */
 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
 as->curins--; /* Always skip the CONV. */
+#if LJ_HASFFI && !LJ_SOFTFP
 if (usehi || uselo)
 asm_conv64(as, ir);
 return;
+#endif
 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
 as->curins--; /* Always skip the loword comparison. */
+#if LJ_SOFTFP
+ if (!irt_isint(ir->t)) {
+ asm_sfpcomp(as, ir-1);
+ return;
+ }
+#endif
+#if LJ_HASFFI
 asm_comp64(as, ir);
+#endif
+ return;
+#if LJ_SOFTFP
+ } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
+ as->curins--; /* Always skip the loword min/max. */
+ if (uselo || usehi)
+ asm_sfpmin_max(as, ir-1);
 return;
+#endif
 } else if ((ir-1)->o == IR_XSTORE) {
 as->curins--; /* Handle both stores here. */
 if ((ir-1)->r != RID_SINK) {
@@ -1726,14 +1932,27 @@ static void asm_hiop(ASMState *as, IRIns
 }
 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
 switch ((ir-1)->o) {
+#if LJ_HASFFI
 case IR_ADD: as->curins--; asm_add64(as, ir); break;
 case IR_SUB: as->curins--; asm_sub64(as, ir); break;
 case IR_NEG: as->curins--; asm_neg64(as, ir); break;
+#endif
+#if LJ_SOFTFP
+ case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+ case IR_STRTO:
+ if (!uselo)
+ ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
+ break;
+#endif
 case IR_CALLN:
+ case IR_CALLS:
 case IR_CALLXS:
 if (!uselo)
 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
 break;
+#if LJ_SOFTFP
+ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
+#endif
 case IR_CNEWI:
 /* Nothing to do here. Handled by lo op itself. */
 break;
@@ -1797,8 +2016,19 @@ static void asm_stack_restore(ASMState *
 if ((sn & SNAP_NORESTORE))
 continue;
 if (irt_isnum(ir->t)) {
+#if LJ_SOFTFP
+ Reg tmp;
+ RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
+ lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */
+ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
+ emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
+ if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
+ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
+ emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
+#else
 Reg src = ra_alloc1(as, ref, RSET_FPR);
 emit_fai(as, PPCI_STFD, src, RID_BASE, ofs);
+#endif
 } else {
 Reg type;
 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
@@ -1811,6 +2041,10 @@ static void asm_stack_restore(ASMState *
 if ((sn & (SNAP_CONT|SNAP_FRAME))) {
 if (s == 0) continue; /* Do not overwrite link to previous frame. */
 type = ra_allock(as, (int32_t)(*flinks--), allow);
+#if LJ_SOFTFP
+ } else if ((sn & SNAP_SOFTFPNUM)) {
+ type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
+#endif
 } else {
 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
 }
@@ -1947,14 +2181,15 @@ static Reg asm_setup_call_slots(ASMState
 int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
 asm_collectargs(as, ir, ci, args);
 for (i = 0; i < nargs; i++)
- if (args[i] && irt_isfp(IR(args[i])->t)) {
+ if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {
 if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1;
 } else {
 if (ngpr > 0) ngpr--; else nslots++;
 }
 if (nslots > as->evenspill) /* Leave room for args in stack slots. */
 as->evenspill = nslots;
- return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
+ return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) :
+ REGSP_HINT(RID_RET);
 }
 static void asm_setup_target(ASMState *as)