You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

361 lines
13 KiB

  1. /*
  2. Public domain by Andrew M. <liquidsun@gmail.com>
  3. */
  4. /*
  5. Arithmetic modulo the group order n = 2^252 + 27742317777372353535851937790883648493 = 7237005577332262213973186563042994240857116359379907606001950938285454250989
  6. k = 32
  7. b = 1 << 8 = 256
  8. m = 2^252 + 27742317777372353535851937790883648493 = 0x1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ed
  9. mu = floor( b^(k*2) / m ) = 0xfffffffffffffffffffffffffffffffeb2106215d086329a7ed9ce5a30a2c131b
  10. */
  11. #define bignum256modm_bits_per_limb 56
  12. #define bignum256modm_limb_size 5
  13. typedef uint64_t bignum256modm_element_t;
  14. typedef bignum256modm_element_t bignum256modm[5];
  15. static const bignum256modm modm_m = {
  16. 0x12631a5cf5d3ed,
  17. 0xf9dea2f79cd658,
  18. 0x000000000014de,
  19. 0x00000000000000,
  20. 0x00000010000000
  21. };
  22. static const bignum256modm modm_mu = {
  23. 0x9ce5a30a2c131b,
  24. 0x215d086329a7ed,
  25. 0xffffffffeb2106,
  26. 0xffffffffffffff,
  27. 0x00000fffffffff
  28. };
  29. static bignum256modm_element_t
  30. lt_modm(bignum256modm_element_t a, bignum256modm_element_t b) {
  31. return (a - b) >> 63;
  32. }
  33. static void
  34. reduce256_modm(bignum256modm r) {
  35. bignum256modm t;
  36. bignum256modm_element_t b = 0, pb, mask;
  37. /* t = r - m */
  38. pb = 0;
  39. pb += modm_m[0]; b = lt_modm(r[0], pb); t[0] = (r[0] - pb + (b << 56)); pb = b;
  40. pb += modm_m[1]; b = lt_modm(r[1], pb); t[1] = (r[1] - pb + (b << 56)); pb = b;
  41. pb += modm_m[2]; b = lt_modm(r[2], pb); t[2] = (r[2] - pb + (b << 56)); pb = b;
  42. pb += modm_m[3]; b = lt_modm(r[3], pb); t[3] = (r[3] - pb + (b << 56)); pb = b;
  43. pb += modm_m[4]; b = lt_modm(r[4], pb); t[4] = (r[4] - pb + (b << 32));
  44. /* keep r if r was smaller than m */
  45. mask = b - 1;
  46. r[0] ^= mask & (r[0] ^ t[0]);
  47. r[1] ^= mask & (r[1] ^ t[1]);
  48. r[2] ^= mask & (r[2] ^ t[2]);
  49. r[3] ^= mask & (r[3] ^ t[3]);
  50. r[4] ^= mask & (r[4] ^ t[4]);
  51. }
  52. static void
  53. barrett_reduce256_modm(bignum256modm r, const bignum256modm q1, const bignum256modm r1) {
  54. bignum256modm q3, r2;
  55. uint128_t c, mul;
  56. bignum256modm_element_t f, b, pb;
  57. /* q1 = x >> 248 = 264 bits = 5 56 bit elements
  58. q2 = mu * q1
  59. q3 = (q2 / 256(32+1)) = q2 / (2^8)^(32+1) = q2 >> 264 */
  60. mul64x64_128(c, modm_mu[0], q1[3]) mul64x64_128(mul, modm_mu[3], q1[0]) add128(c, mul) mul64x64_128(mul, modm_mu[1], q1[2]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[1]) add128(c, mul) shr128(f, c, 56);
  61. mul64x64_128(c, modm_mu[0], q1[4]) add128_64(c, f) mul64x64_128(mul, modm_mu[4], q1[0]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[1]) add128(c, mul) mul64x64_128(mul, modm_mu[1], q1[3]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[2]) add128(c, mul)
  62. f = lo128(c); q3[0] = (f >> 40) & 0xffff; shr128(f, c, 56);
  63. mul64x64_128(c, modm_mu[4], q1[1]) add128_64(c, f) mul64x64_128(mul, modm_mu[1], q1[4]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[3]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[2]) add128(c, mul)
  64. f = lo128(c); q3[0] |= (f << 16) & 0xffffffffffffff; q3[1] = (f >> 40) & 0xffff; shr128(f, c, 56);
  65. mul64x64_128(c, modm_mu[4], q1[2]) add128_64(c, f) mul64x64_128(mul, modm_mu[2], q1[4]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[3]) add128(c, mul)
  66. f = lo128(c); q3[1] |= (f << 16) & 0xffffffffffffff; q3[2] = (f >> 40) & 0xffff; shr128(f, c, 56);
  67. mul64x64_128(c, modm_mu[4], q1[3]) add128_64(c, f) mul64x64_128(mul, modm_mu[3], q1[4]) add128(c, mul)
  68. f = lo128(c); q3[2] |= (f << 16) & 0xffffffffffffff; q3[3] = (f >> 40) & 0xffff; shr128(f, c, 56);
  69. mul64x64_128(c, modm_mu[4], q1[4]) add128_64(c, f)
  70. f = lo128(c); q3[3] |= (f << 16) & 0xffffffffffffff; q3[4] = (f >> 40) & 0xffff; shr128(f, c, 56);
  71. q3[4] |= (f << 16);
  72. mul64x64_128(c, modm_m[0], q3[0])
  73. r2[0] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);
  74. mul64x64_128(c, modm_m[0], q3[1]) add128_64(c, f) mul64x64_128(mul, modm_m[1], q3[0]) add128(c, mul)
  75. r2[1] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);
  76. mul64x64_128(c, modm_m[0], q3[2]) add128_64(c, f) mul64x64_128(mul, modm_m[2], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[1]) add128(c, mul)
  77. r2[2] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);
  78. mul64x64_128(c, modm_m[0], q3[3]) add128_64(c, f) mul64x64_128(mul, modm_m[3], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[2]) add128(c, mul) mul64x64_128(mul, modm_m[2], q3[1]) add128(c, mul)
  79. r2[3] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);
  80. mul64x64_128(c, modm_m[0], q3[4]) add128_64(c, f) mul64x64_128(mul, modm_m[4], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[3], q3[1]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[3]) add128(c, mul) mul64x64_128(mul, modm_m[2], q3[2]) add128(c, mul)
  81. r2[4] = lo128(c) & 0x0000ffffffffff;
  82. pb = 0;
  83. pb += r2[0]; b = lt_modm(r1[0], pb); r[0] = (r1[0] - pb + (b << 56)); pb = b;
  84. pb += r2[1]; b = lt_modm(r1[1], pb); r[1] = (r1[1] - pb + (b << 56)); pb = b;
  85. pb += r2[2]; b = lt_modm(r1[2], pb); r[2] = (r1[2] - pb + (b << 56)); pb = b;
  86. pb += r2[3]; b = lt_modm(r1[3], pb); r[3] = (r1[3] - pb + (b << 56)); pb = b;
  87. pb += r2[4]; b = lt_modm(r1[4], pb); r[4] = (r1[4] - pb + (b << 40));
  88. reduce256_modm(r);
  89. reduce256_modm(r);
  90. }
  91. static void
  92. add256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) {
  93. bignum256modm_element_t c;
  94. c = x[0] + y[0]; r[0] = c & 0xffffffffffffff; c >>= 56;
  95. c += x[1] + y[1]; r[1] = c & 0xffffffffffffff; c >>= 56;
  96. c += x[2] + y[2]; r[2] = c & 0xffffffffffffff; c >>= 56;
  97. c += x[3] + y[3]; r[3] = c & 0xffffffffffffff; c >>= 56;
  98. c += x[4] + y[4]; r[4] = c;
  99. reduce256_modm(r);
  100. }
  101. static void
  102. mul256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) {
  103. bignum256modm q1, r1;
  104. uint128_t c, mul;
  105. bignum256modm_element_t f;
  106. mul64x64_128(c, x[0], y[0])
  107. f = lo128(c); r1[0] = f & 0xffffffffffffff; shr128(f, c, 56);
  108. mul64x64_128(c, x[0], y[1]) add128_64(c, f) mul64x64_128(mul, x[1], y[0]) add128(c, mul)
  109. f = lo128(c); r1[1] = f & 0xffffffffffffff; shr128(f, c, 56);
  110. mul64x64_128(c, x[0], y[2]) add128_64(c, f) mul64x64_128(mul, x[2], y[0]) add128(c, mul) mul64x64_128(mul, x[1], y[1]) add128(c, mul)
  111. f = lo128(c); r1[2] = f & 0xffffffffffffff; shr128(f, c, 56);
  112. mul64x64_128(c, x[0], y[3]) add128_64(c, f) mul64x64_128(mul, x[3], y[0]) add128(c, mul) mul64x64_128(mul, x[1], y[2]) add128(c, mul) mul64x64_128(mul, x[2], y[1]) add128(c, mul)
  113. f = lo128(c); r1[3] = f & 0xffffffffffffff; shr128(f, c, 56);
  114. mul64x64_128(c, x[0], y[4]) add128_64(c, f) mul64x64_128(mul, x[4], y[0]) add128(c, mul) mul64x64_128(mul, x[3], y[1]) add128(c, mul) mul64x64_128(mul, x[1], y[3]) add128(c, mul) mul64x64_128(mul, x[2], y[2]) add128(c, mul)
  115. f = lo128(c); r1[4] = f & 0x0000ffffffffff; q1[0] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
  116. mul64x64_128(c, x[4], y[1]) add128_64(c, f) mul64x64_128(mul, x[1], y[4]) add128(c, mul) mul64x64_128(mul, x[2], y[3]) add128(c, mul) mul64x64_128(mul, x[3], y[2]) add128(c, mul)
  117. f = lo128(c); q1[0] |= (f << 32) & 0xffffffffffffff; q1[1] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
  118. mul64x64_128(c, x[4], y[2]) add128_64(c, f) mul64x64_128(mul, x[2], y[4]) add128(c, mul) mul64x64_128(mul, x[3], y[3]) add128(c, mul)
  119. f = lo128(c); q1[1] |= (f << 32) & 0xffffffffffffff; q1[2] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
  120. mul64x64_128(c, x[4], y[3]) add128_64(c, f) mul64x64_128(mul, x[3], y[4]) add128(c, mul)
  121. f = lo128(c); q1[2] |= (f << 32) & 0xffffffffffffff; q1[3] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
  122. mul64x64_128(c, x[4], y[4]) add128_64(c, f)
  123. f = lo128(c); q1[3] |= (f << 32) & 0xffffffffffffff; q1[4] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
  124. q1[4] |= (f << 32);
  125. barrett_reduce256_modm(r, q1, r1);
  126. }
  127. static void
  128. expand256_modm(bignum256modm out, const unsigned char *in, size_t len) {
  129. unsigned char work[64] = {0};
  130. bignum256modm_element_t x[16];
  131. bignum256modm q1;
  132. memcpy(work, in, len);
  133. x[0] = U8TO64_LE(work + 0);
  134. x[1] = U8TO64_LE(work + 8);
  135. x[2] = U8TO64_LE(work + 16);
  136. x[3] = U8TO64_LE(work + 24);
  137. x[4] = U8TO64_LE(work + 32);
  138. x[5] = U8TO64_LE(work + 40);
  139. x[6] = U8TO64_LE(work + 48);
  140. x[7] = U8TO64_LE(work + 56);
  141. /* r1 = (x mod 256^(32+1)) = x mod (2^8)(31+1) = x & ((1 << 264) - 1) */
  142. out[0] = ( x[0]) & 0xffffffffffffff;
  143. out[1] = ((x[ 0] >> 56) | (x[ 1] << 8)) & 0xffffffffffffff;
  144. out[2] = ((x[ 1] >> 48) | (x[ 2] << 16)) & 0xffffffffffffff;
  145. out[3] = ((x[ 2] >> 40) | (x[ 3] << 24)) & 0xffffffffffffff;
  146. out[4] = ((x[ 3] >> 32) | (x[ 4] << 32)) & 0x0000ffffffffff;
  147. /* under 252 bits, no need to reduce */
  148. if (len < 32)
  149. return;
  150. /* q1 = x >> 248 = 264 bits */
  151. q1[0] = ((x[ 3] >> 56) | (x[ 4] << 8)) & 0xffffffffffffff;
  152. q1[1] = ((x[ 4] >> 48) | (x[ 5] << 16)) & 0xffffffffffffff;
  153. q1[2] = ((x[ 5] >> 40) | (x[ 6] << 24)) & 0xffffffffffffff;
  154. q1[3] = ((x[ 6] >> 32) | (x[ 7] << 32)) & 0xffffffffffffff;
  155. q1[4] = ((x[ 7] >> 24) );
  156. barrett_reduce256_modm(out, q1, out);
  157. }
  158. static void
  159. expand_raw256_modm(bignum256modm out, const unsigned char in[32]) {
  160. bignum256modm_element_t x[4];
  161. x[0] = U8TO64_LE(in + 0);
  162. x[1] = U8TO64_LE(in + 8);
  163. x[2] = U8TO64_LE(in + 16);
  164. x[3] = U8TO64_LE(in + 24);
  165. out[0] = ( x[0]) & 0xffffffffffffff;
  166. out[1] = ((x[ 0] >> 56) | (x[ 1] << 8)) & 0xffffffffffffff;
  167. out[2] = ((x[ 1] >> 48) | (x[ 2] << 16)) & 0xffffffffffffff;
  168. out[3] = ((x[ 2] >> 40) | (x[ 3] << 24)) & 0xffffffffffffff;
  169. out[4] = ((x[ 3] >> 32) ) & 0x000000ffffffff;
  170. }
  171. static void
  172. contract256_modm(unsigned char out[32], const bignum256modm in) {
  173. U64TO8_LE(out + 0, (in[0] ) | (in[1] << 56));
  174. U64TO8_LE(out + 8, (in[1] >> 8) | (in[2] << 48));
  175. U64TO8_LE(out + 16, (in[2] >> 16) | (in[3] << 40));
  176. U64TO8_LE(out + 24, (in[3] >> 24) | (in[4] << 32));
  177. }
  178. static void
  179. contract256_window4_modm(signed char r[64], const bignum256modm in) {
  180. char carry;
  181. signed char *quads = r;
  182. bignum256modm_element_t i, j, v, m;
  183. for (i = 0; i < 5; i++) {
  184. v = in[i];
  185. m = (i == 4) ? 8 : 14;
  186. for (j = 0; j < m; j++) {
  187. *quads++ = (v & 15);
  188. v >>= 4;
  189. }
  190. }
  191. /* making it signed */
  192. carry = 0;
  193. for(i = 0; i < 63; i++) {
  194. r[i] += carry;
  195. r[i+1] += (r[i] >> 4);
  196. r[i] &= 15;
  197. carry = (r[i] >> 3);
  198. r[i] -= (carry << 4);
  199. }
  200. r[63] += carry;
  201. }
  202. static void
  203. contract256_slidingwindow_modm(signed char r[256], const bignum256modm s, int windowsize) {
  204. int i,j,k,b;
  205. int m = (1 << (windowsize - 1)) - 1, soplen = 256;
  206. signed char *bits = r;
  207. bignum256modm_element_t v;
  208. /* first put the binary expansion into r */
  209. for (i = 0; i < 4; i++) {
  210. v = s[i];
  211. for (j = 0; j < 56; j++, v >>= 1)
  212. *bits++ = (v & 1);
  213. }
  214. v = s[4];
  215. for (j = 0; j < 32; j++, v >>= 1)
  216. *bits++ = (v & 1);
  217. /* Making it sliding window */
  218. for (j = 0; j < soplen; j++) {
  219. if (!r[j])
  220. continue;
  221. for (b = 1; (b < (soplen - j)) && (b <= 6); b++) {
  222. if ((r[j] + (r[j + b] << b)) <= m) {
  223. r[j] += r[j + b] << b;
  224. r[j + b] = 0;
  225. } else if ((r[j] - (r[j + b] << b)) >= -m) {
  226. r[j] -= r[j + b] << b;
  227. for (k = j + b; k < soplen; k++) {
  228. if (!r[k]) {
  229. r[k] = 1;
  230. break;
  231. }
  232. r[k] = 0;
  233. }
  234. } else if (r[j + b]) {
  235. break;
  236. }
  237. }
  238. }
  239. }
  240. /*
  241. helpers for batch verifcation, are allowed to be vartime
  242. */
  243. /* out = a - b, a must be larger than b */
  244. static void
  245. sub256_modm_batch(bignum256modm out, const bignum256modm a, const bignum256modm b, size_t limbsize) {
  246. size_t i = 0;
  247. bignum256modm_element_t carry = 0;
  248. switch (limbsize) {
  249. case 4: out[i] = (a[i] - b[i]) ; carry = (out[i] >> 63); out[i] &= 0xffffffffffffff; i++;
  250. case 3: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 63); out[i] &= 0xffffffffffffff; i++;
  251. case 2: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 63); out[i] &= 0xffffffffffffff; i++;
  252. case 1: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 63); out[i] &= 0xffffffffffffff; i++;
  253. case 0:
  254. default: out[i] = (a[i] - b[i]) - carry;
  255. }
  256. }
  257. /* is a < b */
  258. static int
  259. lt256_modm_batch(const bignum256modm a, const bignum256modm b, size_t limbsize) {
  260. size_t i = 0;
  261. bignum256modm_element_t t, carry = 0;
  262. switch (limbsize) {
  263. case 4: t = (a[i] - b[i]) ; carry = (t >> 63); i++;
  264. case 3: t = (a[i] - b[i]) - carry; carry = (t >> 63); i++;
  265. case 2: t = (a[i] - b[i]) - carry; carry = (t >> 63); i++;
  266. case 1: t = (a[i] - b[i]) - carry; carry = (t >> 63); i++;
  267. case 0: t = (a[i] - b[i]) - carry; carry = (t >> 63);
  268. }
  269. return (int)carry;
  270. }
  271. /* is a <= b */
  272. static int
  273. lte256_modm_batch(const bignum256modm a, const bignum256modm b, size_t limbsize) {
  274. size_t i = 0;
  275. bignum256modm_element_t t, carry = 0;
  276. switch (limbsize) {
  277. case 4: t = (b[i] - a[i]) ; carry = (t >> 63); i++;
  278. case 3: t = (b[i] - a[i]) - carry; carry = (t >> 63); i++;
  279. case 2: t = (b[i] - a[i]) - carry; carry = (t >> 63); i++;
  280. case 1: t = (b[i] - a[i]) - carry; carry = (t >> 63); i++;
  281. case 0: t = (b[i] - a[i]) - carry; carry = (t >> 63);
  282. }
  283. return (int)!carry;
  284. }
  285. /* is a == 0 */
  286. static int
  287. iszero256_modm_batch(const bignum256modm a) {
  288. size_t i;
  289. for (i = 0; i < 5; i++)
  290. if (a[i])
  291. return 0;
  292. return 1;
  293. }
  294. /* is a == 1 */
  295. static int
  296. isone256_modm_batch(const bignum256modm a) {
  297. size_t i;
  298. for (i = 0; i < 5; i++)
  299. if (a[i] != ((i) ? 0 : 1))
  300. return 0;
  301. return 1;
  302. }
  303. /* can a fit in to (at most) 128 bits */
  304. static int
  305. isatmost128bits256_modm_batch(const bignum256modm a) {
  306. uint64_t mask =
  307. ((a[4] ) | /* 32 */
  308. (a[3] ) | /* 88 */
  309. (a[2] & 0xffffffffff0000));
  310. return (mask == 0);
  311. }