You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

413 lines
14 KiB

  1. /*
  2. Public domain by Adam Langley <agl@imperialviolet.org> &
  3. Andrew M. <liquidsun@gmail.com>
  4. See: https://github.com/floodyberry/curve25519-donna
  5. 64bit integer curve25519 implementation
  6. */
  7. typedef uint64_t bignum25519[5];
  8. static const uint64_t reduce_mask_40 = ((uint64_t)1 << 40) - 1;
  9. static const uint64_t reduce_mask_51 = ((uint64_t)1 << 51) - 1;
  10. static const uint64_t reduce_mask_56 = ((uint64_t)1 << 56) - 1;
  11. /* out = in */
  12. DONNA_INLINE static void
  13. curve25519_copy(bignum25519 out, const bignum25519 in) {
  14. out[0] = in[0];
  15. out[1] = in[1];
  16. out[2] = in[2];
  17. out[3] = in[3];
  18. out[4] = in[4];
  19. }
  20. /* out = a + b */
  21. DONNA_INLINE static void
  22. curve25519_add(bignum25519 out, const bignum25519 a, const bignum25519 b) {
  23. out[0] = a[0] + b[0];
  24. out[1] = a[1] + b[1];
  25. out[2] = a[2] + b[2];
  26. out[3] = a[3] + b[3];
  27. out[4] = a[4] + b[4];
  28. }
  29. /* out = a + b, where a and/or b are the result of a basic op (add,sub) */
  30. DONNA_INLINE static void
  31. curve25519_add_after_basic(bignum25519 out, const bignum25519 a, const bignum25519 b) {
  32. out[0] = a[0] + b[0];
  33. out[1] = a[1] + b[1];
  34. out[2] = a[2] + b[2];
  35. out[3] = a[3] + b[3];
  36. out[4] = a[4] + b[4];
  37. }
  38. DONNA_INLINE static void
  39. curve25519_add_reduce(bignum25519 out, const bignum25519 a, const bignum25519 b) {
  40. uint64_t c;
  41. out[0] = a[0] + b[0] ; c = (out[0] >> 51); out[0] &= reduce_mask_51;
  42. out[1] = a[1] + b[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51;
  43. out[2] = a[2] + b[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51;
  44. out[3] = a[3] + b[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51;
  45. out[4] = a[4] + b[4] + c; c = (out[4] >> 51); out[4] &= reduce_mask_51;
  46. out[0] += c * 19;
  47. }
  48. /* multiples of p */
  49. static const uint64_t twoP0 = 0x0fffffffffffda;
  50. static const uint64_t twoP1234 = 0x0ffffffffffffe;
  51. static const uint64_t fourP0 = 0x1fffffffffffb4;
  52. static const uint64_t fourP1234 = 0x1ffffffffffffc;
  53. /* out = a - b */
  54. DONNA_INLINE static void
  55. curve25519_sub(bignum25519 out, const bignum25519 a, const bignum25519 b) {
  56. out[0] = a[0] + twoP0 - b[0];
  57. out[1] = a[1] + twoP1234 - b[1];
  58. out[2] = a[2] + twoP1234 - b[2];
  59. out[3] = a[3] + twoP1234 - b[3];
  60. out[4] = a[4] + twoP1234 - b[4];
  61. }
  62. /* out = a - b, where a and/or b are the result of a basic op (add,sub) */
  63. DONNA_INLINE static void
  64. curve25519_sub_after_basic(bignum25519 out, const bignum25519 a, const bignum25519 b) {
  65. out[0] = a[0] + fourP0 - b[0];
  66. out[1] = a[1] + fourP1234 - b[1];
  67. out[2] = a[2] + fourP1234 - b[2];
  68. out[3] = a[3] + fourP1234 - b[3];
  69. out[4] = a[4] + fourP1234 - b[4];
  70. }
  71. DONNA_INLINE static void
  72. curve25519_sub_reduce(bignum25519 out, const bignum25519 a, const bignum25519 b) {
  73. uint64_t c;
  74. out[0] = a[0] + fourP0 - b[0] ; c = (out[0] >> 51); out[0] &= reduce_mask_51;
  75. out[1] = a[1] + fourP1234 - b[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51;
  76. out[2] = a[2] + fourP1234 - b[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51;
  77. out[3] = a[3] + fourP1234 - b[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51;
  78. out[4] = a[4] + fourP1234 - b[4] + c; c = (out[4] >> 51); out[4] &= reduce_mask_51;
  79. out[0] += c * 19;
  80. }
  81. /* out = -a */
  82. DONNA_INLINE static void
  83. curve25519_neg(bignum25519 out, const bignum25519 a) {
  84. uint64_t c;
  85. out[0] = twoP0 - a[0] ; c = (out[0] >> 51); out[0] &= reduce_mask_51;
  86. out[1] = twoP1234 - a[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51;
  87. out[2] = twoP1234 - a[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51;
  88. out[3] = twoP1234 - a[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51;
  89. out[4] = twoP1234 - a[4] + c; c = (out[4] >> 51); out[4] &= reduce_mask_51;
  90. out[0] += c * 19;
  91. }
  92. /* out = a * b */
  93. DONNA_INLINE static void
  94. curve25519_mul(bignum25519 out, const bignum25519 in2, const bignum25519 in) {
  95. #if !defined(HAVE_NATIVE_UINT128)
  96. uint128_t mul;
  97. #endif
  98. uint128_t t[5];
  99. uint64_t r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c;
  100. r0 = in[0];
  101. r1 = in[1];
  102. r2 = in[2];
  103. r3 = in[3];
  104. r4 = in[4];
  105. s0 = in2[0];
  106. s1 = in2[1];
  107. s2 = in2[2];
  108. s3 = in2[3];
  109. s4 = in2[4];
  110. #if defined(HAVE_NATIVE_UINT128)
  111. t[0] = ((uint128_t) r0) * s0;
  112. t[1] = ((uint128_t) r0) * s1 + ((uint128_t) r1) * s0;
  113. t[2] = ((uint128_t) r0) * s2 + ((uint128_t) r2) * s0 + ((uint128_t) r1) * s1;
  114. t[3] = ((uint128_t) r0) * s3 + ((uint128_t) r3) * s0 + ((uint128_t) r1) * s2 + ((uint128_t) r2) * s1;
  115. t[4] = ((uint128_t) r0) * s4 + ((uint128_t) r4) * s0 + ((uint128_t) r3) * s1 + ((uint128_t) r1) * s3 + ((uint128_t) r2) * s2;
  116. #else
  117. mul64x64_128(t[0], r0, s0)
  118. mul64x64_128(t[1], r0, s1) mul64x64_128(mul, r1, s0) add128(t[1], mul)
  119. mul64x64_128(t[2], r0, s2) mul64x64_128(mul, r2, s0) add128(t[2], mul) mul64x64_128(mul, r1, s1) add128(t[2], mul)
  120. mul64x64_128(t[3], r0, s3) mul64x64_128(mul, r3, s0) add128(t[3], mul) mul64x64_128(mul, r1, s2) add128(t[3], mul) mul64x64_128(mul, r2, s1) add128(t[3], mul)
  121. mul64x64_128(t[4], r0, s4) mul64x64_128(mul, r4, s0) add128(t[4], mul) mul64x64_128(mul, r3, s1) add128(t[4], mul) mul64x64_128(mul, r1, s3) add128(t[4], mul) mul64x64_128(mul, r2, s2) add128(t[4], mul)
  122. #endif
  123. r1 *= 19;
  124. r2 *= 19;
  125. r3 *= 19;
  126. r4 *= 19;
  127. #if defined(HAVE_NATIVE_UINT128)
  128. t[0] += ((uint128_t) r4) * s1 + ((uint128_t) r1) * s4 + ((uint128_t) r2) * s3 + ((uint128_t) r3) * s2;
  129. t[1] += ((uint128_t) r4) * s2 + ((uint128_t) r2) * s4 + ((uint128_t) r3) * s3;
  130. t[2] += ((uint128_t) r4) * s3 + ((uint128_t) r3) * s4;
  131. t[3] += ((uint128_t) r4) * s4;
  132. #else
  133. mul64x64_128(mul, r4, s1) add128(t[0], mul) mul64x64_128(mul, r1, s4) add128(t[0], mul) mul64x64_128(mul, r2, s3) add128(t[0], mul) mul64x64_128(mul, r3, s2) add128(t[0], mul)
  134. mul64x64_128(mul, r4, s2) add128(t[1], mul) mul64x64_128(mul, r2, s4) add128(t[1], mul) mul64x64_128(mul, r3, s3) add128(t[1], mul)
  135. mul64x64_128(mul, r4, s3) add128(t[2], mul) mul64x64_128(mul, r3, s4) add128(t[2], mul)
  136. mul64x64_128(mul, r4, s4) add128(t[3], mul)
  137. #endif
  138. r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51);
  139. add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51);
  140. add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51);
  141. add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51);
  142. add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51);
  143. r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51;
  144. r1 += c;
  145. out[0] = r0;
  146. out[1] = r1;
  147. out[2] = r2;
  148. out[3] = r3;
  149. out[4] = r4;
  150. }
  151. DONNA_NOINLINE static void
  152. curve25519_mul_noinline(bignum25519 out, const bignum25519 in2, const bignum25519 in) {
  153. curve25519_mul(out, in2, in);
  154. }
  155. /* out = in^(2 * count) */
  156. DONNA_NOINLINE static void
  157. curve25519_square_times(bignum25519 out, const bignum25519 in, uint64_t count) {
  158. #if !defined(HAVE_NATIVE_UINT128)
  159. uint128_t mul;
  160. #endif
  161. uint128_t t[5];
  162. uint64_t r0,r1,r2,r3,r4,c;
  163. uint64_t d0,d1,d2,d4,d419;
  164. r0 = in[0];
  165. r1 = in[1];
  166. r2 = in[2];
  167. r3 = in[3];
  168. r4 = in[4];
  169. do {
  170. d0 = r0 * 2;
  171. d1 = r1 * 2;
  172. d2 = r2 * 2 * 19;
  173. d419 = r4 * 19;
  174. d4 = d419 * 2;
  175. #if defined(HAVE_NATIVE_UINT128)
  176. t[0] = ((uint128_t) r0) * r0 + ((uint128_t) d4) * r1 + (((uint128_t) d2) * (r3 ));
  177. t[1] = ((uint128_t) d0) * r1 + ((uint128_t) d4) * r2 + (((uint128_t) r3) * (r3 * 19));
  178. t[2] = ((uint128_t) d0) * r2 + ((uint128_t) r1) * r1 + (((uint128_t) d4) * (r3 ));
  179. t[3] = ((uint128_t) d0) * r3 + ((uint128_t) d1) * r2 + (((uint128_t) r4) * (d419 ));
  180. t[4] = ((uint128_t) d0) * r4 + ((uint128_t) d1) * r3 + (((uint128_t) r2) * (r2 ));
  181. #else
  182. mul64x64_128(t[0], r0, r0) mul64x64_128(mul, d4, r1) add128(t[0], mul) mul64x64_128(mul, d2, r3) add128(t[0], mul)
  183. mul64x64_128(t[1], d0, r1) mul64x64_128(mul, d4, r2) add128(t[1], mul) mul64x64_128(mul, r3, r3 * 19) add128(t[1], mul)
  184. mul64x64_128(t[2], d0, r2) mul64x64_128(mul, r1, r1) add128(t[2], mul) mul64x64_128(mul, d4, r3) add128(t[2], mul)
  185. mul64x64_128(t[3], d0, r3) mul64x64_128(mul, d1, r2) add128(t[3], mul) mul64x64_128(mul, r4, d419) add128(t[3], mul)
  186. mul64x64_128(t[4], d0, r4) mul64x64_128(mul, d1, r3) add128(t[4], mul) mul64x64_128(mul, r2, r2) add128(t[4], mul)
  187. #endif
  188. r0 = lo128(t[0]) & reduce_mask_51;
  189. r1 = lo128(t[1]) & reduce_mask_51; shl128(c, t[0], 13); r1 += c;
  190. r2 = lo128(t[2]) & reduce_mask_51; shl128(c, t[1], 13); r2 += c;
  191. r3 = lo128(t[3]) & reduce_mask_51; shl128(c, t[2], 13); r3 += c;
  192. r4 = lo128(t[4]) & reduce_mask_51; shl128(c, t[3], 13); r4 += c;
  193. shl128(c, t[4], 13); r0 += c * 19;
  194. c = r0 >> 51; r0 &= reduce_mask_51;
  195. r1 += c ; c = r1 >> 51; r1 &= reduce_mask_51;
  196. r2 += c ; c = r2 >> 51; r2 &= reduce_mask_51;
  197. r3 += c ; c = r3 >> 51; r3 &= reduce_mask_51;
  198. r4 += c ; c = r4 >> 51; r4 &= reduce_mask_51;
  199. r0 += c * 19;
  200. } while(--count);
  201. out[0] = r0;
  202. out[1] = r1;
  203. out[2] = r2;
  204. out[3] = r3;
  205. out[4] = r4;
  206. }
  207. DONNA_INLINE static void
  208. curve25519_square(bignum25519 out, const bignum25519 in) {
  209. #if !defined(HAVE_NATIVE_UINT128)
  210. uint128_t mul;
  211. #endif
  212. uint128_t t[5];
  213. uint64_t r0,r1,r2,r3,r4,c;
  214. uint64_t d0,d1,d2,d4,d419;
  215. r0 = in[0];
  216. r1 = in[1];
  217. r2 = in[2];
  218. r3 = in[3];
  219. r4 = in[4];
  220. d0 = r0 * 2;
  221. d1 = r1 * 2;
  222. d2 = r2 * 2 * 19;
  223. d419 = r4 * 19;
  224. d4 = d419 * 2;
  225. #if defined(HAVE_NATIVE_UINT128)
  226. t[0] = ((uint128_t) r0) * r0 + ((uint128_t) d4) * r1 + (((uint128_t) d2) * (r3 ));
  227. t[1] = ((uint128_t) d0) * r1 + ((uint128_t) d4) * r2 + (((uint128_t) r3) * (r3 * 19));
  228. t[2] = ((uint128_t) d0) * r2 + ((uint128_t) r1) * r1 + (((uint128_t) d4) * (r3 ));
  229. t[3] = ((uint128_t) d0) * r3 + ((uint128_t) d1) * r2 + (((uint128_t) r4) * (d419 ));
  230. t[4] = ((uint128_t) d0) * r4 + ((uint128_t) d1) * r3 + (((uint128_t) r2) * (r2 ));
  231. #else
  232. mul64x64_128(t[0], r0, r0) mul64x64_128(mul, d4, r1) add128(t[0], mul) mul64x64_128(mul, d2, r3) add128(t[0], mul)
  233. mul64x64_128(t[1], d0, r1) mul64x64_128(mul, d4, r2) add128(t[1], mul) mul64x64_128(mul, r3, r3 * 19) add128(t[1], mul)
  234. mul64x64_128(t[2], d0, r2) mul64x64_128(mul, r1, r1) add128(t[2], mul) mul64x64_128(mul, d4, r3) add128(t[2], mul)
  235. mul64x64_128(t[3], d0, r3) mul64x64_128(mul, d1, r2) add128(t[3], mul) mul64x64_128(mul, r4, d419) add128(t[3], mul)
  236. mul64x64_128(t[4], d0, r4) mul64x64_128(mul, d1, r3) add128(t[4], mul) mul64x64_128(mul, r2, r2) add128(t[4], mul)
  237. #endif
  238. r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51);
  239. add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51);
  240. add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51);
  241. add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51);
  242. add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51);
  243. r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51;
  244. r1 += c;
  245. out[0] = r0;
  246. out[1] = r1;
  247. out[2] = r2;
  248. out[3] = r3;
  249. out[4] = r4;
  250. }
  251. /* Take a little-endian, 32-byte number and expand it into polynomial form */
  252. DONNA_INLINE static void
  253. curve25519_expand(bignum25519 out, const unsigned char *in) {
  254. static const union { uint8_t b[2]; uint16_t s; } endian_check = {{1,0}};
  255. uint64_t x0,x1,x2,x3;
  256. if (endian_check.s == 1) {
  257. x0 = *(uint64_t *)(in + 0);
  258. x1 = *(uint64_t *)(in + 8);
  259. x2 = *(uint64_t *)(in + 16);
  260. x3 = *(uint64_t *)(in + 24);
  261. } else {
  262. #define F(s) \
  263. ((((uint64_t)in[s + 0]) ) | \
  264. (((uint64_t)in[s + 1]) << 8) | \
  265. (((uint64_t)in[s + 2]) << 16) | \
  266. (((uint64_t)in[s + 3]) << 24) | \
  267. (((uint64_t)in[s + 4]) << 32) | \
  268. (((uint64_t)in[s + 5]) << 40) | \
  269. (((uint64_t)in[s + 6]) << 48) | \
  270. (((uint64_t)in[s + 7]) << 56))
  271. x0 = F(0);
  272. x1 = F(8);
  273. x2 = F(16);
  274. x3 = F(24);
  275. }
  276. out[0] = x0 & reduce_mask_51; x0 = (x0 >> 51) | (x1 << 13);
  277. out[1] = x0 & reduce_mask_51; x1 = (x1 >> 38) | (x2 << 26);
  278. out[2] = x1 & reduce_mask_51; x2 = (x2 >> 25) | (x3 << 39);
  279. out[3] = x2 & reduce_mask_51; x3 = (x3 >> 12);
  280. out[4] = x3 & reduce_mask_51;
  281. }
  282. /* Take a fully reduced polynomial form number and contract it into a
  283. * little-endian, 32-byte array
  284. */
  285. DONNA_INLINE static void
  286. curve25519_contract(unsigned char *out, const bignum25519 input) {
  287. uint64_t t[5];
  288. uint64_t f, i;
  289. t[0] = input[0];
  290. t[1] = input[1];
  291. t[2] = input[2];
  292. t[3] = input[3];
  293. t[4] = input[4];
  294. #define curve25519_contract_carry() \
  295. t[1] += t[0] >> 51; t[0] &= reduce_mask_51; \
  296. t[2] += t[1] >> 51; t[1] &= reduce_mask_51; \
  297. t[3] += t[2] >> 51; t[2] &= reduce_mask_51; \
  298. t[4] += t[3] >> 51; t[3] &= reduce_mask_51;
  299. #define curve25519_contract_carry_full() curve25519_contract_carry() \
  300. t[0] += 19 * (t[4] >> 51); t[4] &= reduce_mask_51;
  301. #define curve25519_contract_carry_final() curve25519_contract_carry() \
  302. t[4] &= reduce_mask_51;
  303. curve25519_contract_carry_full()
  304. curve25519_contract_carry_full()
  305. /* now t is between 0 and 2^255-1, properly carried. */
  306. /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
  307. t[0] += 19;
  308. curve25519_contract_carry_full()
  309. /* now between 19 and 2^255-1 in both cases, and offset by 19. */
  310. t[0] += (reduce_mask_51 + 1) - 19;
  311. t[1] += (reduce_mask_51 + 1) - 1;
  312. t[2] += (reduce_mask_51 + 1) - 1;
  313. t[3] += (reduce_mask_51 + 1) - 1;
  314. t[4] += (reduce_mask_51 + 1) - 1;
  315. /* now between 2^255 and 2^256-20, and offset by 2^255. */
  316. curve25519_contract_carry_final()
  317. #define write51full(n,shift) \
  318. f = ((t[n] >> shift) | (t[n+1] << (51 - shift))); \
  319. for (i = 0; i < 8; i++, f >>= 8) *out++ = (unsigned char)f;
  320. #define write51(n) write51full(n,13*n)
  321. write51(0)
  322. write51(1)
  323. write51(2)
  324. write51(3)
  325. }
  326. #if !defined(ED25519_GCC_64BIT_CHOOSE)
  327. /* out = (flag) ? in : out */
  328. DONNA_INLINE static void
  329. curve25519_move_conditional_bytes(uint8_t out[96], const uint8_t in[96], uint64_t flag) {
  330. const uint64_t nb = flag - 1, b = ~nb;
  331. const uint64_t *inq = (const uint64_t *)in;
  332. uint64_t *outq = (uint64_t *)out;
  333. outq[0] = (outq[0] & nb) | (inq[0] & b);
  334. outq[1] = (outq[1] & nb) | (inq[1] & b);
  335. outq[2] = (outq[2] & nb) | (inq[2] & b);
  336. outq[3] = (outq[3] & nb) | (inq[3] & b);
  337. outq[4] = (outq[4] & nb) | (inq[4] & b);
  338. outq[5] = (outq[5] & nb) | (inq[5] & b);
  339. outq[6] = (outq[6] & nb) | (inq[6] & b);
  340. outq[7] = (outq[7] & nb) | (inq[7] & b);
  341. outq[8] = (outq[8] & nb) | (inq[8] & b);
  342. outq[9] = (outq[9] & nb) | (inq[9] & b);
  343. outq[10] = (outq[10] & nb) | (inq[10] & b);
  344. outq[11] = (outq[11] & nb) | (inq[11] & b);
  345. }
  346. /* if (iswap) swap(a, b) */
  347. DONNA_INLINE static void
  348. curve25519_swap_conditional(bignum25519 a, bignum25519 b, uint64_t iswap) {
  349. const uint64_t swap = (uint64_t)(-(int64_t)iswap);
  350. uint64_t x0,x1,x2,x3,x4;
  351. x0 = swap & (a[0] ^ b[0]); a[0] ^= x0; b[0] ^= x0;
  352. x1 = swap & (a[1] ^ b[1]); a[1] ^= x1; b[1] ^= x1;
  353. x2 = swap & (a[2] ^ b[2]); a[2] ^= x2; b[2] ^= x2;
  354. x3 = swap & (a[3] ^ b[3]); a[3] ^= x3; b[3] ^= x3;
  355. x4 = swap & (a[4] ^ b[4]); a[4] ^= x4; b[4] ^= x4;
  356. }
  357. #endif /* ED25519_GCC_64BIT_CHOOSE */
  358. #define ED25519_64BIT_TABLES