@ -0,0 +1,20 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
// This code was translated into a form compatible with 6a from the public | |||
// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html | |||
// +build amd64,!gccgo,!appengine | |||
// REDMASK51 is 2^51 - 1: ANDing a limb with it keeps the low 51 bits.
DATA ·REDMASK51(SB)/8, $0x0007FFFFFFFFFFFF | |||
GLOBL ·REDMASK51(SB), 8, $8 | |||
// _121666_213 is 121666 * 2^13 (121666 * 8192 = 996687872).
DATA ·_121666_213(SB)/8, $996687872 | |||
GLOBL ·_121666_213(SB), 8, $8 | |||
// _2P0 is 2^52 - 38 = 2 * (2^51 - 19).
// NOTE(review): presumably limb 0 of 2p in radix 2^51; its users are outside
// this view - confirm.
DATA ·_2P0(SB)/8, $0xFFFFFFFFFFFDA | |||
GLOBL ·_2P0(SB), 8, $8 | |||
// _2P1234 is 2^52 - 2 = 2 * (2^51 - 1).
// NOTE(review): presumably limbs 1..4 of 2p in radix 2^51 - confirm.
DATA ·_2P1234(SB)/8, $0xFFFFFFFFFFFFE | |||
GLOBL ·_2P1234(SB), 8, $8 |
@ -0,0 +1,88 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
// This code was translated into a form compatible with 6a from the public | |||
// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html | |||
// +build amd64,!gccgo,!appengine | |||
// func cswap(inout *[5]uint64, v uint64)
//
// cswap exchanges the ten 64-bit words at byte offsets 0..72 from inout with
// the ten words at byte offsets 80..152 when v == 1. For any other v every
// word is stored back unchanged. The choice is made with conditional moves,
// not branches.
//
// NOTE(review): the prototype names a *[5]uint64 (40 bytes) but the body reads
// and writes 160 bytes starting at inout, so callers must pass a pointer into
// a larger array.
TEXT ·cswap(SB),7,$0 | |||
MOVQ inout+0(FP),DI | |||
MOVQ v+8(FP),SI | |||
// Set the flags once. Only MOVQ/CMOVQ follow, so every CMOVQEQ below tests
// this same comparison (v == 1).
CMPQ SI,$1 | |||
// Words 0 and 1 of each half. R9 is the scratch register for the swap.
MOVQ 0(DI),SI | |||
MOVQ 80(DI),DX | |||
MOVQ 8(DI),CX | |||
MOVQ 88(DI),R8 | |||
MOVQ SI,R9 | |||
CMOVQEQ DX,SI | |||
CMOVQEQ R9,DX | |||
MOVQ CX,R9 | |||
CMOVQEQ R8,CX | |||
CMOVQEQ R9,R8 | |||
MOVQ SI,0(DI) | |||
MOVQ DX,80(DI) | |||
MOVQ CX,8(DI) | |||
MOVQ R8,88(DI) | |||
// Words 2 and 3 of each half.
MOVQ 16(DI),SI | |||
MOVQ 96(DI),DX | |||
MOVQ 24(DI),CX | |||
MOVQ 104(DI),R8 | |||
MOVQ SI,R9 | |||
CMOVQEQ DX,SI | |||
CMOVQEQ R9,DX | |||
MOVQ CX,R9 | |||
CMOVQEQ R8,CX | |||
CMOVQEQ R9,R8 | |||
MOVQ SI,16(DI) | |||
MOVQ DX,96(DI) | |||
MOVQ CX,24(DI) | |||
MOVQ R8,104(DI) | |||
// Words 4 and 5 of each half.
MOVQ 32(DI),SI | |||
MOVQ 112(DI),DX | |||
MOVQ 40(DI),CX | |||
MOVQ 120(DI),R8 | |||
MOVQ SI,R9 | |||
CMOVQEQ DX,SI | |||
CMOVQEQ R9,DX | |||
MOVQ CX,R9 | |||
CMOVQEQ R8,CX | |||
CMOVQEQ R9,R8 | |||
MOVQ SI,32(DI) | |||
MOVQ DX,112(DI) | |||
MOVQ CX,40(DI) | |||
MOVQ R8,120(DI) | |||
// Words 6 and 7 of each half.
MOVQ 48(DI),SI | |||
MOVQ 128(DI),DX | |||
MOVQ 56(DI),CX | |||
MOVQ 136(DI),R8 | |||
MOVQ SI,R9 | |||
CMOVQEQ DX,SI | |||
CMOVQEQ R9,DX | |||
MOVQ CX,R9 | |||
CMOVQEQ R8,CX | |||
CMOVQEQ R9,R8 | |||
MOVQ SI,48(DI) | |||
MOVQ DX,128(DI) | |||
MOVQ CX,56(DI) | |||
MOVQ R8,136(DI) | |||
// Words 8 and 9 of each half.
MOVQ 64(DI),SI | |||
MOVQ 144(DI),DX | |||
MOVQ 72(DI),CX | |||
MOVQ 152(DI),R8 | |||
MOVQ SI,R9 | |||
CMOVQEQ DX,SI | |||
CMOVQEQ R9,DX | |||
MOVQ CX,R9 | |||
CMOVQEQ R8,CX | |||
CMOVQEQ R9,R8 | |||
MOVQ SI,64(DI) | |||
MOVQ DX,144(DI) | |||
MOVQ CX,72(DI) | |||
MOVQ R8,152(DI) | |||
// NOTE(review): the Go prototype has no results, so these AX/DX loads look
// like leftovers from the translated C-ABI routine - confirm before removing.
MOVQ DI,AX | |||
MOVQ SI,DX | |||
RET |
@ -0,0 +1,841 @@ | |||
// Copyright 2013 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
// We have an implementation in amd64 assembly so this code is only run on
// non-amd64 platforms. The amd64 assembly does not support gccgo.
// +build !amd64 gccgo appengine | |||
package curve25519 | |||
// This code is a port of the public domain, "ref10" implementation of | |||
// curve25519 from SUPERCOP 20130419 by D. J. Bernstein. | |||
// fieldElement represents an element of the field GF(2^255 - 19). An element
// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
// t[3]+2^102 t[4]+...+2^230 t[9], i.e. the limb widths alternate between 26
// and 25 bits. Limbs are signed (int32) and bounds on each t[i] vary depending
// on context.
type fieldElement [10]int32 | |||
func feZero(fe *fieldElement) { | |||
for i := range fe { | |||
fe[i] = 0 | |||
} | |||
} | |||
func feOne(fe *fieldElement) { | |||
feZero(fe) | |||
fe[0] = 1 | |||
} | |||
func feAdd(dst, a, b *fieldElement) { | |||
for i := range dst { | |||
dst[i] = a[i] + b[i] | |||
} | |||
} | |||
func feSub(dst, a, b *fieldElement) { | |||
for i := range dst { | |||
dst[i] = a[i] - b[i] | |||
} | |||
} | |||
func feCopy(dst, src *fieldElement) { | |||
for i := range dst { | |||
dst[i] = src[i] | |||
} | |||
} | |||
// feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0. | |||
// | |||
// Preconditions: b in {0,1}. | |||
func feCSwap(f, g *fieldElement, b int32) { | |||
var x fieldElement | |||
b = -b | |||
for i := range x { | |||
x[i] = b & (f[i] ^ g[i]) | |||
} | |||
for i := range f { | |||
f[i] ^= x[i] | |||
} | |||
for i := range g { | |||
g[i] ^= x[i] | |||
} | |||
} | |||
// load3 decodes the first three bytes of in as a little-endian 24-bit value.
func load3(in []byte) int64 {
	return int64(in[0]) | int64(in[1])<<8 | int64(in[2])<<16
}
// load4 decodes the first four bytes of in as a little-endian 32-bit value.
// The result is never negative because it is assembled in an int64.
func load4(in []byte) int64 {
	return int64(in[0]) | int64(in[1])<<8 | int64(in[2])<<16 | int64(in[3])<<24
}
func feFromBytes(dst *fieldElement, src *[32]byte) { | |||
h0 := load4(src[:]) | |||
h1 := load3(src[4:]) << 6 | |||
h2 := load3(src[7:]) << 5 | |||
h3 := load3(src[10:]) << 3 | |||
h4 := load3(src[13:]) << 2 | |||
h5 := load4(src[16:]) | |||
h6 := load3(src[20:]) << 7 | |||
h7 := load3(src[23:]) << 5 | |||
h8 := load3(src[26:]) << 4 | |||
h9 := load3(src[29:]) << 2 | |||
var carry [10]int64 | |||
carry[9] = (h9 + 1<<24) >> 25 | |||
h0 += carry[9] * 19 | |||
h9 -= carry[9] << 25 | |||
carry[1] = (h1 + 1<<24) >> 25 | |||
h2 += carry[1] | |||
h1 -= carry[1] << 25 | |||
carry[3] = (h3 + 1<<24) >> 25 | |||
h4 += carry[3] | |||
h3 -= carry[3] << 25 | |||
carry[5] = (h5 + 1<<24) >> 25 | |||
h6 += carry[5] | |||
h5 -= carry[5] << 25 | |||
carry[7] = (h7 + 1<<24) >> 25 | |||
h8 += carry[7] | |||
h7 -= carry[7] << 25 | |||
carry[0] = (h0 + 1<<25) >> 26 | |||
h1 += carry[0] | |||
h0 -= carry[0] << 26 | |||
carry[2] = (h2 + 1<<25) >> 26 | |||
h3 += carry[2] | |||
h2 -= carry[2] << 26 | |||
carry[4] = (h4 + 1<<25) >> 26 | |||
h5 += carry[4] | |||
h4 -= carry[4] << 26 | |||
carry[6] = (h6 + 1<<25) >> 26 | |||
h7 += carry[6] | |||
h6 -= carry[6] << 26 | |||
carry[8] = (h8 + 1<<25) >> 26 | |||
h9 += carry[8] | |||
h8 -= carry[8] << 26 | |||
dst[0] = int32(h0) | |||
dst[1] = int32(h1) | |||
dst[2] = int32(h2) | |||
dst[3] = int32(h3) | |||
dst[4] = int32(h4) | |||
dst[5] = int32(h5) | |||
dst[6] = int32(h6) | |||
dst[7] = int32(h7) | |||
dst[8] = int32(h8) | |||
dst[9] = int32(h9) | |||
} | |||
// feToBytes marshals h to s. | |||
// Preconditions: | |||
// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. | |||
// | |||
// Write p=2^255-19; q=floor(h/p). | |||
// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). | |||
// | |||
// Proof: | |||
// Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. | |||
// Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4. | |||
// | |||
// Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). | |||
// Then 0<y<1. | |||
// | |||
// Write r=h-pq. | |||
// Have 0<=r<=p-1=2^255-20. | |||
// Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1. | |||
// | |||
// Write x=r+19(2^-255)r+y. | |||
// Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q. | |||
// | |||
// Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1)) | |||
// so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. | |||
func feToBytes(s *[32]byte, h *fieldElement) { | |||
var carry [10]int32 | |||
q := (19*h[9] + (1 << 24)) >> 25 | |||
q = (h[0] + q) >> 26 | |||
q = (h[1] + q) >> 25 | |||
q = (h[2] + q) >> 26 | |||
q = (h[3] + q) >> 25 | |||
q = (h[4] + q) >> 26 | |||
q = (h[5] + q) >> 25 | |||
q = (h[6] + q) >> 26 | |||
q = (h[7] + q) >> 25 | |||
q = (h[8] + q) >> 26 | |||
q = (h[9] + q) >> 25 | |||
// Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. | |||
h[0] += 19 * q | |||
// Goal: Output h-2^255 q, which is between 0 and 2^255-20. | |||
carry[0] = h[0] >> 26 | |||
h[1] += carry[0] | |||
h[0] -= carry[0] << 26 | |||
carry[1] = h[1] >> 25 | |||
h[2] += carry[1] | |||
h[1] -= carry[1] << 25 | |||
carry[2] = h[2] >> 26 | |||
h[3] += carry[2] | |||
h[2] -= carry[2] << 26 | |||
carry[3] = h[3] >> 25 | |||
h[4] += carry[3] | |||
h[3] -= carry[3] << 25 | |||
carry[4] = h[4] >> 26 | |||
h[5] += carry[4] | |||
h[4] -= carry[4] << 26 | |||
carry[5] = h[5] >> 25 | |||
h[6] += carry[5] | |||
h[5] -= carry[5] << 25 | |||
carry[6] = h[6] >> 26 | |||
h[7] += carry[6] | |||
h[6] -= carry[6] << 26 | |||
carry[7] = h[7] >> 25 | |||
h[8] += carry[7] | |||
h[7] -= carry[7] << 25 | |||
carry[8] = h[8] >> 26 | |||
h[9] += carry[8] | |||
h[8] -= carry[8] << 26 | |||
carry[9] = h[9] >> 25 | |||
h[9] -= carry[9] << 25 | |||
// h10 = carry9 | |||
// Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. | |||
// Have h[0]+...+2^230 h[9] between 0 and 2^255-1; | |||
// evidently 2^255 h10-2^255 q = 0. | |||
// Goal: Output h[0]+...+2^230 h[9]. | |||
s[0] = byte(h[0] >> 0) | |||
s[1] = byte(h[0] >> 8) | |||
s[2] = byte(h[0] >> 16) | |||
s[3] = byte((h[0] >> 24) | (h[1] << 2)) | |||
s[4] = byte(h[1] >> 6) | |||
s[5] = byte(h[1] >> 14) | |||
s[6] = byte((h[1] >> 22) | (h[2] << 3)) | |||
s[7] = byte(h[2] >> 5) | |||
s[8] = byte(h[2] >> 13) | |||
s[9] = byte((h[2] >> 21) | (h[3] << 5)) | |||
s[10] = byte(h[3] >> 3) | |||
s[11] = byte(h[3] >> 11) | |||
s[12] = byte((h[3] >> 19) | (h[4] << 6)) | |||
s[13] = byte(h[4] >> 2) | |||
s[14] = byte(h[4] >> 10) | |||
s[15] = byte(h[4] >> 18) | |||
s[16] = byte(h[5] >> 0) | |||
s[17] = byte(h[5] >> 8) | |||
s[18] = byte(h[5] >> 16) | |||
s[19] = byte((h[5] >> 24) | (h[6] << 1)) | |||
s[20] = byte(h[6] >> 7) | |||
s[21] = byte(h[6] >> 15) | |||
s[22] = byte((h[6] >> 23) | (h[7] << 3)) | |||
s[23] = byte(h[7] >> 5) | |||
s[24] = byte(h[7] >> 13) | |||
s[25] = byte((h[7] >> 21) | (h[8] << 4)) | |||
s[26] = byte(h[8] >> 4) | |||
s[27] = byte(h[8] >> 12) | |||
s[28] = byte((h[8] >> 20) | (h[9] << 6)) | |||
s[29] = byte(h[9] >> 2) | |||
s[30] = byte(h[9] >> 10) | |||
s[31] = byte(h[9] >> 18) | |||
} | |||
// feMul calculates h = f * g
// Can overlap h with f or g: every limb of f and g is copied into a local
// before any limb of h is written.
//
// Preconditions:
//   |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
//   |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
//
// Postconditions:
//   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
//
// Notes on implementation strategy:
//
// Using schoolbook multiplication.
// Karatsuba would save a little in some cost models.
//
// Most multiplications by 2 and 19 are 32-bit precomputations;
// cheaper than 64-bit postcomputations.
//
// There is one remaining multiplication by 19 in the carry chain;
// one *19 precomputation can be merged into this,
// but the resulting data flow is considerably less clean.
//
// There are 12 carries below.
// 10 of them are 2-way parallelizable and vectorizable.
// Can get away with 11 carries, but then data flow is much deeper.
//
// With tighter constraints on inputs can squeeze carries into int32.
func feMul(h, f, g *fieldElement) { | |||
// Snapshot the inputs so that h may alias f or g.
f0 := f[0] | |||
f1 := f[1] | |||
f2 := f[2] | |||
f3 := f[3] | |||
f4 := f[4] | |||
f5 := f[5] | |||
f6 := f[6] | |||
f7 := f[7] | |||
f8 := f[8] | |||
f9 := f[9] | |||
g0 := g[0] | |||
g1 := g[1] | |||
g2 := g[2] | |||
g3 := g[3] | |||
g4 := g[4] | |||
g5 := g[5] | |||
g6 := g[6] | |||
g7 := g[7] | |||
g8 := g[8] | |||
g9 := g[9] | |||
// 32-bit precomputation of 19*g[j], used for products that wrap past limb 9.
g1_19 := 19 * g1 // 1.4*2^29
g2_19 := 19 * g2 // 1.4*2^30; still ok
g3_19 := 19 * g3 | |||
g4_19 := 19 * g4 | |||
g5_19 := 19 * g5 | |||
g6_19 := 19 * g6 | |||
g7_19 := 19 * g7 | |||
g8_19 := 19 * g8 | |||
g9_19 := 19 * g9 | |||
// 32-bit precomputation of 2*f[i] for odd i, used when j is odd as well.
f1_2 := 2 * f1 | |||
f3_2 := 2 * f3 | |||
f5_2 := 2 * f5 | |||
f7_2 := 2 * f7 | |||
f9_2 := 2 * f9 | |||
// All 100 cross products, widened to int64. The suffix names the constant
// factor folded in: _2, _19 or _38.
f0g0 := int64(f0) * int64(g0) | |||
f0g1 := int64(f0) * int64(g1) | |||
f0g2 := int64(f0) * int64(g2) | |||
f0g3 := int64(f0) * int64(g3) | |||
f0g4 := int64(f0) * int64(g4) | |||
f0g5 := int64(f0) * int64(g5) | |||
f0g6 := int64(f0) * int64(g6) | |||
f0g7 := int64(f0) * int64(g7) | |||
f0g8 := int64(f0) * int64(g8) | |||
f0g9 := int64(f0) * int64(g9) | |||
f1g0 := int64(f1) * int64(g0) | |||
f1g1_2 := int64(f1_2) * int64(g1) | |||
f1g2 := int64(f1) * int64(g2) | |||
f1g3_2 := int64(f1_2) * int64(g3) | |||
f1g4 := int64(f1) * int64(g4) | |||
f1g5_2 := int64(f1_2) * int64(g5) | |||
f1g6 := int64(f1) * int64(g6) | |||
f1g7_2 := int64(f1_2) * int64(g7) | |||
f1g8 := int64(f1) * int64(g8) | |||
f1g9_38 := int64(f1_2) * int64(g9_19) | |||
f2g0 := int64(f2) * int64(g0) | |||
f2g1 := int64(f2) * int64(g1) | |||
f2g2 := int64(f2) * int64(g2) | |||
f2g3 := int64(f2) * int64(g3) | |||
f2g4 := int64(f2) * int64(g4) | |||
f2g5 := int64(f2) * int64(g5) | |||
f2g6 := int64(f2) * int64(g6) | |||
f2g7 := int64(f2) * int64(g7) | |||
f2g8_19 := int64(f2) * int64(g8_19) | |||
f2g9_19 := int64(f2) * int64(g9_19) | |||
f3g0 := int64(f3) * int64(g0) | |||
f3g1_2 := int64(f3_2) * int64(g1) | |||
f3g2 := int64(f3) * int64(g2) | |||
f3g3_2 := int64(f3_2) * int64(g3) | |||
f3g4 := int64(f3) * int64(g4) | |||
f3g5_2 := int64(f3_2) * int64(g5) | |||
f3g6 := int64(f3) * int64(g6) | |||
f3g7_38 := int64(f3_2) * int64(g7_19) | |||
f3g8_19 := int64(f3) * int64(g8_19) | |||
f3g9_38 := int64(f3_2) * int64(g9_19) | |||
f4g0 := int64(f4) * int64(g0) | |||
f4g1 := int64(f4) * int64(g1) | |||
f4g2 := int64(f4) * int64(g2) | |||
f4g3 := int64(f4) * int64(g3) | |||
f4g4 := int64(f4) * int64(g4) | |||
f4g5 := int64(f4) * int64(g5) | |||
f4g6_19 := int64(f4) * int64(g6_19) | |||
f4g7_19 := int64(f4) * int64(g7_19) | |||
f4g8_19 := int64(f4) * int64(g8_19) | |||
f4g9_19 := int64(f4) * int64(g9_19) | |||
f5g0 := int64(f5) * int64(g0) | |||
f5g1_2 := int64(f5_2) * int64(g1) | |||
f5g2 := int64(f5) * int64(g2) | |||
f5g3_2 := int64(f5_2) * int64(g3) | |||
f5g4 := int64(f5) * int64(g4) | |||
f5g5_38 := int64(f5_2) * int64(g5_19) | |||
f5g6_19 := int64(f5) * int64(g6_19) | |||
f5g7_38 := int64(f5_2) * int64(g7_19) | |||
f5g8_19 := int64(f5) * int64(g8_19) | |||
f5g9_38 := int64(f5_2) * int64(g9_19) | |||
f6g0 := int64(f6) * int64(g0) | |||
f6g1 := int64(f6) * int64(g1) | |||
f6g2 := int64(f6) * int64(g2) | |||
f6g3 := int64(f6) * int64(g3) | |||
f6g4_19 := int64(f6) * int64(g4_19) | |||
f6g5_19 := int64(f6) * int64(g5_19) | |||
f6g6_19 := int64(f6) * int64(g6_19) | |||
f6g7_19 := int64(f6) * int64(g7_19) | |||
f6g8_19 := int64(f6) * int64(g8_19) | |||
f6g9_19 := int64(f6) * int64(g9_19) | |||
f7g0 := int64(f7) * int64(g0) | |||
f7g1_2 := int64(f7_2) * int64(g1) | |||
f7g2 := int64(f7) * int64(g2) | |||
f7g3_38 := int64(f7_2) * int64(g3_19) | |||
f7g4_19 := int64(f7) * int64(g4_19) | |||
f7g5_38 := int64(f7_2) * int64(g5_19) | |||
f7g6_19 := int64(f7) * int64(g6_19) | |||
f7g7_38 := int64(f7_2) * int64(g7_19) | |||
f7g8_19 := int64(f7) * int64(g8_19) | |||
f7g9_38 := int64(f7_2) * int64(g9_19) | |||
f8g0 := int64(f8) * int64(g0) | |||
f8g1 := int64(f8) * int64(g1) | |||
f8g2_19 := int64(f8) * int64(g2_19) | |||
f8g3_19 := int64(f8) * int64(g3_19) | |||
f8g4_19 := int64(f8) * int64(g4_19) | |||
f8g5_19 := int64(f8) * int64(g5_19) | |||
f8g6_19 := int64(f8) * int64(g6_19) | |||
f8g7_19 := int64(f8) * int64(g7_19) | |||
f8g8_19 := int64(f8) * int64(g8_19) | |||
f8g9_19 := int64(f8) * int64(g9_19) | |||
f9g0 := int64(f9) * int64(g0) | |||
f9g1_38 := int64(f9_2) * int64(g1_19) | |||
f9g2_19 := int64(f9) * int64(g2_19) | |||
f9g3_38 := int64(f9_2) * int64(g3_19) | |||
f9g4_19 := int64(f9) * int64(g4_19) | |||
f9g5_38 := int64(f9_2) * int64(g5_19) | |||
f9g6_19 := int64(f9) * int64(g6_19) | |||
f9g7_38 := int64(f9_2) * int64(g7_19) | |||
f9g8_19 := int64(f9) * int64(g8_19) | |||
f9g9_38 := int64(f9_2) * int64(g9_19) | |||
// Column sums: hk collects every product f[i]*g[j] with (i+j) mod 10 == k.
h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38 | |||
h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19 | |||
h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38 | |||
h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19 | |||
h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38 | |||
h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19 | |||
h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38 | |||
h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19 | |||
h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38 | |||
h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0 | |||
var carry [10]int64 | |||
// |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
// i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
// |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
// i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
//
// Each carry step rounds to nearest (adds half of the limb modulus before
// shifting), so the reduced limb ends up signed and centred on zero.
carry[0] = (h0 + (1 << 25)) >> 26 | |||
h1 += carry[0] | |||
h0 -= carry[0] << 26 | |||
carry[4] = (h4 + (1 << 25)) >> 26 | |||
h5 += carry[4] | |||
h4 -= carry[4] << 26 | |||
// |h0| <= 2^25
// |h4| <= 2^25
// |h1| <= 1.51*2^58
// |h5| <= 1.51*2^58
carry[1] = (h1 + (1 << 24)) >> 25 | |||
h2 += carry[1] | |||
h1 -= carry[1] << 25 | |||
carry[5] = (h5 + (1 << 24)) >> 25 | |||
h6 += carry[5] | |||
h5 -= carry[5] << 25 | |||
// |h1| <= 2^24; from now on fits into int32
// |h5| <= 2^24; from now on fits into int32
// |h2| <= 1.21*2^59
// |h6| <= 1.21*2^59
carry[2] = (h2 + (1 << 25)) >> 26 | |||
h3 += carry[2] | |||
h2 -= carry[2] << 26 | |||
carry[6] = (h6 + (1 << 25)) >> 26 | |||
h7 += carry[6] | |||
h6 -= carry[6] << 26 | |||
// |h2| <= 2^25; from now on fits into int32 unchanged
// |h6| <= 2^25; from now on fits into int32 unchanged
// |h3| <= 1.51*2^58
// |h7| <= 1.51*2^58
carry[3] = (h3 + (1 << 24)) >> 25 | |||
h4 += carry[3] | |||
h3 -= carry[3] << 25 | |||
carry[7] = (h7 + (1 << 24)) >> 25 | |||
h8 += carry[7] | |||
h7 -= carry[7] << 25 | |||
// |h3| <= 2^24; from now on fits into int32 unchanged
// |h7| <= 2^24; from now on fits into int32 unchanged
// |h4| <= 1.52*2^33
// |h8| <= 1.52*2^33
carry[4] = (h4 + (1 << 25)) >> 26 | |||
h5 += carry[4] | |||
h4 -= carry[4] << 26 | |||
carry[8] = (h8 + (1 << 25)) >> 26 | |||
h9 += carry[8] | |||
h8 -= carry[8] << 26 | |||
// |h4| <= 2^25; from now on fits into int32 unchanged
// |h8| <= 2^25; from now on fits into int32 unchanged
// |h5| <= 1.01*2^24
// |h9| <= 1.51*2^58
// The carry out of limb 9 wraps to limb 0 multiplied by 19.
carry[9] = (h9 + (1 << 24)) >> 25 | |||
h0 += carry[9] * 19 | |||
h9 -= carry[9] << 25 | |||
// |h9| <= 2^24; from now on fits into int32 unchanged
// |h0| <= 1.8*2^37
carry[0] = (h0 + (1 << 25)) >> 26 | |||
h1 += carry[0] | |||
h0 -= carry[0] << 26 | |||
// |h0| <= 2^25; from now on fits into int32 unchanged
// |h1| <= 1.01*2^24
h[0] = int32(h0) | |||
h[1] = int32(h1) | |||
h[2] = int32(h2) | |||
h[3] = int32(h3) | |||
h[4] = int32(h4) | |||
h[5] = int32(h5) | |||
h[6] = int32(h6) | |||
h[7] = int32(h7) | |||
h[8] = int32(h8) | |||
h[9] = int32(h9) | |||
} | |||
// feSquare calculates h = f*f. Can overlap h with f: every limb of f is
// copied into a local before any limb of h is written.
//
// Preconditions:
//   |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
//
// Postconditions:
//   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
func feSquare(h, f *fieldElement) { | |||
f0 := f[0] | |||
f1 := f[1] | |||
f2 := f[2] | |||
f3 := f[3] | |||
f4 := f[4] | |||
f5 := f[5] | |||
f6 := f[6] | |||
f7 := f[7] | |||
f8 := f[8] | |||
f9 := f[9] | |||
// 32-bit precomputations. The doubled limbs account for each off-diagonal
// product f[i]*f[j] (i != j) appearing twice in the square.
f0_2 := 2 * f0 | |||
f1_2 := 2 * f1 | |||
f2_2 := 2 * f2 | |||
f3_2 := 2 * f3 | |||
f4_2 := 2 * f4 | |||
f5_2 := 2 * f5 | |||
f6_2 := 2 * f6 | |||
f7_2 := 2 * f7 | |||
// Limbs pre-multiplied by 19 (or 38) for products that wrap past limb 9.
f5_38 := 38 * f5 // 1.31*2^30
f6_19 := 19 * f6 // 1.31*2^30
f7_38 := 38 * f7 // 1.31*2^30
f8_19 := 19 * f8 // 1.31*2^30
f9_38 := 38 * f9 // 1.31*2^30
// The 55 distinct products, widened to int64. The suffix names the constant
// factor folded in.
f0f0 := int64(f0) * int64(f0) | |||
f0f1_2 := int64(f0_2) * int64(f1) | |||
f0f2_2 := int64(f0_2) * int64(f2) | |||
f0f3_2 := int64(f0_2) * int64(f3) | |||
f0f4_2 := int64(f0_2) * int64(f4) | |||
f0f5_2 := int64(f0_2) * int64(f5) | |||
f0f6_2 := int64(f0_2) * int64(f6) | |||
f0f7_2 := int64(f0_2) * int64(f7) | |||
f0f8_2 := int64(f0_2) * int64(f8) | |||
f0f9_2 := int64(f0_2) * int64(f9) | |||
f1f1_2 := int64(f1_2) * int64(f1) | |||
f1f2_2 := int64(f1_2) * int64(f2) | |||
f1f3_4 := int64(f1_2) * int64(f3_2) | |||
f1f4_2 := int64(f1_2) * int64(f4) | |||
f1f5_4 := int64(f1_2) * int64(f5_2) | |||
f1f6_2 := int64(f1_2) * int64(f6) | |||
f1f7_4 := int64(f1_2) * int64(f7_2) | |||
f1f8_2 := int64(f1_2) * int64(f8) | |||
f1f9_76 := int64(f1_2) * int64(f9_38) | |||
f2f2 := int64(f2) * int64(f2) | |||
f2f3_2 := int64(f2_2) * int64(f3) | |||
f2f4_2 := int64(f2_2) * int64(f4) | |||
f2f5_2 := int64(f2_2) * int64(f5) | |||
f2f6_2 := int64(f2_2) * int64(f6) | |||
f2f7_2 := int64(f2_2) * int64(f7) | |||
f2f8_38 := int64(f2_2) * int64(f8_19) | |||
f2f9_38 := int64(f2) * int64(f9_38) | |||
f3f3_2 := int64(f3_2) * int64(f3) | |||
f3f4_2 := int64(f3_2) * int64(f4) | |||
f3f5_4 := int64(f3_2) * int64(f5_2) | |||
f3f6_2 := int64(f3_2) * int64(f6) | |||
f3f7_76 := int64(f3_2) * int64(f7_38) | |||
f3f8_38 := int64(f3_2) * int64(f8_19) | |||
f3f9_76 := int64(f3_2) * int64(f9_38) | |||
f4f4 := int64(f4) * int64(f4) | |||
f4f5_2 := int64(f4_2) * int64(f5) | |||
f4f6_38 := int64(f4_2) * int64(f6_19) | |||
f4f7_38 := int64(f4) * int64(f7_38) | |||
f4f8_38 := int64(f4_2) * int64(f8_19) | |||
f4f9_38 := int64(f4) * int64(f9_38) | |||
f5f5_38 := int64(f5) * int64(f5_38) | |||
f5f6_38 := int64(f5_2) * int64(f6_19) | |||
f5f7_76 := int64(f5_2) * int64(f7_38) | |||
f5f8_38 := int64(f5_2) * int64(f8_19) | |||
f5f9_76 := int64(f5_2) * int64(f9_38) | |||
f6f6_19 := int64(f6) * int64(f6_19) | |||
f6f7_38 := int64(f6) * int64(f7_38) | |||
f6f8_38 := int64(f6_2) * int64(f8_19) | |||
f6f9_38 := int64(f6) * int64(f9_38) | |||
f7f7_38 := int64(f7) * int64(f7_38) | |||
f7f8_38 := int64(f7_2) * int64(f8_19) | |||
f7f9_76 := int64(f7_2) * int64(f9_38) | |||
f8f8_19 := int64(f8) * int64(f8_19) | |||
f8f9_38 := int64(f8) * int64(f9_38) | |||
f9f9_38 := int64(f9) * int64(f9_38) | |||
// Column sums: hk collects every product whose limb indices add up to k
// modulo 10.
h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38 | |||
h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38 | |||
h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19 | |||
h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38 | |||
h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38 | |||
h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38 | |||
h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19 | |||
h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38 | |||
h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38 | |||
h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2 | |||
var carry [10]int64 | |||
// Carry chain, in the same order as feMul: two interleaved chains starting
// at limbs 0 and 4, a wrap from limb 9 to limb 0 multiplied by 19, and a
// final carry out of limb 0. Each step rounds to nearest.
carry[0] = (h0 + (1 << 25)) >> 26 | |||
h1 += carry[0] | |||
h0 -= carry[0] << 26 | |||
carry[4] = (h4 + (1 << 25)) >> 26 | |||
h5 += carry[4] | |||
h4 -= carry[4] << 26 | |||
carry[1] = (h1 + (1 << 24)) >> 25 | |||
h2 += carry[1] | |||
h1 -= carry[1] << 25 | |||
carry[5] = (h5 + (1 << 24)) >> 25 | |||
h6 += carry[5] | |||
h5 -= carry[5] << 25 | |||
carry[2] = (h2 + (1 << 25)) >> 26 | |||
h3 += carry[2] | |||
h2 -= carry[2] << 26 | |||
carry[6] = (h6 + (1 << 25)) >> 26 | |||
h7 += carry[6] | |||
h6 -= carry[6] << 26 | |||
carry[3] = (h3 + (1 << 24)) >> 25 | |||
h4 += carry[3] | |||
h3 -= carry[3] << 25 | |||
carry[7] = (h7 + (1 << 24)) >> 25 | |||
h8 += carry[7] | |||
h7 -= carry[7] << 25 | |||
carry[4] = (h4 + (1 << 25)) >> 26 | |||
h5 += carry[4] | |||
h4 -= carry[4] << 26 | |||
carry[8] = (h8 + (1 << 25)) >> 26 | |||
h9 += carry[8] | |||
h8 -= carry[8] << 26 | |||
carry[9] = (h9 + (1 << 24)) >> 25 | |||
h0 += carry[9] * 19 | |||
h9 -= carry[9] << 25 | |||
carry[0] = (h0 + (1 << 25)) >> 26 | |||
h1 += carry[0] | |||
h0 -= carry[0] << 26 | |||
h[0] = int32(h0) | |||
h[1] = int32(h1) | |||
h[2] = int32(h2) | |||
h[3] = int32(h3) | |||
h[4] = int32(h4) | |||
h[5] = int32(h5) | |||
h[6] = int32(h6) | |||
h[7] = int32(h7) | |||
h[8] = int32(h8) | |||
h[9] = int32(h9) | |||
} | |||
// feMul121666 calculates h = f * 121666. Can overlap h with f. | |||
// | |||
// Preconditions: | |||
// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. | |||
// | |||
// Postconditions: | |||
// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. | |||
func feMul121666(h, f *fieldElement) { | |||
h0 := int64(f[0]) * 121666 | |||
h1 := int64(f[1]) * 121666 | |||
h2 := int64(f[2]) * 121666 | |||
h3 := int64(f[3]) * 121666 | |||
h4 := int64(f[4]) * 121666 | |||
h5 := int64(f[5]) * 121666 | |||
h6 := int64(f[6]) * 121666 | |||
h7 := int64(f[7]) * 121666 | |||
h8 := int64(f[8]) * 121666 | |||
h9 := int64(f[9]) * 121666 | |||
var carry [10]int64 | |||
carry[9] = (h9 + (1 << 24)) >> 25 | |||
h0 += carry[9] * 19 | |||
h9 -= carry[9] << 25 | |||
carry[1] = (h1 + (1 << 24)) >> 25 | |||
h2 += carry[1] | |||
h1 -= carry[1] << 25 | |||
carry[3] = (h3 + (1 << 24)) >> 25 | |||
h4 += carry[3] | |||
h3 -= carry[3] << 25 | |||
carry[5] = (h5 + (1 << 24)) >> 25 | |||
h6 += carry[5] | |||
h5 -= carry[5] << 25 | |||
carry[7] = (h7 + (1 << 24)) >> 25 | |||
h8 += carry[7] | |||
h7 -= carry[7] << 25 | |||
carry[0] = (h0 + (1 << 25)) >> 26 | |||
h1 += carry[0] | |||
h0 -= carry[0] << 26 | |||
carry[2] = (h2 + (1 << 25)) >> 26 | |||
h3 += carry[2] | |||
h2 -= carry[2] << 26 | |||
carry[4] = (h4 + (1 << 25)) >> 26 | |||
h5 += carry[4] | |||
h4 -= carry[4] << 26 | |||
carry[6] = (h6 + (1 << 25)) >> 26 | |||
h7 += carry[6] | |||
h6 -= carry[6] << 26 | |||
carry[8] = (h8 + (1 << 25)) >> 26 | |||
h9 += carry[8] | |||
h8 -= carry[8] << 26 | |||
h[0] = int32(h0) | |||
h[1] = int32(h1) | |||
h[2] = int32(h2) | |||
h[3] = int32(h3) | |||
h[4] = int32(h4) | |||
h[5] = int32(h5) | |||
h[6] = int32(h6) | |||
h[7] = int32(h7) | |||
h[8] = int32(h8) | |||
h[9] = int32(h9) | |||
} | |||
// feInvert sets out = z^-1. | |||
func feInvert(out, z *fieldElement) { | |||
var t0, t1, t2, t3 fieldElement | |||
var i int | |||
feSquare(&t0, z) | |||
for i = 1; i < 1; i++ { | |||
feSquare(&t0, &t0) | |||
} | |||
feSquare(&t1, &t0) | |||
for i = 1; i < 2; i++ { | |||
feSquare(&t1, &t1) | |||
} | |||
feMul(&t1, z, &t1) | |||
feMul(&t0, &t0, &t1) | |||
feSquare(&t2, &t0) | |||
for i = 1; i < 1; i++ { | |||
feSquare(&t2, &t2) | |||
} | |||
feMul(&t1, &t1, &t2) | |||
feSquare(&t2, &t1) | |||
for i = 1; i < 5; i++ { | |||
feSquare(&t2, &t2) | |||
} | |||
feMul(&t1, &t2, &t1) | |||
feSquare(&t2, &t1) | |||
for i = 1; i < 10; i++ { | |||
feSquare(&t2, &t2) | |||
} | |||
feMul(&t2, &t2, &t1) | |||
feSquare(&t3, &t2) | |||
for i = 1; i < 20; i++ { | |||
feSquare(&t3, &t3) | |||
} | |||
feMul(&t2, &t3, &t2) | |||
feSquare(&t2, &t2) | |||
for i = 1; i < 10; i++ { | |||
feSquare(&t2, &t2) | |||
} | |||
feMul(&t1, &t2, &t1) | |||
feSquare(&t2, &t1) | |||
for i = 1; i < 50; i++ { | |||
feSquare(&t2, &t2) | |||
} | |||
feMul(&t2, &t2, &t1) | |||
feSquare(&t3, &t2) | |||
for i = 1; i < 100; i++ { | |||
feSquare(&t3, &t3) | |||
} | |||
feMul(&t2, &t3, &t2) | |||
feSquare(&t2, &t2) | |||
for i = 1; i < 50; i++ { | |||
feSquare(&t2, &t2) | |||
} | |||
feMul(&t1, &t2, &t1) | |||
feSquare(&t1, &t1) | |||
for i = 1; i < 5; i++ { | |||
feSquare(&t1, &t1) | |||
} | |||
feMul(out, &t1, &t0) | |||
} | |||
func scalarMult(out, in, base *[32]byte) { | |||
var e [32]byte | |||
copy(e[:], in[:]) | |||
e[0] &= 248 | |||
e[31] &= 127 | |||
e[31] |= 64 | |||
var x1, x2, z2, x3, z3, tmp0, tmp1 fieldElement | |||
feFromBytes(&x1, base) | |||
feOne(&x2) | |||
feCopy(&x3, &x1) | |||
feOne(&z3) | |||
swap := int32(0) | |||
for pos := 254; pos >= 0; pos-- { | |||
b := e[pos/8] >> uint(pos&7) | |||
b &= 1 | |||
swap ^= int32(b) | |||
feCSwap(&x2, &x3, swap) | |||
feCSwap(&z2, &z3, swap) | |||
swap = int32(b) | |||
feSub(&tmp0, &x3, &z3) | |||
feSub(&tmp1, &x2, &z2) | |||
feAdd(&x2, &x2, &z2) | |||
feAdd(&z2, &x3, &z3) | |||
feMul(&z3, &tmp0, &x2) | |||
feMul(&z2, &z2, &tmp1) | |||
feSquare(&tmp0, &tmp1) | |||
feSquare(&tmp1, &x2) | |||
feAdd(&x3, &z3, &z2) | |||
feSub(&z2, &z3, &z2) | |||
feMul(&x2, &tmp1, &tmp0) | |||
feSub(&tmp1, &tmp1, &tmp0) | |||
feSquare(&z2, &z2) | |||
feMul121666(&z3, &tmp1) | |||
feSquare(&x3, &x3) | |||
feAdd(&tmp0, &tmp0, &z3) | |||
feMul(&z3, &x1, &z2) | |||
feMul(&z2, &tmp1, &tmp0) | |||
} | |||
feCSwap(&x2, &x3, swap) | |||
feCSwap(&z2, &z3, swap) | |||
feInvert(&z2, &z2) | |||
feMul(&x2, &x2, &z2) | |||
feToBytes(out, &x2) | |||
} |
@ -0,0 +1,29 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
package curve25519 | |||
import ( | |||
"fmt" | |||
"testing" | |||
) | |||
const expectedHex = "89161fde887b2b53de549af483940106ecc114d6982daa98256de23bdf77661a" | |||
func TestBaseScalarMult(t *testing.T) { | |||
var a, b [32]byte | |||
in := &a | |||
out := &b | |||
a[0] = 1 | |||
for i := 0; i < 200; i++ { | |||
ScalarBaseMult(out, in) | |||
in, out = out, in | |||
} | |||
result := fmt.Sprintf("%x", in[:]) | |||
if result != expectedHex { | |||
t.Errorf("incorrect result: got %s, want %s", result, expectedHex) | |||
} | |||
} |
@ -0,0 +1,23 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
// Package curve25519 provides an implementation of scalar multiplication on | |||
// the elliptic curve known as curve25519. See http://cr.yp.to/ecdh.html | |||
package curve25519 | |||
// basePoint is the x coordinate of the generator of the curve: the value 9
// encoded as 32 little-endian bytes.
var basePoint = [32]byte{9}
// ScalarMult sets dst to the product in*base where dst and base are the x
// coordinates of group points and all values are in little-endian form.
//
// It forwards its arguments unchanged to scalarMult, which is supplied by
// whichever build-tagged implementation file is compiled in.
func ScalarMult(dst, in, base *[32]byte) { | |||
scalarMult(dst, in, base) | |||
} | |||
// ScalarBaseMult sets dst to the product in*base where dst and base are the x
// coordinates of group points, base is the standard generator and all values
// are in little-endian form.
//
// It is ScalarMult with base fixed to the package-level basePoint.
func ScalarBaseMult(dst, in *[32]byte) { | |||
ScalarMult(dst, in, &basePoint) | |||
} |
@ -0,0 +1,94 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
// This code was translated into a form compatible with 6a from the public | |||
// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html | |||
// +build amd64,!gccgo,!appengine | |||
// func freeze(inout *[5]uint64)
//
// freeze rewrites the five 64-bit limbs at inout in place. It runs three
// carry passes in radix 2^51, each folding the carry out of limb 4 back into
// limb 0 multiplied by 19. It then subtracts
// (2^51-19, 2^51-1, 2^51-1, 2^51-1, 2^51-1) limb-wise under a mask computed
// with conditional moves rather than a branch.
TEXT ·freeze(SB),7,$96-8 | |||
MOVQ inout+0(FP), DI | |||
// Remember the incoming SP in R11, round SP down to a multiple of 32, add 32,
// and spill R11-R15, BX and BP at the resulting address.
// NOTE(review): only R11 and R12 are clobbered below; the other spills look
// like a leftover of the translated C-ABI prologue - confirm.
MOVQ SP,R11 | |||
MOVQ $31,CX | |||
NOTQ CX | |||
ANDQ CX,SP | |||
ADDQ $32,SP | |||
MOVQ R11,0(SP) | |||
MOVQ R12,8(SP) | |||
MOVQ R13,16(SP) | |||
MOVQ R14,24(SP) | |||
MOVQ R15,32(SP) | |||
MOVQ BX,40(SP) | |||
MOVQ BP,48(SP) | |||
// Load the five limbs into SI, DX, CX, R8, R9.
MOVQ 0(DI),SI | |||
MOVQ 8(DI),DX | |||
MOVQ 16(DI),CX | |||
MOVQ 24(DI),R8 | |||
MOVQ 32(DI),R9 | |||
// AX = 2^51-1 (limb mask), R10 = 2^51-19, R11 = pass counter (3).
MOVQ ·REDMASK51(SB),AX | |||
MOVQ AX,R10 | |||
SUBQ $18,R10 | |||
MOVQ $3,R11 | |||
// One carry pass: move the bits above bit 50 of each limb into the next limb,
// and the overflow of limb 4 into limb 0 times 19.
REDUCELOOP: | |||
MOVQ SI,R12 | |||
SHRQ $51,R12 | |||
ANDQ AX,SI | |||
ADDQ R12,DX | |||
MOVQ DX,R12 | |||
SHRQ $51,R12 | |||
ANDQ AX,DX | |||
ADDQ R12,CX | |||
MOVQ CX,R12 | |||
SHRQ $51,R12 | |||
ANDQ AX,CX | |||
ADDQ R12,R8 | |||
MOVQ R8,R12 | |||
SHRQ $51,R12 | |||
ANDQ AX,R8 | |||
ADDQ R12,R9 | |||
MOVQ R9,R12 | |||
SHRQ $51,R12 | |||
ANDQ AX,R9 | |||
IMUL3Q $19,R12,R12 | |||
ADDQ R12,SI | |||
// JA is taken while the decremented counter is non-zero, so the pass runs
// three times and R11 is 0 afterwards.
SUBQ $1,R11 | |||
JA REDUCELOOP | |||
// R12 starts at 1 and is cleared to 0 (R11) by any of the conditional moves
// below; it ends up 1 only if all five tests leave it alone.
MOVQ $1,R12 | |||
// NOTE(review): the apparent intent is "skip the subtraction when limb 0 is
// below 2^51-19". With Go's CMPQ operand order, CMPQ R10,SI followed by
// CMOVQLT tests R10 < SI instead, i.e. limb 0 above 2^51-19. This only
// matters when limbs 1..4 are all 2^51-1; confirm against the SUPERCOP
// original (cmp %r10,%rsi; cmovl).
CMPQ R10,SI | |||
CMOVQLT R11,R12 | |||
// Limbs 1..4 must each equal 2^51-1 for the subtraction to happen.
CMPQ AX,DX | |||
CMOVQNE R11,R12 | |||
CMPQ AX,CX | |||
CMOVQNE R11,R12 | |||
CMPQ AX,R8 | |||
CMOVQNE R11,R12 | |||
CMPQ AX,R9 | |||
CMOVQNE R11,R12 | |||
// Turn the 0/1 flag into an all-zeros/all-ones mask and apply it to both
// subtrahends, so the subtraction below is either of p's limbs or of zero.
NEGQ R12 | |||
ANDQ R12,AX | |||
ANDQ R12,R10 | |||
SUBQ R10,SI | |||
SUBQ AX,DX | |||
SUBQ AX,CX | |||
SUBQ AX,R8 | |||
SUBQ AX,R9 | |||
// Store the limbs back.
MOVQ SI,0(DI) | |||
MOVQ DX,8(DI) | |||
MOVQ CX,16(DI) | |||
MOVQ R8,24(DI) | |||
MOVQ R9,32(DI) | |||
// Reload the spilled registers and restore the original SP.
MOVQ 0(SP),R11 | |||
MOVQ 8(SP),R12 | |||
MOVQ 16(SP),R13 | |||
MOVQ 24(SP),R14 | |||
MOVQ 32(SP),R15 | |||
MOVQ 40(SP),BX | |||
MOVQ 48(SP),BP | |||
MOVQ R11,SP | |||
// NOTE(review): the Go prototype has no results, so these AX/DX loads look
// like leftovers from the translated C-ABI routine - confirm before removing.
MOVQ DI,AX | |||
MOVQ SI,DX | |||
RET |
@ -0,0 +1,240 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
// +build amd64,!gccgo,!appengine | |||
package curve25519 | |||
// These functions are implemented in the .s files. The names of the functions | |||
// in the rest of the file are also taken from the SUPERCOP sources to help | |||
// people following along. | |||
// cswap compares v with 1 and, using conditional moves, exchanges 64-bit | |||
// words starting at inout with the words 80 bytes further on. The memory | |||
// touched therefore extends past the [5]uint64 named in the signature; | |||
// mladder passes &work[1] of a [5][5]uint64. | |||
//go:noescape | |||
func cswap(inout *[5]uint64, v uint64) | |||
// ladderstep is implemented in assembly that is not part of this file; | |||
// presumably it performs one Montgomery ladder step on the five field | |||
// elements in inout (confirm against the .s source). | |||
//go:noescape | |||
func ladderstep(inout *[5][5]uint64) | |||
// freeze carries the five words of inout down to 51 bits each and then | |||
// conditionally subtracts (2^51-19, 2^51-1, 2^51-1, 2^51-1, 2^51-1), in place. | |||
//go:noescape | |||
func freeze(inout *[5]uint64) | |||
// mul writes to dest the product of a and b, each read as five 51-bit limbs, | |||
// with limbs that overflow position 4 folded back in multiplied by 19. | |||
//go:noescape | |||
func mul(dest, a, b *[5]uint64) | |||
// square writes to out the product of in with itself, using the same limb | |||
// layout and folding as mul. | |||
//go:noescape | |||
func square(out, in *[5]uint64) | |||
// mladder uses a Montgomery ladder to calculate (xr/zr) *= s. | |||
func mladder(xr, zr *[5]uint64, s *[32]byte) { | |||
var work [5][5]uint64 | |||
work[0] = *xr | |||
setint(&work[1], 1) | |||
setint(&work[2], 0) | |||
work[3] = *xr | |||
setint(&work[4], 1) | |||
j := uint(6) | |||
var prevbit byte | |||
for i := 31; i >= 0; i-- { | |||
for j < 8 { | |||
bit := ((*s)[i] >> j) & 1 | |||
swap := bit ^ prevbit | |||
prevbit = bit | |||
cswap(&work[1], uint64(swap)) | |||
ladderstep(&work) | |||
j-- | |||
} | |||
j = 7 | |||
} | |||
*xr = work[1] | |||
*zr = work[2] | |||
} | |||
func scalarMult(out, in, base *[32]byte) { | |||
var e [32]byte | |||
copy(e[:], (*in)[:]) | |||
e[0] &= 248 | |||
e[31] &= 127 | |||
e[31] |= 64 | |||
var t, z [5]uint64 | |||
unpack(&t, base) | |||
mladder(&t, &z, &e) | |||
invert(&z, &z) | |||
mul(&t, &t, &z) | |||
pack(out, &t) | |||
} | |||
// setint sets r to the small integer v: limb 0 becomes v and the four
// remaining limbs become zero.
func setint(r *[5]uint64, v uint64) {
	*r = [5]uint64{v}
}
// unpack splits the little-endian value x into five 51-bit limbs, least
// significant limb first. Limb i holds bits 51*i through 51*i+50 of x, so
// bit 255 of x is dropped.
func unpack(r *[5]uint64, x *[32]byte) {
	const limbBits = 51
	const limbMask = 1<<limbBits - 1

	for limb := range r {
		bitPos := limb * limbBits
		first := bitPos / 8

		// Gather up to eight bytes starting at the byte that contains the
		// limb's lowest bit. 7+51 bits always fit in that window, and the
		// last limb needs only the seven bytes that remain.
		var window uint64
		for k := 0; k < 8 && first+k < len(x); k++ {
			window |= uint64(x[first+k]) << (8 * uint(k))
		}

		r[limb] = (window >> uint(bitPos%8)) & limbMask
	}
}
// pack sets out = x where out is the usual, little-endian form of the 5, | |||
// 51-bit limbs in x. | |||
func pack(out *[32]byte, x *[5]uint64) { | |||
t := *x | |||
freeze(&t) | |||
out[0] = byte(t[0]) | |||
out[1] = byte(t[0] >> 8) | |||
out[2] = byte(t[0] >> 16) | |||
out[3] = byte(t[0] >> 24) | |||
out[4] = byte(t[0] >> 32) | |||
out[5] = byte(t[0] >> 40) | |||
out[6] = byte(t[0] >> 48) | |||
out[6] ^= byte(t[1]<<3) & 0xf8 | |||
out[7] = byte(t[1] >> 5) | |||
out[8] = byte(t[1] >> 13) | |||
out[9] = byte(t[1] >> 21) | |||
out[10] = byte(t[1] >> 29) | |||
out[11] = byte(t[1] >> 37) | |||
out[12] = byte(t[1] >> 45) | |||
out[12] ^= byte(t[2]<<6) & 0xc0 | |||
out[13] = byte(t[2] >> 2) | |||
out[14] = byte(t[2] >> 10) | |||
out[15] = byte(t[2] >> 18) | |||
out[16] = byte(t[2] >> 26) | |||
out[17] = byte(t[2] >> 34) | |||
out[18] = byte(t[2] >> 42) | |||
out[19] = byte(t[2] >> 50) | |||
out[19] ^= byte(t[3]<<1) & 0xfe | |||
out[20] = byte(t[3] >> 7) | |||
out[21] = byte(t[3] >> 15) | |||
out[22] = byte(t[3] >> 23) | |||
out[23] = byte(t[3] >> 31) | |||
out[24] = byte(t[3] >> 39) | |||
out[25] = byte(t[3] >> 47) | |||
out[25] ^= byte(t[4]<<4) & 0xf0 | |||
out[26] = byte(t[4] >> 4) | |||
out[27] = byte(t[4] >> 12) | |||
out[28] = byte(t[4] >> 20) | |||
out[29] = byte(t[4] >> 28) | |||
out[30] = byte(t[4] >> 36) | |||
out[31] = byte(t[4] >> 44) | |||
} | |||
// invert calculates r = x^-1 mod p using Fermat's little theorem. | |||
func invert(r *[5]uint64, x *[5]uint64) { | |||
var z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t [5]uint64 | |||
square(&z2, x) /* 2 */ | |||
square(&t, &z2) /* 4 */ | |||
square(&t, &t) /* 8 */ | |||
mul(&z9, &t, x) /* 9 */ | |||
mul(&z11, &z9, &z2) /* 11 */ | |||
square(&t, &z11) /* 22 */ | |||
mul(&z2_5_0, &t, &z9) /* 2^5 - 2^0 = 31 */ | |||
square(&t, &z2_5_0) /* 2^6 - 2^1 */ | |||
for i := 1; i < 5; i++ { /* 2^20 - 2^10 */ | |||
square(&t, &t) | |||
} | |||
mul(&z2_10_0, &t, &z2_5_0) /* 2^10 - 2^0 */ | |||
square(&t, &z2_10_0) /* 2^11 - 2^1 */ | |||
for i := 1; i < 10; i++ { /* 2^20 - 2^10 */ | |||
square(&t, &t) | |||
} | |||
mul(&z2_20_0, &t, &z2_10_0) /* 2^20 - 2^0 */ | |||
square(&t, &z2_20_0) /* 2^21 - 2^1 */ | |||
for i := 1; i < 20; i++ { /* 2^40 - 2^20 */ | |||
square(&t, &t) | |||
} | |||
mul(&t, &t, &z2_20_0) /* 2^40 - 2^0 */ | |||
square(&t, &t) /* 2^41 - 2^1 */ | |||
for i := 1; i < 10; i++ { /* 2^50 - 2^10 */ | |||
square(&t, &t) | |||
} | |||
mul(&z2_50_0, &t, &z2_10_0) /* 2^50 - 2^0 */ | |||
square(&t, &z2_50_0) /* 2^51 - 2^1 */ | |||
for i := 1; i < 50; i++ { /* 2^100 - 2^50 */ | |||
square(&t, &t) | |||
} | |||
mul(&z2_100_0, &t, &z2_50_0) /* 2^100 - 2^0 */ | |||
square(&t, &z2_100_0) /* 2^101 - 2^1 */ | |||
for i := 1; i < 100; i++ { /* 2^200 - 2^100 */ | |||
square(&t, &t) | |||
} | |||
mul(&t, &t, &z2_100_0) /* 2^200 - 2^0 */ | |||
square(&t, &t) /* 2^201 - 2^1 */ | |||
for i := 1; i < 50; i++ { /* 2^250 - 2^50 */ | |||
square(&t, &t) | |||
} | |||
mul(&t, &t, &z2_50_0) /* 2^250 - 2^0 */ | |||
square(&t, &t) /* 2^251 - 2^1 */ | |||
square(&t, &t) /* 2^252 - 2^2 */ | |||
square(&t, &t) /* 2^253 - 2^3 */ | |||
square(&t, &t) /* 2^254 - 2^4 */ | |||
square(&t, &t) /* 2^255 - 2^5 */ | |||
mul(r, &t, &z11) /* 2^255 - 21 */ | |||
} |
@ -0,0 +1,191 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
// This code was translated into a form compatible with 6a from the public | |||
// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html | |||
// +build amd64,!gccgo,!appengine | |||
// func mul(dest, a, b *[5]uint64) | |||
// | |||
// mul multiplies the five 64-bit limbs at a by the five limbs at b and stores | |||
// five limbs, each masked to 51 bits except for a final small carry added to | |||
// limb 0, at dest. Partial products a[i]*b[j] with i+j >= 5 are multiplied by | |||
// 19 and accumulated at position i+j-5. | |||
// | |||
// 128-bit accumulators (high:low): r0 = R9:R8, r1 = R11:R10, r2 = R13:R12, | |||
// r3 = R15:R14, r4 = BP:BX. MULQ leaves each 128-bit product in DX:AX. | |||
TEXT ·mul(SB),0,$128-24 | |||
MOVQ dest+0(FP), DI | |||
MOVQ a+8(FP), SI | |||
MOVQ b+16(FP), DX | |||
// Remember the incoming SP in R11, then move SP to a 32-byte aligned | |||
// address inside the frame (clear the low five bits, add 32). | |||
MOVQ SP,R11 | |||
MOVQ $31,CX | |||
NOTQ CX | |||
ANDQ CX,SP | |||
ADDQ $32,SP | |||
// Spill the original SP, the registers overwritten below, and dest. | |||
MOVQ R11,0(SP) | |||
MOVQ R12,8(SP) | |||
MOVQ R13,16(SP) | |||
MOVQ R14,24(SP) | |||
MOVQ R15,32(SP) | |||
MOVQ BX,40(SP) | |||
MOVQ BP,48(SP) | |||
MOVQ DI,56(SP) | |||
// MULQ overwrites DX, so the pointer to b lives in CX from here on. | |||
MOVQ DX,CX | |||
// r0 = (19*a[3])*b[2]; 19*a[3] is kept at 64(SP) for later reuse. | |||
MOVQ 24(SI),DX | |||
IMUL3Q $19,DX,AX | |||
MOVQ AX,64(SP) | |||
MULQ 16(CX) | |||
MOVQ AX,R8 | |||
MOVQ DX,R9 | |||
// r0 += (19*a[4])*b[1]; 19*a[4] is kept at 72(SP) for later reuse. | |||
MOVQ 32(SI),DX | |||
IMUL3Q $19,DX,AX | |||
MOVQ AX,72(SP) | |||
MULQ 8(CX) | |||
ADDQ AX,R8 | |||
ADCQ DX,R9 | |||
// a[0]*b[0..4]: adds to r0 and initialises r1..r4. | |||
MOVQ 0(SI),AX | |||
MULQ 0(CX) | |||
ADDQ AX,R8 | |||
ADCQ DX,R9 | |||
MOVQ 0(SI),AX | |||
MULQ 8(CX) | |||
MOVQ AX,R10 | |||
MOVQ DX,R11 | |||
MOVQ 0(SI),AX | |||
MULQ 16(CX) | |||
MOVQ AX,R12 | |||
MOVQ DX,R13 | |||
MOVQ 0(SI),AX | |||
MULQ 24(CX) | |||
MOVQ AX,R14 | |||
MOVQ DX,R15 | |||
MOVQ 0(SI),AX | |||
MULQ 32(CX) | |||
MOVQ AX,BX | |||
MOVQ DX,BP | |||
// a[1]*b[0..3] go to r1..r4; (19*a[1])*b[4] goes to r0. | |||
MOVQ 8(SI),AX | |||
MULQ 0(CX) | |||
ADDQ AX,R10 | |||
ADCQ DX,R11 | |||
MOVQ 8(SI),AX | |||
MULQ 8(CX) | |||
ADDQ AX,R12 | |||
ADCQ DX,R13 | |||
MOVQ 8(SI),AX | |||
MULQ 16(CX) | |||
ADDQ AX,R14 | |||
ADCQ DX,R15 | |||
MOVQ 8(SI),AX | |||
MULQ 24(CX) | |||
ADDQ AX,BX | |||
ADCQ DX,BP | |||
MOVQ 8(SI),DX | |||
IMUL3Q $19,DX,AX | |||
MULQ 32(CX) | |||
ADDQ AX,R8 | |||
ADCQ DX,R9 | |||
// a[2]*b[0..2] go to r2..r4; (19*a[2])*b[3], b[4] go to r0, r1. | |||
MOVQ 16(SI),AX | |||
MULQ 0(CX) | |||
ADDQ AX,R12 | |||
ADCQ DX,R13 | |||
MOVQ 16(SI),AX | |||
MULQ 8(CX) | |||
ADDQ AX,R14 | |||
ADCQ DX,R15 | |||
MOVQ 16(SI),AX | |||
MULQ 16(CX) | |||
ADDQ AX,BX | |||
ADCQ DX,BP | |||
MOVQ 16(SI),DX | |||
IMUL3Q $19,DX,AX | |||
MULQ 24(CX) | |||
ADDQ AX,R8 | |||
ADCQ DX,R9 | |||
MOVQ 16(SI),DX | |||
IMUL3Q $19,DX,AX | |||
MULQ 32(CX) | |||
ADDQ AX,R10 | |||
ADCQ DX,R11 | |||
// a[3]*b[0..1] go to r3, r4; the saved 19*a[3] times b[3], b[4] go to r1, r2. | |||
MOVQ 24(SI),AX | |||
MULQ 0(CX) | |||
ADDQ AX,R14 | |||
ADCQ DX,R15 | |||
MOVQ 24(SI),AX | |||
MULQ 8(CX) | |||
ADDQ AX,BX | |||
ADCQ DX,BP | |||
MOVQ 64(SP),AX | |||
MULQ 24(CX) | |||
ADDQ AX,R10 | |||
ADCQ DX,R11 | |||
MOVQ 64(SP),AX | |||
MULQ 32(CX) | |||
ADDQ AX,R12 | |||
ADCQ DX,R13 | |||
// a[4]*b[0] goes to r4; the saved 19*a[4] times b[2..4] go to r1..r3. | |||
MOVQ 32(SI),AX | |||
MULQ 0(CX) | |||
ADDQ AX,BX | |||
ADCQ DX,BP | |||
MOVQ 72(SP),AX | |||
MULQ 16(CX) | |||
ADDQ AX,R10 | |||
ADCQ DX,R11 | |||
MOVQ 72(SP),AX | |||
MULQ 24(CX) | |||
ADDQ AX,R12 | |||
ADCQ DX,R13 | |||
MOVQ 72(SP),AX | |||
MULQ 32(CX) | |||
ADDQ AX,R14 | |||
ADCQ DX,R15 | |||
// Reduction. SI = REDMASK51 (2^51-1). Each two-register SHLQ by 13 is | |||
// presumably a double-width shift that leaves bits 51 and up of the 128-bit | |||
// accumulator in its high register; the low register is then masked to 51 | |||
// bits and the high part is added to the next limb. | |||
MOVQ ·REDMASK51(SB),SI | |||
SHLQ $13,R9:R8 | |||
ANDQ SI,R8 | |||
SHLQ $13,R11:R10 | |||
ANDQ SI,R10 | |||
ADDQ R9,R10 | |||
SHLQ $13,R13:R12 | |||
ANDQ SI,R12 | |||
ADDQ R11,R12 | |||
SHLQ $13,R15:R14 | |||
ANDQ SI,R14 | |||
ADDQ R13,R14 | |||
SHLQ $13,BP:BX | |||
ANDQ SI,BX | |||
ADDQ R15,BX | |||
// The high part of r4 re-enters limb 0 multiplied by 19. | |||
IMUL3Q $19,BP,DX | |||
ADDQ DX,R8 | |||
// Second carry chain through DX; the results end up in R8, CX, R9, AX, R10. | |||
MOVQ R8,DX | |||
SHRQ $51,DX | |||
ADDQ R10,DX | |||
MOVQ DX,CX | |||
SHRQ $51,DX | |||
ANDQ SI,R8 | |||
ADDQ R12,DX | |||
MOVQ DX,R9 | |||
SHRQ $51,DX | |||
ANDQ SI,CX | |||
ADDQ R14,DX | |||
MOVQ DX,AX | |||
SHRQ $51,DX | |||
ANDQ SI,R9 | |||
ADDQ BX,DX | |||
MOVQ DX,R10 | |||
SHRQ $51,DX | |||
ANDQ SI,AX | |||
// The last carry is multiplied by 19 and added to limb 0 without a further mask. | |||
IMUL3Q $19,DX,DX | |||
ADDQ DX,R8 | |||
ANDQ SI,R10 | |||
// Store the five result limbs at dest. | |||
MOVQ R8,0(DI) | |||
MOVQ CX,8(DI) | |||
MOVQ R9,16(DI) | |||
MOVQ AX,24(DI) | |||
MOVQ R10,32(DI) | |||
// Reload the spilled registers and restore the original SP. | |||
MOVQ 0(SP),R11 | |||
MOVQ 8(SP),R12 | |||
MOVQ 16(SP),R13 | |||
MOVQ 24(SP),R14 | |||
MOVQ 32(SP),R15 | |||
MOVQ 40(SP),BX | |||
MOVQ 48(SP),BP | |||
MOVQ R11,SP | |||
// DI and SI are copied to AX and DX; the Go declaration has no results, so | |||
// these two moves are presumably left over from the translated original. | |||
MOVQ DI,AX | |||
MOVQ SI,DX | |||
RET |
@ -0,0 +1,153 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
// This code was translated into a form compatible with 6a from the public | |||
// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html | |||
// +build amd64,!gccgo,!appengine | |||
// func square(out, in *[5]uint64) | |||
// | |||
// square multiplies the five 64-bit limbs at in by themselves and stores five | |||
// limbs, each masked to 51 bits except for a final small carry added to limb | |||
// 0, at out. Cross terms are computed once with a doubled operand; terms | |||
// whose index sum is 5 or more are scaled by 19 (38 when also doubled) and | |||
// accumulated at index sum minus 5. | |||
// | |||
// 128-bit accumulators (high:low): r0 = R8:CX, r1 = R10:R9, r2 = R12:R11, | |||
// r3 = R14:R13, r4 = BX:R15. MULQ leaves each 128-bit product in DX:AX. | |||
TEXT ·square(SB),7,$96-16 | |||
MOVQ out+0(FP), DI | |||
MOVQ in+8(FP), SI | |||
// Remember the incoming SP in R11, then move SP to a 32-byte aligned | |||
// address inside the frame (clear the low five bits, add 32). | |||
MOVQ SP,R11 | |||
MOVQ $31,CX | |||
NOTQ CX | |||
ANDQ CX,SP | |||
ADDQ $32, SP | |||
// Spill the original SP and the registers overwritten below. | |||
MOVQ R11,0(SP) | |||
MOVQ R12,8(SP) | |||
MOVQ R13,16(SP) | |||
MOVQ R14,24(SP) | |||
MOVQ R15,32(SP) | |||
MOVQ BX,40(SP) | |||
MOVQ BP,48(SP) | |||
// r0 = in[0]*in[0]. | |||
MOVQ 0(SI),AX | |||
MULQ 0(SI) | |||
MOVQ AX,CX | |||
MOVQ DX,R8 | |||
// r1..r4 = (2*in[0]) * in[1..4]. | |||
MOVQ 0(SI),AX | |||
SHLQ $1,AX | |||
MULQ 8(SI) | |||
MOVQ AX,R9 | |||
MOVQ DX,R10 | |||
MOVQ 0(SI),AX | |||
SHLQ $1,AX | |||
MULQ 16(SI) | |||
MOVQ AX,R11 | |||
MOVQ DX,R12 | |||
MOVQ 0(SI),AX | |||
SHLQ $1,AX | |||
MULQ 24(SI) | |||
MOVQ AX,R13 | |||
MOVQ DX,R14 | |||
MOVQ 0(SI),AX | |||
SHLQ $1,AX | |||
MULQ 32(SI) | |||
MOVQ AX,R15 | |||
MOVQ DX,BX | |||
// in[1] terms: in[1]^2 -> r2, 2*in[1]*in[2] -> r3, 2*in[1]*in[3] -> r4, | |||
// 38*in[1]*in[4] -> r0. | |||
MOVQ 8(SI),AX | |||
MULQ 8(SI) | |||
ADDQ AX,R11 | |||
ADCQ DX,R12 | |||
MOVQ 8(SI),AX | |||
SHLQ $1,AX | |||
MULQ 16(SI) | |||
ADDQ AX,R13 | |||
ADCQ DX,R14 | |||
MOVQ 8(SI),AX | |||
SHLQ $1,AX | |||
MULQ 24(SI) | |||
ADDQ AX,R15 | |||
ADCQ DX,BX | |||
MOVQ 8(SI),DX | |||
IMUL3Q $38,DX,AX | |||
MULQ 32(SI) | |||
ADDQ AX,CX | |||
ADCQ DX,R8 | |||
// in[2] terms: in[2]^2 -> r4, 38*in[2]*in[3] -> r0, 38*in[2]*in[4] -> r1. | |||
MOVQ 16(SI),AX | |||
MULQ 16(SI) | |||
ADDQ AX,R15 | |||
ADCQ DX,BX | |||
MOVQ 16(SI),DX | |||
IMUL3Q $38,DX,AX | |||
MULQ 24(SI) | |||
ADDQ AX,CX | |||
ADCQ DX,R8 | |||
MOVQ 16(SI),DX | |||
IMUL3Q $38,DX,AX | |||
MULQ 32(SI) | |||
ADDQ AX,R9 | |||
ADCQ DX,R10 | |||
// in[3] terms: 19*in[3]^2 -> r1, 38*in[3]*in[4] -> r2. | |||
MOVQ 24(SI),DX | |||
IMUL3Q $19,DX,AX | |||
MULQ 24(SI) | |||
ADDQ AX,R9 | |||
ADCQ DX,R10 | |||
MOVQ 24(SI),DX | |||
IMUL3Q $38,DX,AX | |||
MULQ 32(SI) | |||
ADDQ AX,R11 | |||
ADCQ DX,R12 | |||
// in[4] term: 19*in[4]^2 -> r3. | |||
MOVQ 32(SI),DX | |||
IMUL3Q $19,DX,AX | |||
MULQ 32(SI) | |||
ADDQ AX,R13 | |||
ADCQ DX,R14 | |||
// Reduction. SI = REDMASK51 (2^51-1); the pointer to in is no longer needed. | |||
// Each two-register SHLQ by 13 is presumably a double-width shift that | |||
// leaves bits 51 and up of the 128-bit accumulator in its high register; | |||
// the low register is masked and the high part is added to the next limb. | |||
MOVQ ·REDMASK51(SB),SI | |||
SHLQ $13,R8:CX | |||
ANDQ SI,CX | |||
SHLQ $13,R10:R9 | |||
ANDQ SI,R9 | |||
ADDQ R8,R9 | |||
SHLQ $13,R12:R11 | |||
ANDQ SI,R11 | |||
ADDQ R10,R11 | |||
SHLQ $13,R14:R13 | |||
ANDQ SI,R13 | |||
ADDQ R12,R13 | |||
SHLQ $13,BX:R15 | |||
ANDQ SI,R15 | |||
ADDQ R14,R15 | |||
// The high part of r4 re-enters limb 0 multiplied by 19. | |||
IMUL3Q $19,BX,DX | |||
ADDQ DX,CX | |||
// Second carry chain through DX; the results end up in CX, R8, R9, AX, R10. | |||
MOVQ CX,DX | |||
SHRQ $51,DX | |||
ADDQ R9,DX | |||
ANDQ SI,CX | |||
MOVQ DX,R8 | |||
SHRQ $51,DX | |||
ADDQ R11,DX | |||
ANDQ SI,R8 | |||
MOVQ DX,R9 | |||
SHRQ $51,DX | |||
ADDQ R13,DX | |||
ANDQ SI,R9 | |||
MOVQ DX,AX | |||
SHRQ $51,DX | |||
ADDQ R15,DX | |||
ANDQ SI,AX | |||
MOVQ DX,R10 | |||
SHRQ $51,DX | |||
// The last carry is multiplied by 19 and added to limb 0 without a further mask. | |||
IMUL3Q $19,DX,DX | |||
ADDQ DX,CX | |||
ANDQ SI,R10 | |||
// Store the five result limbs at out. | |||
MOVQ CX,0(DI) | |||
MOVQ R8,8(DI) | |||
MOVQ R9,16(DI) | |||
MOVQ AX,24(DI) | |||
MOVQ R10,32(DI) | |||
// Reload the spilled registers and restore the original SP. | |||
MOVQ 0(SP),R11 | |||
MOVQ 8(SP),R12 | |||
MOVQ 16(SP),R13 | |||
MOVQ 24(SP),R14 | |||
MOVQ 32(SP),R15 | |||
MOVQ 40(SP),BX | |||
MOVQ 48(SP),BP | |||
MOVQ R11,SP | |||
// DI and SI are copied to AX and DX; the Go declaration has no results, so | |||
// these two moves are presumably left over from the translated original. | |||
MOVQ DI,AX | |||
MOVQ SI,DX | |||
RET |
@ -0,0 +1,85 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
/* | |||
Package box authenticates and encrypts messages using public-key cryptography. | |||
Box uses Curve25519, XSalsa20 and Poly1305 to encrypt and authenticate | |||
messages. The length of messages is not hidden. | |||
It is the caller's responsibility to ensure the uniqueness of nonces—for | |||
example, by using nonce 1 for the first message, nonce 2 for the second | |||
message, etc. Nonces are long enough that randomly generated nonces have | |||
negligible risk of collision. | |||
This package is interoperable with NaCl: http://nacl.cr.yp.to/box.html. | |||
*/ | |||
package box | |||
import ( | |||
"github.com/tendermint/tendermint/Godeps/_workspace/src/golang.org/x/crypto/curve25519" | |||
"github.com/tendermint/tendermint/Godeps/_workspace/src/golang.org/x/crypto/nacl/secretbox" | |||
"github.com/tendermint/tendermint/Godeps/_workspace/src/golang.org/x/crypto/salsa20/salsa" | |||
"io" | |||
) | |||
// Overhead is the number of bytes of overhead when boxing a message. | |||
const Overhead = secretbox.Overhead | |||
// GenerateKey generates a new public/private key pair suitable for use with | |||
// Seal and Open. | |||
func GenerateKey(rand io.Reader) (publicKey, privateKey *[32]byte, err error) { | |||
publicKey = new([32]byte) | |||
privateKey = new([32]byte) | |||
_, err = io.ReadFull(rand, privateKey[:]) | |||
if err != nil { | |||
publicKey = nil | |||
privateKey = nil | |||
return | |||
} | |||
curve25519.ScalarBaseMult(publicKey, privateKey) | |||
return | |||
} | |||
// zeros is the all-zero 16-byte input handed to HSalsa20 by Precompute. | |||
var zeros [16]byte | |||
// Precompute calculates the shared key between peersPublicKey and privateKey | |||
// and writes it to sharedKey. The shared key can be used with | |||
// OpenAfterPrecomputation and SealAfterPrecomputation to speed up processing | |||
// when using the same pair of keys repeatedly. | |||
func Precompute(sharedKey, peersPublicKey, privateKey *[32]byte) { | |||
// Curve25519 scalar multiplication of peersPublicKey by privateKey; the raw | |||
// result is written to sharedKey. | |||
curve25519.ScalarMult(sharedKey, privateKey, peersPublicKey) | |||
// Replace that raw result, in place, with its HSalsa20 output computed over | |||
// the all-zero input block and the salsa.Sigma constant. | |||
salsa.HSalsa20(sharedKey, &zeros, sharedKey, &salsa.Sigma) | |||
} | |||
// Seal appends an encrypted and authenticated copy of message to out, which | |||
// will be Overhead bytes longer than the original and must not overlap. The | |||
// nonce must be unique for each distinct message for a given pair of keys. | |||
func Seal(out, message []byte, nonce *[24]byte, peersPublicKey, privateKey *[32]byte) []byte { | |||
var sharedKey [32]byte | |||
Precompute(&sharedKey, peersPublicKey, privateKey) | |||
return secretbox.Seal(out, message, nonce, &sharedKey) | |||
} | |||
// SealAfterPrecomputation performs the same actions as Seal, but takes a | |||
// shared key as generated by Precompute. | |||
// | |||
// All arguments are forwarded unchanged to secretbox.Seal, whose result is | |||
// returned. | |||
func SealAfterPrecomputation(out, message []byte, nonce *[24]byte, sharedKey *[32]byte) []byte { | |||
return secretbox.Seal(out, message, nonce, sharedKey) | |||
} | |||
// Open authenticates and decrypts a box produced by Seal and appends the | |||
// message to out, which must not overlap box. The output will be Overhead | |||
// bytes smaller than box. | |||
func Open(out, box []byte, nonce *[24]byte, peersPublicKey, privateKey *[32]byte) ([]byte, bool) { | |||
var sharedKey [32]byte | |||
Precompute(&sharedKey, peersPublicKey, privateKey) | |||
return secretbox.Open(out, box, nonce, &sharedKey) | |||
} | |||
// OpenAfterPrecomputation performs the same actions as Open, but takes a | |||
// shared key as generated by Precompute. | |||
// | |||
// All arguments are forwarded unchanged to secretbox.Open, whose results are | |||
// returned. | |||
func OpenAfterPrecomputation(out, box []byte, nonce *[24]byte, sharedKey *[32]byte) ([]byte, bool) { | |||
return secretbox.Open(out, box, nonce, sharedKey) | |||
} |
@ -0,0 +1,78 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
package box | |||
import ( | |||
"bytes" | |||
"crypto/rand" | |||
"encoding/hex" | |||
"testing" | |||
"github.com/tendermint/tendermint/Godeps/_workspace/src/golang.org/x/crypto/curve25519" | |||
) | |||
func TestSealOpen(t *testing.T) { | |||
publicKey1, privateKey1, _ := GenerateKey(rand.Reader) | |||
publicKey2, privateKey2, _ := GenerateKey(rand.Reader) | |||
if *privateKey1 == *privateKey2 { | |||
t.Fatalf("private keys are equal!") | |||
} | |||
if *publicKey1 == *publicKey2 { | |||
t.Fatalf("public keys are equal!") | |||
} | |||
message := []byte("test message") | |||
var nonce [24]byte | |||
box := Seal(nil, message, &nonce, publicKey1, privateKey2) | |||
opened, ok := Open(nil, box, &nonce, publicKey2, privateKey1) | |||
if !ok { | |||
t.Fatalf("failed to open box") | |||
} | |||
if !bytes.Equal(opened, message) { | |||
t.Fatalf("got %x, want %x", opened, message) | |||
} | |||
for i := range box { | |||
box[i] ^= 0x40 | |||
_, ok := Open(nil, box, &nonce, publicKey2, privateKey1) | |||
if ok { | |||
t.Fatalf("opened box with byte %d corrupted", i) | |||
} | |||
box[i] ^= 0x40 | |||
} | |||
} | |||
func TestBox(t *testing.T) { | |||
var privateKey1, privateKey2 [32]byte | |||
for i := range privateKey1[:] { | |||
privateKey1[i] = 1 | |||
} | |||
for i := range privateKey2[:] { | |||
privateKey2[i] = 2 | |||
} | |||
var publicKey1 [32]byte | |||
curve25519.ScalarBaseMult(&publicKey1, &privateKey1) | |||
var message [64]byte | |||
for i := range message[:] { | |||
message[i] = 3 | |||
} | |||
var nonce [24]byte | |||
for i := range nonce[:] { | |||
nonce[i] = 4 | |||
} | |||
box := Seal(nil, message[:], &nonce, &publicKey1, &privateKey2) | |||
// expected was generated using the C implementation of NaCl. | |||
expected, _ := hex.DecodeString("78ea30b19d2341ebbdba54180f821eec265cf86312549bea8a37652a8bb94f07b78a73ed1708085e6ddd0e943bbdeb8755079a37eb31d86163ce241164a47629c0539f330b4914cd135b3855bc2a2dfc") | |||
if !bytes.Equal(box, expected) { | |||
t.Fatalf("box didn't match, got\n%x\n, expected\n%x", box, expected) | |||
} | |||
} |
@ -0,0 +1,149 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
/* | |||
Package secretbox encrypts and authenticates small messages. | |||
Secretbox uses XSalsa20 and Poly1305 to encrypt and authenticate messages with | |||
secret-key cryptography. The length of messages is not hidden. | |||
It is the caller's responsibility to ensure the uniqueness of nonces—for | |||
example, by using nonce 1 for the first message, nonce 2 for the second | |||
message, etc. Nonces are long enough that randomly generated nonces have | |||
negligible risk of collision. | |||
This package is interoperable with NaCl: http://nacl.cr.yp.to/secretbox.html. | |||
*/ | |||
package secretbox | |||
import ( | |||
"github.com/tendermint/tendermint/Godeps/_workspace/src/golang.org/x/crypto/poly1305" | |||
"github.com/tendermint/tendermint/Godeps/_workspace/src/golang.org/x/crypto/salsa20/salsa" | |||
) | |||
// Overhead is the number of bytes of overhead when boxing a message. | |||
const Overhead = poly1305.TagSize | |||
// setup produces a sub-key and Salsa20 counter given a nonce and key. | |||
func setup(subKey *[32]byte, counter *[16]byte, nonce *[24]byte, key *[32]byte) { | |||
// We use XSalsa20 for encryption so first we need to generate a | |||
// key and nonce with HSalsa20. | |||
var hNonce [16]byte | |||
copy(hNonce[:], nonce[:]) | |||
salsa.HSalsa20(subKey, &hNonce, key, &salsa.Sigma) | |||
// The final 8 bytes of the original nonce form the new nonce. | |||
copy(counter[:], nonce[16:]) | |||
} | |||
// sliceForAppend extends in by n bytes. head is in followed by the n extra
// bytes; tail aliases just those extra bytes inside head. When in already has
// room for the extra bytes it is resliced and nothing is allocated; otherwise
// a new slice of exactly the required length is allocated and in is copied
// into it.
func sliceForAppend(in []byte, n int) (head, tail []byte) {
	oldLen := len(in)
	newLen := oldLen + n

	if newLen <= cap(in) {
		head = in[:newLen]
	} else {
		head = make([]byte, newLen)
		copy(head, in)
	}
	return head, head[oldLen:]
}
// Seal appends an encrypted and authenticated copy of message to out, which | |||
// must not overlap message. The key and nonce pair must be unique for each | |||
// distinct message and the output will be Overhead bytes longer than message. | |||
// | |||
// The appended bytes are the Poly1305 tag followed by the ciphertext; the | |||
// returned slice is out followed by those bytes. | |||
func Seal(out, message []byte, nonce *[24]byte, key *[32]byte) []byte { | |||
var subKey [32]byte | |||
var counter [16]byte | |||
setup(&subKey, &counter, nonce, key) | |||
// The Poly1305 key is generated by encrypting 32 bytes of zeros. Since | |||
// Salsa20 works with 64-byte blocks, we also generate 32 bytes of | |||
// keystream as a side effect. | |||
var firstBlock [64]byte | |||
salsa.XORKeyStream(firstBlock[:], firstBlock[:], &counter, &subKey) | |||
var poly1305Key [32]byte | |||
copy(poly1305Key[:], firstBlock[:]) | |||
// ret is the complete result; from here on out refers only to the newly | |||
// appended region (room for the tag, then the ciphertext). | |||
ret, out := sliceForAppend(out, len(message)+poly1305.TagSize) | |||
// We XOR up to 32 bytes of message with the keystream generated from | |||
// the first block. | |||
firstMessageBlock := message | |||
if len(firstMessageBlock) > 32 { | |||
firstMessageBlock = firstMessageBlock[:32] | |||
} | |||
// Remember where the tag goes; it is filled in last, once the ciphertext | |||
// it authenticates is complete. | |||
tagOut := out | |||
out = out[poly1305.TagSize:] | |||
for i, x := range firstMessageBlock { | |||
out[i] = firstBlock[32+i] ^ x | |||
} | |||
message = message[len(firstMessageBlock):] | |||
// ciphertext covers everything after the tag, including the bytes that the | |||
// XORKeyStream call below still has to write. | |||
ciphertext := out | |||
out = out[len(firstMessageBlock):] | |||
// Now encrypt the rest. | |||
// The keystream for counter bytes 8..15 == 0 was used above; setting byte 8 | |||
// to 1 presumably selects the next 64-byte Salsa20 block. | |||
counter[8] = 1 | |||
salsa.XORKeyStream(out, message, &counter, &subKey) | |||
var tag [poly1305.TagSize]byte | |||
poly1305.Sum(&tag, ciphertext, &poly1305Key) | |||
copy(tagOut, tag[:]) | |||
return ret | |||
} | |||
// Open authenticates and decrypts a box produced by Seal and appends the | |||
// message to out, which must not overlap box. The output will be Overhead | |||
// bytes smaller than box. | |||
// | |||
// It returns (nil, false) when box is shorter than Overhead or when the tag | |||
// at the start of box does not verify; nothing is decrypted in either case. | |||
func Open(out []byte, box []byte, nonce *[24]byte, key *[32]byte) ([]byte, bool) { | |||
if len(box) < Overhead { | |||
return nil, false | |||
} | |||
var subKey [32]byte | |||
var counter [16]byte | |||
setup(&subKey, &counter, nonce, key) | |||
// The Poly1305 key is generated by encrypting 32 bytes of zeros. Since | |||
// Salsa20 works with 64-byte blocks, we also generate 32 bytes of | |||
// keystream as a side effect. | |||
var firstBlock [64]byte | |||
salsa.XORKeyStream(firstBlock[:], firstBlock[:], &counter, &subKey) | |||
var poly1305Key [32]byte | |||
copy(poly1305Key[:], firstBlock[:]) | |||
// The first TagSize bytes of box are the tag; it is verified over the rest | |||
// of box before any decryption takes place. | |||
var tag [poly1305.TagSize]byte | |||
copy(tag[:], box) | |||
if !poly1305.Verify(&tag, box[poly1305.TagSize:], &poly1305Key) { | |||
return nil, false | |||
} | |||
// ret is the complete result; from here on out refers only to the newly | |||
// appended region that receives the plaintext. | |||
ret, out := sliceForAppend(out, len(box)-Overhead) | |||
// We XOR up to 32 bytes of box with the keystream generated from | |||
// the first block. | |||
box = box[Overhead:] | |||
firstMessageBlock := box | |||
if len(firstMessageBlock) > 32 { | |||
firstMessageBlock = firstMessageBlock[:32] | |||
} | |||
for i, x := range firstMessageBlock { | |||
out[i] = firstBlock[32+i] ^ x | |||
} | |||
box = box[len(firstMessageBlock):] | |||
out = out[len(firstMessageBlock):] | |||
// Now decrypt the rest. | |||
// The keystream for counter bytes 8..15 == 0 was used above; setting byte 8 | |||
// to 1 presumably selects the next 64-byte Salsa20 block. | |||
counter[8] = 1 | |||
salsa.XORKeyStream(out, box, &counter, &subKey) | |||
return ret, true | |||
} |
@ -0,0 +1,91 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
package secretbox | |||
import ( | |||
"bytes" | |||
"crypto/rand" | |||
"encoding/hex" | |||
"testing" | |||
) | |||
func TestSealOpen(t *testing.T) { | |||
var key [32]byte | |||
var nonce [24]byte | |||
rand.Reader.Read(key[:]) | |||
rand.Reader.Read(nonce[:]) | |||
var box, opened []byte | |||
for msgLen := 0; msgLen < 128; msgLen += 17 { | |||
message := make([]byte, msgLen) | |||
rand.Reader.Read(message) | |||
box = Seal(box[:0], message, &nonce, &key) | |||
var ok bool | |||
opened, ok = Open(opened[:0], box, &nonce, &key) | |||
if !ok { | |||
t.Errorf("%d: failed to open box", msgLen) | |||
continue | |||
} | |||
if !bytes.Equal(opened, message) { | |||
t.Errorf("%d: got %x, expected %x", msgLen, opened, message) | |||
continue | |||
} | |||
} | |||
for i := range box { | |||
box[i] ^= 0x20 | |||
_, ok := Open(opened[:0], box, &nonce, &key) | |||
if ok { | |||
t.Errorf("box was opened after corrupting byte %d", i) | |||
} | |||
box[i] ^= 0x20 | |||
} | |||
} | |||
func TestSecretBox(t *testing.T) { | |||
var key [32]byte | |||
var nonce [24]byte | |||
var message [64]byte | |||
for i := range key[:] { | |||
key[i] = 1 | |||
} | |||
for i := range nonce[:] { | |||
nonce[i] = 2 | |||
} | |||
for i := range message[:] { | |||
message[i] = 3 | |||
} | |||
box := Seal(nil, message[:], &nonce, &key) | |||
// expected was generated using the C implementation of NaCl. | |||
expected, _ := hex.DecodeString("8442bc313f4626f1359e3b50122b6ce6fe66ddfe7d39d14e637eb4fd5b45beadab55198df6ab5368439792a23c87db70acb6156dc5ef957ac04f6276cf6093b84be77ff0849cc33e34b7254d5a8f65ad") | |||
if !bytes.Equal(box, expected) { | |||
t.Fatalf("box didn't match, got\n%x\n, expected\n%x", box, expected) | |||
} | |||
} | |||
func TestAppend(t *testing.T) { | |||
var key [32]byte | |||
var nonce [24]byte | |||
var message [8]byte | |||
out := make([]byte, 4) | |||
box := Seal(out, message[:], &nonce, &key) | |||
if !bytes.Equal(box[:4], out[:4]) { | |||
t.Fatalf("Seal didn't correctly append") | |||
} | |||
out = make([]byte, 4, 100) | |||
box = Seal(out, message[:], &nonce, &key) | |||
if !bytes.Equal(box[:4], out[:4]) { | |||
t.Fatalf("Seal didn't correctly append with sufficient capacity.") | |||
} | |||
} |
@ -0,0 +1,45 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
// This code was translated into a form compatible with 6a from the public | |||
// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html | |||
// +build amd64,!gccgo,!appengine | |||
// The 8-byte values below are used as operands of x87 double instructions | |||
// (FSUBD, FMULD, ...) in the poly1305 routine. The decoded values given here | |||
// read each bit pattern as an IEEE-754 double. | |||
// SCALE = 1.25 * 2^-128 = 5 * 2^-130. | |||
DATA ·SCALE(SB)/8, $0x37F4000000000000 | |||
GLOBL ·SCALE(SB), 8, $8 | |||
// TWO32, TWO64, TWO96 = 2^32, 2^64, 2^96. | |||
DATA ·TWO32(SB)/8, $0x41F0000000000000 | |||
GLOBL ·TWO32(SB), 8, $8 | |||
DATA ·TWO64(SB)/8, $0x43F0000000000000 | |||
GLOBL ·TWO64(SB), 8, $8 | |||
DATA ·TWO96(SB)/8, $0x45F0000000000000 | |||
GLOBL ·TWO96(SB), 8, $8 | |||
// ALPHA32, ALPHA64, ALPHA96, ALPHA130 = 1.5 * 2^95, 2^127, 2^159, 2^193. | |||
DATA ·ALPHA32(SB)/8, $0x45E8000000000000 | |||
GLOBL ·ALPHA32(SB), 8, $8 | |||
DATA ·ALPHA64(SB)/8, $0x47E8000000000000 | |||
GLOBL ·ALPHA64(SB), 8, $8 | |||
DATA ·ALPHA96(SB)/8, $0x49E8000000000000 | |||
GLOBL ·ALPHA96(SB), 8, $8 | |||
DATA ·ALPHA130(SB)/8, $0x4C08000000000000 | |||
GLOBL ·ALPHA130(SB), 8, $8 | |||
// DOFFSET0..3 = 2^52, 2^84, 2^116, 2^148. Their high 32 bits match the words | |||
// the poly1305 routine stores above 32-bit integers on the stack, so | |||
// subtracting a DOFFSET presumably recovers that integer as a double. | |||
DATA ·DOFFSET0(SB)/8, $0x4330000000000000 | |||
GLOBL ·DOFFSET0(SB), 8, $8 | |||
DATA ·DOFFSET1(SB)/8, $0x4530000000000000 | |||
GLOBL ·DOFFSET1(SB), 8, $8 | |||
DATA ·DOFFSET2(SB)/8, $0x4730000000000000 | |||
GLOBL ·DOFFSET2(SB), 8, $8 | |||
DATA ·DOFFSET3(SB)/8, $0x4930000000000000 | |||
GLOBL ·DOFFSET3(SB), 8, $8 | |||
// DOFFSET3MINUSTWO128 = 2^148 - 2^128. | |||
DATA ·DOFFSET3MINUSTWO128(SB)/8, $0x492FFFFE00000000 | |||
GLOBL ·DOFFSET3MINUSTWO128(SB), 8, $8 | |||
// HOFFSET0..3 share the exponents of DOFFSET0..3 but carry non-zero low | |||
// mantissa bits (0x1FFFFFFFB, 0x1FFFFFFFE, 0x1FFFFFFFE, 0x3FFFFFFFE). Their | |||
// use is outside the code visible here. | |||
DATA ·HOFFSET0(SB)/8, $0x43300001FFFFFFFB | |||
GLOBL ·HOFFSET0(SB), 8, $8 | |||
DATA ·HOFFSET1(SB)/8, $0x45300001FFFFFFFE | |||
GLOBL ·HOFFSET1(SB), 8, $8 | |||
DATA ·HOFFSET2(SB)/8, $0x47300001FFFFFFFE | |||
GLOBL ·HOFFSET2(SB), 8, $8 | |||
DATA ·HOFFSET3(SB)/8, $0x49300003FFFFFFFE | |||
GLOBL ·HOFFSET3(SB), 8, $8 | |||
// ROUNDING is a 2-byte value loaded with FLDCW (the x87 control word) at the | |||
// start of the poly1305 routine. | |||
DATA ·ROUNDING(SB)/2, $0x137f | |||
GLOBL ·ROUNDING(SB), 8, $2 |
@ -0,0 +1,32 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
/* | |||
Package poly1305 implements Poly1305 one-time message authentication code as specified in http://cr.yp.to/mac/poly1305-20050329.pdf. | |||
Poly1305 is a fast, one-time authentication function. It is infeasible for an | |||
attacker to generate an authenticator for a message without the key. However, a | |||
key must only be used for a single message. Authenticating two different | |||
messages with the same key allows an attacker to forge authenticators for other | |||
messages with the same key. | |||
Poly1305 was originally coupled with AES in order to make Poly1305-AES. AES was | |||
used with a fixed key in order to generate one-time keys from a nonce. | |||
However, in this package AES isn't used and the one-time key is specified | |||
directly. | |||
*/ | |||
package poly1305 | |||
import "crypto/subtle" | |||
// TagSize is the size, in bytes, of a poly1305 authenticator. | |||
const TagSize = 16 | |||
// Verify returns true if mac is a valid authenticator for m with the given | |||
// key. | |||
func Verify(mac *[16]byte, m []byte, key *[32]byte) bool { | |||
var tmp [16]byte | |||
Sum(&tmp, m, key) | |||
return subtle.ConstantTimeCompare(tmp[:], mac[:]) == 1 | |||
} |
@ -0,0 +1,497 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
// This code was translated into a form compatible with 6a from the public | |||
// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html | |||
// +build amd64,!gccgo,!appengine | |||
// func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]byte) | |||
// | |||
// poly1305 reads mlen bytes starting at m together with the 32-byte key and | |||
// writes a 16-byte result to out. The arithmetic is carried out on the x87 | |||
// floating-point stack; integer registers are only used to move data in and | |||
// out of 8-byte scratch slots and for the final carry chains. | |||
TEXT ·poly1305(SB),0,$224-32 | |||
// Fetch the four arguments. | |||
MOVQ out+0(FP),DI | |||
MOVQ m+8(FP),SI | |||
MOVQ mlen+16(FP),DX | |||
MOVQ key+24(FP),CX | |||
// Remember the incoming SP in R11, then clear the low five bits of SP and | |||
// add 32, so that the scratch area addressed through SP is 32-byte aligned. | |||
MOVQ SP,R11 | |||
MOVQ $31,R9 | |||
NOTQ R9 | |||
ANDQ R9,SP | |||
ADDQ $32,SP | |||
// Stash the original SP (R11) and R12-R15, BX, BP in the frame; they are | |||
// reloaded from the same slots right before RET. | |||
MOVQ R11,32(SP) | |||
MOVQ R12,40(SP) | |||
MOVQ R13,48(SP) | |||
MOVQ R14,56(SP) | |||
MOVQ R15,64(SP) | |||
MOVQ BX,72(SP) | |||
MOVQ BP,80(SP) | |||
// Install the x87 control word stored in ·ROUNDING. | |||
FLDCW ·ROUNDING(SB) | |||
// Load the first 16 bytes of the key as four 32-bit words. | |||
MOVL 0(CX),R8 | |||
MOVL 4(CX),R9 | |||
MOVL 8(CX),AX | |||
MOVL 12(CX),R10 | |||
// Keep out and key for the final step; DI and CX are reused below. | |||
MOVQ DI,88(SP) | |||
MOVQ CX,96(SP) | |||
// Pre-fill the upper halves of the 8-byte slots at 104, 112, 120 and | |||
// 128(SP) with constant words, so that each slot can be loaded as a float64 | |||
// once its lower half holds a 32-bit integer. | |||
MOVL $0X43300000,108(SP) | |||
MOVL $0X45300000,116(SP) | |||
MOVL $0X47300000,124(SP) | |||
MOVL $0X49300000,132(SP) | |||
// Mask the key words: the top four bits of every word are cleared, and for | |||
// words 1-3 the low two bits as well. | |||
ANDL $0X0FFFFFFF,R8 | |||
ANDL $0X0FFFFFFC,R9 | |||
ANDL $0X0FFFFFFC,AX | |||
ANDL $0X0FFFFFFC,R10 | |||
MOVL R8,104(SP) | |||
MOVL R9,112(SP) | |||
MOVL AX,120(SP) | |||
MOVL R10,128(SP) | |||
// Load each slot as a float64 and subtract the matching ·DOFFSETn constant. | |||
FMOVD 104(SP), F0 | |||
FSUBD ·DOFFSET0(SB), F0 | |||
FMOVD 112(SP), F0 | |||
FSUBD ·DOFFSET1(SB), F0 | |||
FMOVD 120(SP), F0 | |||
FSUBD ·DOFFSET2(SB), F0 | |||
FMOVD 128(SP), F0 | |||
FSUBD ·DOFFSET3(SB), F0 | |||
// Store the resulting values at 136, 144, 160 and 176(SP) and copies | |||
// multiplied by ·SCALE at 152, 168 and 184(SP). The block code below reads | |||
// these slots back as multiplicands. | |||
FXCHD F0, F3 | |||
FMOVDP F0, 136(SP) | |||
FXCHD F0, F1 | |||
FMOVD F0, 144(SP) | |||
FMULD ·SCALE(SB), F0 | |||
FMOVDP F0, 152(SP) | |||
FMOVD F0, 160(SP) | |||
FMULD ·SCALE(SB), F0 | |||
FMOVDP F0, 168(SP) | |||
FMOVD F0, 176(SP) | |||
FMULD ·SCALE(SB), F0 | |||
FMOVDP F0, 184(SP) | |||
// Push four zeros. | |||
// NOTE(review): presumably the initial accumulator limbs; confirm. | |||
FLDZ | |||
FLDZ | |||
FLDZ | |||
FLDZ | |||
// Fewer than 16 bytes in total: skip the full-block code. | |||
CMPQ DX,$16 | |||
JB ADDATMOST15BYTES | |||
INITIALATLEAST16BYTES: | |||
// Copy the first 16 message bytes into the low halves of the float slots, | |||
// then advance m by 16 and reduce mlen by 16. | |||
MOVL 12(SI),DI | |||
MOVL 8(SI),CX | |||
MOVL 4(SI),R8 | |||
MOVL 0(SI),R9 | |||
MOVL DI,128(SP) | |||
MOVL CX,120(SP) | |||
MOVL R8,112(SP) | |||
MOVL R9,104(SP) | |||
ADDQ $16,SI | |||
SUBQ $16,DX | |||
// Add the four words to the values on the FPU stack. The top word uses | |||
// ·DOFFSET3MINUSTWO128 instead of ·DOFFSET3. | |||
// NOTE(review): presumably this folds in the 2^128 padding bit of a full | |||
// block; confirm against the constant's definition. | |||
FXCHD F0, F3 | |||
FADDD 128(SP), F0 | |||
FSUBD ·DOFFSET3MINUSTWO128(SB), F0 | |||
FXCHD F0, F1 | |||
FADDD 112(SP), F0 | |||
FSUBD ·DOFFSET1(SB), F0 | |||
FXCHD F0, F2 | |||
FADDD 120(SP), F0 | |||
FSUBD ·DOFFSET2(SB), F0 | |||
FXCHD F0, F3 | |||
FADDD 104(SP), F0 | |||
FSUBD ·DOFFSET0(SB), F0 | |||
CMPQ DX,$16 | |||
JB MULTIPLYADDATMOST15BYTES | |||
MULTIPLYADDATLEAST16BYTES: | |||
// Main loop, entered while at least 16 unread bytes remain. The next block | |||
// is fetched into the float slots first and added at the end of the body. | |||
MOVL 12(SI),DI | |||
MOVL 8(SI),CX | |||
MOVL 4(SI),R8 | |||
MOVL 0(SI),R9 | |||
MOVL DI,128(SP) | |||
MOVL CX,120(SP) | |||
MOVL R8,112(SP) | |||
MOVL R9,104(SP) | |||
ADDQ $16,SI | |||
SUBQ $16,DX | |||
// Add and then subtract each ·ALPHAn constant and redistribute the | |||
// differences between neighbouring values. | |||
// NOTE(review): this looks like the carry step (rounding each value to a | |||
// multiple of a power of two); confirm against the ALPHA definitions. | |||
FMOVD ·ALPHA130(SB), F0 | |||
FADDD F2,F0 | |||
FSUBD ·ALPHA130(SB), F0 | |||
FSUBD F0,F2 | |||
FMULD ·SCALE(SB), F0 | |||
FMOVD ·ALPHA32(SB), F0 | |||
FADDD F2,F0 | |||
FSUBD ·ALPHA32(SB), F0 | |||
FSUBD F0,F2 | |||
FXCHD F0, F2 | |||
FADDDP F0,F1 | |||
FMOVD ·ALPHA64(SB), F0 | |||
FADDD F4,F0 | |||
FSUBD ·ALPHA64(SB), F0 | |||
FSUBD F0,F4 | |||
FMOVD ·ALPHA96(SB), F0 | |||
FADDD F6,F0 | |||
FSUBD ·ALPHA96(SB), F0 | |||
FSUBD F0,F6 | |||
FXCHD F0, F6 | |||
FADDDP F0,F1 | |||
FXCHD F0, F3 | |||
FADDDP F0,F5 | |||
FXCHD F0, F3 | |||
FADDDP F0,F1 | |||
// Multiply by the key-derived values stored at 136..184(SP) and accumulate | |||
// the partial products. | |||
FMOVD 176(SP), F0 | |||
FMULD F3,F0 | |||
FMOVD 160(SP), F0 | |||
FMULD F4,F0 | |||
FMOVD 144(SP), F0 | |||
FMULD F5,F0 | |||
FMOVD 136(SP), F0 | |||
FMULDP F0,F6 | |||
FMOVD 160(SP), F0 | |||
FMULD F4,F0 | |||
FADDDP F0,F3 | |||
FMOVD 144(SP), F0 | |||
FMULD F4,F0 | |||
FADDDP F0,F2 | |||
FMOVD 136(SP), F0 | |||
FMULD F4,F0 | |||
FADDDP F0,F1 | |||
FMOVD 184(SP), F0 | |||
FMULDP F0,F4 | |||
FXCHD F0, F3 | |||
FADDDP F0,F5 | |||
FMOVD 144(SP), F0 | |||
FMULD F4,F0 | |||
FADDDP F0,F2 | |||
FMOVD 136(SP), F0 | |||
FMULD F4,F0 | |||
FADDDP F0,F1 | |||
FMOVD 184(SP), F0 | |||
FMULD F4,F0 | |||
FADDDP F0,F3 | |||
FMOVD 168(SP), F0 | |||
FMULDP F0,F4 | |||
FXCHD F0, F3 | |||
FADDDP F0,F4 | |||
FMOVD 136(SP), F0 | |||
FMULD F5,F0 | |||
FADDDP F0,F1 | |||
FXCHD F0, F3 | |||
FMOVD 184(SP), F0 | |||
FMULD F5,F0 | |||
FADDDP F0,F3 | |||
FXCHD F0, F1 | |||
FMOVD 168(SP), F0 | |||
FMULD F5,F0 | |||
FADDDP F0,F1 | |||
FMOVD 152(SP), F0 | |||
FMULDP F0,F5 | |||
FXCHD F0, F4 | |||
FADDDP F0,F1 | |||
// Decide whether another full block follows. Only floating-point | |||
// instructions sit between this compare and the JAE at the end of the loop | |||
// body, which consumes its result. | |||
CMPQ DX,$16 | |||
// Add the block that was fetched at the top of the loop. | |||
FXCHD F0, F2 | |||
FMOVD 128(SP), F0 | |||
FSUBD ·DOFFSET3MINUSTWO128(SB), F0 | |||
FADDDP F0,F1 | |||
FXCHD F0, F1 | |||
FMOVD 120(SP), F0 | |||
FSUBD ·DOFFSET2(SB), F0 | |||
FADDDP F0,F1 | |||
FXCHD F0, F3 | |||
FMOVD 112(SP), F0 | |||
FSUBD ·DOFFSET1(SB), F0 | |||
FADDDP F0,F1 | |||
FXCHD F0, F2 | |||
FMOVD 104(SP), F0 | |||
FSUBD ·DOFFSET0(SB), F0 | |||
FADDDP F0,F1 | |||
JAE MULTIPLYADDATLEAST16BYTES | |||
MULTIPLYADDATMOST15BYTES: | |||
// No full block is left: run one more ALPHA step and multiplication on the | |||
// value accumulated so far. | |||
FMOVD ·ALPHA130(SB), F0 | |||
FADDD F2,F0 | |||
FSUBD ·ALPHA130(SB), F0 | |||
FSUBD F0,F2 | |||
FMULD ·SCALE(SB), F0 | |||
FMOVD ·ALPHA32(SB), F0 | |||
FADDD F2,F0 | |||
FSUBD ·ALPHA32(SB), F0 | |||
FSUBD F0,F2 | |||
FMOVD ·ALPHA64(SB), F0 | |||
FADDD F5,F0 | |||
FSUBD ·ALPHA64(SB), F0 | |||
FSUBD F0,F5 | |||
FMOVD ·ALPHA96(SB), F0 | |||
FADDD F7,F0 | |||
FSUBD ·ALPHA96(SB), F0 | |||
FSUBD F0,F7 | |||
FXCHD F0, F7 | |||
FADDDP F0,F1 | |||
FXCHD F0, F5 | |||
FADDDP F0,F1 | |||
FXCHD F0, F3 | |||
FADDDP F0,F5 | |||
FADDDP F0,F1 | |||
FMOVD 176(SP), F0 | |||
FMULD F1,F0 | |||
FMOVD 160(SP), F0 | |||
FMULD F2,F0 | |||
FMOVD 144(SP), F0 | |||
FMULD F3,F0 | |||
FMOVD 136(SP), F0 | |||
FMULDP F0,F4 | |||
FMOVD 160(SP), F0 | |||
FMULD F5,F0 | |||
FADDDP F0,F3 | |||
FMOVD 144(SP), F0 | |||
FMULD F5,F0 | |||
FADDDP F0,F2 | |||
FMOVD 136(SP), F0 | |||
FMULD F5,F0 | |||
FADDDP F0,F1 | |||
FMOVD 184(SP), F0 | |||
FMULDP F0,F5 | |||
FXCHD F0, F4 | |||
FADDDP F0,F3 | |||
FMOVD 144(SP), F0 | |||
FMULD F5,F0 | |||
FADDDP F0,F2 | |||
FMOVD 136(SP), F0 | |||
FMULD F5,F0 | |||
FADDDP F0,F1 | |||
FMOVD 184(SP), F0 | |||
FMULD F5,F0 | |||
FADDDP F0,F4 | |||
FMOVD 168(SP), F0 | |||
FMULDP F0,F5 | |||
FXCHD F0, F4 | |||
FADDDP F0,F2 | |||
FMOVD 136(SP), F0 | |||
FMULD F5,F0 | |||
FADDDP F0,F1 | |||
FMOVD 184(SP), F0 | |||
FMULD F5,F0 | |||
FADDDP F0,F4 | |||
FMOVD 168(SP), F0 | |||
FMULD F5,F0 | |||
FADDDP F0,F3 | |||
FMOVD 152(SP), F0 | |||
FMULDP F0,F5 | |||
FXCHD F0, F4 | |||
FADDDP F0,F1 | |||
ADDATMOST15BYTES: | |||
// Handle a trailing partial block of 1..15 bytes; nothing to do when no | |||
// bytes remain. | |||
CMPQ DX,$0 | |||
JE NOMOREBYTES | |||
// Zero a 16-byte buffer at 0(SP), copy the remaining DX bytes into it with | |||
// REP MOVSB and write a single 1 byte right after the copied data. | |||
MOVL $0,0(SP) | |||
MOVL $0, 4 (SP) | |||
MOVL $0, 8 (SP) | |||
MOVL $0, 12 (SP) | |||
LEAQ 0(SP),DI | |||
MOVQ DX,CX | |||
REP; MOVSB | |||
MOVB $1,0(DI) | |||
// Move the padded block into the float slots as four 32-bit words. | |||
MOVL 12 (SP),DI | |||
MOVL 8 (SP),SI | |||
MOVL 4 (SP),DX | |||
MOVL 0(SP),CX | |||
MOVL DI,128(SP) | |||
MOVL SI,120(SP) | |||
MOVL DX,112(SP) | |||
MOVL CX,104(SP) | |||
// Add the padded block. The plain ·DOFFSET3 is used here, unlike for full | |||
// blocks; the padding byte was written into the buffer explicitly above. | |||
FXCHD F0, F3 | |||
FADDD 128(SP), F0 | |||
FSUBD ·DOFFSET3(SB), F0 | |||
FXCHD F0, F2 | |||
FADDD 120(SP), F0 | |||
FSUBD ·DOFFSET2(SB), F0 | |||
FXCHD F0, F1 | |||
FADDD 112(SP), F0 | |||
FSUBD ·DOFFSET1(SB), F0 | |||
FXCHD F0, F3 | |||
FADDD 104(SP), F0 | |||
FSUBD ·DOFFSET0(SB), F0 | |||
// Same ALPHA step and multiplication by the stored key values as in the | |||
// full-block paths. | |||
FMOVD ·ALPHA130(SB), F0 | |||
FADDD F3,F0 | |||
FSUBD ·ALPHA130(SB), F0 | |||
FSUBD F0,F3 | |||
FMULD ·SCALE(SB), F0 | |||
FMOVD ·ALPHA32(SB), F0 | |||
FADDD F2,F0 | |||
FSUBD ·ALPHA32(SB), F0 | |||
FSUBD F0,F2 | |||
FMOVD ·ALPHA64(SB), F0 | |||
FADDD F6,F0 | |||
FSUBD ·ALPHA64(SB), F0 | |||
FSUBD F0,F6 | |||
FMOVD ·ALPHA96(SB), F0 | |||
FADDD F5,F0 | |||
FSUBD ·ALPHA96(SB), F0 | |||
FSUBD F0,F5 | |||
FXCHD F0, F4 | |||
FADDDP F0,F3 | |||
FXCHD F0, F6 | |||
FADDDP F0,F1 | |||
FXCHD F0, F3 | |||
FADDDP F0,F5 | |||
FXCHD F0, F3 | |||
FADDDP F0,F1 | |||
FMOVD 176(SP), F0 | |||
FMULD F3,F0 | |||
FMOVD 160(SP), F0 | |||
FMULD F4,F0 | |||
FMOVD 144(SP), F0 | |||
FMULD F5,F0 | |||
FMOVD 136(SP), F0 | |||
FMULDP F0,F6 | |||
FMOVD 160(SP), F0 | |||
FMULD F5,F0 | |||
FADDDP F0,F3 | |||
FMOVD 144(SP), F0 | |||
FMULD F5,F0 | |||
FADDDP F0,F2 | |||
FMOVD 136(SP), F0 | |||
FMULD F5,F0 | |||
FADDDP F0,F1 | |||
FMOVD 184(SP), F0 | |||
FMULDP F0,F5 | |||
FXCHD F0, F4 | |||
FADDDP F0,F5 | |||
FMOVD 144(SP), F0 | |||
FMULD F6,F0 | |||
FADDDP F0,F2 | |||
FMOVD 136(SP), F0 | |||
FMULD F6,F0 | |||
FADDDP F0,F1 | |||
FMOVD 184(SP), F0 | |||
FMULD F6,F0 | |||
FADDDP F0,F4 | |||
FMOVD 168(SP), F0 | |||
FMULDP F0,F6 | |||
FXCHD F0, F5 | |||
FADDDP F0,F4 | |||
FMOVD 136(SP), F0 | |||
FMULD F2,F0 | |||
FADDDP F0,F1 | |||
FMOVD 184(SP), F0 | |||
FMULD F2,F0 | |||
FADDDP F0,F5 | |||
FMOVD 168(SP), F0 | |||
FMULD F2,F0 | |||
FADDDP F0,F3 | |||
FMOVD 152(SP), F0 | |||
FMULDP F0,F2 | |||
FXCHD F0, F1 | |||
FADDDP F0,F3 | |||
FXCHD F0, F3 | |||
FXCHD F0, F2 | |||
NOMOREBYTES: | |||
// Finalisation. R10 is zeroed and serves as the zero operand of the carry | |||
// chains below. | |||
MOVL $0,R10 | |||
// Last ALPHA step on the floating-point values. | |||
FMOVD ·ALPHA130(SB), F0 | |||
FADDD F4,F0 | |||
FSUBD ·ALPHA130(SB), F0 | |||
FSUBD F0,F4 | |||
FMULD ·SCALE(SB), F0 | |||
FMOVD ·ALPHA32(SB), F0 | |||
FADDD F2,F0 | |||
FSUBD ·ALPHA32(SB), F0 | |||
FSUBD F0,F2 | |||
FMOVD ·ALPHA64(SB), F0 | |||
FADDD F4,F0 | |||
FSUBD ·ALPHA64(SB), F0 | |||
FSUBD F0,F4 | |||
FMOVD ·ALPHA96(SB), F0 | |||
FADDD F6,F0 | |||
FSUBD ·ALPHA96(SB), F0 | |||
FXCHD F0, F6 | |||
FSUBD F6,F0 | |||
FXCHD F0, F4 | |||
FADDDP F0,F3 | |||
FXCHD F0, F4 | |||
FADDDP F0,F1 | |||
FXCHD F0, F2 | |||
FADDDP F0,F3 | |||
FXCHD F0, F4 | |||
FADDDP F0,F3 | |||
// Add the ·HOFFSETn constants and store the four values as float64 at | |||
// 104..128(SP) so that their 32-bit halves can be read back as integers. | |||
FXCHD F0, F3 | |||
FADDD ·HOFFSET0(SB), F0 | |||
FXCHD F0, F3 | |||
FADDD ·HOFFSET1(SB), F0 | |||
FXCHD F0, F1 | |||
FADDD ·HOFFSET2(SB), F0 | |||
FXCHD F0, F2 | |||
FADDD ·HOFFSET3(SB), F0 | |||
FXCHD F0, F3 | |||
FMOVDP F0, 104(SP) | |||
FMOVDP F0, 112(SP) | |||
FMOVDP F0, 120(SP) | |||
FMOVDP F0, 128(SP) | |||
// Take the low six bits of each upper half and add them, with carry, into | |||
// the lower half of the next slot; the last carry word goes to 136(SP). | |||
MOVL 108(SP),DI | |||
ANDL $63,DI | |||
MOVL 116(SP),SI | |||
ANDL $63,SI | |||
MOVL 124(SP),DX | |||
ANDL $63,DX | |||
MOVL 132(SP),CX | |||
ANDL $63,CX | |||
MOVL 112(SP),R8 | |||
ADDL DI,R8 | |||
MOVQ R8,112(SP) | |||
MOVL 120(SP),DI | |||
ADCL SI,DI | |||
MOVQ DI,120(SP) | |||
MOVL 128(SP),DI | |||
ADCL DX,DI | |||
MOVQ DI,128(SP) | |||
MOVL R10,DI | |||
ADCL CX,DI | |||
MOVQ DI,136(SP) | |||
// Compute a second candidate equal to the value plus 5, propagating the | |||
// carry through all words. SI, DX, CX and R8 keep the unmodified words, the | |||
// slots at 104..128(SP) receive the incremented ones. | |||
MOVQ $5,DI | |||
MOVL 104(SP),SI | |||
ADDL SI,DI | |||
MOVQ DI,104(SP) | |||
MOVL R10,DI | |||
MOVQ 112(SP),DX | |||
ADCL DX,DI | |||
MOVQ DI,112(SP) | |||
MOVL R10,DI | |||
MOVQ 120(SP),CX | |||
ADCL CX,DI | |||
MOVQ DI,120(SP) | |||
MOVL R10,DI | |||
MOVQ 128(SP),R8 | |||
ADCL R8,DI | |||
MOVQ DI,128(SP) | |||
// Derive a selection mask in DI from the top word plus 0xFFFFFFFC plus | |||
// carry (arithmetic shift right by 16); R9 is its 32-bit complement. | |||
MOVQ $0XFFFFFFFC,DI | |||
MOVQ 136(SP),R9 | |||
ADCL R9,DI | |||
SARL $16,DI | |||
MOVQ DI,R9 | |||
XORL $0XFFFFFFFF,R9 | |||
// Branch-free select: (original AND DI) OR (incremented AND R9) per word. | |||
ANDQ DI,SI | |||
MOVQ 104(SP),AX | |||
ANDQ R9,AX | |||
ORQ AX,SI | |||
ANDQ DI,DX | |||
MOVQ 112(SP),AX | |||
ANDQ R9,AX | |||
ORQ AX,DX | |||
ANDQ DI,CX | |||
MOVQ 120(SP),AX | |||
ANDQ R9,AX | |||
ORQ AX,CX | |||
ANDQ DI,R8 | |||
MOVQ 128(SP),DI | |||
ANDQ R9,DI | |||
ORQ DI,R8 | |||
// Reload out and key, add key bytes 16..31 as four 32-bit words with carry, | |||
// and write the 16-byte result to out. | |||
MOVQ 88(SP),DI | |||
MOVQ 96(SP),R9 | |||
ADDL 16(R9),SI | |||
ADCL 20(R9),DX | |||
ADCL 24(R9),CX | |||
ADCL 28(R9),R8 | |||
MOVL SI,0(DI) | |||
MOVL DX,4(DI) | |||
MOVL CX,8(DI) | |||
MOVL R8,12(DI) | |||
// Restore the registers saved on entry and the original SP. | |||
MOVQ 32(SP),R11 | |||
MOVQ 40(SP),R12 | |||
MOVQ 48(SP),R13 | |||
MOVQ 56(SP),R14 | |||
MOVQ 64(SP),R15 | |||
MOVQ 72(SP),BX | |||
MOVQ 80(SP),BP | |||
MOVQ R11,SP | |||
RET |
@ -0,0 +1,331 @@ | |||
// Copyright 2015 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
// This code was translated into a form compatible with 5a from the public | |||
// domain source by Andrew Moon: github.com/floodyberry/poly1305-opt/blob/master/app/extensions/poly1305. | |||
// +build arm,!gccgo,!appengine | |||
// poly1305_init_constants_armv6 holds five 32-bit masks (20 bytes). | |||
// poly1305_init_ext_armv6 loads them into R2-R6 and ANDs them with the five | |||
// pieces it derives from the first 16 key bytes. | |||
DATA poly1305_init_constants_armv6<>+0x00(SB)/4, $0x3ffffff | |||
DATA poly1305_init_constants_armv6<>+0x04(SB)/4, $0x3ffff03 | |||
DATA poly1305_init_constants_armv6<>+0x08(SB)/4, $0x3ffc0ff | |||
DATA poly1305_init_constants_armv6<>+0x0c(SB)/4, $0x3f03fff | |||
DATA poly1305_init_constants_armv6<>+0x10(SB)/4, $0x00fffff | |||
GLOBL poly1305_init_constants_armv6<>(SB), 8, $20 | |||
// Warning: the linker may use R11 to synthesize certain instructions. Please | |||
// take care and verify that no synthetic instructions use it. | |||
// poly1305_init_ext_armv6 initialises the state block from the key. | |||
// On entry R0 points at the state and R1 at the 32-byte key (this is how | |||
// ·poly1305_auth_armv6 calls it). It writes 15 words: the five masked key | |||
// pieces, five zero words, the last four key words, and one more zero word at | |||
// state offset 56. | |||
TEXT poly1305_init_ext_armv6<>(SB),4,$-4 | |||
// Save R4-R11 on the stack; they are restored before RET. | |||
MOVM.DB.W [R4-R11], (R13) | |||
// Load key words 0..3 into R2-R5 and advance R1 past them. | |||
MOVM.IA.W (R1), [R2-R5] | |||
MOVW $poly1305_init_constants_armv6<>(SB), R7 | |||
// Split the 128-bit value R5:R4:R3:R2 into five pieces starting at bit | |||
// offsets 0, 26, 52, 78 and 104 (held in R8, R9, g, R11, R12). | |||
MOVW R2, R8 | |||
MOVW R2>>26, R9 | |||
MOVW R3>>20, g | |||
MOVW R4>>14, R11 | |||
MOVW R5>>8, R12 | |||
ORR R3<<6, R9, R9 | |||
ORR R4<<12, g, g | |||
ORR R5<<18, R11, R11 | |||
// Load the five masks and apply one to each piece. | |||
MOVM.IA (R7), [R2-R6] | |||
AND R8, R2, R2 | |||
AND R9, R3, R3 | |||
AND g, R4, R4 | |||
AND R11, R5, R5 | |||
AND R12, R6, R6 | |||
// state[0..19]: the five masked pieces. | |||
MOVM.IA.W [R2-R6], (R0) | |||
EOR R2, R2, R2 | |||
EOR R3, R3, R3 | |||
EOR R4, R4, R4 | |||
EOR R5, R5, R5 | |||
EOR R6, R6, R6 | |||
// state[20..39]: five zero words (read back as the accumulator by | |||
// poly1305_blocks_armv6). | |||
MOVM.IA.W [R2-R6], (R0) | |||
// state[40..55]: key words 4..7; state[56]: R6, which is still zero. | |||
MOVM.IA.W (R1), [R2-R5] | |||
MOVM.IA [R2-R6], (R0) | |||
MOVM.IA.W (R13), [R4-R11] | |||
RET | |||
// poly1305_blocks_armv6 absorbs whole 16-byte blocks into the state. | |||
// On entry R0 points at the state, R1 at the input and R2 holds the input | |||
// length in bytes. Each block is split into five 26-bit pieces, added to the | |||
// accumulator words at state[20..39], and the sum is multiplied by the key | |||
// pieces at state[0..19] using 64-bit multiply-accumulate instructions. | |||
TEXT poly1305_blocks_armv6<>(SB),4,$-4 | |||
// Save registers and reserve 128 bytes of scratch space below them. | |||
MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13) | |||
SUB $128, R13 | |||
// Scratch layout: 36(R13) state pointer, 40(R13) input pointer, | |||
// 44(R13) remaining length. | |||
MOVW R0, 36(R13) | |||
MOVW R1, 40(R13) | |||
MOVW R2, 44(R13) | |||
MOVW R1, R14 | |||
MOVW R2, R12 | |||
// 32(R13) receives 1<<24 when the flag word at state[56] is zero, and 0 | |||
// otherwise; it is ORed into the top piece of every block below. | |||
// NOTE(review): presumably the 2^128 padding bit, which the final, already | |||
// padded block (flag set by poly1305_finish_ext_armv6) must not get; confirm. | |||
MOVW 56(R0), R8 | |||
WORD $0xe1180008 // TST R8, R8 not working see issue 5921 | |||
EOR R6, R6, R6 | |||
MOVW.EQ $(1<<24), R6 | |||
MOVW R6, 32(R13) | |||
// Load ten state words: R0-R4 are the key pieces (copied to 64(R13)), | |||
// R5-R9 the accumulator. | |||
ADD $64, R13, g | |||
MOVM.IA (R0), [R0-R9] | |||
MOVM.IA [R0-R4], (g) | |||
// Nothing to do for fewer than 16 bytes. | |||
CMP $16, R12 | |||
BLO poly1305_blocks_armv6_done | |||
poly1305_blocks_armv6_mainloop: | |||
// Load the next 16 input bytes, advance the input pointer, and split the | |||
// block into five pieces (R0, g, R11, R12, R4), masking the first four to | |||
// 26 bits. | |||
MOVM.IA.W (R14), [R0-R3] | |||
MOVW R0>>26, g | |||
MOVW R1>>20, R11 | |||
MOVW R2>>14, R12 | |||
MOVW R14, 40(R13) | |||
MOVW R3>>8, R4 | |||
ORR R1<<6, g, g | |||
ORR R2<<12, R11, R11 | |||
ORR R3<<18, R12, R12 | |||
BIC $0xfc000000, R0, R0 | |||
BIC $0xfc000000, g, g | |||
MOVW 32(R13), R3 | |||
BIC $0xfc000000, R11, R11 | |||
BIC $0xfc000000, R12, R12 | |||
// Add the block to the accumulator R5-R9; the top piece gets the value | |||
// saved at 32(R13) ORed in first. | |||
ADD R0, R5, R5 | |||
ADD g, R6, R6 | |||
ORR R3, R4, R4 | |||
ADD R11, R7, R7 | |||
ADD $64, R13, R14 | |||
ADD R12, R8, R8 | |||
ADD R4, R9, R9 | |||
// Reload the key pieces into R0-R4 and form the 64-bit column sums. | |||
// "ADD Rn<<2, Rn, Rn" turns a key piece into five times itself. | |||
MOVM.IA (R14), [R0-R4] | |||
MULLU R4, R5, (R11, g) | |||
MULLU R3, R5, (R14, R12) | |||
MULALU R3, R6, (R11, g) | |||
MULALU R2, R6, (R14, R12) | |||
MULALU R2, R7, (R11, g) | |||
MULALU R1, R7, (R14, R12) | |||
ADD R4<<2, R4, R4 | |||
ADD R3<<2, R3, R3 | |||
MULALU R1, R8, (R11, g) | |||
MULALU R0, R8, (R14, R12) | |||
MULALU R0, R9, (R11, g) | |||
MULALU R4, R9, (R14, R12) | |||
// Park two of the 64-bit sums at 16..31(R13). | |||
MOVW g, 24(R13) | |||
MOVW R11, 28(R13) | |||
MOVW R12, 16(R13) | |||
MOVW R14, 20(R13) | |||
MULLU R2, R5, (R11, g) | |||
MULLU R1, R5, (R14, R12) | |||
MULALU R1, R6, (R11, g) | |||
MULALU R0, R6, (R14, R12) | |||
MULALU R0, R7, (R11, g) | |||
MULALU R4, R7, (R14, R12) | |||
ADD R2<<2, R2, R2 | |||
ADD R1<<2, R1, R1 | |||
MULALU R4, R8, (R11, g) | |||
MULALU R3, R8, (R14, R12) | |||
MULALU R3, R9, (R11, g) | |||
MULALU R2, R9, (R14, R12) | |||
// Park two more sums at 0..15(R13). | |||
// NOTE(review): w+4(SP) presumably names the same slot as 4(R13), since the | |||
// MOVM.IA below reads 0..31(R13) back as one run; confirm. | |||
MOVW g, 8(R13) | |||
MOVW R11, 12(R13) | |||
MOVW R12, 0(R13) | |||
MOVW R14, w+4(SP) | |||
MULLU R0, R5, (R11, g) | |||
MULALU R4, R6, (R11, g) | |||
MULALU R3, R7, (R11, g) | |||
MULALU R2, R8, (R11, g) | |||
MULALU R1, R9, (R11, g) | |||
// Reload the four parked sums and propagate carries: shift each 64-bit sum | |||
// right by 26, add it to the next one, and keep the low 26 bits. | |||
MOVM.IA (R13), [R0-R7] | |||
MOVW g>>26, R12 | |||
MOVW R4>>26, R14 | |||
ORR R11<<6, R12, R12 | |||
ORR R5<<6, R14, R14 | |||
BIC $0xfc000000, g, g | |||
BIC $0xfc000000, R4, R4 | |||
ADD.S R12, R0, R0 | |||
ADC $0, R1, R1 | |||
ADD.S R14, R6, R6 | |||
ADC $0, R7, R7 | |||
MOVW R0>>26, R12 | |||
MOVW R6>>26, R14 | |||
ORR R1<<6, R12, R12 | |||
ORR R7<<6, R14, R14 | |||
BIC $0xfc000000, R0, R0 | |||
BIC $0xfc000000, R6, R6 | |||
// The carry out of the top piece is multiplied by five and added to the | |||
// bottom piece. | |||
ADD R14<<2, R14, R14 | |||
ADD.S R12, R2, R2 | |||
ADC $0, R3, R3 | |||
ADD R14, g, g | |||
MOVW R2>>26, R12 | |||
MOVW g>>26, R14 | |||
ORR R3<<6, R12, R12 | |||
// The new accumulator ends up in R5, R6, R7, R8, R9. | |||
BIC $0xfc000000, g, R5 | |||
BIC $0xfc000000, R2, R7 | |||
ADD R12, R4, R4 | |||
ADD R14, R0, R0 | |||
MOVW R4>>26, R12 | |||
BIC $0xfc000000, R4, R8 | |||
ADD R12, R6, R9 | |||
// Reload the remaining length and the input pointer. | |||
// NOTE(review): w+44(SP) and w+40(SP) presumably name the slots written as | |||
// 44(R13) and 40(R13); confirm. | |||
MOVW w+44(SP), R12 | |||
MOVW w+40(SP), R14 | |||
MOVW R0, R6 | |||
// The CMP looks at the length before this block is subtracted, so BHS | |||
// loops while at least 16 bytes remain afterwards. The SUB has no .S | |||
// suffix and leaves the flags alone. | |||
CMP $32, R12 | |||
SUB $16, R12, R12 | |||
MOVW R12, 44(R13) | |||
BHS poly1305_blocks_armv6_mainloop | |||
poly1305_blocks_armv6_done: | |||
// Write the accumulator back to state[20..39], release the scratch space | |||
// and restore the saved registers. | |||
MOVW 36(R13), R12 | |||
MOVW R5, 20(R12) | |||
MOVW R6, 24(R12) | |||
MOVW R7, 28(R12) | |||
MOVW R8, 32(R12) | |||
MOVW R9, 36(R12) | |||
ADD $128, R13, R13 | |||
MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14] | |||
RET | |||
// poly1305_finish_ext_armv6 absorbs a trailing partial block, if any, and | |||
// produces the 16-byte result. | |||
// On entry R0 points at the state, R1 at the remaining input, R2 holds the | |||
// number of remaining bytes and R3 points at the output buffer. The state is | |||
// overwritten with zeros before returning. | |||
TEXT poly1305_finish_ext_armv6<>(SB),4,$-4 | |||
// Save registers and reserve a 16-byte buffer on the stack. | |||
MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13) | |||
SUB $16, R13, R13 | |||
// Keep the arguments in R5 (state), R6, R7 and R8 (output pointer). | |||
MOVW R0, R5 | |||
MOVW R1, R6 | |||
MOVW R2, R7 | |||
MOVW R3, R8 | |||
// No leftover bytes: go straight to the final reduction. | |||
AND.S R2, R2, R2 | |||
BEQ poly1305_finish_ext_armv6_noremaining | |||
// Zero the 16-byte buffer; R9 is the write cursor into it. | |||
EOR R0, R0 | |||
MOVW R13, R9 | |||
MOVW R0, 0(R13) | |||
MOVW R0, 4(R13) | |||
MOVW R0, 8(R13) | |||
MOVW R0, 12(R13) | |||
// Copy the leftover bytes in chunks of 8, 4, 2 and 1 according to the bits | |||
// set in the length (R2). | |||
WORD $0xe3120008 // TST R2, #8 not working see issue 5921 | |||
BEQ poly1305_finish_ext_armv6_skip8 | |||
MOVM.IA.W (R1), [g-R11] | |||
MOVM.IA.W [g-R11], (R9) | |||
poly1305_finish_ext_armv6_skip8: | |||
WORD $0xe3120004 // TST $4, R2 not working see issue 5921 | |||
BEQ poly1305_finish_ext_armv6_skip4 | |||
MOVW.P 4(R1), g | |||
MOVW.P g, 4(R9) | |||
poly1305_finish_ext_armv6_skip4: | |||
WORD $0xe3120002 // TST $2, R2 not working see issue 5921 | |||
BEQ poly1305_finish_ext_armv6_skip2 | |||
MOVHU.P 2(R1), g | |||
MOVH.P g, 2(R9) | |||
poly1305_finish_ext_armv6_skip2: | |||
WORD $0xe3120001 // TST $1, R2 not working see issue 5921 | |||
BEQ poly1305_finish_ext_armv6_skip1 | |||
MOVBU.P 1(R1), g | |||
MOVBU.P g, 1(R9) | |||
poly1305_finish_ext_armv6_skip1: | |||
// Append a single 1 byte after the copied data, set the flag word at | |||
// state[56] to 1, and process the padded buffer as one 16-byte block. | |||
MOVW $1, R11 | |||
MOVBU R11, 0(R9) | |||
MOVW R11, 56(R5) | |||
MOVW R5, R0 | |||
MOVW R13, R1 | |||
MOVW $16, R2 | |||
BL poly1305_blocks_armv6<>(SB) | |||
poly1305_finish_ext_armv6_noremaining: | |||
// Load the five accumulator words and run a full carry pass; the carry out | |||
// of the top word is multiplied by five and added to the bottom word. | |||
MOVW 20(R5), R0 | |||
MOVW 24(R5), R1 | |||
MOVW 28(R5), R2 | |||
MOVW 32(R5), R3 | |||
MOVW 36(R5), R4 | |||
MOVW R4>>26, R12 | |||
BIC $0xfc000000, R4, R4 | |||
ADD R12<<2, R12, R12 | |||
ADD R12, R0, R0 | |||
MOVW R0>>26, R12 | |||
BIC $0xfc000000, R0, R0 | |||
ADD R12, R1, R1 | |||
MOVW R1>>26, R12 | |||
BIC $0xfc000000, R1, R1 | |||
ADD R12, R2, R2 | |||
MOVW R2>>26, R12 | |||
BIC $0xfc000000, R2, R2 | |||
ADD R12, R3, R3 | |||
MOVW R3>>26, R12 | |||
BIC $0xfc000000, R3, R3 | |||
ADD R12, R4, R4 | |||
// Second candidate in R6, R7, g, R11, R14: the value plus 5, with 1<<26 | |||
// subtracted from the top word. | |||
ADD $5, R0, R6 | |||
MOVW R6>>26, R12 | |||
BIC $0xfc000000, R6, R6 | |||
ADD R12, R1, R7 | |||
MOVW R7>>26, R12 | |||
BIC $0xfc000000, R7, R7 | |||
ADD R12, R2, g | |||
MOVW g>>26, R12 | |||
BIC $0xfc000000, g, g | |||
ADD R12, R3, R11 | |||
MOVW $-(1<<26), R12 | |||
ADD R11>>26, R12, R12 | |||
BIC $0xfc000000, R11, R11 | |||
ADD R12, R4, R14 | |||
// R12 = (top bit of R14) - 1: all ones when the subtraction did not go | |||
// negative, zero otherwise. Use it for a branch-free select between the | |||
// second candidate and the original words. | |||
MOVW R14>>31, R12 | |||
SUB $1, R12 | |||
AND R12, R6, R6 | |||
AND R12, R7, R7 | |||
AND R12, g, g | |||
AND R12, R11, R11 | |||
AND R12, R14, R14 | |||
MVN R12, R12 | |||
AND R12, R0, R0 | |||
AND R12, R1, R1 | |||
AND R12, R2, R2 | |||
AND R12, R3, R3 | |||
AND R12, R4, R4 | |||
ORR R6, R0, R0 | |||
ORR R7, R1, R1 | |||
ORR g, R2, R2 | |||
ORR R11, R3, R3 | |||
ORR R14, R4, R4 | |||
// Repack the five 26-bit words into four 32-bit words R0-R3. | |||
ORR R1<<26, R0, R0 | |||
MOVW R1>>6, R1 | |||
ORR R2<<20, R1, R1 | |||
MOVW R2>>12, R2 | |||
ORR R3<<14, R2, R2 | |||
MOVW R3>>18, R3 | |||
ORR R4<<8, R3, R3 | |||
// Add the four words kept at state[40..55] with carry and store the | |||
// 16-byte result at the output pointer (R8). | |||
MOVW 40(R5), R6 | |||
MOVW 44(R5), R7 | |||
MOVW 48(R5), g | |||
MOVW 52(R5), R11 | |||
ADD.S R6, R0, R0 | |||
ADC.S R7, R1, R1 | |||
ADC.S g, R2, R2 | |||
ADC.S R11, R3, R3 | |||
MOVM.IA [R0-R3], (R8) | |||
// Overwrite the state with zeros: two stores of eight zero words each, the | |||
// first one advancing R12 by 32 bytes. | |||
MOVW R5, R12 | |||
EOR R0, R0, R0 | |||
EOR R1, R1, R1 | |||
EOR R2, R2, R2 | |||
EOR R3, R3, R3 | |||
EOR R4, R4, R4 | |||
EOR R5, R5, R5 | |||
EOR R6, R6, R6 | |||
EOR R7, R7, R7 | |||
MOVM.IA.W [R0-R7], (R12) | |||
MOVM.IA [R0-R7], (R12) | |||
// Release the buffer and restore the saved registers. | |||
ADD $16, R13, R13 | |||
MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14] | |||
RET | |||
// func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte) | |||
// | |||
// poly1305_auth_armv6 is the entry point called from Go. It carves a 64-byte | |||
// state block out of its frame, initialises it from key, feeds all whole | |||
// 16-byte blocks of m to poly1305_blocks_armv6 and leaves the remainder and | |||
// the final output to poly1305_finish_ext_armv6. | |||
TEXT ·poly1305_auth_armv6(SB),0,$280-16 | |||
MOVW out+0(FP), R4 | |||
MOVW m+4(FP), R5 | |||
MOVW mlen+8(FP), R6 | |||
MOVW key+12(FP), R7 | |||
// Save R13 in R8, clear its low six bits and move it down by 64; the 64 | |||
// bytes at the new R13 hold the state. | |||
MOVW R13, R8 | |||
BIC $63, R13 | |||
SUB $64, R13, R13 | |||
// init(state, key) | |||
MOVW R13, R0 | |||
MOVW R7, R1 | |||
BL poly1305_init_ext_armv6<>(SB) | |||
// R2 = mlen with the low four bits cleared, i.e. the bytes in whole blocks. | |||
BIC.S $15, R6, R2 | |||
BEQ poly1305_auth_armv6_noblocks | |||
// blocks(state, m, R2); afterwards R5/R6 describe the unprocessed tail. | |||
MOVW R13, R0 | |||
MOVW R5, R1 | |||
ADD R2, R5, R5 | |||
SUB R2, R6, R6 | |||
BL poly1305_blocks_armv6<>(SB) | |||
poly1305_auth_armv6_noblocks: | |||
// finish(state, tail pointer, tail length, out) | |||
MOVW R13, R0 | |||
MOVW R5, R1 | |||
MOVW R6, R2 | |||
MOVW R4, R3 | |||
BL poly1305_finish_ext_armv6<>(SB) | |||
// Restore the original stack pointer. | |||
MOVW R8, R13 | |||
RET |
@ -0,0 +1,74 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
package poly1305 | |||
import ( | |||
"bytes" | |||
"testing" | |||
) | |||
// testData lists the Poly1305 test cases: the message (in), the 32-byte
// one-time key (k) and the expected 16-byte authenticator (correct).
var testData = []struct {
	in, k, correct []byte
}{
	{
		in:      []byte("Hello world!"),
		k:       []byte("this is 32-byte key for Poly1305"),
		correct: []byte{0xa6, 0xf7, 0x45, 0x00, 0x8f, 0x81, 0xc9, 0x16, 0xa2, 0x0d, 0xcc, 0x74, 0xee, 0xf2, 0xb2, 0xf0},
	},
	{
		in:      make([]byte, 32),
		k:       []byte("this is 32-byte key for Poly1305"),
		correct: []byte{0x49, 0xec, 0x78, 0x09, 0x0e, 0x48, 0x1e, 0xc6, 0xc2, 0x6b, 0x33, 0xb9, 0x1c, 0xcc, 0x03, 0x07},
	},
	{
		in:      make([]byte, 2007),
		k:       []byte("this is 32-byte key for Poly1305"),
		correct: []byte{0xda, 0x84, 0xbc, 0xab, 0x02, 0x67, 0x6c, 0x38, 0xcd, 0xb0, 0x15, 0x60, 0x42, 0x74, 0xc2, 0xaa},
	},
	{
		// All-zero key: the expected authenticator is all zeros as well.
		in:      make([]byte, 2007),
		k:       make([]byte, 32),
		correct: make([]byte, 16),
	},
}
func TestSum(t *testing.T) { | |||
var out [16]byte | |||
var key [32]byte | |||
for i, v := range testData { | |||
copy(key[:], v.k) | |||
Sum(&out, v.in, &key) | |||
if !bytes.Equal(out[:], v.correct) { | |||
t.Errorf("%d: expected %x, got %x", i, v.correct, out[:]) | |||
} | |||
} | |||
} | |||
func Benchmark1K(b *testing.B) { | |||
b.StopTimer() | |||
var out [16]byte | |||
var key [32]byte | |||
in := make([]byte, 1024) | |||
b.SetBytes(int64(len(in))) | |||
b.StartTimer() | |||
for i := 0; i < b.N; i++ { | |||
Sum(&out, in, &key) | |||
} | |||
} | |||
func Benchmark64(b *testing.B) { | |||
b.StopTimer() | |||
var out [16]byte | |||
var key [32]byte | |||
in := make([]byte, 64) | |||
b.SetBytes(int64(len(in))) | |||
b.StartTimer() | |||
for i := 0; i < b.N; i++ { | |||
Sum(&out, in, &key) | |||
} | |||
} |
@ -0,0 +1,24 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
// +build amd64,!gccgo,!appengine | |||
package poly1305 | |||
// poly1305 writes the 16-byte authenticator of the mlen bytes starting at m, | |||
// computed under key, to out. Sum passes a nil m together with mlen == 0 for | |||
// an empty message. | |||
// | |||
// This function is implemented in poly1305_amd64.s | |||
//go:noescape | |||
func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]byte) | |||
// Sum generates an authenticator for m using a one-time key and puts the | |||
// 16-byte result into out. Authenticating two different messages with the same | |||
// key allows an attacker to forge messages at will. | |||
func Sum(out *[16]byte, m []byte, key *[32]byte) { | |||
var mPtr *byte | |||
if len(m) > 0 { | |||
mPtr = &m[0] | |||
} | |||
poly1305(out, mPtr, uint64(len(m)), key) | |||
} |
@ -0,0 +1,24 @@ | |||
// Copyright 2015 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
// +build arm,!gccgo,!appengine | |||
package poly1305 | |||
// poly1305_auth_armv6 writes the 16-byte authenticator of the mlen bytes | |||
// starting at m, computed under key, to out. Sum passes a nil m together with | |||
// mlen == 0 for an empty message. | |||
// | |||
// This function is implemented in poly1305_arm.s | |||
//go:noescape | |||
func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte) | |||
// Sum generates an authenticator for m using a one-time key and puts the | |||
// 16-byte result into out. Authenticating two different messages with the same | |||
// key allows an attacker to forge messages at will. | |||
func Sum(out *[16]byte, m []byte, key *[32]byte) { | |||
var mPtr *byte | |||
if len(m) > 0 { | |||
mPtr = &m[0] | |||
} | |||
poly1305_auth_armv6(out, mPtr, uint32(len(m)), key) | |||
} |
@ -0,0 +1,120 @@ | |||
// Copyright 2010 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
// Package ripemd160 implements the RIPEMD-160 hash algorithm. | |||
package ripemd160 | |||
// RIPEMD-160 is designed by Hans Dobbertin, Antoon Bosselaers, and Bart | |||
// Preneel with specifications available at: | |||
// http://homes.esat.kuleuven.be/~cosicart/pdf/AB-9601/AB-9601.pdf. | |||
import ( | |||
"crypto" | |||
"hash" | |||
) | |||
func init() { | |||
crypto.RegisterHash(crypto.RIPEMD160, New) | |||
} | |||
const (
	// Size is the length of a RIPEMD-160 checksum in bytes.
	Size = 20

	// BlockSize is the block size of the hash algorithm in bytes.
	BlockSize = 64
)
// _s0 through _s4 are the initial values of the five 32-bit state words; | |||
// Reset assigns them to digest.s. | |||
const ( | |||
_s0 = 0x67452301 | |||
_s1 = 0xefcdab89 | |||
_s2 = 0x98badcfe | |||
_s3 = 0x10325476 | |||
_s4 = 0xc3d2e1f0 | |||
) | |||
// digest represents the partial evaluation of a checksum. It implements | |||
// hash.Hash (see New). | |||
type digest struct { | |||
s [5]uint32 // running context: the five state words updated by _Block | |||
x [BlockSize]byte // temporary buffer for input that does not yet fill a block | |||
nx int // index into x: number of buffered bytes | |||
tc uint64 // total count of bytes processed (passed to Write) | |||
} | |||
func (d *digest) Reset() { | |||
d.s[0], d.s[1], d.s[2], d.s[3], d.s[4] = _s0, _s1, _s2, _s3, _s4 | |||
d.nx = 0 | |||
d.tc = 0 | |||
} | |||
// New returns a new hash.Hash computing the checksum. | |||
func New() hash.Hash { | |||
result := new(digest) | |||
result.Reset() | |||
return result | |||
} | |||
func (d *digest) Size() int { return Size } | |||
func (d *digest) BlockSize() int { return BlockSize } | |||
func (d *digest) Write(p []byte) (nn int, err error) { | |||
nn = len(p) | |||
d.tc += uint64(nn) | |||
if d.nx > 0 { | |||
n := len(p) | |||
if n > BlockSize-d.nx { | |||
n = BlockSize - d.nx | |||
} | |||
for i := 0; i < n; i++ { | |||
d.x[d.nx+i] = p[i] | |||
} | |||
d.nx += n | |||
if d.nx == BlockSize { | |||
_Block(d, d.x[0:]) | |||
d.nx = 0 | |||
} | |||
p = p[n:] | |||
} | |||
n := _Block(d, p) | |||
p = p[n:] | |||
if len(p) > 0 { | |||
d.nx = copy(d.x[:], p) | |||
} | |||
return | |||
} | |||
func (d0 *digest) Sum(in []byte) []byte { | |||
// Make a copy of d0 so that caller can keep writing and summing. | |||
d := *d0 | |||
// Padding. Add a 1 bit and 0 bits until 56 bytes mod 64. | |||
tc := d.tc | |||
var tmp [64]byte | |||
tmp[0] = 0x80 | |||
if tc%64 < 56 { | |||
d.Write(tmp[0 : 56-tc%64]) | |||
} else { | |||
d.Write(tmp[0 : 64+56-tc%64]) | |||
} | |||
// Length in bits. | |||
tc <<= 3 | |||
for i := uint(0); i < 8; i++ { | |||
tmp[i] = byte(tc >> (8 * i)) | |||
} | |||
d.Write(tmp[0:8]) | |||
if d.nx != 0 { | |||
panic("d.nx != 0") | |||
} | |||
var digest [Size]byte | |||
for i, s := range d.s { | |||
digest[i*4] = byte(s) | |||
digest[i*4+1] = byte(s >> 8) | |||
digest[i*4+2] = byte(s >> 16) | |||
digest[i*4+3] = byte(s >> 24) | |||
} | |||
return append(in, digest[:]...) | |||
} |
@ -0,0 +1,64 @@ | |||
// Copyright 2010 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
package ripemd160 | |||
// Test vectors are from: | |||
// http://homes.esat.kuleuven.be/~bosselae/ripemd160.html | |||
import ( | |||
"fmt" | |||
"io" | |||
"testing" | |||
) | |||
// mdTest pairs an input string with the lowercase hex encoding of its expected
// RIPEMD-160 digest.
type mdTest struct {
	out, in string
}

// vectors holds the known-answer tests exercised by TestVectors.
var vectors = [...]mdTest{
	{out: "9c1185a5c5e9fc54612808977ee8f548b2258d31", in: ""},
	{out: "0bdc9d2d256b3ee9daae347be6f4dc835a467ffe", in: "a"},
	{out: "8eb208f7e05d987a9b044a8e98c6b087f15a0bfc", in: "abc"},
	{out: "5d0689ef49d2fae572b881b123a85ffa21595f36", in: "message digest"},
	{out: "f71c27109c692c1b56bbdceb5b9d2865b3708dbc", in: "abcdefghijklmnopqrstuvwxyz"},
	{out: "12a053384a9c0c88e405a06c27dcf49ada62eb2b", in: "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"},
	{out: "b0e20b6e3116640286ed3a87a5713079b21f5189", in: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"},
	{out: "9b752e45573d4b39f4dbd3323cab82bf63326bfb", in: "12345678901234567890123456789012345678901234567890123456789012345678901234567890"},
}
func TestVectors(t *testing.T) { | |||
for i := 0; i < len(vectors); i++ { | |||
tv := vectors[i] | |||
md := New() | |||
for j := 0; j < 3; j++ { | |||
if j < 2 { | |||
io.WriteString(md, tv.in) | |||
} else { | |||
io.WriteString(md, tv.in[0:len(tv.in)/2]) | |||
md.Sum(nil) | |||
io.WriteString(md, tv.in[len(tv.in)/2:]) | |||
} | |||
s := fmt.Sprintf("%x", md.Sum(nil)) | |||
if s != tv.out { | |||
t.Fatalf("RIPEMD-160[%d](%s) = %s, expected %s", j, tv.in, s, tv.out) | |||
} | |||
md.Reset() | |||
} | |||
} | |||
} | |||
func TestMillionA(t *testing.T) { | |||
md := New() | |||
for i := 0; i < 100000; i++ { | |||
io.WriteString(md, "aaaaaaaaaa") | |||
} | |||
out := "52783243c1697bdbe16d37f97f68f08325dc1528" | |||
s := fmt.Sprintf("%x", md.Sum(nil)) | |||
if s != out { | |||
t.Fatalf("RIPEMD-160 (1 million 'a') = %s, expected %s", s, out) | |||
} | |||
md.Reset() | |||
} |
@ -0,0 +1,161 @@ | |||
// Copyright 2010 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
// RIPEMD-160 block step. | |||
// In its own file so that a faster assembly or C version | |||
// can be substituted easily. | |||
package ripemd160 | |||
// work buffer indices and roll amounts for one line | |||
// | |||
// For step i of _Block's first line, _n[i] selects the message word x[_n[i]] | |||
// and _r[i] is the left-rotation amount. Each table has 80 entries, 16 per | |||
// round. | |||
var _n = [80]uint{ | |||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |||
7, 4, 13, 1, 10, 6, 15, 3, 12, 0, 9, 5, 2, 14, 11, 8, | |||
3, 10, 14, 4, 9, 15, 8, 1, 2, 7, 0, 6, 13, 11, 5, 12, | |||
1, 9, 11, 10, 0, 8, 12, 4, 13, 3, 7, 15, 14, 5, 6, 2, | |||
4, 0, 5, 9, 7, 12, 2, 10, 14, 1, 3, 8, 11, 6, 15, 13, | |||
} | |||
var _r = [80]uint{ | |||
11, 14, 15, 12, 5, 8, 7, 9, 11, 13, 14, 15, 6, 7, 9, 8, | |||
7, 6, 8, 13, 11, 9, 7, 15, 7, 12, 15, 9, 11, 7, 13, 12, | |||
11, 13, 6, 7, 14, 9, 13, 15, 14, 8, 13, 6, 5, 12, 7, 5, | |||
11, 12, 14, 15, 14, 15, 9, 8, 9, 14, 5, 6, 8, 6, 5, 12, | |||
9, 15, 5, 11, 6, 8, 13, 12, 5, 12, 13, 14, 11, 8, 5, 6, | |||
} | |||
// same for the other parallel one | |||
// | |||
// n_ and r_ play the roles of _n and _r for the second ("parallel") line of | |||
// _Block. | |||
var n_ = [80]uint{ | |||
5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12, | |||
6, 11, 3, 7, 0, 13, 5, 10, 14, 15, 8, 12, 4, 9, 1, 2, | |||
15, 5, 1, 3, 7, 14, 6, 9, 11, 8, 12, 2, 10, 0, 4, 13, | |||
8, 6, 4, 1, 3, 11, 15, 0, 5, 12, 2, 13, 9, 7, 10, 14, | |||
12, 15, 10, 4, 1, 5, 8, 7, 6, 2, 13, 14, 0, 3, 9, 11, | |||
} | |||
var r_ = [80]uint{ | |||
8, 9, 9, 11, 13, 15, 15, 5, 7, 7, 8, 11, 14, 14, 12, 6, | |||
9, 13, 15, 7, 12, 8, 9, 11, 7, 7, 12, 7, 6, 15, 13, 11, | |||
9, 7, 15, 11, 8, 6, 6, 14, 12, 13, 5, 14, 13, 13, 7, 5, | |||
15, 5, 8, 11, 14, 14, 6, 14, 6, 9, 12, 9, 12, 5, 15, 8, | |||
8, 5, 12, 9, 12, 5, 14, 6, 8, 13, 6, 5, 15, 13, 11, 11, | |||
} | |||
func _Block(md *digest, p []byte) int { | |||
n := 0 | |||
var x [16]uint32 | |||
var alpha, beta uint32 | |||
for len(p) >= BlockSize { | |||
a, b, c, d, e := md.s[0], md.s[1], md.s[2], md.s[3], md.s[4] | |||
aa, bb, cc, dd, ee := a, b, c, d, e | |||
j := 0 | |||
for i := 0; i < 16; i++ { | |||
x[i] = uint32(p[j]) | uint32(p[j+1])<<8 | uint32(p[j+2])<<16 | uint32(p[j+3])<<24 | |||
j += 4 | |||
} | |||
// round 1 | |||
i := 0 | |||
for i < 16 { | |||
alpha = a + (b ^ c ^ d) + x[_n[i]] | |||
s := _r[i] | |||
alpha = (alpha<<s | alpha>>(32-s)) + e | |||
beta = c<<10 | c>>22 | |||
a, b, c, d, e = e, alpha, b, beta, d | |||
// parallel line | |||
alpha = aa + (bb ^ (cc | ^dd)) + x[n_[i]] + 0x50a28be6 | |||
s = r_[i] | |||
alpha = (alpha<<s | alpha>>(32-s)) + ee | |||
beta = cc<<10 | cc>>22 | |||
aa, bb, cc, dd, ee = ee, alpha, bb, beta, dd | |||
i++ | |||
} | |||
// round 2 | |||
for i < 32 { | |||
alpha = a + (b&c | ^b&d) + x[_n[i]] + 0x5a827999 | |||
s := _r[i] | |||
alpha = (alpha<<s | alpha>>(32-s)) + e | |||
beta = c<<10 | c>>22 | |||
a, b, c, d, e = e, alpha, b, beta, d | |||
// parallel line | |||
alpha = aa + (bb&dd | cc&^dd) + x[n_[i]] + 0x5c4dd124 | |||
s = r_[i] | |||
alpha = (alpha<<s | alpha>>(32-s)) + ee | |||
beta = cc<<10 | cc>>22 | |||
aa, bb, cc, dd, ee = ee, alpha, bb, beta, dd | |||
i++ | |||
} | |||
// round 3 | |||
for i < 48 { | |||
alpha = a + (b | ^c ^ d) + x[_n[i]] + 0x6ed9eba1 | |||
s := _r[i] | |||
alpha = (alpha<<s | alpha>>(32-s)) + e | |||
beta = c<<10 | c>>22 | |||
a, b, c, d, e = e, alpha, b, beta, d | |||
// parallel line | |||
alpha = aa + (bb | ^cc ^ dd) + x[n_[i]] + 0x6d703ef3 | |||
s = r_[i] | |||
alpha = (alpha<<s | alpha>>(32-s)) + ee | |||
beta = cc<<10 | cc>>22 | |||
aa, bb, cc, dd, ee = ee, alpha, bb, beta, dd | |||
i++ | |||
} | |||
// round 4 | |||
for i < 64 { | |||
alpha = a + (b&d | c&^d) + x[_n[i]] + 0x8f1bbcdc | |||
s := _r[i] | |||
alpha = (alpha<<s | alpha>>(32-s)) + e | |||
beta = c<<10 | c>>22 | |||
a, b, c, d, e = e, alpha, b, beta, d | |||
// parallel line | |||
alpha = aa + (bb&cc | ^bb&dd) + x[n_[i]] + 0x7a6d76e9 | |||
s = r_[i] | |||
alpha = (alpha<<s | alpha>>(32-s)) + ee | |||
beta = cc<<10 | cc>>22 | |||
aa, bb, cc, dd, ee = ee, alpha, bb, beta, dd | |||
i++ | |||
} | |||
// round 5 | |||
for i < 80 { | |||
alpha = a + (b ^ (c | ^d)) + x[_n[i]] + 0xa953fd4e | |||
s := _r[i] | |||
alpha = (alpha<<s | alpha>>(32-s)) + e | |||
beta = c<<10 | c>>22 | |||
a, b, c, d, e = e, alpha, b, beta, d | |||
// parallel line | |||
alpha = aa + (bb ^ cc ^ dd) + x[n_[i]] | |||
s = r_[i] | |||
alpha = (alpha<<s | alpha>>(32-s)) + ee | |||
beta = cc<<10 | cc>>22 | |||
aa, bb, cc, dd, ee = ee, alpha, bb, beta, dd | |||
i++ | |||
} | |||
// combine results | |||
dd += c + md.s[1] | |||
md.s[1] = md.s[2] + d + ee | |||
md.s[2] = md.s[3] + e + aa | |||
md.s[3] = md.s[4] + a + bb | |||
md.s[4] = md.s[0] + b + cc | |||
md.s[0] = dd | |||
p = p[BlockSize:] | |||
n += BlockSize | |||
} | |||
return n | |||
} |
@ -0,0 +1,144 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
// Package salsa provides low-level access to functions in the Salsa family. | |||
package salsa | |||
// Sigma is the Salsa20 constant for 256-bit keys: the 16 ASCII bytes of
// "expand 32-byte k", listed here as four 4-byte words.
var Sigma = [16]byte{
	'e', 'x', 'p', 'a',
	'n', 'd', ' ', '3',
	'2', '-', 'b', 'y',
	't', 'e', ' ', 'k',
}
// HSalsa20 applies the HSalsa20 core function to a 16-byte input in, 32-byte
// key k, and 16-byte constant c, and puts the result into the 32-byte array
// out.
//
// All of in, k and c are read before out is written. The result is built from
// state words 0, 5, 10, 15, 6, 7, 8 and 9 after 20 rounds; unlike Core208, the
// initial state is not added back in.
func HSalsa20(out *[32]byte, in *[16]byte, k *[32]byte, c *[16]byte) {
	// word decodes the first four bytes of b as a little-endian uint32.
	word := func(b []byte) uint32 {
		return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
	}

	// State layout: constant words at 0, 5, 10, 15; the first key half at
	// 1-4 and the second at 11-14; the input at 6-9.
	var (
		x0, x5, x10, x15   = word(c[0:4]), word(c[4:8]), word(c[8:12]), word(c[12:16])
		x1, x2, x3, x4     = word(k[0:4]), word(k[4:8]), word(k[8:12]), word(k[12:16])
		x11, x12, x13, x14 = word(k[16:20]), word(k[20:24]), word(k[24:28]), word(k[28:32])
		x6, x7, x8, x9     = word(in[0:4]), word(in[4:8]), word(in[8:12]), word(in[12:16])
	)

	// Each iteration performs two rounds (one over columns, one over rows),
	// so ten iterations give the 20 rounds.
	for round := 0; round < 20; round += 2 {
		// Column round.
		t := x0 + x12
		x4 ^= t<<7 | t>>25
		t = x4 + x0
		x8 ^= t<<9 | t>>23
		t = x8 + x4
		x12 ^= t<<13 | t>>19
		t = x12 + x8
		x0 ^= t<<18 | t>>14

		t = x5 + x1
		x9 ^= t<<7 | t>>25
		t = x9 + x5
		x13 ^= t<<9 | t>>23
		t = x13 + x9
		x1 ^= t<<13 | t>>19
		t = x1 + x13
		x5 ^= t<<18 | t>>14

		t = x10 + x6
		x14 ^= t<<7 | t>>25
		t = x14 + x10
		x2 ^= t<<9 | t>>23
		t = x2 + x14
		x6 ^= t<<13 | t>>19
		t = x6 + x2
		x10 ^= t<<18 | t>>14

		t = x15 + x11
		x3 ^= t<<7 | t>>25
		t = x3 + x15
		x7 ^= t<<9 | t>>23
		t = x7 + x3
		x11 ^= t<<13 | t>>19
		t = x11 + x7
		x15 ^= t<<18 | t>>14

		// Row round.
		t = x0 + x3
		x1 ^= t<<7 | t>>25
		t = x1 + x0
		x2 ^= t<<9 | t>>23
		t = x2 + x1
		x3 ^= t<<13 | t>>19
		t = x3 + x2
		x0 ^= t<<18 | t>>14

		t = x5 + x4
		x6 ^= t<<7 | t>>25
		t = x6 + x5
		x7 ^= t<<9 | t>>23
		t = x7 + x6
		x4 ^= t<<13 | t>>19
		t = x4 + x7
		x5 ^= t<<18 | t>>14

		t = x10 + x9
		x11 ^= t<<7 | t>>25
		t = x11 + x10
		x8 ^= t<<9 | t>>23
		t = x8 + x11
		x9 ^= t<<13 | t>>19
		t = x9 + x8
		x10 ^= t<<18 | t>>14

		t = x15 + x14
		x12 ^= t<<7 | t>>25
		t = x12 + x15
		x13 ^= t<<9 | t>>23
		t = x13 + x12
		x14 ^= t<<13 | t>>19
		t = x14 + x13
		x15 ^= t<<18 | t>>14
	}

	// Serialize the eight selected words little-endian.
	for i, w := range [8]uint32{x0, x5, x10, x15, x6, x7, x8, x9} {
		out[4*i+0] = byte(w)
		out[4*i+1] = byte(w >> 8)
		out[4*i+2] = byte(w >> 16)
		out[4*i+3] = byte(w >> 24)
	}
}
@ -0,0 +1,902 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
// +build amd64,!appengine,!gccgo | |||
// This code was translated into a form compatible with 6a from the public | |||
// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html | |||
// func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte)
//
// salsa2020XORKeyStream XORs n bytes from in with Salsa20/20 keystream and
// writes the result to out. nonce points at 16 bytes: bytes 0-7 are copied
// into the state as-is and bytes 8-15 are treated as a 64-bit little-endian
// block counter that is advanced once per 64-byte block. key points at 32
// bytes. The caller's nonce/counter buffer is only read, never written.
//
// Scratch layout relative to the 32-byte aligned SP:
//     0-63    the 16-word initial state, grouped for the single-block path
//    64-287   state words broadcast to four lanes (four-block path)
//   288-319   low (288) and high (304) counter words for four blocks
//   320-351   two 16-byte spill slots used inside MAINLOOP1
//   352-407   saved caller SP, R12-R15, BX, BP
//   408       remaining byte count
//   416-479   bounce buffer for a final block shorter than 64 bytes
TEXT ·salsa2020XORKeyStream(SB),0,$512-40
	MOVQ out+0(FP),DI
	MOVQ in+8(FP),SI
	MOVQ n+16(FP),DX
	MOVQ nonce+24(FP),CX
	MOVQ key+32(FP),R8

	// Round SP up to the next 32-byte boundary so MOVOA accesses are aligned;
	// the original SP is kept in R11 and saved at 352(SP).
	MOVQ SP,R11
	MOVQ $31,R9
	NOTQ R9
	ANDQ R9,SP
	ADDQ $32,SP

	MOVQ R11,352(SP)
	MOVQ R12,360(SP)
	MOVQ R13,368(SP)
	MOVQ R14,376(SP)
	MOVQ R15,384(SP)
	MOVQ BX,392(SP)
	MOVQ BP,400(SP)

	// From here on: R9 = bytes remaining, DX = nonce pointer, R10 = key pointer.
	MOVQ DX,R9
	MOVQ CX,DX
	MOVQ R8,R10
	CMPQ R9,$0
	JBE DONE
START:
	// Build the initial state at 0-63(SP) from key, nonce/counter and the
	// four constant words. 16(SP) receives counter bytes 8-11 (low word) and
	// 36(SP) receives counter bytes 12-15 (high word).
	MOVL 20(R10),CX
	MOVL 0(R10),R8
	MOVL 0(DX),AX
	MOVL 16(R10),R11
	MOVL CX,0(SP)
	MOVL R8,4(SP)
	MOVL AX,8(SP)
	MOVL R11,12(SP)
	MOVL 8(DX),CX
	MOVL 24(R10),R8
	MOVL 4(R10),AX
	MOVL 4(DX),R11
	MOVL CX,16(SP)
	MOVL R8,20(SP)
	MOVL AX,24(SP)
	MOVL R11,28(SP)
	MOVL 12(DX),CX
	MOVL 12(R10),DX
	MOVL 28(R10),R8
	MOVL 8(R10),AX
	MOVL DX,32(SP)
	MOVL CX,36(SP)
	MOVL R8,40(SP)
	MOVL AX,44(SP)
	// The four constant words: "expa", "nd 3", "2-by", "te k" as little-endian integers.
	MOVQ $1634760805,DX
	MOVQ $857760878,CX
	MOVQ $2036477234,R8
	MOVQ $1797285236,AX
	MOVL DX,48(SP)
	MOVL CX,52(SP)
	MOVL R8,56(SP)
	MOVL AX,60(SP)
	CMPQ R9,$256
	JB BYTESBETWEEN1AND255

	// Four-block path setup: broadcast every state word except the two
	// counter words into all four lanes of its own 16-byte slot.
	MOVOA 48(SP),X0
	PSHUFL $0X55,X0,X1
	PSHUFL $0XAA,X0,X2
	PSHUFL $0XFF,X0,X3
	PSHUFL $0X00,X0,X0
	MOVOA X1,64(SP)
	MOVOA X2,80(SP)
	MOVOA X3,96(SP)
	MOVOA X0,112(SP)
	MOVOA 0(SP),X0
	PSHUFL $0XAA,X0,X1
	PSHUFL $0XFF,X0,X2
	PSHUFL $0X00,X0,X3
	PSHUFL $0X55,X0,X0
	MOVOA X1,128(SP)
	MOVOA X2,144(SP)
	MOVOA X3,160(SP)
	MOVOA X0,176(SP)
	MOVOA 16(SP),X0
	PSHUFL $0XFF,X0,X1
	PSHUFL $0X55,X0,X2
	PSHUFL $0XAA,X0,X0
	MOVOA X1,192(SP)
	MOVOA X2,208(SP)
	MOVOA X0,224(SP)
	MOVOA 32(SP),X0
	PSHUFL $0X00,X0,X1
	PSHUFL $0XAA,X0,X2
	PSHUFL $0XFF,X0,X0
	MOVOA X1,240(SP)
	MOVOA X2,256(SP)
	MOVOA X0,272(SP)
BYTESATLEAST256:
	// Store counter, counter+1, counter+2 and counter+3 (low words at
	// 288-303, high words at 304-319) and write counter+4 back to the state.
	//
	// The two 32-bit halves are merged into one 64-bit value in DX exactly
	// once; after that each step is a plain 64-bit increment. Re-adding the
	// shifted high word on every step would count it several times and give
	// wrong counters as soon as the high word is non-zero.
	MOVL 16(SP),DX
	MOVL 36(SP),CX
	MOVL DX,288(SP)
	MOVL CX,304(SP)
	SHLQ $32,CX
	ADDQ CX,DX
	ADDQ $1,DX
	MOVQ DX,CX
	SHRQ $32,CX
	MOVL DX,292(SP)
	MOVL CX,308(SP)
	ADDQ $1,DX
	MOVQ DX,CX
	SHRQ $32,CX
	MOVL DX,296(SP)
	MOVL CX,312(SP)
	ADDQ $1,DX
	MOVQ DX,CX
	SHRQ $32,CX
	MOVL DX,300(SP)
	MOVL CX,316(SP)
	ADDQ $1,DX
	MOVQ DX,CX
	SHRQ $32,CX
	MOVL DX,16(SP)
	MOVL CX,36(SP)
	MOVQ R9,408(SP)
	// DX counts the remaining rounds; MAINLOOP1 performs two per iteration.
	MOVQ $20,DX
	MOVOA 64(SP),X0
	MOVOA 80(SP),X1
	MOVOA 96(SP),X2
	MOVOA 256(SP),X3
	MOVOA 272(SP),X4
	MOVOA 128(SP),X5
	MOVOA 144(SP),X6
	MOVOA 176(SP),X7
	MOVOA 192(SP),X8
	MOVOA 208(SP),X9
	MOVOA 224(SP),X10
	MOVOA 304(SP),X11
	MOVOA 112(SP),X12
	MOVOA 160(SP),X13
	MOVOA 240(SP),X14
	MOVOA 288(SP),X15
MAINLOOP1:
	// Each rotate is built from a left shift and a right shift XORed into the
	// target. 320(SP) and 336(SP) hold whichever two state vectors do not
	// currently fit in X0-X15 alongside the two temporaries.
	MOVOA X1,320(SP)
	MOVOA X2,336(SP)
	MOVOA X13,X1
	PADDL X12,X1
	MOVOA X1,X2
	PSLLL $7,X1
	PXOR X1,X14
	PSRLL $25,X2
	PXOR X2,X14
	MOVOA X7,X1
	PADDL X0,X1
	MOVOA X1,X2
	PSLLL $7,X1
	PXOR X1,X11
	PSRLL $25,X2
	PXOR X2,X11
	MOVOA X12,X1
	PADDL X14,X1
	MOVOA X1,X2
	PSLLL $9,X1
	PXOR X1,X15
	PSRLL $23,X2
	PXOR X2,X15
	MOVOA X0,X1
	PADDL X11,X1
	MOVOA X1,X2
	PSLLL $9,X1
	PXOR X1,X9
	PSRLL $23,X2
	PXOR X2,X9
	MOVOA X14,X1
	PADDL X15,X1
	MOVOA X1,X2
	PSLLL $13,X1
	PXOR X1,X13
	PSRLL $19,X2
	PXOR X2,X13
	MOVOA X11,X1
	PADDL X9,X1
	MOVOA X1,X2
	PSLLL $13,X1
	PXOR X1,X7
	PSRLL $19,X2
	PXOR X2,X7
	MOVOA X15,X1
	PADDL X13,X1
	MOVOA X1,X2
	PSLLL $18,X1
	PXOR X1,X12
	PSRLL $14,X2
	PXOR X2,X12
	MOVOA 320(SP),X1
	MOVOA X12,320(SP)
	MOVOA X9,X2
	PADDL X7,X2
	MOVOA X2,X12
	PSLLL $18,X2
	PXOR X2,X0
	PSRLL $14,X12
	PXOR X12,X0
	MOVOA X5,X2
	PADDL X1,X2
	MOVOA X2,X12
	PSLLL $7,X2
	PXOR X2,X3
	PSRLL $25,X12
	PXOR X12,X3
	MOVOA 336(SP),X2
	MOVOA X0,336(SP)
	MOVOA X6,X0
	PADDL X2,X0
	MOVOA X0,X12
	PSLLL $7,X0
	PXOR X0,X4
	PSRLL $25,X12
	PXOR X12,X4
	MOVOA X1,X0
	PADDL X3,X0
	MOVOA X0,X12
	PSLLL $9,X0
	PXOR X0,X10
	PSRLL $23,X12
	PXOR X12,X10
	MOVOA X2,X0
	PADDL X4,X0
	MOVOA X0,X12
	PSLLL $9,X0
	PXOR X0,X8
	PSRLL $23,X12
	PXOR X12,X8
	MOVOA X3,X0
	PADDL X10,X0
	MOVOA X0,X12
	PSLLL $13,X0
	PXOR X0,X5
	PSRLL $19,X12
	PXOR X12,X5
	MOVOA X4,X0
	PADDL X8,X0
	MOVOA X0,X12
	PSLLL $13,X0
	PXOR X0,X6
	PSRLL $19,X12
	PXOR X12,X6
	MOVOA X10,X0
	PADDL X5,X0
	MOVOA X0,X12
	PSLLL $18,X0
	PXOR X0,X1
	PSRLL $14,X12
	PXOR X12,X1
	MOVOA 320(SP),X0
	MOVOA X1,320(SP)
	MOVOA X4,X1
	PADDL X0,X1
	MOVOA X1,X12
	PSLLL $7,X1
	PXOR X1,X7
	PSRLL $25,X12
	PXOR X12,X7
	MOVOA X8,X1
	PADDL X6,X1
	MOVOA X1,X12
	PSLLL $18,X1
	PXOR X1,X2
	PSRLL $14,X12
	PXOR X12,X2
	MOVOA 336(SP),X12
	MOVOA X2,336(SP)
	MOVOA X14,X1
	PADDL X12,X1
	MOVOA X1,X2
	PSLLL $7,X1
	PXOR X1,X5
	PSRLL $25,X2
	PXOR X2,X5
	MOVOA X0,X1
	PADDL X7,X1
	MOVOA X1,X2
	PSLLL $9,X1
	PXOR X1,X10
	PSRLL $23,X2
	PXOR X2,X10
	MOVOA X12,X1
	PADDL X5,X1
	MOVOA X1,X2
	PSLLL $9,X1
	PXOR X1,X8
	PSRLL $23,X2
	PXOR X2,X8
	MOVOA X7,X1
	PADDL X10,X1
	MOVOA X1,X2
	PSLLL $13,X1
	PXOR X1,X4
	PSRLL $19,X2
	PXOR X2,X4
	MOVOA X5,X1
	PADDL X8,X1
	MOVOA X1,X2
	PSLLL $13,X1
	PXOR X1,X14
	PSRLL $19,X2
	PXOR X2,X14
	MOVOA X10,X1
	PADDL X4,X1
	MOVOA X1,X2
	PSLLL $18,X1
	PXOR X1,X0
	PSRLL $14,X2
	PXOR X2,X0
	MOVOA 320(SP),X1
	MOVOA X0,320(SP)
	MOVOA X8,X0
	PADDL X14,X0
	MOVOA X0,X2
	PSLLL $18,X0
	PXOR X0,X12
	PSRLL $14,X2
	PXOR X2,X12
	MOVOA X11,X0
	PADDL X1,X0
	MOVOA X0,X2
	PSLLL $7,X0
	PXOR X0,X6
	PSRLL $25,X2
	PXOR X2,X6
	MOVOA 336(SP),X2
	MOVOA X12,336(SP)
	MOVOA X3,X0
	PADDL X2,X0
	MOVOA X0,X12
	PSLLL $7,X0
	PXOR X0,X13
	PSRLL $25,X12
	PXOR X12,X13
	MOVOA X1,X0
	PADDL X6,X0
	MOVOA X0,X12
	PSLLL $9,X0
	PXOR X0,X15
	PSRLL $23,X12
	PXOR X12,X15
	MOVOA X2,X0
	PADDL X13,X0
	MOVOA X0,X12
	PSLLL $9,X0
	PXOR X0,X9
	PSRLL $23,X12
	PXOR X12,X9
	MOVOA X6,X0
	PADDL X15,X0
	MOVOA X0,X12
	PSLLL $13,X0
	PXOR X0,X11
	PSRLL $19,X12
	PXOR X12,X11
	MOVOA X13,X0
	PADDL X9,X0
	MOVOA X0,X12
	PSLLL $13,X0
	PXOR X0,X3
	PSRLL $19,X12
	PXOR X12,X3
	MOVOA X15,X0
	PADDL X11,X0
	MOVOA X0,X12
	PSLLL $18,X0
	PXOR X0,X1
	PSRLL $14,X12
	PXOR X12,X1
	MOVOA X9,X0
	PADDL X3,X0
	MOVOA X0,X12
	PSLLL $18,X0
	PXOR X0,X2
	PSRLL $14,X12
	PXOR X12,X2
	MOVOA 320(SP),X12
	MOVOA 336(SP),X0
	SUBQ $2,DX
	JA MAINLOOP1

	// Feed-forward and output. For each group of four state vectors: add the
	// initial (broadcast) words, then peel off lane 0 with MOVD, XOR it with
	// the input and store it, rotating lanes with PSHUFL $0x39 so the next
	// block's word moves into lane 0. Blocks sit 64 bytes apart.
	PADDL 112(SP),X12
	PADDL 176(SP),X7
	PADDL 224(SP),X10
	PADDL 272(SP),X4
	MOVD X12,DX
	MOVD X7,CX
	MOVD X10,R8
	MOVD X4,R9
	PSHUFL $0X39,X12,X12
	PSHUFL $0X39,X7,X7
	PSHUFL $0X39,X10,X10
	PSHUFL $0X39,X4,X4
	XORL 0(SI),DX
	XORL 4(SI),CX
	XORL 8(SI),R8
	XORL 12(SI),R9
	MOVL DX,0(DI)
	MOVL CX,4(DI)
	MOVL R8,8(DI)
	MOVL R9,12(DI)
	MOVD X12,DX
	MOVD X7,CX
	MOVD X10,R8
	MOVD X4,R9
	PSHUFL $0X39,X12,X12
	PSHUFL $0X39,X7,X7
	PSHUFL $0X39,X10,X10
	PSHUFL $0X39,X4,X4
	XORL 64(SI),DX
	XORL 68(SI),CX
	XORL 72(SI),R8
	XORL 76(SI),R9
	MOVL DX,64(DI)
	MOVL CX,68(DI)
	MOVL R8,72(DI)
	MOVL R9,76(DI)
	MOVD X12,DX
	MOVD X7,CX
	MOVD X10,R8
	MOVD X4,R9
	PSHUFL $0X39,X12,X12
	PSHUFL $0X39,X7,X7
	PSHUFL $0X39,X10,X10
	PSHUFL $0X39,X4,X4
	XORL 128(SI),DX
	XORL 132(SI),CX
	XORL 136(SI),R8
	XORL 140(SI),R9
	MOVL DX,128(DI)
	MOVL CX,132(DI)
	MOVL R8,136(DI)
	MOVL R9,140(DI)
	MOVD X12,DX
	MOVD X7,CX
	MOVD X10,R8
	MOVD X4,R9
	XORL 192(SI),DX
	XORL 196(SI),CX
	XORL 200(SI),R8
	XORL 204(SI),R9
	MOVL DX,192(DI)
	MOVL CX,196(DI)
	MOVL R8,200(DI)
	MOVL R9,204(DI)
	PADDL 240(SP),X14
	PADDL 64(SP),X0
	PADDL 128(SP),X5
	PADDL 192(SP),X8
	MOVD X14,DX
	MOVD X0,CX
	MOVD X5,R8
	MOVD X8,R9
	PSHUFL $0X39,X14,X14
	PSHUFL $0X39,X0,X0
	PSHUFL $0X39,X5,X5
	PSHUFL $0X39,X8,X8
	XORL 16(SI),DX
	XORL 20(SI),CX
	XORL 24(SI),R8
	XORL 28(SI),R9
	MOVL DX,16(DI)
	MOVL CX,20(DI)
	MOVL R8,24(DI)
	MOVL R9,28(DI)
	MOVD X14,DX
	MOVD X0,CX
	MOVD X5,R8
	MOVD X8,R9
	PSHUFL $0X39,X14,X14
	PSHUFL $0X39,X0,X0
	PSHUFL $0X39,X5,X5
	PSHUFL $0X39,X8,X8
	XORL 80(SI),DX
	XORL 84(SI),CX
	XORL 88(SI),R8
	XORL 92(SI),R9
	MOVL DX,80(DI)
	MOVL CX,84(DI)
	MOVL R8,88(DI)
	MOVL R9,92(DI)
	MOVD X14,DX
	MOVD X0,CX
	MOVD X5,R8
	MOVD X8,R9
	PSHUFL $0X39,X14,X14
	PSHUFL $0X39,X0,X0
	PSHUFL $0X39,X5,X5
	PSHUFL $0X39,X8,X8
	XORL 144(SI),DX
	XORL 148(SI),CX
	XORL 152(SI),R8
	XORL 156(SI),R9
	MOVL DX,144(DI)
	MOVL CX,148(DI)
	MOVL R8,152(DI)
	MOVL R9,156(DI)
	MOVD X14,DX
	MOVD X0,CX
	MOVD X5,R8
	MOVD X8,R9
	XORL 208(SI),DX
	XORL 212(SI),CX
	XORL 216(SI),R8
	XORL 220(SI),R9
	MOVL DX,208(DI)
	MOVL CX,212(DI)
	MOVL R8,216(DI)
	MOVL R9,220(DI)
	PADDL 288(SP),X15
	PADDL 304(SP),X11
	PADDL 80(SP),X1
	PADDL 144(SP),X6
	MOVD X15,DX
	MOVD X11,CX
	MOVD X1,R8
	MOVD X6,R9
	PSHUFL $0X39,X15,X15
	PSHUFL $0X39,X11,X11
	PSHUFL $0X39,X1,X1
	PSHUFL $0X39,X6,X6
	XORL 32(SI),DX
	XORL 36(SI),CX
	XORL 40(SI),R8
	XORL 44(SI),R9
	MOVL DX,32(DI)
	MOVL CX,36(DI)
	MOVL R8,40(DI)
	MOVL R9,44(DI)
	MOVD X15,DX
	MOVD X11,CX
	MOVD X1,R8
	MOVD X6,R9
	PSHUFL $0X39,X15,X15
	PSHUFL $0X39,X11,X11
	PSHUFL $0X39,X1,X1
	PSHUFL $0X39,X6,X6
	XORL 96(SI),DX
	XORL 100(SI),CX
	XORL 104(SI),R8
	XORL 108(SI),R9
	MOVL DX,96(DI)
	MOVL CX,100(DI)
	MOVL R8,104(DI)
	MOVL R9,108(DI)
	MOVD X15,DX
	MOVD X11,CX
	MOVD X1,R8
	MOVD X6,R9
	PSHUFL $0X39,X15,X15
	PSHUFL $0X39,X11,X11
	PSHUFL $0X39,X1,X1
	PSHUFL $0X39,X6,X6
	XORL 160(SI),DX
	XORL 164(SI),CX
	XORL 168(SI),R8
	XORL 172(SI),R9
	MOVL DX,160(DI)
	MOVL CX,164(DI)
	MOVL R8,168(DI)
	MOVL R9,172(DI)
	MOVD X15,DX
	MOVD X11,CX
	MOVD X1,R8
	MOVD X6,R9
	XORL 224(SI),DX
	XORL 228(SI),CX
	XORL 232(SI),R8
	XORL 236(SI),R9
	MOVL DX,224(DI)
	MOVL CX,228(DI)
	MOVL R8,232(DI)
	MOVL R9,236(DI)
	PADDL 160(SP),X13
	PADDL 208(SP),X9
	PADDL 256(SP),X3
	PADDL 96(SP),X2
	MOVD X13,DX
	MOVD X9,CX
	MOVD X3,R8
	MOVD X2,R9
	PSHUFL $0X39,X13,X13
	PSHUFL $0X39,X9,X9
	PSHUFL $0X39,X3,X3
	PSHUFL $0X39,X2,X2
	XORL 48(SI),DX
	XORL 52(SI),CX
	XORL 56(SI),R8
	XORL 60(SI),R9
	MOVL DX,48(DI)
	MOVL CX,52(DI)
	MOVL R8,56(DI)
	MOVL R9,60(DI)
	MOVD X13,DX
	MOVD X9,CX
	MOVD X3,R8
	MOVD X2,R9
	PSHUFL $0X39,X13,X13
	PSHUFL $0X39,X9,X9
	PSHUFL $0X39,X3,X3
	PSHUFL $0X39,X2,X2
	XORL 112(SI),DX
	XORL 116(SI),CX
	XORL 120(SI),R8
	XORL 124(SI),R9
	MOVL DX,112(DI)
	MOVL CX,116(DI)
	MOVL R8,120(DI)
	MOVL R9,124(DI)
	MOVD X13,DX
	MOVD X9,CX
	MOVD X3,R8
	MOVD X2,R9
	PSHUFL $0X39,X13,X13
	PSHUFL $0X39,X9,X9
	PSHUFL $0X39,X3,X3
	PSHUFL $0X39,X2,X2
	XORL 176(SI),DX
	XORL 180(SI),CX
	XORL 184(SI),R8
	XORL 188(SI),R9
	MOVL DX,176(DI)
	MOVL CX,180(DI)
	MOVL R8,184(DI)
	MOVL R9,188(DI)
	MOVD X13,DX
	MOVD X9,CX
	MOVD X3,R8
	MOVD X2,R9
	XORL 240(SI),DX
	XORL 244(SI),CX
	XORL 248(SI),R8
	XORL 252(SI),R9
	MOVL DX,240(DI)
	MOVL CX,244(DI)
	MOVL R8,248(DI)
	MOVL R9,252(DI)

	// Advance by 256 bytes; stay on the four-block path while at least 256
	// bytes remain.
	MOVQ 408(SP),R9
	SUBQ $256,R9
	ADDQ $256,SI
	ADDQ $256,DI
	CMPQ R9,$256
	JAE BYTESATLEAST256
	CMPQ R9,$0
	JBE DONE
BYTESBETWEEN1AND255:
	// Single-block path. With fewer than 64 bytes left, copy the input into
	// the bounce buffer at 416(SP) and process it in place there; DX keeps
	// the real destination for the copy-out after the block.
	CMPQ R9,$64
	JAE NOCOPY
	MOVQ DI,DX
	LEAQ 416(SP),DI
	MOVQ R9,CX
	REP; MOVSB
	LEAQ 416(SP),DI
	LEAQ 416(SP),SI
NOCOPY:
	MOVQ R9,408(SP)
	MOVOA 48(SP),X0
	MOVOA 0(SP),X1
	MOVOA 16(SP),X2
	MOVOA 32(SP),X3
	MOVOA X1,X4
	// CX counts the remaining rounds; MAINLOOP2 performs four per iteration.
	MOVQ $20,CX
MAINLOOP2:
	PADDL X0,X4
	MOVOA X0,X5
	MOVOA X4,X6
	PSLLL $7,X4
	PSRLL $25,X6
	PXOR X4,X3
	PXOR X6,X3
	PADDL X3,X5
	MOVOA X3,X4
	MOVOA X5,X6
	PSLLL $9,X5
	PSRLL $23,X6
	PXOR X5,X2
	PSHUFL $0X93,X3,X3
	PXOR X6,X2
	PADDL X2,X4
	MOVOA X2,X5
	MOVOA X4,X6
	PSLLL $13,X4
	PSRLL $19,X6
	PXOR X4,X1
	PSHUFL $0X4E,X2,X2
	PXOR X6,X1
	PADDL X1,X5
	MOVOA X3,X4
	MOVOA X5,X6
	PSLLL $18,X5
	PSRLL $14,X6
	PXOR X5,X0
	PSHUFL $0X39,X1,X1
	PXOR X6,X0
	PADDL X0,X4
	MOVOA X0,X5
	MOVOA X4,X6
	PSLLL $7,X4
	PSRLL $25,X6
	PXOR X4,X1
	PXOR X6,X1
	PADDL X1,X5
	MOVOA X1,X4
	MOVOA X5,X6
	PSLLL $9,X5
	PSRLL $23,X6
	PXOR X5,X2
	PSHUFL $0X93,X1,X1
	PXOR X6,X2
	PADDL X2,X4
	MOVOA X2,X5
	MOVOA X4,X6
	PSLLL $13,X4
	PSRLL $19,X6
	PXOR X4,X3
	PSHUFL $0X4E,X2,X2
	PXOR X6,X3
	PADDL X3,X5
	MOVOA X1,X4
	MOVOA X5,X6
	PSLLL $18,X5
	PSRLL $14,X6
	PXOR X5,X0
	PSHUFL $0X39,X3,X3
	PXOR X6,X0
	PADDL X0,X4
	MOVOA X0,X5
	MOVOA X4,X6
	PSLLL $7,X4
	PSRLL $25,X6
	PXOR X4,X3
	PXOR X6,X3
	PADDL X3,X5
	MOVOA X3,X4
	MOVOA X5,X6
	PSLLL $9,X5
	PSRLL $23,X6
	PXOR X5,X2
	PSHUFL $0X93,X3,X3
	PXOR X6,X2
	PADDL X2,X4
	MOVOA X2,X5
	MOVOA X4,X6
	PSLLL $13,X4
	PSRLL $19,X6
	PXOR X4,X1
	PSHUFL $0X4E,X2,X2
	PXOR X6,X1
	PADDL X1,X5
	MOVOA X3,X4
	MOVOA X5,X6
	PSLLL $18,X5
	PSRLL $14,X6
	PXOR X5,X0
	PSHUFL $0X39,X1,X1
	PXOR X6,X0
	PADDL X0,X4
	MOVOA X0,X5
	MOVOA X4,X6
	PSLLL $7,X4
	PSRLL $25,X6
	PXOR X4,X1
	PXOR X6,X1
	PADDL X1,X5
	MOVOA X1,X4
	MOVOA X5,X6
	PSLLL $9,X5
	PSRLL $23,X6
	PXOR X5,X2
	PSHUFL $0X93,X1,X1
	PXOR X6,X2
	PADDL X2,X4
	MOVOA X2,X5
	MOVOA X4,X6
	PSLLL $13,X4
	PSRLL $19,X6
	PXOR X4,X3
	PSHUFL $0X4E,X2,X2
	PXOR X6,X3
	// The flags set by this SUBQ are consumed by the JA below; only SSE
	// instructions, which leave the flags alone, sit in between.
	SUBQ $4,CX
	PADDL X3,X5
	MOVOA X1,X4
	MOVOA X5,X6
	PSLLL $18,X5
	PXOR X7,X7
	PSRLL $14,X6
	PXOR X5,X0
	PSHUFL $0X39,X3,X3
	PXOR X6,X0
	JA MAINLOOP2

	// Feed-forward, then XOR the block with the input one word at a time.
	PADDL 48(SP),X0
	PADDL 0(SP),X1
	PADDL 16(SP),X2
	PADDL 32(SP),X3
	MOVD X0,CX
	MOVD X1,R8
	MOVD X2,R9
	MOVD X3,AX
	PSHUFL $0X39,X0,X0
	PSHUFL $0X39,X1,X1
	PSHUFL $0X39,X2,X2
	PSHUFL $0X39,X3,X3
	XORL 0(SI),CX
	XORL 48(SI),R8
	XORL 32(SI),R9
	XORL 16(SI),AX
	MOVL CX,0(DI)
	MOVL R8,48(DI)
	MOVL R9,32(DI)
	MOVL AX,16(DI)
	MOVD X0,CX
	MOVD X1,R8
	MOVD X2,R9
	MOVD X3,AX
	PSHUFL $0X39,X0,X0
	PSHUFL $0X39,X1,X1
	PSHUFL $0X39,X2,X2
	PSHUFL $0X39,X3,X3
	XORL 20(SI),CX
	XORL 4(SI),R8
	XORL 52(SI),R9
	XORL 36(SI),AX
	MOVL CX,20(DI)
	MOVL R8,4(DI)
	MOVL R9,52(DI)
	MOVL AX,36(DI)
	MOVD X0,CX
	MOVD X1,R8
	MOVD X2,R9
	MOVD X3,AX
	PSHUFL $0X39,X0,X0
	PSHUFL $0X39,X1,X1
	PSHUFL $0X39,X2,X2
	PSHUFL $0X39,X3,X3
	XORL 40(SI),CX
	XORL 24(SI),R8
	XORL 8(SI),R9
	XORL 56(SI),AX
	MOVL CX,40(DI)
	MOVL R8,24(DI)
	MOVL R9,8(DI)
	MOVL AX,56(DI)
	MOVD X0,CX
	MOVD X1,R8
	MOVD X2,R9
	MOVD X3,AX
	XORL 60(SI),CX
	XORL 44(SI),R8
	XORL 28(SI),R9
	XORL 12(SI),AX
	MOVL CX,60(DI)
	MOVL R8,44(DI)
	MOVL R9,28(DI)
	MOVL AX,12(DI)

	// Increment the 64-bit block counter kept as two 32-bit words at 16(SP)
	// (low) and 36(SP) (high). Both halves are reloaded from the stack, so
	// merging them here happens exactly once per block.
	MOVQ 408(SP),R9
	MOVL 16(SP),CX
	MOVL 36(SP),R8
	ADDQ $1,CX
	SHLQ $32,R8
	ADDQ R8,CX
	MOVQ CX,R8
	SHRQ $32,R8
	MOVL CX,16(SP)
	MOVL R8,36(SP)
	CMPQ R9,$64
	JA BYTESATLEAST65
	JAE BYTESATLEAST64
	// Fewer than 64 bytes were requested: copy them out of the bounce buffer
	// to the real destination saved in DX.
	MOVQ DI,SI
	MOVQ DX,DI
	MOVQ R9,CX
	REP; MOVSB
BYTESATLEAST64:
DONE:
	MOVQ 352(SP),R11
	MOVQ 360(SP),R12
	MOVQ 368(SP),R13
	MOVQ 376(SP),R14
	MOVQ 384(SP),R15
	MOVQ 392(SP),BX
	MOVQ 400(SP),BP
	MOVQ R11,SP
	RET
BYTESATLEAST65:
	SUBQ $64,R9
	ADDQ $64,DI
	ADDQ $64,SI
	JMP BYTESBETWEEN1AND255
@ -0,0 +1,199 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
package salsa | |||
// Core208 applies the Salsa20/8 core function to the 64-byte array in and puts
// the result into the 64-byte array out. The input and output may be the same array.
//
// The input is fully decoded into words before anything is written to out,
// which is what makes passing the same array for both arguments safe.
func Core208(out *[64]byte, in *[64]byte) {
	// j keeps the initial state for the final feed-forward addition.
	var j [16]uint32
	for i := range j {
		b := in[4*i : 4*i+4]
		j[i] = uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
	}

	x0, x1, x2, x3 := j[0], j[1], j[2], j[3]
	x4, x5, x6, x7 := j[4], j[5], j[6], j[7]
	x8, x9, x10, x11 := j[8], j[9], j[10], j[11]
	x12, x13, x14, x15 := j[12], j[13], j[14], j[15]

	// Each iteration performs two rounds (one over columns, one over rows),
	// so four iterations give the 8 rounds.
	for round := 0; round < 8; round += 2 {
		// Column round.
		t := x0 + x12
		x4 ^= t<<7 | t>>25
		t = x4 + x0
		x8 ^= t<<9 | t>>23
		t = x8 + x4
		x12 ^= t<<13 | t>>19
		t = x12 + x8
		x0 ^= t<<18 | t>>14

		t = x5 + x1
		x9 ^= t<<7 | t>>25
		t = x9 + x5
		x13 ^= t<<9 | t>>23
		t = x13 + x9
		x1 ^= t<<13 | t>>19
		t = x1 + x13
		x5 ^= t<<18 | t>>14

		t = x10 + x6
		x14 ^= t<<7 | t>>25
		t = x14 + x10
		x2 ^= t<<9 | t>>23
		t = x2 + x14
		x6 ^= t<<13 | t>>19
		t = x6 + x2
		x10 ^= t<<18 | t>>14

		t = x15 + x11
		x3 ^= t<<7 | t>>25
		t = x3 + x15
		x7 ^= t<<9 | t>>23
		t = x7 + x3
		x11 ^= t<<13 | t>>19
		t = x11 + x7
		x15 ^= t<<18 | t>>14

		// Row round.
		t = x0 + x3
		x1 ^= t<<7 | t>>25
		t = x1 + x0
		x2 ^= t<<9 | t>>23
		t = x2 + x1
		x3 ^= t<<13 | t>>19
		t = x3 + x2
		x0 ^= t<<18 | t>>14

		t = x5 + x4
		x6 ^= t<<7 | t>>25
		t = x6 + x5
		x7 ^= t<<9 | t>>23
		t = x7 + x6
		x4 ^= t<<13 | t>>19
		t = x4 + x7
		x5 ^= t<<18 | t>>14

		t = x10 + x9
		x11 ^= t<<7 | t>>25
		t = x11 + x10
		x8 ^= t<<9 | t>>23
		t = x8 + x11
		x9 ^= t<<13 | t>>19
		t = x9 + x8
		x10 ^= t<<18 | t>>14

		t = x15 + x14
		x12 ^= t<<7 | t>>25
		t = x12 + x15
		x13 ^= t<<9 | t>>23
		t = x13 + x12
		x14 ^= t<<13 | t>>19
		t = x14 + x13
		x15 ^= t<<18 | t>>14
	}

	// Feed-forward: add the initial state word by word, then serialize each
	// sum little-endian.
	sums := [16]uint32{
		x0 + j[0], x1 + j[1], x2 + j[2], x3 + j[3],
		x4 + j[4], x5 + j[5], x6 + j[6], x7 + j[7],
		x8 + j[8], x9 + j[9], x10 + j[10], x11 + j[11],
		x12 + j[12], x13 + j[13], x14 + j[14], x15 + j[15],
	}
	for i, w := range sums {
		out[4*i+0] = byte(w)
		out[4*i+1] = byte(w >> 8)
		out[4*i+2] = byte(w >> 16)
		out[4*i+3] = byte(w >> 24)
	}
}
@ -0,0 +1,23 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
// +build amd64,!appengine,!gccgo | |||
package salsa | |||
// This function is implemented in salsa2020_amd64.s. | |||
//go:noescape | |||
func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte) | |||
// XORKeyStream crypts bytes from in to out using the given key and counters. | |||
// In and out may be the same slice but otherwise should not overlap. Counter | |||
// contains the raw salsa20 counter bytes (both nonce and block counter). | |||
func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) { | |||
if len(in) == 0 { | |||
return | |||
} | |||
salsa2020XORKeyStream(&out[0], &in[0], uint64(len(in)), &counter[0], &key[0]) | |||
} |
@ -0,0 +1,234 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
// +build !amd64 appengine gccgo | |||
package salsa | |||
// rounds is the number of Salsa20 rounds applied by core. The round loop
// advances two rounds per iteration, so this value is expected to be even.
const rounds = 20
// core applies the Salsa20 core function to 16-byte input in, 32-byte key k, | |||
// and 16-byte constant c, and puts the result into 64-byte array out. | |||
func core(out *[64]byte, in *[16]byte, k *[32]byte, c *[16]byte) { | |||
j0 := uint32(c[0]) | uint32(c[1])<<8 | uint32(c[2])<<16 | uint32(c[3])<<24 | |||
j1 := uint32(k[0]) | uint32(k[1])<<8 | uint32(k[2])<<16 | uint32(k[3])<<24 | |||
j2 := uint32(k[4]) | uint32(k[5])<<8 | uint32(k[6])<<16 | uint32(k[7])<<24 | |||
j3 := uint32(k[8]) | uint32(k[9])<<8 | uint32(k[10])<<16 | uint32(k[11])<<24 | |||
j4 := uint32(k[12]) | uint32(k[13])<<8 | uint32(k[14])<<16 | uint32(k[15])<<24 | |||
j5 := uint32(c[4]) | uint32(c[5])<<8 | uint32(c[6])<<16 | uint32(c[7])<<24 | |||
j6 := uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24 | |||
j7 := uint32(in[4]) | uint32(in[5])<<8 | uint32(in[6])<<16 | uint32(in[7])<<24 | |||
j8 := uint32(in[8]) | uint32(in[9])<<8 | uint32(in[10])<<16 | uint32(in[11])<<24 | |||
j9 := uint32(in[12]) | uint32(in[13])<<8 | uint32(in[14])<<16 | uint32(in[15])<<24 | |||
j10 := uint32(c[8]) | uint32(c[9])<<8 | uint32(c[10])<<16 | uint32(c[11])<<24 | |||
j11 := uint32(k[16]) | uint32(k[17])<<8 | uint32(k[18])<<16 | uint32(k[19])<<24 | |||
j12 := uint32(k[20]) | uint32(k[21])<<8 | uint32(k[22])<<16 | uint32(k[23])<<24 | |||
j13 := uint32(k[24]) | uint32(k[25])<<8 | uint32(k[26])<<16 | uint32(k[27])<<24 | |||
j14 := uint32(k[28]) | uint32(k[29])<<8 | uint32(k[30])<<16 | uint32(k[31])<<24 | |||
j15 := uint32(c[12]) | uint32(c[13])<<8 | uint32(c[14])<<16 | uint32(c[15])<<24 | |||
x0, x1, x2, x3, x4, x5, x6, x7, x8 := j0, j1, j2, j3, j4, j5, j6, j7, j8 | |||
x9, x10, x11, x12, x13, x14, x15 := j9, j10, j11, j12, j13, j14, j15 | |||
for i := 0; i < rounds; i += 2 { | |||
u := x0 + x12 | |||
x4 ^= u<<7 | u>>(32-7) | |||
u = x4 + x0 | |||
x8 ^= u<<9 | u>>(32-9) | |||
u = x8 + x4 | |||
x12 ^= u<<13 | u>>(32-13) | |||
u = x12 + x8 | |||
x0 ^= u<<18 | u>>(32-18) | |||
u = x5 + x1 | |||
x9 ^= u<<7 | u>>(32-7) | |||
u = x9 + x5 | |||
x13 ^= u<<9 | u>>(32-9) | |||
u = x13 + x9 | |||
x1 ^= u<<13 | u>>(32-13) | |||
u = x1 + x13 | |||
x5 ^= u<<18 | u>>(32-18) | |||
u = x10 + x6 | |||
x14 ^= u<<7 | u>>(32-7) | |||
u = x14 + x10 | |||
x2 ^= u<<9 | u>>(32-9) | |||
u = x2 + x14 | |||
x6 ^= u<<13 | u>>(32-13) | |||
u = x6 + x2 | |||
x10 ^= u<<18 | u>>(32-18) | |||
u = x15 + x11 | |||
x3 ^= u<<7 | u>>(32-7) | |||
u = x3 + x15 | |||
x7 ^= u<<9 | u>>(32-9) | |||
u = x7 + x3 | |||
x11 ^= u<<13 | u>>(32-13) | |||
u = x11 + x7 | |||
x15 ^= u<<18 | u>>(32-18) | |||
u = x0 + x3 | |||
x1 ^= u<<7 | u>>(32-7) | |||
u = x1 + x0 | |||
x2 ^= u<<9 | u>>(32-9) | |||
u = x2 + x1 | |||
x3 ^= u<<13 | u>>(32-13) | |||
u = x3 + x2 | |||
x0 ^= u<<18 | u>>(32-18) | |||
u = x5 + x4 | |||
x6 ^= u<<7 | u>>(32-7) | |||
u = x6 + x5 | |||
x7 ^= u<<9 | u>>(32-9) | |||
u = x7 + x6 | |||
x4 ^= u<<13 | u>>(32-13) | |||
u = x4 + x7 | |||
x5 ^= u<<18 | u>>(32-18) | |||
u = x10 + x9 | |||
x11 ^= u<<7 | u>>(32-7) | |||
u = x11 + x10 | |||
x8 ^= u<<9 | u>>(32-9) | |||
u = x8 + x11 | |||
x9 ^= u<<13 | u>>(32-13) | |||
u = x9 + x8 | |||
x10 ^= u<<18 | u>>(32-18) | |||
u = x15 + x14 | |||
x12 ^= u<<7 | u>>(32-7) | |||
u = x12 + x15 | |||
x13 ^= u<<9 | u>>(32-9) | |||
u = x13 + x12 | |||
x14 ^= u<<13 | u>>(32-13) | |||
u = x14 + x13 | |||
x15 ^= u<<18 | u>>(32-18) | |||
} | |||
x0 += j0 | |||
x1 += j1 | |||
x2 += j2 | |||
x3 += j3 | |||
x4 += j4 | |||
x5 += j5 | |||
x6 += j6 | |||
x7 += j7 | |||
x8 += j8 | |||
x9 += j9 | |||
x10 += j10 | |||
x11 += j11 | |||
x12 += j12 | |||
x13 += j13 | |||
x14 += j14 | |||
x15 += j15 | |||
out[0] = byte(x0) | |||
out[1] = byte(x0 >> 8) | |||
out[2] = byte(x0 >> 16) | |||
out[3] = byte(x0 >> 24) | |||
out[4] = byte(x1) | |||
out[5] = byte(x1 >> 8) | |||
out[6] = byte(x1 >> 16) | |||
out[7] = byte(x1 >> 24) | |||
out[8] = byte(x2) | |||
out[9] = byte(x2 >> 8) | |||
out[10] = byte(x2 >> 16) | |||
out[11] = byte(x2 >> 24) | |||
out[12] = byte(x3) | |||
out[13] = byte(x3 >> 8) | |||
out[14] = byte(x3 >> 16) | |||
out[15] = byte(x3 >> 24) | |||
out[16] = byte(x4) | |||
out[17] = byte(x4 >> 8) | |||
out[18] = byte(x4 >> 16) | |||
out[19] = byte(x4 >> 24) | |||
out[20] = byte(x5) | |||
out[21] = byte(x5 >> 8) | |||
out[22] = byte(x5 >> 16) | |||
out[23] = byte(x5 >> 24) | |||
out[24] = byte(x6) | |||
out[25] = byte(x6 >> 8) | |||
out[26] = byte(x6 >> 16) | |||
out[27] = byte(x6 >> 24) | |||
out[28] = byte(x7) | |||
out[29] = byte(x7 >> 8) | |||
out[30] = byte(x7 >> 16) | |||
out[31] = byte(x7 >> 24) | |||
out[32] = byte(x8) | |||
out[33] = byte(x8 >> 8) | |||
out[34] = byte(x8 >> 16) | |||
out[35] = byte(x8 >> 24) | |||
out[36] = byte(x9) | |||
out[37] = byte(x9 >> 8) | |||
out[38] = byte(x9 >> 16) | |||
out[39] = byte(x9 >> 24) | |||
out[40] = byte(x10) | |||
out[41] = byte(x10 >> 8) | |||
out[42] = byte(x10 >> 16) | |||
out[43] = byte(x10 >> 24) | |||
out[44] = byte(x11) | |||
out[45] = byte(x11 >> 8) | |||
out[46] = byte(x11 >> 16) | |||
out[47] = byte(x11 >> 24) | |||
out[48] = byte(x12) | |||
out[49] = byte(x12 >> 8) | |||
out[50] = byte(x12 >> 16) | |||
out[51] = byte(x12 >> 24) | |||
out[52] = byte(x13) | |||
out[53] = byte(x13 >> 8) | |||
out[54] = byte(x13 >> 16) | |||
out[55] = byte(x13 >> 24) | |||
out[56] = byte(x14) | |||
out[57] = byte(x14 >> 8) | |||
out[58] = byte(x14 >> 16) | |||
out[59] = byte(x14 >> 24) | |||
out[60] = byte(x15) | |||
out[61] = byte(x15 >> 8) | |||
out[62] = byte(x15 >> 16) | |||
out[63] = byte(x15 >> 24) | |||
} | |||
// XORKeyStream crypts bytes from in to out using the given key and counters. | |||
// In and out may be the same slice but otherwise should not overlap. Counter | |||
// contains the raw salsa20 counter bytes (both nonce and block counter). | |||
func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) { | |||
var block [64]byte | |||
var counterCopy [16]byte | |||
copy(counterCopy[:], counter[:]) | |||
for len(in) >= 64 { | |||
core(&block, &counterCopy, key, &Sigma) | |||
for i, x := range block { | |||
out[i] = in[i] ^ x | |||
} | |||
u := uint32(1) | |||
for i := 8; i < 16; i++ { | |||
u += uint32(counterCopy[i]) | |||
counterCopy[i] = byte(u) | |||
u >>= 8 | |||
} | |||
in = in[64:] | |||
out = out[64:] | |||
} | |||
if len(in) > 0 { | |||
core(&block, &counterCopy, key, &Sigma) | |||
for i, v := range in { | |||
out[i] = v ^ block[i] | |||
} | |||
} | |||
} |
@ -0,0 +1,35 @@ | |||
// Copyright 2012 The Go Authors. All rights reserved. | |||
// Use of this source code is governed by a BSD-style | |||
// license that can be found in the LICENSE file. | |||
package salsa | |||
import "testing" | |||
func TestCore208(t *testing.T) { | |||
in := [64]byte{ | |||
0x7e, 0x87, 0x9a, 0x21, 0x4f, 0x3e, 0xc9, 0x86, | |||
0x7c, 0xa9, 0x40, 0xe6, 0x41, 0x71, 0x8f, 0x26, | |||
0xba, 0xee, 0x55, 0x5b, 0x8c, 0x61, 0xc1, 0xb5, | |||
0x0d, 0xf8, 0x46, 0x11, 0x6d, 0xcd, 0x3b, 0x1d, | |||
0xee, 0x24, 0xf3, 0x19, 0xdf, 0x9b, 0x3d, 0x85, | |||
0x14, 0x12, 0x1e, 0x4b, 0x5a, 0xc5, 0xaa, 0x32, | |||
0x76, 0x02, 0x1d, 0x29, 0x09, 0xc7, 0x48, 0x29, | |||
0xed, 0xeb, 0xc6, 0x8d, 0xb8, 0xb8, 0xc2, 0x5e} | |||
out := [64]byte{ | |||
0xa4, 0x1f, 0x85, 0x9c, 0x66, 0x08, 0xcc, 0x99, | |||
0x3b, 0x81, 0xca, 0xcb, 0x02, 0x0c, 0xef, 0x05, | |||
0x04, 0x4b, 0x21, 0x81, 0xa2, 0xfd, 0x33, 0x7d, | |||
0xfd, 0x7b, 0x1c, 0x63, 0x96, 0x68, 0x2f, 0x29, | |||
0xb4, 0x39, 0x31, 0x68, 0xe3, 0xc9, 0xe6, 0xbc, | |||
0xfe, 0x6b, 0xc5, 0xb7, 0xa0, 0x6d, 0x96, 0xba, | |||
0xe4, 0x24, 0xcc, 0x10, 0x2c, 0x91, 0x74, 0x5c, | |||
0x24, 0xad, 0x67, 0x3d, 0xc7, 0x61, 0x8f, 0x81, | |||
} | |||
Core208(&in, &in) | |||
if in != out { | |||
t.Errorf("expected %x, got %x", out, in) | |||
} | |||
} |