From 671c5c9b84644b4b83b00fbf23f8d3dd7ff5c199 Mon Sep 17 00:00:00 2001
From: Sean Braithwaite <brapse@gmail.com>
Date: Thu, 18 Apr 2019 17:31:36 +0200
Subject: [PATCH] crypto: Proof of Concept for iterative version of
 SimpleHashFromByteSlices (#2611) (#3530)

(#2611) had suggested that an iterative version of
SimpleHashFromByteSlices would be faster, presumably because
we can envision some overhead accumulating from stack
frames and function calls. Additionally, a recursive algorithm risks
hitting the stack limit and causing a stack overflow should the tree
be too large.

Provided here is an iterative alternative, a simple test to assert
correctness and a benchmark. On the performance side, there appears to
be no overall difference:

```
BenchmarkSimpleHashAlternatives/recursive-4                20000 77677 ns/op
BenchmarkSimpleHashAlternatives/iterative-4                20000 76802 ns/op
```

On the surface it might seem that the additional overhead is due to
the different allocation patterns of the implementations. The recursive
version uses a single `[][]byte` slice which it then re-slices at each level of the tree.
The iterative version allocates a new `[][]byte` once within the function and
then rewrites sub-slices of that array at each level of the tree.

Experimenting by modifying the code to simply calculate the
hash and not store the result shows little to no difference in performance.

These preliminary results suggest:
1. The performance of the current implementation is pretty good
2. Go has low overhead for recursive functions
3. The performance of the SimpleHashFromByteSlices routine is dominated
by the actual hashing of data

Although this work is in no way exhaustive, point #3 suggests that
optimizations of this routine would need to take an alternative
approach to make significant improvements on the current performance.

Finally, considering that the recursive implementation is easier to
read, it might not be worthwhile to switch to a less intuitive
implementation for so little benefit.

* re-add slice re-writing
* [crypto] Document SimpleHashFromByteSlicesIterative
---
 crypto/merkle/simple_tree.go      | 71 +++++++++++++++++++++++++++++++
 crypto/merkle/simple_tree_test.go | 36 ++++++++++++++++
 2 files changed, 107 insertions(+)

diff --git a/crypto/merkle/simple_tree.go b/crypto/merkle/simple_tree.go
index 5de514b51..03dc9d9d1 100644
--- a/crypto/merkle/simple_tree.go
+++ b/crypto/merkle/simple_tree.go
@@ -20,6 +20,77 @@ func SimpleHashFromByteSlices(items [][]byte) []byte {
 	}
 }
 
+// SimpleHashFromByteSlicesIterative is an iterative alternative to
+// SimpleHashFromByteSlices motivated by potential performance improvements.
+// (#2611) had suggested that an iterative version of
+// SimpleHashFromByteSlices would be faster, presumably because
+// we can envision some overhead accumulating from stack
+// frames and function calls. Additionally, a recursive algorithm risks
+// hitting the stack limit and causing a stack overflow should the tree
+// be too large.
+//
+// Provided here is an iterative alternative, a simple test to assert
+// correctness and a benchmark. On the performance side, there appears to
+// be no overall difference:
+//
+// BenchmarkSimpleHashAlternatives/recursive-4                20000 77677 ns/op
+// BenchmarkSimpleHashAlternatives/iterative-4                20000 76802 ns/op
+//
+// On the surface it might seem that the additional overhead is due to
+// the different allocation patterns of the implementations. The recursive
+// version uses a single [][]byte slice which it then re-slices at each level of the tree.
+// The iterative version allocates a new [][]byte once within the function and
+// then rewrites sub-slices of that array at each level of the tree.
+//
+// Experimenting by modifying the code to simply calculate the
+// hash and not store the result shows little to no difference in performance.
+//
+// These preliminary results suggest:
+//
+// 1. The performance of SimpleHashFromByteSlices is pretty good
+// 2. Go has low overhead for recursive functions
+// 3. The performance of the SimpleHashFromByteSlices routine is dominated
+//    by the actual hashing of data
+//
+// Although this work is in no way exhaustive, point #3 suggests that
+// optimization of this routine would need to take an alternative
+// approach to make significant improvements on the current performance.
+//
+// Finally, considering that the recursive implementation is easier to
+// read, it might not be worthwhile to switch to a less intuitive
+// implementation for so little benefit.
+func SimpleHashFromByteSlicesIterative(input [][]byte) []byte {
+	items := make([][]byte, len(input))
+	// Seed the scratch slice with the leaf hash of every input element.
+	for i, leaf := range input {
+		items[i] = leafHash(leaf)
+	}
+
+	size := len(items) // number of nodes in the level currently held in items[:size]
+	for {
+		switch size {
+		case 0:
+			return nil // empty input: there is no root
+		case 1:
+			return items[0] // a single remaining node is the root
+		default:
+			rp := 0 // read position
+			wp := 0 // write position
+			for rp < size {
+				if rp+1 < size {
+					items[wp] = innerHash(items[rp], items[rp+1]) // combine an adjacent pair into its parent
+					rp += 2
+				} else {
+					items[wp] = items[rp] // odd node out is carried up to the next level unchanged
+					rp += 1
+				}
+				wp += 1
+			}
+			size = wp // the next level consists of the wp nodes just written
+		}
+	}
+}
+
 // SimpleHashFromMap computes a Merkle tree from sorted map.
 // Like calling SimpleHashFromHashers with
 // `item = []byte(Hash(key) | Hash(value))`,
diff --git a/crypto/merkle/simple_tree_test.go b/crypto/merkle/simple_tree_test.go
index 9abe321c3..5bbe294af 100644
--- a/crypto/merkle/simple_tree_test.go
+++ b/crypto/merkle/simple_tree_test.go
@@ -70,6 +70,42 @@ func TestSimpleProof(t *testing.T) {
 	}
 }
 
+func TestSimpleHashAlternatives(t *testing.T) {
+	// The iterative and recursive implementations must agree on the root hash.
+	total := 100
+
+	items := make([][]byte, total)
+	for i := 0; i < total; i++ {
+		items[i] = testItem(cmn.RandBytes(tmhash.Size))
+	}
+
+	rootHash1 := SimpleHashFromByteSlicesIterative(items)
+	rootHash2 := SimpleHashFromByteSlices(items)
+	require.Equal(t, rootHash1, rootHash2, "Unmatched root hashes: %X vs %X", rootHash1, rootHash2)
+}
+
+func BenchmarkSimpleHashAlternatives(b *testing.B) {
+	total := 100
+	// Build one shared fixture so both sub-benchmarks hash the same items.
+	items := make([][]byte, total)
+	for i := 0; i < total; i++ {
+		items[i] = testItem(cmn.RandBytes(tmhash.Size))
+	}
+	// Intended to keep the fixture construction above out of the measurements.
+	b.ResetTimer()
+	b.Run("recursive", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			_ = SimpleHashFromByteSlices(items)
+		}
+	})
+
+	b.Run("iterative", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			_ = SimpleHashFromByteSlicesIterative(items)
+		}
+	})
+}
+
 func Test_getSplitPoint(t *testing.T) {
 	tests := []struct {
 		length int