Browse Source

Merge pull request #775 from tendermint/rpc-client-jitter

rpc/lib/client: add jitter for exponential backoff of WSClient
pull/779/merge
Ethan Buchman 7 years ago
committed by GitHub
parent
commit
376f47e030
3 changed files with 76 additions and 4 deletions
  1. +3
    -0
      Makefile
  2. +66
    -0
      rpc/lib/client/integration_test.go
  3. +7
    -4
      rpc/lib/client/ws_client.go

+ 3
- 0
Makefile View File

@ -35,6 +35,9 @@ test_race:
# test_integrations runs the ./test/test.sh script under bash. The leading @
# keeps make from echoing the command before running it.
test_integrations:
@bash ./test/test.sh
# release runs `go test` over $(PACKAGES) with the `release` build tag set, so
# test files guarded by `// +build release` are compiled and run as well.
release:
@go test -tags release $(PACKAGES)
# test100 runs the `test` target 100 times in a row (presumably to surface
# flaky tests -- confirm intent with the authors).
#
# The loop is written in plain POSIX sh: make runs recipes with /bin/sh unless
# SHELL is overridden, and the bash-only `{1..100}` brace expansion is not
# expanded by every /bin/sh, in which case the loop body would run only once.
# $(MAKE) is used instead of a bare `make` so the sub-make inherits the
# parent's flags. The counter is bumped before the sub-make so that, as in the
# original loop, the recipe's exit status is that of the last `test` run.
test100:
	@i=0; while [ $$i -lt 100 ]; do i=$$((i+1)); $(MAKE) test; done


+ 66
- 0
rpc/lib/client/integration_test.go View File

@ -0,0 +1,66 @@
// +build release
// This file holds a comprehensive, long-running integration test, so it is
// only built with the release tag and is run only before releases.
package rpcclient
import (
"bytes"
"errors"
"net"
"regexp"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/tendermint/tmlibs/log"
)
// TestWSClientReconnectWithJitter starts n clients whose dialer always fails,
// runs reconnect() on each of them concurrently, and then checks that
//
//  1. every client reports itself inactive once the worst-case backoff window
//     has elapsed, and
//  2. no two "backoff_duration" values written to the shared log are
//     identical, i.e. the backoff of each attempt was actually jittered.
func TestWSClientReconnectWithJitter(t *testing.T) {
	n := 8
	maxReconnectAttempts := 3

	// Attempt i (0-based) backs off for 2^i seconds plus a jitter of less than
	// one second. Rounding every attempt up to the next whole second gives the
	// worst case (1+1) + (2+1) + (4+1) = 10s, which in general is
	// (2^maxReconnectAttempts - 1) + maxReconnectAttempts seconds.
	maxSleepTime := time.Second * time.Duration(((1<<uint(maxReconnectAttempts))-1)+maxReconnectAttempts)

	errNotConnected := errors.New("not connected")
	clientMap := make(map[int]*WSClient)

	// All clients log into the same buffer so the backoff durations can be
	// compared across clients afterwards.
	buf := new(bytes.Buffer)
	logger := log.NewTMLogger(buf)

	for i := 0; i < n; i++ {
		c := NewWSClient("tcp://foo", "/websocket")
		// Every dial attempt fails, forcing the client through all of its
		// reconnect attempts.
		c.Dialer = func(string, string) (net.Conn, error) {
			return nil, errNotConnected
		}
		c.SetLogger(logger)
		c.maxReconnectAttempts = maxReconnectAttempts
		// Not invoking defer c.Stop() because
		// after all the reconnect attempts have been
		// exhausted, c.Stop is implicitly invoked.
		clientMap[i] = c
		// Trigger the reconnect routine that performs exponential backoff.
		go c.reconnect()
	}

	// Give every client enough time to exhaust its reconnect attempts.
	time.Sleep(maxSleepTime)

	stopCount := 0
	for key, c := range clientMap {
		if !c.IsActive() {
			delete(clientMap, key)
			stopCount++
		}
	}
	// testify expects (expected, actual); keeping that order makes the failure
	// message report the two values the right way round.
	require.Equal(t, n, stopCount, "expecting all clients to have been stopped")

	// Next we have to examine the logs to ensure that no single time was repeated
	backoffDurRegexp := regexp.MustCompile(`backoff_duration=(.+)`)
	matches := backoffDurRegexp.FindAll(buf.Bytes(), -1)
	seenMap := make(map[string]int)
	for i, match := range matches {
		entry := string(match)
		if origIndex, seen := seenMap[entry]; seen {
			t.Errorf("Match #%d (%q) was seen originally at log entry #%d", i, match, origIndex)
			continue
		}
		seenMap[entry] = i
	}
}

+ 7
- 4
rpc/lib/client/ws_client.go View File

@ -4,7 +4,7 @@ import (
"context"
"encoding/json"
"fmt"
"math"
"math/rand"
"net"
"net/http"
"sync"
@ -251,11 +251,14 @@ func (c *WSClient) reconnect() error {
c.mtx.Unlock()
}()
// 1s == (1e9 ns) == (1 Billion ns)
billionNs := float64(time.Second.Nanoseconds())
for {
c.Logger.Info("reconnecting", "attempt", attempt+1)
jitterSeconds := time.Duration(rand.Float64() * billionNs)
backoffDuration := jitterSeconds + ((1 << uint(attempt)) * time.Second)
d := time.Duration(math.Exp2(float64(attempt)))
time.Sleep(d * time.Second)
c.Logger.Info("reconnecting", "attempt", attempt+1, "backoff_duration", backoffDuration)
time.Sleep(backoffDuration)
err := c.dial()
if err != nil {


Loading…
Cancel
Save