From 0f41570c80858e0081392c03977e0f77c0d21299 Mon Sep 17 00:00:00 2001
From: Anton Kaliaev
Date: Sun, 11 Mar 2018 13:20:51 +0400
Subject: [PATCH] fixes from bucky's review

---
 p2p/pex/pex_reactor.go | 23 ++++++++++++++---------
 rpc/lib/client/ws_client.go | 4 +---
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/p2p/pex/pex_reactor.go b/p2p/pex/pex_reactor.go
index 73df67dca..aff1ab714 100644
--- a/p2p/pex/pex_reactor.go
+++ b/p2p/pex/pex_reactor.go
@@ -39,6 +39,8 @@ const (
 	defaultSeedDisconnectWaitPeriod = 2 * time.Minute // disconnect after this
 	defaultCrawlPeerInterval = 2 * time.Minute // dont redial for this. TODO: back-off
 	defaultCrawlPeersPeriod = 30 * time.Second // check some peers every this
+
+	maxAttemptsToDial = 16 // ~ 35h in total (last attempt - 18h sleep)
 )
 
 // PEXReactor handles PEX (peer exchange) and ensures that an
@@ -61,7 +63,7 @@ type PEXReactor struct {
 	requestsSent *cmn.CMap // ID->struct{}: unanswered send requests
 	lastReceivedRequests *cmn.CMap // ID->time.Time: last time peer requested from us
 
-	attemptsToDial sync.Map // dial addr -> number of attempts to dial (for exponential backoff)
+	attemptsToDial sync.Map // dial address (string) -> number of attempts (int) to dial (for exponential backoff)
 }
 
 // PEXReactorConfig holds reactor specific configuration data.
@@ -365,22 +367,25 @@ func (r *PEXReactor) ensurePeers() {
 }
 
 func (r *PEXReactor) dialPeer(addr *p2p.NetAddress) {
-	// 1s == (1e9 ns) == (1 Billion ns)
-	billionNs := float64(time.Second.Nanoseconds())
+	attempts := r.AttemptsToDial(addr)
+
+	if attempts > maxAttemptsToDial {
+		r.Logger.Error("Reached max attempts to dial", "addr", addr, "attempts", attempts)
+		r.book.MarkBad(addr)
+		return
+	}
 
 	// exponential backoff if it's not our first attempt to dial given address
-	var attempts int
-	if lAttempts, attempted := r.attemptsToDial.Load(addr.DialString()); attempted {
-		attempts = lAttempts.(int)
-		jitterSeconds := time.Duration(rand.Float64() * billionNs)
+	if attempts != 0 {
+		jitterSeconds := time.Duration(rand.Float64() * float64(time.Second)) // 1s == (1e9 ns)
 		backoffDuration := jitterSeconds + ((1 << uint(attempts)) * time.Second)
-		r.Logger.Debug(fmt.Sprintf("Dialing %v", addr), "attempts", attempts, "backoff_duration", backoffDuration)
+		r.Logger.Debug("Sleeping before dialing", "addr", addr, "dur", backoffDuration)
 		time.Sleep(backoffDuration)
 	}
 
 	err := r.Switch.DialPeerWithAddress(addr, false)
 	if err != nil {
-		r.Logger.Error("Dialing failed", "err", err)
+		r.Logger.Error("Dialing failed", "addr", addr, "err", err, "attempts", attempts)
 		// TODO: detect more "bad peer" scenarios
 		if _, ok := err.(p2p.ErrSwitchAuthenticationFailure); ok {
 			r.book.MarkBad(addr)
diff --git a/rpc/lib/client/ws_client.go b/rpc/lib/client/ws_client.go
index ca75ad561..fe15cda21 100644
--- a/rpc/lib/client/ws_client.go
+++ b/rpc/lib/client/ws_client.go
@@ -254,10 +254,8 @@ func (c *WSClient) reconnect() error {
 		c.mtx.Unlock()
 	}()
 
-	// 1s == (1e9 ns) == (1 Billion ns)
-	billionNs := float64(time.Second.Nanoseconds())
 	for {
-		jitterSeconds := time.Duration(rand.Float64() * billionNs)
+		jitterSeconds := time.Duration(rand.Float64() * float64(time.Second)) // 1s == (1e9 ns)
 		backoffDuration := jitterSeconds + ((1 << uint(attempt)) * time.Second)
 
 		c.Logger.Info("reconnecting", "attempt", attempt+1, "backoff_duration", backoffDuration)