From a7250af30303369721b37489aecf3677813426c0 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Fri, 23 Mar 2018 09:48:27 +0100 Subject: [PATCH] Exponential backoff follow up (#1349) * document new functionality [ci skip] Refs #1304 * add fixme [ci skip] Refs #1304 * ensure that we dial peer after backoff duration Refs #1304 --- docs/specification/new-spec/reactors/pex/pex.md | 9 ++++++++- p2p/pex/pex_reactor.go | 8 ++++++-- p2p/pex/pex_reactor_test.go | 9 +++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/docs/specification/new-spec/reactors/pex/pex.md b/docs/specification/new-spec/reactors/pex/pex.md index 43d6f80d7..d3981bda2 100644 --- a/docs/specification/new-spec/reactors/pex/pex.md +++ b/docs/specification/new-spec/reactors/pex/pex.md @@ -57,10 +57,17 @@ a trust metric (see below), but it's best to start with something simple. ## Select Peers to Dial When we need more peers, we pick them randomly from the addrbook with some -configurable bias for unvetted peers. The bias should be lower when we have fewer peers, +configurable bias for unvetted peers. The bias should be lower when we have fewer peers and can increase as we obtain more, ensuring that our first peers are more trustworthy, but always giving us the chance to discover new good peers. +We track the last time we dialed a peer and the number of unsuccessful attempts +we've made. If too many attempts are made, we mark the peer as bad. + +Connection attempts are made with exponential backoff (plus jitter). Because +the selection process happens every `ensurePeersPeriod`, we might not end up +dialing a peer for much longer than the backoff duration. + ## Select Peers to Exchange When we’re asked for peers, we select them as follows: diff --git a/p2p/pex/pex_reactor.go b/p2p/pex/pex_reactor.go index 2abf25930..45a05bdb7 100644 --- a/p2p/pex/pex_reactor.go +++ b/p2p/pex/pex_reactor.go @@ -409,11 +409,15 @@ func (r *PEXReactor) dialPeer(addr *p2p.NetAddress) { // TODO: detect more "bad peer" scenarios if _, ok := err.(p2p.ErrSwitchAuthenticationFailure); ok { r.book.MarkBad(addr) + r.attemptsToDial.Delete(addr.DialString()) } else { r.book.MarkAttempt(addr) + // FIXME: if the addr is going to be removed from the addrbook (hard to + // tell at this point), we need to Delete it from attemptsToDial, not + // record another attempt. + // record attempt + r.attemptsToDial.Store(addr.DialString(), _attemptsToDial{attempts + 1, time.Now()}) } - // record attempt - r.attemptsToDial.Store(addr.DialString(), _attemptsToDial{attempts + 1, time.Now()}) } else { // cleanup any history r.attemptsToDial.Delete(addr.DialString()) diff --git a/p2p/pex/pex_reactor_test.go b/p2p/pex/pex_reactor_test.go index 95cedfea5..20276ec86 100644 --- a/p2p/pex/pex_reactor_test.go +++ b/p2p/pex/pex_reactor_test.go @@ -308,6 +308,15 @@ func TestPEXReactorDialPeer(t *testing.T) { // must be skipped because it is too early assert.Equal(t, 1, pexR.AttemptsToDial(addr)) + + if !testing.Short() { + time.Sleep(3 * time.Second) + + // 3rd attempt + pexR.dialPeer(addr) + + assert.Equal(t, 2, pexR.AttemptsToDial(addr)) + } } type mockPeer struct {