From e4a03f249dcb7d2e833fc20add382b2a5e7f167e Mon Sep 17 00:00:00 2001 From: Greg Szabo <16846635+greg-szabo@users.noreply.github.com> Date: Mon, 1 Apr 2019 14:18:18 -0400 Subject: [PATCH 01/12] Release message changelog link fix (#3519) --- scripts/release_management/github-draft.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/release_management/github-draft.py b/scripts/release_management/github-draft.py index 1fccd38b9..8a189d53e 100755 --- a/scripts/release_management/github-draft.py +++ b/scripts/release_management/github-draft.py @@ -34,7 +34,7 @@ def create_draft(org,repo,branch,version): 'tag_name': version, 'target_commitish': '{0}'.format(branch), 'name': '{0} (WARNING: ALPHA SOFTWARE)'.format(version), - 'body': 'https://github.com/{0}/{1}/blob/master/CHANGELOG.md#{2}'.format(org,repo,version.replace('v','').replace('.','')), + 'body': 'https://github.com/{0}/{1}/blob/{2}/CHANGELOG.md#{3}'.format(org,repo,branch,version.replace('.','')), 'draft': True, 'prerelease': False } From f965a4db15796dfca4e504d93eaa83760ced4af2 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Wed, 3 Apr 2019 11:22:52 +0200 Subject: [PATCH 02/12] p2p: seed mode refactoring (#3011) ListOfKnownAddresses is removed panic if addrbook size is less than zero CrawlPeers does not attempt to connect to existing or peers we're currently dialing various perf. fixes improved tests (though not complete) move IsDialingOrExistingAddress check into DialPeerWithAddress (Fixes #2716) * addrbook: preallocate memory when saving addrbook to file * addrbook: remove oldestFirst struct and check for ID * oldestFirst replaced with sort.Slice * ID is now mandatory, so no need to check * addrbook: remove ListOfKnownAddresses GetSelection is used instead in seed mode. * addrbook: panic if size is less than 0 * rewrite addrbook#saveToFile to not use a counter * test AttemptDisconnects func * move IsDialingOrExistingAddress check into DialPeerWithAddress * save and cleanup crawl peer data * get rid of DefaultSeedDisconnectWaitPeriod * make linter happy * fix TestPEXReactorSeedMode * fix comment * add a changelog entry * Apply suggestions from code review Co-Authored-By: melekes * rename ErrDialingOrExistingAddress to ErrCurrentlyDialingOrExistingAddress * lowercase errors * do not persist seed data pros: - no extra files - less IO cons: - if the node crashes, seed might crawl a peer too soon * fixes after Ethan's review * add a changelog entry * we should only consult Switch about peers checking addrbook size does not make sense since only PEX reactor uses it for dialing peers! https://github.com/tendermint/tendermint/pull/3011#discussion_r270948875 --- CHANGELOG_PENDING.md | 2 +- node/node.go | 6 ++ p2p/errors.go | 24 ++++-- p2p/pex/addrbook.go | 24 ++---- p2p/pex/file.go | 9 +- p2p/pex/known_address.go | 12 --- p2p/pex/pex_reactor.go | 163 +++++++++++++++++++----------------- p2p/pex/pex_reactor_test.go | 66 ++++++++------- p2p/switch.go | 30 ++++--- 9 files changed, 173 insertions(+), 163 deletions(-) diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 045247937..21e52fe60 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -19,4 +19,4 @@ ### IMPROVEMENTS: ### BUG FIXES: - +- [p2p] \#2716 Check if we're already connected to peer right before dialing it (@melekes) diff --git a/node/node.go b/node/node.go index e91d36357..c0a4d736e 100644 --- a/node/node.go +++ b/node/node.go @@ -498,6 +498,12 @@ func NewNode(config *cfg.Config, &pex.PEXReactorConfig{ Seeds: splitAndTrimEmpty(config.P2P.Seeds, ",", " "), SeedMode: config.P2P.SeedMode, + // See consensus/reactor.go: blocksToContributeToBecomeGoodPeer 10000 + // blocks assuming 10s blocks ~ 28 hours. + // TODO (melekes): make it dynamic based on the actual block latencies + // from the live network. + // https://github.com/tendermint/tendermint/issues/3523 + SeedDisconnectWaitPeriod: 28 * time.Hour, }) pexReactor.SetLogger(logger.With("module", "pex")) sw.AddReactor("PEX", pexReactor) diff --git a/p2p/errors.go b/p2p/errors.go index 706150945..3650a7a0a 100644 --- a/p2p/errors.go +++ b/p2p/errors.go @@ -103,7 +103,7 @@ type ErrSwitchDuplicatePeerID struct { } func (e ErrSwitchDuplicatePeerID) Error() string { - return fmt.Sprintf("Duplicate peer ID %v", e.ID) + return fmt.Sprintf("duplicate peer ID %v", e.ID) } // ErrSwitchDuplicatePeerIP to be raised whena a peer is connecting with a known @@ -113,7 +113,7 @@ type ErrSwitchDuplicatePeerIP struct { } func (e ErrSwitchDuplicatePeerIP) Error() string { - return fmt.Sprintf("Duplicate peer IP %v", e.IP.String()) + return fmt.Sprintf("duplicate peer IP %v", e.IP.String()) } // ErrSwitchConnectToSelf to be raised when trying to connect to itself. @@ -122,7 +122,7 @@ type ErrSwitchConnectToSelf struct { } func (e ErrSwitchConnectToSelf) Error() string { - return fmt.Sprintf("Connect to self: %v", e.Addr) + return fmt.Sprintf("connect to self: %v", e.Addr) } type ErrSwitchAuthenticationFailure struct { @@ -132,7 +132,7 @@ type ErrSwitchAuthenticationFailure struct { func (e ErrSwitchAuthenticationFailure) Error() string { return fmt.Sprintf( - "Failed to authenticate peer. Dialed %v, but got peer with ID %s", + "failed to authenticate peer. Dialed %v, but got peer with ID %s", e.Dialed, e.Got, ) @@ -152,7 +152,7 @@ type ErrNetAddressNoID struct { } func (e ErrNetAddressNoID) Error() string { - return fmt.Sprintf("Address (%s) does not contain ID", e.Addr) + return fmt.Sprintf("address (%s) does not contain ID", e.Addr) } type ErrNetAddressInvalid struct { @@ -161,7 +161,7 @@ type ErrNetAddressInvalid struct { } func (e ErrNetAddressInvalid) Error() string { - return fmt.Sprintf("Invalid address (%s): %v", e.Addr, e.Err) + return fmt.Sprintf("invalid address (%s): %v", e.Addr, e.Err) } type ErrNetAddressLookup struct { @@ -170,5 +170,15 @@ type ErrNetAddressLookup struct { } func (e ErrNetAddressLookup) Error() string { - return fmt.Sprintf("Error looking up host (%s): %v", e.Addr, e.Err) + return fmt.Sprintf("error looking up host (%s): %v", e.Addr, e.Err) +} + +// ErrCurrentlyDialingOrExistingAddress indicates that we're currently +// dialing this address or it belongs to an existing peer. +type ErrCurrentlyDialingOrExistingAddress struct { + Addr string +} + +func (e ErrCurrentlyDialingOrExistingAddress) Error() string { + return fmt.Sprintf("connection with %s has been established or dialed", e.Addr) } diff --git a/p2p/pex/addrbook.go b/p2p/pex/addrbook.go index 3cb91c380..6be03d75b 100644 --- a/p2p/pex/addrbook.go +++ b/p2p/pex/addrbook.go @@ -66,8 +66,7 @@ type AddrBook interface { // Send a selection of addresses with bias GetSelectionWithBias(biasTowardsNewAddrs int) []*p2p.NetAddress - // TODO: remove - ListOfKnownAddresses() []*knownAddress + Size() int // Persist to disk Save() @@ -254,7 +253,7 @@ func (a *addrBook) PickAddress(biasTowardsNewAddrs int) *p2p.NetAddress { bookSize := a.size() if bookSize <= 0 { if bookSize < 0 { - a.Logger.Error("Addrbook size less than 0", "nNew", a.nNew, "nOld", a.nOld) + panic(fmt.Sprintf("Addrbook size %d (new: %d + old: %d) is less than 0", a.nNew+a.nOld, a.nNew, a.nOld)) } return nil } @@ -339,7 +338,7 @@ func (a *addrBook) GetSelection() []*p2p.NetAddress { bookSize := a.size() if bookSize <= 0 { if bookSize < 0 { - a.Logger.Error("Addrbook size less than 0", "nNew", a.nNew, "nOld", a.nOld) + panic(fmt.Sprintf("Addrbook size %d (new: %d + old: %d) is less than 0", a.nNew+a.nOld, a.nNew, a.nOld)) } return nil } @@ -389,7 +388,7 @@ func (a *addrBook) GetSelectionWithBias(biasTowardsNewAddrs int) []*p2p.NetAddre bookSize := a.size() if bookSize <= 0 { if bookSize < 0 { - a.Logger.Error("Addrbook size less than 0", "nNew", a.nNew, "nOld", a.nOld) + panic(fmt.Sprintf("Addrbook size %d (new: %d + old: %d) is less than 0", a.nNew+a.nOld, a.nNew, a.nOld)) } return nil } @@ -414,18 +413,6 @@ func (a *addrBook) GetSelectionWithBias(biasTowardsNewAddrs int) []*p2p.NetAddre return selection } -// ListOfKnownAddresses returns the new and old addresses. -func (a *addrBook) ListOfKnownAddresses() []*knownAddress { - a.mtx.Lock() - defer a.mtx.Unlock() - - addrs := []*knownAddress{} - for _, addr := range a.addrLookup { - addrs = append(addrs, addr.copy()) - } - return addrs -} - //------------------------------------------------ // Size returns the number of addresses in the book. @@ -473,8 +460,7 @@ func (a *addrBook) getBucket(bucketType byte, bucketIdx int) map[string]*knownAd case bucketTypeOld: return a.bucketsOld[bucketIdx] default: - cmn.PanicSanity("Should not happen") - return nil + panic("Invalid bucket type") } } diff --git a/p2p/pex/file.go b/p2p/pex/file.go index 33fec0336..d4a516850 100644 --- a/p2p/pex/file.go +++ b/p2p/pex/file.go @@ -16,16 +16,15 @@ type addrBookJSON struct { } func (a *addrBook) saveToFile(filePath string) { - a.Logger.Info("Saving AddrBook to file", "size", a.Size()) - a.mtx.Lock() defer a.mtx.Unlock() - // Compile Addrs - addrs := []*knownAddress{} + + a.Logger.Info("Saving AddrBook to file", "size", a.size()) + + addrs := make([]*knownAddress, 0, len(a.addrLookup)) for _, ka := range a.addrLookup { addrs = append(addrs, ka) } - aJSON := &addrBookJSON{ Key: a.key, Addrs: addrs, diff --git a/p2p/pex/known_address.go b/p2p/pex/known_address.go index 5673dec11..acde385bc 100644 --- a/p2p/pex/known_address.go +++ b/p2p/pex/known_address.go @@ -33,18 +33,6 @@ func (ka *knownAddress) ID() p2p.ID { return ka.Addr.ID } -func (ka *knownAddress) copy() *knownAddress { - return &knownAddress{ - Addr: ka.Addr, - Src: ka.Src, - Attempts: ka.Attempts, - LastAttempt: ka.LastAttempt, - LastSuccess: ka.LastSuccess, - BucketType: ka.BucketType, - Buckets: ka.Buckets, - } -} - func (ka *knownAddress) isOld() bool { return ka.BucketType == bucketTypeOld } diff --git a/p2p/pex/pex_reactor.go b/p2p/pex/pex_reactor.go index 0ce116326..cf8cfe6f5 100644 --- a/p2p/pex/pex_reactor.go +++ b/p2p/pex/pex_reactor.go @@ -3,7 +3,6 @@ package pex import ( "fmt" "reflect" - "sort" "sync" "time" @@ -35,16 +34,11 @@ const ( // Seed/Crawler constants - // We want seeds to only advertise good peers. Therefore they should wait at - // least as long as we expect it to take for a peer to become good before - // disconnecting. - // see consensus/reactor.go: blocksToContributeToBecomeGoodPeer - // 10000 blocks assuming 1s blocks ~ 2.7 hours. - defaultSeedDisconnectWaitPeriod = 3 * time.Hour + // minTimeBetweenCrawls is a minimum time between attempts to crawl a peer. + minTimeBetweenCrawls = 2 * time.Minute - defaultCrawlPeerInterval = 2 * time.Minute // don't redial for this. TODO: back-off. what for? - - defaultCrawlPeersPeriod = 30 * time.Second // check some peers every this + // check some peers every this + crawlPeerPeriod = 30 * time.Second maxAttemptsToDial = 16 // ~ 35h in total (last attempt - 18h) @@ -77,6 +71,9 @@ type PEXReactor struct { seedAddrs []*p2p.NetAddress attemptsToDial sync.Map // address (string) -> {number of attempts (int), last time dialed (time.Time)} + + // seed/crawled mode fields + crawlPeerInfos map[p2p.ID]crawlPeerInfo } func (r *PEXReactor) minReceiveRequestInterval() time.Duration { @@ -90,6 +87,11 @@ type PEXReactorConfig struct { // Seed/Crawler mode SeedMode bool + // We want seeds to only advertise good peers. Therefore they should wait at + // least as long as we expect it to take for a peer to become good before + // disconnecting. + SeedDisconnectWaitPeriod time.Duration + // Seeds is a list of addresses reactor may use // if it can't connect to peers in the addrbook. Seeds []string @@ -108,6 +110,7 @@ func NewPEXReactor(b AddrBook, config *PEXReactorConfig) *PEXReactor { ensurePeersPeriod: defaultEnsurePeersPeriod, requestsSent: cmn.NewCMap(), lastReceivedRequests: cmn.NewCMap(), + crawlPeerInfos: make(map[p2p.ID]crawlPeerInfo), } r.BaseReactor = *p2p.NewBaseReactor("PEXReactor", r) return r @@ -363,9 +366,9 @@ func (r *PEXReactor) ensurePeersRoutine() { ) // Randomize first round of communication to avoid thundering herd. - // If no potential peers are present directly start connecting so we guarantee - // swift setup with the help of configured seeds. - if r.hasPotentialPeers() { + // If no peers are present directly start connecting so we guarantee swift + // setup with the help of configured seeds. + if r.nodeHasSomePeersOrDialingAny() { time.Sleep(time.Duration(jitter)) } @@ -493,23 +496,26 @@ func (r *PEXReactor) dialPeer(addr *p2p.NetAddress) { err := r.Switch.DialPeerWithAddress(addr, false) if err != nil { + if _, ok := err.(p2p.ErrCurrentlyDialingOrExistingAddress); ok { + return + } + r.Logger.Error("Dialing failed", "addr", addr, "err", err, "attempts", attempts) - // TODO: detect more "bad peer" scenarios + markAddrInBookBasedOnErr(addr, r.book, err) if _, ok := err.(p2p.ErrSwitchAuthenticationFailure); ok { - r.book.MarkBad(addr) r.attemptsToDial.Delete(addr.DialString()) } else { - r.book.MarkAttempt(addr) // FIXME: if the addr is going to be removed from the addrbook (hard to // tell at this point), we need to Delete it from attemptsToDial, not // record another attempt. // record attempt r.attemptsToDial.Store(addr.DialString(), _attemptsToDial{attempts + 1, time.Now()}) } - } else { - // cleanup any history - r.attemptsToDial.Delete(addr.DialString()) + return } + + // cleanup any history + r.attemptsToDial.Delete(addr.DialString()) } // checkSeeds checks that addresses are well formed. @@ -568,101 +574,92 @@ func (r *PEXReactor) AttemptsToDial(addr *p2p.NetAddress) int { // from peers, except other seed nodes. func (r *PEXReactor) crawlPeersRoutine() { // Do an initial crawl - r.crawlPeers() + r.crawlPeers(r.book.GetSelection()) // Fire periodically - ticker := time.NewTicker(defaultCrawlPeersPeriod) + ticker := time.NewTicker(crawlPeerPeriod) for { select { case <-ticker.C: r.attemptDisconnects() - r.crawlPeers() + r.crawlPeers(r.book.GetSelection()) + r.cleanupCrawlPeerInfos() case <-r.Quit(): return } } } -// hasPotentialPeers indicates if there is a potential peer to connect to, by -// consulting the Switch as well as the AddrBook. -func (r *PEXReactor) hasPotentialPeers() bool { +// nodeHasSomePeersOrDialingAny returns true if the node is connected to some +// peers or dialing them currently. +func (r *PEXReactor) nodeHasSomePeersOrDialingAny() bool { out, in, dial := r.Switch.NumPeers() - - return out+in+dial > 0 && len(r.book.ListOfKnownAddresses()) > 0 + return out+in+dial > 0 } -// crawlPeerInfo handles temporary data needed for the -// network crawling performed during seed/crawler mode. +// crawlPeerInfo handles temporary data needed for the network crawling +// performed during seed/crawler mode. type crawlPeerInfo struct { - // The listening address of a potential peer we learned about - Addr *p2p.NetAddress - - // The last time we attempt to reach this address - LastAttempt time.Time - - // The last time we successfully reached this address - LastSuccess time.Time + Addr *p2p.NetAddress `json:"addr"` + // The last time we crawled the peer or attempted to do so. + LastCrawled time.Time `json:"last_crawled"` } -// oldestFirst implements sort.Interface for []crawlPeerInfo -// based on the LastAttempt field. -type oldestFirst []crawlPeerInfo - -func (of oldestFirst) Len() int { return len(of) } -func (of oldestFirst) Swap(i, j int) { of[i], of[j] = of[j], of[i] } -func (of oldestFirst) Less(i, j int) bool { return of[i].LastAttempt.Before(of[j].LastAttempt) } +// crawlPeers will crawl the network looking for new peer addresses. +func (r *PEXReactor) crawlPeers(addrs []*p2p.NetAddress) { + now := time.Now() -// getPeersToCrawl returns addresses of potential peers that we wish to validate. -// NOTE: The status information is ordered as described above. -func (r *PEXReactor) getPeersToCrawl() []crawlPeerInfo { - // TODO: be more selective - addrs := r.book.ListOfKnownAddresses() - of := make(oldestFirst, 0, len(addrs)) for _, addr := range addrs { - if len(addr.ID()) == 0 { - continue // dont use peers without id - } - - of = append(of, crawlPeerInfo{ - Addr: addr.Addr, - LastAttempt: addr.LastAttempt, - LastSuccess: addr.LastSuccess, - }) - } - sort.Sort(of) - return of -} - -// crawlPeers will crawl the network looking for new peer addresses. (once) -func (r *PEXReactor) crawlPeers() { - peerInfos := r.getPeersToCrawl() + peerInfo, ok := r.crawlPeerInfos[addr.ID] - now := time.Now() - // Use addresses we know of to reach additional peers - for _, pi := range peerInfos { - // Do not attempt to connect with peers we recently dialed - if now.Sub(pi.LastAttempt) < defaultCrawlPeerInterval { + // Do not attempt to connect with peers we recently crawled. + if ok && now.Sub(peerInfo.LastCrawled) < minTimeBetweenCrawls { continue } - // Otherwise, attempt to connect with the known address - err := r.Switch.DialPeerWithAddress(pi.Addr, false) + + // Record crawling attempt. + r.crawlPeerInfos[addr.ID] = crawlPeerInfo{ + Addr: addr, + LastCrawled: now, + } + + err := r.Switch.DialPeerWithAddress(addr, false) if err != nil { - r.book.MarkAttempt(pi.Addr) + if _, ok := err.(p2p.ErrCurrentlyDialingOrExistingAddress); ok { + continue + } + + r.Logger.Error("Dialing failed", "addr", addr, "err", err) + markAddrInBookBasedOnErr(addr, r.book, err) continue } - // Ask for more addresses - peer := r.Switch.Peers().Get(pi.Addr.ID) + + peer := r.Switch.Peers().Get(addr.ID) if peer != nil { r.RequestAddrs(peer) } } } +func (r *PEXReactor) cleanupCrawlPeerInfos() { + for id, info := range r.crawlPeerInfos { + // If we did not crawl a peer for 24 hours, it means the peer was removed + // from the addrbook => remove + // + // 10000 addresses / maxGetSelection = 40 cycles to get all addresses in + // the ideal case, + // 40 * crawlPeerPeriod ~ 20 minutes + if time.Since(info.LastCrawled) > 24*time.Hour { + delete(r.crawlPeerInfos, id) + } + } +} + // attemptDisconnects checks if we've been with each peer long enough to disconnect func (r *PEXReactor) attemptDisconnects() { for _, peer := range r.Switch.Peers().List() { - if peer.Status().Duration < defaultSeedDisconnectWaitPeriod { + if peer.Status().Duration < r.config.SeedDisconnectWaitPeriod { continue } if peer.IsPersistent() { @@ -672,6 +669,16 @@ func (r *PEXReactor) attemptDisconnects() { } } +func markAddrInBookBasedOnErr(addr *p2p.NetAddress, book AddrBook, err error) { + // TODO: detect more "bad peer" scenarios + switch err.(type) { + case p2p.ErrSwitchAuthenticationFailure: + book.MarkBad(addr) + default: + book.MarkAttempt(addr) + } +} + //----------------------------------------------------------------------------- // Messages diff --git a/p2p/pex/pex_reactor_test.go b/p2p/pex/pex_reactor_test.go index 4a6118c63..dda60daaa 100644 --- a/p2p/pex/pex_reactor_test.go +++ b/p2p/pex/pex_reactor_test.go @@ -204,26 +204,26 @@ func TestCheckSeeds(t *testing.T) { defer os.RemoveAll(dir) // nolint: errcheck // 1. test creating peer with no seeds works - peer := testCreateDefaultPeer(dir, 0) - require.Nil(t, peer.Start()) - peer.Stop() + peerSwitch := testCreateDefaultPeer(dir, 0) + require.Nil(t, peerSwitch.Start()) + peerSwitch.Stop() // 2. create seed seed := testCreateSeed(dir, 1, []*p2p.NetAddress{}, []*p2p.NetAddress{}) // 3. test create peer with online seed works - peer = testCreatePeerWithSeed(dir, 2, seed) - require.Nil(t, peer.Start()) - peer.Stop() + peerSwitch = testCreatePeerWithSeed(dir, 2, seed) + require.Nil(t, peerSwitch.Start()) + peerSwitch.Stop() // 4. test create peer with all seeds having unresolvable DNS fails badPeerConfig := &PEXReactorConfig{ Seeds: []string{"ed3dfd27bfc4af18f67a49862f04cc100696e84d@bad.network.addr:26657", "d824b13cb5d40fa1d8a614e089357c7eff31b670@anotherbad.network.addr:26657"}, } - peer = testCreatePeerWithConfig(dir, 2, badPeerConfig) - require.Error(t, peer.Start()) - peer.Stop() + peerSwitch = testCreatePeerWithConfig(dir, 2, badPeerConfig) + require.Error(t, peerSwitch.Start()) + peerSwitch.Stop() // 5. test create peer with one good seed address succeeds badPeerConfig = &PEXReactorConfig{ @@ -231,9 +231,9 @@ func TestCheckSeeds(t *testing.T) { "d824b13cb5d40fa1d8a614e089357c7eff31b670@anotherbad.network.addr:26657", seed.NetAddress().String()}, } - peer = testCreatePeerWithConfig(dir, 2, badPeerConfig) - require.Nil(t, peer.Start()) - peer.Stop() + peerSwitch = testCreatePeerWithConfig(dir, 2, badPeerConfig) + require.Nil(t, peerSwitch.Start()) + peerSwitch.Stop() } func TestPEXReactorUsesSeedsIfNeeded(t *testing.T) { @@ -285,31 +285,41 @@ func TestConnectionSpeedForPeerReceivedFromSeed(t *testing.T) { assertPeersWithTimeout(t, []*p2p.Switch{secondPeer}, 10*time.Millisecond, 1*time.Second, 2) } -func TestPEXReactorCrawlStatus(t *testing.T) { - pexR, book := createReactor(&PEXReactorConfig{SeedMode: true}) +func TestPEXReactorSeedMode(t *testing.T) { + // directory to store address books + dir, err := ioutil.TempDir("", "pex_reactor") + require.Nil(t, err) + defer os.RemoveAll(dir) // nolint: errcheck + + pexR, book := createReactor(&PEXReactorConfig{SeedMode: true, SeedDisconnectWaitPeriod: 10 * time.Millisecond}) defer teardownReactor(book) - // Seed/Crawler mode uses data from the Switch sw := createSwitchAndAddReactors(pexR) sw.SetAddrBook(book) + err = sw.Start() + require.NoError(t, err) + defer sw.Stop() - // Create a peer, add it to the peer set and the addrbook. - peer := p2p.CreateRandomPeer(false) - p2p.AddPeerToSwitch(pexR.Switch, peer) - addr1 := peer.SocketAddr() - pexR.book.AddAddress(addr1, addr1) + assert.Zero(t, sw.Peers().Size()) + + peerSwitch := testCreateDefaultPeer(dir, 1) + require.NoError(t, peerSwitch.Start()) + defer peerSwitch.Stop() - // Add a non-connected address to the book. - _, addr2 := p2p.CreateRoutableAddr() - pexR.book.AddAddress(addr2, addr1) + // 1. Test crawlPeers dials the peer + pexR.crawlPeers([]*p2p.NetAddress{peerSwitch.NetAddress()}) + assert.Equal(t, 1, sw.Peers().Size()) + assert.True(t, sw.Peers().Has(peerSwitch.NodeInfo().ID())) - // Get some peerInfos to crawl - peerInfos := pexR.getPeersToCrawl() + // 2. attemptDisconnects should not disconnect because of wait period + pexR.attemptDisconnects() + assert.Equal(t, 1, sw.Peers().Size()) - // Make sure it has the proper number of elements - assert.Equal(t, 2, len(peerInfos)) + time.Sleep(100 * time.Millisecond) - // TODO: test + // 3. attemptDisconnects should disconnect after wait period + pexR.attemptDisconnects() + assert.Equal(t, 0, sw.Peers().Size()) } // connect a peer to a seed, wait a bit, then stop it. diff --git a/p2p/switch.go b/p2p/switch.go index 76da9ad0c..5b1da1ead 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -339,14 +339,11 @@ func (sw *Switch) reconnectToPeer(addr *NetAddress) { return } - if sw.IsDialingOrExistingAddress(addr) { - sw.Logger.Debug("Peer connection has been established or dialed while we waiting next try", "addr", addr) - return - } - err := sw.DialPeerWithAddress(addr, true) if err == nil { return // success + } else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok { + return } sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr) @@ -365,9 +362,12 @@ func (sw *Switch) reconnectToPeer(addr *NetAddress) { // sleep an exponentially increasing amount sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i)) sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second) + err := sw.DialPeerWithAddress(addr, true) if err == nil { return // success + } else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok { + return } sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr) } @@ -435,15 +435,10 @@ func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent b sw.randomSleep(0) - if sw.IsDialingOrExistingAddress(addr) { - sw.Logger.Debug("Ignore attempt to connect to an existing peer", "addr", addr) - return - } - err := sw.DialPeerWithAddress(addr, persistent) if err != nil { switch err.(type) { - case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeerID: + case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeerID, ErrCurrentlyDialingOrExistingAddress: sw.Logger.Debug("Error dialing peer", "err", err) default: sw.Logger.Error("Error dialing peer", "err", err) @@ -454,11 +449,20 @@ func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent b return nil } -// DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects and authenticates successfully. -// If `persistent == true`, the switch will always try to reconnect to this peer if the connection ever fails. +// DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects +// and authenticates successfully. +// If `persistent == true`, the switch will always try to reconnect to this +// peer if the connection ever fails. +// If we're currently dialing this address or it belongs to an existing peer, +// ErrCurrentlyDialingOrExistingAddress is returned. func (sw *Switch) DialPeerWithAddress(addr *NetAddress, persistent bool) error { + if sw.IsDialingOrExistingAddress(addr) { + return ErrCurrentlyDialingOrExistingAddress{addr.String()} + } + sw.dialing.Set(string(addr.ID), addr) defer sw.dialing.Delete(string(addr.ID)) + return sw.addOutboundPeerWithConfig(addr, sw.config, persistent) } From 40da355234bda9d510dca370e94b646450673879 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Wed, 3 Apr 2019 14:56:51 +0200 Subject: [PATCH 03/12] docs: fix block.Header.Time description (#3529) It's not proposer local time anymore, but a weighted median Fixes #3514 --- CHANGELOG_PENDING.md | 1 + docs/spec/abci/abci.md | 6 ++++-- docs/spec/blockchain/blockchain.md | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 21e52fe60..22854a530 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -20,3 +20,4 @@ ### BUG FIXES: - [p2p] \#2716 Check if we're already connected to peer right before dialing it (@melekes) +- [docs] \#3514 Fix block.Header.Time description (@melekes) diff --git a/docs/spec/abci/abci.md b/docs/spec/abci/abci.md index c696c9384..c65d96ec1 100644 --- a/docs/spec/abci/abci.md +++ b/docs/spec/abci/abci.md @@ -347,8 +347,10 @@ Commit are included in the header of the next block. - `Version (Version)`: Version of the blockchain and the application - `ChainID (string)`: ID of the blockchain - `Height (int64)`: Height of the block in the chain - - `Time (google.protobuf.Timestamp)`: Time of the block. It is the proposer's - local time when block was created. + - `Time (google.protobuf.Timestamp)`: Time of the previous block. + For heights > 1, it's the weighted median of the timestamps of the valid + votes in the block.LastCommit. + For height == 1, it's genesis time. - `NumTxs (int32)`: Number of transactions in the block - `TotalTxs (int64)`: Total number of transactions in the blockchain until now diff --git a/docs/spec/blockchain/blockchain.md b/docs/spec/blockchain/blockchain.md index 9f88d6417..cd31c5dc1 100644 --- a/docs/spec/blockchain/blockchain.md +++ b/docs/spec/blockchain/blockchain.md @@ -244,7 +244,7 @@ The height is an incrementing integer. The first block has `block.Header.Height ### Time ``` -block.Header.Timestamp >= prevBlock.Header.Timestamp + 1 ms +block.Header.Timestamp >= prevBlock.Header.Timestamp + state.consensusParams.Block.TimeIotaMs block.Header.Timestamp == MedianTime(block.LastCommit, state.LastValidators) ``` From 9a415b057238d118c1edf23d673ebf9ce4f8d2ae Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Tue, 9 Apr 2019 18:21:35 +0200 Subject: [PATCH 04/12] docs: abci#Commit: better explain the possible deadlock (#3536) --- docs/spec/abci/apps.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/spec/abci/apps.md b/docs/spec/abci/apps.md index ca6abe7f8..47e62eed9 100644 --- a/docs/spec/abci/apps.md +++ b/docs/spec/abci/apps.md @@ -31,8 +31,13 @@ states to the latest committed state at once. When `Commit` completes, it unlocks the mempool. -Note that it is not possible to send transactions to Tendermint during `Commit` - if your app -tries to send a `/broadcast_tx` to Tendermint during Commit, it will deadlock. +WARNING: if the ABCI app logic processing the `Commit` message sends a +`/broadcast_tx_sync` or `/broadcast_tx_commit` and waits for the response +before proceeding, it will deadlock. Executing those `broadcast_tx` calls +involves acquiring a lock that is held during the `Commit` call, so it's not +possible. If you make the call to the `broadcast_tx` endpoints concurrently, +that's no problem, it just can't be part of the sequential logic of the +`Commit` function. ### Consensus Connection From bcec8be035a89b5d08ee00b8cc7115017a58c7e0 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Thu, 11 Apr 2019 15:32:16 +0200 Subject: [PATCH 05/12] p2p: do not log err if peer is private (#3474) * add actionable advice for ErrAddrBookNonRoutable err Should replace https://github.com/tendermint/tendermint/pull/3463 * reorder checks in addrbook#addAddress so ErrAddrBookPrivate is returned first and do not log error in DialPeersAsync if the address is private because it's not an error --- p2p/pex/addrbook.go | 20 ++++++++++---------- p2p/pex/errors.go | 8 ++++++++ p2p/switch.go | 17 ++++++++++++++++- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/p2p/pex/addrbook.go b/p2p/pex/addrbook.go index 6be03d75b..ca788aa66 100644 --- a/p2p/pex/addrbook.go +++ b/p2p/pex/addrbook.go @@ -586,16 +586,16 @@ func (a *addrBook) addAddress(addr, src *p2p.NetAddress) error { return ErrAddrBookNilAddr{addr, src} } - if a.routabilityStrict && !addr.Routable() { - return ErrAddrBookNonRoutable{addr} + if !addr.HasID() { + return ErrAddrBookInvalidAddrNoID{addr} } - if !addr.Valid() { - return ErrAddrBookInvalidAddr{addr} + if _, ok := a.privateIDs[addr.ID]; ok { + return ErrAddrBookPrivate{addr} } - if !addr.HasID() { - return ErrAddrBookInvalidAddrNoID{addr} + if _, ok := a.privateIDs[src.ID]; ok { + return ErrAddrBookPrivateSrc{src} } // TODO: we should track ourAddrs by ID and by IP:PORT and refuse both. @@ -603,12 +603,12 @@ func (a *addrBook) addAddress(addr, src *p2p.NetAddress) error { return ErrAddrBookSelf{addr} } - if _, ok := a.privateIDs[addr.ID]; ok { - return ErrAddrBookPrivate{addr} + if a.routabilityStrict && !addr.Routable() { + return ErrAddrBookNonRoutable{addr} } - if _, ok := a.privateIDs[src.ID]; ok { - return ErrAddrBookPrivateSrc{src} + if !addr.Valid() { + return ErrAddrBookInvalidAddr{addr} } ka := a.addrLookup[addr.ID] diff --git a/p2p/pex/errors.go b/p2p/pex/errors.go index 1f44ceee7..543056af5 100644 --- a/p2p/pex/errors.go +++ b/p2p/pex/errors.go @@ -30,6 +30,10 @@ func (err ErrAddrBookPrivate) Error() string { return fmt.Sprintf("Cannot add private peer with address %v", err.Addr) } +func (err ErrAddrBookPrivate) PrivateAddr() bool { + return true +} + type ErrAddrBookPrivateSrc struct { Src *p2p.NetAddress } @@ -38,6 +42,10 @@ func (err ErrAddrBookPrivateSrc) Error() string { return fmt.Sprintf("Cannot add peer coming from private peer with address %v", err.Src) } +func (err ErrAddrBookPrivateSrc) PrivateAddr() bool { + return true +} + type ErrAddrBookNilAddr struct { Addr *p2p.NetAddress Src *p2p.NetAddress diff --git a/p2p/switch.go b/p2p/switch.go index 5b1da1ead..04caabdc7 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -6,6 +6,8 @@ import ( "sync" "time" + "github.com/pkg/errors" + "github.com/tendermint/tendermint/config" cmn "github.com/tendermint/tendermint/libs/common" "github.com/tendermint/tendermint/p2p/conn" @@ -390,6 +392,15 @@ func (sw *Switch) MarkPeerAsGood(peer Peer) { //--------------------------------------------------------------------- // Dialing +type privateAddr interface { + PrivateAddr() bool +} + +func isPrivateAddr(err error) bool { + te, ok := errors.Cause(err).(privateAddr) + return ok && te.PrivateAddr() +} + // DialPeersAsync dials a list of peers asynchronously in random order (optionally, making them persistent). // Used to dial peers from config on startup or from unsafe-RPC (trusted sources). // TODO: remove addrBook arg since it's now set on the switch @@ -412,7 +423,11 @@ func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent b // do not add our address or ID if !netAddr.Same(ourAddr) { if err := addrBook.AddAddress(netAddr, ourAddr); err != nil { - sw.Logger.Error("Can't add peer's address to addrbook", "err", err) + if isPrivateAddr(err) { + sw.Logger.Debug("Won't add peer's address to addrbook", "err", err) + } else { + sw.Logger.Error("Can't add peer's address to addrbook", "err", err) + } } } } From c3df21fe827aad0447bb59a9546c32db8406a5fd Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Thu, 11 Apr 2019 18:59:14 +0300 Subject: [PATCH 06/12] add missing changelog entry (#3544) * add missing changelog entry --- CHANGELOG_PENDING.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 22854a530..91475e3a9 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -17,6 +17,7 @@ ### FEATURES: ### IMPROVEMENTS: +- [p2p] [\#3463](https://github.com/tendermint/tendermint/pull/3463) Do not log "Can't add peer's address to addrbook" error for a private peer ### BUG FIXES: - [p2p] \#2716 Check if we're already connected to peer right before dialing it (@melekes) From 18d2c45c334d9f22621ea0af0686e2e54937fb40 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Fri, 12 Apr 2019 10:46:07 +0200 Subject: [PATCH 07/12] rpc: Fix response time grow over time (#3537) * rpc: store validator info periodly * increase ValidatorSetStoreInterval also - unexpose it - add a comment - refactor code - add a benchmark, which shows that 100000 results in ~ 100ms to get 100 validators * make the change non-breaking * expand comment * rename valSetStoreInterval to valSetCheckpointInterval * change the panic msg * add a test and changelog entry * update changelog entry * update changelog entry * add a link to PR * fix test * Update CHANGELOG_PENDING.md Co-Authored-By: melekes * update comment * use MaxInt64 func --- CHANGELOG_PENDING.md | 9 ++++++ state/store.go | 53 +++++++++++++++++++++++++---------- state/store_test.go | 67 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 15 deletions(-) create mode 100644 state/store_test.go diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 91475e3a9..33f5f27ce 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -20,5 +20,14 @@ - [p2p] [\#3463](https://github.com/tendermint/tendermint/pull/3463) Do not log "Can't add peer's address to addrbook" error for a private peer ### BUG FIXES: + +- [state] [\#3438](https://github.com/tendermint/tendermint/pull/3438) + Persist validators every 100000 blocks even if no changes to the set + occurred (@guagualvcha). This + 1) Prevents possible DoS attack using `/validators` or `/status` RPC + endpoints. Before response time was growing linearly with height if no + changes were made to the validator set. + 2) Fixes performance degradation in `ExecCommitBlock` where we call + `LoadValidators` for each `Evidence` in the block. - [p2p] \#2716 Check if we're already connected to peer right before dialing it (@melekes) - [docs] \#3514 Fix block.Header.Time description (@melekes) diff --git a/state/store.go b/state/store.go index 6b01a8295..73116b43f 100644 --- a/state/store.go +++ b/state/store.go @@ -9,6 +9,14 @@ import ( "github.com/tendermint/tendermint/types" ) +const ( + // persist validators every valSetCheckpointInterval blocks to avoid + // LoadValidators taking too much time. + // https://github.com/tendermint/tendermint/pull/3438 + // 100000 results in ~ 100ms to get 100 validators (see BenchmarkLoadValidators) + valSetCheckpointInterval = 100000 +) + //------------------------------------------------------------------------ func calcValidatorsKey(height int64) []byte { @@ -182,25 +190,38 @@ func LoadValidators(db dbm.DB, height int64) (*types.ValidatorSet, error) { if valInfo == nil { return nil, ErrNoValSetForHeight{height} } - if valInfo.ValidatorSet == nil { - valInfo2 := loadValidatorsInfo(db, valInfo.LastHeightChanged) + lastStoredHeight := lastStoredHeightFor(height, valInfo.LastHeightChanged) + valInfo2 := loadValidatorsInfo(db, lastStoredHeight) if valInfo2 == nil { - panic( - fmt.Sprintf( - "Couldn't find validators at height %d as last changed from height %d", - valInfo.LastHeightChanged, - height, - ), - ) + // TODO (melekes): remove the below if condition in the 0.33 major + // release and just panic. Old chains might panic otherwise if they + // haven't saved validators at intermediate (%valSetCheckpointInterval) + // height yet. + // https://github.com/tendermint/tendermint/issues/3543 + valInfo2 = loadValidatorsInfo(db, valInfo.LastHeightChanged) + lastStoredHeight = valInfo.LastHeightChanged + if valInfo2 == nil { + panic( + fmt.Sprintf("Couldn't find validators at height %d (height %d was originally requested)", + lastStoredHeight, + height, + ), + ) + } } - valInfo2.ValidatorSet.IncrementProposerPriority(int(height - valInfo.LastHeightChanged)) // mutate + valInfo2.ValidatorSet.IncrementProposerPriority(int(height - lastStoredHeight)) // mutate valInfo = valInfo2 } return valInfo.ValidatorSet, nil } +func lastStoredHeightFor(height, lastHeightChanged int64) int64 { + checkpointHeight := height - height%valSetCheckpointInterval + return cmn.MaxInt64(checkpointHeight, lastHeightChanged) +} + // CONTRACT: Returned ValidatorsInfo can be mutated. func loadValidatorsInfo(db dbm.DB, height int64) *ValidatorsInfo { buf := db.Get(calcValidatorsKey(height)) @@ -221,10 +242,10 @@ func loadValidatorsInfo(db dbm.DB, height int64) *ValidatorsInfo { } // saveValidatorsInfo persists the validator set. -// `height` is the effective height for which the validator is responsible for signing. -// It should be called from s.Save(), right before the state itself is persisted. -// If the validator set did not change after processing the latest block, -// only the last height for which the validators changed is persisted. +// +// `height` is the effective height for which the validator is responsible for +// signing. It should be called from s.Save(), right before the state itself is +// persisted. func saveValidatorsInfo(db dbm.DB, height, lastHeightChanged int64, valSet *types.ValidatorSet) { if lastHeightChanged > height { panic("LastHeightChanged cannot be greater than ValidatorsInfo height") @@ -232,7 +253,9 @@ func saveValidatorsInfo(db dbm.DB, height, lastHeightChanged int64, valSet *type valInfo := &ValidatorsInfo{ LastHeightChanged: lastHeightChanged, } - if lastHeightChanged == height { + // Only persist validator set if it was updated or checkpoint height (see + // valSetCheckpointInterval) is reached. + if height == lastHeightChanged || height%valSetCheckpointInterval == 0 { valInfo.ValidatorSet = valSet } db.Set(calcValidatorsKey(height), valInfo.Bytes()) diff --git a/state/store_test.go b/state/store_test.go new file mode 100644 index 000000000..dd48cae71 --- /dev/null +++ b/state/store_test.go @@ -0,0 +1,67 @@ +package state + +import ( + "fmt" + "os" + "testing" + + "github.com/stretchr/testify/assert" + + cfg "github.com/tendermint/tendermint/config" + dbm "github.com/tendermint/tendermint/libs/db" + "github.com/tendermint/tendermint/types" +) + +func TestSaveValidatorsInfo(t *testing.T) { + // test we persist validators every valSetCheckpointInterval blocks + stateDB := dbm.NewMemDB() + val, _ := types.RandValidator(true, 10) + vals := types.NewValidatorSet([]*types.Validator{val}) + + // TODO(melekes): remove in 0.33 release + // https://github.com/tendermint/tendermint/issues/3543 + saveValidatorsInfo(stateDB, 1, 1, vals) + saveValidatorsInfo(stateDB, 2, 1, vals) + assert.NotPanics(t, func() { + _, err := LoadValidators(stateDB, 2) + if err != nil { + panic(err) + } + }) + //ENDREMOVE + + saveValidatorsInfo(stateDB, valSetCheckpointInterval, 1, vals) + + loadedVals, err := LoadValidators(stateDB, valSetCheckpointInterval) + assert.NoError(t, err) + assert.NotZero(t, loadedVals.Size()) +} + +func BenchmarkLoadValidators(b *testing.B) { + const valSetSize = 100 + + config := cfg.ResetTestRoot("state_") + defer os.RemoveAll(config.RootDir) + dbType := dbm.DBBackendType(config.DBBackend) + stateDB := dbm.NewDB("state", dbType, config.DBDir()) + state, err := LoadStateFromDBOrGenesisFile(stateDB, config.GenesisFile()) + if err != nil { + b.Fatal(err) + } + state.Validators = genValSet(valSetSize) + state.NextValidators = state.Validators.CopyIncrementProposerPriority(1) + SaveState(stateDB, state) + + for i := 10; i < 10000000000; i *= 10 { // 10, 100, 1000, ... + saveValidatorsInfo(stateDB, int64(i), state.LastHeightValidatorsChanged, state.NextValidators) + + b.Run(fmt.Sprintf("height=%d", i), func(b *testing.B) { + for n := 0; n < b.N; n++ { + _, err := LoadValidators(stateDB, int64(i)) + if err != nil { + b.Fatal(err) + } + } + }) + } +} From b5b3b85697a9c29adaab0e1ab5d1380459e2c5eb Mon Sep 17 00:00:00 2001 From: Alexander Simmerl Date: Fri, 12 Apr 2019 12:31:02 +0200 Subject: [PATCH 08/12] Bring back NodeInfo NetAddress form the dead (#3545) A prior change to address accidental DNS lookups introduced the SocketAddr on peer, which was then used to add it to the addressbook. Which in turn swallowed the self reported port of the peer, which is important on a reconnect. This change revives the NetAddress on NodeInfo which the Peer carries, but now returns an error to avoid nil dereferencing another issue observed in the past. Additionally we could potentially address #3532, yet the original problem statemenf of that issue stands. As a drive-by optimisation `MarkAsGood` now takes only a `p2p.ID` which makes it interface a bit stricter and leaner. --- p2p/node_info.go | 20 +++++++------------- p2p/pex/addrbook.go | 6 +++--- p2p/pex/addrbook_test.go | 8 ++++---- p2p/pex/pex_reactor.go | 15 ++++++++++++--- p2p/pex/pex_reactor_test.go | 2 +- p2p/switch.go | 4 ++-- p2p/switch_test.go | 2 +- p2p/test_util.go | 2 +- 8 files changed, 31 insertions(+), 28 deletions(-) diff --git a/p2p/node_info.go b/p2p/node_info.go index e80f1e1b7..8195471ed 100644 --- a/p2p/node_info.go +++ b/p2p/node_info.go @@ -24,9 +24,14 @@ func MaxNodeInfoSize() int { // and determines if we're compatible. type NodeInfo interface { ID() ID + nodeInfoAddress nodeInfoTransport } +type nodeInfoAddress interface { + NetAddress() (*NetAddress, error) +} + // nodeInfoTransport validates a nodeInfo and checks // our compatibility with it. It's for use in the handshake. type nodeInfoTransport interface { @@ -209,20 +214,9 @@ OUTER_LOOP: // it includes the authenticated peer ID and the self-reported // ListenAddr. Note that the ListenAddr is not authenticated and // may not match that address actually dialed if its an outbound peer. -func (info DefaultNodeInfo) NetAddress() *NetAddress { +func (info DefaultNodeInfo) NetAddress() (*NetAddress, error) { idAddr := IDAddressString(info.ID(), info.ListenAddr) - netAddr, err := NewNetAddressString(idAddr) - if err != nil { - switch err.(type) { - case ErrNetAddressLookup: - // XXX If the peer provided a host name and the lookup fails here - // we're out of luck. - // TODO: use a NetAddress in DefaultNodeInfo - default: - panic(err) // everything should be well formed by now - } - } - return netAddr + return NewNetAddressString(idAddr) } //----------------------------------------------------------- diff --git a/p2p/pex/addrbook.go b/p2p/pex/addrbook.go index ca788aa66..85dd05248 100644 --- a/p2p/pex/addrbook.go +++ b/p2p/pex/addrbook.go @@ -55,7 +55,7 @@ type AddrBook interface { PickAddress(biasTowardsNewAddrs int) *p2p.NetAddress // Mark address - MarkGood(*p2p.NetAddress) + MarkGood(p2p.ID) MarkAttempt(*p2p.NetAddress) MarkBad(*p2p.NetAddress) @@ -296,11 +296,11 @@ func (a *addrBook) PickAddress(biasTowardsNewAddrs int) *p2p.NetAddress { // MarkGood implements AddrBook - it marks the peer as good and // moves it into an "old" bucket. -func (a *addrBook) MarkGood(addr *p2p.NetAddress) { +func (a *addrBook) MarkGood(id p2p.ID) { a.mtx.Lock() defer a.mtx.Unlock() - ka := a.addrLookup[addr.ID] + ka := a.addrLookup[id] if ka == nil { return } diff --git a/p2p/pex/addrbook_test.go b/p2p/pex/addrbook_test.go index fdcb0c8ad..13bac28c8 100644 --- a/p2p/pex/addrbook_test.go +++ b/p2p/pex/addrbook_test.go @@ -41,7 +41,7 @@ func TestAddrBookPickAddress(t *testing.T) { assert.NotNil(t, addr, "expected an address") // pick an address when we only have old address - book.MarkGood(addrSrc.addr) + book.MarkGood(addrSrc.addr.ID) addr = book.PickAddress(0) assert.NotNil(t, addr, "expected an address") addr = book.PickAddress(50) @@ -126,7 +126,7 @@ func TestAddrBookPromoteToOld(t *testing.T) { // Promote half of them for i, addrSrc := range randAddrs { if i%2 == 0 { - book.MarkGood(addrSrc.addr) + book.MarkGood(addrSrc.addr.ID) } } @@ -330,7 +330,7 @@ func TestAddrBookGetSelectionWithBias(t *testing.T) { randAddrsLen := len(randAddrs) for i, addrSrc := range randAddrs { if int((float64(i)/float64(randAddrsLen))*100) >= 20 { - book.MarkGood(addrSrc.addr) + book.MarkGood(addrSrc.addr.ID) } } @@ -569,7 +569,7 @@ func createAddrBookWithMOldAndNNewAddrs(t *testing.T, nOld, nNew int) (book *add randAddrs := randNetAddressPairs(t, nOld) for _, addr := range randAddrs { book.AddAddress(addr.addr, addr.src) - book.MarkGood(addr.addr) + book.MarkGood(addr.addr.ID) } randAddrs = randNetAddressPairs(t, nNew) diff --git a/p2p/pex/pex_reactor.go b/p2p/pex/pex_reactor.go index cf8cfe6f5..c24ee983c 100644 --- a/p2p/pex/pex_reactor.go +++ b/p2p/pex/pex_reactor.go @@ -170,12 +170,18 @@ func (r *PEXReactor) AddPeer(p Peer) { } } else { // inbound peer is its own source - addr := p.SocketAddr() + addr, err := p.NodeInfo().NetAddress() + if err != nil { + r.Logger.Error("Failed to get peer NetAddress", "err", err, "peer", p) + return + } + + // Make it explicit that addr and src are the same for an inbound peer. src := addr // add to book. dont RequestAddrs right away because // we don't trust inbound as much - let ensurePeersRoutine handle it. - err := r.book.AddAddress(addr, src) + err = r.book.AddAddress(addr, src) r.logErrAddrBook(err) } } @@ -312,7 +318,10 @@ func (r *PEXReactor) ReceiveAddrs(addrs []*p2p.NetAddress, src Peer) error { } r.requestsSent.Delete(id) - srcAddr := src.SocketAddr() + srcAddr, err := src.NodeInfo().NetAddress() + if err != nil { + return err + } for _, netAddr := range addrs { // Validate netAddr. Disconnect from a peer if it sends us invalid data. if netAddr == nil { diff --git a/p2p/pex/pex_reactor_test.go b/p2p/pex/pex_reactor_test.go index dda60daaa..077f07a60 100644 --- a/p2p/pex/pex_reactor_test.go +++ b/p2p/pex/pex_reactor_test.go @@ -539,7 +539,7 @@ func testCreateSeed(dir string, id int, knownAddrs, srcAddrs []*p2p.NetAddress) book.SetLogger(log.TestingLogger()) for j := 0; j < len(knownAddrs); j++ { book.AddAddress(knownAddrs[j], srcAddrs[j]) - book.MarkGood(knownAddrs[j]) + book.MarkGood(knownAddrs[j].ID) } sw.SetAddrBook(book) diff --git a/p2p/switch.go b/p2p/switch.go index 04caabdc7..afd7d9656 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -48,7 +48,7 @@ type AddrBook interface { AddAddress(addr *NetAddress, src *NetAddress) error AddOurAddress(*NetAddress) OurAddress(*NetAddress) bool - MarkGood(*NetAddress) + MarkGood(ID) RemoveAddress(*NetAddress) HasAddress(*NetAddress) bool Save() @@ -385,7 +385,7 @@ func (sw *Switch) SetAddrBook(addrBook AddrBook) { // like contributed to consensus. func (sw *Switch) MarkPeerAsGood(peer Peer) { if sw.addrBook != nil { - sw.addrBook.MarkGood(peer.SocketAddr()) + sw.addrBook.MarkGood(peer.ID()) } } diff --git a/p2p/switch_test.go b/p2p/switch_test.go index ab8ae9e9f..bf105e0fa 100644 --- a/p2p/switch_test.go +++ b/p2p/switch_test.go @@ -626,7 +626,7 @@ func (book *addrBookMock) OurAddress(addr *NetAddress) bool { _, ok := book.ourAddrs[addr.String()] return ok } -func (book *addrBookMock) MarkGood(*NetAddress) {} +func (book *addrBookMock) MarkGood(ID) {} func (book *addrBookMock) HasAddress(addr *NetAddress) bool { _, ok := book.addrs[addr.String()] return ok diff --git a/p2p/test_util.go b/p2p/test_util.go index df60539ba..f8020924c 100644 --- a/p2p/test_util.go +++ b/p2p/test_util.go @@ -23,7 +23,7 @@ type mockNodeInfo struct { } func (ni mockNodeInfo) ID() ID { return ni.addr.ID } -func (ni mockNodeInfo) NetAddress() *NetAddress { return ni.addr } +func (ni mockNodeInfo) NetAddress() (*NetAddress, error) { return ni.addr, nil } func (ni mockNodeInfo) Validate() error { return nil } func (ni mockNodeInfo) CompatibleWith(other NodeInfo) error { return nil } From 4e4224213f3eaa89abeac6686efe4ac970ad63d0 Mon Sep 17 00:00:00 2001 From: Sean Braithwaite Date: Fri, 12 Apr 2019 12:32:00 +0200 Subject: [PATCH 09/12] adr: Peer Behaviour (#3539) * [adr] ADR 037: Peer Behaviour inital draft * Update docs/architecture/adr-037-peer-behaviour.md Co-Authored-By: brapse * Update docs/architecture/adr-037-peer-behaviour.md Co-Authored-By: brapse * [docs] adr-037 Better footnote styling * [ADR] ADR-037 adjust Footnotes for github markdown * [ADR] ADR-037 fix numbered list --- docs/architecture/adr-037-peer-behaviour.md | 145 ++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 docs/architecture/adr-037-peer-behaviour.md diff --git a/docs/architecture/adr-037-peer-behaviour.md b/docs/architecture/adr-037-peer-behaviour.md new file mode 100644 index 000000000..36b024482 --- /dev/null +++ b/docs/architecture/adr-037-peer-behaviour.md @@ -0,0 +1,145 @@ +# ADR 037: Peer Behaviour Interface + +## Changelog +* 07-03-2019: Initial draft + +## Context + +The responsibility for signaling and acting upon peer behaviour lacks a single +owning component and is heavily coupled with the network stack[1](#references). Reactors +maintain a reference to the `p2p.Switch` which they use to call +`switch.StopPeerForError(...)` when a peer misbehaves and +`switch.MarkAsGood(...)` when a peer contributes in some meaningful way. +While the switch handles `StopPeerForError` internally, the `MarkAsGood` +method delegates to another component, `p2p.AddrBook`. This scheme of delegation +across Switch obscures the responsibility for handling peer behaviour +and ties up the reactors in a larger dependency graph when testing. + +## Decision + +Introduce a `PeerBehaviour` interface and concrete implementations which +provide methods for reactors to signal peer behaviour without direct +coupling `p2p.Switch`. Introduce a ErrPeer to provide +concrete reasons for stopping peers. + +### Implementation Changes + +PeerBehaviour then becomes an interface for signaling peer errors as well +as for marking peers as `good`. + +XXX: It might be better to pass p2p.ID instead of the whole peer but as +a first draft maintain the underlying implementation as much as +possible. + +```go +type PeerBehaviour interface { + Errored(peer Peer, reason ErrPeer) + MarkPeerAsGood(peer Peer) +} +``` + +Instead of signaling peers to stop with arbitrary reasons: +`reason interface{}` + +We introduce a concrete error type ErrPeer: +```go +type ErrPeer int + +const ( + ErrPeerUnknown = iota + ErrPeerBadMessage + ErrPeerMessageOutofOrder + ... +) +``` + +As a first iteration we provide a concrete implementation which wraps +the switch: +```go +type SwitchedPeerBehaviour struct { + sw *Switch +} + +func (spb *SwitchedPeerBehaviour) Errored(peer Peer, reason ErrPeer) { + spb.sw.StopPeerForError(peer, reason) +} + +func (spb *SwitchedPeerBehaviour) MarkPeerAsGood(peer Peer) { + spb.sw.MarkPeerAsGood(peer) +} + +func NewSwitchedPeerBehaviour(sw *Switch) *SwitchedPeerBehaviour { + return &SwitchedPeerBehaviour{ + sw: sw, + } +} +``` + +Reactors, which are often difficult to unit test[2](#references). could use an implementation which exposes the signals produced by the reactor in +manufactured scenarios: + +```go +type PeerErrors map[Peer][]ErrPeer +type GoodPeers map[Peer]bool + +type StorePeerBehaviour struct { + pe PeerErrors + gp GoodPeers +} + +func NewStorePeerBehaviour() *StorePeerBehaviour{ + return &StorePeerBehaviour{ + pe: make(PeerErrors), + gp: GoodPeers{}, + } +} + +func (spb StorePeerBehaviour) Errored(peer Peer, reason ErrPeer) { + if _, ok := spb.pe[peer]; !ok { + spb.pe[peer] = []ErrPeer{reason} + } else { + spb.pe[peer] = append(spb.pe[peer], reason) + } +} + +func (mpb *StorePeerBehaviour) GetPeerErrors() PeerErrors { + return mpb.pe +} + +func (spb *StorePeerBehaviour) MarkPeerAsGood(peer Peer) { + if _, ok := spb.gp[peer]; !ok { + spb.gp[peer] = true + } +} + +func (spb *StorePeerBehaviour) GetGoodPeers() GoodPeers { + return spb.gp +} +``` + +## Status + +Proposed + +## Consequences + +### Positive + + * De-couple signaling from acting upon peer behaviour. + * Reduce the coupling of reactors and the Switch and the network + stack + * The responsibility of managing peer behaviour can be migrated to + a single component instead of split between the switch and the + address book. + +### Negative + + * The first iteration will simply wrap the Switch and introduce a + level of indirection. + +### Neutral + +## References + +1. Issue [#2067](https://github.com/tendermint/tendermint/issues/2067): P2P Refactor +2. PR: [#3506](https://github.com/tendermint/tendermint/pull/3506): ADR 036: Blockchain Reactor Refactor From a453628c4e43c67c747d5078704479aa73716182 Mon Sep 17 00:00:00 2001 From: Martin Dyring-Andersen Date: Fri, 12 Apr 2019 13:25:14 +0200 Subject: [PATCH 10/12] Fix a couple of typos (#3547) Fix some typos in p2p/transport.go --- p2p/transport.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/p2p/transport.go b/p2p/transport.go index 6717db483..ebf77c9f4 100644 --- a/p2p/transport.go +++ b/p2p/transport.go @@ -364,7 +364,7 @@ func (mt *MultiplexTransport) upgrade( if err != nil { return nil, nil, ErrRejected{ conn: c, - err: fmt.Errorf("secrect conn failed: %v", err), + err: fmt.Errorf("secret conn failed: %v", err), isAuthFailure: true, } } @@ -377,7 +377,7 @@ func (mt *MultiplexTransport) upgrade( conn: c, id: connID, err: fmt.Errorf( - "conn.ID (%v) dialed ID (%v) missmatch", + "conn.ID (%v) dialed ID (%v) mismatch", connID, dialedID, ), @@ -409,7 +409,7 @@ func (mt *MultiplexTransport) upgrade( conn: c, id: connID, err: fmt.Errorf( - "conn.ID (%v) NodeInfo.ID (%v) missmatch", + "conn.ID (%v) NodeInfo.ID (%v) mismatch", connID, nodeInfo.ID(), ), From b6da8880c22202a7061ae2093ef01819438a136e Mon Sep 17 00:00:00 2001 From: Ismail Khoffi Date: Fri, 12 Apr 2019 14:24:51 +0200 Subject: [PATCH 11/12] prepare v0.31.4 release: - prep changelog - add missing changelog entries - fix minor glitch in existing changelog (v0.31.2) - bump versions --- CHANGELOG.md | 39 ++++++++++++++++++++++++++++++++++++++- CHANGELOG_PENDING.md | 14 +------------- version/version.go | 2 +- 3 files changed, 40 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52a926aed..d14f6851c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,42 @@ # Changelog +## v0.31.4 + +*April 12th, 2019* + +This release fixes a regression from v0.31.3 which used the peer's `SocketAddr` to add the peer to +the address book. This swallowed the peer's self-reported port which is important in case of reconnect. +It brings back `NetAddress()` to `NodeInfo` and uses it instead of `SocketAddr` for adding peers. +Additionally, it improves response time on the `/validators` or `/status` RPC endpoints. +As a side-effect it makes these RPC endpoint more difficult to DoS and fixes a performance degradation in `ExecCommitBlock`. + +Special thanks to external contributors on this release: +@brapse, @guagualvcha, @mydring + +### BREAKING CHANGES: + +* Go API + - [p2p] [\#3545](https://github.com/tendermint/tendermint/pull/3545) The `AddrBook` interface method `MarkAsGood` now only takes a `p2p.ID` instead of a `p2p.NetAddress` + - [p2p] [\#3011](https://github.com/tendermint/tendermint/pull/3011) Remove `ListOfKnownAddresses` from the `AddrBook` interface + +### IMPROVEMENTS: +- [p2p] [\#3463](https://github.com/tendermint/tendermint/pull/3463) Do not log "Can't add peer's address to addrbook" error for a private peer +- [p2p] [\#3547](https://github.com/tendermint/tendermint/pull/3547) Fix a couple of annoying typos (@mdyring) + +### BUG FIXES: + +- [docs] [\#3514](https://github.com/tendermint/tendermint/issues/3514) Fix block.Header.Time description (@melekes) +- [p2p] [\#2716](https://github.com/tendermint/tendermint/issues/2716) Check if we're already connected to peer right before dialing it (@melekes) +- [p2p] [\#3545](https://github.com/tendermint/tendermint/issues/3545) Add back `NetAddress()` to `NodeInfo` and use it instead of peer's `SocketAddr()` when adding a peer to the `PEXReactor` (potential fix for [\#3532](https://github.com/tendermint/tendermint/issues/3532)) +- [state] [\#3438](https://github.com/tendermint/tendermint/pull/3438) + Persist validators every 100000 blocks even if no changes to the set + occurred (@guagualvcha). This + 1) Prevents possible DoS attack using `/validators` or `/status` RPC + endpoints. Before response time was growing linearly with height if no + changes were made to the validator set. + 2) Fixes performance degradation in `ExecCommitBlock` where we call + `LoadValidators` for each `Evidence` in the block. + ## v0.31.3 *April 1st, 2019* @@ -35,7 +72,7 @@ Special thanks to external contributors on this release: * Apps * Go API -- [libs/autofile] [\#3504](https://github.com/tendermint/tendermint/issues/3504) Remove unused code in autofile package. Deleted functions: `Group.Search`, `Group.FindLast`, `GroupReader.ReadLine`, `GroupReader.PushLine`, `MakeSimpleSearchFunc` (@guagualvcha) + - [libs/autofile] [\#3504](https://github.com/tendermint/tendermint/issues/3504) Remove unused code in autofile package. Deleted functions: `Group.Search`, `Group.FindLast`, `GroupReader.ReadLine`, `GroupReader.PushLine`, `MakeSimpleSearchFunc` (@guagualvcha) * Blockchain Protocol diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 33f5f27ce..bcb2af539 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -1,4 +1,4 @@ -## v0.31.2 +## v0.31.5 ** @@ -17,17 +17,5 @@ ### FEATURES: ### IMPROVEMENTS: -- [p2p] [\#3463](https://github.com/tendermint/tendermint/pull/3463) Do not log "Can't add peer's address to addrbook" error for a private peer ### BUG FIXES: - -- [state] [\#3438](https://github.com/tendermint/tendermint/pull/3438) - Persist validators every 100000 blocks even if no changes to the set - occurred (@guagualvcha). This - 1) Prevents possible DoS attack using `/validators` or `/status` RPC - endpoints. Before response time was growing linearly with height if no - changes were made to the validator set. - 2) Fixes performance degradation in `ExecCommitBlock` where we call - `LoadValidators` for each `Evidence` in the block. -- [p2p] \#2716 Check if we're already connected to peer right before dialing it (@melekes) -- [docs] \#3514 Fix block.Header.Time description (@melekes) diff --git a/version/version.go b/version/version.go index a42a8f005..9ba38de6c 100644 --- a/version/version.go +++ b/version/version.go @@ -20,7 +20,7 @@ const ( // Must be a string because scripts like dist.sh read this file. // XXX: Don't change the name of this variable or you will break // automation :) - TMCoreSemVer = "0.31.3" + TMCoreSemVer = "0.31.4" // ABCISemVer is the semantic version of the ABCI library ABCISemVer = "0.16.0" From def5c8cf124ff58cc9c4a61ea017808a0c81c722 Mon Sep 17 00:00:00 2001 From: Ismail Khoffi Date: Fri, 12 Apr 2019 16:48:34 +0200 Subject: [PATCH 12/12] address review comments: (#3550) - mention ADR in release summary - remove [p2p] api changes - amend v0.31.3 log to contain note about breaking change --- CHANGELOG.md | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d14f6851c..057b2e7be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,16 +9,12 @@ the address book. This swallowed the peer's self-reported port which is importan It brings back `NetAddress()` to `NodeInfo` and uses it instead of `SocketAddr` for adding peers. Additionally, it improves response time on the `/validators` or `/status` RPC endpoints. As a side-effect it makes these RPC endpoint more difficult to DoS and fixes a performance degradation in `ExecCommitBlock`. +Also, it contains an [ADR](https://github.com/tendermint/tendermint/pull/3539) that proposes decoupling the +responsibility for peer behaviour from the `p2p.Switch` (by @brapse). Special thanks to external contributors on this release: @brapse, @guagualvcha, @mydring -### BREAKING CHANGES: - -* Go API - - [p2p] [\#3545](https://github.com/tendermint/tendermint/pull/3545) The `AddrBook` interface method `MarkAsGood` now only takes a `p2p.ID` instead of a `p2p.NetAddress` - - [p2p] [\#3011](https://github.com/tendermint/tendermint/pull/3011) Remove `ListOfKnownAddresses` from the `AddrBook` interface - ### IMPROVEMENTS: - [p2p] [\#3463](https://github.com/tendermint/tendermint/pull/3463) Do not log "Can't add peer's address to addrbook" error for a private peer - [p2p] [\#3547](https://github.com/tendermint/tendermint/pull/3547) Fix a couple of annoying typos (@mdyring) @@ -45,6 +41,12 @@ This release includes two security sensitive fixes: it ensures generated private keys are valid, and it prevents certain DNS lookups that would cause the node to panic if the lookup failed. +### BREAKING CHANGES: +* Go API + - [crypto/secp256k1] [\#3439](https://github.com/tendermint/tendermint/issues/3439) + The `secp256k1.GenPrivKeySecp256k1` function has changed to guarantee that it returns a valid key, which means it + will return a different private key than in previous versions for the same secret. + ### BUG FIXES: - [crypto/secp256k1] [\#3439](https://github.com/tendermint/tendermint/issues/3439)