From af77077f3c1802036fac21651b13bfd7b86b755d Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Thu, 25 Jul 2019 15:06:18 +0400 Subject: [PATCH] p2p: Fix error logging for connection stop (#3824) * p2p: fix false-positive error logging when stopping connections This changeset fixes two types of false-positive errors occurring during connection shutdown. The first occurs when the process invokes FlushStop() or Stop() on a connection. While the previous behavior did properly wait for the sendRoutine to finish, it did not notify the recvRoutine that the connection was shutting down. This would cause the recvRouting to receive and error when reading and log this error. The changeset fixes this by notifying the recvRoutine that the connection is shutting down. The second occurs when the connection is terminated (gracefully) by the other side. The recvRoutine would get an EOF error during the read, log it, and stop the connection with an error. The changeset detects EOF and gracefully shuts down the connection. * bring back the comment about flushing * add changelog entry * listen for quitRecvRoutine too * we have to call stopForError Otherwise peer won't be removed from the peer set and maybe readded later. --- CHANGELOG_PENDING.md | 1 + p2p/conn/connection.go | 32 ++++++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index c57b6b82e..e05213fc7 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -29,4 +29,5 @@ program](https://hackerone.com/tendermint). ### BUG FIXES: +- [p2p] [\#3644](https://github.com/tendermint/tendermint/pull/3644) Fix error logging for connection stop (@defunctzombie) - [rpc] \#3813 Return err if page is incorrect (less than 0 or greater than total pages) diff --git a/p2p/conn/connection.go b/p2p/conn/connection.go index ee29fc85c..a206af542 100644 --- a/p2p/conn/connection.go +++ b/p2p/conn/connection.go @@ -90,6 +90,9 @@ type MConnection struct { quitSendRoutine chan struct{} doneSendRoutine chan struct{} + // Closing quitRecvRouting will cause the recvRouting to eventually quit. + quitRecvRoutine chan struct{} + // used to ensure FlushStop and OnStop // are safe to call concurrently. stopMtx sync.Mutex @@ -206,6 +209,7 @@ func (c *MConnection) OnStart() error { c.chStatsTimer = time.NewTicker(updateStats) c.quitSendRoutine = make(chan struct{}) c.doneSendRoutine = make(chan struct{}) + c.quitRecvRoutine = make(chan struct{}) go c.sendRoutine() go c.recvRoutine() return nil @@ -220,7 +224,14 @@ func (c *MConnection) stopServices() (alreadyStopped bool) { select { case <-c.quitSendRoutine: - // already quit via FlushStop or OnStop + // already quit + return true + default: + } + + select { + case <-c.quitRecvRoutine: + // already quit return true default: } @@ -230,6 +241,8 @@ func (c *MConnection) stopServices() (alreadyStopped bool) { c.pingTimer.Stop() c.chStatsTimer.Stop() + // inform the recvRouting that we are shutting down + close(c.quitRecvRoutine) close(c.quitSendRoutine) return false } @@ -250,8 +263,6 @@ func (c *MConnection) FlushStop() { <-c.doneSendRoutine // Send and flush all pending msgs. - // By now, IsRunning == false, - // so any concurrent attempts to send will fail. // Since sendRoutine has exited, we can call this // safely eof := c.sendSomePacketMsgs() @@ -550,9 +561,22 @@ FOR_LOOP: var err error _n, err = cdc.UnmarshalBinaryLengthPrefixedReader(c.bufConnReader, &packet, int64(c._maxPacketMsgSize)) c.recvMonitor.Update(int(_n)) + if err != nil { + // stopServices was invoked and we are shutting down + // receiving is excpected to fail since we will close the connection + select { + case <-c.quitRecvRoutine: + break FOR_LOOP + default: + } + if c.IsRunning() { - c.Logger.Error("Connection failed @ recvRoutine (reading byte)", "conn", c, "err", err) + if err == io.EOF { + c.Logger.Info("Connection is closed @ recvRoutine (likely by the other side)", "conn", c) + } else { + c.Logger.Error("Connection failed @ recvRoutine (reading byte)", "conn", c, "err", err) + } c.stopForError(err) } break FOR_LOOP