Browse Source

p2p: Fix error logging for connection stop (#3824)

* p2p: fix false-positive error logging when stopping connections

This changeset fixes two types of false-positive errors occurring during
connection shutdown.

The first occurs when the process invokes FlushStop() or Stop() on a
connection. While the previous behavior did properly wait for the sendRoutine
to finish, it did not notify the recvRoutine that the connection was shutting
down. This would cause the recvRouting to receive and error when reading and
log this error. The changeset fixes this by notifying the recvRoutine that
the connection is shutting down.

The second occurs when the connection is terminated (gracefully) by the other side.
The recvRoutine would get an EOF error during the read, log it, and stop the connection
with an error. The changeset detects EOF and gracefully shuts down the connection.

* bring back the comment about flushing

* add changelog entry

* listen for quitRecvRoutine too

* we have to call stopForError

Otherwise peer won't be removed from the peer set and maybe readded
later.
pull/3926/head
Anton Kaliaev 5 years ago
committed by Jack Zampolin
parent
commit
55066ceaad
2 changed files with 29 additions and 4 deletions
  1. +1
    -0
      CHANGELOG_PENDING.md
  2. +28
    -4
      p2p/conn/connection.go

+ 1
- 0
CHANGELOG_PENDING.md View File

@ -29,4 +29,5 @@ program](https://hackerone.com/tendermint).
### BUG FIXES:
- [p2p] [\#3644](https://github.com/tendermint/tendermint/pull/3644) Fix error logging for connection stop (@defunctzombie)
- [rpc] \#3813 Return err if page is incorrect (less than 0 or greater than total pages)

+ 28
- 4
p2p/conn/connection.go View File

@ -90,6 +90,9 @@ type MConnection struct {
quitSendRoutine chan struct{}
doneSendRoutine chan struct{}
// Closing quitRecvRouting will cause the recvRouting to eventually quit.
quitRecvRoutine chan struct{}
// used to ensure FlushStop and OnStop
// are safe to call concurrently.
stopMtx sync.Mutex
@ -206,6 +209,7 @@ func (c *MConnection) OnStart() error {
c.chStatsTimer = time.NewTicker(updateStats)
c.quitSendRoutine = make(chan struct{})
c.doneSendRoutine = make(chan struct{})
c.quitRecvRoutine = make(chan struct{})
go c.sendRoutine()
go c.recvRoutine()
return nil
@ -220,7 +224,14 @@ func (c *MConnection) stopServices() (alreadyStopped bool) {
select {
case <-c.quitSendRoutine:
// already quit via FlushStop or OnStop
// already quit
return true
default:
}
select {
case <-c.quitRecvRoutine:
// already quit
return true
default:
}
@ -230,6 +241,8 @@ func (c *MConnection) stopServices() (alreadyStopped bool) {
c.pingTimer.Stop()
c.chStatsTimer.Stop()
// inform the recvRouting that we are shutting down
close(c.quitRecvRoutine)
close(c.quitSendRoutine)
return false
}
@ -250,8 +263,6 @@ func (c *MConnection) FlushStop() {
<-c.doneSendRoutine
// Send and flush all pending msgs.
// By now, IsRunning == false,
// so any concurrent attempts to send will fail.
// Since sendRoutine has exited, we can call this
// safely
eof := c.sendSomePacketMsgs()
@ -550,9 +561,22 @@ FOR_LOOP:
var err error
_n, err = cdc.UnmarshalBinaryLengthPrefixedReader(c.bufConnReader, &packet, int64(c._maxPacketMsgSize))
c.recvMonitor.Update(int(_n))
if err != nil {
// stopServices was invoked and we are shutting down
// receiving is excpected to fail since we will close the connection
select {
case <-c.quitRecvRoutine:
break FOR_LOOP
default:
}
if c.IsRunning() {
c.Logger.Error("Connection failed @ recvRoutine (reading byte)", "conn", c, "err", err)
if err == io.EOF {
c.Logger.Info("Connection is closed @ recvRoutine (likely by the other side)", "conn", c)
} else {
c.Logger.Error("Connection failed @ recvRoutine (reading byte)", "conn", c, "err", err)
}
c.stopForError(err)
}
break FOR_LOOP


Loading…
Cancel
Save