Browse Source

Filter WAL close errors at rethrow.

pull/8111/head
M. J. Fromberger 3 years ago
parent
commit
8ee41c5a2a
2 changed files with 22 additions and 9 deletions
  1. +17
    -4
      internal/consensus/state.go
  2. +5
    -5
      internal/libs/autofile/autofile.go

+ 17
- 4
internal/consensus/state.go View File

@ -20,6 +20,7 @@ import (
cstypes "github.com/tendermint/tendermint/internal/consensus/types"
"github.com/tendermint/tendermint/internal/eventbus"
"github.com/tendermint/tendermint/internal/jsontypes"
"github.com/tendermint/tendermint/internal/libs/autofile"
sm "github.com/tendermint/tendermint/internal/state"
tmevents "github.com/tendermint/tendermint/libs/events"
"github.com/tendermint/tendermint/libs/log"
@ -877,6 +878,18 @@ func (cs *State) receiveRoutine(ctx context.Context, maxSteps int) {
onExit(cs)
// Re-panic to ensure the node terminates.
//
// TODO(creachadair): In ordinary operation, the WAL autofile should
// never be closed. This only happens during shutdown and production
// nodes usually halt by panicking. Many existing tests, however,
// assume a clean shutdown is possible. Prior to #8111, we were
// swallowing the panic in receiveRoutine, making that appear to
// work. Filtering this specific error is slightly risky, but should
// affect only unit tests. In any case, not re-panicking here only
// preserves the pre-existing behaviour for this one error type.
if err, ok := r.(error); ok && errors.Is(err, autofile.ErrAutoFileClosed) {
return
}
panic(r)
}
}()
@ -906,8 +919,8 @@ func (cs *State) receiveRoutine(ctx context.Context, maxSteps int) {
case mi := <-cs.internalMsgQueue:
err := cs.wal.WriteSync(mi) // NOTE: fsync
if err != nil {
panic(fmt.Sprintf(
"failed to write %v msg to consensus WAL due to %v; check your file system and restart the node",
panic(fmt.Errorf(
"failed to write %v msg to consensus WAL due to %w; check your file system and restart the node",
mi, err,
))
}
@ -1900,8 +1913,8 @@ func (cs *State) finalizeCommit(ctx context.Context, height int64) {
// restart).
endMsg := EndHeightMessage{height}
if err := cs.wal.WriteSync(endMsg); err != nil { // NOTE: fsync
panic(fmt.Sprintf(
"failed to write %v msg to consensus WAL due to %v; check your file system and restart the node",
panic(fmt.Errorf(
"failed to write %v msg to consensus WAL due to %w; check your file system and restart the node",
endMsg, err,
))
}


+ 5
- 5
internal/libs/autofile/autofile.go View File

@ -41,9 +41,9 @@ const (
autoFilePerms = os.FileMode(0600)
)
// errAutoFileClosed is reported when operations attempt to use an autofile
// ErrAutoFileClosed is reported when operations attempt to use an autofile
// after it has been closed.
var errAutoFileClosed = errors.New("autofile is closed")
var ErrAutoFileClosed = errors.New("autofile is closed")
// AutoFile automatically closes and re-opens file for writing. The file is
// automatically setup to close itself every 1s and upon receiving SIGHUP.
@ -155,7 +155,7 @@ func (af *AutoFile) Write(b []byte) (n int, err error) {
af.mtx.Lock()
defer af.mtx.Unlock()
if af.closed {
return 0, fmt.Errorf("write: %w", errAutoFileClosed)
return 0, fmt.Errorf("write: %w", ErrAutoFileClosed)
}
if af.file == nil {
@ -174,7 +174,7 @@ func (af *AutoFile) Write(b []byte) (n int, err error) {
func (af *AutoFile) Sync() error {
return af.withLock(func() error {
if af.closed {
return fmt.Errorf("sync: %w", errAutoFileClosed)
return fmt.Errorf("sync: %w", ErrAutoFileClosed)
} else if af.file == nil {
return nil // nothing to sync
}
@ -207,7 +207,7 @@ func (af *AutoFile) Size() (int64, error) {
af.mtx.Lock()
defer af.mtx.Unlock()
if af.closed {
return 0, fmt.Errorf("size: %w", errAutoFileClosed)
return 0, fmt.Errorf("size: %w", ErrAutoFileClosed)
}
if af.file == nil {


Loading…
Cancel
Save