From 8ee41c5a2af6814012c80b1e160a5901c53b5eb7 Mon Sep 17 00:00:00 2001 From: "M. J. Fromberger" Date: Fri, 11 Mar 2022 12:30:19 -0800 Subject: [PATCH] Filter WAL close errors at rethrow. --- internal/consensus/state.go | 21 +++++++++++++++++---- internal/libs/autofile/autofile.go | 10 +++++----- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/internal/consensus/state.go b/internal/consensus/state.go index 1e16f84c2..e5ac30ebe 100644 --- a/internal/consensus/state.go +++ b/internal/consensus/state.go @@ -20,6 +20,7 @@ import ( cstypes "github.com/tendermint/tendermint/internal/consensus/types" "github.com/tendermint/tendermint/internal/eventbus" "github.com/tendermint/tendermint/internal/jsontypes" + "github.com/tendermint/tendermint/internal/libs/autofile" sm "github.com/tendermint/tendermint/internal/state" tmevents "github.com/tendermint/tendermint/libs/events" "github.com/tendermint/tendermint/libs/log" @@ -877,6 +878,18 @@ func (cs *State) receiveRoutine(ctx context.Context, maxSteps int) { onExit(cs) // Re-panic to ensure the node terminates. + // + // TODO(creachadair): In ordinary operation, the WAL autofile should + // never be closed. This only happens during shutdown and production + // nodes usually halt by panicking. Many existing tests, however, + // assume a clean shutdown is possible. Prior to #8111, we were + // swallowing the panic in receiveRoutine, making that appear to + // work. Filtering this specific error is slightly risky, but should + // affect only unit tests. In any case, not re-panicking here only + // preserves the pre-existing behaviour for this one error type. + if err, ok := r.(error); ok && errors.Is(err, autofile.ErrAutoFileClosed) { + return + } panic(r) } }() @@ -906,8 +919,8 @@ func (cs *State) receiveRoutine(ctx context.Context, maxSteps int) { case mi := <-cs.internalMsgQueue: err := cs.wal.WriteSync(mi) // NOTE: fsync if err != nil { - panic(fmt.Sprintf( - "failed to write %v msg to consensus WAL due to %v; check your file system and restart the node", + panic(fmt.Errorf( + "failed to write %v msg to consensus WAL due to %w; check your file system and restart the node", mi, err, )) } @@ -1900,8 +1913,8 @@ func (cs *State) finalizeCommit(ctx context.Context, height int64) { // restart). endMsg := EndHeightMessage{height} if err := cs.wal.WriteSync(endMsg); err != nil { // NOTE: fsync - panic(fmt.Sprintf( - "failed to write %v msg to consensus WAL due to %v; check your file system and restart the node", + panic(fmt.Errorf( + "failed to write %v msg to consensus WAL due to %w; check your file system and restart the node", endMsg, err, )) } diff --git a/internal/libs/autofile/autofile.go b/internal/libs/autofile/autofile.go index 6f38fc43b..2cacf6a47 100644 --- a/internal/libs/autofile/autofile.go +++ b/internal/libs/autofile/autofile.go @@ -41,9 +41,9 @@ const ( autoFilePerms = os.FileMode(0600) ) -// errAutoFileClosed is reported when operations attempt to use an autofile +// ErrAutoFileClosed is reported when operations attempt to use an autofile // after it has been closed. -var errAutoFileClosed = errors.New("autofile is closed") +var ErrAutoFileClosed = errors.New("autofile is closed") // AutoFile automatically closes and re-opens file for writing. The file is // automatically setup to close itself every 1s and upon receiving SIGHUP. @@ -155,7 +155,7 @@ func (af *AutoFile) Write(b []byte) (n int, err error) { af.mtx.Lock() defer af.mtx.Unlock() if af.closed { - return 0, fmt.Errorf("write: %w", errAutoFileClosed) + return 0, fmt.Errorf("write: %w", ErrAutoFileClosed) } if af.file == nil { @@ -174,7 +174,7 @@ func (af *AutoFile) Write(b []byte) (n int, err error) { func (af *AutoFile) Sync() error { return af.withLock(func() error { if af.closed { - return fmt.Errorf("sync: %w", errAutoFileClosed) + return fmt.Errorf("sync: %w", ErrAutoFileClosed) } else if af.file == nil { return nil // nothing to sync } @@ -207,7 +207,7 @@ func (af *AutoFile) Size() (int64, error) { af.mtx.Lock() defer af.mtx.Unlock() if af.closed { - return 0, fmt.Errorf("size: %w", errAutoFileClosed) + return 0, fmt.Errorf("size: %w", ErrAutoFileClosed) } if af.file == nil {