Browse Source

Merge pull request #965 from tendermint/573-handle-corrupt-wal-file

Handle corrupt WAL file
pull/980/head
Ethan Buchman 7 years ago
committed by GitHub
parent
commit
eddabab5e4
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 169 additions and 25 deletions
  1. +18
    -10
      consensus/replay.go
  2. +3
    -3
      consensus/replay_test.go
  3. +46
    -10
      consensus/wal.go
  4. +31
    -0
      consensus/wal_fuzz.go
  5. +1
    -1
      consensus/wal_generator.go
  6. +1
    -1
      consensus/wal_test.go
  7. +69
    -0
      docs/specification/corruption.rst

+ 18
- 10
consensus/replay.go View File

@ -2,7 +2,6 @@ package consensus
import (
"bytes"
"errors"
"fmt"
"hash/crc32"
"io"
@ -95,10 +94,13 @@ func (cs *ConsensusState) catchupReplay(csHeight int64) error {
cs.replayMode = true
defer func() { cs.replayMode = false }()
// Ensure that ENDHEIGHT for this height doesn't exist
// NOTE: This is just a sanity check. As far as we know things work fine without it,
// and Handshake could reuse ConsensusState if it weren't for this check (since we can crash after writing ENDHEIGHT).
gr, found, err := cs.wal.SearchForEndHeight(csHeight)
// Ensure that ENDHEIGHT for this height doesn't exist.
// NOTE: This is just a sanity check. As far as we know things work fine
// without it, and Handshake could reuse ConsensusState if it weren't for
// this check (since we can crash after writing ENDHEIGHT).
//
// Ignore data corruption errors since this is a sanity check.
gr, found, err := cs.wal.SearchForEndHeight(csHeight, &WALSearchOptions{IgnoreDataCorruptionErrors: true})
if err != nil {
return err
}
@ -112,14 +114,16 @@ func (cs *ConsensusState) catchupReplay(csHeight int64) error {
}
// Search for last height marker
gr, found, err = cs.wal.SearchForEndHeight(csHeight - 1)
//
// Ignore data corruption errors in previous heights because we only care about last height
gr, found, err = cs.wal.SearchForEndHeight(csHeight-1, &WALSearchOptions{IgnoreDataCorruptionErrors: true})
if err == io.EOF {
cs.Logger.Error("Replay: wal.group.Search returned EOF", "#ENDHEIGHT", csHeight-1)
} else if err != nil {
return err
}
if !found {
return errors.New(cmn.Fmt("Cannot replay height %d. WAL does not contain #ENDHEIGHT for %d.", csHeight, csHeight-1))
return fmt.Errorf("Cannot replay height %d. WAL does not contain #ENDHEIGHT for %d.", csHeight, csHeight-1)
}
defer gr.Close() // nolint: errcheck
@ -132,9 +136,13 @@ func (cs *ConsensusState) catchupReplay(csHeight int64) error {
msg, err = dec.Decode()
if err == io.EOF {
break
} else if IsDataCorruptionError(err) {
cs.Logger.Debug("data has been corrupted in last height of consensus WAL", "err", err, "height", csHeight)
panic(fmt.Sprintf("data has been corrupted (%v) in last height %d of consensus WAL", err, csHeight))
} else if err != nil {
return err
}
// NOTE: since the priv key is set when the msgs are received
// it will attempt to eg double sign but we can just ignore it
// since the votes will be replayed and we'll get to the next step
@ -202,7 +210,7 @@ func (h *Handshaker) Handshake(proxyApp proxy.AppConns) error {
// handshake is done via info request on the query conn
res, err := proxyApp.Query().InfoSync(abci.RequestInfo{version.Version})
if err != nil {
return errors.New(cmn.Fmt("Error calling Info: %v", err))
return fmt.Errorf("Error calling Info: %v", err)
}
blockHeight := int64(res.LastBlockHeight)
@ -218,7 +226,7 @@ func (h *Handshaker) Handshake(proxyApp proxy.AppConns) error {
// replay blocks up to the latest in the blockstore
_, err = h.ReplayBlocks(appHash, blockHeight, proxyApp)
if err != nil {
return errors.New(cmn.Fmt("Error on replay: %v", err))
return fmt.Errorf("Error on replay: %v", err)
}
h.logger.Info("Completed ABCI Handshake - Tendermint and App are synced", "appHeight", blockHeight, "appHash", fmt.Sprintf("%X", appHash))
@ -358,7 +366,7 @@ func (h *Handshaker) replayBlock(height int64, proxyApp proxy.AppConnConsensus)
func (h *Handshaker) checkAppHash(appHash []byte) error {
if !bytes.Equal(h.state.AppHash, appHash) {
panic(errors.New(cmn.Fmt("Tendermint state.AppHash does not match AppHash after replay. Got %X, expected %X", appHash, h.state.AppHash)).Error())
panic(fmt.Errorf("Tendermint state.AppHash does not match AppHash after replay. Got %X, expected %X", appHash, h.state.AppHash).Error())
}
return nil
}


+ 3
- 3
consensus/replay_test.go View File

@ -246,8 +246,8 @@ func (w *crashingWAL) Save(m WALMessage) {
}
func (w *crashingWAL) Group() *auto.Group { return w.next.Group() }
func (w *crashingWAL) SearchForEndHeight(height int64) (gr *auto.GroupReader, found bool, err error) {
return w.next.SearchForEndHeight(height)
func (w *crashingWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (gr *auto.GroupReader, found bool, err error) {
return w.next.SearchForEndHeight(height, options)
}
func (w *crashingWAL) Start() error { return w.next.Start() }
@ -478,7 +478,7 @@ func buildTMStateFromChain(config *cfg.Config, state *sm.State, chain []*types.B
func makeBlockchainFromWAL(wal WAL) ([]*types.Block, []*types.Commit, error) {
// Search for height marker
gr, found, err := wal.SearchForEndHeight(0)
gr, found, err := wal.SearchForEndHeight(0, &WALSearchOptions{})
if err != nil {
return nil, nil, err
}


+ 46
- 10
consensus/wal.go View File

@ -17,6 +17,11 @@ import (
cmn "github.com/tendermint/tmlibs/common"
)
const (
// must be greater than params.BlockGossipParams.BlockPartSizeBytes + a few bytes
maxMsgSizeBytes = 1024 * 1024 // 1MB
)
//--------------------------------------------------------
// types and functions for savings consensus messages
@ -48,7 +53,7 @@ var _ = wire.RegisterInterface(
type WAL interface {
Save(WALMessage)
Group() *auto.Group
SearchForEndHeight(height int64) (gr *auto.GroupReader, found bool, err error)
SearchForEndHeight(height int64, options *WALSearchOptions) (gr *auto.GroupReader, found bool, err error)
Start() error
Stop() error
@ -133,12 +138,18 @@ func (wal *baseWAL) Save(msg WALMessage) {
}
}
// WALSearchOptions are optional arguments to SearchForEndHeight.
type WALSearchOptions struct {
// IgnoreDataCorruptionErrors set to true will result in skipping data corruption errors.
IgnoreDataCorruptionErrors bool
}
// SearchForEndHeight searches for the EndHeightMessage with the height and
// returns an auto.GroupReader, whenever it was found or not and an error.
// Group reader will be nil if found equals false.
//
// CONTRACT: caller must close group reader.
func (wal *baseWAL) SearchForEndHeight(height int64) (gr *auto.GroupReader, found bool, err error) {
func (wal *baseWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (gr *auto.GroupReader, found bool, err error) {
var msg *TimedWALMessage
// NOTE: starting from the last file in the group because we're usually
@ -158,7 +169,9 @@ func (wal *baseWAL) SearchForEndHeight(height int64) (gr *auto.GroupReader, foun
// check next file
break
}
if err != nil {
if options.IgnoreDataCorruptionErrors && IsDataCorruptionError(err) {
// do nothing
} else if err != nil {
gr.Close()
return nil, false, err
}
@ -210,6 +223,25 @@ func (enc *WALEncoder) Encode(v *TimedWALMessage) error {
///////////////////////////////////////////////////////////////////////////////
// IsDataCorruptionError returns true if data has been corrupted inside WAL.
func IsDataCorruptionError(err error) bool {
_, ok := err.(DataCorruptionError)
return ok
}
// DataCorruptionError is an error that occures if data on disk was corrupted.
type DataCorruptionError struct {
cause error
}
func (e DataCorruptionError) Error() string {
return fmt.Sprintf("DataCorruptionError[%v]", e.cause)
}
func (e DataCorruptionError) Cause() error {
return e.cause
}
// A WALDecoder reads and decodes custom-encoded WAL messages from an input
// stream. See WALEncoder for the format used.
//
@ -228,7 +260,7 @@ func NewWALDecoder(rd io.Reader) *WALDecoder {
func (dec *WALDecoder) Decode() (*TimedWALMessage, error) {
b := make([]byte, 4)
n, err := dec.rd.Read(b)
_, err := dec.rd.Read(b)
if err == io.EOF {
return nil, err
}
@ -238,7 +270,7 @@ func (dec *WALDecoder) Decode() (*TimedWALMessage, error) {
crc := binary.BigEndian.Uint32(b)
b = make([]byte, 4)
n, err = dec.rd.Read(b)
_, err = dec.rd.Read(b)
if err == io.EOF {
return nil, err
}
@ -247,26 +279,30 @@ func (dec *WALDecoder) Decode() (*TimedWALMessage, error) {
}
length := binary.BigEndian.Uint32(b)
if length > maxMsgSizeBytes {
return nil, DataCorruptionError{fmt.Errorf("length %d exceeded maximum possible value of %d bytes", length, maxMsgSizeBytes)}
}
data := make([]byte, length)
n, err = dec.rd.Read(data)
_, err = dec.rd.Read(data)
if err == io.EOF {
return nil, err
}
if err != nil {
return nil, fmt.Errorf("not enough bytes for data: %v (want: %d, read: %v)", err, length, n)
return nil, fmt.Errorf("failed to read data: %v", err)
}
// check checksum before decoding data
actualCRC := crc32.Checksum(data, crc32c)
if actualCRC != crc {
return nil, fmt.Errorf("checksums do not match: (read: %v, actual: %v)", crc, actualCRC)
return nil, DataCorruptionError{fmt.Errorf("checksums do not match: (read: %v, actual: %v)", crc, actualCRC)}
}
var nn int
var res *TimedWALMessage // nolint: gosimple
res = wire.ReadBinary(&TimedWALMessage{}, bytes.NewBuffer(data), int(length), &nn, &err).(*TimedWALMessage)
if err != nil {
return nil, fmt.Errorf("failed to decode data: %v", err)
return nil, DataCorruptionError{fmt.Errorf("failed to decode data: %v", err)}
}
return res, err
@ -276,7 +312,7 @@ type nilWAL struct{}
func (nilWAL) Save(m WALMessage) {}
func (nilWAL) Group() *auto.Group { return nil }
func (nilWAL) SearchForEndHeight(height int64) (gr *auto.GroupReader, found bool, err error) {
func (nilWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (gr *auto.GroupReader, found bool, err error) {
return nil, false, nil
}
func (nilWAL) Start() error { return nil }


+ 31
- 0
consensus/wal_fuzz.go View File

@ -0,0 +1,31 @@
// +build gofuzz
package consensus
import (
"bytes"
"io"
)
func Fuzz(data []byte) int {
dec := NewWALDecoder(bytes.NewReader(data))
for {
msg, err := dec.Decode()
if err == io.EOF {
break
}
if err != nil {
if msg != nil {
panic("msg != nil on error")
}
return 0
}
var w bytes.Buffer
enc := NewWALEncoder(&w)
err = enc.Encode(msg)
if err != nil {
panic(err)
}
}
return 1
}

+ 1
- 1
consensus/wal_generator.go View File

@ -180,7 +180,7 @@ func (w *byteBufferWAL) Save(m WALMessage) {
func (w *byteBufferWAL) Group() *auto.Group {
panic("not implemented")
}
func (w *byteBufferWAL) SearchForEndHeight(height int64) (gr *auto.GroupReader, found bool, err error) {
func (w *byteBufferWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (gr *auto.GroupReader, found bool, err error) {
return nil, false, nil
}


+ 1
- 1
consensus/wal_test.go View File

@ -54,7 +54,7 @@ func TestSearchForEndHeight(t *testing.T) {
}
h := int64(3)
gr, found, err := wal.SearchForEndHeight(h)
gr, found, err := wal.SearchForEndHeight(h, &WALSearchOptions{})
assert.NoError(t, err, cmn.Fmt("expected not to err on height %d", h))
assert.True(t, found, cmn.Fmt("expected to find end height for %d", h))
assert.NotNil(t, gr, "expected group not to be nil")


+ 69
- 0
docs/specification/corruption.rst View File

@ -0,0 +1,69 @@
Corruption
==========
Important step
--------------
Make sure you have a backup of the Tendermint data directory.
Possible causes
---------------
Remember that most corruption is caused by hardware issues:
- RAID controllers with faulty / worn out battery backup, and an unexpected power loss
- Hard disk drives with write-back cache enabled, and an unexpected power loss
- Cheap SSDs with insufficient power-loss protection, and an unexpected power-loss
- Defective RAM
- Defective or overheating CPU(s)
Other causes can be:
- Database systems configured with fsync=off and an OS crash or power loss
- Filesystems configured to use write barriers plus a storage layer that ignores write barriers. LVM is a particular culprit.
- Tendermint bugs
- Operating system bugs
- Admin error
- directly modifying Tendermint data-directory contents
(Source: https://wiki.postgresql.org/wiki/Corruption)
WAL Corruption
--------------
If consensus WAL is corrupted at the lastest height and you are trying to start
Tendermint, replay will fail with panic.
Recovering from data corruption can be hard and time-consuming. Here are two approaches you can take:
1) Delete the WAL file and restart Tendermint. It will attempt to sync with other peers.
2) Try to repair the WAL file manually:
1. Create a backup of the corrupted WAL file:
.. code:: bash
cp "$TMHOME/data/cs.wal/wal" > /tmp/corrupted_wal_backup
2. Use ./scripts/wal2json to create a human-readable version
.. code:: bash
./scripts/wal2json/wal2json "$TMHOME/data/cs.wal/wal" > /tmp/corrupted_wal
3. Search for a "CORRUPTED MESSAGE" line.
4. By looking at the previous message and the message after the corrupted one
and looking at the logs, try to rebuild the message. If the consequent
messages are marked as corrupted too (this may happen if length header
got corrupted or some writes did not make it to the WAL ~ truncation),
then remove all the lines starting from the corrupted one and restart
Tendermint.
.. code:: bash
$EDITOR /tmp/corrupted_wal
5. After editing, convert this file back into binary form by running:
.. code:: bash
./scripts/json2wal/json2wal /tmp/corrupted_wal > "$TMHOME/data/cs.wal/wal"

Loading…
Cancel
Save