From 0b0a8b3128e52bf34a34685804922a47cd3c7810 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Mon, 18 Feb 2019 11:23:06 +0400 Subject: [PATCH] cs/wal: refuse to encode msg that is bigger than maxMsgSizeBytes (#3303) Earlier this week somebody posted this in GoS Riot chat: ``` E[2019-02-12|10:38:37.596] Corrupted entry. Skipping... module=consensus wal=/home/gaia/.gaiad/data/cs.wal/wal err="DataCorruptionError[length 878916964 exceeded maximum possible value of 1048576 bytes]" E[2019-02-12|10:38:37.596] Corrupted entry. Skipping... module=consensus wal=/home/gaia/.gaiad/data/cs.wal/wal err="DataCorruptionError[length 825701731 exceeded maximum possible value of 1048576 bytes]" E[2019-02-12|10:38:37.596] Corrupted entry. Skipping... module=consensus wal=/home/gaia/.gaiad/data/cs.wal/wal err="DataCorruptionError[length 1631073634 exceeded maximum possible value of 1048576 bytes]" E[2019-02-12|10:38:37.596] Corrupted entry. Skipping... module=consensus wal=/home/gaia/.gaiad/data/cs.wal/wal err="DataCorruptionError[length 912418148 exceeded maximum possible value of 1048576 bytes]" E[2019-02-12|10:38:37.600] Corrupted entry. Skipping... module=consensus wal=/home/gaia/.gaiad/data/cs.wal/wal err="DataCorruptionError[failed to read data: EOF]" E[2019-02-12|10:38:37.600] Error on catchup replay. Proceeding to start ConsensusState anyway module=consensus err="Cannot replay height 7242. WAL does not contain #ENDHEIGHT for 7241" E[2019-02-12|10:38:37.861] Error dialing peer module=p2p err="dial tcp 35.183.126.181:26656: i/o timeout ``` Note the length error messages. What probably happened is that the length field got corrupted. I've looked at the code and noticed that we don't check the msg size during encoding. This PR fixes that. It also improves a few error messages in WALDecoder. 
--- consensus/wal.go | 13 +++++++++---- consensus/wal_test.go | 20 ++++++++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/consensus/wal.go b/consensus/wal.go index 26428a4c6..f8988691c 100644 --- a/consensus/wal.go +++ b/consensus/wal.go @@ -229,12 +229,17 @@ func NewWALEncoder(wr io.Writer) *WALEncoder { return &WALEncoder{wr} } -// Encode writes the custom encoding of v to the stream. +// Encode writes the custom encoding of v to the stream. It returns an error if +// the amino-encoded size of v is greater than 1MB. Any error encountered +// during the write is also returned. func (enc *WALEncoder) Encode(v *TimedWALMessage) error { data := cdc.MustMarshalBinaryBare(v) crc := crc32.Checksum(data, crc32c) length := uint32(len(data)) + if length > maxMsgSizeBytes { + return fmt.Errorf("Msg is too big: %d bytes, max: %d bytes", length, maxMsgSizeBytes) + } totalLength := 8 + int(length) msg := make([]byte, totalLength) @@ -307,15 +312,15 @@ func (dec *WALDecoder) Decode() (*TimedWALMessage, error) { } data := make([]byte, length) - _, err = dec.rd.Read(data) + n, err := dec.rd.Read(data) if err != nil { - return nil, DataCorruptionError{fmt.Errorf("failed to read data: %v", err)} + return nil, DataCorruptionError{fmt.Errorf("failed to read data: %v (read: %d, wanted: %d)", err, n, length)} } // check checksum before decoding data actualCRC := crc32.Checksum(data, crc32c) if actualCRC != crc { - return nil, DataCorruptionError{fmt.Errorf("checksums do not match: (read: %v, actual: %v)", crc, actualCRC)} + return nil, DataCorruptionError{fmt.Errorf("checksums do not match: read: %v, actual: %v", crc, actualCRC)} } var res = new(TimedWALMessage) // nolint: gosimple diff --git a/consensus/wal_test.go b/consensus/wal_test.go index 93beb68bb..7ec338345 100644 --- a/consensus/wal_test.go +++ b/consensus/wal_test.go @@ -95,6 +95,26 @@ func TestWALEncoderDecoder(t *testing.T) { } } +func TestWALWritePanicsIfMsgIsTooBig(t *testing.T) { + walDir, 
err := ioutil.TempDir("", "wal") + require.NoError(t, err) + defer os.RemoveAll(walDir) + walFile := filepath.Join(walDir, "wal") + + wal, err := NewWAL(walFile) + require.NoError(t, err) + err = wal.Start() + require.NoError(t, err) + defer func() { + wal.Stop() + // wait for the wal to finish shutting down so we + // can safely remove the directory + wal.Wait() + }() + + assert.Panics(t, func() { wal.Write(make([]byte, maxMsgSizeBytes+1)) }) +} + func TestWALSearchForEndHeight(t *testing.T) { walBody, err := WALWithNBlocks(6) if err != nil {