zolfa
/
tendermint

package consensus
import (	"context"	"encoding/binary"	"errors"	"fmt"	"hash/crc32"	"io"	"path/filepath"	"time"
	"github.com/gogo/protobuf/proto"
	auto "github.com/tendermint/tendermint/internal/libs/autofile"	tmjson "github.com/tendermint/tendermint/libs/json"	"github.com/tendermint/tendermint/libs/log"	tmos "github.com/tendermint/tendermint/libs/os"	"github.com/tendermint/tendermint/libs/service"	tmtime "github.com/tendermint/tendermint/libs/time"	tmcons "github.com/tendermint/tendermint/proto/tendermint/consensus")
const (
	// maxMsgSizeBytes is the largest encoded record payload the WAL accepts:
	// the max consensus msg size plus extra room for the time.Time timestamp.
	// NOTE(review): the 24 is presumably the encoded timestamp overhead - confirm.
	maxMsgSizeBytes = maxMsgSize + 24

	// walDefaultFlushInterval is how often the WAL is flushed and synced to
	// disk by the periodic flush loop, unless overridden with SetFlushInterval.
	walDefaultFlushInterval = 2 * time.Second
)
//--------------------------------------------------------
// types and functions for saving consensus messages

// TimedWALMessage wraps WALMessage and adds Time for debugging purposes.
//
// NOTE: the field order matters; BaseWAL.Write builds this struct with a
// positional (unkeyed) literal.
type TimedWALMessage struct {
	// Time is the timestamp attached to the message (Write uses tmtime.Now()).
	Time time.Time `json:"time"`
	// Msg is the wrapped WAL message.
	Msg WALMessage `json:"msg"`
}
// EndHeightMessage marks the end of the given height inside WAL.
// BaseWAL.OnStart writes EndHeightMessage{0} into an empty WAL, and
// SearchForEndHeight looks for these markers.
// @internal used by scripts/wal2json util.
type EndHeightMessage struct {
	// Height is the height whose end this message marks.
	Height int64 `json:"height"`
}
// WALMessage is any value that can be handed to the WAL. The type itself is
// the empty interface; which concrete types can actually be encoded is
// decided by WALToProto (defined elsewhere in this package).
type WALMessage interface{}
func init() {	tmjson.RegisterType(msgInfo{}, "tendermint/wal/MsgInfo")	tmjson.RegisterType(timeoutInfo{}, "tendermint/wal/TimeoutInfo")	tmjson.RegisterType(EndHeightMessage{}, "tendermint/wal/EndHeightMessage")}
//--------------------------------------------------------
// Simple write-ahead logger

// WAL is an interface for any write-ahead logger.
type WAL interface {
	// Write records a message. BaseWAL's implementation does not fsync.
	Write(WALMessage) error
	// WriteSync records a message and then flushes and syncs it
	// (BaseWAL calls FlushAndSync after Write).
	WriteSync(WALMessage) error
	// FlushAndSync flushes buffered data and syncs it to disk.
	FlushAndSync() error

	// SearchForEndHeight looks for the EndHeightMessage of the given height.
	// When found is true, rd is a reader the caller must close.
	SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error)

	// service methods
	Start(context.Context) error
	Stop() error
	Wait()
}
// BaseWAL is a write-ahead logger that writes msgs to disk before they are
// processed.
// Can be used for crash-recovery and deterministic replay.
// TODO: currently the wal is overwritten during replay catchup, give it a mode
// so it's either reading or appending - must read to end to start appending
// again.
type BaseWAL struct {
	service.BaseService
	logger log.Logger

	// group is the autofile group that records are written to and read from.
	group *auto.Group

	// enc encodes messages into group (see NewWAL).
	enc *WALEncoder

	// flushTicker drives the periodic flush; it is created in OnStart and
	// stopped in OnStop.
	flushTicker *time.Ticker
	// flushInterval is the period of flushTicker; NewWAL sets it to
	// walDefaultFlushInterval.
	flushInterval time.Duration
}

// Compile-time check that *BaseWAL implements WAL.
var _ WAL = &BaseWAL{}
// NewWAL returns a new write-ahead logger based on `baseWAL`, which implements
// WAL. It's flushed and synced to disk every 2s and once when stopped.
func NewWAL(ctx context.Context, logger log.Logger, walFile string, groupOptions ...func(*auto.Group)) (*BaseWAL, error) {	err := tmos.EnsureDir(filepath.Dir(walFile), 0700)	if err != nil {		return nil, fmt.Errorf("failed to ensure WAL directory is in place: %w", err)	}
	group, err := auto.OpenGroup(ctx, logger, walFile, groupOptions...)	if err != nil {		return nil, err	}	wal := &BaseWAL{		logger:        logger,		group:         group,		enc:           NewWALEncoder(group),		flushInterval: walDefaultFlushInterval,	}	wal.BaseService = *service.NewBaseService(logger, "baseWAL", wal)	return wal, nil}
// SetFlushInterval allows us to override the periodic flush interval for the WAL.
//
// The value is only read when OnStart creates the ticker, so it has to be set
// before the WAL is started. time.NewTicker panics on a non-positive
// duration, so i must be greater than zero.
func (wal *BaseWAL) SetFlushInterval(i time.Duration) {
	wal.flushInterval = i
}
// Group returns the autofile group backing this WAL.
func (wal *BaseWAL) Group() *auto.Group {
	return wal.group
}
func (wal *BaseWAL) OnStart(ctx context.Context) error {	size, err := wal.group.Head.Size()	if err != nil {		return err	} else if size == 0 {		if err := wal.WriteSync(EndHeightMessage{0}); err != nil {			return err		}	}	err = wal.group.Start(ctx)	if err != nil {		return err	}	wal.flushTicker = time.NewTicker(wal.flushInterval)	go wal.processFlushTicks(ctx)	return nil}
func (wal *BaseWAL) processFlushTicks(ctx context.Context) {	for {		select {		case <-wal.flushTicker.C:			if err := wal.FlushAndSync(); err != nil {				wal.logger.Error("Periodic WAL flush failed", "err", err)			}		case <-ctx.Done():			return		}	}}
// FlushAndSync flushes and fsync's the underlying group's data to disk.
// It simply delegates to the group; see auto#FlushAndSync.
func (wal *BaseWAL) FlushAndSync() error {
	return wal.group.FlushAndSync()
}
// Stop the underlying autofile group.
// Use Wait() to ensure it's finished shutting down
// before cleaning up files.
func (wal *BaseWAL) OnStop() {	wal.flushTicker.Stop()	if err := wal.FlushAndSync(); err != nil {		if !errors.Is(err, service.ErrAlreadyStopped) {			wal.logger.Error("error on flush data to disk", "error", err)		}	}	if err := wal.group.Stop(); err != nil {		if !errors.Is(err, service.ErrAlreadyStopped) {			wal.logger.Error("error trying to stop wal", "error", err)		}	}	wal.group.Close()}
// Wait blocks until the WAL service and then the underlying autofile group
// have finished shutting down, so it's safe to cleanup files.
// Each wait is skipped when the corresponding service is not running.
func (wal *BaseWAL) Wait() {
	if wal.IsRunning() {
		wal.BaseService.Wait()
	}
	if wal.group.IsRunning() {
		wal.group.Wait()
	}
}
// Write is called in newStep and for each receive on the
// peerMsgQueue and the timeoutTicker.
// NOTE: does not call fsync()
func (wal *BaseWAL) Write(msg WALMessage) error {	if wal == nil {		return nil	}
	if err := wal.enc.Encode(&TimedWALMessage{tmtime.Now(), msg}); err != nil {		wal.logger.Error("error writing msg to consensus wal. WARNING: recover may not be possible for the current height",			"err", err, "msg", msg)		return err	}
	return nil}
// WriteSync is called when we receive a msg from ourselves
// so that we write to disk before sending signed messages.
// NOTE: calls fsync()
func (wal *BaseWAL) WriteSync(msg WALMessage) error {	if wal == nil {		return nil	}
	if err := wal.Write(msg); err != nil {		return err	}
	if err := wal.FlushAndSync(); err != nil {		wal.logger.Error(`WriteSync failed to flush consensus wal.		WARNING: may result in creating alternative proposals / votes for the current height iff the node restarted`,			"err", err)		return err	}
	return nil}
// WALSearchOptions are optional arguments to SearchForEndHeight.
type WALSearchOptions struct {
	// IgnoreDataCorruptionErrors set to true will result in skipping data
	// corruption errors: the corrupted entry is logged and the search
	// continues with the next entry.
	IgnoreDataCorruptionErrors bool
}
// SearchForEndHeight searches for the EndHeightMessage with the given height
// and returns an auto.GroupReader, whenever it was found or not and an error.
// Group reader will be nil if found equals false.
//
// CONTRACT: caller must close group reader.
func (wal *BaseWAL) SearchForEndHeight(	height int64,	options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) {	var (		msg *TimedWALMessage		gr  *auto.GroupReader	)	lastHeightFound := int64(-1)
	// NOTE: starting from the last file in the group because we're usually
	// searching for the last height. See replay.go
	min, max := wal.group.MinIndex(), wal.group.MaxIndex()	wal.logger.Info("Searching for height", "height", height, "min", min, "max", max)	for index := max; index >= min; index-- {		gr, err = wal.group.NewReader(index)		if err != nil {			return nil, false, err		}
		dec := NewWALDecoder(gr)		for {			msg, err = dec.Decode()			if err == io.EOF {				// OPTIMISATION: no need to look for height in older files if we've seen h < height
				if lastHeightFound > 0 && lastHeightFound < height {					gr.Close()					return nil, false, nil				}				// check next file
				break			}			if options.IgnoreDataCorruptionErrors && IsDataCorruptionError(err) {				wal.logger.Error("Corrupted entry. Skipping...", "err", err)				// do nothing
				continue			} else if err != nil {				gr.Close()				return nil, false, err			}
			if m, ok := msg.Msg.(EndHeightMessage); ok {				lastHeightFound = m.Height				if m.Height == height { // found
					wal.logger.Info("Found", "height", height, "index", index)					return gr, true, nil				}			}		}		gr.Close()	}
	return nil, false, nil}
// A WALEncoder writes custom-encoded WAL messages to an output stream.
//
// Format: 4 bytes CRC sum + 4 bytes length + arbitrary-length value
// (both integers are written big-endian by Encode).
type WALEncoder struct {
	// wr is the destination every encoded record is written to.
	wr io.Writer
}
// NewWALEncoder returns a new encoder that writes to wr.
func NewWALEncoder(wr io.Writer) *WALEncoder {
	return &WALEncoder{wr}
}
// Encode writes the custom encoding of v to the stream. It returns an error if
// the encoded size of v is greater than 1MB. Any error encountered
// during the write is also returned.
func (enc *WALEncoder) Encode(v *TimedWALMessage) error {	pbMsg, err := WALToProto(v.Msg)	if err != nil {		return err	}	pv := tmcons.TimedWALMessage{		Time: v.Time,		Msg:  pbMsg,	}
	data, err := proto.Marshal(&pv)	if err != nil {		panic(fmt.Errorf("encode timed wall message failure: %w", err))	}
	crc := crc32.Checksum(data, crc32c)	length := uint32(len(data))	if length > maxMsgSizeBytes {		return fmt.Errorf("msg is too big: %d bytes, max: %d bytes", length, maxMsgSizeBytes)	}	totalLength := 8 + int(length)
	msg := make([]byte, totalLength)	binary.BigEndian.PutUint32(msg[0:4], crc)	binary.BigEndian.PutUint32(msg[4:8], length)	copy(msg[8:], data)
	_, err = enc.wr.Write(msg)	return err}
// IsDataCorruptionError returns true if data has been corrupted inside WAL,
// i.e. if err is a DataCorruptionError or wraps one (for example via
// fmt.Errorf with %w). A nil err yields false.
func IsDataCorruptionError(err error) bool {
	var dce DataCorruptionError
	return errors.As(err, &dce)
}

// DataCorruptionError is an error that occurs if data on disk was corrupted.
//
// NOTE: it intentionally exposes its cause only through Cause and not through
// an Unwrap method: the decoder wraps read errors such as io.EOF in it, and
// those must not start matching errors.Is(err, io.EOF) checks.
type DataCorruptionError struct {
	cause error
}

// Error implements the error interface.
func (e DataCorruptionError) Error() string {
	return fmt.Sprintf("DataCorruptionError[%v]", e.cause)
}

// Cause returns the underlying error that was classified as data corruption.
func (e DataCorruptionError) Cause() error {
	return e.cause
}
// A WALDecoder reads and decodes custom-encoded WAL messages from an input
// stream. See WALEncoder for the format used.
//
// It will also compare the checksums and make sure data size is equal to the
// length from the header. If that is not the case, error will be returned.
type WALDecoder struct {
	// rd is the source the encoded records are read from.
	rd io.Reader
}
// NewWALDecoder returns a new decoder that reads from rd.
func NewWALDecoder(rd io.Reader) *WALDecoder {
	return &WALDecoder{rd}
}
// Decode reads the next custom-encoded value from its reader and returns it.
func (dec *WALDecoder) Decode() (*TimedWALMessage, error) {	b := make([]byte, 4)
	_, err := dec.rd.Read(b)	if errors.Is(err, io.EOF) {		return nil, err	}	if err != nil {		return nil, DataCorruptionError{fmt.Errorf("failed to read checksum: %w", err)}	}	crc := binary.BigEndian.Uint32(b)
	b = make([]byte, 4)	_, err = dec.rd.Read(b)	if err != nil {		return nil, DataCorruptionError{fmt.Errorf("failed to read length: %w", err)}	}	length := binary.BigEndian.Uint32(b)
	if length > maxMsgSizeBytes {		return nil, DataCorruptionError{fmt.Errorf(			"length %d exceeded maximum possible value of %d bytes",			length,			maxMsgSizeBytes)}	}
	data := make([]byte, length)	n, err := dec.rd.Read(data)	if err != nil {		return nil, DataCorruptionError{fmt.Errorf("failed to read data: %v (read: %d, wanted: %d)", err, n, length)}	}
	// check checksum before decoding data
	actualCRC := crc32.Checksum(data, crc32c)	if actualCRC != crc {		return nil, DataCorruptionError{fmt.Errorf("checksums do not match: read: %v, actual: %v", crc, actualCRC)}	}
	var res = new(tmcons.TimedWALMessage)	err = proto.Unmarshal(data, res)	if err != nil {		return nil, DataCorruptionError{fmt.Errorf("failed to decode data: %w", err)}	}
	walMsg, err := WALFromProto(res.Msg)	if err != nil {		return nil, DataCorruptionError{fmt.Errorf("failed to convert from proto: %w", err)}	}	tMsgWal := &TimedWALMessage{		Time: res.Time,		Msg:  walMsg,	}
	return tMsgWal, err}
// nilWAL is a WAL implementation whose methods do nothing and always report
// success.
type nilWAL struct{}

// Compile-time check that nilWAL implements WAL.
var _ WAL = nilWAL{}

// Write discards m.
func (nilWAL) Write(m WALMessage) error { return nil }

// WriteSync discards m.
func (nilWAL) WriteSync(m WALMessage) error { return nil }

// FlushAndSync is a no-op.
func (nilWAL) FlushAndSync() error { return nil }

// SearchForEndHeight never finds anything: it returns a nil reader, false and
// a nil error.
func (nilWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) {
	return nil, false, nil
}

// Start is a no-op.
func (nilWAL) Start(context.Context) error { return nil }

// Stop is a no-op.
func (nilWAL) Stop() error { return nil }

// Wait returns immediately.
func (nilWAL) Wait() {}