From 054b993a3876ac2c129292046bd5d020e45b695f Mon Sep 17 00:00:00 2001 From: HuangYi Date: Thu, 10 Mar 2022 12:32:52 +0800 Subject: [PATCH 1/6] rollback command doesn't rollback state if block store has one more block it's not very useful under the app hash mismatch scenario. this patch will rollback the block store in such case. changelog --- CHANGELOG_PENDING.md | 1 + internal/consensus/replay_test.go | 4 ++ internal/state/mocks/block_store.go | 5 ++ internal/state/rollback.go | 7 ++- internal/state/services.go | 2 + internal/store/store.go | 71 ++++++++++++++++++++--------- 6 files changed, 66 insertions(+), 24 deletions(-) diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index e7aa904e3..360d99c1e 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -73,6 +73,7 @@ Special thanks to external contributors on this release: - [rpc] \#7612 paginate mempool /unconfirmed_txs rpc endpoint (@spacech1mp) - [light] [\#7536](https://github.com/tendermint/tendermint/pull/7536) rpc /status call returns info about the light client (@jmalicevic) - [types] \#7765 Replace EvidenceData with EvidenceList to avoid unnecessary nesting of evidence fields within a block. (@jmalicevic) +- [cli] [\#8101](https://github.com/tendermint/tendermint/pull/8101) rollback command rollback block store if it has one more block than block state. ### BUG FIXES diff --git a/internal/consensus/replay_test.go b/internal/consensus/replay_test.go index 2f6a531da..1a6cfda67 100644 --- a/internal/consensus/replay_test.go +++ b/internal/consensus/replay_test.go @@ -1231,6 +1231,10 @@ func (bs *mockBlockStore) PruneBlocks(height int64) (uint64, error) { return pruned, nil } +func (bs *mockBlockStore) Rollback() error { + return nil +} + //--------------------------------------- // Test handshake/init chain diff --git a/internal/state/mocks/block_store.go b/internal/state/mocks/block_store.go index 563183437..2557580b9 100644 --- a/internal/state/mocks/block_store.go +++ b/internal/state/mocks/block_store.go @@ -208,3 +208,8 @@ func (_m *BlockStore) Size() int64 { return r0 } + +func (_m *BlockStore) Rollback() error { + _m.Called() + return nil +} diff --git a/internal/state/rollback.go b/internal/state/rollback.go index 38f042559..6ca22a7bb 100644 --- a/internal/state/rollback.go +++ b/internal/state/rollback.go @@ -23,9 +23,12 @@ func Rollback(bs BlockStore, ss Store) (int64, []byte, error) { // NOTE: persistence of state and blocks don't happen atomically. Therefore it is possible that // when the user stopped the node the state wasn't updated but the blockstore was. In this situation - // we don't need to rollback any state and can just return early + // we need to rollback the block store too. if height == invalidState.LastBlockHeight+1 { - return invalidState.LastBlockHeight, invalidState.AppHash, nil + if err := bs.Rollback(); err != nil { + return -1, nil, err + } + height-- } // If the state store isn't one below nor equal to the blockstore height than this violates the diff --git a/internal/state/services.go b/internal/state/services.go index 5d04d2c82..413fe1ad7 100644 --- a/internal/state/services.go +++ b/internal/state/services.go @@ -36,6 +36,8 @@ type BlockStore interface { LoadBlockCommit(height int64) *types.Commit LoadSeenCommit() *types.Commit + + Rollback() error } //----------------------------------------------------------------------------- diff --git a/internal/store/store.go b/internal/store/store.go index eb03e5fe6..8e748d6e2 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -313,28 +313,6 @@ func (bs *BlockStore) PruneBlocks(height int64) (uint64, error) { return 0, fmt.Errorf("height must be equal to or less than the latest height %d", bs.Height()) } - // when removing the block meta, use the hash to remove the hash key at the same time - removeBlockHash := func(key, value []byte, batch dbm.Batch) error { - // unmarshal block meta - var pbbm = new(tmproto.BlockMeta) - err := proto.Unmarshal(value, pbbm) - if err != nil { - return fmt.Errorf("unmarshal to tmproto.BlockMeta: %w", err) - } - - blockMeta, err := types.BlockMetaFromProto(pbbm) - if err != nil { - return fmt.Errorf("error from proto blockMeta: %w", err) - } - - // delete the hash key corresponding to the block meta's hash - if err := batch.Delete(blockHashKey(blockMeta.BlockID.Hash)); err != nil { - return fmt.Errorf("failed to delete hash key: %X: %w", blockHashKey(blockMeta.BlockID.Hash), err) - } - - return nil - } - // remove block meta first as this is used to indicate whether the block exists. // For this reason, we also use ony block meta as a measure of the amount of blocks pruned pruned, err := bs.pruneRange(blockMetaKey(0), blockMetaKey(height), removeBlockHash) @@ -353,6 +331,28 @@ func (bs *BlockStore) PruneBlocks(height int64) (uint64, error) { return pruned, nil } +// when removing the block meta, use the hash to remove the hash key at the same time +func removeBlockHash(key, value []byte, batch dbm.Batch) error { + // unmarshal block meta + var pbbm = new(tmproto.BlockMeta) + err := proto.Unmarshal(value, pbbm) + if err != nil { + return fmt.Errorf("unmarshal to tmproto.BlockMeta: %w", err) + } + + blockMeta, err := types.BlockMetaFromProto(pbbm) + if err != nil { + return fmt.Errorf("error from proto blockMeta: %w", err) + } + + // delete the hash key corresponding to the block meta's hash + if err := batch.Delete(blockHashKey(blockMeta.BlockID.Hash)); err != nil { + return fmt.Errorf("failed to delete hash key: %X: %w", blockHashKey(blockMeta.BlockID.Hash), err) + } + + return nil +} + // pruneRange is a generic function for deleting a range of values based on the lowest // height up to but excluding retainHeight. For each key/value pair, an optional hook can be // executed before the deletion itself is made. pruneRange will use batch delete to delete @@ -576,6 +576,33 @@ func (bs *BlockStore) Close() error { return bs.db.Close() } +// Rollback rollbacks the latest block from BlockStore. +func (bs *BlockStore) Rollback() error { + height := bs.Height() + if height <= 0 { + return fmt.Errorf("can't rollback height %v", height) + } + + pruned, err := bs.pruneRange(blockMetaKey(height), blockMetaKey(height+1), removeBlockHash) + if err != nil { + return err + } + + if pruned != 1 { + return fmt.Errorf("the number of rollbacked blocks don't match %v", pruned) + } + + if _, err := bs.pruneRange(blockPartKey(height, 0), blockPartKey(height+1, 0), nil); err != nil { + return err + } + + if _, err := bs.pruneRange(blockCommitKey(height), blockCommitKey(height+1), nil); err != nil { + return err + } + + return nil +} + //---------------------------------- KEY ENCODING ----------------------------------------- // key prefixes From 3810b29f38d00ddb0644a5c65e7526a689861027 Mon Sep 17 00:00:00 2001 From: HuangYi Date: Mon, 14 Mar 2022 09:40:30 +0800 Subject: [PATCH 2/6] fix pr suggestion --- internal/store/store.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/store/store.go b/internal/store/store.go index 8e748d6e2..968e1b551 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -580,7 +580,7 @@ func (bs *BlockStore) Close() error { func (bs *BlockStore) Rollback() error { height := bs.Height() if height <= 0 { - return fmt.Errorf("can't rollback height %v", height) + return fmt.Errorf("can't rollback height %d", height) } pruned, err := bs.pruneRange(blockMetaKey(height), blockMetaKey(height+1), removeBlockHash) @@ -589,7 +589,7 @@ func (bs *BlockStore) Rollback() error { } if pruned != 1 { - return fmt.Errorf("the number of rollbacked blocks don't match %v", pruned) + return fmt.Errorf("the number of rollbacked blocks don't match %d", pruned) } if _, err := bs.pruneRange(blockPartKey(height, 0), blockPartKey(height+1, 0), nil); err != nil { From fd305d30a317a1a91edc81d2b24fcd08384fb32a Mon Sep 17 00:00:00 2001 From: HuangYi Date: Mon, 14 Mar 2022 10:53:22 +0800 Subject: [PATCH 3/6] unit test --- cmd/tendermint/commands/rollback_test.go | 9 +++- test/e2e/app/state.go | 56 +++++++++++++----------- 2 files changed, 38 insertions(+), 27 deletions(-) diff --git a/cmd/tendermint/commands/rollback_test.go b/cmd/tendermint/commands/rollback_test.go index 760dbf0ec..189329468 100644 --- a/cmd/tendermint/commands/rollback_test.go +++ b/cmd/tendermint/commands/rollback_test.go @@ -46,6 +46,13 @@ func TestRollbackIntegration(t *testing.T) { height, _, err = commands.RollbackState(cfg) require.NoError(t, err, "%d", height) }) + t.Run("Rollback agian", func(t *testing.T) { + // should be able to rollback agian. + require.NoError(t, app.Rollback()) + height2, _, err := commands.RollbackState(cfg) + require.NoError(t, err, "%d", height2) + require.Equal(t, height-1, height2) + }) t.Run("Restart", func(t *testing.T) { require.True(t, height > 0, "%d", height) @@ -68,7 +75,7 @@ func TestRollbackIntegration(t *testing.T) { status, err := client.Status(ctx) require.NoError(t, err) - if status.SyncInfo.LatestBlockHeight > height { + if status.SyncInfo.LatestBlockHeight > height+2 { return } } diff --git a/test/e2e/app/state.go b/test/e2e/app/state.go index e82a22539..302eaab66 100644 --- a/test/e2e/app/state.go +++ b/test/e2e/app/state.go @@ -12,8 +12,7 @@ import ( "sync" ) -const stateFileName = "app_state.json" -const prevStateFileName = "prev_app_state.json" +const stateFileName = "app_state_%d.json" // State is the application state. type State struct { @@ -23,9 +22,9 @@ type State struct { Hash []byte // private fields aren't marshaled to disk. - currentFile string - // app saves current and previous state for rollback functionality - previousFile string + // app saves current and historical states for rollback functionality + stateFileTemplate string + persistInterval uint64 initialHeight uint64 } @@ -33,10 +32,9 @@ type State struct { // NewState creates a new state. func NewState(dir string, persistInterval uint64) (*State, error) { state := &State{ - Values: make(map[string]string), - currentFile: filepath.Join(dir, stateFileName), - previousFile: filepath.Join(dir, prevStateFileName), - persistInterval: persistInterval, + Values: make(map[string]string), + stateFileTemplate: filepath.Join(dir, stateFileName), + persistInterval: persistInterval, } state.Hash = hashItems(state.Values) err := state.load() @@ -48,25 +46,37 @@ func NewState(dir string, persistInterval uint64) (*State, error) { return state, nil } +func (s *State) currentFile() string { + return s.stateFileAtVersion(s.Height) +} + +func (s *State) previousFile() string { + return s.stateFileAtVersion(s.Height - 1) +} + +func (s *State) stateFileAtVersion(i uint64) string { + return fmt.Sprintf(s.stateFileTemplate, i) +} + // load loads state from disk. It does not take out a lock, since it is called // during construction. func (s *State) load() error { - bz, err := os.ReadFile(s.currentFile) + bz, err := os.ReadFile(s.currentFile()) if err != nil { // if the current state doesn't exist then we try recover from the previous state if errors.Is(err, os.ErrNotExist) { - bz, err = os.ReadFile(s.previousFile) + bz, err = os.ReadFile(s.previousFile()) if err != nil { return fmt.Errorf("failed to read both current and previous state (%q): %w", - s.previousFile, err) + s.previousFile(), err) } } else { - return fmt.Errorf("failed to read state from %q: %w", s.currentFile, err) + return fmt.Errorf("failed to read state from %q: %w", s.currentFile(), err) } } err = json.Unmarshal(bz, s) if err != nil { - return fmt.Errorf("invalid state data in %q: %w", s.currentFile, err) + return fmt.Errorf("invalid state data in %q: %w", s.currentFile(), err) } return nil } @@ -80,19 +90,13 @@ func (s *State) save() error { } // We write the state to a separate file and move it to the destination, to // make it atomic. - newFile := fmt.Sprintf("%v.new", s.currentFile) + newFile := fmt.Sprintf("%v.new", s.currentFile()) err = os.WriteFile(newFile, bz, 0644) if err != nil { - return fmt.Errorf("failed to write state to %q: %w", s.currentFile, err) - } - // We take the current state and move it to the previous state, replacing it - if _, err := os.Stat(s.currentFile); err == nil { - if err := os.Rename(s.currentFile, s.previousFile); err != nil { - return fmt.Errorf("failed to replace previous state: %w", err) - } + return fmt.Errorf("failed to write state to %q: %w", s.currentFile(), err) } // Finally, we take the new state and replace the current state. - return os.Rename(newFile, s.currentFile) + return os.Rename(newFile, s.currentFile()) } // Export exports key/value pairs as JSON, used for state sync snapshots. @@ -159,13 +163,13 @@ func (s *State) Commit() (uint64, []byte, error) { } func (s *State) Rollback() error { - bz, err := os.ReadFile(s.previousFile) + bz, err := os.ReadFile(s.previousFile()) if err != nil { - return fmt.Errorf("failed to read state from %q: %w", s.previousFile, err) + return fmt.Errorf("failed to read state from %q: %w", s.previousFile(), err) } err = json.Unmarshal(bz, s) if err != nil { - return fmt.Errorf("invalid state data in %q: %w", s.previousFile, err) + return fmt.Errorf("invalid state data in %q: %w", s.previousFile(), err) } return nil } From 57aae384ec5d0faac3fc771e1a580f9ea0fc6779 Mon Sep 17 00:00:00 2001 From: HuangYi Date: Tue, 15 Mar 2022 11:52:18 +0800 Subject: [PATCH 4/6] update unit test, the single node chain is expected to halt --- cmd/tendermint/commands/rollback_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/tendermint/commands/rollback_test.go b/cmd/tendermint/commands/rollback_test.go index 189329468..724011188 100644 --- a/cmd/tendermint/commands/rollback_test.go +++ b/cmd/tendermint/commands/rollback_test.go @@ -70,12 +70,13 @@ func TestRollbackIntegration(t *testing.T) { for { select { case <-ctx.Done(): - t.Fatalf("failed to make progress after 20 seconds. Min height: %d", height) + return case <-ticker.C: status, err := client.Status(ctx) require.NoError(t, err) if status.SyncInfo.LatestBlockHeight > height+2 { + t.Fatalf("chain is expect to halt, because validator is not expected to sign the new blocks %d", status.SyncInfo.LatestBlockHeight) return } } From ca4f79d1361550abd7ebf7dfb45988ae24df87b1 Mon Sep 17 00:00:00 2001 From: yihuang Date: Tue, 15 Mar 2022 11:56:37 +0800 Subject: [PATCH 5/6] Apply suggestions from code review --- cmd/tendermint/commands/rollback_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/tendermint/commands/rollback_test.go b/cmd/tendermint/commands/rollback_test.go index 724011188..8d1760088 100644 --- a/cmd/tendermint/commands/rollback_test.go +++ b/cmd/tendermint/commands/rollback_test.go @@ -46,8 +46,8 @@ func TestRollbackIntegration(t *testing.T) { height, _, err = commands.RollbackState(cfg) require.NoError(t, err, "%d", height) }) - t.Run("Rollback agian", func(t *testing.T) { - // should be able to rollback agian. + t.Run("Rollback again", func(t *testing.T) { + // should be able to rollback again. require.NoError(t, app.Rollback()) height2, _, err := commands.RollbackState(cfg) require.NoError(t, err, "%d", height2) From c6e7524a8e0a89533099bfd1482fc30832f55b00 Mon Sep 17 00:00:00 2001 From: yihuang Date: Tue, 15 Mar 2022 12:25:39 +0800 Subject: [PATCH 6/6] Update cmd/tendermint/commands/rollback_test.go --- cmd/tendermint/commands/rollback_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/tendermint/commands/rollback_test.go b/cmd/tendermint/commands/rollback_test.go index 8d1760088..340522345 100644 --- a/cmd/tendermint/commands/rollback_test.go +++ b/cmd/tendermint/commands/rollback_test.go @@ -76,7 +76,7 @@ func TestRollbackIntegration(t *testing.T) { require.NoError(t, err) if status.SyncInfo.LatestBlockHeight > height+2 { - t.Fatalf("chain is expect to halt, because validator is not expected to sign the new blocks %d", status.SyncInfo.LatestBlockHeight) + t.Fatalf("chain is expect to halt, because the validator won't recreate the old blocks %d", status.SyncInfo.LatestBlockHeight) return } }