You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

283 lines
7.5 KiB

  1. package statesync
  2. import (
  3. "errors"
  4. "sort"
  5. "time"
  6. abci "github.com/tendermint/tendermint/abci/types"
  7. "github.com/tendermint/tendermint/config"
  8. tmsync "github.com/tendermint/tendermint/libs/sync"
  9. "github.com/tendermint/tendermint/p2p"
  10. ssproto "github.com/tendermint/tendermint/proto/tendermint/statesync"
  11. "github.com/tendermint/tendermint/proxy"
  12. sm "github.com/tendermint/tendermint/state"
  13. "github.com/tendermint/tendermint/types"
  14. )
  const (
  	// SnapshotChannel is the channel ID on which snapshot metadata is exchanged.
  	SnapshotChannel byte = 0x60
  	// ChunkChannel is the channel ID on which chunk contents are exchanged.
  	ChunkChannel byte = 0x61

  	// recentSnapshots is how many of the most recent snapshots are sent to,
  	// and received from, each peer.
  	recentSnapshots = 10
  )
  23. // Reactor handles state sync, both restoring snapshots for the local node and serving snapshots
  24. // for other nodes.
  25. type Reactor struct {
  26. p2p.BaseReactor
  27. cfg config.StateSyncConfig
  28. conn proxy.AppConnSnapshot
  29. connQuery proxy.AppConnQuery
  30. tempDir string
  31. // This will only be set when a state sync is in progress. It is used to feed received
  32. // snapshots and chunks into the sync.
  33. mtx tmsync.RWMutex
  34. syncer *syncer
  35. }
  36. // NewReactor creates a new state sync reactor.
  37. func NewReactor(
  38. cfg config.StateSyncConfig,
  39. conn proxy.AppConnSnapshot,
  40. connQuery proxy.AppConnQuery,
  41. tempDir string,
  42. ) *Reactor {
  43. r := &Reactor{
  44. cfg: cfg,
  45. conn: conn,
  46. connQuery: connQuery,
  47. }
  48. r.BaseReactor = *p2p.NewBaseReactor("StateSync", r)
  49. return r
  50. }
  51. // GetChannels implements p2p.Reactor.
  52. func (r *Reactor) GetChannels() []*p2p.ChannelDescriptor {
  53. return []*p2p.ChannelDescriptor{
  54. {
  55. ID: SnapshotChannel,
  56. Priority: 5,
  57. SendQueueCapacity: 10,
  58. RecvMessageCapacity: snapshotMsgSize,
  59. },
  60. {
  61. ID: ChunkChannel,
  62. Priority: 3,
  63. SendQueueCapacity: 10,
  64. RecvMessageCapacity: chunkMsgSize,
  65. },
  66. }
  67. }
  68. // OnStart implements p2p.Reactor.
  69. func (r *Reactor) OnStart() error {
  70. return nil
  71. }
  72. // AddPeer implements p2p.Reactor.
  73. func (r *Reactor) AddPeer(peer p2p.Peer) {
  74. r.mtx.RLock()
  75. defer r.mtx.RUnlock()
  76. if r.syncer != nil {
  77. r.syncer.AddPeer(peer)
  78. }
  79. }
  80. // RemovePeer implements p2p.Reactor.
  81. func (r *Reactor) RemovePeer(peer p2p.Peer, reason interface{}) {
  82. r.mtx.RLock()
  83. defer r.mtx.RUnlock()
  84. if r.syncer != nil {
  85. r.syncer.RemovePeer(peer)
  86. }
  87. }
  88. // Receive implements p2p.Reactor.
  89. func (r *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
  90. if !r.IsRunning() {
  91. return
  92. }
  93. msg, err := decodeMsg(msgBytes)
  94. if err != nil {
  95. r.Logger.Error("Error decoding message", "src", src, "chId", chID, "err", err)
  96. r.Switch.StopPeerForError(src, err)
  97. return
  98. }
  99. err = validateMsg(msg)
  100. if err != nil {
  101. r.Logger.Error("Invalid message", "peer", src, "msg", msg, "err", err)
  102. r.Switch.StopPeerForError(src, err)
  103. return
  104. }
  105. switch chID {
  106. case SnapshotChannel:
  107. switch msg := msg.(type) {
  108. case *ssproto.SnapshotsRequest:
  109. snapshots, err := r.recentSnapshots(recentSnapshots)
  110. if err != nil {
  111. r.Logger.Error("Failed to fetch snapshots", "err", err)
  112. return
  113. }
  114. for _, snapshot := range snapshots {
  115. r.Logger.Debug("Advertising snapshot", "height", snapshot.Height,
  116. "format", snapshot.Format, "peer", src.ID())
  117. src.Send(chID, mustEncodeMsg(&ssproto.SnapshotsResponse{
  118. Height: snapshot.Height,
  119. Format: snapshot.Format,
  120. Chunks: snapshot.Chunks,
  121. Hash: snapshot.Hash,
  122. Metadata: snapshot.Metadata,
  123. }))
  124. }
  125. case *ssproto.SnapshotsResponse:
  126. r.mtx.RLock()
  127. defer r.mtx.RUnlock()
  128. if r.syncer == nil {
  129. r.Logger.Debug("Received unexpected snapshot, no state sync in progress")
  130. return
  131. }
  132. r.Logger.Debug("Received snapshot", "height", msg.Height, "format", msg.Format, "peer", src.ID())
  133. _, err := r.syncer.AddSnapshot(src, &snapshot{
  134. Height: msg.Height,
  135. Format: msg.Format,
  136. Chunks: msg.Chunks,
  137. Hash: msg.Hash,
  138. Metadata: msg.Metadata,
  139. })
  140. // TODO: We may want to consider punishing the peer for certain errors
  141. if err != nil {
  142. r.Logger.Error("Failed to add snapshot", "height", msg.Height, "format", msg.Format,
  143. "peer", src.ID(), "err", err)
  144. return
  145. }
  146. default:
  147. r.Logger.Error("Received unknown message %T", msg)
  148. }
  149. case ChunkChannel:
  150. switch msg := msg.(type) {
  151. case *ssproto.ChunkRequest:
  152. r.Logger.Debug("Received chunk request", "height", msg.Height, "format", msg.Format,
  153. "chunk", msg.Index, "peer", src.ID())
  154. resp, err := r.conn.LoadSnapshotChunkSync(abci.RequestLoadSnapshotChunk{
  155. Height: msg.Height,
  156. Format: msg.Format,
  157. Chunk: msg.Index,
  158. })
  159. if err != nil {
  160. r.Logger.Error("Failed to load chunk", "height", msg.Height, "format", msg.Format,
  161. "chunk", msg.Index, "err", err)
  162. return
  163. }
  164. r.Logger.Debug("Sending chunk", "height", msg.Height, "format", msg.Format,
  165. "chunk", msg.Index, "peer", src.ID())
  166. src.Send(ChunkChannel, mustEncodeMsg(&ssproto.ChunkResponse{
  167. Height: msg.Height,
  168. Format: msg.Format,
  169. Index: msg.Index,
  170. Chunk: resp.Chunk,
  171. Missing: resp.Chunk == nil,
  172. }))
  173. case *ssproto.ChunkResponse:
  174. r.mtx.RLock()
  175. defer r.mtx.RUnlock()
  176. if r.syncer == nil {
  177. r.Logger.Debug("Received unexpected chunk, no state sync in progress", "peer", src.ID())
  178. return
  179. }
  180. r.Logger.Debug("Received chunk, adding to sync", "height", msg.Height, "format", msg.Format,
  181. "chunk", msg.Index, "peer", src.ID())
  182. _, err := r.syncer.AddChunk(&chunk{
  183. Height: msg.Height,
  184. Format: msg.Format,
  185. Index: msg.Index,
  186. Chunk: msg.Chunk,
  187. Sender: src.ID(),
  188. })
  189. if err != nil {
  190. r.Logger.Error("Failed to add chunk", "height", msg.Height, "format", msg.Format,
  191. "chunk", msg.Index, "err", err)
  192. return
  193. }
  194. default:
  195. r.Logger.Error("Received unknown message %T", msg)
  196. }
  197. default:
  198. r.Logger.Error("Received message on invalid channel %x", chID)
  199. }
  200. }
  201. // recentSnapshots fetches the n most recent snapshots from the app
  202. func (r *Reactor) recentSnapshots(n uint32) ([]*snapshot, error) {
  203. resp, err := r.conn.ListSnapshotsSync(abci.RequestListSnapshots{})
  204. if err != nil {
  205. return nil, err
  206. }
  207. sort.Slice(resp.Snapshots, func(i, j int) bool {
  208. a := resp.Snapshots[i]
  209. b := resp.Snapshots[j]
  210. switch {
  211. case a.Height > b.Height:
  212. return true
  213. case a.Height == b.Height && a.Format > b.Format:
  214. return true
  215. default:
  216. return false
  217. }
  218. })
  219. snapshots := make([]*snapshot, 0, n)
  220. for i, s := range resp.Snapshots {
  221. if i >= recentSnapshots {
  222. break
  223. }
  224. snapshots = append(snapshots, &snapshot{
  225. Height: s.Height,
  226. Format: s.Format,
  227. Chunks: s.Chunks,
  228. Hash: s.Hash,
  229. Metadata: s.Metadata,
  230. })
  231. }
  232. return snapshots, nil
  233. }
  234. // Sync runs a state sync, returning the new state and last commit at the snapshot height.
  235. // The caller must store the state and commit in the state database and block store.
  236. func (r *Reactor) Sync(stateProvider StateProvider, discoveryTime time.Duration) (sm.State, *types.Commit, error) {
  237. r.mtx.Lock()
  238. if r.syncer != nil {
  239. r.mtx.Unlock()
  240. return sm.State{}, nil, errors.New("a state sync is already in progress")
  241. }
  242. r.syncer = newSyncer(r.cfg, r.Logger, r.conn, r.connQuery, stateProvider, r.tempDir)
  243. r.mtx.Unlock()
  244. hook := func() {
  245. r.Logger.Debug("Requesting snapshots from known peers")
  246. // Request snapshots from all currently connected peers
  247. r.Switch.Broadcast(SnapshotChannel, mustEncodeMsg(&ssproto.SnapshotsRequest{}))
  248. }
  249. hook()
  250. state, commit, err := r.syncer.SyncAny(discoveryTime, hook)
  251. r.mtx.Lock()
  252. r.syncer = nil
  253. r.mtx.Unlock()
  254. return state, commit, err
  255. }