You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

263 lines
6.2 KiB

  1. package statesync
  2. import (
  3. "container/heap"
  4. "fmt"
  5. "sync"
  6. "time"
  7. "github.com/tendermint/tendermint/internal/p2p"
  8. "github.com/tendermint/tendermint/types"
  9. )
  10. type lightBlockResponse struct {
  11. block *types.LightBlock
  12. peer p2p.NodeID
  13. }
  14. // a block queue is used for asynchronously fetching and verifying light blocks
  15. type blockQueue struct {
  16. mtx sync.Mutex
  17. // cursors to keep track of which heights need to be fetched and verified
  18. fetchHeight int64
  19. verifyHeight int64
  20. // termination conditions
  21. stopHeight int64
  22. stopTime time.Time
  23. terminal *types.LightBlock
  24. // track failed heights so we know what blocks to try fetch again
  25. failed *maxIntHeap
  26. // also count retries to know when to give up
  27. retries int
  28. maxRetries int
  29. // store inbound blocks and serve them to a verifying thread via a channel
  30. pending map[int64]lightBlockResponse
  31. verifyCh chan lightBlockResponse
  32. // waiters are workers on idle until a height is required
  33. waiters []chan int64
  34. // this channel is closed once the verification process is complete
  35. doneCh chan struct{}
  36. }
  37. func newBlockQueue(
  38. startHeight, stopHeight int64,
  39. stopTime time.Time,
  40. maxRetries int,
  41. ) *blockQueue {
  42. return &blockQueue{
  43. stopHeight: stopHeight,
  44. stopTime: stopTime,
  45. fetchHeight: startHeight,
  46. verifyHeight: startHeight,
  47. pending: make(map[int64]lightBlockResponse),
  48. failed: &maxIntHeap{},
  49. retries: 0,
  50. maxRetries: maxRetries,
  51. waiters: make([]chan int64, 0),
  52. doneCh: make(chan struct{}),
  53. }
  54. }
  55. // Add adds a block to the queue to be verified and stored
  56. // CONTRACT: light blocks should have passed basic validation
  57. func (q *blockQueue) add(l lightBlockResponse) {
  58. q.mtx.Lock()
  59. defer q.mtx.Unlock()
  60. // return early if the process has already finished
  61. select {
  62. case <-q.doneCh:
  63. return
  64. default:
  65. }
  66. // sometimes more blocks are fetched then what is necessary. If we already
  67. // have what we need then ignore this
  68. if q.terminal != nil && l.block.Height < q.terminal.Height {
  69. return
  70. }
  71. // if the block that was returned is at the verify height then the verifier
  72. // is already waiting for this block so we send it directly to them
  73. if l.block.Height == q.verifyHeight && q.verifyCh != nil {
  74. q.verifyCh <- l
  75. close(q.verifyCh)
  76. q.verifyCh = nil
  77. } else {
  78. // else we add it in the pending bucket
  79. q.pending[l.block.Height] = l
  80. }
  81. // Lastly, if the incoming block is past the stop time and stop height then
  82. // we mark it as the terminal block
  83. if l.block.Height <= q.stopHeight && l.block.Time.Before(q.stopTime) {
  84. q.terminal = l.block
  85. }
  86. }
  87. // NextHeight returns the next height that needs to be retrieved.
  88. // We assume that for every height allocated that the peer will eventually add
  89. // the block or signal that it needs to be retried
  90. func (q *blockQueue) nextHeight() <-chan int64 {
  91. q.mtx.Lock()
  92. defer q.mtx.Unlock()
  93. ch := make(chan int64, 1)
  94. // if a previous process failed then we pick up this one
  95. if q.failed.Len() > 0 {
  96. failedHeight := heap.Pop(q.failed)
  97. ch <- failedHeight.(int64)
  98. close(ch)
  99. return ch
  100. }
  101. if q.terminal == nil {
  102. // return and decrement the fetch height
  103. ch <- q.fetchHeight
  104. q.fetchHeight--
  105. close(ch)
  106. return ch
  107. }
  108. // at this point there is no height that we know we need so we create a
  109. // waiter to hold out for either an outgoing request to fail or a block to
  110. // fail verification
  111. q.waiters = append(q.waiters, ch)
  112. return ch
  113. }
  114. // Finished returns true when the block queue has has all light blocks retrieved,
  115. // verified and stored. There is no more work left to be done
  116. func (q *blockQueue) done() <-chan struct{} {
  117. return q.doneCh
  118. }
  119. // VerifyNext pulls the next block off the pending queue and adds it to a
  120. // channel if it's already there or creates a waiter to add it to the
  121. // channel once it comes in. NOTE: This is assumed to
  122. // be a single thread as light blocks need to be sequentially verified.
  123. func (q *blockQueue) verifyNext() <-chan lightBlockResponse {
  124. q.mtx.Lock()
  125. defer q.mtx.Unlock()
  126. ch := make(chan lightBlockResponse, 1)
  127. select {
  128. case <-q.doneCh:
  129. return ch
  130. default:
  131. }
  132. if lb, ok := q.pending[q.verifyHeight]; ok {
  133. ch <- lb
  134. close(ch)
  135. delete(q.pending, q.verifyHeight)
  136. } else {
  137. q.verifyCh = ch
  138. }
  139. return ch
  140. }
  141. // Retry is called when a dispatcher failed to fetch a light block or the
  142. // fetched light block failed verification. It signals to the queue to add the
  143. // height back to the request queue
  144. func (q *blockQueue) retry(height int64) {
  145. q.mtx.Lock()
  146. defer q.mtx.Unlock()
  147. select {
  148. case <-q.doneCh:
  149. return
  150. default:
  151. }
  152. // we don't need to retry if this is below the terminal height
  153. if q.terminal != nil && height < q.terminal.Height {
  154. return
  155. }
  156. q.retries++
  157. if q.retries >= q.maxRetries {
  158. q._closeChannels()
  159. return
  160. }
  161. if len(q.waiters) > 0 {
  162. q.waiters[0] <- height
  163. close(q.waiters[0])
  164. q.waiters = q.waiters[1:]
  165. } else {
  166. heap.Push(q.failed, height)
  167. }
  168. }
  169. // Success is called when a light block has been successfully verified and
  170. // processed
  171. func (q *blockQueue) success(height int64) {
  172. q.mtx.Lock()
  173. defer q.mtx.Unlock()
  174. if q.terminal != nil && q.verifyHeight == q.terminal.Height {
  175. q._closeChannels()
  176. }
  177. q.verifyHeight--
  178. }
  179. func (q *blockQueue) error() error {
  180. q.mtx.Lock()
  181. defer q.mtx.Unlock()
  182. if q.retries >= q.maxRetries {
  183. return fmt.Errorf("max retries to fetch valid blocks exceeded (%d); "+
  184. "target height: %d, height reached: %d", q.maxRetries, q.stopHeight, q.verifyHeight)
  185. }
  186. return nil
  187. }
  188. // close the queue and respective channels
  189. func (q *blockQueue) close() {
  190. q.mtx.Lock()
  191. defer q.mtx.Unlock()
  192. q._closeChannels()
  193. }
  194. // CONTRACT: must have a write lock. Use close instead
  195. func (q *blockQueue) _closeChannels() {
  196. close(q.doneCh)
  197. // wait for the channel to be drained
  198. select {
  199. case <-q.doneCh:
  200. return
  201. default:
  202. }
  203. for _, ch := range q.waiters {
  204. close(ch)
  205. }
  206. if q.verifyCh != nil {
  207. close(q.verifyCh)
  208. }
  209. }
  210. // A max-heap of ints.
  211. type maxIntHeap []int64
  212. func (h maxIntHeap) Len() int { return len(h) }
  213. func (h maxIntHeap) Less(i, j int) bool { return h[i] < h[j] }
  214. func (h maxIntHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
  215. func (h *maxIntHeap) Push(x interface{}) {
  216. *h = append(*h, x.(int64))
  217. }
  218. func (h *maxIntHeap) Pop() interface{} {
  219. old := *h
  220. n := len(old)
  221. x := old[n-1]
  222. *h = old[0 : n-1]
  223. return x
  224. }