You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

265 lines
6.3 KiB

  1. package statesync
  2. import (
  3. "container/heap"
  4. "fmt"
  5. "sync"
  6. "time"
  7. "github.com/tendermint/tendermint/types"
  8. )
  9. type lightBlockResponse struct {
  10. block *types.LightBlock
  11. peer types.NodeID
  12. }
  13. // a block queue is used for asynchronously fetching and verifying light blocks
  14. type blockQueue struct {
  15. mtx sync.Mutex
  16. // cursors to keep track of which heights need to be fetched and verified
  17. fetchHeight int64
  18. verifyHeight int64
  19. // termination conditions
  20. initialHeight int64
  21. stopHeight int64
  22. stopTime time.Time
  23. terminal *types.LightBlock
  24. // track failed heights so we know what blocks to try fetch again
  25. failed *maxIntHeap
  26. // also count retries to know when to give up
  27. retries int
  28. maxRetries int
  29. // store inbound blocks and serve them to a verifying thread via a channel
  30. pending map[int64]lightBlockResponse
  31. verifyCh chan lightBlockResponse
  32. // waiters are workers on idle until a height is required
  33. waiters []chan int64
  34. // this channel is closed once the verification process is complete
  35. doneCh chan struct{}
  36. }
  37. func newBlockQueue(
  38. startHeight, stopHeight, initialHeight int64,
  39. stopTime time.Time,
  40. maxRetries int,
  41. ) *blockQueue {
  42. return &blockQueue{
  43. stopHeight: stopHeight,
  44. initialHeight: initialHeight,
  45. stopTime: stopTime,
  46. fetchHeight: startHeight,
  47. verifyHeight: startHeight,
  48. pending: make(map[int64]lightBlockResponse),
  49. failed: &maxIntHeap{},
  50. retries: 0,
  51. maxRetries: maxRetries,
  52. waiters: make([]chan int64, 0),
  53. doneCh: make(chan struct{}),
  54. }
  55. }
  56. // Add adds a block to the queue to be verified and stored
  57. // CONTRACT: light blocks should have passed basic validation
  58. func (q *blockQueue) add(l lightBlockResponse) {
  59. q.mtx.Lock()
  60. defer q.mtx.Unlock()
  61. // return early if the process has already finished
  62. select {
  63. case <-q.doneCh:
  64. return
  65. default:
  66. }
  67. // sometimes more blocks are fetched then what is necessary. If we already
  68. // have what we need then ignore this
  69. if q.terminal != nil && l.block.Height < q.terminal.Height {
  70. return
  71. }
  72. // if the block that was returned is at the verify height then the verifier
  73. // is already waiting for this block so we send it directly to them
  74. if l.block.Height == q.verifyHeight && q.verifyCh != nil {
  75. q.verifyCh <- l
  76. close(q.verifyCh)
  77. q.verifyCh = nil
  78. } else {
  79. // else we add it in the pending bucket
  80. q.pending[l.block.Height] = l
  81. }
  82. // Lastly, if the incoming block is past the stop time and stop height or
  83. // is equal to the initial height then we mark it as the terminal block.
  84. if l.block.Height <= q.stopHeight && l.block.Time.Before(q.stopTime) ||
  85. l.block.Height == q.initialHeight {
  86. q.terminal = l.block
  87. }
  88. }
  89. // NextHeight returns the next height that needs to be retrieved.
  90. // We assume that for every height allocated that the peer will eventually add
  91. // the block or signal that it needs to be retried
  92. func (q *blockQueue) nextHeight() <-chan int64 {
  93. q.mtx.Lock()
  94. defer q.mtx.Unlock()
  95. ch := make(chan int64, 1)
  96. // if a previous process failed then we pick up this one
  97. if q.failed.Len() > 0 {
  98. failedHeight := heap.Pop(q.failed)
  99. ch <- failedHeight.(int64)
  100. close(ch)
  101. return ch
  102. }
  103. if q.terminal == nil && q.fetchHeight >= q.initialHeight {
  104. // return and decrement the fetch height
  105. ch <- q.fetchHeight
  106. q.fetchHeight--
  107. close(ch)
  108. return ch
  109. }
  110. // at this point there is no height that we know we need so we create a
  111. // waiter to hold out for either an outgoing request to fail or a block to
  112. // fail verification
  113. q.waiters = append(q.waiters, ch)
  114. return ch
  115. }
  116. // Finished returns true when the block queue has has all light blocks retrieved,
  117. // verified and stored. There is no more work left to be done
  118. func (q *blockQueue) done() <-chan struct{} {
  119. return q.doneCh
  120. }
  121. // VerifyNext pulls the next block off the pending queue and adds it to a
  122. // channel if it's already there or creates a waiter to add it to the
  123. // channel once it comes in. NOTE: This is assumed to
  124. // be a single thread as light blocks need to be sequentially verified.
  125. func (q *blockQueue) verifyNext() <-chan lightBlockResponse {
  126. q.mtx.Lock()
  127. defer q.mtx.Unlock()
  128. ch := make(chan lightBlockResponse, 1)
  129. select {
  130. case <-q.doneCh:
  131. return ch
  132. default:
  133. }
  134. if lb, ok := q.pending[q.verifyHeight]; ok {
  135. ch <- lb
  136. close(ch)
  137. delete(q.pending, q.verifyHeight)
  138. } else {
  139. q.verifyCh = ch
  140. }
  141. return ch
  142. }
  143. // Retry is called when a dispatcher failed to fetch a light block or the
  144. // fetched light block failed verification. It signals to the queue to add the
  145. // height back to the request queue
  146. func (q *blockQueue) retry(height int64) {
  147. q.mtx.Lock()
  148. defer q.mtx.Unlock()
  149. select {
  150. case <-q.doneCh:
  151. return
  152. default:
  153. }
  154. // we don't need to retry if this is below the terminal height
  155. if q.terminal != nil && height < q.terminal.Height {
  156. return
  157. }
  158. q.retries++
  159. if q.retries >= q.maxRetries {
  160. q._closeChannels()
  161. return
  162. }
  163. if len(q.waiters) > 0 {
  164. q.waiters[0] <- height
  165. close(q.waiters[0])
  166. q.waiters = q.waiters[1:]
  167. } else {
  168. heap.Push(q.failed, height)
  169. }
  170. }
  171. // Success is called when a light block has been successfully verified and
  172. // processed
  173. func (q *blockQueue) success(height int64) {
  174. q.mtx.Lock()
  175. defer q.mtx.Unlock()
  176. if q.terminal != nil && q.verifyHeight == q.terminal.Height {
  177. q._closeChannels()
  178. }
  179. q.verifyHeight--
  180. }
  181. func (q *blockQueue) error() error {
  182. q.mtx.Lock()
  183. defer q.mtx.Unlock()
  184. if q.retries >= q.maxRetries {
  185. return fmt.Errorf("max retries to fetch valid blocks exceeded (%d); "+
  186. "target height: %d, height reached: %d", q.maxRetries, q.stopHeight, q.verifyHeight)
  187. }
  188. return nil
  189. }
  190. // close the queue and respective channels
  191. func (q *blockQueue) close() {
  192. q.mtx.Lock()
  193. defer q.mtx.Unlock()
  194. q._closeChannels()
  195. }
  196. // CONTRACT: must have a write lock. Use close instead
  197. func (q *blockQueue) _closeChannels() {
  198. close(q.doneCh)
  199. // wait for the channel to be drained
  200. select {
  201. case <-q.doneCh:
  202. return
  203. default:
  204. }
  205. for _, ch := range q.waiters {
  206. close(ch)
  207. }
  208. if q.verifyCh != nil {
  209. close(q.verifyCh)
  210. }
  211. }
  // maxIntHeap is a max-heap of int64 values. It implements heap.Interface
  // from container/heap, so heap.Pop returns the largest element first.
  type maxIntHeap []int64

  // Len reports the number of elements in the heap.
  func (h maxIntHeap) Len() int { return len(h) }

  // Less reports whether element i must sort before element j. Larger values
  // come first, which is what makes this a max-heap (container/heap pops the
  // element that is "least" according to Less).
  func (h maxIntHeap) Less(i, j int) bool { return h[i] > h[j] }

  // Swap exchanges the elements at indices i and j.
  func (h maxIntHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }

  // Push appends x, which must be an int64, to the underlying slice. It is
  // meant to be called through heap.Push, which restores the heap ordering.
  func (h *maxIntHeap) Push(x interface{}) {
  	*h = append(*h, x.(int64))
  }

  // Pop removes and returns the last element of the underlying slice. It is
  // meant to be called through heap.Pop, which first moves the top of the heap
  // to the last position. It panics on an empty heap.
  func (h *maxIntHeap) Pop() interface{} {
  	old := *h
  	last := len(old) - 1
  	x := old[last]
  	*h = old[:last]
  	return x
  }