You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

436 lines
10 KiB

  1. package consensus
  2. import (
  3. "bufio"
  4. "bytes"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "os"
  9. "reflect"
  10. "strconv"
  11. "strings"
  12. "time"
  13. . "github.com/tendermint/go-common"
  14. "github.com/tendermint/go-wire"
  15. sm "github.com/tendermint/tendermint/state"
  16. "github.com/tendermint/tendermint/tailseek"
  17. "github.com/tendermint/tendermint/types"
  18. )
  19. //--------------------------------------------------------
  20. // types and functions for savings consensus messages
  21. type ConsensusLogMessage struct {
  22. Time time.Time `json:"time"`
  23. Msg ConsensusLogMessageInterface `json:"msg"`
  24. }
  25. type ConsensusLogMessageInterface interface{}
  26. var _ = wire.RegisterInterface(
  27. struct{ ConsensusLogMessageInterface }{},
  28. wire.ConcreteType{&types.EventDataRoundState{}, 0x01},
  29. wire.ConcreteType{msgInfo{}, 0x02},
  30. wire.ConcreteType{timeoutInfo{}, 0x03},
  31. )
  32. // called in newStep and for each pass in receiveRoutine
  33. func (cs *ConsensusState) saveMsg(msg ConsensusLogMessageInterface) {
  34. if cs.msgLogFP != nil {
  35. var n int
  36. var err error
  37. wire.WriteJSON(ConsensusLogMessage{time.Now(), msg}, cs.msgLogFP, &n, &err)
  38. wire.WriteTo([]byte("\n"), cs.msgLogFP, &n, &err) // one message per line
  39. if err != nil {
  40. log.Error("Error writing to consensus message log file", "err", err, "msg", msg)
  41. }
  42. }
  43. }
  44. //--------------------------------------------------------
  45. // replay the messages
  46. // Interactive playback
  47. func (cs ConsensusState) ReplayConsole(file string) error {
  48. return cs.replay(file, true)
  49. }
  50. // Full playback, with tests
  51. func (cs ConsensusState) ReplayMessages(file string) error {
  52. return cs.replay(file, false)
  53. }
  54. func (cs *ConsensusState) catchupReplay(height int) error {
  55. if !cs.msgLogExists {
  56. return nil
  57. }
  58. if cs.msgLogFP == nil {
  59. log.Warn("consensus msg log is nil")
  60. return nil
  61. }
  62. log.Notice("Catchup by replaying consensus messages")
  63. f := cs.msgLogFP
  64. n, err := seek.SeekFromEndOfFile(f, func(lineBytes []byte) bool {
  65. var err error
  66. var msg ConsensusLogMessage
  67. wire.ReadJSON(&msg, lineBytes, &err)
  68. if err != nil {
  69. panic(Fmt("Failed to read cs_msg_log json: %v", err))
  70. }
  71. m, ok := msg.Msg.(*types.EventDataRoundState)
  72. if ok && m.Step == RoundStepNewHeight.String() {
  73. r, err := f.Seek(0, 1)
  74. // TODO: ensure the height matches
  75. return true
  76. }
  77. return false
  78. })
  79. if err != nil {
  80. return err
  81. }
  82. // we found it, now we can replay everything
  83. pb := newPlayback("", cs.msgLogFP, cs, nil, cs.state.Copy())
  84. reader := bufio.NewReader(cs.msgLogFP)
  85. i := 0
  86. for {
  87. i += 1
  88. msgBytes, err := reader.ReadBytes('\n')
  89. if err == io.EOF {
  90. break
  91. } else if err != nil {
  92. return err
  93. } else if len(msgBytes) == 0 {
  94. continue
  95. }
  96. // the first msg is the NewHeight event, so we can ignore it
  97. if i == 1 {
  98. continue
  99. }
  100. // NOTE: since the priv key is set when the msgs are received
  101. // it will attempt to eg double sign but we can just ignore it
  102. // since the votes will be replayed and we'll get to the next step
  103. if err := pb.readReplayMessage(msgBytes); err != nil {
  104. return err
  105. }
  106. if i >= n {
  107. break
  108. }
  109. }
  110. return nil
  111. }
  112. // replay all msgs or start the console
  113. func (cs *ConsensusState) replay(file string, console bool) error {
  114. if cs.IsRunning() {
  115. return errors.New("cs is already running, cannot replay")
  116. }
  117. cs.startForReplay()
  118. // set the FP to nil so we don't overwrite
  119. if cs.msgLogFP != nil {
  120. cs.msgLogFP.Close()
  121. cs.msgLogFP = nil
  122. }
  123. // ensure all new step events are regenerated as expected
  124. newStepCh := cs.evsw.SubscribeToEvent("replay-test", types.EventStringNewRoundStep(), 1)
  125. fp, err := os.OpenFile(file, os.O_RDONLY, 0666)
  126. if err != nil {
  127. return err
  128. }
  129. pb := newPlayback(file, fp, cs, newStepCh, cs.state.Copy())
  130. defer pb.fp.Close()
  131. var nextN int // apply N msgs in a row
  132. for pb.scanner.Scan() {
  133. if nextN == 0 && console {
  134. nextN = pb.replayConsoleLoop()
  135. }
  136. if err := pb.readReplayMessage(pb.scanner.Bytes()); err != nil {
  137. return err
  138. }
  139. if nextN > 0 {
  140. nextN -= 1
  141. }
  142. pb.count += 1
  143. }
  144. return nil
  145. }
  146. //------------------------------------------------
  147. // playback manager
  148. type playback struct {
  149. cs *ConsensusState
  150. file string
  151. fp *os.File
  152. scanner *bufio.Scanner
  153. newStepCh chan interface{}
  154. genesisState *sm.State
  155. count int
  156. }
  157. func newPlayback(file string, fp *os.File, cs *ConsensusState, ch chan interface{}, genState *sm.State) *playback {
  158. return &playback{
  159. cs: cs,
  160. file: file,
  161. newStepCh: ch,
  162. genesisState: genState,
  163. fp: fp,
  164. scanner: bufio.NewScanner(fp),
  165. }
  166. }
  167. // go back count steps by resetting the state and running (pb.count - count) steps
  168. func (pb *playback) replayReset(count int) error {
  169. pb.cs.Stop()
  170. newCs := NewConsensusState(pb.genesisState.Copy(), pb.cs.proxyAppCtx, pb.cs.blockStore, pb.cs.mempool)
  171. newCs.SetEventSwitch(pb.cs.evsw)
  172. // ensure all new step events are regenerated as expected
  173. pb.newStepCh = newCs.evsw.SubscribeToEvent("replay-test", types.EventStringNewRoundStep(), 1)
  174. newCs.startForReplay()
  175. pb.fp.Close()
  176. fp, err := os.OpenFile(pb.file, os.O_RDONLY, 0666)
  177. if err != nil {
  178. return err
  179. }
  180. pb.fp = fp
  181. pb.scanner = bufio.NewScanner(fp)
  182. count = pb.count - count
  183. log.Notice(Fmt("Reseting from %d to %d", pb.count, count))
  184. pb.count = 0
  185. pb.cs = newCs
  186. for i := 0; pb.scanner.Scan() && i < count; i++ {
  187. if err := pb.readReplayMessage(pb.scanner.Bytes()); err != nil {
  188. return err
  189. }
  190. pb.count += 1
  191. }
  192. return nil
  193. }
  194. func (cs *ConsensusState) startForReplay() {
  195. cs.BaseService.OnStart()
  196. go cs.receiveRoutine(0)
  197. // since we replay tocks we just ignore ticks
  198. go func() {
  199. for {
  200. select {
  201. case <-cs.tickChan:
  202. case <-cs.Quit:
  203. return
  204. }
  205. }
  206. }()
  207. }
  208. // console function for parsing input and running commands
  209. func (pb *playback) replayConsoleLoop() int {
  210. for {
  211. fmt.Printf("> ")
  212. bufReader := bufio.NewReader(os.Stdin)
  213. line, more, err := bufReader.ReadLine()
  214. if more {
  215. Exit("input is too long")
  216. } else if err != nil {
  217. Exit(err.Error())
  218. }
  219. tokens := strings.Split(string(line), " ")
  220. if len(tokens) == 0 {
  221. continue
  222. }
  223. switch tokens[0] {
  224. case "next":
  225. // "next" -> replay next message
  226. // "next N" -> replay next N messages
  227. if len(tokens) == 1 {
  228. return 0
  229. } else {
  230. i, err := strconv.Atoi(tokens[1])
  231. if err != nil {
  232. fmt.Println("next takes an integer argument")
  233. } else {
  234. return i
  235. }
  236. }
  237. case "back":
  238. // "back" -> go back one message
  239. // "back N" -> go back N messages
  240. // NOTE: "back" is not supported in the state machine design,
  241. // so we restart and replay up to
  242. if len(tokens) == 1 {
  243. pb.replayReset(1)
  244. } else {
  245. i, err := strconv.Atoi(tokens[1])
  246. if err != nil {
  247. fmt.Println("back takes an integer argument")
  248. } else if i > pb.count {
  249. fmt.Printf("argument to back must not be larger than the current count (%d)\n", pb.count)
  250. } else {
  251. pb.replayReset(i)
  252. }
  253. }
  254. case "rs":
  255. // "rs" -> print entire round state
  256. // "rs short" -> print height/round/step
  257. // "rs <field>" -> print another field of the round state
  258. rs := pb.cs.RoundState
  259. if len(tokens) == 1 {
  260. fmt.Println(rs)
  261. } else {
  262. switch tokens[1] {
  263. case "short":
  264. fmt.Printf("%v/%v/%v\n", rs.Height, rs.Round, rs.Step)
  265. case "validators":
  266. fmt.Println(rs.Validators)
  267. case "proposal":
  268. fmt.Println(rs.Proposal)
  269. case "proposal_block":
  270. fmt.Printf("%v %v\n", rs.ProposalBlockParts.StringShort(), rs.ProposalBlock.StringShort())
  271. case "locked_round":
  272. fmt.Println(rs.LockedRound)
  273. case "locked_block":
  274. fmt.Printf("%v %v\n", rs.LockedBlockParts.StringShort(), rs.LockedBlock.StringShort())
  275. case "votes":
  276. fmt.Println(rs.Votes.StringIndented(" "))
  277. default:
  278. fmt.Println("Unknown option", tokens[1])
  279. }
  280. }
  281. case "n":
  282. fmt.Println(pb.count)
  283. }
  284. }
  285. return 0
  286. }
  287. func (pb *playback) readReplayMessage(msgBytes []byte) error {
  288. var err error
  289. var msg ConsensusLogMessage
  290. wire.ReadJSON(&msg, msgBytes, &err)
  291. if err != nil {
  292. return fmt.Errorf("Error reading json data: %v", err)
  293. }
  294. // for logging
  295. switch m := msg.Msg.(type) {
  296. case *types.EventDataRoundState:
  297. log.Notice("New Step", "height", m.Height, "round", m.Round, "step", m.Step)
  298. // these are playback checks
  299. ticker := time.After(time.Second * 2)
  300. if pb.newStepCh != nil {
  301. select {
  302. case mi := <-pb.newStepCh:
  303. m2 := mi.(*types.EventDataRoundState)
  304. if m.Height != m2.Height || m.Round != m2.Round || m.Step != m2.Step {
  305. return fmt.Errorf("RoundState mismatch. Got %v; Expected %v", m2, m)
  306. }
  307. case <-ticker:
  308. return fmt.Errorf("Failed to read off newStepCh")
  309. }
  310. }
  311. case msgInfo:
  312. peerKey := m.PeerKey
  313. if peerKey == "" {
  314. peerKey = "local"
  315. }
  316. switch msg := m.Msg.(type) {
  317. case *ProposalMessage:
  318. p := msg.Proposal
  319. log.Notice("Proposal", "height", p.Height, "round", p.Round, "header",
  320. p.BlockPartsHeader, "pol", p.POLRound, "peer", peerKey)
  321. case *BlockPartMessage:
  322. log.Notice("BlockPart", "height", msg.Height, "round", msg.Round, "peer", peerKey)
  323. case *VoteMessage:
  324. v := msg.Vote
  325. log.Notice("Vote", "height", v.Height, "round", v.Round, "type", v.Type,
  326. "hash", v.BlockHash, "header", v.BlockPartsHeader, "peer", peerKey)
  327. }
  328. // internal or from peer
  329. if m.PeerKey == "" {
  330. pb.cs.internalMsgQueue <- m
  331. } else {
  332. pb.cs.peerMsgQueue <- m
  333. }
  334. case timeoutInfo:
  335. log.Notice("Timeout", "height", m.Height, "round", m.Round, "step", m.Step, "dur", m.Duration)
  336. pb.cs.tockChan <- m
  337. default:
  338. return fmt.Errorf("Unknown ConsensusLogMessage type: %v", reflect.TypeOf(msg.Msg))
  339. }
  340. return nil
  341. }
  342. // Read lines starting from the end of the file until we read a line that causes found to return true
  343. func SeekFromEndOfFile(f *os.File, found func([]byte) bool) (nLines int, err error) {
  344. var current int64
  345. // start at the end
  346. current, err = f.Seek(0, 2)
  347. if err != nil {
  348. fmt.Println("1")
  349. return
  350. }
  351. // backup until we find the the right line
  352. for {
  353. current -= 1
  354. if current < 0 {
  355. return
  356. }
  357. // backup one and read a new byte
  358. if _, err = f.Seek(current, 0); err != nil {
  359. fmt.Println("2", current)
  360. return
  361. }
  362. b := make([]byte, 1)
  363. if _, err = f.Read(b); err != nil {
  364. return
  365. }
  366. if b[0] == '\n' || len(b) == 0 {
  367. nLines += 1
  368. // read a full line
  369. reader := bufio.NewReader(f)
  370. lineBytes, _ := reader.ReadBytes('\n')
  371. if len(lineBytes) == 0 {
  372. continue
  373. }
  374. if found(lineBytes) {
  375. f.Seek(current, 0)
  376. return
  377. }
  378. }
  379. }
  380. }