You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

388 lines
8.8 KiB

p2p: file descriptor leaks (#3150) * close peer's connection to avoid fd leak Fixes #2967 * rename peer#Addr to RemoteAddr * fix test * fixes after Ethan's review * bring back the check * changelog entry * write a test for switch#acceptRoutine * increase timeouts? :( * remove extra assertNPeersWithTimeout * simplify test * assert number of peers (just to be safe) * Cleanup in OnStop * run tests with verbose flag on CircleCI * spawn a reading routine to prevent connection from closing * get port from the listener random port is faster, but often results in ``` panic: listen tcp 127.0.0.1:44068: bind: address already in use [recovered] panic: listen tcp 127.0.0.1:44068: bind: address already in use goroutine 79 [running]: testing.tRunner.func1(0xc0001bd600) /usr/local/go/src/testing/testing.go:792 +0x387 panic(0x974d20, 0xc0001b0500) /usr/local/go/src/runtime/panic.go:513 +0x1b9 github.com/tendermint/tendermint/p2p.MakeSwitch(0xc0000f42a0, 0x0, 0x9fb9cc, 0x9, 0x9fc346, 0xb, 0xb42128, 0x0, 0x0, 0x0, ...) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:182 +0xa28 github.com/tendermint/tendermint/p2p.MakeConnectedSwitches(0xc0000f42a0, 0x2, 0xb42128, 0xb41eb8, 0x4f1205, 0xc0001bed80, 0x4f16ed) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:75 +0xf9 github.com/tendermint/tendermint/p2p.MakeSwitchPair(0xbb8d20, 0xc0001bd600, 0xb42128, 0x2f7, 0x4f16c0) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:94 +0x4c github.com/tendermint/tendermint/p2p.TestSwitches(0xc0001bd600) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:117 +0x58 testing.tRunner(0xc0001bd600, 0xb42038) /usr/local/go/src/testing/testing.go:827 +0xbf created by testing.(*T).Run /usr/local/go/src/testing/testing.go:878 +0x353 exit status 2 FAIL github.com/tendermint/tendermint/p2p 0.350s ```
6 years ago
p2p: file descriptor leaks (#3150) * close peer's connection to avoid fd leak Fixes #2967 * rename peer#Addr to RemoteAddr * fix test * fixes after Ethan's review * bring back the check * changelog entry * write a test for switch#acceptRoutine * increase timeouts? :( * remove extra assertNPeersWithTimeout * simplify test * assert number of peers (just to be safe) * Cleanup in OnStop * run tests with verbose flag on CircleCI * spawn a reading routine to prevent connection from closing * get port from the listener random port is faster, but often results in ``` panic: listen tcp 127.0.0.1:44068: bind: address already in use [recovered] panic: listen tcp 127.0.0.1:44068: bind: address already in use goroutine 79 [running]: testing.tRunner.func1(0xc0001bd600) /usr/local/go/src/testing/testing.go:792 +0x387 panic(0x974d20, 0xc0001b0500) /usr/local/go/src/runtime/panic.go:513 +0x1b9 github.com/tendermint/tendermint/p2p.MakeSwitch(0xc0000f42a0, 0x0, 0x9fb9cc, 0x9, 0x9fc346, 0xb, 0xb42128, 0x0, 0x0, 0x0, ...) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:182 +0xa28 github.com/tendermint/tendermint/p2p.MakeConnectedSwitches(0xc0000f42a0, 0x2, 0xb42128, 0xb41eb8, 0x4f1205, 0xc0001bed80, 0x4f16ed) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:75 +0xf9 github.com/tendermint/tendermint/p2p.MakeSwitchPair(0xbb8d20, 0xc0001bd600, 0xb42128, 0x2f7, 0x4f16c0) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:94 +0x4c github.com/tendermint/tendermint/p2p.TestSwitches(0xc0001bd600) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:117 +0x58 testing.tRunner(0xc0001bd600, 0xb42038) /usr/local/go/src/testing/testing.go:827 +0xbf created by testing.(*T).Run /usr/local/go/src/testing/testing.go:878 +0x353 exit status 2 FAIL github.com/tendermint/tendermint/p2p 0.350s ```
6 years ago
p2p: file descriptor leaks (#3150) * close peer's connection to avoid fd leak Fixes #2967 * rename peer#Addr to RemoteAddr * fix test * fixes after Ethan's review * bring back the check * changelog entry * write a test for switch#acceptRoutine * increase timeouts? :( * remove extra assertNPeersWithTimeout * simplify test * assert number of peers (just to be safe) * Cleanup in OnStop * run tests with verbose flag on CircleCI * spawn a reading routine to prevent connection from closing * get port from the listener random port is faster, but often results in ``` panic: listen tcp 127.0.0.1:44068: bind: address already in use [recovered] panic: listen tcp 127.0.0.1:44068: bind: address already in use goroutine 79 [running]: testing.tRunner.func1(0xc0001bd600) /usr/local/go/src/testing/testing.go:792 +0x387 panic(0x974d20, 0xc0001b0500) /usr/local/go/src/runtime/panic.go:513 +0x1b9 github.com/tendermint/tendermint/p2p.MakeSwitch(0xc0000f42a0, 0x0, 0x9fb9cc, 0x9, 0x9fc346, 0xb, 0xb42128, 0x0, 0x0, 0x0, ...) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:182 +0xa28 github.com/tendermint/tendermint/p2p.MakeConnectedSwitches(0xc0000f42a0, 0x2, 0xb42128, 0xb41eb8, 0x4f1205, 0xc0001bed80, 0x4f16ed) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:75 +0xf9 github.com/tendermint/tendermint/p2p.MakeSwitchPair(0xbb8d20, 0xc0001bd600, 0xb42128, 0x2f7, 0x4f16c0) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:94 +0x4c github.com/tendermint/tendermint/p2p.TestSwitches(0xc0001bd600) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:117 +0x58 testing.tRunner(0xc0001bd600, 0xb42038) /usr/local/go/src/testing/testing.go:827 +0xbf created by testing.(*T).Run /usr/local/go/src/testing/testing.go:878 +0x353 exit status 2 FAIL github.com/tendermint/tendermint/p2p 0.350s ```
6 years ago
p2p: file descriptor leaks (#3150) * close peer's connection to avoid fd leak Fixes #2967 * rename peer#Addr to RemoteAddr * fix test * fixes after Ethan's review * bring back the check * changelog entry * write a test for switch#acceptRoutine * increase timeouts? :( * remove extra assertNPeersWithTimeout * simplify test * assert number of peers (just to be safe) * Cleanup in OnStop * run tests with verbose flag on CircleCI * spawn a reading routine to prevent connection from closing * get port from the listener random port is faster, but often results in ``` panic: listen tcp 127.0.0.1:44068: bind: address already in use [recovered] panic: listen tcp 127.0.0.1:44068: bind: address already in use goroutine 79 [running]: testing.tRunner.func1(0xc0001bd600) /usr/local/go/src/testing/testing.go:792 +0x387 panic(0x974d20, 0xc0001b0500) /usr/local/go/src/runtime/panic.go:513 +0x1b9 github.com/tendermint/tendermint/p2p.MakeSwitch(0xc0000f42a0, 0x0, 0x9fb9cc, 0x9, 0x9fc346, 0xb, 0xb42128, 0x0, 0x0, 0x0, ...) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:182 +0xa28 github.com/tendermint/tendermint/p2p.MakeConnectedSwitches(0xc0000f42a0, 0x2, 0xb42128, 0xb41eb8, 0x4f1205, 0xc0001bed80, 0x4f16ed) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:75 +0xf9 github.com/tendermint/tendermint/p2p.MakeSwitchPair(0xbb8d20, 0xc0001bd600, 0xb42128, 0x2f7, 0x4f16c0) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:94 +0x4c github.com/tendermint/tendermint/p2p.TestSwitches(0xc0001bd600) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:117 +0x58 testing.tRunner(0xc0001bd600, 0xb42038) /usr/local/go/src/testing/testing.go:827 +0xbf created by testing.(*T).Run /usr/local/go/src/testing/testing.go:878 +0x353 exit status 2 FAIL github.com/tendermint/tendermint/p2p 0.350s ```
6 years ago
p2p: file descriptor leaks (#3150) * close peer's connection to avoid fd leak Fixes #2967 * rename peer#Addr to RemoteAddr * fix test * fixes after Ethan's review * bring back the check * changelog entry * write a test for switch#acceptRoutine * increase timeouts? :( * remove extra assertNPeersWithTimeout * simplify test * assert number of peers (just to be safe) * Cleanup in OnStop * run tests with verbose flag on CircleCI * spawn a reading routine to prevent connection from closing * get port from the listener random port is faster, but often results in ``` panic: listen tcp 127.0.0.1:44068: bind: address already in use [recovered] panic: listen tcp 127.0.0.1:44068: bind: address already in use goroutine 79 [running]: testing.tRunner.func1(0xc0001bd600) /usr/local/go/src/testing/testing.go:792 +0x387 panic(0x974d20, 0xc0001b0500) /usr/local/go/src/runtime/panic.go:513 +0x1b9 github.com/tendermint/tendermint/p2p.MakeSwitch(0xc0000f42a0, 0x0, 0x9fb9cc, 0x9, 0x9fc346, 0xb, 0xb42128, 0x0, 0x0, 0x0, ...) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:182 +0xa28 github.com/tendermint/tendermint/p2p.MakeConnectedSwitches(0xc0000f42a0, 0x2, 0xb42128, 0xb41eb8, 0x4f1205, 0xc0001bed80, 0x4f16ed) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:75 +0xf9 github.com/tendermint/tendermint/p2p.MakeSwitchPair(0xbb8d20, 0xc0001bd600, 0xb42128, 0x2f7, 0x4f16c0) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:94 +0x4c github.com/tendermint/tendermint/p2p.TestSwitches(0xc0001bd600) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:117 +0x58 testing.tRunner(0xc0001bd600, 0xb42038) /usr/local/go/src/testing/testing.go:827 +0xbf created by testing.(*T).Run /usr/local/go/src/testing/testing.go:878 +0x353 exit status 2 FAIL github.com/tendermint/tendermint/p2p 0.350s ```
6 years ago
  1. package p2p
  2. import (
  3. "fmt"
  4. "net"
  5. "time"
  6. cmn "github.com/tendermint/tendermint/libs/common"
  7. "github.com/tendermint/tendermint/libs/log"
  8. tmconn "github.com/tendermint/tendermint/p2p/conn"
  9. )
  10. const metricsTickerDuration = 10 * time.Second
  11. // Peer is an interface representing a peer connected on a reactor.
  12. type Peer interface {
  13. cmn.Service
  14. FlushStop()
  15. ID() ID // peer's cryptographic ID
  16. RemoteIP() net.IP // remote IP of the connection
  17. RemoteAddr() net.Addr // remote address of the connection
  18. IsOutbound() bool // did we dial the peer
  19. IsPersistent() bool // do we redial this peer when we disconnect
  20. CloseConn() error // close original connection
  21. NodeInfo() NodeInfo // peer's info
  22. Status() tmconn.ConnectionStatus
  23. OriginalAddr() *NetAddress // original address for outbound peers
  24. Send(byte, []byte) bool
  25. TrySend(byte, []byte) bool
  26. Set(string, interface{})
  27. Get(string) interface{}
  28. }
  29. //----------------------------------------------------------
  30. // peerConn contains the raw connection and its config.
  31. type peerConn struct {
  32. outbound bool
  33. persistent bool
  34. conn net.Conn // source connection
  35. originalAddr *NetAddress // nil for inbound connections
  36. // cached RemoteIP()
  37. ip net.IP
  38. }
  39. func newPeerConn(
  40. outbound, persistent bool,
  41. conn net.Conn,
  42. originalAddr *NetAddress,
  43. ) peerConn {
  44. return peerConn{
  45. outbound: outbound,
  46. persistent: persistent,
  47. conn: conn,
  48. originalAddr: originalAddr,
  49. }
  50. }
  51. // ID only exists for SecretConnection.
  52. // NOTE: Will panic if conn is not *SecretConnection.
  53. func (pc peerConn) ID() ID {
  54. return PubKeyToID(pc.conn.(*tmconn.SecretConnection).RemotePubKey())
  55. }
  56. // Return the IP from the connection RemoteAddr
  57. func (pc peerConn) RemoteIP() net.IP {
  58. if pc.ip != nil {
  59. return pc.ip
  60. }
  61. host, _, err := net.SplitHostPort(pc.conn.RemoteAddr().String())
  62. if err != nil {
  63. panic(err)
  64. }
  65. ips, err := net.LookupIP(host)
  66. if err != nil {
  67. panic(err)
  68. }
  69. pc.ip = ips[0]
  70. return pc.ip
  71. }
  72. // peer implements Peer.
  73. //
  74. // Before using a peer, you will need to perform a handshake on connection.
  75. type peer struct {
  76. cmn.BaseService
  77. // raw peerConn and the multiplex connection
  78. peerConn
  79. mconn *tmconn.MConnection
  80. // peer's node info and the channel it knows about
  81. // channels = nodeInfo.Channels
  82. // cached to avoid copying nodeInfo in hasChannel
  83. nodeInfo NodeInfo
  84. channels []byte
  85. // User data
  86. Data *cmn.CMap
  87. metrics *Metrics
  88. metricsTicker *time.Ticker
  89. }
  90. type PeerOption func(*peer)
  91. func newPeer(
  92. pc peerConn,
  93. mConfig tmconn.MConnConfig,
  94. nodeInfo NodeInfo,
  95. reactorsByCh map[byte]Reactor,
  96. chDescs []*tmconn.ChannelDescriptor,
  97. onPeerError func(Peer, interface{}),
  98. options ...PeerOption,
  99. ) *peer {
  100. p := &peer{
  101. peerConn: pc,
  102. nodeInfo: nodeInfo,
  103. channels: nodeInfo.(DefaultNodeInfo).Channels, // TODO
  104. Data: cmn.NewCMap(),
  105. metricsTicker: time.NewTicker(metricsTickerDuration),
  106. metrics: NopMetrics(),
  107. }
  108. p.mconn = createMConnection(
  109. pc.conn,
  110. p,
  111. reactorsByCh,
  112. chDescs,
  113. onPeerError,
  114. mConfig,
  115. )
  116. p.BaseService = *cmn.NewBaseService(nil, "Peer", p)
  117. for _, option := range options {
  118. option(p)
  119. }
  120. return p
  121. }
  122. // String representation.
  123. func (p *peer) String() string {
  124. if p.outbound {
  125. return fmt.Sprintf("Peer{%v %v out}", p.mconn, p.ID())
  126. }
  127. return fmt.Sprintf("Peer{%v %v in}", p.mconn, p.ID())
  128. }
  129. //---------------------------------------------------
  130. // Implements cmn.Service
  131. // SetLogger implements BaseService.
  132. func (p *peer) SetLogger(l log.Logger) {
  133. p.Logger = l
  134. p.mconn.SetLogger(l)
  135. }
  136. // OnStart implements BaseService.
  137. func (p *peer) OnStart() error {
  138. if err := p.BaseService.OnStart(); err != nil {
  139. return err
  140. }
  141. if err := p.mconn.Start(); err != nil {
  142. return err
  143. }
  144. go p.metricsReporter()
  145. return nil
  146. }
  147. // FlushStop mimics OnStop but additionally ensures that all successful
  148. // .Send() calls will get flushed before closing the connection.
  149. // NOTE: it is not safe to call this method more than once.
  150. func (p *peer) FlushStop() {
  151. p.metricsTicker.Stop()
  152. p.BaseService.OnStop()
  153. p.mconn.FlushStop() // stop everything and close the conn
  154. }
  155. // OnStop implements BaseService.
  156. func (p *peer) OnStop() {
  157. p.metricsTicker.Stop()
  158. p.BaseService.OnStop()
  159. p.mconn.Stop() // stop everything and close the conn
  160. }
  161. //---------------------------------------------------
  162. // Implements Peer
  163. // ID returns the peer's ID - the hex encoded hash of its pubkey.
  164. func (p *peer) ID() ID {
  165. return p.nodeInfo.ID()
  166. }
  167. // IsOutbound returns true if the connection is outbound, false otherwise.
  168. func (p *peer) IsOutbound() bool {
  169. return p.peerConn.outbound
  170. }
  171. // IsPersistent returns true if the peer is persitent, false otherwise.
  172. func (p *peer) IsPersistent() bool {
  173. return p.peerConn.persistent
  174. }
  175. // NodeInfo returns a copy of the peer's NodeInfo.
  176. func (p *peer) NodeInfo() NodeInfo {
  177. return p.nodeInfo
  178. }
  179. // OriginalAddr returns the original address, which was used to connect with
  180. // the peer. Returns nil for inbound peers.
  181. func (p *peer) OriginalAddr() *NetAddress {
  182. if p.peerConn.outbound {
  183. return p.peerConn.originalAddr
  184. }
  185. return nil
  186. }
  187. // Status returns the peer's ConnectionStatus.
  188. func (p *peer) Status() tmconn.ConnectionStatus {
  189. return p.mconn.Status()
  190. }
  191. // Send msg bytes to the channel identified by chID byte. Returns false if the
  192. // send queue is full after timeout, specified by MConnection.
  193. func (p *peer) Send(chID byte, msgBytes []byte) bool {
  194. if !p.IsRunning() {
  195. // see Switch#Broadcast, where we fetch the list of peers and loop over
  196. // them - while we're looping, one peer may be removed and stopped.
  197. return false
  198. } else if !p.hasChannel(chID) {
  199. return false
  200. }
  201. res := p.mconn.Send(chID, msgBytes)
  202. if res {
  203. p.metrics.PeerSendBytesTotal.With("peer_id", string(p.ID())).Add(float64(len(msgBytes)))
  204. }
  205. return res
  206. }
  207. // TrySend msg bytes to the channel identified by chID byte. Immediately returns
  208. // false if the send queue is full.
  209. func (p *peer) TrySend(chID byte, msgBytes []byte) bool {
  210. if !p.IsRunning() {
  211. return false
  212. } else if !p.hasChannel(chID) {
  213. return false
  214. }
  215. res := p.mconn.TrySend(chID, msgBytes)
  216. if res {
  217. p.metrics.PeerSendBytesTotal.With("peer_id", string(p.ID())).Add(float64(len(msgBytes)))
  218. }
  219. return res
  220. }
  221. // Get the data for a given key.
  222. func (p *peer) Get(key string) interface{} {
  223. return p.Data.Get(key)
  224. }
  225. // Set sets the data for the given key.
  226. func (p *peer) Set(key string, data interface{}) {
  227. p.Data.Set(key, data)
  228. }
  229. // hasChannel returns true if the peer reported
  230. // knowing about the given chID.
  231. func (p *peer) hasChannel(chID byte) bool {
  232. for _, ch := range p.channels {
  233. if ch == chID {
  234. return true
  235. }
  236. }
  237. // NOTE: probably will want to remove this
  238. // but could be helpful while the feature is new
  239. p.Logger.Debug(
  240. "Unknown channel for peer",
  241. "channel",
  242. chID,
  243. "channels",
  244. p.channels,
  245. )
  246. return false
  247. }
  248. // CloseConn closes original connection. Used for cleaning up in cases where the peer had not been started at all.
  249. func (p *peer) CloseConn() error {
  250. return p.peerConn.conn.Close()
  251. }
  252. //---------------------------------------------------
  253. // methods only used for testing
  254. // TODO: can we remove these?
  255. // CloseConn closes the underlying connection
  256. func (pc *peerConn) CloseConn() {
  257. pc.conn.Close() // nolint: errcheck
  258. }
  259. // RemoteAddr returns peer's remote network address.
  260. func (p *peer) RemoteAddr() net.Addr {
  261. return p.peerConn.conn.RemoteAddr()
  262. }
  263. // CanSend returns true if the send queue is not full, false otherwise.
  264. func (p *peer) CanSend(chID byte) bool {
  265. if !p.IsRunning() {
  266. return false
  267. }
  268. return p.mconn.CanSend(chID)
  269. }
  270. //---------------------------------------------------
  271. func PeerMetrics(metrics *Metrics) PeerOption {
  272. return func(p *peer) {
  273. p.metrics = metrics
  274. }
  275. }
  276. func (p *peer) metricsReporter() {
  277. for {
  278. select {
  279. case <-p.metricsTicker.C:
  280. status := p.mconn.Status()
  281. var sendQueueSize float64
  282. for _, chStatus := range status.Channels {
  283. sendQueueSize += float64(chStatus.SendQueueSize)
  284. }
  285. p.metrics.PeerPendingSendBytes.With("peer_id", string(p.ID())).Set(sendQueueSize)
  286. case <-p.Quit():
  287. return
  288. }
  289. }
  290. }
  291. //------------------------------------------------------------------
  292. // helper funcs
  293. func createMConnection(
  294. conn net.Conn,
  295. p *peer,
  296. reactorsByCh map[byte]Reactor,
  297. chDescs []*tmconn.ChannelDescriptor,
  298. onPeerError func(Peer, interface{}),
  299. config tmconn.MConnConfig,
  300. ) *tmconn.MConnection {
  301. onReceive := func(chID byte, msgBytes []byte) {
  302. reactor := reactorsByCh[chID]
  303. if reactor == nil {
  304. // Note that its ok to panic here as it's caught in the conn._recover,
  305. // which does onPeerError.
  306. panic(fmt.Sprintf("Unknown channel %X", chID))
  307. }
  308. p.metrics.PeerReceiveBytesTotal.With("peer_id", string(p.ID())).Add(float64(len(msgBytes)))
  309. reactor.Receive(chID, p, msgBytes)
  310. }
  311. onError := func(r interface{}) {
  312. onPeerError(p, r)
  313. }
  314. return tmconn.NewMConnectionWithConfig(
  315. conn,
  316. chDescs,
  317. onReceive,
  318. onError,
  319. config,
  320. )
  321. }