You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

399 lines
9.1 KiB

p2p: file descriptor leaks (#3150) * close peer's connection to avoid fd leak Fixes #2967 * rename peer#Addr to RemoteAddr * fix test * fixes after Ethan's review * bring back the check * changelog entry * write a test for switch#acceptRoutine * increase timeouts? :( * remove extra assertNPeersWithTimeout * simplify test * assert number of peers (just to be safe) * Cleanup in OnStop * run tests with verbose flag on CircleCI * spawn a reading routine to prevent connection from closing * get port from the listener random port is faster, but often results in ``` panic: listen tcp 127.0.0.1:44068: bind: address already in use [recovered] panic: listen tcp 127.0.0.1:44068: bind: address already in use goroutine 79 [running]: testing.tRunner.func1(0xc0001bd600) /usr/local/go/src/testing/testing.go:792 +0x387 panic(0x974d20, 0xc0001b0500) /usr/local/go/src/runtime/panic.go:513 +0x1b9 github.com/tendermint/tendermint/p2p.MakeSwitch(0xc0000f42a0, 0x0, 0x9fb9cc, 0x9, 0x9fc346, 0xb, 0xb42128, 0x0, 0x0, 0x0, ...) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:182 +0xa28 github.com/tendermint/tendermint/p2p.MakeConnectedSwitches(0xc0000f42a0, 0x2, 0xb42128, 0xb41eb8, 0x4f1205, 0xc0001bed80, 0x4f16ed) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:75 +0xf9 github.com/tendermint/tendermint/p2p.MakeSwitchPair(0xbb8d20, 0xc0001bd600, 0xb42128, 0x2f7, 0x4f16c0) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:94 +0x4c github.com/tendermint/tendermint/p2p.TestSwitches(0xc0001bd600) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:117 +0x58 testing.tRunner(0xc0001bd600, 0xb42038) /usr/local/go/src/testing/testing.go:827 +0xbf created by testing.(*T).Run /usr/local/go/src/testing/testing.go:878 +0x353 exit status 2 FAIL github.com/tendermint/tendermint/p2p 0.350s ```
6 years ago
p2p: file descriptor leaks (#3150) * close peer's connection to avoid fd leak Fixes #2967 * rename peer#Addr to RemoteAddr * fix test * fixes after Ethan's review * bring back the check * changelog entry * write a test for switch#acceptRoutine * increase timeouts? :( * remove extra assertNPeersWithTimeout * simplify test * assert number of peers (just to be safe) * Cleanup in OnStop * run tests with verbose flag on CircleCI * spawn a reading routine to prevent connection from closing * get port from the listener random port is faster, but often results in ``` panic: listen tcp 127.0.0.1:44068: bind: address already in use [recovered] panic: listen tcp 127.0.0.1:44068: bind: address already in use goroutine 79 [running]: testing.tRunner.func1(0xc0001bd600) /usr/local/go/src/testing/testing.go:792 +0x387 panic(0x974d20, 0xc0001b0500) /usr/local/go/src/runtime/panic.go:513 +0x1b9 github.com/tendermint/tendermint/p2p.MakeSwitch(0xc0000f42a0, 0x0, 0x9fb9cc, 0x9, 0x9fc346, 0xb, 0xb42128, 0x0, 0x0, 0x0, ...) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:182 +0xa28 github.com/tendermint/tendermint/p2p.MakeConnectedSwitches(0xc0000f42a0, 0x2, 0xb42128, 0xb41eb8, 0x4f1205, 0xc0001bed80, 0x4f16ed) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:75 +0xf9 github.com/tendermint/tendermint/p2p.MakeSwitchPair(0xbb8d20, 0xc0001bd600, 0xb42128, 0x2f7, 0x4f16c0) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:94 +0x4c github.com/tendermint/tendermint/p2p.TestSwitches(0xc0001bd600) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:117 +0x58 testing.tRunner(0xc0001bd600, 0xb42038) /usr/local/go/src/testing/testing.go:827 +0xbf created by testing.(*T).Run /usr/local/go/src/testing/testing.go:878 +0x353 exit status 2 FAIL github.com/tendermint/tendermint/p2p 0.350s ```
6 years ago
p2p: file descriptor leaks (#3150) * close peer's connection to avoid fd leak Fixes #2967 * rename peer#Addr to RemoteAddr * fix test * fixes after Ethan's review * bring back the check * changelog entry * write a test for switch#acceptRoutine * increase timeouts? :( * remove extra assertNPeersWithTimeout * simplify test * assert number of peers (just to be safe) * Cleanup in OnStop * run tests with verbose flag on CircleCI * spawn a reading routine to prevent connection from closing * get port from the listener random port is faster, but often results in ``` panic: listen tcp 127.0.0.1:44068: bind: address already in use [recovered] panic: listen tcp 127.0.0.1:44068: bind: address already in use goroutine 79 [running]: testing.tRunner.func1(0xc0001bd600) /usr/local/go/src/testing/testing.go:792 +0x387 panic(0x974d20, 0xc0001b0500) /usr/local/go/src/runtime/panic.go:513 +0x1b9 github.com/tendermint/tendermint/p2p.MakeSwitch(0xc0000f42a0, 0x0, 0x9fb9cc, 0x9, 0x9fc346, 0xb, 0xb42128, 0x0, 0x0, 0x0, ...) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:182 +0xa28 github.com/tendermint/tendermint/p2p.MakeConnectedSwitches(0xc0000f42a0, 0x2, 0xb42128, 0xb41eb8, 0x4f1205, 0xc0001bed80, 0x4f16ed) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:75 +0xf9 github.com/tendermint/tendermint/p2p.MakeSwitchPair(0xbb8d20, 0xc0001bd600, 0xb42128, 0x2f7, 0x4f16c0) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:94 +0x4c github.com/tendermint/tendermint/p2p.TestSwitches(0xc0001bd600) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:117 +0x58 testing.tRunner(0xc0001bd600, 0xb42038) /usr/local/go/src/testing/testing.go:827 +0xbf created by testing.(*T).Run /usr/local/go/src/testing/testing.go:878 +0x353 exit status 2 FAIL github.com/tendermint/tendermint/p2p 0.350s ```
6 years ago
p2p: file descriptor leaks (#3150) * close peer's connection to avoid fd leak Fixes #2967 * rename peer#Addr to RemoteAddr * fix test * fixes after Ethan's review * bring back the check * changelog entry * write a test for switch#acceptRoutine * increase timeouts? :( * remove extra assertNPeersWithTimeout * simplify test * assert number of peers (just to be safe) * Cleanup in OnStop * run tests with verbose flag on CircleCI * spawn a reading routine to prevent connection from closing * get port from the listener random port is faster, but often results in ``` panic: listen tcp 127.0.0.1:44068: bind: address already in use [recovered] panic: listen tcp 127.0.0.1:44068: bind: address already in use goroutine 79 [running]: testing.tRunner.func1(0xc0001bd600) /usr/local/go/src/testing/testing.go:792 +0x387 panic(0x974d20, 0xc0001b0500) /usr/local/go/src/runtime/panic.go:513 +0x1b9 github.com/tendermint/tendermint/p2p.MakeSwitch(0xc0000f42a0, 0x0, 0x9fb9cc, 0x9, 0x9fc346, 0xb, 0xb42128, 0x0, 0x0, 0x0, ...) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:182 +0xa28 github.com/tendermint/tendermint/p2p.MakeConnectedSwitches(0xc0000f42a0, 0x2, 0xb42128, 0xb41eb8, 0x4f1205, 0xc0001bed80, 0x4f16ed) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:75 +0xf9 github.com/tendermint/tendermint/p2p.MakeSwitchPair(0xbb8d20, 0xc0001bd600, 0xb42128, 0x2f7, 0x4f16c0) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:94 +0x4c github.com/tendermint/tendermint/p2p.TestSwitches(0xc0001bd600) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:117 +0x58 testing.tRunner(0xc0001bd600, 0xb42038) /usr/local/go/src/testing/testing.go:827 +0xbf created by testing.(*T).Run /usr/local/go/src/testing/testing.go:878 +0x353 exit status 2 FAIL github.com/tendermint/tendermint/p2p 0.350s ```
6 years ago
  1. package p2p
  2. import (
  3. "fmt"
  4. "net"
  5. "time"
  6. cmn "github.com/tendermint/tendermint/libs/common"
  7. "github.com/tendermint/tendermint/libs/log"
  8. tmconn "github.com/tendermint/tendermint/p2p/conn"
  9. )
// metricsTickerDuration is the period of the ticker created in newPeer; each
// tick makes metricsReporter refresh the peer's pending-send-bytes gauge.
const metricsTickerDuration = 10 * time.Second
// Peer is an interface representing a peer connected on a reactor.
type Peer interface {
	cmn.Service
	FlushStop() // stop the peer, flushing successful sends before the conn is closed

	ID() ID               // peer's cryptographic ID
	RemoteIP() net.IP     // remote IP of the connection
	RemoteAddr() net.Addr // remote address of the connection

	IsOutbound() bool   // did we dial the peer
	IsPersistent() bool // do we redial this peer when we disconnect

	CloseConn() error // close original connection

	NodeInfo() NodeInfo // peer's info
	Status() tmconn.ConnectionStatus
	SocketAddr() *NetAddress // actual address of the socket

	Send(byte, []byte) bool    // send msg bytes on the given channel ID; false if not sent
	TrySend(byte, []byte) bool // like Send, but does not wait when the send queue is full

	Set(string, interface{}) // store user data under a key
	Get(string) interface{}  // fetch user data stored under a key
}
  29. //----------------------------------------------------------
// peerConn contains the raw connection and its config.
type peerConn struct {
	outbound   bool        // reported by peer.IsOutbound
	persistent bool        // reported by peer.IsPersistent
	conn       net.Conn    // source connection
	socketAddr *NetAddress // reported by peer.SocketAddr

	// cached RemoteIP()
	// NOTE(review): newPeerConn leaves this nil and RemoteIP has a value
	// receiver, so nothing visible in this file ever populates it.
	ip net.IP
}
  39. func newPeerConn(
  40. outbound, persistent bool,
  41. conn net.Conn,
  42. socketAddr *NetAddress,
  43. ) peerConn {
  44. return peerConn{
  45. outbound: outbound,
  46. persistent: persistent,
  47. conn: conn,
  48. socketAddr: socketAddr,
  49. }
  50. }
  51. // ID only exists for SecretConnection.
  52. // NOTE: Will panic if conn is not *SecretConnection.
  53. func (pc peerConn) ID() ID {
  54. return PubKeyToID(pc.conn.(*tmconn.SecretConnection).RemotePubKey())
  55. }
  56. // Return the IP from the connection RemoteAddr
  57. func (pc peerConn) RemoteIP() net.IP {
  58. if pc.ip != nil {
  59. return pc.ip
  60. }
  61. host, _, err := net.SplitHostPort(pc.conn.RemoteAddr().String())
  62. if err != nil {
  63. panic(err)
  64. }
  65. ips, err := net.LookupIP(host)
  66. if err != nil {
  67. panic(err)
  68. }
  69. pc.ip = ips[0]
  70. return pc.ip
  71. }
// peer implements Peer.
//
// Before using a peer, you will need to perform a handshake on connection.
type peer struct {
	cmn.BaseService

	// raw peerConn and the multiplex connection
	peerConn
	mconn *tmconn.MConnection

	// peer's node info and the channel it knows about
	// channels = nodeInfo.Channels
	// cached to avoid copying nodeInfo in hasChannel
	nodeInfo NodeInfo
	channels []byte

	// User data
	Data *cmn.CMap

	// metrics receives the per-peer send/receive byte counters and the
	// pending-send gauge. metricsTicker paces metricsReporter and is stopped
	// in OnStop and FlushStop.
	metrics       *Metrics
	metricsTicker *time.Ticker
}
// PeerOption customises a peer. newPeer applies the given options, in order,
// after the peer and its multiplex connection have been constructed.
type PeerOption func(*peer)
  91. func newPeer(
  92. pc peerConn,
  93. mConfig tmconn.MConnConfig,
  94. nodeInfo NodeInfo,
  95. reactorsByCh map[byte]Reactor,
  96. chDescs []*tmconn.ChannelDescriptor,
  97. onPeerError func(Peer, interface{}),
  98. options ...PeerOption,
  99. ) *peer {
  100. p := &peer{
  101. peerConn: pc,
  102. nodeInfo: nodeInfo,
  103. channels: nodeInfo.(DefaultNodeInfo).Channels, // TODO
  104. Data: cmn.NewCMap(),
  105. metricsTicker: time.NewTicker(metricsTickerDuration),
  106. metrics: NopMetrics(),
  107. }
  108. p.mconn = createMConnection(
  109. pc.conn,
  110. p,
  111. reactorsByCh,
  112. chDescs,
  113. onPeerError,
  114. mConfig,
  115. )
  116. p.BaseService = *cmn.NewBaseService(nil, "Peer", p)
  117. for _, option := range options {
  118. option(p)
  119. }
  120. return p
  121. }
  122. // String representation.
  123. func (p *peer) String() string {
  124. if p.outbound {
  125. return fmt.Sprintf("Peer{%v %v out}", p.mconn, p.ID())
  126. }
  127. return fmt.Sprintf("Peer{%v %v in}", p.mconn, p.ID())
  128. }
  129. //---------------------------------------------------
  130. // Implements cmn.Service
  131. // SetLogger implements BaseService.
  132. func (p *peer) SetLogger(l log.Logger) {
  133. p.Logger = l
  134. p.mconn.SetLogger(l)
  135. }
  136. // OnStart implements BaseService.
  137. func (p *peer) OnStart() error {
  138. if err := p.BaseService.OnStart(); err != nil {
  139. return err
  140. }
  141. if err := p.mconn.Start(); err != nil {
  142. return err
  143. }
  144. go p.metricsReporter()
  145. return nil
  146. }
// FlushStop mimics OnStop but additionally ensures that all successful
// .Send() calls will get flushed before closing the connection.
// It stops the metrics ticker, runs BaseService.OnStop and then calls
// FlushStop on the multiplex connection.
// NOTE: it is not safe to call this method more than once.
func (p *peer) FlushStop() {
	p.metricsTicker.Stop()
	p.BaseService.OnStop()
	p.mconn.FlushStop() // stop everything and close the conn
}
// OnStop implements BaseService. It stops the metrics ticker, runs
// BaseService.OnStop and then stops the multiplex connection.
func (p *peer) OnStop() {
	p.metricsTicker.Stop()
	p.BaseService.OnStop()
	p.mconn.Stop() // stop everything and close the conn
}
  161. //---------------------------------------------------
  162. // Implements Peer
// ID returns the peer's ID - the hex encoded hash of its pubkey.
// The value is taken from the peer's NodeInfo, so it takes precedence over
// the embedded peerConn.ID, which derives the ID from the connection.
func (p *peer) ID() ID {
	return p.nodeInfo.ID()
}
  167. // IsOutbound returns true if the connection is outbound, false otherwise.
  168. func (p *peer) IsOutbound() bool {
  169. return p.peerConn.outbound
  170. }
  171. // IsPersistent returns true if the peer is persitent, false otherwise.
  172. func (p *peer) IsPersistent() bool {
  173. return p.peerConn.persistent
  174. }
// NodeInfo returns the NodeInfo the peer was created with.
func (p *peer) NodeInfo() NodeInfo {
	return p.nodeInfo
}
  179. // SocketAddr returns the address of the socket.
  180. // For outbound peers, it's the address dialed (after DNS resolution).
  181. // For inbound peers, it's the address returned by the underlying connection
  182. // (not what's reported in the peer's NodeInfo).
  183. func (p *peer) SocketAddr() *NetAddress {
  184. return p.peerConn.socketAddr
  185. }
// Status returns the peer's ConnectionStatus, as reported by the multiplex
// connection.
func (p *peer) Status() tmconn.ConnectionStatus {
	return p.mconn.Status()
}
  190. // Send msg bytes to the channel identified by chID byte. Returns false if the
  191. // send queue is full after timeout, specified by MConnection.
  192. func (p *peer) Send(chID byte, msgBytes []byte) bool {
  193. if !p.IsRunning() {
  194. // see Switch#Broadcast, where we fetch the list of peers and loop over
  195. // them - while we're looping, one peer may be removed and stopped.
  196. return false
  197. } else if !p.hasChannel(chID) {
  198. return false
  199. }
  200. res := p.mconn.Send(chID, msgBytes)
  201. if res {
  202. labels := []string{
  203. "peer_id", string(p.ID()),
  204. "chID", fmt.Sprintf("%#x", chID),
  205. }
  206. p.metrics.PeerSendBytesTotal.With(labels...).Add(float64(len(msgBytes)))
  207. }
  208. return res
  209. }
  210. // TrySend msg bytes to the channel identified by chID byte. Immediately returns
  211. // false if the send queue is full.
  212. func (p *peer) TrySend(chID byte, msgBytes []byte) bool {
  213. if !p.IsRunning() {
  214. return false
  215. } else if !p.hasChannel(chID) {
  216. return false
  217. }
  218. res := p.mconn.TrySend(chID, msgBytes)
  219. if res {
  220. labels := []string{
  221. "peer_id", string(p.ID()),
  222. "chID", fmt.Sprintf("%#x", chID),
  223. }
  224. p.metrics.PeerSendBytesTotal.With(labels...).Add(float64(len(msgBytes)))
  225. }
  226. return res
  227. }
// Get returns the user data stored under key in the peer's Data map.
func (p *peer) Get(key string) interface{} {
	return p.Data.Get(key)
}
// Set stores data under key in the peer's Data map.
func (p *peer) Set(key string, data interface{}) {
	p.Data.Set(key, data)
}
  236. // hasChannel returns true if the peer reported
  237. // knowing about the given chID.
  238. func (p *peer) hasChannel(chID byte) bool {
  239. for _, ch := range p.channels {
  240. if ch == chID {
  241. return true
  242. }
  243. }
  244. // NOTE: probably will want to remove this
  245. // but could be helpful while the feature is new
  246. p.Logger.Debug(
  247. "Unknown channel for peer",
  248. "channel",
  249. chID,
  250. "channels",
  251. p.channels,
  252. )
  253. return false
  254. }
  255. // CloseConn closes original connection. Used for cleaning up in cases where the peer had not been started at all.
  256. func (p *peer) CloseConn() error {
  257. return p.peerConn.conn.Close()
  258. }
  259. //---------------------------------------------------
  260. // methods only used for testing
  261. // TODO: can we remove these?
  262. // CloseConn closes the underlying connection
  263. func (pc *peerConn) CloseConn() {
  264. pc.conn.Close() // nolint: errcheck
  265. }
  266. // RemoteAddr returns peer's remote network address.
  267. func (p *peer) RemoteAddr() net.Addr {
  268. return p.peerConn.conn.RemoteAddr()
  269. }
  270. // CanSend returns true if the send queue is not full, false otherwise.
  271. func (p *peer) CanSend(chID byte) bool {
  272. if !p.IsRunning() {
  273. return false
  274. }
  275. return p.mconn.CanSend(chID)
  276. }
  277. //---------------------------------------------------
  278. func PeerMetrics(metrics *Metrics) PeerOption {
  279. return func(p *peer) {
  280. p.metrics = metrics
  281. }
  282. }
  283. func (p *peer) metricsReporter() {
  284. for {
  285. select {
  286. case <-p.metricsTicker.C:
  287. status := p.mconn.Status()
  288. var sendQueueSize float64
  289. for _, chStatus := range status.Channels {
  290. sendQueueSize += float64(chStatus.SendQueueSize)
  291. }
  292. p.metrics.PeerPendingSendBytes.With("peer_id", string(p.ID())).Set(sendQueueSize)
  293. case <-p.Quit():
  294. return
  295. }
  296. }
  297. }
  298. //------------------------------------------------------------------
  299. // helper funcs
  300. func createMConnection(
  301. conn net.Conn,
  302. p *peer,
  303. reactorsByCh map[byte]Reactor,
  304. chDescs []*tmconn.ChannelDescriptor,
  305. onPeerError func(Peer, interface{}),
  306. config tmconn.MConnConfig,
  307. ) *tmconn.MConnection {
  308. onReceive := func(chID byte, msgBytes []byte) {
  309. reactor := reactorsByCh[chID]
  310. if reactor == nil {
  311. // Note that its ok to panic here as it's caught in the conn._recover,
  312. // which does onPeerError.
  313. panic(fmt.Sprintf("Unknown channel %X", chID))
  314. }
  315. labels := []string{
  316. "peer_id", string(p.ID()),
  317. "chID", fmt.Sprintf("%#x", chID),
  318. }
  319. p.metrics.PeerReceiveBytesTotal.With(labels...).Add(float64(len(msgBytes)))
  320. reactor.Receive(chID, p, msgBytes)
  321. }
  322. onError := func(r interface{}) {
  323. onPeerError(p, r)
  324. }
  325. return tmconn.NewMConnectionWithConfig(
  326. conn,
  327. chDescs,
  328. onReceive,
  329. onError,
  330. config,
  331. )
  332. }