You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

402 lines
9.2 KiB

p2p: file descriptor leaks (#3150) * close peer's connection to avoid fd leak Fixes #2967 * rename peer#Addr to RemoteAddr * fix test * fixes after Ethan's review * bring back the check * changelog entry * write a test for switch#acceptRoutine * increase timeouts? :( * remove extra assertNPeersWithTimeout * simplify test * assert number of peers (just to be safe) * Cleanup in OnStop * run tests with verbose flag on CircleCI * spawn a reading routine to prevent connection from closing * get port from the listener random port is faster, but often results in ``` panic: listen tcp 127.0.0.1:44068: bind: address already in use [recovered] panic: listen tcp 127.0.0.1:44068: bind: address already in use goroutine 79 [running]: testing.tRunner.func1(0xc0001bd600) /usr/local/go/src/testing/testing.go:792 +0x387 panic(0x974d20, 0xc0001b0500) /usr/local/go/src/runtime/panic.go:513 +0x1b9 github.com/tendermint/tendermint/p2p.MakeSwitch(0xc0000f42a0, 0x0, 0x9fb9cc, 0x9, 0x9fc346, 0xb, 0xb42128, 0x0, 0x0, 0x0, ...) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:182 +0xa28 github.com/tendermint/tendermint/p2p.MakeConnectedSwitches(0xc0000f42a0, 0x2, 0xb42128, 0xb41eb8, 0x4f1205, 0xc0001bed80, 0x4f16ed) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:75 +0xf9 github.com/tendermint/tendermint/p2p.MakeSwitchPair(0xbb8d20, 0xc0001bd600, 0xb42128, 0x2f7, 0x4f16c0) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:94 +0x4c github.com/tendermint/tendermint/p2p.TestSwitches(0xc0001bd600) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:117 +0x58 testing.tRunner(0xc0001bd600, 0xb42038) /usr/local/go/src/testing/testing.go:827 +0xbf created by testing.(*T).Run /usr/local/go/src/testing/testing.go:878 +0x353 exit status 2 FAIL github.com/tendermint/tendermint/p2p 0.350s ```
6 years ago
p2p: file descriptor leaks (#3150) * close peer's connection to avoid fd leak Fixes #2967 * rename peer#Addr to RemoteAddr * fix test * fixes after Ethan's review * bring back the check * changelog entry * write a test for switch#acceptRoutine * increase timeouts? :( * remove extra assertNPeersWithTimeout * simplify test * assert number of peers (just to be safe) * Cleanup in OnStop * run tests with verbose flag on CircleCI * spawn a reading routine to prevent connection from closing * get port from the listener random port is faster, but often results in ``` panic: listen tcp 127.0.0.1:44068: bind: address already in use [recovered] panic: listen tcp 127.0.0.1:44068: bind: address already in use goroutine 79 [running]: testing.tRunner.func1(0xc0001bd600) /usr/local/go/src/testing/testing.go:792 +0x387 panic(0x974d20, 0xc0001b0500) /usr/local/go/src/runtime/panic.go:513 +0x1b9 github.com/tendermint/tendermint/p2p.MakeSwitch(0xc0000f42a0, 0x0, 0x9fb9cc, 0x9, 0x9fc346, 0xb, 0xb42128, 0x0, 0x0, 0x0, ...) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:182 +0xa28 github.com/tendermint/tendermint/p2p.MakeConnectedSwitches(0xc0000f42a0, 0x2, 0xb42128, 0xb41eb8, 0x4f1205, 0xc0001bed80, 0x4f16ed) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:75 +0xf9 github.com/tendermint/tendermint/p2p.MakeSwitchPair(0xbb8d20, 0xc0001bd600, 0xb42128, 0x2f7, 0x4f16c0) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:94 +0x4c github.com/tendermint/tendermint/p2p.TestSwitches(0xc0001bd600) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:117 +0x58 testing.tRunner(0xc0001bd600, 0xb42038) /usr/local/go/src/testing/testing.go:827 +0xbf created by testing.(*T).Run /usr/local/go/src/testing/testing.go:878 +0x353 exit status 2 FAIL github.com/tendermint/tendermint/p2p 0.350s ```
6 years ago
p2p: file descriptor leaks (#3150) * close peer's connection to avoid fd leak Fixes #2967 * rename peer#Addr to RemoteAddr * fix test * fixes after Ethan's review * bring back the check * changelog entry * write a test for switch#acceptRoutine * increase timeouts? :( * remove extra assertNPeersWithTimeout * simplify test * assert number of peers (just to be safe) * Cleanup in OnStop * run tests with verbose flag on CircleCI * spawn a reading routine to prevent connection from closing * get port from the listener random port is faster, but often results in ``` panic: listen tcp 127.0.0.1:44068: bind: address already in use [recovered] panic: listen tcp 127.0.0.1:44068: bind: address already in use goroutine 79 [running]: testing.tRunner.func1(0xc0001bd600) /usr/local/go/src/testing/testing.go:792 +0x387 panic(0x974d20, 0xc0001b0500) /usr/local/go/src/runtime/panic.go:513 +0x1b9 github.com/tendermint/tendermint/p2p.MakeSwitch(0xc0000f42a0, 0x0, 0x9fb9cc, 0x9, 0x9fc346, 0xb, 0xb42128, 0x0, 0x0, 0x0, ...) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:182 +0xa28 github.com/tendermint/tendermint/p2p.MakeConnectedSwitches(0xc0000f42a0, 0x2, 0xb42128, 0xb41eb8, 0x4f1205, 0xc0001bed80, 0x4f16ed) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:75 +0xf9 github.com/tendermint/tendermint/p2p.MakeSwitchPair(0xbb8d20, 0xc0001bd600, 0xb42128, 0x2f7, 0x4f16c0) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:94 +0x4c github.com/tendermint/tendermint/p2p.TestSwitches(0xc0001bd600) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:117 +0x58 testing.tRunner(0xc0001bd600, 0xb42038) /usr/local/go/src/testing/testing.go:827 +0xbf created by testing.(*T).Run /usr/local/go/src/testing/testing.go:878 +0x353 exit status 2 FAIL github.com/tendermint/tendermint/p2p 0.350s ```
6 years ago
p2p: file descriptor leaks (#3150) * close peer's connection to avoid fd leak Fixes #2967 * rename peer#Addr to RemoteAddr * fix test * fixes after Ethan's review * bring back the check * changelog entry * write a test for switch#acceptRoutine * increase timeouts? :( * remove extra assertNPeersWithTimeout * simplify test * assert number of peers (just to be safe) * Cleanup in OnStop * run tests with verbose flag on CircleCI * spawn a reading routine to prevent connection from closing * get port from the listener random port is faster, but often results in ``` panic: listen tcp 127.0.0.1:44068: bind: address already in use [recovered] panic: listen tcp 127.0.0.1:44068: bind: address already in use goroutine 79 [running]: testing.tRunner.func1(0xc0001bd600) /usr/local/go/src/testing/testing.go:792 +0x387 panic(0x974d20, 0xc0001b0500) /usr/local/go/src/runtime/panic.go:513 +0x1b9 github.com/tendermint/tendermint/p2p.MakeSwitch(0xc0000f42a0, 0x0, 0x9fb9cc, 0x9, 0x9fc346, 0xb, 0xb42128, 0x0, 0x0, 0x0, ...) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:182 +0xa28 github.com/tendermint/tendermint/p2p.MakeConnectedSwitches(0xc0000f42a0, 0x2, 0xb42128, 0xb41eb8, 0x4f1205, 0xc0001bed80, 0x4f16ed) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:75 +0xf9 github.com/tendermint/tendermint/p2p.MakeSwitchPair(0xbb8d20, 0xc0001bd600, 0xb42128, 0x2f7, 0x4f16c0) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:94 +0x4c github.com/tendermint/tendermint/p2p.TestSwitches(0xc0001bd600) /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:117 +0x58 testing.tRunner(0xc0001bd600, 0xb42038) /usr/local/go/src/testing/testing.go:827 +0xbf created by testing.(*T).Run /usr/local/go/src/testing/testing.go:878 +0x353 exit status 2 FAIL github.com/tendermint/tendermint/p2p 0.350s ```
6 years ago
  1. package p2p
  2. import (
  3. "fmt"
  4. "net"
  5. "time"
  6. "github.com/tendermint/tendermint/libs/cmap"
  7. "github.com/tendermint/tendermint/libs/log"
  8. "github.com/tendermint/tendermint/libs/service"
  9. tmconn "github.com/tendermint/tendermint/p2p/conn"
  10. )
  11. //go:generate mockery -case underscore -name Peer
  // metricsTickerDuration is the interval, ten seconds, used for the ticker
  // that drives periodic peer metrics reporting.
  const metricsTickerDuration time.Duration = 10 * time.Second
  13. // Peer is an interface representing a peer connected on a reactor.
  14. type Peer interface {
  15. service.Service
  16. FlushStop()
  17. ID() ID // peer's cryptographic ID
  18. RemoteIP() net.IP // remote IP of the connection
  19. RemoteAddr() net.Addr // remote address of the connection
  20. IsOutbound() bool // did we dial the peer
  21. IsPersistent() bool // do we redial this peer when we disconnect
  22. CloseConn() error // close original connection
  23. NodeInfo() NodeInfo // peer's info
  24. Status() tmconn.ConnectionStatus
  25. SocketAddr() *NetAddress // actual address of the socket
  26. Send(byte, []byte) bool
  27. TrySend(byte, []byte) bool
  28. Set(string, interface{})
  29. Get(string) interface{}
  30. }
  31. //----------------------------------------------------------
  32. // peerConn contains the raw connection and its config.
  33. type peerConn struct {
  34. outbound bool
  35. persistent bool
  36. conn net.Conn // source connection
  37. socketAddr *NetAddress
  38. // cached RemoteIP()
  39. ip net.IP
  40. }
  41. func newPeerConn(
  42. outbound, persistent bool,
  43. conn net.Conn,
  44. socketAddr *NetAddress,
  45. ) peerConn {
  46. return peerConn{
  47. outbound: outbound,
  48. persistent: persistent,
  49. conn: conn,
  50. socketAddr: socketAddr,
  51. }
  52. }
  53. // ID only exists for SecretConnection.
  54. // NOTE: Will panic if conn is not *SecretConnection.
  55. func (pc peerConn) ID() ID {
  56. return PubKeyToID(pc.conn.(*tmconn.SecretConnection).RemotePubKey())
  57. }
  58. // Return the IP from the connection RemoteAddr
  59. func (pc peerConn) RemoteIP() net.IP {
  60. if pc.ip != nil {
  61. return pc.ip
  62. }
  63. host, _, err := net.SplitHostPort(pc.conn.RemoteAddr().String())
  64. if err != nil {
  65. panic(err)
  66. }
  67. ips, err := net.LookupIP(host)
  68. if err != nil {
  69. panic(err)
  70. }
  71. pc.ip = ips[0]
  72. return pc.ip
  73. }
  74. // peer implements Peer.
  75. //
  76. // Before using a peer, you will need to perform a handshake on connection.
  77. type peer struct {
  78. service.BaseService
  79. // raw peerConn and the multiplex connection
  80. peerConn
  81. mconn *tmconn.MConnection
  82. // peer's node info and the channel it knows about
  83. // channels = nodeInfo.Channels
  84. // cached to avoid copying nodeInfo in hasChannel
  85. nodeInfo NodeInfo
  86. channels []byte
  87. // User data
  88. Data *cmap.CMap
  89. metrics *Metrics
  90. metricsTicker *time.Ticker
  91. }
  92. type PeerOption func(*peer)
  93. func newPeer(
  94. pc peerConn,
  95. mConfig tmconn.MConnConfig,
  96. nodeInfo NodeInfo,
  97. reactorsByCh map[byte]Reactor,
  98. chDescs []*tmconn.ChannelDescriptor,
  99. onPeerError func(Peer, interface{}),
  100. options ...PeerOption,
  101. ) *peer {
  102. p := &peer{
  103. peerConn: pc,
  104. nodeInfo: nodeInfo,
  105. channels: nodeInfo.(DefaultNodeInfo).Channels, // TODO
  106. Data: cmap.NewCMap(),
  107. metricsTicker: time.NewTicker(metricsTickerDuration),
  108. metrics: NopMetrics(),
  109. }
  110. p.mconn = createMConnection(
  111. pc.conn,
  112. p,
  113. reactorsByCh,
  114. chDescs,
  115. onPeerError,
  116. mConfig,
  117. )
  118. p.BaseService = *service.NewBaseService(nil, "Peer", p)
  119. for _, option := range options {
  120. option(p)
  121. }
  122. return p
  123. }
  124. // String representation.
  125. func (p *peer) String() string {
  126. if p.outbound {
  127. return fmt.Sprintf("Peer{%v %v out}", p.mconn, p.ID())
  128. }
  129. return fmt.Sprintf("Peer{%v %v in}", p.mconn, p.ID())
  130. }
  131. //---------------------------------------------------
  132. // Implements service.Service
  133. // SetLogger implements BaseService.
  134. func (p *peer) SetLogger(l log.Logger) {
  135. p.Logger = l
  136. p.mconn.SetLogger(l)
  137. }
  138. // OnStart implements BaseService.
  139. func (p *peer) OnStart() error {
  140. if err := p.BaseService.OnStart(); err != nil {
  141. return err
  142. }
  143. if err := p.mconn.Start(); err != nil {
  144. return err
  145. }
  146. go p.metricsReporter()
  147. return nil
  148. }
  149. // FlushStop mimics OnStop but additionally ensures that all successful
  150. // .Send() calls will get flushed before closing the connection.
  151. // NOTE: it is not safe to call this method more than once.
  152. func (p *peer) FlushStop() {
  153. p.metricsTicker.Stop()
  154. p.BaseService.OnStop()
  155. p.mconn.FlushStop() // stop everything and close the conn
  156. }
  157. // OnStop implements BaseService.
  158. func (p *peer) OnStop() {
  159. p.metricsTicker.Stop()
  160. p.BaseService.OnStop()
  161. p.mconn.Stop() // stop everything and close the conn
  162. }
  163. //---------------------------------------------------
  164. // Implements Peer
  165. // ID returns the peer's ID - the hex encoded hash of its pubkey.
  166. func (p *peer) ID() ID {
  167. return p.nodeInfo.ID()
  168. }
  169. // IsOutbound returns true if the connection is outbound, false otherwise.
  170. func (p *peer) IsOutbound() bool {
  171. return p.peerConn.outbound
  172. }
  173. // IsPersistent returns true if the peer is persitent, false otherwise.
  174. func (p *peer) IsPersistent() bool {
  175. return p.peerConn.persistent
  176. }
  177. // NodeInfo returns a copy of the peer's NodeInfo.
  178. func (p *peer) NodeInfo() NodeInfo {
  179. return p.nodeInfo
  180. }
  181. // SocketAddr returns the address of the socket.
  182. // For outbound peers, it's the address dialed (after DNS resolution).
  183. // For inbound peers, it's the address returned by the underlying connection
  184. // (not what's reported in the peer's NodeInfo).
  185. func (p *peer) SocketAddr() *NetAddress {
  186. return p.peerConn.socketAddr
  187. }
  188. // Status returns the peer's ConnectionStatus.
  189. func (p *peer) Status() tmconn.ConnectionStatus {
  190. return p.mconn.Status()
  191. }
  192. // Send msg bytes to the channel identified by chID byte. Returns false if the
  193. // send queue is full after timeout, specified by MConnection.
  194. func (p *peer) Send(chID byte, msgBytes []byte) bool {
  195. if !p.IsRunning() {
  196. // see Switch#Broadcast, where we fetch the list of peers and loop over
  197. // them - while we're looping, one peer may be removed and stopped.
  198. return false
  199. } else if !p.hasChannel(chID) {
  200. return false
  201. }
  202. res := p.mconn.Send(chID, msgBytes)
  203. if res {
  204. labels := []string{
  205. "peer_id", string(p.ID()),
  206. "chID", fmt.Sprintf("%#x", chID),
  207. }
  208. p.metrics.PeerSendBytesTotal.With(labels...).Add(float64(len(msgBytes)))
  209. }
  210. return res
  211. }
  212. // TrySend msg bytes to the channel identified by chID byte. Immediately returns
  213. // false if the send queue is full.
  214. func (p *peer) TrySend(chID byte, msgBytes []byte) bool {
  215. if !p.IsRunning() {
  216. return false
  217. } else if !p.hasChannel(chID) {
  218. return false
  219. }
  220. res := p.mconn.TrySend(chID, msgBytes)
  221. if res {
  222. labels := []string{
  223. "peer_id", string(p.ID()),
  224. "chID", fmt.Sprintf("%#x", chID),
  225. }
  226. p.metrics.PeerSendBytesTotal.With(labels...).Add(float64(len(msgBytes)))
  227. }
  228. return res
  229. }
  230. // Get the data for a given key.
  231. func (p *peer) Get(key string) interface{} {
  232. return p.Data.Get(key)
  233. }
  234. // Set sets the data for the given key.
  235. func (p *peer) Set(key string, data interface{}) {
  236. p.Data.Set(key, data)
  237. }
  238. // hasChannel returns true if the peer reported
  239. // knowing about the given chID.
  240. func (p *peer) hasChannel(chID byte) bool {
  241. for _, ch := range p.channels {
  242. if ch == chID {
  243. return true
  244. }
  245. }
  246. // NOTE: probably will want to remove this
  247. // but could be helpful while the feature is new
  248. p.Logger.Debug(
  249. "Unknown channel for peer",
  250. "channel",
  251. chID,
  252. "channels",
  253. p.channels,
  254. )
  255. return false
  256. }
  257. // CloseConn closes original connection. Used for cleaning up in cases where the peer had not been started at all.
  258. func (p *peer) CloseConn() error {
  259. return p.peerConn.conn.Close()
  260. }
  261. //---------------------------------------------------
  262. // methods only used for testing
  263. // TODO: can we remove these?
  264. // CloseConn closes the underlying connection
  265. func (pc *peerConn) CloseConn() {
  266. pc.conn.Close() // nolint: errcheck
  267. }
  268. // RemoteAddr returns peer's remote network address.
  269. func (p *peer) RemoteAddr() net.Addr {
  270. return p.peerConn.conn.RemoteAddr()
  271. }
  272. // CanSend returns true if the send queue is not full, false otherwise.
  273. func (p *peer) CanSend(chID byte) bool {
  274. if !p.IsRunning() {
  275. return false
  276. }
  277. return p.mconn.CanSend(chID)
  278. }
  279. //---------------------------------------------------
  280. func PeerMetrics(metrics *Metrics) PeerOption {
  281. return func(p *peer) {
  282. p.metrics = metrics
  283. }
  284. }
  285. func (p *peer) metricsReporter() {
  286. for {
  287. select {
  288. case <-p.metricsTicker.C:
  289. status := p.mconn.Status()
  290. var sendQueueSize float64
  291. for _, chStatus := range status.Channels {
  292. sendQueueSize += float64(chStatus.SendQueueSize)
  293. }
  294. p.metrics.PeerPendingSendBytes.With("peer_id", string(p.ID())).Set(sendQueueSize)
  295. case <-p.Quit():
  296. return
  297. }
  298. }
  299. }
  300. //------------------------------------------------------------------
  301. // helper funcs
  302. func createMConnection(
  303. conn net.Conn,
  304. p *peer,
  305. reactorsByCh map[byte]Reactor,
  306. chDescs []*tmconn.ChannelDescriptor,
  307. onPeerError func(Peer, interface{}),
  308. config tmconn.MConnConfig,
  309. ) *tmconn.MConnection {
  310. onReceive := func(chID byte, msgBytes []byte) {
  311. reactor := reactorsByCh[chID]
  312. if reactor == nil {
  313. // Note that its ok to panic here as it's caught in the conn._recover,
  314. // which does onPeerError.
  315. panic(fmt.Sprintf("Unknown channel %X", chID))
  316. }
  317. labels := []string{
  318. "peer_id", string(p.ID()),
  319. "chID", fmt.Sprintf("%#x", chID),
  320. }
  321. p.metrics.PeerReceiveBytesTotal.With(labels...).Add(float64(len(msgBytes)))
  322. reactor.Receive(chID, p, msgBytes)
  323. }
  324. onError := func(r interface{}) {
  325. onPeerError(p, r)
  326. }
  327. return tmconn.NewMConnectionWithConfig(
  328. conn,
  329. chDescs,
  330. onReceive,
  331. onError,
  332. config,
  333. )
  334. }