You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

494 lines
11 KiB

p2p: Implement PeerTransport This is the implementation for the design described in ADR 12[0]. It's the first step of a larger refactor of the p2p package as tracked in #2067[1]: an interface bundling all concerns of low-level connection handling and isolating the rest of peer lifecycle management from the specifics of the low-level internet protocols. Even if the swappable implementation will never be utilised, already the isolation of conn related code in one place will help with reasoning about the execution path and with addressing security sensitive issues surfaced through bounty programs and audits. We deliberately decided not to have Peer filtering and other management in the Transport; its sole responsibility is the translation of connections to Peers, handing those to the caller fully set up. It's the responsibility of the caller to reject those and/or keep track. Peer filtering will take place in the Switch and can be inspected in the following commit. This changeset additionally is an exercise in clean separation of logic from other infrastructural concerns like logging and instrumentation, by leveraging a clean and minimal interface. How this looks can be seen in a follow-up change. Design #2069[2] Refs #2067[3] Fixes #2047[4] Fixes #2046[5] changes: * describe Transport interface * implement new default Transport: MultiplexTransport * test MultiplexTransport with new constraints * implement ConnSet for concurrent management of net.Conn, synchronous to PeerSet * implement and expose duplicate IP filter * implement TransportOption for optional parameterisation [0] https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-012-peer-transport.md [1] https://github.com/tendermint/tendermint/issues/2067 [2] https://github.com/tendermint/tendermint/pull/2069 [3] https://github.com/tendermint/tendermint/issues/2067 [4] https://github.com/tendermint/tendermint/issues/2047 [5] https://github.com/tendermint/tendermint/issues/2046
6 years ago
  1. package p2p
  2. import (
  3. "context"
  4. "fmt"
  5. "net"
  6. "time"
  7. "github.com/tendermint/tendermint/config"
  8. crypto "github.com/tendermint/tendermint/crypto"
  9. "github.com/tendermint/tendermint/p2p/conn"
  10. )
const (
	// defaultDialTimeout bounds how long an outbound dial in Dial may take.
	defaultDialTimeout = time.Second
	// defaultFilterTimeout bounds how long each ConnFilterFunc may run in
	// filterConn before ErrFilterTimeout is returned.
	defaultFilterTimeout = 5 * time.Second
	// defaultHandshakeTimeout bounds both the secret-connection upgrade and
	// the NodeInfo exchange (see upgradeSecretConn and handshake).
	defaultHandshakeTimeout = 3 * time.Second
)
// IPResolver is a behaviour subset of net.Resolver. It allows tests and
// callers to substitute the DNS lookup used for duplicate-IP detection
// (net.DefaultResolver satisfies it).
type IPResolver interface {
	LookupIPAddr(context.Context, string) ([]net.IPAddr, error)
}
// accept is the container to carry the upgraded connection and NodeInfo from an
// asynchronously running routine to the Accept method.
type accept struct {
	conn     net.Conn // upgraded connection; only meaningful when err is nil
	nodeInfo NodeInfo // NodeInfo received during the handshake
	err      error    // non-nil if filtering or upgrading the connection failed
}
// peerConfig is used to bundle data we need to fully setup a Peer with an
// MConn, provided by the caller of Accept and Dial (currently the Switch). This
// is a temporary measure until reactor setup is less dynamic and we introduce the
// concept of PeerBehaviour to communicate about significant Peer lifecycle
// events.
// TODO(xla): Refactor out with more static Reactor setup and PeerBehaviour.
type peerConfig struct {
	chDescs []*conn.ChannelDescriptor
	// onPeerError is handed to the peer and invoked on fatal peer errors.
	onPeerError func(Peer, interface{})
	// outbound is overwritten by Accept (false) and Dial (true); the caller's
	// value is ignored.
	outbound, persistent bool
	reactorsByCh         map[byte]Reactor
}
// Transport emits and connects to Peers. The implementation of Peer is left to
// the transport. Each transport is also responsible to filter establishing
// peers specific to its domain.
type Transport interface {
	// Accept returns a newly connected Peer. It blocks until one is available
	// or the transport is closed.
	Accept(peerConfig) (Peer, error)

	// Dial connects to the Peer for the address.
	Dial(NetAddress, peerConfig) (Peer, error)
}
// transportLifecycle bundles the methods for callers to control start and stop
// behaviour. It is kept separate from Transport so consumers that only emit
// peers need not see lifecycle management.
type transportLifecycle interface {
	Close() error
	Listen(NetAddress) error
}
// ConnFilterFunc to be implemented by filter hooks after a new connection has
// been established. The set of existing connections is passed along together
// with all resolved IPs for the new connection. Returning a non-nil error
// rejects the connection.
type ConnFilterFunc func(ConnSet, net.Conn, []net.IP) error
  58. // ConnDuplicateIPFilter resolves and keeps all ips for an incoming connection
  59. // and refuses new ones if they come from a known ip.
  60. func ConnDuplicateIPFilter() ConnFilterFunc {
  61. return func(cs ConnSet, c net.Conn, ips []net.IP) error {
  62. for _, ip := range ips {
  63. if cs.HasIP(ip) {
  64. return ErrRejected{
  65. conn: c,
  66. err: fmt.Errorf("IP<%v> already connected", ip),
  67. isDuplicate: true,
  68. }
  69. }
  70. }
  71. return nil
  72. }
  73. }
// MultiplexTransportOption sets an optional parameter on the
// MultiplexTransport. Options are applied by calling them with the transport
// to configure.
type MultiplexTransportOption func(*MultiplexTransport)
// MultiplexTransportConnFilters sets the filters for rejecting new connections.
// The given filters replace any previously configured set.
func MultiplexTransportConnFilters(
	filters ...ConnFilterFunc,
) MultiplexTransportOption {
	return func(mt *MultiplexTransport) { mt.connFilters = filters }
}
// MultiplexTransportFilterTimeout sets the timeout waited for filter calls to
// return. Filters exceeding it cause the connection to be rejected with
// ErrFilterTimeout.
func MultiplexTransportFilterTimeout(
	timeout time.Duration,
) MultiplexTransportOption {
	return func(mt *MultiplexTransport) { mt.filterTimeout = timeout }
}
// MultiplexTransportResolver sets the Resolver used for ip lookups, defaults to
// net.DefaultResolver.
func MultiplexTransportResolver(resolver IPResolver) MultiplexTransportOption {
	return func(mt *MultiplexTransport) { mt.resolver = resolver }
}
// MultiplexTransport accepts and dials tcp connections and upgrades them to
// multiplexed peers.
type MultiplexTransport struct {
	listener net.Listener

	// acceptc carries upgraded inbound connections (or errors) from the
	// background accept routine to callers of Accept.
	acceptc chan accept
	// closec is closed exactly once by Close to signal shutdown to all
	// pending operations.
	closec chan struct{}

	// Lookup table for duplicate ip and id checks.
	conns       ConnSet
	connFilters []ConnFilterFunc

	dialTimeout      time.Duration
	filterTimeout    time.Duration
	handshakeTimeout time.Duration
	nodeInfo         NodeInfo
	nodeKey          NodeKey
	resolver         IPResolver

	// TODO(xla): Those configs are still needed as we parameterise peerConn and
	// peer currently. All relevant configuration should be refactored into options
	// with sane defaults.
	mConfig   conn.MConnConfig
	p2pConfig config.P2PConfig
}
// Compile-time assertions that MultiplexTransport satisfies both the
// Transport and transportLifecycle interfaces.
var _ Transport = (*MultiplexTransport)(nil)
var _ transportLifecycle = (*MultiplexTransport)(nil)
  119. // NewMultiplexTransport returns a tcp connected multiplexed peer.
  120. func NewMultiplexTransport(
  121. nodeInfo NodeInfo,
  122. nodeKey NodeKey,
  123. ) *MultiplexTransport {
  124. return &MultiplexTransport{
  125. acceptc: make(chan accept),
  126. closec: make(chan struct{}),
  127. dialTimeout: defaultDialTimeout,
  128. filterTimeout: defaultFilterTimeout,
  129. handshakeTimeout: defaultHandshakeTimeout,
  130. mConfig: conn.DefaultMConnConfig(),
  131. nodeInfo: nodeInfo,
  132. nodeKey: nodeKey,
  133. conns: NewConnSet(),
  134. resolver: net.DefaultResolver,
  135. }
  136. }
// Accept implements Transport. It blocks until an upgraded inbound connection
// is delivered by acceptPeers or the transport is closed, in which case
// ErrTransportClosed is returned.
func (mt *MultiplexTransport) Accept(cfg peerConfig) (Peer, error) {
	select {
	// This case should never have any side-effectful/blocking operations to
	// ensure that quality peers are ready to be used.
	case a := <-mt.acceptc:
		if a.err != nil {
			return nil, a.err
		}

		// Inbound by definition; override whatever the caller set.
		cfg.outbound = false

		return mt.wrapPeer(a.conn, a.nodeInfo, cfg), nil
	case <-mt.closec:
		return nil, &ErrTransportClosed{}
	}
}
  152. // Dial implements Transport.
  153. func (mt *MultiplexTransport) Dial(
  154. addr NetAddress,
  155. cfg peerConfig,
  156. ) (Peer, error) {
  157. c, err := addr.DialTimeout(mt.dialTimeout)
  158. if err != nil {
  159. return nil, err
  160. }
  161. // TODO(xla): Evaluate if we should apply filters if we explicitly dial.
  162. if err := mt.filterConn(c); err != nil {
  163. return nil, err
  164. }
  165. secretConn, nodeInfo, err := mt.upgrade(c)
  166. if err != nil {
  167. return nil, err
  168. }
  169. cfg.outbound = true
  170. p := mt.wrapPeer(secretConn, nodeInfo, cfg)
  171. return p, nil
  172. }
  173. // Close implements transportLifecycle.
  174. func (mt *MultiplexTransport) Close() error {
  175. close(mt.closec)
  176. return mt.listener.Close()
  177. }
  178. // Listen implements transportLifecycle.
  179. func (mt *MultiplexTransport) Listen(addr NetAddress) error {
  180. ln, err := net.Listen("tcp", addr.DialString())
  181. if err != nil {
  182. return err
  183. }
  184. mt.listener = ln
  185. go mt.acceptPeers()
  186. return nil
  187. }
// acceptPeers loops on the listener, accepting raw TCP connections and
// upgrading each one (filtering + secret connection + handshake) in its own
// goroutine before handing the result to Accept via acceptc. It runs until
// the listener fails or the transport is closed.
func (mt *MultiplexTransport) acceptPeers() {
	for {
		c, err := mt.listener.Accept()
		if err != nil {
			// If Close() has been called, silently exit.
			select {
			case _, ok := <-mt.closec:
				if !ok {
					// closec is only ever closed, never sent on, so a
					// readable closec means shutdown was requested.
					return
				}
			default:
				// Transport is not closed
			}

			// Propagate the listener error to the caller of Accept, then
			// stop accepting.
			mt.acceptc <- accept{err: err}
			return
		}

		// Connection upgrade and filtering should be asynchronous to avoid
		// Head-of-line blocking[0].
		// Reference: https://github.com/tendermint/tendermint/issues/2047
		//
		// [0] https://en.wikipedia.org/wiki/Head-of-line_blocking
		go func(c net.Conn) {
			var (
				nodeInfo   NodeInfo
				secretConn *conn.SecretConnection
			)

			err := mt.filterConn(c)
			if err == nil {
				secretConn, nodeInfo, err = mt.upgrade(c)
			}

			select {
			case mt.acceptc <- accept{secretConn, nodeInfo, err}:
				// Make the upgraded peer available.
			case <-mt.closec:
				// Give up if the transport was closed.
				_ = c.Close()
				return
			}
		}(c)
	}
}
// cleanup removes c from the duplicate-tracking conn set and closes it.
// Called when an upgrade fails or when the wrapped peer stops.
func (mt *MultiplexTransport) cleanup(c net.Conn) error {
	mt.conns.Remove(c)

	return c.Close()
}
// filterConn vets a freshly established connection: it rejects exact
// duplicates, resolves the remote IPs and runs every configured
// ConnFilterFunc concurrently, each bounded by filterTimeout. On success the
// connection is recorded in the conn set; on any failure the connection is
// closed (via the deferred func observing the named return) and the error
// returned.
func (mt *MultiplexTransport) filterConn(c net.Conn) (err error) {
	// Named return lets this deferred func close c on every error path.
	defer func() {
		if err != nil {
			_ = c.Close()
		}
	}()

	// Reject if connection is already present.
	if mt.conns.Has(c) {
		return ErrRejected{conn: c, isDuplicate: true}
	}

	// Resolve ips for incoming conn.
	ips, err := resolveIPs(mt.resolver, c)
	if err != nil {
		return err
	}

	// Buffered to the number of filters so a slow filter's eventual send
	// never blocks after we bail out on timeout below.
	errc := make(chan error, len(mt.connFilters))

	for _, f := range mt.connFilters {
		go func(f ConnFilterFunc, c net.Conn, ips []net.IP, errc chan<- error) {
			errc <- f(mt.conns, c, ips)
		}(f, c, ips, errc)
	}

	// Collect exactly one result per filter; the first rejection or timeout
	// wins.
	for i := 0; i < cap(errc); i++ {
		select {
		case err := <-errc:
			if err != nil {
				return ErrRejected{conn: c, err: err, isFiltered: true}
			}
		case <-time.After(mt.filterTimeout):
			return ErrFilterTimeout{}
		}
	}

	// Track the accepted connection together with its resolved ips.
	mt.conns.Set(c, ips)

	return nil
}
  267. func (mt *MultiplexTransport) upgrade(
  268. c net.Conn,
  269. ) (secretConn *conn.SecretConnection, nodeInfo NodeInfo, err error) {
  270. defer func() {
  271. if err != nil {
  272. _ = mt.cleanup(c)
  273. }
  274. }()
  275. secretConn, err = upgradeSecretConn(c, mt.handshakeTimeout, mt.nodeKey.PrivKey)
  276. if err != nil {
  277. return nil, NodeInfo{}, ErrRejected{
  278. conn: c,
  279. err: fmt.Errorf("secrect conn failed: %v", err),
  280. isAuthFailure: true,
  281. }
  282. }
  283. nodeInfo, err = handshake(secretConn, mt.handshakeTimeout, mt.nodeInfo)
  284. if err != nil {
  285. return nil, NodeInfo{}, ErrRejected{
  286. conn: c,
  287. err: fmt.Errorf("handshake failed: %v", err),
  288. isAuthFailure: true,
  289. }
  290. }
  291. if err := nodeInfo.Validate(); err != nil {
  292. return nil, NodeInfo{}, ErrRejected{
  293. conn: c,
  294. err: err,
  295. isNodeInfoInvalid: true,
  296. }
  297. }
  298. // Ensure connection key matches self reported key.
  299. if connID := PubKeyToID(secretConn.RemotePubKey()); connID != nodeInfo.ID {
  300. return nil, NodeInfo{}, ErrRejected{
  301. conn: c,
  302. id: connID,
  303. err: fmt.Errorf(
  304. "conn.ID (%v) NodeInfo.ID (%v) missmatch",
  305. connID,
  306. nodeInfo.ID,
  307. ),
  308. isAuthFailure: true,
  309. }
  310. }
  311. // Reject self.
  312. if mt.nodeInfo.ID == nodeInfo.ID {
  313. return nil, NodeInfo{}, ErrRejected{
  314. addr: *NewNetAddress(nodeInfo.ID, c.RemoteAddr()),
  315. conn: c,
  316. id: nodeInfo.ID,
  317. isSelf: true,
  318. }
  319. }
  320. if err := mt.nodeInfo.CompatibleWith(nodeInfo); err != nil {
  321. return nil, NodeInfo{}, ErrRejected{
  322. conn: c,
  323. err: err,
  324. id: nodeInfo.ID,
  325. isIncompatible: true,
  326. }
  327. }
  328. return secretConn, nodeInfo, nil
  329. }
// wrapPeer assembles a Peer from the upgraded connection, the exchanged
// NodeInfo and the caller-supplied peerConfig, and starts a goroutine that
// waits for the peer to stop so the connection can be untracked and closed.
func (mt *MultiplexTransport) wrapPeer(
	c net.Conn,
	ni NodeInfo,
	cfg peerConfig,
) Peer {
	p := newPeer(
		peerConn{
			conn:       c,
			config:     &mt.p2pConfig,
			outbound:   cfg.outbound,
			persistent: cfg.persistent,
		},
		mt.mConfig,
		ni,
		cfg.reactorsByCh,
		cfg.chDescs,
		cfg.onPeerError,
	)

	// Wait for Peer to Stop so we can cleanup.
	go func(c net.Conn) {
		<-p.Quit()
		_ = mt.cleanup(c)
	}(c)

	return p
}
// handshake exchanges NodeInfo with the remote end of c, bounded by timeout
// via a connection deadline. Send and receive run in parallel goroutines so
// neither side depends on message ordering; the first error aborts. On
// success the deadline is cleared before returning the peer's NodeInfo.
func handshake(
	c net.Conn,
	timeout time.Duration,
	nodeInfo NodeInfo,
) (NodeInfo, error) {
	if err := c.SetDeadline(time.Now().Add(timeout)); err != nil {
		return NodeInfo{}, err
	}

	var (
		// Buffered for both goroutines so neither blocks if we return early.
		errc = make(chan error, 2)

		peerNodeInfo NodeInfo
	)

	// Send our NodeInfo.
	go func(errc chan<- error, c net.Conn) {
		_, err := cdc.MarshalBinaryWriter(c, nodeInfo)
		errc <- err
	}(errc, c)
	// Receive theirs, bounded by MaxNodeInfoSize to cap the read.
	go func(errc chan<- error, c net.Conn) {
		_, err := cdc.UnmarshalBinaryReader(
			c,
			&peerNodeInfo,
			int64(MaxNodeInfoSize()),
		)
		errc <- err
	}(errc, c)

	// Wait for both directions to finish.
	for i := 0; i < cap(errc); i++ {
		err := <-errc
		if err != nil {
			return NodeInfo{}, err
		}
	}

	// Clear the deadline so subsequent use of the connection is unbounded.
	return peerNodeInfo, c.SetDeadline(time.Time{})
}
  387. func upgradeSecretConn(
  388. c net.Conn,
  389. timeout time.Duration,
  390. privKey crypto.PrivKey,
  391. ) (*conn.SecretConnection, error) {
  392. if err := c.SetDeadline(time.Now().Add(timeout)); err != nil {
  393. return nil, err
  394. }
  395. sc, err := conn.MakeSecretConnection(c, privKey)
  396. if err != nil {
  397. return nil, err
  398. }
  399. return sc, sc.SetDeadline(time.Time{})
  400. }
  401. func resolveIPs(resolver IPResolver, c net.Conn) ([]net.IP, error) {
  402. host, _, err := net.SplitHostPort(c.RemoteAddr().String())
  403. if err != nil {
  404. return nil, err
  405. }
  406. addrs, err := resolver.LookupIPAddr(context.Background(), host)
  407. if err != nil {
  408. return nil, err
  409. }
  410. ips := []net.IP{}
  411. for _, addr := range addrs {
  412. ips = append(ips, addr.IP)
  413. }
  414. return ips, nil
  415. }