You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

720 lines
21 KiB

7 years ago
7 years ago
pubsub 2.0 (#3227) * green pubsub tests :OK: * get rid of clientToQueryMap * Subscribe and SubscribeUnbuffered * start adapting other pkgs to new pubsub * nope * rename MsgAndTags to Message * remove TagMap it does not bring any additional benefits * bring back EventSubscriber * fix test * fix data race in TestStartNextHeightCorrectly ``` Write at 0x00c0001c7418 by goroutine 796: github.com/tendermint/tendermint/consensus.TestStartNextHeightCorrectly() /go/src/github.com/tendermint/tendermint/consensus/state_test.go:1296 +0xad testing.tRunner() /usr/local/go/src/testing/testing.go:827 +0x162 Previous read at 0x00c0001c7418 by goroutine 858: github.com/tendermint/tendermint/consensus.(*ConsensusState).addVote() /go/src/github.com/tendermint/tendermint/consensus/state.go:1631 +0x1366 github.com/tendermint/tendermint/consensus.(*ConsensusState).tryAddVote() /go/src/github.com/tendermint/tendermint/consensus/state.go:1476 +0x8f github.com/tendermint/tendermint/consensus.(*ConsensusState).handleMsg() /go/src/github.com/tendermint/tendermint/consensus/state.go:667 +0xa1e github.com/tendermint/tendermint/consensus.(*ConsensusState).receiveRoutine() /go/src/github.com/tendermint/tendermint/consensus/state.go:628 +0x794 Goroutine 796 (running) created at: testing.(*T).Run() /usr/local/go/src/testing/testing.go:878 +0x659 testing.runTests.func1() /usr/local/go/src/testing/testing.go:1119 +0xa8 testing.tRunner() /usr/local/go/src/testing/testing.go:827 +0x162 testing.runTests() /usr/local/go/src/testing/testing.go:1117 +0x4ee testing.(*M).Run() /usr/local/go/src/testing/testing.go:1034 +0x2ee main.main() _testmain.go:214 +0x332 Goroutine 858 (running) created at: github.com/tendermint/tendermint/consensus.(*ConsensusState).startRoutines() /go/src/github.com/tendermint/tendermint/consensus/state.go:334 +0x221 github.com/tendermint/tendermint/consensus.startTestRound() /go/src/github.com/tendermint/tendermint/consensus/common_test.go:122 +0x63 github.com/tendermint/tendermint/consensus.TestStateFullRound1() /go/src/github.com/tendermint/tendermint/consensus/state_test.go:255 +0x397 testing.tRunner() /usr/local/go/src/testing/testing.go:827 +0x162 ``` * fixes after my own review * fix formatting * wait 100ms before kicking a subscriber out + a test for indexer_service * fixes after my second review * no timeout * add changelog entries * fix merge conflicts * fix typos after Thane's review Co-Authored-By: melekes <anton.kalyaev@gmail.com> * reformat code * rewrite indexer service in the attempt to fix failing test https://github.com/tendermint/tendermint/pull/3227/#issuecomment-462316527 * Revert "rewrite indexer service in the attempt to fix failing test" This reverts commit 0d9107a098230de7138abb1c201877c246e89ed1. * another attempt to fix indexer * fixes after Ethan's review * use unbuffered channel when indexing transactions Refs https://github.com/tendermint/tendermint/pull/3227#discussion_r258786716 * add a comment for EventBus#SubscribeUnbuffered * format code
5 years ago
pubsub 2.0 (#3227) * green pubsub tests :OK: * get rid of clientToQueryMap * Subscribe and SubscribeUnbuffered * start adapting other pkgs to new pubsub * nope * rename MsgAndTags to Message * remove TagMap it does not bring any additional benefits * bring back EventSubscriber * fix test * fix data race in TestStartNextHeightCorrectly ``` Write at 0x00c0001c7418 by goroutine 796: github.com/tendermint/tendermint/consensus.TestStartNextHeightCorrectly() /go/src/github.com/tendermint/tendermint/consensus/state_test.go:1296 +0xad testing.tRunner() /usr/local/go/src/testing/testing.go:827 +0x162 Previous read at 0x00c0001c7418 by goroutine 858: github.com/tendermint/tendermint/consensus.(*ConsensusState).addVote() /go/src/github.com/tendermint/tendermint/consensus/state.go:1631 +0x1366 github.com/tendermint/tendermint/consensus.(*ConsensusState).tryAddVote() /go/src/github.com/tendermint/tendermint/consensus/state.go:1476 +0x8f github.com/tendermint/tendermint/consensus.(*ConsensusState).handleMsg() /go/src/github.com/tendermint/tendermint/consensus/state.go:667 +0xa1e github.com/tendermint/tendermint/consensus.(*ConsensusState).receiveRoutine() /go/src/github.com/tendermint/tendermint/consensus/state.go:628 +0x794 Goroutine 796 (running) created at: testing.(*T).Run() /usr/local/go/src/testing/testing.go:878 +0x659 testing.runTests.func1() /usr/local/go/src/testing/testing.go:1119 +0xa8 testing.tRunner() /usr/local/go/src/testing/testing.go:827 +0x162 testing.runTests() /usr/local/go/src/testing/testing.go:1117 +0x4ee testing.(*M).Run() /usr/local/go/src/testing/testing.go:1034 +0x2ee main.main() _testmain.go:214 +0x332 Goroutine 858 (running) created at: github.com/tendermint/tendermint/consensus.(*ConsensusState).startRoutines() /go/src/github.com/tendermint/tendermint/consensus/state.go:334 +0x221 github.com/tendermint/tendermint/consensus.startTestRound() /go/src/github.com/tendermint/tendermint/consensus/common_test.go:122 +0x63 github.com/tendermint/tendermint/consensus.TestStateFullRound1() /go/src/github.com/tendermint/tendermint/consensus/state_test.go:255 +0x397 testing.tRunner() /usr/local/go/src/testing/testing.go:827 +0x162 ``` * fixes after my own review * fix formatting * wait 100ms before kicking a subscriber out + a test for indexer_service * fixes after my second review * no timeout * add changelog entries * fix merge conflicts * fix typos after Thane's review Co-Authored-By: melekes <anton.kalyaev@gmail.com> * reformat code * rewrite indexer service in the attempt to fix failing test https://github.com/tendermint/tendermint/pull/3227/#issuecomment-462316527 * Revert "rewrite indexer service in the attempt to fix failing test" This reverts commit 0d9107a098230de7138abb1c201877c246e89ed1. * another attempt to fix indexer * fixes after Ethan's review * use unbuffered channel when indexing transactions Refs https://github.com/tendermint/tendermint/pull/3227#discussion_r258786716 * add a comment for EventBus#SubscribeUnbuffered * format code
5 years ago
6 years ago
fix non deterministic test failures and race in privval socket (#3258) * node: decrease retry conn timeout in test Should fix #3256 The retry timeout was set to the default, which is the same as the accept timeout, so it's no wonder this would fail. Here we decrease the retry timeout so we can try many times before the accept timeout. * p2p: increase handshake timeout in test This fails sometimes, presumably because the handshake timeout is so low (only 50ms). So increase it to 1s. Should fix #3187 * privval: fix race with ping. closes #3237 Pings happen in a go-routine and can happen concurrently with other messages. Since we use a request/response protocol, we expect to send a request and get back the corresponding response. But with pings happening concurrently, this assumption could be violated. We were using a mutex, but only a RWMutex, where the RLock was being held for sending messages - this was to allow the underlying connection to be replaced if it fails. Turns out we actually need to use a full lock (not just a read lock) to prevent multiple requests from happening concurrently. * node: fix test name. DelayedStop -> DelayedStart * autofile: Wait() method In the TestWALTruncate in consensus/wal_test.go we remove the WAL directory at the end of the test. However the wal.Stop() does not properly wait for the autofile group to finish shutting down. Hence it was possible that the group's go-routine is still running when the cleanup happens, which causes a panic since the directory disappeared. Here we add a Wait() method to properly wait until the go-routine exits so we can safely clean up. This fixes #2852.
5 years ago
fix non deterministic test failures and race in privval socket (#3258) * node: decrease retry conn timeout in test Should fix #3256 The retry timeout was set to the default, which is the same as the accept timeout, so it's no wonder this would fail. Here we decrease the retry timeout so we can try many times before the accept timeout. * p2p: increase handshake timeout in test This fails sometimes, presumably because the handshake timeout is so low (only 50ms). So increase it to 1s. Should fix #3187 * privval: fix race with ping. closes #3237 Pings happen in a go-routine and can happen concurrently with other messages. Since we use a request/response protocol, we expect to send a request and get back the corresponding response. But with pings happening concurrently, this assumption could be violated. We were using a mutex, but only a RWMutex, where the RLock was being held for sending messages - this was to allow the underlying connection to be replaced if it fails. Turns out we actually need to use a full lock (not just a read lock) to prevent multiple requests from happening concurrently. * node: fix test name. DelayedStop -> DelayedStart * autofile: Wait() method In the TestWALTruncate in consensus/wal_test.go we remove the WAL directory at the end of the test. However the wal.Stop() does not properly wait for the autofile group to finish shutting down. Hence it was possible that the group's go-routine is still running when the cleanup happens, which causes a panic since the directory disappeared. Here we add a Wait() method to properly wait until the go-routine exits so we can safely clean up. This fixes #2852.
5 years ago
Close and retry a RemoteSigner on err (#2923) * Close and recreate a RemoteSigner on err * Update changelog * Address Anton's comments / suggestions: - update changelog - restart TCPVal - shut down on `ErrUnexpectedResponse` * re-init remote signer client with fresh connection if Ping fails - add/update TODOs in secret connection - rename tcp.go -> tcp_client.go, same with ipc to clarify their purpose * account for `conn returned by waitConnection can be `nil` - also add TODO about RemoteSigner conn field * Tests for retrying: IPC / TCP - shorter info log on success - set conn and use it in tests to close conn * Tests for retrying: IPC / TCP - shorter info log on success - set conn and use it in tests to close conn - add rwmutex for conn field in IPC * comments and doc.go * fix ipc tests. fixes #2677 * use constants for tests * cleanup some error statements * fixes #2784, race in tests * remove print statement * minor fixes from review * update comment on sts spec * cosmetics * p2p/conn: add failing tests * p2p/conn: make SecretConnection thread safe * changelog * IPCVal signer refactor - use a .reset() method - don't use embedded RemoteSignerClient - guard RemoteSignerClient with mutex - drop the .conn - expose Close() on RemoteSignerClient * apply IPCVal refactor to TCPVal * remove mtx from RemoteSignerClient * consolidate IPCVal and TCPVal, fixes #3104 - done in tcp_client.go - now called SocketVal - takes a listener in the constructor - make tcpListener and unixListener contain all the differences * delete ipc files * introduce unix and tcp dialer for RemoteSigner * rename files - drop tcp_ prefix - rename priv_validator.go to file.go * bring back listener options * fix node * fix priv_val_server * fix node test * minor cleanup and comments
6 years ago
privval: refactor Remote signers (#3370) This PR is related to #3107 and a continuation of #3351 It is important to emphasise that in the privval original design, client/server and listening/dialing roles are inverted and do not follow a conventional interaction. Given two hosts A and B: Host A is listener/client Host B is dialer/server (contains the secret key) When A requires a signature, it needs to wait for B to dial in before it can issue a request. A only accepts a single connection and any failure leads to dropping the connection and waiting for B to reconnect. The original rationale behind this design was based on security. Host B only allows outbound connections to a list of whitelisted hosts. It is not possible to reach B unless B dials in. There are no listening/open ports in B. This PR results in the following changes: Refactors ping/heartbeat to avoid previously existing race conditions. Separates transport (dialer/listener) from signing (client/server) concerns to simplify workflow. Unifies and abstracts away the differences between unix and tcp sockets. A single signer endpoint implementation unifies connection handling code (read/write/close/connection obj) The signer request handler (server side) is customizable to increase testability. Updates and extends unit tests A high level overview of the classes is as follows: Transport (endpoints): The following classes take care of establishing a connection SignerDialerEndpoint SignerListeningEndpoint SignerEndpoint groups common functionality (read/write/timeouts/etc.) Signing (client/server): The following classes take care of exchanging request/responses SignerClient SignerServer This PR also closes #3601 Commits: * refactoring - work in progress * reworking unit tests * Encapsulating and fixing unit tests * Improve tests * Clean up * Fix/improve unit tests * clean up tests * Improving service endpoint * fixing unit test * fix linter issues * avoid invalid cache values (improve later?) * complete implementation * wip * improved connection loop * Improve reconnections + fixing unit tests * addressing comments * small formatting changes * clean up * Update node/node.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_client.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_client_test.go Co-Authored-By: jleni <juan.leni@zondax.ch> * check during initialization * dropping connecting when writing fails * removing break * use t.log instead * unifying and using cmn.GetFreePort() * review fixes * reordering and unifying drop connection * closing instead of signalling * refactored service loop * removed superfluous brackets * GetPubKey can return errors * Revert "GetPubKey can return errors" This reverts commit 68c06f19b4650389d7e5ab1659b318889028202c. * adding entry to changelog * Update CHANGELOG_PENDING.md Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_client.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_dialer_endpoint.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_dialer_endpoint.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_dialer_endpoint.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_dialer_endpoint.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_listener_endpoint_test.go Co-Authored-By: jleni <juan.leni@zondax.ch> * updating node.go * review fixes * fixes linter * fixing unit test * small fixes in comments * addressing review comments * addressing review comments 2 * reverting suggestion * Update privval/signer_client_test.go Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * Update privval/signer_client_test.go Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * Update privval/signer_listener_endpoint_test.go Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * do not expose brokenSignerDialerEndpoint * clean up logging * unifying methods shorten test time signer also drops * reenabling pings * improving testability + unit test * fixing go fmt + unit test * remove unused code * Addressing review comments * simplifying connection workflow * fix linter/go import issue * using base service quit * updating comment * Simplifying design + adjusting names * fixing linter issues * refactoring test harness + fixes * Addressing review comments * cleaning up * adding additional error check
5 years ago
privval: refactor Remote signers (#3370) This PR is related to #3107 and a continuation of #3351 It is important to emphasise that in the privval original design, client/server and listening/dialing roles are inverted and do not follow a conventional interaction. Given two hosts A and B: Host A is listener/client Host B is dialer/server (contains the secret key) When A requires a signature, it needs to wait for B to dial in before it can issue a request. A only accepts a single connection and any failure leads to dropping the connection and waiting for B to reconnect. The original rationale behind this design was based on security. Host B only allows outbound connections to a list of whitelisted hosts. It is not possible to reach B unless B dials in. There are no listening/open ports in B. This PR results in the following changes: Refactors ping/heartbeat to avoid previously existing race conditions. Separates transport (dialer/listener) from signing (client/server) concerns to simplify workflow. Unifies and abstracts away the differences between unix and tcp sockets. A single signer endpoint implementation unifies connection handling code (read/write/close/connection obj) The signer request handler (server side) is customizable to increase testability. Updates and extends unit tests A high level overview of the classes is as follows: Transport (endpoints): The following classes take care of establishing a connection SignerDialerEndpoint SignerListeningEndpoint SignerEndpoint groups common functionality (read/write/timeouts/etc.) Signing (client/server): The following classes take care of exchanging request/responses SignerClient SignerServer This PR also closes #3601 Commits: * refactoring - work in progress * reworking unit tests * Encapsulating and fixing unit tests * Improve tests * Clean up * Fix/improve unit tests * clean up tests * Improving service endpoint * fixing unit test * fix linter issues * avoid invalid cache values (improve later?) * complete implementation * wip * improved connection loop * Improve reconnections + fixing unit tests * addressing comments * small formatting changes * clean up * Update node/node.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_client.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_client_test.go Co-Authored-By: jleni <juan.leni@zondax.ch> * check during initialization * dropping connecting when writing fails * removing break * use t.log instead * unifying and using cmn.GetFreePort() * review fixes * reordering and unifying drop connection * closing instead of signalling * refactored service loop * removed superfluous brackets * GetPubKey can return errors * Revert "GetPubKey can return errors" This reverts commit 68c06f19b4650389d7e5ab1659b318889028202c. * adding entry to changelog * Update CHANGELOG_PENDING.md Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_client.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_dialer_endpoint.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_dialer_endpoint.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_dialer_endpoint.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_dialer_endpoint.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_listener_endpoint_test.go Co-Authored-By: jleni <juan.leni@zondax.ch> * updating node.go * review fixes * fixes linter * fixing unit test * small fixes in comments * addressing review comments * addressing review comments 2 * reverting suggestion * Update privval/signer_client_test.go Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * Update privval/signer_client_test.go Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * Update privval/signer_listener_endpoint_test.go Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * do not expose brokenSignerDialerEndpoint * clean up logging * unifying methods shorten test time signer also drops * reenabling pings * improving testability + unit test * fixing go fmt + unit test * remove unused code * Addressing review comments * simplifying connection workflow * fix linter/go import issue * using base service quit * updating comment * Simplifying design + adjusting names * fixing linter issues * refactoring test harness + fixes * Addressing review comments * cleaning up * adding additional error check
5 years ago
Close and retry a RemoteSigner on err (#2923) * Close and recreate a RemoteSigner on err * Update changelog * Address Anton's comments / suggestions: - update changelog - restart TCPVal - shut down on `ErrUnexpectedResponse` * re-init remote signer client with fresh connection if Ping fails - add/update TODOs in secret connection - rename tcp.go -> tcp_client.go, same with ipc to clarify their purpose * account for `conn returned by waitConnection can be `nil` - also add TODO about RemoteSigner conn field * Tests for retrying: IPC / TCP - shorter info log on success - set conn and use it in tests to close conn * Tests for retrying: IPC / TCP - shorter info log on success - set conn and use it in tests to close conn - add rwmutex for conn field in IPC * comments and doc.go * fix ipc tests. fixes #2677 * use constants for tests * cleanup some error statements * fixes #2784, race in tests * remove print statement * minor fixes from review * update comment on sts spec * cosmetics * p2p/conn: add failing tests * p2p/conn: make SecretConnection thread safe * changelog * IPCVal signer refactor - use a .reset() method - don't use embedded RemoteSignerClient - guard RemoteSignerClient with mutex - drop the .conn - expose Close() on RemoteSignerClient * apply IPCVal refactor to TCPVal * remove mtx from RemoteSignerClient * consolidate IPCVal and TCPVal, fixes #3104 - done in tcp_client.go - now called SocketVal - takes a listener in the constructor - make tcpListener and unixListener contain all the differences * delete ipc files * introduce unix and tcp dialer for RemoteSigner * rename files - drop tcp_ prefix - rename priv_validator.go to file.go * bring back listener options * fix node * fix priv_val_server * fix node test * minor cleanup and comments
6 years ago
privval: refactor Remote signers (#3370) This PR is related to #3107 and a continuation of #3351 It is important to emphasise that in the privval original design, client/server and listening/dialing roles are inverted and do not follow a conventional interaction. Given two hosts A and B: Host A is listener/client Host B is dialer/server (contains the secret key) When A requires a signature, it needs to wait for B to dial in before it can issue a request. A only accepts a single connection and any failure leads to dropping the connection and waiting for B to reconnect. The original rationale behind this design was based on security. Host B only allows outbound connections to a list of whitelisted hosts. It is not possible to reach B unless B dials in. There are no listening/open ports in B. This PR results in the following changes: Refactors ping/heartbeat to avoid previously existing race conditions. Separates transport (dialer/listener) from signing (client/server) concerns to simplify workflow. Unifies and abstracts away the differences between unix and tcp sockets. A single signer endpoint implementation unifies connection handling code (read/write/close/connection obj) The signer request handler (server side) is customizable to increase testability. Updates and extends unit tests A high level overview of the classes is as follows: Transport (endpoints): The following classes take care of establishing a connection SignerDialerEndpoint SignerListeningEndpoint SignerEndpoint groups common functionality (read/write/timeouts/etc.) Signing (client/server): The following classes take care of exchanging request/responses SignerClient SignerServer This PR also closes #3601 Commits: * refactoring - work in progress * reworking unit tests * Encapsulating and fixing unit tests * Improve tests * Clean up * Fix/improve unit tests * clean up tests * Improving service endpoint * fixing unit test * fix linter issues * avoid invalid cache values (improve later?) * complete implementation * wip * improved connection loop * Improve reconnections + fixing unit tests * addressing comments * small formatting changes * clean up * Update node/node.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_client.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_client_test.go Co-Authored-By: jleni <juan.leni@zondax.ch> * check during initialization * dropping connecting when writing fails * removing break * use t.log instead * unifying and using cmn.GetFreePort() * review fixes * reordering and unifying drop connection * closing instead of signalling * refactored service loop * removed superfluous brackets * GetPubKey can return errors * Revert "GetPubKey can return errors" This reverts commit 68c06f19b4650389d7e5ab1659b318889028202c. * adding entry to changelog * Update CHANGELOG_PENDING.md Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_client.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_dialer_endpoint.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_dialer_endpoint.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_dialer_endpoint.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_dialer_endpoint.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_listener_endpoint_test.go Co-Authored-By: jleni <juan.leni@zondax.ch> * updating node.go * review fixes * fixes linter * fixing unit test * small fixes in comments * addressing review comments * addressing review comments 2 * reverting suggestion * Update privval/signer_client_test.go Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * Update privval/signer_client_test.go Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * Update privval/signer_listener_endpoint_test.go Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * do not expose brokenSignerDialerEndpoint * clean up logging * unifying methods shorten test time signer also drops * reenabling pings * improving testability + unit test * fixing go fmt + unit test * remove unused code * Addressing review comments * simplifying connection workflow * fix linter/go import issue * using base service quit * updating comment * Simplifying design + adjusting names * fixing linter issues * refactoring test harness + fixes * Addressing review comments * cleaning up * adding additional error check
5 years ago
Close and retry a RemoteSigner on err (#2923) * Close and recreate a RemoteSigner on err * Update changelog * Address Anton's comments / suggestions: - update changelog - restart TCPVal - shut down on `ErrUnexpectedResponse` * re-init remote signer client with fresh connection if Ping fails - add/update TODOs in secret connection - rename tcp.go -> tcp_client.go, same with ipc to clarify their purpose * account for `conn returned by waitConnection can be `nil` - also add TODO about RemoteSigner conn field * Tests for retrying: IPC / TCP - shorter info log on success - set conn and use it in tests to close conn * Tests for retrying: IPC / TCP - shorter info log on success - set conn and use it in tests to close conn - add rwmutex for conn field in IPC * comments and doc.go * fix ipc tests. fixes #2677 * use constants for tests * cleanup some error statements * fixes #2784, race in tests * remove print statement * minor fixes from review * update comment on sts spec * cosmetics * p2p/conn: add failing tests * p2p/conn: make SecretConnection thread safe * changelog * IPCVal signer refactor - use a .reset() method - don't use embedded RemoteSignerClient - guard RemoteSignerClient with mutex - drop the .conn - expose Close() on RemoteSignerClient * apply IPCVal refactor to TCPVal * remove mtx from RemoteSignerClient * consolidate IPCVal and TCPVal, fixes #3104 - done in tcp_client.go - now called SocketVal - takes a listener in the constructor - make tcpListener and unixListener contain all the differences * delete ipc files * introduce unix and tcp dialer for RemoteSigner * rename files - drop tcp_ prefix - rename priv_validator.go to file.go * bring back listener options * fix node * fix priv_val_server * fix node test * minor cleanup and comments
6 years ago
privval: refactor Remote signers (#3370) This PR is related to #3107 and a continuation of #3351 It is important to emphasise that in the privval original design, client/server and listening/dialing roles are inverted and do not follow a conventional interaction. Given two hosts A and B: Host A is listener/client Host B is dialer/server (contains the secret key) When A requires a signature, it needs to wait for B to dial in before it can issue a request. A only accepts a single connection and any failure leads to dropping the connection and waiting for B to reconnect. The original rationale behind this design was based on security. Host B only allows outbound connections to a list of whitelisted hosts. It is not possible to reach B unless B dials in. There are no listening/open ports in B. This PR results in the following changes: Refactors ping/heartbeat to avoid previously existing race conditions. Separates transport (dialer/listener) from signing (client/server) concerns to simplify workflow. Unifies and abstracts away the differences between unix and tcp sockets. A single signer endpoint implementation unifies connection handling code (read/write/close/connection obj) The signer request handler (server side) is customizable to increase testability. Updates and extends unit tests A high level overview of the classes is as follows: Transport (endpoints): The following classes take care of establishing a connection SignerDialerEndpoint SignerListeningEndpoint SignerEndpoint groups common functionality (read/write/timeouts/etc.) Signing (client/server): The following classes take care of exchanging request/responses SignerClient SignerServer This PR also closes #3601 Commits: * refactoring - work in progress * reworking unit tests * Encapsulating and fixing unit tests * Improve tests * Clean up * Fix/improve unit tests * clean up tests * Improving service endpoint * fixing unit test * fix linter issues * avoid invalid cache values (improve later?) * complete implementation * wip * improved connection loop * Improve reconnections + fixing unit tests * addressing comments * small formatting changes * clean up * Update node/node.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_client.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_client_test.go Co-Authored-By: jleni <juan.leni@zondax.ch> * check during initialization * dropping connecting when writing fails * removing break * use t.log instead * unifying and using cmn.GetFreePort() * review fixes * reordering and unifying drop connection * closing instead of signalling * refactored service loop * removed superfluous brackets * GetPubKey can return errors * Revert "GetPubKey can return errors" This reverts commit 68c06f19b4650389d7e5ab1659b318889028202c. * adding entry to changelog * Update CHANGELOG_PENDING.md Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_client.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_dialer_endpoint.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_dialer_endpoint.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_dialer_endpoint.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_dialer_endpoint.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_listener_endpoint_test.go Co-Authored-By: jleni <juan.leni@zondax.ch> * updating node.go * review fixes * fixes linter * fixing unit test * small fixes in comments * addressing review comments * addressing review comments 2 * reverting suggestion * Update privval/signer_client_test.go Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * Update privval/signer_client_test.go Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * Update privval/signer_listener_endpoint_test.go Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * do not expose brokenSignerDialerEndpoint * clean up logging * unifying methods shorten test time signer also drops * reenabling pings * improving testability + unit test * fixing go fmt + unit test * remove unused code * Addressing review comments * simplifying connection workflow * fix linter/go import issue * using base service quit * updating comment * Simplifying design + adjusting names * fixing linter issues * refactoring test harness + fixes * Addressing review comments * cleaning up * adding additional error check
5 years ago
privval: refactor Remote signers (#3370) This PR is related to #3107 and a continuation of #3351 It is important to emphasise that in the privval original design, client/server and listening/dialing roles are inverted and do not follow a conventional interaction. Given two hosts A and B: Host A is listener/client Host B is dialer/server (contains the secret key) When A requires a signature, it needs to wait for B to dial in before it can issue a request. A only accepts a single connection and any failure leads to dropping the connection and waiting for B to reconnect. The original rationale behind this design was based on security. Host B only allows outbound connections to a list of whitelisted hosts. It is not possible to reach B unless B dials in. There are no listening/open ports in B. This PR results in the following changes: Refactors ping/heartbeat to avoid previously existing race conditions. Separates transport (dialer/listener) from signing (client/server) concerns to simplify workflow. Unifies and abstracts away the differences between unix and tcp sockets. A single signer endpoint implementation unifies connection handling code (read/write/close/connection obj) The signer request handler (server side) is customizable to increase testability. Updates and extends unit tests A high level overview of the classes is as follows: Transport (endpoints): The following classes take care of establishing a connection SignerDialerEndpoint SignerListeningEndpoint SignerEndpoint groups common functionality (read/write/timeouts/etc.) Signing (client/server): The following classes take care of exchanging request/responses SignerClient SignerServer This PR also closes #3601 Commits: * refactoring - work in progress * reworking unit tests * Encapsulating and fixing unit tests * Improve tests * Clean up * Fix/improve unit tests * clean up tests * Improving service endpoint * fixing unit test * fix linter issues * avoid invalid cache values (improve later?) * complete implementation * wip * improved connection loop * Improve reconnections + fixing unit tests * addressing comments * small formatting changes * clean up * Update node/node.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_client.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_client_test.go Co-Authored-By: jleni <juan.leni@zondax.ch> * check during initialization * dropping connecting when writing fails * removing break * use t.log instead * unifying and using cmn.GetFreePort() * review fixes * reordering and unifying drop connection * closing instead of signalling * refactored service loop * removed superfluous brackets * GetPubKey can return errors * Revert "GetPubKey can return errors" This reverts commit 68c06f19b4650389d7e5ab1659b318889028202c. * adding entry to changelog * Update CHANGELOG_PENDING.md Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_client.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_dialer_endpoint.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_dialer_endpoint.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_dialer_endpoint.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_dialer_endpoint.go Co-Authored-By: jleni <juan.leni@zondax.ch> * Update privval/signer_listener_endpoint_test.go Co-Authored-By: jleni <juan.leni@zondax.ch> * updating node.go * review fixes * fixes linter * fixing unit test * small fixes in comments * addressing review comments * addressing review comments 2 * reverting suggestion * Update privval/signer_client_test.go Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * Update privval/signer_client_test.go Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * Update privval/signer_listener_endpoint_test.go Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * do not expose brokenSignerDialerEndpoint * clean up logging * unifying methods shorten test time signer also drops * reenabling pings * improving testability + unit test * fixing go fmt + unit test * remove unused code * Addressing review comments * simplifying connection workflow * fix linter/go import issue * using base service quit * updating comment * Simplifying design + adjusting names * fixing linter issues * refactoring test harness + fixes * Addressing review comments * cleaning up * adding additional error check
5 years ago
fix non deterministic test failures and race in privval socket (#3258) * node: decrease retry conn timeout in test Should fix #3256 The retry timeout was set to the default, which is the same as the accept timeout, so it's no wonder this would fail. Here we decrease the retry timeout so we can try many times before the accept timeout. * p2p: increase handshake timeout in test This fails sometimes, presumably because the handshake timeout is so low (only 50ms). So increase it to 1s. Should fix #3187 * privval: fix race with ping. closes #3237 Pings happen in a go-routine and can happen concurrently with other messages. Since we use a request/response protocol, we expect to send a request and get back the corresponding response. But with pings happening concurrently, this assumption could be violated. We were using a mutex, but only a RWMutex, where the RLock was being held for sending messages - this was to allow the underlying connection to be replaced if it fails. Turns out we actually need to use a full lock (not just a read lock) to prevent multiple requests from happening concurrently. * node: fix test name. DelayedStop -> DelayedStart * autofile: Wait() method In the TestWALTruncate in consensus/wal_test.go we remove the WAL directory at the end of the test. However the wal.Stop() does not properly wait for the autofile group to finish shutting down. Hence it was possible that the group's go-routine is still running when the cleanup happens, which causes a panic since the directory disappeared. Here we add a Wait() method to properly wait until the go-routine exits so we can safely clean up. This fixes #2852.
5 years ago
lint: Enable Golint (#4212) * Fix many golint errors * Fix golint errors in the 'lite' package * Don't export Pool.store * Fix typo * Revert unwanted changes * Fix errors in counter package * Fix linter errors in kvstore package * Fix linter error in example package * Fix error in tests package * Fix linter errors in v2 package * Fix linter errors in consensus package * Fix linter errors in evidence package * Fix linter error in fail package * Fix linter errors in query package * Fix linter errors in core package * Fix linter errors in node package * Fix linter errors in mempool package * Fix linter error in conn package * Fix linter errors in pex package * Rename PEXReactor export to Reactor * Fix linter errors in trust package * Fix linter errors in upnp package * Fix linter errors in p2p package * Fix linter errors in proxy package * Fix linter errors in mock_test package * Fix linter error in client_test package * Fix linter errors in coretypes package * Fix linter errors in coregrpc package * Fix linter errors in rpcserver package * Fix linter errors in rpctypes package * Fix linter errors in rpctest package * Fix linter error in json2wal script * Fix linter error in wal2json script * Fix linter errors in kv package * Fix linter error in state package * Fix linter error in grpc_client * Fix linter errors in types package * Fix linter error in version package * Fix remaining errors * Address review comments * Fix broken tests * Reconcile package coregrpc * Fix golangci bot error * Fix new golint errors * Fix broken reference * Enable golint linter * minor changes to bring golint into line * fix failing test * fix pex reactor naming * address PR comments
5 years ago
  1. package node
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "math"
  7. "net"
  8. "os"
  9. "syscall"
  10. "testing"
  11. "time"
  12. "github.com/stretchr/testify/assert"
  13. "github.com/stretchr/testify/require"
  14. dbm "github.com/tendermint/tm-db"
  15. "github.com/tendermint/tendermint/abci/example/kvstore"
  16. cfg "github.com/tendermint/tendermint/config"
  17. "github.com/tendermint/tendermint/crypto"
  18. "github.com/tendermint/tendermint/crypto/ed25519"
  19. "github.com/tendermint/tendermint/crypto/tmhash"
  20. consmocks "github.com/tendermint/tendermint/internal/consensus/mocks"
  21. ssmocks "github.com/tendermint/tendermint/internal/statesync/mocks"
  22. "github.com/tendermint/tendermint/internal/evidence"
  23. "github.com/tendermint/tendermint/internal/mempool"
  24. mempoolv0 "github.com/tendermint/tendermint/internal/mempool/v0"
  25. statesync "github.com/tendermint/tendermint/internal/statesync"
  26. "github.com/tendermint/tendermint/internal/test/factory"
  27. "github.com/tendermint/tendermint/libs/log"
  28. tmpubsub "github.com/tendermint/tendermint/libs/pubsub"
  29. tmrand "github.com/tendermint/tendermint/libs/rand"
  30. tmtime "github.com/tendermint/tendermint/libs/time"
  31. "github.com/tendermint/tendermint/privval"
  32. "github.com/tendermint/tendermint/proxy"
  33. sm "github.com/tendermint/tendermint/state"
  34. "github.com/tendermint/tendermint/state/indexer"
  35. "github.com/tendermint/tendermint/store"
  36. "github.com/tendermint/tendermint/types"
  37. )
  38. func TestNodeStartStop(t *testing.T) {
  39. config := cfg.ResetTestRoot("node_node_test")
  40. defer os.RemoveAll(config.RootDir)
  41. // create & start node
  42. ns, err := newDefaultNode(config, log.TestingLogger())
  43. require.NoError(t, err)
  44. require.NoError(t, ns.Start())
  45. n, ok := ns.(*nodeImpl)
  46. require.True(t, ok)
  47. t.Logf("Started node %v", n.sw.NodeInfo())
  48. // wait for the node to produce a block
  49. blocksSub, err := n.EventBus().Subscribe(context.Background(), "node_test", types.EventQueryNewBlock)
  50. require.NoError(t, err)
  51. select {
  52. case <-blocksSub.Out():
  53. case <-blocksSub.Canceled():
  54. t.Fatal("blocksSub was canceled")
  55. case <-time.After(10 * time.Second):
  56. t.Fatal("timed out waiting for the node to produce a block")
  57. }
  58. // stop the node
  59. go func() {
  60. err = n.Stop()
  61. require.NoError(t, err)
  62. }()
  63. select {
  64. case <-n.Quit():
  65. case <-time.After(5 * time.Second):
  66. pid := os.Getpid()
  67. p, err := os.FindProcess(pid)
  68. if err != nil {
  69. panic(err)
  70. }
  71. err = p.Signal(syscall.SIGABRT)
  72. fmt.Println(err)
  73. t.Fatal("timed out waiting for shutdown")
  74. }
  75. }
  76. func getTestNode(t *testing.T, conf *cfg.Config, logger log.Logger) *nodeImpl {
  77. t.Helper()
  78. ns, err := newDefaultNode(conf, logger)
  79. require.NoError(t, err)
  80. n, ok := ns.(*nodeImpl)
  81. require.True(t, ok)
  82. return n
  83. }
  84. func TestNodeDelayedStart(t *testing.T) {
  85. config := cfg.ResetTestRoot("node_delayed_start_test")
  86. defer os.RemoveAll(config.RootDir)
  87. now := tmtime.Now()
  88. // create & start node
  89. n := getTestNode(t, config, log.TestingLogger())
  90. n.GenesisDoc().GenesisTime = now.Add(2 * time.Second)
  91. require.NoError(t, n.Start())
  92. defer n.Stop() //nolint:errcheck // ignore for tests
  93. startTime := tmtime.Now()
  94. assert.Equal(t, true, startTime.After(n.GenesisDoc().GenesisTime))
  95. }
  96. func TestNodeSetAppVersion(t *testing.T) {
  97. config := cfg.ResetTestRoot("node_app_version_test")
  98. defer os.RemoveAll(config.RootDir)
  99. // create node
  100. n := getTestNode(t, config, log.TestingLogger())
  101. // default config uses the kvstore app
  102. var appVersion uint64 = kvstore.ProtocolVersion
  103. // check version is set in state
  104. state, err := n.stateStore.Load()
  105. require.NoError(t, err)
  106. assert.Equal(t, state.Version.Consensus.App, appVersion)
  107. // check version is set in node info
  108. assert.Equal(t, n.nodeInfo.ProtocolVersion.App, appVersion)
  109. }
  110. func TestNodeSetPrivValTCP(t *testing.T) {
  111. addr := "tcp://" + testFreeAddr(t)
  112. config := cfg.ResetTestRoot("node_priv_val_tcp_test")
  113. defer os.RemoveAll(config.RootDir)
  114. config.PrivValidator.ListenAddr = addr
  115. dialer := privval.DialTCPFn(addr, 100*time.Millisecond, ed25519.GenPrivKey())
  116. dialerEndpoint := privval.NewSignerDialerEndpoint(
  117. log.TestingLogger(),
  118. dialer,
  119. )
  120. privval.SignerDialerEndpointTimeoutReadWrite(100 * time.Millisecond)(dialerEndpoint)
  121. signerServer := privval.NewSignerServer(
  122. dialerEndpoint,
  123. config.ChainID(),
  124. types.NewMockPV(),
  125. )
  126. go func() {
  127. err := signerServer.Start()
  128. if err != nil {
  129. panic(err)
  130. }
  131. }()
  132. defer signerServer.Stop() //nolint:errcheck // ignore for tests
  133. n := getTestNode(t, config, log.TestingLogger())
  134. assert.IsType(t, &privval.RetrySignerClient{}, n.PrivValidator())
  135. }
  136. // address without a protocol must result in error
  137. func TestPrivValidatorListenAddrNoProtocol(t *testing.T) {
  138. addrNoPrefix := testFreeAddr(t)
  139. config := cfg.ResetTestRoot("node_priv_val_tcp_test")
  140. defer os.RemoveAll(config.RootDir)
  141. config.PrivValidator.ListenAddr = addrNoPrefix
  142. _, err := newDefaultNode(config, log.TestingLogger())
  143. assert.Error(t, err)
  144. }
  145. func TestNodeSetPrivValIPC(t *testing.T) {
  146. tmpfile := "/tmp/kms." + tmrand.Str(6) + ".sock"
  147. defer os.Remove(tmpfile) // clean up
  148. config := cfg.ResetTestRoot("node_priv_val_tcp_test")
  149. defer os.RemoveAll(config.RootDir)
  150. config.PrivValidator.ListenAddr = "unix://" + tmpfile
  151. dialer := privval.DialUnixFn(tmpfile)
  152. dialerEndpoint := privval.NewSignerDialerEndpoint(
  153. log.TestingLogger(),
  154. dialer,
  155. )
  156. privval.SignerDialerEndpointTimeoutReadWrite(100 * time.Millisecond)(dialerEndpoint)
  157. pvsc := privval.NewSignerServer(
  158. dialerEndpoint,
  159. config.ChainID(),
  160. types.NewMockPV(),
  161. )
  162. go func() {
  163. err := pvsc.Start()
  164. require.NoError(t, err)
  165. }()
  166. defer pvsc.Stop() //nolint:errcheck // ignore for tests
  167. n := getTestNode(t, config, log.TestingLogger())
  168. assert.IsType(t, &privval.RetrySignerClient{}, n.PrivValidator())
  169. }
  170. // testFreeAddr claims a free port so we don't block on listener being ready.
  171. func testFreeAddr(t *testing.T) string {
  172. ln, err := net.Listen("tcp", "127.0.0.1:0")
  173. require.NoError(t, err)
  174. defer ln.Close()
  175. return fmt.Sprintf("127.0.0.1:%d", ln.Addr().(*net.TCPAddr).Port)
  176. }
  177. // create a proposal block using real and full
  178. // mempool and evidence pool and validate it.
  179. func TestCreateProposalBlock(t *testing.T) {
  180. config := cfg.ResetTestRoot("node_create_proposal")
  181. defer os.RemoveAll(config.RootDir)
  182. cc := proxy.NewLocalClientCreator(kvstore.NewApplication())
  183. proxyApp := proxy.NewAppConns(cc)
  184. err := proxyApp.Start()
  185. require.Nil(t, err)
  186. defer proxyApp.Stop() //nolint:errcheck // ignore for tests
  187. logger := log.TestingLogger()
  188. const height int64 = 1
  189. state, stateDB, privVals := state(1, height)
  190. stateStore := sm.NewStore(stateDB)
  191. maxBytes := 16384
  192. const partSize uint32 = 256
  193. maxEvidenceBytes := int64(maxBytes / 2)
  194. state.ConsensusParams.Block.MaxBytes = int64(maxBytes)
  195. state.ConsensusParams.Evidence.MaxBytes = maxEvidenceBytes
  196. proposerAddr, _ := state.Validators.GetByIndex(0)
  197. mp := mempoolv0.NewCListMempool(
  198. config.Mempool,
  199. proxyApp.Mempool(),
  200. state.LastBlockHeight,
  201. mempoolv0.WithMetrics(mempool.NopMetrics()),
  202. mempoolv0.WithPreCheck(sm.TxPreCheck(state)),
  203. mempoolv0.WithPostCheck(sm.TxPostCheck(state)),
  204. )
  205. mp.SetLogger(logger)
  206. // Make EvidencePool
  207. evidenceDB := dbm.NewMemDB()
  208. blockStore := store.NewBlockStore(dbm.NewMemDB())
  209. evidencePool, err := evidence.NewPool(logger, evidenceDB, stateStore, blockStore)
  210. require.NoError(t, err)
  211. // fill the evidence pool with more evidence
  212. // than can fit in a block
  213. var currentBytes int64 = 0
  214. for currentBytes <= maxEvidenceBytes {
  215. ev := types.NewMockDuplicateVoteEvidenceWithValidator(height, time.Now(), privVals[0], "test-chain")
  216. currentBytes += int64(len(ev.Bytes()))
  217. evidencePool.ReportConflictingVotes(ev.VoteA, ev.VoteB)
  218. }
  219. evList, size := evidencePool.PendingEvidence(state.ConsensusParams.Evidence.MaxBytes)
  220. require.Less(t, size, state.ConsensusParams.Evidence.MaxBytes+1)
  221. evData := &types.EvidenceData{Evidence: evList}
  222. require.EqualValues(t, size, evData.ByteSize())
  223. // fill the mempool with more txs
  224. // than can fit in a block
  225. txLength := 100
  226. for i := 0; i <= maxBytes/txLength; i++ {
  227. tx := tmrand.Bytes(txLength)
  228. err := mp.CheckTx(context.Background(), tx, nil, mempool.TxInfo{})
  229. assert.NoError(t, err)
  230. }
  231. blockExec := sm.NewBlockExecutor(
  232. stateStore,
  233. logger,
  234. proxyApp.Consensus(),
  235. mp,
  236. evidencePool,
  237. blockStore,
  238. )
  239. commit := types.NewCommit(height-1, 0, types.BlockID{}, nil)
  240. block, _ := blockExec.CreateProposalBlock(
  241. height,
  242. state, commit,
  243. proposerAddr,
  244. )
  245. // check that the part set does not exceed the maximum block size
  246. partSet := block.MakePartSet(partSize)
  247. assert.Less(t, partSet.ByteSize(), int64(maxBytes))
  248. partSetFromHeader := types.NewPartSetFromHeader(partSet.Header())
  249. for partSetFromHeader.Count() < partSetFromHeader.Total() {
  250. added, err := partSetFromHeader.AddPart(partSet.GetPart(int(partSetFromHeader.Count())))
  251. require.NoError(t, err)
  252. require.True(t, added)
  253. }
  254. assert.EqualValues(t, partSetFromHeader.ByteSize(), partSet.ByteSize())
  255. err = blockExec.ValidateBlock(state, block)
  256. assert.NoError(t, err)
  257. }
  258. func TestMaxTxsProposalBlockSize(t *testing.T) {
  259. config := cfg.ResetTestRoot("node_create_proposal")
  260. defer os.RemoveAll(config.RootDir)
  261. cc := proxy.NewLocalClientCreator(kvstore.NewApplication())
  262. proxyApp := proxy.NewAppConns(cc)
  263. err := proxyApp.Start()
  264. require.Nil(t, err)
  265. defer proxyApp.Stop() //nolint:errcheck // ignore for tests
  266. logger := log.TestingLogger()
  267. const height int64 = 1
  268. state, stateDB, _ := state(1, height)
  269. stateStore := sm.NewStore(stateDB)
  270. blockStore := store.NewBlockStore(dbm.NewMemDB())
  271. const maxBytes int64 = 16384
  272. const partSize uint32 = 256
  273. state.ConsensusParams.Block.MaxBytes = maxBytes
  274. proposerAddr, _ := state.Validators.GetByIndex(0)
  275. // Make Mempool
  276. mp := mempoolv0.NewCListMempool(
  277. config.Mempool,
  278. proxyApp.Mempool(),
  279. state.LastBlockHeight,
  280. mempoolv0.WithMetrics(mempool.NopMetrics()),
  281. mempoolv0.WithPreCheck(sm.TxPreCheck(state)),
  282. mempoolv0.WithPostCheck(sm.TxPostCheck(state)),
  283. )
  284. mp.SetLogger(logger)
  285. // fill the mempool with one txs just below the maximum size
  286. txLength := int(types.MaxDataBytesNoEvidence(maxBytes, 1))
  287. tx := tmrand.Bytes(txLength - 4) // to account for the varint
  288. err = mp.CheckTx(context.Background(), tx, nil, mempool.TxInfo{})
  289. assert.NoError(t, err)
  290. blockExec := sm.NewBlockExecutor(
  291. stateStore,
  292. logger,
  293. proxyApp.Consensus(),
  294. mp,
  295. sm.EmptyEvidencePool{},
  296. blockStore,
  297. )
  298. commit := types.NewCommit(height-1, 0, types.BlockID{}, nil)
  299. block, _ := blockExec.CreateProposalBlock(
  300. height,
  301. state, commit,
  302. proposerAddr,
  303. )
  304. pb, err := block.ToProto()
  305. require.NoError(t, err)
  306. assert.Less(t, int64(pb.Size()), maxBytes)
  307. // check that the part set does not exceed the maximum block size
  308. partSet := block.MakePartSet(partSize)
  309. assert.EqualValues(t, partSet.ByteSize(), int64(pb.Size()))
  310. }
  311. func TestMaxProposalBlockSize(t *testing.T) {
  312. config := cfg.ResetTestRoot("node_create_proposal")
  313. defer os.RemoveAll(config.RootDir)
  314. cc := proxy.NewLocalClientCreator(kvstore.NewApplication())
  315. proxyApp := proxy.NewAppConns(cc)
  316. err := proxyApp.Start()
  317. require.Nil(t, err)
  318. defer proxyApp.Stop() //nolint:errcheck // ignore for tests
  319. logger := log.TestingLogger()
  320. state, stateDB, _ := state(types.MaxVotesCount, int64(1))
  321. stateStore := sm.NewStore(stateDB)
  322. blockStore := store.NewBlockStore(dbm.NewMemDB())
  323. const maxBytes int64 = 1024 * 1024 * 2
  324. state.ConsensusParams.Block.MaxBytes = maxBytes
  325. proposerAddr, _ := state.Validators.GetByIndex(0)
  326. // Make Mempool
  327. mp := mempoolv0.NewCListMempool(
  328. config.Mempool,
  329. proxyApp.Mempool(),
  330. state.LastBlockHeight,
  331. mempoolv0.WithMetrics(mempool.NopMetrics()),
  332. mempoolv0.WithPreCheck(sm.TxPreCheck(state)),
  333. mempoolv0.WithPostCheck(sm.TxPostCheck(state)),
  334. )
  335. mp.SetLogger(logger)
  336. // fill the mempool with one txs just below the maximum size
  337. txLength := int(types.MaxDataBytesNoEvidence(maxBytes, types.MaxVotesCount))
  338. tx := tmrand.Bytes(txLength - 6) // to account for the varint
  339. err = mp.CheckTx(context.Background(), tx, nil, mempool.TxInfo{})
  340. assert.NoError(t, err)
  341. // now produce more txs than what a normal block can hold with 10 smaller txs
  342. // At the end of the test, only the single big tx should be added
  343. for i := 0; i < 10; i++ {
  344. tx := tmrand.Bytes(10)
  345. err = mp.CheckTx(context.Background(), tx, nil, mempool.TxInfo{})
  346. assert.NoError(t, err)
  347. }
  348. blockExec := sm.NewBlockExecutor(
  349. stateStore,
  350. logger,
  351. proxyApp.Consensus(),
  352. mp,
  353. sm.EmptyEvidencePool{},
  354. blockStore,
  355. )
  356. blockID := types.BlockID{
  357. Hash: tmhash.Sum([]byte("blockID_hash")),
  358. PartSetHeader: types.PartSetHeader{
  359. Total: math.MaxInt32,
  360. Hash: tmhash.Sum([]byte("blockID_part_set_header_hash")),
  361. },
  362. }
  363. timestamp := time.Date(math.MaxInt64, 0, 0, 0, 0, 0, math.MaxInt64, time.UTC)
  364. // change state in order to produce the largest accepted header
  365. state.LastBlockID = blockID
  366. state.LastBlockHeight = math.MaxInt64 - 1
  367. state.LastBlockTime = timestamp
  368. state.LastResultsHash = tmhash.Sum([]byte("last_results_hash"))
  369. state.AppHash = tmhash.Sum([]byte("app_hash"))
  370. state.Version.Consensus.Block = math.MaxInt64
  371. state.Version.Consensus.App = math.MaxInt64
  372. maxChainID := ""
  373. for i := 0; i < types.MaxChainIDLen; i++ {
  374. maxChainID += "𠜎"
  375. }
  376. state.ChainID = maxChainID
  377. cs := types.CommitSig{
  378. BlockIDFlag: types.BlockIDFlagNil,
  379. ValidatorAddress: crypto.AddressHash([]byte("validator_address")),
  380. Timestamp: timestamp,
  381. Signature: crypto.CRandBytes(types.MaxSignatureSize),
  382. }
  383. commit := &types.Commit{
  384. Height: math.MaxInt64,
  385. Round: math.MaxInt32,
  386. BlockID: blockID,
  387. }
  388. // add maximum amount of signatures to a single commit
  389. for i := 0; i < types.MaxVotesCount; i++ {
  390. commit.Signatures = append(commit.Signatures, cs)
  391. }
  392. block, partSet := blockExec.CreateProposalBlock(
  393. math.MaxInt64,
  394. state, commit,
  395. proposerAddr,
  396. )
  397. // this ensures that the header is at max size
  398. block.Header.Time = timestamp
  399. pb, err := block.ToProto()
  400. require.NoError(t, err)
  401. // require that the header and commit be the max possible size
  402. require.Equal(t, int64(pb.Header.Size()), types.MaxHeaderBytes)
  403. require.Equal(t, int64(pb.LastCommit.Size()), types.MaxCommitBytes(types.MaxVotesCount))
  404. // make sure that the block is less than the max possible size
  405. assert.Equal(t, int64(pb.Size()), maxBytes)
  406. // because of the proto overhead we expect the part set bytes to be equal or
  407. // less than the pb block size
  408. assert.LessOrEqual(t, partSet.ByteSize(), int64(pb.Size()))
  409. }
  410. func TestNodeNewSeedNode(t *testing.T) {
  411. config := cfg.ResetTestRoot("node_new_node_custom_reactors_test")
  412. config.Mode = cfg.ModeSeed
  413. defer os.RemoveAll(config.RootDir)
  414. nodeKey, err := types.LoadOrGenNodeKey(config.NodeKeyFile())
  415. require.NoError(t, err)
  416. ns, err := makeSeedNode(config,
  417. cfg.DefaultDBProvider,
  418. nodeKey,
  419. defaultGenesisDocProviderFunc(config),
  420. log.TestingLogger(),
  421. )
  422. require.NoError(t, err)
  423. n, ok := ns.(*nodeImpl)
  424. require.True(t, ok)
  425. err = n.Start()
  426. require.NoError(t, err)
  427. assert.True(t, n.pexReactor.IsRunning())
  428. }
  429. func TestNodeSetEventSink(t *testing.T) {
  430. config := cfg.ResetTestRoot("node_app_version_test")
  431. defer os.RemoveAll(config.RootDir)
  432. n := getTestNode(t, config, log.TestingLogger())
  433. assert.Equal(t, 1, len(n.eventSinks))
  434. assert.Equal(t, indexer.KV, n.eventSinks[0].Type())
  435. config.TxIndex.Indexer = []string{"null"}
  436. n = getTestNode(t, config, log.TestingLogger())
  437. assert.Equal(t, 1, len(n.eventSinks))
  438. assert.Equal(t, indexer.NULL, n.eventSinks[0].Type())
  439. config.TxIndex.Indexer = []string{"null", "kv"}
  440. n = getTestNode(t, config, log.TestingLogger())
  441. assert.Equal(t, 1, len(n.eventSinks))
  442. assert.Equal(t, indexer.NULL, n.eventSinks[0].Type())
  443. config.TxIndex.Indexer = []string{"kvv"}
  444. ns, err := newDefaultNode(config, log.TestingLogger())
  445. assert.Nil(t, ns)
  446. assert.Equal(t, errors.New("unsupported event sink type"), err)
  447. config.TxIndex.Indexer = []string{}
  448. n = getTestNode(t, config, log.TestingLogger())
  449. assert.Equal(t, 1, len(n.eventSinks))
  450. assert.Equal(t, indexer.NULL, n.eventSinks[0].Type())
  451. config.TxIndex.Indexer = []string{"psql"}
  452. ns, err = newDefaultNode(config, log.TestingLogger())
  453. assert.Nil(t, ns)
  454. assert.Equal(t, errors.New("the psql connection settings cannot be empty"), err)
  455. var psqlConn = "test"
  456. config.TxIndex.Indexer = []string{"psql"}
  457. config.TxIndex.PsqlConn = psqlConn
  458. n = getTestNode(t, config, log.TestingLogger())
  459. assert.Equal(t, 1, len(n.eventSinks))
  460. assert.Equal(t, indexer.PSQL, n.eventSinks[0].Type())
  461. n.OnStop()
  462. config.TxIndex.Indexer = []string{"psql", "kv"}
  463. config.TxIndex.PsqlConn = psqlConn
  464. n = getTestNode(t, config, log.TestingLogger())
  465. assert.Equal(t, 2, len(n.eventSinks))
  466. // we use map to filter the duplicated sinks, so it's not guarantee the order when append sinks.
  467. if n.eventSinks[0].Type() == indexer.KV {
  468. assert.Equal(t, indexer.PSQL, n.eventSinks[1].Type())
  469. } else {
  470. assert.Equal(t, indexer.PSQL, n.eventSinks[0].Type())
  471. assert.Equal(t, indexer.KV, n.eventSinks[1].Type())
  472. }
  473. n.OnStop()
  474. config.TxIndex.Indexer = []string{"kv", "psql"}
  475. config.TxIndex.PsqlConn = psqlConn
  476. n = getTestNode(t, config, log.TestingLogger())
  477. assert.Equal(t, 2, len(n.eventSinks))
  478. if n.eventSinks[0].Type() == indexer.KV {
  479. assert.Equal(t, indexer.PSQL, n.eventSinks[1].Type())
  480. } else {
  481. assert.Equal(t, indexer.PSQL, n.eventSinks[0].Type())
  482. assert.Equal(t, indexer.KV, n.eventSinks[1].Type())
  483. }
  484. n.OnStop()
  485. var e = errors.New("found duplicated sinks, please check the tx-index section in the config.toml")
  486. config.TxIndex.Indexer = []string{"psql", "kv", "Kv"}
  487. config.TxIndex.PsqlConn = psqlConn
  488. _, err = newDefaultNode(config, log.TestingLogger())
  489. require.Error(t, err)
  490. assert.Equal(t, e, err)
  491. config.TxIndex.Indexer = []string{"Psql", "kV", "kv", "pSql"}
  492. config.TxIndex.PsqlConn = psqlConn
  493. _, err = newDefaultNode(config, log.TestingLogger())
  494. require.Error(t, err)
  495. assert.Equal(t, e, err)
  496. }
  497. func state(nVals int, height int64) (sm.State, dbm.DB, []types.PrivValidator) {
  498. privVals := make([]types.PrivValidator, nVals)
  499. vals := make([]types.GenesisValidator, nVals)
  500. for i := 0; i < nVals; i++ {
  501. privVal := types.NewMockPV()
  502. privVals[i] = privVal
  503. vals[i] = types.GenesisValidator{
  504. Address: privVal.PrivKey.PubKey().Address(),
  505. PubKey: privVal.PrivKey.PubKey(),
  506. Power: 1000,
  507. Name: fmt.Sprintf("test%d", i),
  508. }
  509. }
  510. s, _ := sm.MakeGenesisState(&types.GenesisDoc{
  511. ChainID: "test-chain",
  512. Validators: vals,
  513. AppHash: nil,
  514. })
  515. // save validators to db for 2 heights
  516. stateDB := dbm.NewMemDB()
  517. stateStore := sm.NewStore(stateDB)
  518. if err := stateStore.Save(s); err != nil {
  519. panic(err)
  520. }
  521. for i := 1; i < int(height); i++ {
  522. s.LastBlockHeight++
  523. s.LastValidators = s.Validators.Copy()
  524. if err := stateStore.Save(s); err != nil {
  525. panic(err)
  526. }
  527. }
  528. return s, stateDB, privVals
  529. }
  530. func TestLoadStateFromGenesis(t *testing.T) {
  531. _ = loadStatefromGenesis(t)
  532. }
  533. func loadStatefromGenesis(t *testing.T) sm.State {
  534. t.Helper()
  535. stateDB := dbm.NewMemDB()
  536. stateStore := sm.NewStore(stateDB)
  537. config := cfg.ResetTestRoot("load_state_from_genesis")
  538. loadedState, err := stateStore.Load()
  539. require.NoError(t, err)
  540. require.True(t, loadedState.IsEmpty())
  541. genDoc, _ := factory.RandGenesisDoc(config, 0, false, 10)
  542. state, err := loadStateFromDBOrGenesisDocProvider(
  543. stateStore,
  544. genDoc,
  545. )
  546. require.NoError(t, err)
  547. require.NotNil(t, state)
  548. return state
  549. }
  550. func TestNodeStartStateSync(t *testing.T) {
  551. mockSSR := &statesync.MockSyncReactor{}
  552. mockFSR := &consmocks.BlockSyncReactor{}
  553. mockCSR := &consmocks.ConsSyncReactor{}
  554. mockSP := &ssmocks.StateProvider{}
  555. state := loadStatefromGenesis(t)
  556. config := cfg.ResetTestRoot("load_state_from_genesis")
  557. eventBus, err := createAndStartEventBus(log.TestingLogger())
  558. defer func() {
  559. err := eventBus.Stop()
  560. require.NoError(t, err)
  561. }()
  562. require.NoError(t, err)
  563. require.NotNil(t, eventBus)
  564. sub, err := eventBus.Subscribe(context.Background(), "test-client", types.EventQueryStateSyncStatus, 10)
  565. require.NoError(t, err)
  566. require.NotNil(t, sub)
  567. cfgSS := config.StateSync
  568. mockSSR.On("Sync", context.TODO(), mockSP, cfgSS.DiscoveryTime).Return(state, nil).
  569. On("Backfill", state).Return(nil)
  570. mockCSR.On("SetStateSyncingMetrics", float64(0)).Return().
  571. On("SwitchToConsensus", state, true).Return()
  572. require.NoError(t,
  573. startStateSync(mockSSR, mockFSR, mockCSR, mockSP, config.StateSync, false, state.InitialHeight, eventBus))
  574. for cnt := 0; cnt < 2; {
  575. select {
  576. case <-time.After(3 * time.Second):
  577. t.Errorf("StateSyncStatus timeout")
  578. case msg := <-sub.Out():
  579. if cnt == 0 {
  580. ensureStateSyncStatus(t, msg, false, state.InitialHeight)
  581. cnt++
  582. } else {
  583. // the state height = 0 because we are not actually update the state in this test
  584. ensureStateSyncStatus(t, msg, true, 0)
  585. cnt++
  586. }
  587. }
  588. }
  589. mockSSR.AssertNumberOfCalls(t, "Sync", 1)
  590. mockSSR.AssertNumberOfCalls(t, "Backfill", 1)
  591. mockCSR.AssertNumberOfCalls(t, "SetStateSyncingMetrics", 1)
  592. mockCSR.AssertNumberOfCalls(t, "SwitchToConsensus", 1)
  593. }
  594. func ensureStateSyncStatus(t *testing.T, msg tmpubsub.Message, complete bool, height int64) {
  595. t.Helper()
  596. status, ok := msg.Data().(types.EventDataStateSyncStatus)
  597. require.True(t, ok)
  598. require.Equal(t, complete, status.Complete)
  599. require.Equal(t, height, status.Height)
  600. }