Browse Source

light: improve provider handling (#6053)

Introduces heuristics that track the amount of no responses or unavailable blocks a provider has for more robust provider handling by the light client. Use concurrent calls to all witnesses when a new primary is needed.
pull/6191/head
Callum Waters 4 years ago
committed by GitHub
parent
commit
37d36cd5bc
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 326 additions and 170 deletions
  1. +191
    -108
      light/client.go
  2. +6
    -2
      light/client_test.go
  3. +24
    -20
      light/detector.go
  4. +1
    -1
      light/detector_test.go
  5. +14
    -3
      light/provider/errors.go
  6. +72
    -23
      light/provider/http/http.go
  7. +15
    -7
      light/provider/http/http_test.go
  8. +2
    -5
      light/provider/mock/deadmock.go
  9. +1
    -1
      light/verifier.go

+ 191
- 108
light/client.go View File

@ -6,6 +6,7 @@ import (
"errors"
"fmt"
"sort"
"sync"
"time"
"github.com/tendermint/tendermint/libs/log"
@ -126,8 +127,10 @@ type Client struct {
pruningSize uint16
// See ConfirmationFunction option
confirmationFn func(action string) bool
quit chan struct{}
// The light client keeps track of how many times it has requested a light
// block from it's providers. When this exceeds the amount of witnesses the
// light client will just return the last error sent by the providers
// repeatRequests uint16
logger log.Logger
}
@ -161,14 +164,14 @@ func NewClient(
}
if c.latestTrustedBlock != nil {
c.logger.Info("Checking trusted light block using options")
c.logger.Info("checking trusted light block using options")
if err := c.checkTrustedHeaderUsingOptions(ctx, trustOptions); err != nil {
return nil, err
}
}
if c.latestTrustedBlock == nil || c.latestTrustedBlock.Height < trustOptions.Height {
c.logger.Info("Downloading trusted light block using options")
c.logger.Info("downloading trusted light block using options")
if err := c.initializeWithTrustOptions(ctx, trustOptions); err != nil {
return nil, err
}
@ -199,7 +202,6 @@ func NewClientFromTrustedStore(
trustedStore: trustedStore,
pruningSize: defaultPruningSize,
confirmationFn: func(action string) bool { return true },
quit: make(chan struct{}),
logger: log.NewNopLogger(),
}
@ -237,7 +239,7 @@ func (c *Client) restoreTrustedLightBlock() error {
return fmt.Errorf("can't get last trusted light block: %w", err)
}
c.latestTrustedBlock = trustedBlock
c.logger.Info("Restored trusted light block", "height", lastHeight)
c.logger.Info("restored trusted light block", "height", lastHeight)
}
return nil
@ -273,7 +275,7 @@ func (c *Client) checkTrustedHeaderUsingOptions(ctx context.Context, options Tru
case options.Height == c.latestTrustedBlock.Height:
primaryHash = options.Hash
case options.Height < c.latestTrustedBlock.Height:
c.logger.Info("Client initialized with old header (trusted is more recent)",
c.logger.Info("client initialized with old header (trusted is more recent)",
"old", options.Height,
"trustedHeight", c.latestTrustedBlock.Height,
"trustedHash", c.latestTrustedBlock.Hash())
@ -299,11 +301,11 @@ func (c *Client) checkTrustedHeaderUsingOptions(ctx context.Context, options Tru
}
if !bytes.Equal(primaryHash, c.latestTrustedBlock.Hash()) {
c.logger.Info("Prev. trusted header's hash (h1) doesn't match hash from primary provider (h2)",
c.logger.Info("previous trusted header's hash (h1) doesn't match hash from primary provider (h2)",
"h1", c.latestTrustedBlock.Hash(), "h2", primaryHash)
action := fmt.Sprintf(
"Prev. trusted header's hash %X doesn't match hash %X from primary provider. Remove all the stored light blocks?",
"Previous trusted header's hash %X doesn't match hash %X from primary provider. Remove all the stored light blocks?",
c.latestTrustedBlock.Hash(), primaryHash)
if c.confirmationFn(action) {
err := c.Cleanup()
@ -415,7 +417,7 @@ func (c *Client) Update(ctx context.Context, now time.Time) (*types.LightBlock,
if err != nil {
return nil, err
}
c.logger.Info("Advanced to new state", "height", latestBlock.Height, "hash", latestBlock.Hash())
c.logger.Info("advanced to new state", "height", latestBlock.Height, "hash", latestBlock.Hash())
return latestBlock, nil
}
@ -440,7 +442,7 @@ func (c *Client) VerifyLightBlockAtHeight(ctx context.Context, height int64, now
// Check if the light block is already verified.
h, err := c.TrustedLightBlock(height)
if err == nil {
c.logger.Info("Header has already been verified", "height", height, "hash", h.Hash())
c.logger.Debug("header has already been verified", "height", height, "hash", h.Hash())
// Return already trusted light block
return h, nil
}
@ -497,7 +499,7 @@ func (c *Client) VerifyHeader(ctx context.Context, newHeader *types.Header, now
if !bytes.Equal(l.Hash(), newHeader.Hash()) {
return fmt.Errorf("existing trusted header %X does not match newHeader %X", l.Hash(), newHeader.Hash())
}
c.logger.Info("Header has already been verified",
c.logger.Debug("header has already been verified",
"height", newHeader.Height, "hash", newHeader.Hash())
return nil
}
@ -516,7 +518,7 @@ func (c *Client) VerifyHeader(ctx context.Context, newHeader *types.Header, now
}
func (c *Client) verifyLightBlock(ctx context.Context, newLightBlock *types.LightBlock, now time.Time) error {
c.logger.Info("VerifyHeader", "height", newLightBlock.Height, "hash", newLightBlock.Hash())
c.logger.Info("verify light block", "height", newLightBlock.Height, "hash", newLightBlock.Hash())
var (
verifyFunc func(ctx context.Context, trusted *types.LightBlock, new *types.LightBlock, now time.Time) error
@ -561,7 +563,7 @@ func (c *Client) verifyLightBlock(ctx context.Context, newLightBlock *types.Ligh
err = verifyFunc(ctx, closestBlock, newLightBlock, now)
}
if err != nil {
c.logger.Error("Can't verify", "err", err)
c.logger.Error("failed to verify", "err", err)
return err
}
@ -595,7 +597,7 @@ func (c *Client) verifySequential(
}
// 2) Verify them
c.logger.Debug("Verify adjacent newLightBlock against verifiedBlock",
c.logger.Debug("verify adjacent newLightBlock against verifiedBlock",
"trustedHeight", verifiedBlock.Height,
"trustedHash", verifiedBlock.Hash(),
"newHeight", interimBlock.Height,
@ -610,32 +612,21 @@ func (c *Client) verifySequential(
case ErrInvalidHeader:
// If the target header is invalid, return immediately.
if err.To == newLightBlock.Height {
c.logger.Debug("Target header is invalid", "err", err)
c.logger.Debug("target header is invalid", "err", err)
return err
}
// If some intermediate header is invalid, replace the primary and try
// again.
c.logger.Error("primary sent invalid header -> replacing", "err", err, "primary", c.primary)
replaceErr := c.replacePrimaryProvider()
if replaceErr != nil {
c.logger.Error("Can't replace primary", "err", replaceErr)
// return original error
return err
}
// If some intermediate header is invalid, remove the primary and try again.
c.logger.Error("primary sent invalid header -> removing", "err", err, "primary", c.primary)
replacementBlock, fErr := c.lightBlockFromPrimary(ctx, newLightBlock.Height)
if fErr != nil {
c.logger.Error("Can't fetch light block from primary", "err", fErr)
// return original error
replacementBlock, removeErr := c.findNewPrimary(ctx, newLightBlock.Height, true)
if removeErr != nil {
c.logger.Debug("failed to replace primary. Returning original error", "err", removeErr)
return err
}
if !bytes.Equal(replacementBlock.Hash(), newLightBlock.Hash()) {
c.logger.Error("Replacement provider has a different light block",
"newHash", newLightBlock.Hash(),
"replHash", replacementBlock.Hash())
// return original error
c.logger.Debug("replaced primary but new primary has a different block to the initial one")
return err
}
@ -686,7 +677,7 @@ func (c *Client) verifySkipping(
)
for {
c.logger.Debug("Verify non-adjacent newHeader against verifiedBlock",
c.logger.Debug("verify non-adjacent newHeader against verifiedBlock",
"trustedHeight", verifiedBlock.Height,
"trustedHash", verifiedBlock.Hash(),
"newHeight", blockCache[depth].Height,
@ -716,10 +707,20 @@ func (c *Client) verifySkipping(
pivotHeight := verifiedBlock.Height + (blockCache[depth].Height-verifiedBlock.
Height)*verifySkippingNumerator/verifySkippingDenominator
interimBlock, providerErr := source.LightBlock(ctx, pivotHeight)
if providerErr != nil {
switch providerErr {
case nil:
blockCache = append(blockCache, interimBlock)
// if the error is benign, the client does not need to replace the primary
case provider.ErrLightBlockNotFound, provider.ErrNoResponse:
return nil, err
// all other errors such as ErrBadLightBlock or ErrUnreliableProvider are seen as malevolent and the
// provider is removed
default:
return nil, ErrVerificationFailed{From: verifiedBlock.Height, To: pivotHeight, Reason: providerErr}
}
blockCache = append(blockCache, interimBlock)
}
depth++
@ -744,32 +745,20 @@ func (c *Client) verifySkippingAgainstPrimary(
// If the target header is invalid, return immediately.
invalidHeaderHeight := err.(ErrVerificationFailed).To
if invalidHeaderHeight == newLightBlock.Height {
c.logger.Debug("Target header is invalid", "err", err)
c.logger.Debug("target header is invalid", "err", err)
return err
}
// If some intermediate header is invalid, replace the primary and try
// again.
// If some intermediate header is invalid, remove the primary and try again.
c.logger.Error("primary sent invalid header -> replacing", "err", err, "primary", c.primary)
replaceErr := c.replacePrimaryProvider()
if replaceErr != nil {
c.logger.Error("Can't replace primary", "err", replaceErr)
// return original error
return err
}
replacementBlock, fErr := c.lightBlockFromPrimary(ctx, newLightBlock.Height)
if fErr != nil {
c.logger.Error("Can't fetch light block from primary", "err", fErr)
// return original error
replacementBlock, removeErr := c.findNewPrimary(ctx, newLightBlock.Height, true)
if removeErr != nil {
c.logger.Error("failed to replace primary. Returning original error", "err", removeErr)
return err
}
if !bytes.Equal(replacementBlock.Hash(), newLightBlock.Hash()) {
c.logger.Error("Replacement provider has a different light block",
"newHash", newLightBlock.Hash(),
"replHash", replacementBlock.Hash())
// return original error
c.logger.Debug("replaced primary but new primary has a different block to the initial one. Returning original error")
return err
}
@ -835,7 +824,7 @@ func (c *Client) Witnesses() []provider.Provider {
// Cleanup removes all the data (headers and validator sets) stored. Note: the
// client must be stopped at this point.
func (c *Client) Cleanup() error {
c.logger.Info("Removing all light blocks")
c.logger.Info("removing all light blocks")
c.latestTrustedBlock = nil
return c.trustedStore.Prune(0)
}
@ -908,21 +897,31 @@ func (c *Client) backwards(
return fmt.Errorf("failed to obtain the header at height #%d: %w", verifiedHeader.Height-1, err)
}
interimHeader = interimBlock.Header
c.logger.Debug("Verify newHeader against verifiedHeader",
c.logger.Debug("verify newHeader against verifiedHeader",
"trustedHeight", verifiedHeader.Height,
"trustedHash", verifiedHeader.Hash(),
"newHeight", interimHeader.Height,
"newHash", interimHeader.Hash())
if err := VerifyBackwards(interimHeader, verifiedHeader); err != nil {
c.logger.Error("primary sent invalid header -> replacing", "err", err, "primary", c.primary)
if replaceErr := c.replacePrimaryProvider(); replaceErr != nil {
c.logger.Error("Can't replace primary", "err", replaceErr)
// return original error
return fmt.Errorf("verify backwards from %d to %d failed: %w",
verifiedHeader.Height, interimHeader.Height, err)
// verification has failed
c.logger.Error("backwards verification failed, replacing primary...", "err", err, "primary", c.primary)
// the client tries to see if it can get a witness to continue with the request
newPrimarysBlock, replaceErr := c.findNewPrimary(ctx, newHeader.Height, true)
if replaceErr != nil {
c.logger.Debug("failed to replace primary. Returning original error", "err", replaceErr)
return err
}
// we need to verify the header at the same height again
continue
// before continuing we must check that they have the same target header to validate
if !bytes.Equal(newPrimarysBlock.Hash(), newHeader.Hash()) {
c.logger.Debug("replaced primary but new primary has a different block to the initial one")
// return the original error
return err
}
// try again with the new primary
return c.backwards(ctx, verifiedHeader, newPrimarysBlock.Header)
}
verifiedHeader = interimHeader
}
@ -930,55 +929,145 @@ func (c *Client) backwards(
return nil
}
// NOTE: requires a providerMutex locked.
func (c *Client) removeWitness(idx int) {
switch len(c.witnesses) {
case 0:
panic(fmt.Sprintf("wanted to remove %d element from empty witnesses slice", idx))
case 1:
c.witnesses = make([]provider.Provider, 0)
// lightBlockFromPrimary retrieves the lightBlock from the primary provider
// at the specified height. This method also handles provider behavior as follows:
//
// 1. If the provider does not respond or does not have the block, it tries again
// with a different provider
// 2. If all providers return the same error, the light client forwards the error to
// where the initial request came from
// 3. If the provider provides an invalid light block, is deemed unreliable or returns
// any other error, the primary is permanently dropped and is replaced by a witness.
func (c *Client) lightBlockFromPrimary(ctx context.Context, height int64) (*types.LightBlock, error) {
c.providerMutex.Lock()
l, err := c.primary.LightBlock(ctx, height)
c.providerMutex.Unlock()
switch err {
case nil:
// Everything went smoothly. We reset the lightBlockRequests and return the light block
return l, nil
case provider.ErrNoResponse, provider.ErrLightBlockNotFound:
// we find a new witness to replace the primary
c.logger.Debug("error from light block request from primary, replacing...", "error", err, "primary", c.primary)
return c.findNewPrimary(ctx, height, false)
default:
c.witnesses[idx] = c.witnesses[len(c.witnesses)-1]
// The light client has most likely received either provider.ErrUnreliableProvider or provider.ErrBadLightBlock
// These errors mean that the light client should drop the primary and try with another provider instead
c.logger.Error("error from light block request from primary, removing...", "error", err, "primary", c.primary)
return c.findNewPrimary(ctx, height, true)
}
}
// NOTE: requires a providerMutex lock
func (c *Client) removeWitnesses(indexes []int) error {
// check that we will still have witnesses remaining
if len(c.witnesses) <= len(indexes) {
return ErrNoWitnesses
}
// we need to make sure that we remove witnesses by index in the reverse
// order so as to not affect the indexes themselves
sort.Ints(indexes)
for i := len(indexes) - 1; i >= 0; i-- {
c.witnesses[indexes[i]] = c.witnesses[len(c.witnesses)-1]
c.witnesses = c.witnesses[:len(c.witnesses)-1]
}
return nil
}
// replaceProvider takes the first alternative provider and promotes it as the
// primary provider.
func (c *Client) replacePrimaryProvider() error {
type witnessResponse struct {
lb *types.LightBlock
witnessIndex int
err error
}
// findNewPrimary concurrently sends a light block request, promoting the first witness to return
// a valid light block as the new primary. The remove option indicates whether the primary should be
// entire removed or just appended to the back of the witnesses list. This method also handles witness
// errors. If no witness is available, it returns the last error of the witness.
func (c *Client) findNewPrimary(ctx context.Context, height int64, remove bool) (*types.LightBlock, error) {
c.providerMutex.Lock()
defer c.providerMutex.Unlock()
if len(c.witnesses) <= 1 {
return ErrNoWitnesses
return nil, ErrNoWitnesses
}
c.primary = c.witnesses[0]
c.witnesses = c.witnesses[1:]
c.logger.Info("Replacing primary with the first witness", "new_primary", c.primary)
return nil
}
var (
witnessResponsesC = make(chan witnessResponse, len(c.witnesses))
witnessesToRemove []int
lastError error
wg sync.WaitGroup
)
// lightBlockFromPrimary retrieves the lightBlock from the primary provider
// at the specified height. Handles dropout by the primary provider by swapping
// with an alternative provider.
func (c *Client) lightBlockFromPrimary(ctx context.Context, height int64) (*types.LightBlock, error) {
c.providerMutex.Lock()
l, err := c.primary.LightBlock(ctx, height)
c.providerMutex.Unlock()
if err != nil {
c.logger.Debug("Error on light block request from primary", "error", err, "primary", c.primary)
replaceErr := c.replacePrimaryProvider()
if replaceErr != nil {
return nil, fmt.Errorf("%v. Tried to replace primary but: %w", err.Error(), replaceErr)
// send out a light block request to all witnesses
subctx, cancel := context.WithCancel(ctx)
defer cancel()
for index := range c.witnesses {
wg.Add(1)
go func(witnessIndex int, witnessResponsesC chan witnessResponse) {
defer wg.Done()
lb, err := c.witnesses[witnessIndex].LightBlock(subctx, height)
witnessResponsesC <- witnessResponse{lb, witnessIndex, err}
}(index, witnessResponsesC)
}
// process all the responses as they come in
for i := 0; i < cap(witnessResponsesC); i++ {
response := <-witnessResponsesC
switch response.err {
// success! We have found a new primary
case nil:
cancel() // cancel all remaining requests to other witnesses
wg.Wait() // wait for all goroutines to finish
// if we are not intending on removing the primary then append the old primary to the end of the witness slice
if !remove {
c.witnesses = append(c.witnesses, c.primary)
}
// promote respondent as the new primary
c.logger.Debug("found new primary", "primary", c.witnesses[response.witnessIndex])
c.primary = c.witnesses[response.witnessIndex]
// add promoted witness to the list of witnesses to be removed
witnessesToRemove = append(witnessesToRemove, response.witnessIndex)
// remove witnesses marked as bad (the client must do this before we alter the witness slice and change the indexes
// of witnesses). Removal is done in descending order
if err := c.removeWitnesses(witnessesToRemove); err != nil {
return nil, err
}
// return the light block that new primary responded with
return response.lb, nil
// process benign errors by logging them only
case provider.ErrNoResponse, provider.ErrLightBlockNotFound:
lastError = response.err
c.logger.Debug("error on light block request from witness",
"error", response.err, "primary", c.witnesses[response.witnessIndex])
continue
// process malevolent errors like ErrUnreliableProvider and ErrBadLightBlock by removing the witness
default:
lastError = response.err
c.logger.Error("error on light block request from witness, removing...",
"error", response.err, "primary", c.witnesses[response.witnessIndex])
witnessesToRemove = append(witnessesToRemove, response.witnessIndex)
}
// replace primary and request a light block again
return c.lightBlockFromPrimary(ctx, height)
}
return l, err
return nil, lastError
}
// compareFirstHeaderWithWitnesses compares h with all witnesses. If any
// compareFirstHeaderWithWitnesses concurrently compares h with all witnesses. If any
// witness reports a different header than h, the function returns an error.
func (c *Client) compareFirstHeaderWithWitnesses(ctx context.Context, h *types.SignedHeader) error {
compareCtx, cancel := context.WithCancel(ctx)
@ -1006,26 +1095,20 @@ func (c *Client) compareFirstHeaderWithWitnesses(ctx context.Context, h *types.S
case nil:
continue
case errConflictingHeaders:
c.logger.Error(fmt.Sprintf(`Witness #%d has a different header. Please check primary is correct
and remove witness. Otherwise, use the different primary`, e.WitnessIndex), "witness", c.witnesses[e.WitnessIndex])
c.logger.Error(fmt.Sprintf(`witness #%d has a different header. Please check primary is correct
and remove witness. Otherwise, use a different primary`, e.WitnessIndex), "witness", c.witnesses[e.WitnessIndex])
return err
case errBadWitness:
// If witness sent us an invalid header, then remove it. If it didn't
// respond or couldn't find the block, then we ignore it and move on to
// the next witness.
if _, ok := e.Reason.(provider.ErrBadLightBlock); ok {
c.logger.Info("Witness sent us invalid header / vals -> removing it", "witness", c.witnesses[e.WitnessIndex])
c.logger.Info("Witness sent an invalid light block, removing...", "witness", c.witnesses[e.WitnessIndex])
witnessesToRemove = append(witnessesToRemove, e.WitnessIndex)
}
}
}
// we need to make sure that we remove witnesses by index in the reverse
// order so as to not affect the indexes themselves
sort.Ints(witnessesToRemove)
for i := len(witnessesToRemove) - 1; i >= 0; i-- {
c.removeWitness(witnessesToRemove[i])
}
return nil
// remove all witnesses that misbehaved
return c.removeWitnesses(witnessesToRemove)
}

+ 6
- 2
light/client_test.go View File

@ -740,7 +740,7 @@ func TestClient_Concurrency(t *testing.T) {
defer wg.Done()
// NOTE: Cleanup, Stop, VerifyLightBlockAtHeight and Verify are not supposed
// to be concurrenly safe.
// to be concurrently safe.
assert.Equal(t, chainID, c.ChainID())
@ -774,8 +774,12 @@ func TestClientReplacesPrimaryWithWitnessIfPrimaryIsUnavailable(t *testing.T) {
_, err = c.Update(ctx, bTime.Add(2*time.Hour))
require.NoError(t, err)
// the primary should no longer be the deadNode
assert.NotEqual(t, c.Primary(), deadNode)
assert.Equal(t, 1, len(c.Witnesses()))
// we should still have the dead node as a witness because it
// hasn't repeatedly been unresponsive yet
assert.Equal(t, 2, len(c.Witnesses()))
}
func TestClient_BackwardsVerification(t *testing.T) {


+ 24
- 20
light/detector.go View File

@ -5,7 +5,6 @@ import (
"context"
"errors"
"fmt"
"sort"
"time"
"github.com/tendermint/tendermint/light/provider"
@ -35,7 +34,7 @@ func (c *Client) detectDivergence(ctx context.Context, primaryTrace []*types.Lig
lastVerifiedHeader = primaryTrace[len(primaryTrace)-1].SignedHeader
witnessesToRemove = make([]int, 0)
)
c.logger.Debug("Running detector against trace", "endBlockHeight", lastVerifiedHeader.Height,
c.logger.Debug("running detector against trace", "endBlockHeight", lastVerifiedHeader.Height,
"endBlockHash", lastVerifiedHeader.Hash, "length", len(primaryTrace))
c.providerMutex.Lock()
@ -75,7 +74,7 @@ func (c *Client) detectDivergence(ctx context.Context, primaryTrace []*types.Lig
now,
)
if err != nil {
c.logger.Info("Error validating witness's divergent header", "witness", supportingWitness, "err", err)
c.logger.Info("error validating witness's divergent header", "witness", supportingWitness, "err", err)
witnessesToRemove = append(witnessesToRemove, e.WitnessIndex)
continue
}
@ -83,7 +82,7 @@ func (c *Client) detectDivergence(ctx context.Context, primaryTrace []*types.Lig
// We are suspecting that the primary is faulty, hence we hold the witness as the source of truth
// and generate evidence against the primary that we can send to the witness
primaryEv := newLightClientAttackEvidence(primaryBlock, witnessTrace[len(witnessTrace)-1], witnessTrace[0])
c.logger.Error("Attempted attack detected. Sending evidence againt primary by witness", "ev", primaryEv,
c.logger.Error("ATTEMPTED ATTACK DETECTED. Sending evidence againt primary by witness", "ev", primaryEv,
"primary", c.primary, "witness", supportingWitness)
c.sendEvidence(ctx, primaryEv, supportingWitness)
@ -117,22 +116,17 @@ func (c *Client) detectDivergence(ctx context.Context, primaryTrace []*types.Lig
return ErrLightClientAttack
case errBadWitness:
c.logger.Info("Witness returned an error during header comparison", "witness", c.witnesses[e.WitnessIndex],
"err", err)
// if witness sent us an invalid header, then remove it. If it didn't respond or couldn't find the block, then we
// ignore it and move on to the next witness
if _, ok := e.Reason.(provider.ErrBadLightBlock); ok {
c.logger.Info("Witness sent us invalid header / vals -> removing it", "witness", c.witnesses[e.WitnessIndex])
witnessesToRemove = append(witnessesToRemove, e.WitnessIndex)
}
c.logger.Info("witness returned an error during header comparison, removing...",
"witness", c.witnesses[e.WitnessIndex], "err", err)
witnessesToRemove = append(witnessesToRemove, e.WitnessIndex)
default:
c.logger.Debug("error in light block request to witness", "err", err)
}
}
// we need to make sure that we remove witnesses by index in the reverse
// order so as to not affect the indexes themselves
sort.Ints(witnessesToRemove)
for i := len(witnessesToRemove) - 1; i >= 0; i-- {
c.removeWitness(witnessesToRemove[i])
// remove witnesses that have misbehaved
if err := c.removeWitnesses(witnessesToRemove); err != nil {
return err
}
// 1. If we had at least one witness that returned the same header then we
@ -156,7 +150,17 @@ func (c *Client) compareNewHeaderWithWitness(ctx context.Context, errc chan erro
witness provider.Provider, witnessIndex int) {
lightBlock, err := witness.LightBlock(ctx, h.Height)
if err != nil {
switch err {
case nil:
break
case provider.ErrNoResponse, provider.ErrLightBlockNotFound:
errc <- err
return
default:
// all other errors (i.e. invalid block or unreliable provider) we mark the witness as bad
// and remove it
errc <- errBadWitness{Reason: err, WitnessIndex: witnessIndex}
return
}
@ -165,7 +169,7 @@ func (c *Client) compareNewHeaderWithWitness(ctx context.Context, errc chan erro
errc <- errConflictingHeaders{Block: lightBlock, WitnessIndex: witnessIndex}
}
c.logger.Debug("Matching header received by witness", "height", h.Height, "witness", witnessIndex)
c.logger.Debug("matching header received by witness", "height", h.Height, "witness", witnessIndex)
errc <- nil
}
@ -173,7 +177,7 @@ func (c *Client) compareNewHeaderWithWitness(ctx context.Context, errc chan erro
func (c *Client) sendEvidence(ctx context.Context, ev *types.LightClientAttackEvidence, receiver provider.Provider) {
err := receiver.ReportEvidence(ctx, ev)
if err != nil {
c.logger.Error("Failed to report evidence to provider", "ev", ev, "provider", receiver)
c.logger.Error("failed to report evidence to provider", "ev", ev, "provider", receiver)
}
}


+ 1
- 1
light/detector_test.go View File

@ -253,5 +253,5 @@ func TestClientDivergentTraces3(t *testing.T) {
_, err = c.VerifyLightBlockAtHeight(ctx, 10, bTime.Add(1*time.Hour))
assert.Error(t, err)
assert.Equal(t, 0, len(c.Witnesses()))
assert.Equal(t, 1, len(c.Witnesses()))
}

+ 14
- 3
light/provider/errors.go View File

@ -7,15 +7,15 @@ import (
var (
// ErrLightBlockNotFound is returned when a provider can't find the
// requested header.
// requested header. The light client will not remove the provider
ErrLightBlockNotFound = errors.New("light block not found")
// ErrNoResponse is returned if the provider doesn't respond to the
// request in a gieven time
// request in a given time. The light client will not remove the provider
ErrNoResponse = errors.New("client failed to respond")
)
// ErrBadLightBlock is returned when a provider returns an invalid
// light block.
// light block. The light client will remove the provider.
type ErrBadLightBlock struct {
Reason error
}
@ -23,3 +23,14 @@ type ErrBadLightBlock struct {
func (e ErrBadLightBlock) Error() string {
return fmt.Sprintf("client provided bad signed header: %s", e.Reason.Error())
}
// ErrUnreliableProvider is a generic error that indicates that the provider isn't
// behaving in a reliable manner to the light client. The light client will
// remove the provider
type ErrUnreliableProvider struct {
Reason string
}
func (e ErrUnreliableProvider) Error() string {
return fmt.Sprintf("client deemed unreliable: %s", e.Reason)
}

+ 72
- 23
light/provider/http/http.go View File

@ -18,8 +18,10 @@ import (
)
var defaultOptions = Options{
MaxRetryAttempts: 5,
Timeout: 3 * time.Second,
MaxRetryAttempts: 5,
Timeout: 3 * time.Second,
NoBlockThreshold: 5,
NoResponseThreshold: 5,
}
// http provider uses an RPC client to obtain the necessary information.
@ -27,14 +29,37 @@ type http struct {
chainID string
client rpcclient.RemoteClient
maxRetryAttempts int
// httt provider heuristics
// The provider tracks the amount of times that the
// client doesn't respond. If this exceeds the threshold
// then the provider will return an unreliable provider error
noResponseThreshold uint16
noResponseCount uint16
// The provider tracks the amount of time the client
// doesn't have a block. If this exceeds the threshold
// then the provider will return an unreliable provider error
noBlockThreshold uint16
noBlockCount uint16
// In a single request, the provider attempts multiple times
// with exponential backoff to reach the client. If this
// exceeds the maxRetry attempts, this result in a ErrNoResponse
maxRetryAttempts uint16
}
type Options struct {
// -1 means no limit
MaxRetryAttempts int
// 0 means no retries
MaxRetryAttempts uint16
// 0 means no timeout.
Timeout time.Duration
// The amount of requests that a client doesn't have the block
// for before the provider deems the client unreliable
NoBlockThreshold uint16
// The amount of requests that a client doesn't respond to
// before the provider deems the client unreliable
NoResponseThreshold uint16
}
// New creates a HTTP provider, which is using the rpchttp.HTTP client under
@ -67,9 +92,11 @@ func NewWithClient(chainID string, client rpcclient.RemoteClient) provider.Provi
// NewWithClient allows you to provide a custom client.
func NewWithClientAndOptions(chainID string, client rpcclient.RemoteClient, options Options) provider.Provider {
return &http{
client: client,
chainID: chainID,
maxRetryAttempts: options.MaxRetryAttempts,
client: client,
chainID: chainID,
maxRetryAttempts: options.MaxRetryAttempts,
noResponseThreshold: options.NoResponseThreshold,
noBlockThreshold: options.NoBlockThreshold,
}
}
@ -130,7 +157,8 @@ func (p *http) validatorSet(ctx context.Context, height *int64) (*types.Validato
for len(vals) != total && page <= maxPages {
// create another for loop to control retries. If p.maxRetryAttempts
// is negative we will keep repeating.
for attempt := 0; attempt != p.maxRetryAttempts+1; attempt++ {
attempt := uint16(0)
for {
res, err := p.client.Validators(ctx, height, &page, &perPage)
switch e := err.(type) {
case nil: // success!! Now we validate the response
@ -149,8 +177,13 @@ func (p *http) validatorSet(ctx context.Context, height *int64) (*types.Validato
case *url.Error:
if e.Timeout() {
// if we have exceeded retry attempts then return a no response error
if attempt == p.maxRetryAttempts {
return nil, p.noResponse()
}
attempt++
// request timed out: we wait and try again with exponential backoff
time.Sleep(backoffTimeout(uint16(attempt)))
time.Sleep(backoffTimeout(attempt))
continue
}
return nil, provider.ErrBadLightBlock{Reason: e}
@ -188,7 +221,7 @@ func (p *http) validatorSet(ctx context.Context, height *int64) (*types.Validato
func (p *http) signedHeader(ctx context.Context, height *int64) (*types.SignedHeader, error) {
// create a for loop to control retries. If p.maxRetryAttempts
// is negative we will keep repeating.
for attempt := 0; attempt != p.maxRetryAttempts+1; attempt++ {
for attempt := uint16(0); attempt != p.maxRetryAttempts+1; attempt++ {
commit, err := p.client.Commit(ctx, height)
switch e := err.(type) {
case nil: // success!!
@ -197,32 +230,48 @@ func (p *http) signedHeader(ctx context.Context, height *int64) (*types.SignedHe
case *url.Error:
if e.Timeout() {
// we wait and try again with exponential backoff
time.Sleep(backoffTimeout(uint16(attempt)))
time.Sleep(backoffTimeout(attempt))
continue
}
return nil, provider.ErrBadLightBlock{Reason: e}
case *rpctypes.RPCError:
// Check if we got something other than internal error. This shouldn't happen unless the RPC module
// or light client has been tampered with. If we do get this error, stop the connection with the
// peer and return an error
if e.Code != -32603 {
return nil, provider.ErrBadLightBlock{Reason: errors.New(e.Data)}
}
// check if the error indicates that the peer doesn't have the block
if strings.Contains(err.Error(), ctypes.ErrHeightNotAvailable.Error()) ||
strings.Contains(err.Error(), ctypes.ErrHeightExceedsChainHead.Error()) {
return nil, provider.ErrLightBlockNotFound
if strings.Contains(e.Data, ctypes.ErrHeightNotAvailable.Error()) ||
strings.Contains(e.Data, ctypes.ErrHeightExceedsChainHead.Error()) {
return nil, p.noBlock()
}
// for every other error, the provider returns a bad block
return nil, provider.ErrBadLightBlock{Reason: errors.New(e.Data)}
default:
// If we don't know the error then by default we return a bad light block error and
// terminate the connection with the peer.
return nil, provider.ErrBadLightBlock{Reason: e}
}
}
return nil, provider.ErrNoResponse
return nil, p.noResponse()
}
func (p *http) noResponse() error {
p.noResponseCount++
if p.noResponseCount > p.noResponseThreshold {
return provider.ErrUnreliableProvider{
Reason: fmt.Sprintf("failed to respond after %d attempts", p.noResponseCount),
}
}
return provider.ErrNoResponse
}
func (p *http) noBlock() error {
p.noBlockCount++
if p.noBlockCount > p.noBlockThreshold {
return provider.ErrUnreliableProvider{
Reason: fmt.Sprintf("failed to provide a block after %d attempts", p.noBlockCount),
}
}
return provider.ErrLightBlockNotFound
}
func validateHeight(height int64) (*int64, error) {


+ 15
- 7
light/provider/http/http_test.go View File

@ -66,25 +66,33 @@ func TestProvider(t *testing.T) {
require.NoError(t, err)
// let's get the highest block
sh, err := p.LightBlock(context.Background(), 0)
lb, err := p.LightBlock(context.Background(), 0)
require.NoError(t, err)
assert.True(t, sh.Height < 1000)
assert.True(t, lb.Height < 1000)
// let's check this is valid somehow
assert.Nil(t, sh.ValidateBasic(chainID))
assert.Nil(t, lb.ValidateBasic(chainID))
// historical queries now work :)
lower := sh.Height - 3
sh, err = p.LightBlock(context.Background(), lower)
lower := lb.Height - 3
lb, err = p.LightBlock(context.Background(), lower)
require.NoError(t, err)
assert.Equal(t, lower, sh.Height)
assert.Equal(t, lower, lb.Height)
// // fetching missing heights (both future and pruned) should return appropriate errors
_, err = p.LightBlock(context.Background(), 1000)
lb, err = p.LightBlock(context.Background(), 1000)
require.Error(t, err)
require.Nil(t, lb)
assert.Equal(t, provider.ErrLightBlockNotFound, err)
_, err = p.LightBlock(context.Background(), 1)
require.Error(t, err)
assert.Equal(t, provider.ErrLightBlockNotFound, err)
// if the provider is unable to provide four more blocks then we should return
// an unreliable peer error
for i := 0; i < 4; i++ {
_, err = p.LightBlock(context.Background(), 1)
}
assert.IsType(t, provider.ErrUnreliableProvider{}, err)
}

+ 2
- 5
light/provider/mock/deadmock.go View File

@ -2,15 +2,12 @@ package mock
import (
"context"
"errors"
"fmt"
"github.com/tendermint/tendermint/light/provider"
"github.com/tendermint/tendermint/types"
)
var errNoResp = errors.New("no response from provider")
type deadMock struct {
id string
}
@ -25,9 +22,9 @@ func (p *deadMock) String() string {
}
func (p *deadMock) LightBlock(_ context.Context, height int64) (*types.LightBlock, error) {
return nil, errNoResp
return nil, provider.ErrNoResponse
}
func (p *deadMock) ReportEvidence(_ context.Context, ev types.Evidence) error {
return errNoResp
return provider.ErrNoResponse
}

+ 1
- 1
light/verifier.go View File

@ -130,7 +130,7 @@ func VerifyAdjacent(
// Check the validator hashes are the same
if !bytes.Equal(untrustedHeader.ValidatorsHash, trustedHeader.NextValidatorsHash) {
err := fmt.Errorf("expected old header next validators (%X) to match those from new header (%X)",
err := fmt.Errorf("expected old header's next validators (%X) to match those from new header (%X)",
trustedHeader.NextValidatorsHash,
untrustedHeader.ValidatorsHash,
)


Loading…
Cancel
Save