Browse Source

light: improve error handling and allow providers to be added (#6733)

pull/6777/head
Callum Waters 3 years ago
committed by GitHub
parent
commit
0e2752ae42
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 142 additions and 60 deletions
  1. +1
    -1
      internal/statesync/snapshots.go
  2. +87
    -50
      light/client.go
  3. +30
    -0
      light/client_test.go
  4. +10
    -2
      light/detector.go
  5. +12
    -5
      light/verifier.go
  6. +2
    -2
      light/verifier_test.go

+ 1
- 1
internal/statesync/snapshots.go View File

@ -85,7 +85,7 @@ func (p *snapshotPool) Add(peerID types.NodeID, snapshot *snapshot) (bool, error
appHash, err := p.stateProvider.AppHash(ctx, snapshot.Height)
if err != nil {
return false, err
return false, fmt.Errorf("failed to get app hash: %w", err)
}
snapshot.trustedAppHash = appHash
key := snapshot.Key()


+ 87
- 50
light/client.go View File

@ -478,7 +478,7 @@ func (c *Client) VerifyHeader(ctx context.Context, newHeader *types.Header, now
}
if !bytes.Equal(l.Hash(), newHeader.Hash()) {
return fmt.Errorf("light block header %X does not match newHeader %X", l.Hash(), newHeader.Hash())
return fmt.Errorf("header from primary %X does not match newHeader %X", l.Hash(), newHeader.Hash())
}
return c.verifyLightBlock(ctx, l, now)
@ -586,7 +586,7 @@ func (c *Client) verifySequential(
}
// If some intermediate header is invalid, remove the primary and try again.
c.logger.Error("primary sent invalid header -> removing", "err", err, "primary", c.primary)
c.logger.Info("primary sent invalid header -> removing", "err", err, "primary", c.primary)
replacementBlock, removeErr := c.findNewPrimary(ctx, newLightBlock.Height, true)
if removeErr != nil {
@ -630,6 +630,10 @@ func (c *Client) verifySequential(
// requested from source is kept such that when a verification is made, and the
// light client tries again to verify the new light block in the middle, the light
// client does not need to ask for all the same light blocks again.
//
// If this function errors, it should always wrap it in a `ErrVerifcationFailed`
// struct so that the calling function can determine where it failed and handle
// it accordingly.
func (c *Client) verifySkipping(
ctx context.Context,
source provider.Provider,
@ -690,20 +694,10 @@ func (c *Client) verifySkipping(
// schedule what the next height we need to fetch is
pivotHeight := c.schedule(verifiedBlock.Height, blockCache[depth].Height)
interimBlock, providerErr := source.LightBlock(ctx, pivotHeight)
switch providerErr {
case nil:
blockCache = append(blockCache, interimBlock)
// if the error is benign, the client does not need to replace the primary
case provider.ErrLightBlockNotFound, provider.ErrNoResponse, provider.ErrHeightTooHigh:
return nil, err
// all other errors such as ErrBadLightBlock or ErrUnreliableProvider are seen as malevolent and the
// provider is removed
default:
if providerErr != nil {
return nil, ErrVerificationFailed{From: verifiedBlock.Height, To: pivotHeight, Reason: providerErr}
}
blockCache = append(blockCache, interimBlock)
}
depth++
@ -729,45 +723,71 @@ func (c *Client) verifySkippingAgainstPrimary(
now time.Time) error {
trace, err := c.verifySkipping(ctx, c.primary, trustedBlock, newLightBlock, now)
if err == nil {
// Success! Now compare the header with the witnesses to ensure it's not a fork.
// More witnesses we have, more chance to notice one.
//
// CORRECTNESS ASSUMPTION: there's at least 1 correct full node
// (primary or one of the witnesses).
if cmpErr := c.detectDivergence(ctx, trace, now); cmpErr != nil {
return cmpErr
}
}
var e = &ErrVerificationFailed{}
// all errors from verify skipping should be `ErrVerificationFailed`
// if it's not we just return the error directly
if !errors.As(err, e) {
return err
}
switch errors.Unwrap(err).(type) {
replace := true
switch e.Reason.(type) {
// Verification returned an invalid header
case ErrInvalidHeader:
// If the target header is invalid, return immediately.
invalidHeaderHeight := err.(ErrVerificationFailed).To
if invalidHeaderHeight == newLightBlock.Height {
// If it was the target header, return immediately.
if e.To == newLightBlock.Height {
c.logger.Debug("target header is invalid", "err", err)
return err
}
// If some intermediate header is invalid, remove the primary and try again.
c.logger.Error("primary sent invalid header -> replacing", "err", err, "primary", c.primary)
replacementBlock, removeErr := c.findNewPrimary(ctx, newLightBlock.Height, true)
if removeErr != nil {
c.logger.Error("failed to replace primary. Returning original error", "err", removeErr)
return err
}
// If some intermediate header is invalid, remove the primary and try
// again.
if !bytes.Equal(replacementBlock.Hash(), newLightBlock.Hash()) {
c.logger.Debug("replaced primary but new primary has a different block to the initial one. Returning original error")
return err
// An intermediate header expired. We can no longer validate it as there is
// no longer the ability to punish invalid blocks as evidence of misbehavior
case ErrOldHeaderExpired:
return err
// This happens if there was a problem in finding the next block or a
// context was canceled.
default:
if errors.Is(e.Reason, context.Canceled) || errors.Is(e.Reason, context.DeadlineExceeded) {
return e.Reason
}
// attempt to verify the header again
return c.verifySkippingAgainstPrimary(ctx, trustedBlock, replacementBlock, now)
case nil:
// Compare header with the witnesses to ensure it's not a fork.
// More witnesses we have, more chance to notice one.
//
// CORRECTNESS ASSUMPTION: there's at least 1 correct full node
// (primary or one of the witnesses).
if cmpErr := c.detectDivergence(ctx, trace, now); cmpErr != nil {
return cmpErr
if !c.providerShouldBeRemoved(e.Reason) {
replace = false
}
default:
return err
}
return nil
// if we've reached here we're attempting to retry verification with a
// different provider
c.logger.Info("primary returned error", "err", e, "primary", c.primary, "replace", replace)
replacementBlock, removeErr := c.findNewPrimary(ctx, newLightBlock.Height, replace)
if removeErr != nil {
c.logger.Error("failed to replace primary. Returning original error", "err", removeErr)
return e.Reason
}
if !bytes.Equal(replacementBlock.Hash(), newLightBlock.Hash()) {
c.logger.Debug("replaced primary but new primary has a different block to the initial one. Returning original error")
return e.Reason
}
// attempt to verify the header again from the trusted block
return c.verifySkippingAgainstPrimary(ctx, trustedBlock, replacementBlock, now)
}
// LastTrustedHeight returns a last trusted height. -1 and nil are returned if
@ -811,6 +831,15 @@ func (c *Client) Witnesses() []provider.Provider {
return c.witnesses
}
// AddProvider adds a providers to the light clients set
//
// NOTE: The light client does not check for uniqueness
func (c *Client) AddProvider(p provider.Provider) {
c.providerMutex.Lock()
defer c.providerMutex.Unlock()
c.witnesses = append(c.witnesses, p)
}
// Cleanup removes all the data (headers and validator sets) stored. Note: the
// client must be stopped at this point.
func (c *Client) Cleanup() error {
@ -865,7 +894,7 @@ func (c *Client) backwards(
"newHash", interimHeader.Hash())
if err := VerifyBackwards(interimHeader, verifiedHeader); err != nil {
// verification has failed
c.logger.Error("backwards verification failed, replacing primary...", "err", err, "primary", c.primary)
c.logger.Info("backwards verification failed, replacing primary...", "err", err, "primary", c.primary)
// the client tries to see if it can get a witness to continue with the request
newPrimarysBlock, replaceErr := c.findNewPrimary(ctx, newHeader.Height, true)
@ -915,14 +944,14 @@ func (c *Client) lightBlockFromPrimary(ctx context.Context, height int64) (*type
case provider.ErrNoResponse, provider.ErrLightBlockNotFound, provider.ErrHeightTooHigh:
// we find a new witness to replace the primary
c.logger.Debug("error from light block request from primary, replacing...",
c.logger.Info("error from light block request from primary, replacing...",
"error", err, "height", height, "primary", c.primary)
return c.findNewPrimary(ctx, height, false)
default:
// The light client has most likely received either provider.ErrUnreliableProvider or provider.ErrBadLightBlock
// These errors mean that the light client should drop the primary and try with another provider instead
c.logger.Error("error from light block request from primary, removing...",
c.logger.Info("error from light block request from primary, removing...",
"error", err, "height", height, "primary", c.primary)
return c.findNewPrimary(ctx, height, true)
}
@ -1022,7 +1051,7 @@ func (c *Client) findNewPrimary(ctx context.Context, height int64, remove bool)
// process benign errors by logging them only
case provider.ErrNoResponse, provider.ErrLightBlockNotFound, provider.ErrHeightTooHigh:
lastError = response.err
c.logger.Debug("error on light block request from witness",
c.logger.Info("error on light block request from witness",
"error", response.err, "primary", c.witnesses[response.witnessIndex])
continue
@ -1066,13 +1095,13 @@ func (c *Client) compareFirstHeaderWithWitnesses(ctx context.Context, h *types.S
case nil:
continue
case errConflictingHeaders:
c.logger.Error(fmt.Sprintf(`witness #%d has a different header. Please check primary is correct
and remove witness. Otherwise, use a different primary`, e.WitnessIndex), "witness", c.witnesses[e.WitnessIndex])
c.logger.Error(`witness has a different header. Please check primary is
correct and remove witness. Otherwise, use a different primary`,
"Witness", c.witnesses[e.WitnessIndex], "ExpHeader", h.Hash(), "GotHeader", e.Block.Hash())
return err
case errBadWitness:
// If witness sent us an invalid header, then remove it
c.logger.Info("witness sent an invalid light block or didn't respond, removing...",
"witness", c.witnesses[e.WitnessIndex],
c.logger.Info("witness returned an error, removing...",
"err", err)
witnessesToRemove = append(witnessesToRemove, e.WitnessIndex)
default:
@ -1082,7 +1111,7 @@ and remove witness. Otherwise, use a different primary`, e.WitnessIndex), "witne
}
// the witness either didn't respond or didn't have the block. We ignore it.
c.logger.Debug("unable to compare first header with witness",
c.logger.Debug("unable to compare first header with witness, ignoring",
"err", err)
}
@ -1091,3 +1120,11 @@ and remove witness. Otherwise, use a different primary`, e.WitnessIndex), "witne
// remove all witnesses that misbehaved
return c.removeWitnesses(witnessesToRemove)
}
// providerShouldBeRemoved analyzes the nature of the error and whether the provider
// should be removed from the light clients set
func (c *Client) providerShouldBeRemoved(err error) bool {
return errors.As(err, &provider.ErrUnreliableProvider{}) ||
errors.As(err, &provider.ErrBadLightBlock{}) ||
errors.Is(err, provider.ErrConnectionClosed)
}

+ 30
- 0
light/client_test.go View File

@ -589,6 +589,36 @@ func TestClient_Concurrency(t *testing.T) {
wg.Wait()
}
func TestClient_AddProviders(t *testing.T) {
c, err := light.NewClient(
ctx,
chainID,
trustOptions,
fullNode,
[]provider.Provider{fullNode},
dbs.New(dbm.NewMemDB()),
light.Logger(log.TestingLogger()),
)
require.NoError(t, err)
closeCh := make(chan struct{})
go func() {
// run verification concurrently to make sure it doesn't dead lock
_, err = c.VerifyLightBlockAtHeight(ctx, 2, bTime.Add(2*time.Hour))
require.NoError(t, err)
close(closeCh)
}()
// NOTE: the light client doesn't check uniqueness of providers
c.AddProvider(fullNode)
require.Len(t, c.Witnesses(), 2)
select {
case <-closeCh:
case <-time.After(5 * time.Second):
t.Fatal("concurent light block verification failed to finish in 5s")
}
}
func TestClientReplacesPrimaryWithWitnessIfPrimaryIsUnavailable(t *testing.T) {
c, err := light.NewClient(
ctx,


+ 10
- 2
light/detector.go View File

@ -131,7 +131,11 @@ func (c *Client) compareNewHeaderWithWitness(ctx context.Context, errc chan erro
var isTargetHeight bool
isTargetHeight, lightBlock, err = c.getTargetBlockOrLatest(ctx, h.Height, witness)
if err != nil {
errc <- err
if c.providerShouldBeRemoved(err) {
errc <- errBadWitness{Reason: err, WitnessIndex: witnessIndex}
} else {
errc <- err
}
return
}
@ -155,7 +159,11 @@ func (c *Client) compareNewHeaderWithWitness(ctx context.Context, errc chan erro
time.Sleep(2*c.maxClockDrift + c.maxBlockLag)
isTargetHeight, lightBlock, err = c.getTargetBlockOrLatest(ctx, h.Height, witness)
if err != nil {
errc <- errBadWitness{Reason: err, WitnessIndex: witnessIndex}
if c.providerShouldBeRemoved(err) {
errc <- errBadWitness{Reason: err, WitnessIndex: witnessIndex}
} else {
errc <- err
}
return
}
if isTargetHeight {


+ 12
- 5
light/verifier.go View File

@ -50,8 +50,9 @@ func VerifyNonAdjacent(
return err
}
if HeaderExpired(trustedHeader, trustingPeriod, now) {
return ErrOldHeaderExpired{trustedHeader.Time.Add(trustingPeriod), now}
// check if the untrusted header is within the trust period
if HeaderExpired(untrustedHeader, trustingPeriod, now) {
return ErrOldHeaderExpired{untrustedHeader.Time.Add(trustingPeriod), now}
}
if err := verifyNewHeaderAndVals(
@ -117,8 +118,9 @@ func VerifyAdjacent(
return errors.New("headers must be adjacent in height")
}
if HeaderExpired(trustedHeader, trustingPeriod, now) {
return ErrOldHeaderExpired{trustedHeader.Time.Add(trustingPeriod), now}
// check if the untrusted header is within the trust period
if HeaderExpired(untrustedHeader, trustingPeriod, now) {
return ErrOldHeaderExpired{untrustedHeader.Time.Add(trustingPeriod), now}
}
if err := verifyNewHeaderAndVals(
@ -192,7 +194,10 @@ func HeaderExpired(h *types.SignedHeader, trustingPeriod time.Duration, now time
// of the trusted header
//
// For any of these cases ErrInvalidHeader is returned.
// NOTE: This does not check whether the trusted header has expired or not.
// NOTE: This does not check whether the trusted or untrusted header has expired
// or not. These checks are not necessary because the detector never runs during
// backwards verification and thus evidence that needs to be within a certain
// time bound is never sent.
func VerifyBackwards(untrustedHeader, trustedHeader *types.Header) error {
if err := untrustedHeader.ValidateBasic(); err != nil {
return ErrInvalidHeader{err}
@ -220,6 +225,8 @@ func VerifyBackwards(untrustedHeader, trustedHeader *types.Header) error {
return nil
}
// NOTE: This function assumes that untrustedHeader is after trustedHeader.
// Do not use for backwards verification.
func verifyNewHeaderAndVals(
untrustedHeader *types.SignedHeader,
untrustedVals *types.ValidatorSet,


+ 2
- 2
light/verifier_test.go View File

@ -64,7 +64,7 @@ func TestVerifyAdjacentHeaders(t *testing.T) {
keys.GenSignedHeader(chainID, nextHeight, bTime.Add(-1*time.Hour), nil, vals, vals,
hash("app_hash"), hash("cons_hash"), hash("results_hash"), 0, len(keys)),
vals,
3 * time.Hour,
4 * time.Hour,
bTime.Add(2 * time.Hour),
nil,
"to be after old header time",
@ -146,7 +146,7 @@ func TestVerifyAdjacentHeaders(t *testing.T) {
hash("app_hash"), hash("cons_hash"), hash("results_hash"), 0, len(keys)),
keys.ToValidators(10, 1),
1 * time.Hour,
bTime.Add(1 * time.Hour),
bTime.Add(2 * time.Hour),
nil,
"old header has expired",
},


Loading…
Cancel
Save