diff --git a/internal/statesync/snapshots.go b/internal/statesync/snapshots.go index 11cd78394..9058304a9 100644 --- a/internal/statesync/snapshots.go +++ b/internal/statesync/snapshots.go @@ -85,7 +85,7 @@ func (p *snapshotPool) Add(peerID types.NodeID, snapshot *snapshot) (bool, error appHash, err := p.stateProvider.AppHash(ctx, snapshot.Height) if err != nil { - return false, err + return false, fmt.Errorf("failed to get app hash: %w", err) } snapshot.trustedAppHash = appHash key := snapshot.Key() diff --git a/light/client.go b/light/client.go index 122130224..52bbdf981 100644 --- a/light/client.go +++ b/light/client.go @@ -478,7 +478,7 @@ func (c *Client) VerifyHeader(ctx context.Context, newHeader *types.Header, now } if !bytes.Equal(l.Hash(), newHeader.Hash()) { - return fmt.Errorf("light block header %X does not match newHeader %X", l.Hash(), newHeader.Hash()) + return fmt.Errorf("header from primary %X does not match newHeader %X", l.Hash(), newHeader.Hash()) } return c.verifyLightBlock(ctx, l, now) @@ -586,7 +586,7 @@ func (c *Client) verifySequential( } // If some intermediate header is invalid, remove the primary and try again. - c.logger.Error("primary sent invalid header -> removing", "err", err, "primary", c.primary) + c.logger.Info("primary sent invalid header -> removing", "err", err, "primary", c.primary) replacementBlock, removeErr := c.findNewPrimary(ctx, newLightBlock.Height, true) if removeErr != nil { @@ -630,6 +630,10 @@ func (c *Client) verifySequential( // requested from source is kept such that when a verification is made, and the // light client tries again to verify the new light block in the middle, the light // client does not need to ask for all the same light blocks again. +// +// If this function errors, it should always wrap it in a `ErrVerifcationFailed` +// struct so that the calling function can determine where it failed and handle +// it accordingly. func (c *Client) verifySkipping( ctx context.Context, source provider.Provider, @@ -690,20 +694,10 @@ func (c *Client) verifySkipping( // schedule what the next height we need to fetch is pivotHeight := c.schedule(verifiedBlock.Height, blockCache[depth].Height) interimBlock, providerErr := source.LightBlock(ctx, pivotHeight) - switch providerErr { - case nil: - blockCache = append(blockCache, interimBlock) - - // if the error is benign, the client does not need to replace the primary - case provider.ErrLightBlockNotFound, provider.ErrNoResponse, provider.ErrHeightTooHigh: - return nil, err - - // all other errors such as ErrBadLightBlock or ErrUnreliableProvider are seen as malevolent and the - // provider is removed - default: + if providerErr != nil { return nil, ErrVerificationFailed{From: verifiedBlock.Height, To: pivotHeight, Reason: providerErr} } - + blockCache = append(blockCache, interimBlock) } depth++ @@ -729,45 +723,71 @@ func (c *Client) verifySkippingAgainstPrimary( now time.Time) error { trace, err := c.verifySkipping(ctx, c.primary, trustedBlock, newLightBlock, now) + if err == nil { + // Success! Now compare the header with the witnesses to ensure it's not a fork. + // More witnesses we have, more chance to notice one. + // + // CORRECTNESS ASSUMPTION: there's at least 1 correct full node + // (primary or one of the witnesses). + if cmpErr := c.detectDivergence(ctx, trace, now); cmpErr != nil { + return cmpErr + } + } + + var e = &ErrVerificationFailed{} + // all errors from verify skipping should be `ErrVerificationFailed` + // if it's not we just return the error directly + if !errors.As(err, e) { + return err + } - switch errors.Unwrap(err).(type) { + replace := true + switch e.Reason.(type) { + // Verification returned an invalid header case ErrInvalidHeader: - // If the target header is invalid, return immediately. - invalidHeaderHeight := err.(ErrVerificationFailed).To - if invalidHeaderHeight == newLightBlock.Height { + // If it was the target header, return immediately. + if e.To == newLightBlock.Height { c.logger.Debug("target header is invalid", "err", err) return err } - // If some intermediate header is invalid, remove the primary and try again. - c.logger.Error("primary sent invalid header -> replacing", "err", err, "primary", c.primary) - replacementBlock, removeErr := c.findNewPrimary(ctx, newLightBlock.Height, true) - if removeErr != nil { - c.logger.Error("failed to replace primary. Returning original error", "err", removeErr) - return err - } + // If some intermediate header is invalid, remove the primary and try + // again. - if !bytes.Equal(replacementBlock.Hash(), newLightBlock.Hash()) { - c.logger.Debug("replaced primary but new primary has a different block to the initial one. Returning original error") - return err + // An intermediate header expired. We can no longer validate it as there is + // no longer the ability to punish invalid blocks as evidence of misbehavior + case ErrOldHeaderExpired: + return err + + // This happens if there was a problem in finding the next block or a + // context was canceled. + default: + if errors.Is(e.Reason, context.Canceled) || errors.Is(e.Reason, context.DeadlineExceeded) { + return e.Reason } - // attempt to verify the header again - return c.verifySkippingAgainstPrimary(ctx, trustedBlock, replacementBlock, now) - case nil: - // Compare header with the witnesses to ensure it's not a fork. - // More witnesses we have, more chance to notice one. - // - // CORRECTNESS ASSUMPTION: there's at least 1 correct full node - // (primary or one of the witnesses). - if cmpErr := c.detectDivergence(ctx, trace, now); cmpErr != nil { - return cmpErr + if !c.providerShouldBeRemoved(e.Reason) { + replace = false } - default: - return err } - return nil + // if we've reached here we're attempting to retry verification with a + // different provider + c.logger.Info("primary returned error", "err", e, "primary", c.primary, "replace", replace) + + replacementBlock, removeErr := c.findNewPrimary(ctx, newLightBlock.Height, replace) + if removeErr != nil { + c.logger.Error("failed to replace primary. Returning original error", "err", removeErr) + return e.Reason + } + + if !bytes.Equal(replacementBlock.Hash(), newLightBlock.Hash()) { + c.logger.Debug("replaced primary but new primary has a different block to the initial one. Returning original error") + return e.Reason + } + + // attempt to verify the header again from the trusted block + return c.verifySkippingAgainstPrimary(ctx, trustedBlock, replacementBlock, now) } // LastTrustedHeight returns a last trusted height. -1 and nil are returned if @@ -811,6 +831,15 @@ func (c *Client) Witnesses() []provider.Provider { return c.witnesses } +// AddProvider adds a providers to the light clients set +// +// NOTE: The light client does not check for uniqueness +func (c *Client) AddProvider(p provider.Provider) { + c.providerMutex.Lock() + defer c.providerMutex.Unlock() + c.witnesses = append(c.witnesses, p) +} + // Cleanup removes all the data (headers and validator sets) stored. Note: the // client must be stopped at this point. func (c *Client) Cleanup() error { @@ -865,7 +894,7 @@ func (c *Client) backwards( "newHash", interimHeader.Hash()) if err := VerifyBackwards(interimHeader, verifiedHeader); err != nil { // verification has failed - c.logger.Error("backwards verification failed, replacing primary...", "err", err, "primary", c.primary) + c.logger.Info("backwards verification failed, replacing primary...", "err", err, "primary", c.primary) // the client tries to see if it can get a witness to continue with the request newPrimarysBlock, replaceErr := c.findNewPrimary(ctx, newHeader.Height, true) @@ -915,14 +944,14 @@ func (c *Client) lightBlockFromPrimary(ctx context.Context, height int64) (*type case provider.ErrNoResponse, provider.ErrLightBlockNotFound, provider.ErrHeightTooHigh: // we find a new witness to replace the primary - c.logger.Debug("error from light block request from primary, replacing...", + c.logger.Info("error from light block request from primary, replacing...", "error", err, "height", height, "primary", c.primary) return c.findNewPrimary(ctx, height, false) default: // The light client has most likely received either provider.ErrUnreliableProvider or provider.ErrBadLightBlock // These errors mean that the light client should drop the primary and try with another provider instead - c.logger.Error("error from light block request from primary, removing...", + c.logger.Info("error from light block request from primary, removing...", "error", err, "height", height, "primary", c.primary) return c.findNewPrimary(ctx, height, true) } @@ -1022,7 +1051,7 @@ func (c *Client) findNewPrimary(ctx context.Context, height int64, remove bool) // process benign errors by logging them only case provider.ErrNoResponse, provider.ErrLightBlockNotFound, provider.ErrHeightTooHigh: lastError = response.err - c.logger.Debug("error on light block request from witness", + c.logger.Info("error on light block request from witness", "error", response.err, "primary", c.witnesses[response.witnessIndex]) continue @@ -1066,13 +1095,13 @@ func (c *Client) compareFirstHeaderWithWitnesses(ctx context.Context, h *types.S case nil: continue case errConflictingHeaders: - c.logger.Error(fmt.Sprintf(`witness #%d has a different header. Please check primary is correct -and remove witness. Otherwise, use a different primary`, e.WitnessIndex), "witness", c.witnesses[e.WitnessIndex]) + c.logger.Error(`witness has a different header. Please check primary is +correct and remove witness. Otherwise, use a different primary`, + "Witness", c.witnesses[e.WitnessIndex], "ExpHeader", h.Hash(), "GotHeader", e.Block.Hash()) return err case errBadWitness: // If witness sent us an invalid header, then remove it - c.logger.Info("witness sent an invalid light block or didn't respond, removing...", - "witness", c.witnesses[e.WitnessIndex], + c.logger.Info("witness returned an error, removing...", "err", err) witnessesToRemove = append(witnessesToRemove, e.WitnessIndex) default: @@ -1082,7 +1111,7 @@ and remove witness. Otherwise, use a different primary`, e.WitnessIndex), "witne } // the witness either didn't respond or didn't have the block. We ignore it. - c.logger.Debug("unable to compare first header with witness", + c.logger.Debug("unable to compare first header with witness, ignoring", "err", err) } @@ -1091,3 +1120,11 @@ and remove witness. Otherwise, use a different primary`, e.WitnessIndex), "witne // remove all witnesses that misbehaved return c.removeWitnesses(witnessesToRemove) } + +// providerShouldBeRemoved analyzes the nature of the error and whether the provider +// should be removed from the light clients set +func (c *Client) providerShouldBeRemoved(err error) bool { + return errors.As(err, &provider.ErrUnreliableProvider{}) || + errors.As(err, &provider.ErrBadLightBlock{}) || + errors.Is(err, provider.ErrConnectionClosed) +} diff --git a/light/client_test.go b/light/client_test.go index b826eaead..67e0525b8 100644 --- a/light/client_test.go +++ b/light/client_test.go @@ -589,6 +589,36 @@ func TestClient_Concurrency(t *testing.T) { wg.Wait() } +func TestClient_AddProviders(t *testing.T) { + c, err := light.NewClient( + ctx, + chainID, + trustOptions, + fullNode, + []provider.Provider{fullNode}, + dbs.New(dbm.NewMemDB()), + light.Logger(log.TestingLogger()), + ) + require.NoError(t, err) + + closeCh := make(chan struct{}) + go func() { + // run verification concurrently to make sure it doesn't dead lock + _, err = c.VerifyLightBlockAtHeight(ctx, 2, bTime.Add(2*time.Hour)) + require.NoError(t, err) + close(closeCh) + }() + + // NOTE: the light client doesn't check uniqueness of providers + c.AddProvider(fullNode) + require.Len(t, c.Witnesses(), 2) + select { + case <-closeCh: + case <-time.After(5 * time.Second): + t.Fatal("concurent light block verification failed to finish in 5s") + } +} + func TestClientReplacesPrimaryWithWitnessIfPrimaryIsUnavailable(t *testing.T) { c, err := light.NewClient( ctx, diff --git a/light/detector.go b/light/detector.go index 4d7301cb0..32a0c3f1e 100644 --- a/light/detector.go +++ b/light/detector.go @@ -131,7 +131,11 @@ func (c *Client) compareNewHeaderWithWitness(ctx context.Context, errc chan erro var isTargetHeight bool isTargetHeight, lightBlock, err = c.getTargetBlockOrLatest(ctx, h.Height, witness) if err != nil { - errc <- err + if c.providerShouldBeRemoved(err) { + errc <- errBadWitness{Reason: err, WitnessIndex: witnessIndex} + } else { + errc <- err + } return } @@ -155,7 +159,11 @@ func (c *Client) compareNewHeaderWithWitness(ctx context.Context, errc chan erro time.Sleep(2*c.maxClockDrift + c.maxBlockLag) isTargetHeight, lightBlock, err = c.getTargetBlockOrLatest(ctx, h.Height, witness) if err != nil { - errc <- errBadWitness{Reason: err, WitnessIndex: witnessIndex} + if c.providerShouldBeRemoved(err) { + errc <- errBadWitness{Reason: err, WitnessIndex: witnessIndex} + } else { + errc <- err + } return } if isTargetHeight { diff --git a/light/verifier.go b/light/verifier.go index d9c8019eb..ee4bfb053 100644 --- a/light/verifier.go +++ b/light/verifier.go @@ -50,8 +50,9 @@ func VerifyNonAdjacent( return err } - if HeaderExpired(trustedHeader, trustingPeriod, now) { - return ErrOldHeaderExpired{trustedHeader.Time.Add(trustingPeriod), now} + // check if the untrusted header is within the trust period + if HeaderExpired(untrustedHeader, trustingPeriod, now) { + return ErrOldHeaderExpired{untrustedHeader.Time.Add(trustingPeriod), now} } if err := verifyNewHeaderAndVals( @@ -117,8 +118,9 @@ func VerifyAdjacent( return errors.New("headers must be adjacent in height") } - if HeaderExpired(trustedHeader, trustingPeriod, now) { - return ErrOldHeaderExpired{trustedHeader.Time.Add(trustingPeriod), now} + // check if the untrusted header is within the trust period + if HeaderExpired(untrustedHeader, trustingPeriod, now) { + return ErrOldHeaderExpired{untrustedHeader.Time.Add(trustingPeriod), now} } if err := verifyNewHeaderAndVals( @@ -192,7 +194,10 @@ func HeaderExpired(h *types.SignedHeader, trustingPeriod time.Duration, now time // of the trusted header // // For any of these cases ErrInvalidHeader is returned. -// NOTE: This does not check whether the trusted header has expired or not. +// NOTE: This does not check whether the trusted or untrusted header has expired +// or not. These checks are not necessary because the detector never runs during +// backwards verification and thus evidence that needs to be within a certain +// time bound is never sent. func VerifyBackwards(untrustedHeader, trustedHeader *types.Header) error { if err := untrustedHeader.ValidateBasic(); err != nil { return ErrInvalidHeader{err} @@ -220,6 +225,8 @@ func VerifyBackwards(untrustedHeader, trustedHeader *types.Header) error { return nil } +// NOTE: This function assumes that untrustedHeader is after trustedHeader. +// Do not use for backwards verification. func verifyNewHeaderAndVals( untrustedHeader *types.SignedHeader, untrustedVals *types.ValidatorSet, diff --git a/light/verifier_test.go b/light/verifier_test.go index 556beee75..0432c130d 100644 --- a/light/verifier_test.go +++ b/light/verifier_test.go @@ -64,7 +64,7 @@ func TestVerifyAdjacentHeaders(t *testing.T) { keys.GenSignedHeader(chainID, nextHeight, bTime.Add(-1*time.Hour), nil, vals, vals, hash("app_hash"), hash("cons_hash"), hash("results_hash"), 0, len(keys)), vals, - 3 * time.Hour, + 4 * time.Hour, bTime.Add(2 * time.Hour), nil, "to be after old header time", @@ -146,7 +146,7 @@ func TestVerifyAdjacentHeaders(t *testing.T) { hash("app_hash"), hash("cons_hash"), hash("results_hash"), 0, len(keys)), keys.ToValidators(10, 1), 1 * time.Hour, - bTime.Add(1 * time.Hour), + bTime.Add(2 * time.Hour), nil, "old header has expired", },