Add logging for `NthNextHmy` panic (#4341)

* improve logging

* [consensus] do not try to use negative index

* Revert "do not try to use negative index"

This reverts commit b434fd3f4af39f32650e909292cc0123bedba86e. We have
fixed the cause of the issue, which was time drift on a new cloud
provider's nodes; see `systemd-timesyncd.service`.

Even if this fix had been merged, it would likely not have solved the
problem, since nodes with the correct time would have picked a different
leader than the nodes with time drift. In other words, the view change
still would not have gone through.

* improve logging
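
To illustrate the revert rationale: a minimal sketch, assuming a timestamp-based next-view computation (nextViewID below is a hypothetical paraphrase, not the exact formula in consensus/view_change.go), of how clock drift splits validators across different view IDs and therefore different leaders:

package main

import "fmt"

// nextViewID derives the next view from how long the chain has been
// stuck past the last block's timestamp (hypothetical paraphrase).
func nextViewID(lastViewID uint64, now, blockTime, timeoutSec int64) uint64 {
	if now <= blockTime {
		return lastViewID + 1 // degenerate fallback for this sketch
	}
	return lastViewID + uint64((now-blockTime)/timeoutSec) + 1
}

func main() {
	const blockTime, timeout = 1_000_000, 60
	correct := nextViewID(7, blockTime+130, blockTime, timeout) // clock in sync
	drifted := nextViewID(7, blockTime+10, blockTime, timeout)  // clock ~2 min behind
	// Different view IDs map to different leaders, so neither group
	// reaches quorum and the view change stalls.
	fmt.Println(correct, drifted) // 10 8
}
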
commit 20e4892fd7 (parent 8e6bbd0a61)
Authored by Max, committed via GitHub
6 changed files:

  1. consensus/quorum/one-node-staked-vote.go (3 changed lines)
  2. consensus/quorum/quorum.go (5 changed lines)
  3. consensus/view_change.go (4 changed lines)
  4. consensus/view_change_construct.go (6 changed lines)
  5. core/blockchain_impl.go (1 changed line)
  6. core/rawdb/accessors_offchain.go (4 changed lines)

@@ -157,6 +157,9 @@ func (v *stakedVoteWeight) IsQuorumAchievedByMask(mask *bls_cosi.Mask) bool {
 	if currentTotalPower == nil {
 		return false
 	}
+	const msg = "[IsQuorumAchievedByMask] Voting power: need %+v, have %+v"
+	utils.Logger().Debug().
+		Msgf(msg, threshold, currentTotalPower)
 	return (*currentTotalPower).GT(threshold)
 }
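
For context, utils.Logger() in this repo is backed by zerolog; below is a freestanding sketch of the Debug().Msgf chain used above, with float placeholders standing in for the numeric.Dec voting-power values:

package main

import (
	"os"

	"github.com/rs/zerolog"
)

func main() {
	logger := zerolog.New(os.Stderr).With().Timestamp().Logger()

	// Placeholders for threshold and currentTotalPower.
	threshold, currentTotalPower := 0.667, 0.741

	// Hoisting the format string into a const, as the hunk does,
	// keeps the logging call chain short.
	const msg = "[IsQuorumAchievedByMask] Voting power: need %+v, have %+v"
	logger.Debug().Msgf(msg, threshold, currentTotalPower)
}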

@@ -12,6 +12,7 @@ import (
 	"github.com/harmony-one/harmony/consensus/votepower"
 	bls_cosi "github.com/harmony-one/harmony/crypto/bls"
 	shardingconfig "github.com/harmony-one/harmony/internal/configs/sharding"
+	"github.com/harmony-one/harmony/internal/utils"
 	"github.com/harmony-one/harmony/multibls"
 	"github.com/harmony-one/harmony/numeric"
 	"github.com/harmony-one/harmony/shard"
@@ -224,6 +225,10 @@ func (s *cIdentities) NthNextHmy(instance shardingconfig.Instance, pubKey *bls.P
 	idx := s.IndexOf(pubKey.Bytes)
 	if idx != -1 {
 		found = true
+	} else {
+		utils.Logger().Error().
+			Str("key", pubKey.Bytes.Hex()).
+			Msg("[NthNextHmy] pubKey not found")
 	}
 	numNodes := instance.NumHarmonyOperatedNodesPerShard()
 	// sanity check to avoid out of bound access
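
The panic this PR adds logging for stems from Go's remainder semantics: % keeps the sign of the dividend, so the -1 that IndexOf returns on a miss survives a modulo "wrap" and indexes out of range. A minimal reproduction (the arithmetic paraphrases NthNextHmy; the exact expression in quorum.go may differ):

package main

import "fmt"

func main() {
	committee := []string{"key0", "key1", "key2"}

	idx := -1 // what IndexOf yields when pubKey is missing
	next := 0

	// In Go, -1 % 3 == -1 (the sign follows the dividend), so taking
	// the result modulo the committee size does not rescue a negative index.
	pos := (idx + next) % len(committee)
	fmt.Println(pos) // -1

	_ = committee[pos] // panic: index out of range [-1]
}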

@@ -142,6 +142,10 @@ func (consensus *Consensus) getNextViewID() (uint64, time.Duration) {
 	// timestamp messed up in current validator node
 	if curTimestamp <= blockTimestamp {
+		consensus.getLogger().Error().
+			Int64("curTimestamp", curTimestamp).
+			Int64("blockTimestamp", blockTimestamp).
+			Msg("[getNextViewID] timestamp of block too high")
 		return consensus.fallbackNextViewID()
 	}
 	// diff only increases, since view change timeout is shorter than
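
The guard above fires when the validator's clock is at or behind the sealed block's timestamp, which is exactly the symptom of the time drift described in the commit message. A toy reproduction of the condition (standalone, not the consensus API):

package main

import (
	"fmt"
	"time"
)

func main() {
	// Suppose the block was sealed by a peer whose clock runs 30s ahead.
	blockTimestamp := time.Now().Unix() + 30

	curTimestamp := time.Now().Unix()
	if curTimestamp <= blockTimestamp {
		// Elapsed time since the block is non-positive, so a
		// timestamp-derived view ID would be meaningless; the real
		// code logs both values and falls back.
		fmt.Printf("curTimestamp=%d blockTimestamp=%d: fall back\n",
			curTimestamp, blockTimestamp)
	}
}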

@@ -444,7 +444,8 @@ func (vc *viewChange) InitPayload(
 		vc.nilBitmap[viewID] = nilBitmap
 	}
 	if err := vc.nilBitmap[viewID].SetKey(key.Pub.Bytes, true); err != nil {
-		vc.getLogger().Warn().Str("key", key.Pub.Bytes.Hex()).Msg("[InitPayload] nilBitmap setkey failed")
+		vc.getLogger().Warn().Err(err).
+			Str("key", key.Pub.Bytes.Hex()).Msg("[InitPayload] nilBitmap setkey failed")
 		continue
 	}
 	if _, ok := vc.nilSigs[viewID]; !ok {
@@ -475,7 +476,8 @@ func (vc *viewChange) InitPayload(
 		vc.viewIDBitmap[viewID] = viewIDBitmap
 	}
 	if err := vc.viewIDBitmap[viewID].SetKey(key.Pub.Bytes, true); err != nil {
-		vc.getLogger().Warn().Str("key", key.Pub.Bytes.Hex()).Msg("[InitPayload] viewIDBitmap setkey failed")
+		vc.getLogger().Warn().Err(err).
+			Str("key", key.Pub.Bytes.Hex()).Msg("[InitPayload] viewIDBitmap setkey failed")
 		continue
 	}
 	if _, ok := vc.viewIDSigs[viewID]; !ok {
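
Attaching Err(err) in both hunks is more than cosmetic: zerolog serializes the error as its own field, so the reason SetKey failed lands in the structured output instead of being dropped. A sketch of the output difference (the error message is hypothetical):

package main

import (
	"errors"
	"os"

	"github.com/rs/zerolog"
)

func main() {
	logger := zerolog.New(os.Stderr)
	err := errors.New("key not found in mask") // hypothetical SetKey failure

	// Before: the underlying error never reached the log.
	logger.Warn().Str("key", "0xabc").Msg("[InitPayload] nilBitmap setkey failed")
	// {"level":"warn","key":"0xabc","message":"[InitPayload] nilBitmap setkey failed"}

	// After: Err(err) adds an "error" field carrying the reason.
	logger.Warn().Err(err).Str("key", "0xabc").Msg("[InitPayload] nilBitmap setkey failed")
	// {"level":"warn","error":"key not found in mask","key":"0xabc","message":"[InitPayload] nilBitmap setkey failed"}
}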

@@ -428,6 +428,7 @@ func (bc *BlockChainImpl) ValidateNewBlock(block *types.Block, beaconChain Block
 		bc, block.Header(),
 	); err != nil {
 		utils.Logger().Error().
+			Uint64("blockNum", block.NumberU64()).
 			Str("blockHash", block.Hash().Hex()).
 			Err(err).
 			Msg("[ValidateNewBlock] Cannot verify vrf for the new block")

@@ -21,7 +21,9 @@ func ReadShardState(
 ) (*shard.State, error) {
 	data, err := db.Get(shardStateKey(epoch))
 	if err != nil {
-		return nil, errors.New(MsgNoShardStateFromDB)
+		return nil, errors.Errorf(
+			MsgNoShardStateFromDB, "epoch: %d", epoch,
+		)
 	}
 	ss, err2 := shard.DecodeWrapper(data)
 	if err2 != nil {
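
errors.Errorf here is github.com/pkg/errors' printf-style constructor, which also records a stack trace; for the call above to format cleanly, MsgNoShardStateFromDB must contain verbs matching the two extra arguments. A minimal sketch with a hypothetical format string:

package main

import (
	"fmt"

	"github.com/pkg/errors"
)

// Hypothetical stand-in for the constant in core/rawdb; the point is that
// the format needs a verb for each extra argument handed to Errorf.
const msgNoShardStateFromDB = "no shard state in database for %s %d"

func main() {
	epoch := uint64(42)
	err := errors.Errorf(msgNoShardStateFromDB, "epoch:", epoch)
	fmt.Println(err)         // no shard state in database for epoch: 42
	fmt.Printf("%+v\n", err) // with pkg/errors, %+v also prints the stack trace
}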
