From ff9a85ac4889222c2f78ec7ebe65bee4693e21fb Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Fri, 30 Oct 2020 17:09:34 -0700 Subject: [PATCH 1/6] Fix view change stuck issue --- consensus/consensus_service.go | 4 ++-- consensus/consensus_v2.go | 2 +- consensus/view_change.go | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/consensus/consensus_service.go b/consensus/consensus_service.go index 17a220de0..ffbb2f370 100644 --- a/consensus/consensus_service.go +++ b/consensus/consensus_service.go @@ -573,9 +573,9 @@ func (consensus *Consensus) selfCommit(payload []byte) error { continue } - if _, err := consensus.Decider.SubmitVote( + if _, err := consensus.Decider.AddNewVote( quorum.Commit, - []bls.SerializedPublicKey{key.Pub.Bytes}, + []*bls_cosi.PublicKeyWrapper{key.Pub}, key.Pri.SignHash(commitPayload), common.BytesToHash(consensus.blockHash[:]), block.NumberU64(), diff --git a/consensus/consensus_v2.go b/consensus/consensus_v2.go index 6360d77a0..ea1d1b9a0 100644 --- a/consensus/consensus_v2.go +++ b/consensus/consensus_v2.go @@ -517,7 +517,7 @@ func (consensus *Consensus) commitBlock(blk *types.Block, committedMsg *FBFTMess } atomic.AddUint64(&consensus.blockNum, 1) - consensus.SetCurBlockViewID(committedMsg.ViewID + 1) + consensus.SetViewIDs(committedMsg.ViewID + 1) consensus.LeaderPubKey = committedMsg.SenderPubkeys[0] // Update consensus keys at last so the change of leader status doesn't mess up normal flow if blk.IsLastBlockInEpoch() { diff --git a/consensus/view_change.go b/consensus/view_change.go index 37df69712..2603f0e53 100644 --- a/consensus/view_change.go +++ b/consensus/view_change.go @@ -131,9 +131,8 @@ func (consensus *Consensus) getNextViewID() (uint64, time.Duration) { if curTimestamp <= blockTimestamp { return consensus.fallbackNextViewID() } - totalNode := consensus.Decider.ParticipantsCount() // diff is at least 1, and it won't exceed the totalNode - diff := uint64(((curTimestamp - blockTimestamp) / viewChangeTimeout) % int64(totalNode)) + diff := uint64((curTimestamp - blockTimestamp) / viewChangeTimeout) nextViewID := diff + consensus.GetCurBlockViewID() consensus.getLogger().Info(). From d3eeb15ce79dc7f743d1cb35494b94cbe234eb23 Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Fri, 30 Oct 2020 17:18:12 -0700 Subject: [PATCH 2/6] make submitVote private --- consensus/construct_test.go | 4 ++-- consensus/quorum/one-node-one-vote.go | 2 +- consensus/quorum/one-node-staked-vote.go | 2 +- consensus/quorum/quorom_test.go | 14 +++++++------- consensus/quorum/quorum.go | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/consensus/construct_test.go b/consensus/construct_test.go index 24b45c11a..a0111fb5d 100644 --- a/consensus/construct_test.go +++ b/consensus/construct_test.go @@ -73,7 +73,7 @@ func TestConstructPreparedMessage(test *testing.T) { leaderKey.FromLibBLSPublicKey(leaderPubKey) validatorKey := bls.SerializedPublicKey{} validatorKey.FromLibBLSPublicKey(validatorPubKey) - consensus.Decider.SubmitVote( + consensus.Decider.submitVote( quorum.Prepare, []bls.SerializedPublicKey{leaderKey}, leaderPriKey.Sign(message), @@ -81,7 +81,7 @@ func TestConstructPreparedMessage(test *testing.T) { consensus.blockNum, consensus.GetCurBlockViewID(), ) - if _, err := consensus.Decider.SubmitVote( + if _, err := consensus.Decider.submitVote( quorum.Prepare, []bls.SerializedPublicKey{validatorKey}, validatorPriKey.Sign(message), diff --git a/consensus/quorum/one-node-one-vote.go b/consensus/quorum/one-node-one-vote.go index 45db9bcd3..7455d6e72 100644 --- a/consensus/quorum/one-node-one-vote.go +++ b/consensus/quorum/one-node-one-vote.go @@ -36,7 +36,7 @@ func (v *uniformVoteWeight) AddNewVote( for i, pubKey := range pubKeys { pubKeysBytes[i] = pubKey.Bytes } - return v.SubmitVote(p, pubKeysBytes, sig, headerHash, height, viewID) + return v.submitVote(p, pubKeysBytes, sig, headerHash, height, viewID) } // IsQuorumAchieved .. diff --git a/consensus/quorum/one-node-staked-vote.go b/consensus/quorum/one-node-staked-vote.go index 8023b03d8..0f65257c9 100644 --- a/consensus/quorum/one-node-staked-vote.go +++ b/consensus/quorum/one-node-staked-vote.go @@ -82,7 +82,7 @@ func (v *stakedVoteWeight) AddNewVote( pubKeysBytes[i] = pubKey.Bytes } - ballet, err := v.SubmitVote(p, pubKeysBytes, sig, headerHash, height, viewID) + ballet, err := v.submitVote(p, pubKeysBytes, sig, headerHash, height, viewID) if err != nil { return ballet, err diff --git a/consensus/quorum/quorom_test.go b/consensus/quorum/quorom_test.go index d6b91a5bf..73767ea25 100644 --- a/consensus/quorum/quorom_test.go +++ b/consensus/quorum/quorom_test.go @@ -88,7 +88,7 @@ func TestSubmitVote(test *testing.T) { decider.UpdateParticipants([]bls.PublicKeyWrapper{pubKeyWrapper1, pubKeyWrapper2}) - if _, err := decider.SubmitVote( + if _, err := decider.submitVote( Prepare, []bls.SerializedPublicKey{pubKeyWrapper1.Bytes}, blsPriKey1.Sign(message), @@ -99,7 +99,7 @@ func TestSubmitVote(test *testing.T) { test.Log(err) } - if _, err := decider.SubmitVote( + if _, err := decider.submitVote( Prepare, []bls.SerializedPublicKey{pubKeyWrapper2.Bytes}, blsPriKey2.Sign(message), @@ -110,7 +110,7 @@ func TestSubmitVote(test *testing.T) { test.Log(err) } if decider.SignersCount(Prepare) != 2 { - test.Fatal("SubmitVote failed") + test.Fatal("submitVote failed") } aggSig := &bls_core.Sign{} @@ -145,7 +145,7 @@ func TestSubmitVoteAggregateSig(test *testing.T) { decider.UpdateParticipants([]bls.PublicKeyWrapper{pubKeyWrapper1, pubKeyWrapper2}) - decider.SubmitVote( + decider.submitVote( Prepare, []bls.SerializedPublicKey{pubKeyWrapper1.Bytes}, blsPriKey1.SignHash(blockHash[:]), @@ -160,7 +160,7 @@ func TestSubmitVoteAggregateSig(test *testing.T) { aggSig.Add(s) } } - if _, err := decider.SubmitVote( + if _, err := decider.submitVote( Prepare, []bls.SerializedPublicKey{pubKeyWrapper2.Bytes, pubKeyWrapper3.Bytes}, aggSig, @@ -172,7 +172,7 @@ func TestSubmitVoteAggregateSig(test *testing.T) { } if decider.SignersCount(Prepare) != 3 { - test.Fatal("SubmitVote failed") + test.Fatal("submitVote failed") } aggSig.Add(blsPriKey1.SignHash(blockHash[:])) @@ -180,7 +180,7 @@ func TestSubmitVoteAggregateSig(test *testing.T) { test.Fatal("AggregateVotes failed") } - if _, err := decider.SubmitVote( + if _, err := decider.submitVote( Prepare, []bls.SerializedPublicKey{pubKeyWrapper2.Bytes}, aggSig, diff --git a/consensus/quorum/quorum.go b/consensus/quorum/quorum.go index f41d47ec1..ae82d7ef3 100644 --- a/consensus/quorum/quorum.go +++ b/consensus/quorum/quorum.go @@ -81,7 +81,7 @@ type ParticipantTracker interface { // SignatoryTracker .. type SignatoryTracker interface { ParticipantTracker - SubmitVote( + submitVote( p Phase, pubkeys []bls.SerializedPublicKey, sig *bls_core.Sign, headerHash common.Hash, height, viewID uint64, From 500142719f6457c167db3db13870052023dcfb62 Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Fri, 30 Oct 2020 17:25:39 -0700 Subject: [PATCH 3/6] change submitVote on quorum --- consensus/quorum/quorum.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/consensus/quorum/quorum.go b/consensus/quorum/quorum.go index ae82d7ef3..785aea4ac 100644 --- a/consensus/quorum/quorum.go +++ b/consensus/quorum/quorum.go @@ -273,7 +273,7 @@ func (s *cIdentities) SignersCount(p Phase) int64 { } } -func (s *cIdentities) SubmitVote( +func (s *cIdentities) submitVote( p Phase, pubkeys []bls.SerializedPublicKey, sig *bls_core.Sign, headerHash common.Hash, height, viewID uint64, From 6a72ca14ad6afafc2ae7747ee6becacbd7050b8b Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Fri, 30 Oct 2020 18:12:16 -0700 Subject: [PATCH 4/6] make block sync trigger less frequent --- api/service/syncing/syncing.go | 2 +- consensus/consensus_service.go | 6 ++++++ consensus/construct_test.go | 10 ++++++---- consensus/validator.go | 19 +++++++++++-------- 4 files changed, 24 insertions(+), 13 deletions(-) diff --git a/api/service/syncing/syncing.go b/api/service/syncing/syncing.go index 873da9e90..03a3b46ca 100644 --- a/api/service/syncing/syncing.go +++ b/api/service/syncing/syncing.go @@ -32,7 +32,7 @@ const ( TimesToFail = 5 // downloadBlocks service retry limit RegistrationNumber = 3 SyncingPortDifference = 3000 - inSyncThreshold = 0 // when peerBlockHeight - myBlockHeight <= inSyncThreshold, it's ready to join consensus + inSyncThreshold = 1 // when peerBlockHeight - myBlockHeight <= inSyncThreshold, it's ready to join consensus SyncLoopBatchSize uint32 = 1000 // maximum size for one query of block hashes verifyHeaderBatchSize uint64 = 100 // block chain header verification batch size SyncLoopFrequency = 1 // unit in second diff --git a/consensus/consensus_service.go b/consensus/consensus_service.go index ffbb2f370..b3ede1404 100644 --- a/consensus/consensus_service.go +++ b/consensus/consensus_service.go @@ -421,6 +421,12 @@ func (consensus *Consensus) UpdateConsensusInformation() Mode { Msg("[UpdateConsensusInformation] changing committee") // take care of possible leader change during the epoch + // TODO: in a very rare case, when a M1 view change happened, the block contains coinbase for last leader + // but the new leader is actually recognized by most of the nodes. At this time, if a node sync to this + // exact block and set its leader, it will set with the failed leader as in the coinbase of the block. + // This is a very rare case scenario and not likely to cause any issue in mainnet. But we need to think about + // a solution to take care of this case because the coinbase of the latest block doesn't really represent the + // the real current leader in case of M1 view change. if !curHeader.IsLastBlockInEpoch() && curHeader.Number().Uint64() != 0 { leaderPubKey, err := consensus.getLeaderPubKeyFromCoinbase(curHeader) if err != nil || leaderPubKey == nil { diff --git a/consensus/construct_test.go b/consensus/construct_test.go index a0111fb5d..27d1e82f4 100644 --- a/consensus/construct_test.go +++ b/consensus/construct_test.go @@ -71,19 +71,21 @@ func TestConstructPreparedMessage(test *testing.T) { message := "test string" leaderKey := bls.SerializedPublicKey{} leaderKey.FromLibBLSPublicKey(leaderPubKey) + leaderKeyWrapper := bls.PublicKeyWrapper{Object: leaderPubKey, Bytes: leaderKey} validatorKey := bls.SerializedPublicKey{} validatorKey.FromLibBLSPublicKey(validatorPubKey) - consensus.Decider.submitVote( + validatorKeyWrapper := bls.PublicKeyWrapper{Object: validatorPubKey, Bytes: validatorKey} + consensus.Decider.AddNewVote( quorum.Prepare, - []bls.SerializedPublicKey{leaderKey}, + []*bls.PublicKeyWrapper{&leaderKeyWrapper}, leaderPriKey.Sign(message), common.BytesToHash(consensus.blockHash[:]), consensus.blockNum, consensus.GetCurBlockViewID(), ) - if _, err := consensus.Decider.submitVote( + if _, err := consensus.Decider.AddNewVote( quorum.Prepare, - []bls.SerializedPublicKey{validatorKey}, + []*bls.PublicKeyWrapper{&validatorKeyWrapper}, validatorPriKey.Sign(message), common.BytesToHash(consensus.blockHash[:]), consensus.blockNum, diff --git a/consensus/validator.go b/consensus/validator.go index bff01506e..a1f83de65 100644 --- a/consensus/validator.go +++ b/consensus/validator.go @@ -114,10 +114,6 @@ func (consensus *Consensus) onPrepared(msg *msg_pb.Message) { Msg("Wrong BlockNum Received, ignoring!") return } - if recvMsg.BlockNum > consensus.blockNum { - consensus.getLogger().Warn().Msgf("[OnPrepared] low consensus block number. Spin sync") - consensus.spinUpStateSync() - } // check validity of prepared signature blockHash := recvMsg.BlockHash @@ -153,6 +149,12 @@ func (consensus *Consensus) onPrepared(msg *msg_pb.Message) { if !consensus.onPreparedSanityChecks(&blockObj, recvMsg) { return } + + if recvMsg.BlockNum > consensus.blockNum { + consensus.getLogger().Warn().Msgf("[OnPrepared] low consensus block number. Spin sync") + consensus.spinUpStateSync() + } + consensus.mutex.Lock() defer consensus.mutex.Unlock() @@ -236,10 +238,6 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { if !consensus.isRightBlockNumCheck(recvMsg) { return } - if recvMsg.BlockNum > consensus.blockNum { - consensus.getLogger().Info().Msg("[OnCommitted] low consensus block number. Spin up state sync") - consensus.spinUpStateSync() - } aggSig, mask, err := consensus.ReadSignatureBitmapPayload(recvMsg.Payload, 0) if err != nil { @@ -272,6 +270,11 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { consensus.FBFTLog.AddMessage(recvMsg) + if recvMsg.BlockNum > consensus.blockNum { + consensus.getLogger().Info().Msg("[OnCommitted] low consensus block number. Spin up state sync") + consensus.spinUpStateSync() + } + consensus.mutex.Lock() defer consensus.mutex.Unlock() From 786162c0a7c60a9bd97da48d7d4fb4172e38b21a Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Fri, 30 Oct 2020 18:31:42 -0700 Subject: [PATCH 5/6] Add more comments --- consensus/quorum/quorum.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/consensus/quorum/quorum.go b/consensus/quorum/quorum.go index 785aea4ac..674ea96c0 100644 --- a/consensus/quorum/quorum.go +++ b/consensus/quorum/quorum.go @@ -81,6 +81,7 @@ type ParticipantTracker interface { // SignatoryTracker .. type SignatoryTracker interface { ParticipantTracker + // This func shouldn't be called directly from outside of quorum. Use AddNewVote instead. submitVote( p Phase, pubkeys []bls.SerializedPublicKey, sig *bls_core.Sign, headerHash common.Hash, @@ -118,6 +119,7 @@ type Decider interface { DependencyInjectionWriter SetVoters(subCommittee *shard.Committee, epoch *big.Int) (*TallyResult, error) Policy() Policy + // Add new vote will add the signature in the memory and increase the cumulative voting power AddNewVote( p Phase, pubkeys []*bls_cosi.PublicKeyWrapper, sig *bls_core.Sign, headerHash common.Hash, From 38fe2a4422e07216b9506169373380b7a31ed989 Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Fri, 30 Oct 2020 18:34:52 -0700 Subject: [PATCH 6/6] fix comment --- consensus/view_change.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/consensus/view_change.go b/consensus/view_change.go index 2603f0e53..3ef66eb9d 100644 --- a/consensus/view_change.go +++ b/consensus/view_change.go @@ -131,7 +131,7 @@ func (consensus *Consensus) getNextViewID() (uint64, time.Duration) { if curTimestamp <= blockTimestamp { return consensus.fallbackNextViewID() } - // diff is at least 1, and it won't exceed the totalNode + // diff only increases diff := uint64((curTimestamp - blockTimestamp) / viewChangeTimeout) nextViewID := diff + consensus.GetCurBlockViewID()