From 611448a0a13fe2014ecf3fc5e61e625da8f536b6 Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Sat, 24 Oct 2020 17:16:51 -0700 Subject: [PATCH 01/26] pipelining wireframe --- api/service/blockproposal/service.go | 9 +- cmd/harmony/main.go | 2 +- consensus/consensus.go | 7 +- consensus/consensus_msg_sender.go | 7 +- consensus/consensus_service.go | 26 +++--- consensus/consensus_v2.go | 134 ++++++++++++++++++++------- consensus/construct.go | 46 +++++---- consensus/double_sign.go | 4 +- consensus/engine/consensus_engine.go | 2 +- consensus/leader.go | 12 ++- consensus/threshold.go | 3 +- consensus/validator.go | 53 +++++++---- consensus/view_change.go | 2 + core/headerchain.go | 2 +- internal/chain/sig.go | 6 +- node/node.go | 4 +- node/node_handler.go | 6 +- node/node_newblock.go | 34 ++++--- node/service_setup.go | 2 +- shard/committee/assignment.go | 2 +- 20 files changed, 238 insertions(+), 125 deletions(-) diff --git a/api/service/blockproposal/service.go b/api/service/blockproposal/service.go index 02eb97efd..3ec4b398e 100644 --- a/api/service/blockproposal/service.go +++ b/api/service/blockproposal/service.go @@ -11,13 +11,14 @@ type Service struct { stopChan chan struct{} stoppedChan chan struct{} readySignal chan struct{} + commitSigsChan chan []byte messageChan chan *msg_pb.Message - waitForConsensusReady func(readySignal chan struct{}, stopChan chan struct{}, stoppedChan chan struct{}) + waitForConsensusReady func(readySignal chan struct{}, commitSigsChan chan []byte, stopChan chan struct{}, stoppedChan chan struct{}) } // New returns a block proposal service. -func New(readySignal chan struct{}, waitForConsensusReady func(readySignal chan struct{}, stopChan chan struct{}, stoppedChan chan struct{})) *Service { - return &Service{readySignal: readySignal, waitForConsensusReady: waitForConsensusReady} +func New(readySignal chan struct{}, commitSigsChan chan []byte, waitForConsensusReady func(readySignal chan struct{}, commitSigsChan chan []byte, stopChan chan struct{}, stoppedChan chan struct{})) *Service { + return &Service{readySignal: readySignal, commitSigsChan: commitSigsChan, waitForConsensusReady: waitForConsensusReady} } // StartService starts block proposal service. @@ -35,7 +36,7 @@ func (s *Service) Init() { // Run runs block proposal. func (s *Service) Run(stopChan chan struct{}, stoppedChan chan struct{}) { - s.waitForConsensusReady(s.readySignal, s.stopChan, s.stoppedChan) + s.waitForConsensusReady(s.readySignal, s.commitSigsChan, s.stopChan, s.stoppedChan) } // StopService stops block proposal service. diff --git a/cmd/harmony/main.go b/cmd/harmony/main.go index 6ff28d924..dd551ecc8 100644 --- a/cmd/harmony/main.go +++ b/cmd/harmony/main.go @@ -613,7 +613,7 @@ func setupConsensusAndNode(hc harmonyConfig, nodeConfig *nodeconfig.ConfigType) } // TODO: refactor the creation of blockchain out of node.New() - currentConsensus.ChainReader = currentNode.Blockchain() + currentConsensus.Blockchain = currentNode.Blockchain() currentNode.NodeConfig.DNSZone = hc.Network.DNSZone currentNode.NodeConfig.SetBeaconGroupID( diff --git a/consensus/consensus.go b/consensus/consensus.go index ee4e41102..b626bd324 100644 --- a/consensus/consensus.go +++ b/consensus/consensus.go @@ -54,8 +54,8 @@ type Consensus struct { multiSigBitmap *bls_cosi.Mask // Bitmap for parsing multisig bitmap from validators multiSigMutex sync.RWMutex - // The chain reader for the blockchain this consensus is working on - ChainReader *core.BlockChain + // The blockchain this consensus is working on + Blockchain *core.BlockChain // Minimal number of peers in the shard // If the number of validators is less than minPeers, the consensus won't start MinPeers int @@ -81,6 +81,8 @@ type Consensus struct { vc *viewChange // Signal channel for starting a new consensus process ReadySignal chan struct{} + // Channel to send full commit signatures to finish new block proposal + CommitSigChannel chan []byte // The post-consensus processing func passed from Node object // Called when consensus on a new block is done OnConsensusDone func(*types.Block) error @@ -207,6 +209,7 @@ func New( consensus.SlashChan = make(chan slash.Record) consensus.commitFinishChan = make(chan uint64) consensus.ReadySignal = make(chan struct{}) + consensus.CommitSigChannel = make(chan []byte) // channel for receiving newly generated VDF consensus.RndChannel = make(chan [vdfAndSeedSize]byte) consensus.IgnoreViewIDCheck = abool.NewBool(false) diff --git a/consensus/consensus_msg_sender.go b/consensus/consensus_msg_sender.go index 9d1da5149..bbd63a746 100644 --- a/consensus/consensus_msg_sender.go +++ b/consensus/consensus_msg_sender.go @@ -56,7 +56,7 @@ func (sender *MessageSender) Reset(blockNum uint64) { } // SendWithRetry sends message with retry logic. -func (sender *MessageSender) SendWithRetry(blockNum uint64, msgType msg_pb.MessageType, groups []nodeconfig.GroupID, p2pMsg []byte) error { +func (sender *MessageSender) SendWithRetry(blockNum uint64, msgType msg_pb.MessageType, groups []nodeconfig.GroupID, p2pMsg []byte, immediate bool) error { if sender.retryTimes != 0 { msgRetry := MessageRetry{blockNum: blockNum, groups: groups, p2pMsg: p2pMsg, msgType: msgType, retryCount: 0} atomic.StoreUint32(&msgRetry.isActive, 1) @@ -65,7 +65,10 @@ func (sender *MessageSender) SendWithRetry(blockNum uint64, msgType msg_pb.Messa sender.Retry(&msgRetry) }() } - return sender.host.SendMessageToGroups(groups, p2pMsg) + if immediate { + return sender.host.SendMessageToGroups(groups, p2pMsg) + } + return nil } // SendWithoutRetry sends message without retry logic. diff --git a/consensus/consensus_service.go b/consensus/consensus_service.go index be5db99d8..153ca33e1 100644 --- a/consensus/consensus_service.go +++ b/consensus/consensus_service.go @@ -243,7 +243,7 @@ func (consensus *Consensus) ReadSignatureBitmapPayload( func (consensus *Consensus) getLeaderPubKeyFromCoinbase( header *block.Header, ) (*bls.PublicKeyWrapper, error) { - shardState, err := consensus.ChainReader.ReadShardState(header.Epoch()) + shardState, err := consensus.Blockchain.ReadShardState(header.Epoch()) if err != nil { return nil, errors.Wrapf(err, "cannot read shard state %v %s", header.Epoch(), @@ -257,7 +257,7 @@ func (consensus *Consensus) getLeaderPubKeyFromCoinbase( } committerKey := new(bls_core.PublicKey) - isStaking := consensus.ChainReader.Config().IsStaking(header.Epoch()) + isStaking := consensus.Blockchain.Config().IsStaking(header.Epoch()) for _, member := range committee.Slots { if isStaking { // After staking the coinbase address will be the address of bls public key @@ -293,7 +293,7 @@ func (consensus *Consensus) getLeaderPubKeyFromCoinbase( // (b) node in committed but has any err during processing: Syncing mode // (c) node in committed and everything looks good: Normal mode func (consensus *Consensus) UpdateConsensusInformation() Mode { - curHeader := consensus.ChainReader.CurrentHeader() + curHeader := consensus.Blockchain.CurrentHeader() curEpoch := curHeader.Epoch() nextEpoch := new(big.Int).Add(curHeader.Epoch(), common.Big1) @@ -308,18 +308,18 @@ func (consensus *Consensus) UpdateConsensusInformation() Mode { } } - consensus.BlockPeriod = 5 * time.Second + consensus.BlockPeriod = 3 * time.Second // Enable aggregate sig at epoch 1000 for mainnet, at epoch 53000 for testnet, and always for other nets. - if (consensus.ChainReader.Config().ChainID == params.MainnetChainID && curEpoch.Cmp(big.NewInt(1000)) > 0) || - (consensus.ChainReader.Config().ChainID == params.TestnetChainID && curEpoch.Cmp(big.NewInt(54500)) > 0) || - (consensus.ChainReader.Config().ChainID != params.MainnetChainID && consensus.ChainReader.Config().ChainID != params.TestChainID) { + if (consensus.Blockchain.Config().ChainID == params.MainnetChainID && curEpoch.Cmp(big.NewInt(1000)) > 0) || + (consensus.Blockchain.Config().ChainID == params.TestnetChainID && curEpoch.Cmp(big.NewInt(54500)) > 0) || + (consensus.Blockchain.Config().ChainID != params.MainnetChainID && consensus.Blockchain.Config().ChainID != params.TestChainID) { consensus.AggregateSig = true } - isFirstTimeStaking := consensus.ChainReader.Config().IsStaking(nextEpoch) && - curHeader.IsLastBlockInEpoch() && !consensus.ChainReader.Config().IsStaking(curEpoch) - haventUpdatedDecider := consensus.ChainReader.Config().IsStaking(curEpoch) && + isFirstTimeStaking := consensus.Blockchain.Config().IsStaking(nextEpoch) && + curHeader.IsLastBlockInEpoch() && !consensus.Blockchain.Config().IsStaking(curEpoch) + haventUpdatedDecider := consensus.Blockchain.Config().IsStaking(curEpoch) && consensus.Decider.Policy() != quorum.SuperMajorityStake // Only happens once, the flip-over to a new Decider policy @@ -335,7 +335,7 @@ func (consensus *Consensus) UpdateConsensusInformation() Mode { epochToSet := curEpoch hasError := false curShardState, err := committee.WithStakingEnabled.ReadFromDB( - curEpoch, consensus.ChainReader, + curEpoch, consensus.Blockchain, ) if err != nil { utils.Logger().Error(). @@ -351,7 +351,7 @@ func (consensus *Consensus) UpdateConsensusInformation() Mode { if curHeader.IsLastBlockInEpoch() && isNotGenesisBlock { nextShardState, err := committee.WithStakingEnabled.ReadFromDB( - nextEpoch, consensus.ChainReader, + nextEpoch, consensus.Blockchain, ) if err != nil { utils.Logger().Error(). @@ -555,7 +555,7 @@ func (consensus *Consensus) selfCommit(payload []byte) error { consensus.switchPhase("selfCommit", FBFTCommit) consensus.aggregatedPrepareSig = aggSig consensus.prepareBitmap = mask - commitPayload := signature.ConstructCommitPayload(consensus.ChainReader, + commitPayload := signature.ConstructCommitPayload(consensus.Blockchain, block.Epoch(), block.Hash(), block.NumberU64(), block.Header().ViewID().Uint64()) for i, key := range consensus.priKey { if err := consensus.commitBitmap.SetKey(key.Pub.Bytes, true); err != nil { diff --git a/consensus/consensus_v2.go b/consensus/consensus_v2.go index aaee214a9..4b9459277 100644 --- a/consensus/consensus_v2.go +++ b/consensus/consensus_v2.go @@ -100,28 +100,28 @@ func (consensus *Consensus) HandleMessageUpdate(ctx context.Context, msg *msg_pb return nil } -func (consensus *Consensus) finalizeCommits() { +func (consensus *Consensus) finalCommit() { consensus.getLogger().Info(). Int64("NumCommits", consensus.Decider.SignersCount(quorum.Commit)). - Msg("[finalizeCommits] Finalizing Block") + Msg("[finalCommit] Finalizing Consensus") beforeCatchupNum := consensus.blockNum + leaderPriKey, err := consensus.GetConsensusLeaderPrivateKey() if err != nil { - consensus.getLogger().Error().Err(err).Msg("[FinalizeCommits] leader not found") + consensus.getLogger().Error().Err(err).Msg("[finalCommit] leader not found") return } // Construct committed message network, err := consensus.construct(msg_pb.MessageType_COMMITTED, nil, []*bls.PrivateKeyWrapper{leaderPriKey}) if err != nil { consensus.getLogger().Warn().Err(err). - Msg("[FinalizeCommits] Unable to construct Committed message") + Msg("[finalCommit] Unable to construct Committed message") return } - msgToSend, aggSig, FBFTMsg := + msgToSend, FBFTMsg := network.Bytes, - network.OptionalAggregateSignature, network.FBFTMsg - consensus.aggregatedCommitSig = aggSig // this may not needed + commitSigAndBitmap := FBFTMsg.Payload // this may not needed consensus.FBFTLog.AddMessage(FBFTMsg) // find correct block content curBlockHash := consensus.blockHash @@ -129,31 +129,38 @@ func (consensus *Consensus) finalizeCommits() { if block == nil { consensus.getLogger().Warn(). Str("blockHash", hex.EncodeToString(curBlockHash[:])). - Msg("[FinalizeCommits] Cannot find block by hash") + Msg("[finalCommit] Cannot find block by hash") return } - consensus.tryCatchup() + consensus.commitBlock(block, FBFTMsg) + if consensus.blockNum-beforeCatchupNum != 1 { consensus.getLogger().Warn(). Uint64("beforeCatchupBlockNum", beforeCatchupNum). - Msg("[FinalizeCommits] Leader cannot provide the correct block for committed message") + Msg("[finalCommit] Leader cannot provide the correct block for committed message") return } // if leader success finalize the block, send committed message to validators + // TODO: once leader rotation is implemented, leader who is about to be switched out + // needs to send the committed message immediately so the next leader can + // have the full commit signatures for new block + // For now, the leader don't need to send immediately as the committed sig will be + // included in the next block and sent in next prepared message. + sendImmediately := false if err := consensus.msgSender.SendWithRetry( block.NumberU64(), msg_pb.MessageType_COMMITTED, []nodeconfig.GroupID{ nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), }, - p2p.ConstructMessage(msgToSend)); err != nil { - consensus.getLogger().Warn().Err(err).Msg("[finalizeCommits] Cannot send committed message") + p2p.ConstructMessage(msgToSend), sendImmediately); err != nil { + consensus.getLogger().Warn().Err(err).Msg("[finalCommit] Cannot send committed message") } else { consensus.getLogger().Info(). Hex("blockHash", curBlockHash[:]). Uint64("blockNum", consensus.blockNum). - Msg("[finalizeCommits] Sent Committed Message") + Msg("[finalCommit] Sent Committed Message") } // Dump new block into level db @@ -164,9 +171,9 @@ func (consensus *Consensus) finalizeCommits() { if consensus.consensusTimeout[timeoutBootstrap].IsActive() { consensus.consensusTimeout[timeoutBootstrap].Stop() - consensus.getLogger().Info().Msg("[finalizeCommits] Start consensus timer; stop bootstrap timer only once") + consensus.getLogger().Info().Msg("[finalCommit] Start consensus timer; stop bootstrap timer only once") } else { - consensus.getLogger().Info().Msg("[finalizeCommits] Start consensus timer") + consensus.getLogger().Info().Msg("[finalCommit] Start consensus timer") } consensus.consensusTimeout[timeoutConsensus].Start() @@ -180,11 +187,11 @@ func (consensus *Consensus) finalizeCommits() { Msg("HOORAY!!!!!!! CONSENSUS REACHED!!!!!!!") // Sleep to wait for the full block time - consensus.getLogger().Info().Msg("[finalizeCommits] Waiting for Block Time") + consensus.getLogger().Info().Msg("[finalCommit] Waiting for Block Time") <-time.After(time.Until(consensus.NextBlockDue)) - // Send signal to Node to propose the new block for consensus - consensus.ReadySignal <- struct{}{} + // Send commit sig/bitmap to finish the new block proposal + consensus.CommitSigChannel <- commitSigAndBitmap // Update time due for next block consensus.NextBlockDue = time.Now().Add(consensus.BlockPeriod) @@ -196,7 +203,7 @@ func (consensus *Consensus) BlockCommitSigs(blockNum uint64) ([]byte, error) { if consensus.blockNum <= 1 { return nil, nil } - lastCommits, err := consensus.ChainReader.ReadCommitSig(blockNum) + lastCommits, err := consensus.Blockchain.ReadCommitSig(blockNum) if err != nil || len(lastCommits) < bls.BLSSignatureSizeInBytes { msgs := consensus.FBFTLog.GetMessagesByTypeSeq( @@ -276,15 +283,15 @@ func (consensus *Consensus) Start( } case <-consensus.syncReadyChan: consensus.getLogger().Info().Msg("[ConsensusMainLoop] syncReadyChan") - consensus.SetBlockNum(consensus.ChainReader.CurrentHeader().Number().Uint64() + 1) - consensus.SetViewIDs(consensus.ChainReader.CurrentHeader().ViewID().Uint64() + 1) + consensus.SetBlockNum(consensus.Blockchain.CurrentHeader().Number().Uint64() + 1) + consensus.SetViewIDs(consensus.Blockchain.CurrentHeader().ViewID().Uint64() + 1) mode := consensus.UpdateConsensusInformation() consensus.current.SetMode(mode) consensus.getLogger().Info().Str("Mode", mode.String()).Msg("Node is IN SYNC") case <-consensus.syncNotReadyChan: consensus.getLogger().Info().Msg("[ConsensusMainLoop] syncNotReadyChan") - consensus.SetBlockNum(consensus.ChainReader.CurrentHeader().Number().Uint64() + 1) + consensus.SetBlockNum(consensus.Blockchain.CurrentHeader().Number().Uint64() + 1) consensus.current.SetMode(Syncing) consensus.getLogger().Info().Msg("[ConsensusMainLoop] Node is OUT OF SYNC") @@ -296,8 +303,8 @@ func (consensus *Consensus) Start( //VRF/VDF is only generated in the beacon chain if consensus.NeedsRandomNumberGeneration(newBlock.Header().Epoch()) { // generate VRF if the current block has a new leader - if !consensus.ChainReader.IsSameLeaderAsPreviousBlock(newBlock) { - vrfBlockNumbers, err := consensus.ChainReader.ReadEpochVrfBlockNums(newBlock.Header().Epoch()) + if !consensus.Blockchain.IsSameLeaderAsPreviousBlock(newBlock) { + vrfBlockNumbers, err := consensus.Blockchain.ReadEpochVrfBlockNums(newBlock.Header().Epoch()) if err != nil { consensus.getLogger().Info(). Uint64("MsgBlockNum", newBlock.NumberU64()). @@ -328,7 +335,7 @@ func (consensus *Consensus) Start( if (!vdfInProgress) && len(vrfBlockNumbers) >= consensus.VdfSeedSize() { //check local database to see if there's a VDF generated for this epoch //generate a VDF if no blocknum is available - _, err := consensus.ChainReader.ReadEpochVdfBlockNum(newBlock.Header().Epoch()) + _, err := consensus.Blockchain.ReadEpochVdfBlockNum(newBlock.Header().Epoch()) if err != nil { consensus.GenerateVdfAndProof(newBlock, vrfBlockNumbers) vdfInProgress = true @@ -349,7 +356,7 @@ func (consensus *Consensus) Start( Msg("[ConsensusMainLoop] failed to verify the VDF output") } else { //write the VDF only if VDF has not been generated - _, err := consensus.ChainReader.ReadEpochVdfBlockNum(newBlock.Header().Epoch()) + _, err := consensus.Blockchain.ReadEpochVdfBlockNum(newBlock.Header().Epoch()) if err == nil { consensus.getLogger().Info(). Uint64("MsgBlockNum", newBlock.NumberU64()). @@ -385,7 +392,7 @@ func (consensus *Consensus) Start( consensus.mutex.Lock() defer consensus.mutex.Unlock() if viewID == consensus.GetCurBlockViewID() { - consensus.finalizeCommits() + consensus.finalCommit() } }() @@ -465,6 +472,54 @@ func (consensus *Consensus) getLastMileBlocksAndMsg(bnStart uint64) ([]*types.Bl return blocks, msgs, nil } +// preCommitAndPropose commit the current block with 67% commit signatures and start +// proposing new block which will wait on the full commit signatures to finish +func (consensus *Consensus) preCommitAndPropose(blk *types.Block) error { + if blk == nil { + return errors.New("block to pre-commit is nil") + } + + leaderPriKey, err := consensus.GetConsensusLeaderPrivateKey() + if err != nil { + return err + } + + network, err := consensus.construct(msg_pb.MessageType_COMMITTED, nil, []*bls.PrivateKeyWrapper{leaderPriKey}) + if err != nil { + return errors.Wrap(err, "[preCommitAndPropose] Unable to construct Committed message") + } + + msgToSend, FBFTMsg := + network.Bytes, + network.FBFTMsg + consensus.FBFTLog.AddMessage(FBFTMsg) + + blk.SetCurrentCommitSig(FBFTMsg.Payload) + if err := consensus.OnConsensusDone(blk); err != nil { + consensus.getLogger().Error().Err(err).Msg("[preCommitAndPropose] Failed to add block to chain") + return err + } + + // if leader success finalize the block, send committed message to validators + if err := consensus.msgSender.SendWithRetry( + blk.NumberU64(), + msg_pb.MessageType_COMMITTED, []nodeconfig.GroupID{ + nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), + }, + p2p.ConstructMessage(msgToSend), true); err != nil { + consensus.getLogger().Warn().Err(err).Msg("[preCommitAndPropose] Cannot send committed message") + } else { + consensus.getLogger().Info(). + Str("blockHash", blk.Hash().Hex()). + Uint64("blockNum", consensus.blockNum). + Msg("[preCommitAndPropose] Sent Committed Message") + } + + // Send signal to Node to propose the new block for consensus + consensus.ReadySignal <- struct{}{} + return nil +} + // tryCatchup add the last mile block in PBFT log memory cache to blockchain. func (consensus *Consensus) tryCatchup() error { // TODO: change this to a more systematic symbol @@ -510,14 +565,22 @@ func (consensus *Consensus) tryCatchup() error { } func (consensus *Consensus) commitBlock(blk *types.Block, committedMsg *FBFTMessage) error { - if err := consensus.OnConsensusDone(blk); err != nil { - return err + if consensus.Blockchain.CurrentBlock().NumberU64() < blk.NumberU64() { + if err := consensus.OnConsensusDone(blk); err != nil { + return err + } } + if !committedMsg.HasSingleSender() { consensus.getLogger().Error().Msg("[TryCatchup] Leader message can not have multiple sender keys") return errIncorrectSender } + consensus.SetupForNewConsensus(blk, committedMsg) + return nil +} + +func (consensus *Consensus) SetupForNewConsensus(blk *types.Block, committedMsg *FBFTMessage) { atomic.AddUint64(&consensus.blockNum, 1) consensus.SetCurBlockViewID(committedMsg.ViewID + 1) consensus.LeaderPubKey = committedMsg.SenderPubkeys[0] @@ -526,7 +589,6 @@ func (consensus *Consensus) commitBlock(blk *types.Block, committedMsg *FBFTMess consensus.SetMode(consensus.UpdateConsensusInformation()) } consensus.ResetState() - return nil } func (consensus *Consensus) postCatchup(initBN uint64) { @@ -557,7 +619,7 @@ func (consensus *Consensus) GenerateVrfAndProof(newBlock *types.Block, vrfBlockN } sk := vrf_bls.NewVRFSigner(key.Pri) blockHash := [32]byte{} - previousHeader := consensus.ChainReader.GetHeaderByNumber( + previousHeader := consensus.Blockchain.GetHeaderByNumber( newBlock.NumberU64() - 1, ) if previousHeader == nil { @@ -582,7 +644,7 @@ func (consensus *Consensus) GenerateVrfAndProof(newBlock *types.Block, vrfBlockN func (consensus *Consensus) ValidateVrfAndProof(headerObj *block.Header) bool { vrfPk := vrf_bls.NewVRFVerifier(consensus.LeaderPubKey.Object) var blockHash [32]byte - previousHeader := consensus.ChainReader.GetHeaderByNumber( + previousHeader := consensus.Blockchain.GetHeaderByNumber( headerObj.Number().Uint64() - 1, ) if previousHeader == nil { @@ -610,7 +672,7 @@ func (consensus *Consensus) ValidateVrfAndProof(headerObj *block.Header) bool { return false } - vrfBlockNumbers, _ := consensus.ChainReader.ReadEpochVrfBlockNums( + vrfBlockNumbers, _ := consensus.Blockchain.ReadEpochVrfBlockNums( headerObj.Epoch(), ) consensus.getLogger().Info(). @@ -626,7 +688,7 @@ func (consensus *Consensus) GenerateVdfAndProof(newBlock *types.Block, vrfBlockN //derive VDF seed from VRFs generated in the current epoch seed := [32]byte{} for i := 0; i < consensus.VdfSeedSize(); i++ { - previousVrf := consensus.ChainReader.GetVrfByNumber(vrfBlockNumbers[i]) + previousVrf := consensus.Blockchain.GetVrfByNumber(vrfBlockNumbers[i]) for j := 0; j < len(seed); j++ { seed[j] = seed[j] ^ previousVrf[j] } @@ -660,7 +722,7 @@ func (consensus *Consensus) GenerateVdfAndProof(newBlock *types.Block, vrfBlockN // ValidateVdfAndProof validates the VDF/proof in the current epoch func (consensus *Consensus) ValidateVdfAndProof(headerObj *block.Header) bool { - vrfBlockNumbers, err := consensus.ChainReader.ReadEpochVrfBlockNums(headerObj.Epoch()) + vrfBlockNumbers, err := consensus.Blockchain.ReadEpochVrfBlockNums(headerObj.Epoch()) if err != nil { consensus.getLogger().Error().Err(err). Str("MsgBlockNum", headerObj.Number().String()). @@ -675,7 +737,7 @@ func (consensus *Consensus) ValidateVdfAndProof(headerObj *block.Header) bool { seed := [32]byte{} for i := 0; i < consensus.VdfSeedSize(); i++ { - previousVrf := consensus.ChainReader.GetVrfByNumber(vrfBlockNumbers[i]) + previousVrf := consensus.Blockchain.GetVrfByNumber(vrfBlockNumbers[i]) for j := 0; j < len(seed); j++ { seed[j] = seed[j] ^ previousVrf[j] } diff --git a/consensus/construct.go b/consensus/construct.go index 1bb334f62..f405cbdf1 100644 --- a/consensus/construct.go +++ b/consensus/construct.go @@ -98,16 +98,8 @@ func (consensus *Consensus) construct( // Do the signing, 96 byte of bls signature needMsgSig := true switch p { - case msg_pb.MessageType_PREPARED: - consensusMsg.Block = consensus.block - // Payload - buffer := bytes.Buffer{} - // 96 bytes aggregated signature - aggSig = consensus.Decider.AggregateVotes(quorum.Prepare) - buffer.Write(aggSig.Serialize()) - // Bitmap - buffer.Write(consensus.prepareBitmap.Bitmap) - consensusMsg.Payload = buffer.Bytes() + case msg_pb.MessageType_ANNOUNCE: + consensusMsg.Payload = consensus.blockHash[:] case msg_pb.MessageType_PREPARE: needMsgSig = false sig := bls_core.Sign{} @@ -126,16 +118,11 @@ func (consensus *Consensus) construct( } } consensusMsg.Payload = sig.Serialize() + case msg_pb.MessageType_PREPARED: + consensusMsg.Block = consensus.block + consensusMsg.Payload = consensus.constructQuorumSigAndBitmap(quorum.Prepare) case msg_pb.MessageType_COMMITTED: - buffer := bytes.Buffer{} - // 96 bytes aggregated signature - aggSig = consensus.Decider.AggregateVotes(quorum.Commit) - buffer.Write(aggSig.Serialize()) - // Bitmap - buffer.Write(consensus.commitBitmap.Bitmap) - consensusMsg.Payload = buffer.Bytes() - case msg_pb.MessageType_ANNOUNCE: - consensusMsg.Payload = consensus.blockHash[:] + consensusMsg.Payload = consensus.constructQuorumSigAndBitmap(quorum.Commit) } var marshaledMessage []byte @@ -171,3 +158,24 @@ func (consensus *Consensus) construct( OptionalAggregateSignature: aggSig, }, nil } + +// constructQuorumSigAndBitmap constructs the aggregated sig and bitmap as +// a byte slice in format of: [[aggregated sig], [sig bitmap]] +func (consensus *Consensus) constructQuorumSigAndBitmap(p quorum.Phase) []byte { + buffer := bytes.Buffer{} + // 96 bytes aggregated signature + aggSig := consensus.Decider.AggregateVotes(p) + buffer.Write(aggSig.Serialize()) + // Bitmap + if p == quorum.Prepare { + buffer.Write(consensus.prepareBitmap.Bitmap) + } else if p == quorum.Commit { + buffer.Write(consensus.commitBitmap.Bitmap) + } else { + utils.Logger().Error(). + Str("phase", p.String()). + Msg("[constructQuorumSigAndBitmap] Invalid phase is supplied.") + return []byte{} + } + return buffer.Bytes() +} diff --git a/consensus/double_sign.go b/consensus/double_sign.go index d11044afd..86dfd050e 100644 --- a/consensus/double_sign.go +++ b/consensus/double_sign.go @@ -40,8 +40,8 @@ func (consensus *Consensus) checkDoubleSign(recvMsg *FBFTMessage) bool { return true } - curHeader := consensus.ChainReader.CurrentHeader() - committee, err := consensus.ChainReader.ReadShardState(curHeader.Epoch()) + curHeader := consensus.Blockchain.CurrentHeader() + committee, err := consensus.Blockchain.ReadShardState(curHeader.Epoch()) if err != nil { consensus.getLogger().Err(err). Uint32("shard", consensus.ShardID). diff --git a/consensus/engine/consensus_engine.go b/consensus/engine/consensus_engine.go index f1f6bb46d..af6011506 100644 --- a/consensus/engine/consensus_engine.go +++ b/consensus/engine/consensus_engine.go @@ -15,7 +15,7 @@ import ( staking "github.com/harmony-one/harmony/staking/types" ) -// ChainReader defines a collection of methods needed to access the local +// Blockchain defines a collection of methods needed to access the local // blockchain during header and/or uncle verification. // Note this reader interface is still in process of being integrated with the BFT consensus. type ChainReader interface { diff --git a/consensus/leader.go b/consensus/leader.go index c473d5138..d67301311 100644 --- a/consensus/leader.go +++ b/consensus/leader.go @@ -48,7 +48,6 @@ func (consensus *Consensus) announce(block *types.Block) { } msgToSend, FPBTMsg := networkMessage.Bytes, networkMessage.FBFTMsg - // TODO(chao): review FPBT log data structure consensus.FBFTLog.AddMessage(FPBTMsg) consensus.getLogger().Debug(). Str("MsgBlockHash", FPBTMsg.BlockHash.Hex()). @@ -81,7 +80,7 @@ func (consensus *Consensus) announce(block *types.Block) { if err := consensus.msgSender.SendWithRetry( consensus.blockNum, msg_pb.MessageType_ANNOUNCE, []nodeconfig.GroupID{ nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), - }, p2p.ConstructMessage(msgToSend)); err != nil { + }, p2p.ConstructMessage(msgToSend), true); err != nil { consensus.getLogger().Warn(). Str("groupID", string(nodeconfig.NewGroupIDByShardID( nodeconfig.ShardID(consensus.ShardID), @@ -200,6 +199,9 @@ func (consensus *Consensus) onPrepare(msg *msg_pb.Message) { } func (consensus *Consensus) onCommit(msg *msg_pb.Message) { + if consensus.GetViewChangingID() == 10 { + return + } recvMsg, err := consensus.ParseFBFTMessage(msg) if err != nil { consensus.getLogger().Debug().Err(err).Msg("[OnCommit] Parse pbft message failed") @@ -242,7 +244,7 @@ func (consensus *Consensus) onCommit(msg *msg_pb.Message) { Msg("[OnCommit] Failed finding a matching block for committed message") return } - commitPayload := signature.ConstructCommitPayload(consensus.ChainReader, + commitPayload := signature.ConstructCommitPayload(consensus.Blockchain, blockObj.Epoch(), blockObj.Hash(), blockObj.NumberU64(), blockObj.Header().ViewID().Uint64()) logger = logger.With(). Uint64("MsgViewID", recvMsg.ViewID). @@ -301,6 +303,10 @@ func (consensus *Consensus) onCommit(msg *msg_pb.Message) { if !quorumWasMet && quorumIsMet { logger.Info().Msg("[OnCommit] 2/3 Enough commits received") + go func() { + consensus.preCommitAndPropose(blockObj) + }() + consensus.getLogger().Info().Msg("[OnCommit] Starting Grace Period") go func(viewID uint64) { time.Sleep(2500 * time.Millisecond) diff --git a/consensus/threshold.go b/consensus/threshold.go index c1ca8c6f1..f9ced1139 100644 --- a/consensus/threshold.go +++ b/consensus/threshold.go @@ -46,7 +46,7 @@ func (consensus *Consensus) didReachPrepareQuorum() error { Msg("[didReachPrepareQuorum] Unparseable block data") return err } - commitPayload := signature.ConstructCommitPayload(consensus.ChainReader, + commitPayload := signature.ConstructCommitPayload(consensus.Blockchain, blockObj.Epoch(), blockObj.Hash(), blockObj.NumberU64(), blockObj.Header().ViewID().Uint64()) // so by this point, everyone has committed to the blockhash of this block @@ -74,6 +74,7 @@ func (consensus *Consensus) didReachPrepareQuorum() error { nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), }, p2p.ConstructMessage(msgToSend), + true, ); err != nil { consensus.getLogger().Warn().Msg("[OnPrepare] Cannot send prepared message") } else { diff --git a/consensus/validator.go b/consensus/validator.go index 1f378c7bd..7bfc1ad1f 100644 --- a/consensus/validator.go +++ b/consensus/validator.go @@ -81,7 +81,7 @@ func (consensus *Consensus) sendCommitMessages(blockObj *types.Block) { priKeys := consensus.getPriKeysInCommittee() // Sign commit signature on the received block and construct the p2p messages - commitPayload := signature.ConstructCommitPayload(consensus.ChainReader, + commitPayload := signature.ConstructCommitPayload(consensus.Blockchain, blockObj.Epoch(), blockObj.Hash(), blockObj.NumberU64(), blockObj.Header().ViewID().Uint64()) p2pMsgs := consensus.constructP2pMessages(msg_pb.MessageType_COMMIT, commitPayload, priKeys) @@ -156,19 +156,6 @@ func (consensus *Consensus) onPrepared(msg *msg_pb.Message) { consensus.mutex.Lock() defer consensus.mutex.Unlock() - consensus.FBFTLog.AddBlock(&blockObj) - // add block field - blockPayload := make([]byte, len(recvMsg.Block)) - copy(blockPayload[:], recvMsg.Block[:]) - consensus.block = blockPayload - recvMsg.Block = []byte{} // save memory space - consensus.FBFTLog.AddMessage(recvMsg) - consensus.getLogger().Debug(). - Uint64("MsgViewID", recvMsg.ViewID). - Uint64("MsgBlockNum", recvMsg.BlockNum). - Hex("blockHash", recvMsg.BlockHash[:]). - Msg("[OnPrepared] Prepared message and block added") - // tryCatchup is also run in onCommitted(), so need to lock with commitMutex. if consensus.current.Mode() != Normal { // don't sign the block that is not verified @@ -185,6 +172,19 @@ func (consensus *Consensus) onPrepared(msg *msg_pb.Message) { } consensus.FBFTLog.MarkBlockVerified(&blockObj) + consensus.FBFTLog.AddBlock(&blockObj) + // add block field + blockPayload := make([]byte, len(recvMsg.Block)) + copy(blockPayload[:], recvMsg.Block[:]) + consensus.block = blockPayload + recvMsg.Block = []byte{} // save memory space + consensus.FBFTLog.AddMessage(recvMsg) + consensus.getLogger().Debug(). + Uint64("MsgViewID", recvMsg.ViewID). + Uint64("MsgBlockNum", recvMsg.BlockNum). + Hex("blockHash", recvMsg.BlockHash[:]). + Msg("[OnPrepared] Prepared message and block added") + if consensus.checkViewID(recvMsg) != nil { if consensus.current.Mode() == Normal { consensus.getLogger().Debug(). @@ -227,8 +227,12 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { consensus.getLogger().Warn().Msg("[OnCommitted] unable to parse msg") return } - // NOTE let it handle its own logs - if !consensus.isRightBlockNumCheck(recvMsg) { + // It's ok to receive committed message for last block due to pipelining. + // The committed message for last block could include more signatures now. + if recvMsg.BlockNum < consensus.blockNum-1 { + consensus.getLogger().Debug(). + Uint64("MsgBlockNum", recvMsg.BlockNum). + Msg("Wrong BlockNum Received, ignoring!") return } if recvMsg.BlockNum > consensus.blockNum { @@ -256,7 +260,7 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { Msg("[OnCommitted] Failed finding a matching block for committed message") return } - commitPayload := signature.ConstructCommitPayload(consensus.ChainReader, + commitPayload := signature.ConstructCommitPayload(consensus.Blockchain, blockObj.Epoch(), blockObj.Hash(), blockObj.NumberU64(), blockObj.Header().ViewID().Uint64()) if !aggSig.VerifyHash(mask.AggregatePublic, commitPayload) { consensus.getLogger().Error(). @@ -273,6 +277,21 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { consensus.aggregatedCommitSig = aggSig consensus.commitBitmap = mask + // If we already have a committed signature received before, check whether the new one + // has more signatures and if yes, override the old data. + // Otherwise, simply write the commit signature in db. + commitSigBitmap, err := consensus.Blockchain.ReadCommitSig(blockObj.NumberU64()) + if err == nil && len(commitSigBitmap) == len(recvMsg.Payload) { + new := mask.CountEnabled() + mask.SetMask(commitSigBitmap[bls.BLSSignatureSizeInBytes:]) + cur := mask.CountEnabled() + if new > cur { + consensus.Blockchain.WriteCommitSig(blockObj.NumberU64(), recvMsg.Payload) + } + } else { + consensus.Blockchain.WriteCommitSig(blockObj.NumberU64(), recvMsg.Payload) + } + consensus.tryCatchup() if recvMsg.BlockNum > consensus.blockNum { consensus.getLogger().Info().Uint64("MsgBlockNum", recvMsg.BlockNum).Msg("[OnCommitted] OUT OF SYNC") diff --git a/consensus/view_change.go b/consensus/view_change.go index e8c792402..d9235adad 100644 --- a/consensus/view_change.go +++ b/consensus/view_change.go @@ -163,6 +163,7 @@ func (consensus *Consensus) startViewChange(viewID uint64) { []nodeconfig.GroupID{ nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID))}, p2p.ConstructMessage(msgToSend), + true, ); err != nil { consensus.getLogger().Err(err). Msg("could not send out the ViewChange message") @@ -184,6 +185,7 @@ func (consensus *Consensus) startNewView(viewID uint64, newLeaderPriKey *bls.Pri []nodeconfig.GroupID{ nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID))}, p2p.ConstructMessage(msgToSend), + true, ); err != nil { return errors.New("failed to send out the NewView message") } diff --git a/core/headerchain.go b/core/headerchain.go index 1838ed190..7c077d8a0 100644 --- a/core/headerchain.go +++ b/core/headerchain.go @@ -525,7 +525,7 @@ func (hc *HeaderChain) Config() *params.ChainConfig { return hc.config } // Engine retrieves the header chain's consensus engine. func (hc *HeaderChain) Engine() consensus_engine.Engine { return hc.engine } -// GetBlock implements consensus.ChainReader, and returns nil for every input as +// GetBlock implements consensus.Blockchain, and returns nil for every input as // a header chain does not have blocks available for retrieval. func (hc *HeaderChain) GetBlock(hash common.Hash, number uint64) *types.Block { return nil diff --git a/internal/chain/sig.go b/internal/chain/sig.go index 38d2b461d..98bb47b3e 100644 --- a/internal/chain/sig.go +++ b/internal/chain/sig.go @@ -11,15 +11,15 @@ import ( // ReadSignatureBitmapByPublicKeys read the payload of signature and bitmap based on public keys func ReadSignatureBitmapByPublicKeys(recvPayload []byte, publicKeys []bls.PublicKeyWrapper) (*bls_core.Sign, *bls.Mask, error) { - if len(recvPayload) < 96 { + if len(recvPayload) < bls.BLSSignatureSizeInBytes { return nil, nil, errors.New("payload not have enough length") } payload := append(recvPayload[:0:0], recvPayload...) //#### Read payload data // 96 byte of multi-sig offset := 0 - multiSig := payload[offset : offset+96] - offset += 96 + multiSig := payload[offset : offset+bls.BLSSignatureSizeInBytes] + offset += bls.BLSSignatureSizeInBytes // bitmap bitmap := payload[offset:] //#### END Read payload data diff --git a/node/node.go b/node/node.go index 3fa0ef3e7..2111fa0ee 100644 --- a/node/node.go +++ b/node/node.go @@ -1053,7 +1053,7 @@ func (node *Node) InitConsensusWithValidators() (err error) { Uint64("epoch", epoch.Uint64()). Msg("[InitConsensusWithValidators] Try To Get PublicKeys") shardState, err := committee.WithStakingEnabled.Compute( - epoch, node.Consensus.ChainReader, + epoch, node.Consensus.Blockchain, ) if err != nil { utils.Logger().Err(err). @@ -1159,7 +1159,7 @@ func (node *Node) populateSelfAddresses(epoch *big.Int) { node.keysToAddrsEpoch = epoch shardID := node.Consensus.ShardID - shardState, err := node.Consensus.ChainReader.ReadShardState(epoch) + shardState, err := node.Consensus.Blockchain.ReadShardState(epoch) if err != nil { utils.Logger().Error().Err(err). Int64("epoch", epoch.Int64()). diff --git a/node/node_handler.go b/node/node_handler.go index 037a667c1..ee10a9de3 100644 --- a/node/node_handler.go +++ b/node/node_handler.go @@ -241,7 +241,7 @@ func (node *Node) BroadcastCrossLink() { node.host.SendMessageToGroups( []nodeconfig.GroupID{nodeconfig.NewGroupIDByShardID(shard.BeaconChainShardID)}, p2p.ConstructMessage( - proto_node.ConstructCrossLinkMessage(node.Consensus.ChainReader, headers)), + proto_node.ConstructCrossLinkMessage(node.Consensus.Blockchain, headers)), ) } @@ -400,11 +400,11 @@ func (node *Node) PostConsensusProcessing(newBlock *types.Block) error { for _, addr := range node.GetAddresses(newBlock.Epoch()) { wrapper, err := node.Beaconchain().ReadValidatorInformation(addr) if err != nil { - return err + return nil } snapshot, err := node.Beaconchain().ReadValidatorSnapshot(addr) if err != nil { - return err + return nil } computed := availability.ComputeCurrentSigning( snapshot.Validator, wrapper, diff --git a/node/node_newblock.go b/node/node_newblock.go index fcfd45461..26227d7fc 100644 --- a/node/node_newblock.go +++ b/node/node_newblock.go @@ -6,6 +6,8 @@ import ( "strings" "time" + "github.com/harmony-one/harmony/crypto/bls" + staking "github.com/harmony-one/harmony/staking/types" "github.com/ethereum/go-ethereum/common" @@ -23,14 +25,13 @@ const ( // WaitForConsensusReadyV2 listen for the readiness signal from consensus and generate new block for consensus. // only leader will receive the ready signal -func (node *Node) WaitForConsensusReadyV2(readySignal chan struct{}, stopChan chan struct{}, stoppedChan chan struct{}) { +func (node *Node) WaitForConsensusReadyV2(readySignal chan struct{}, commitSigsChan chan []byte, stopChan chan struct{}, stoppedChan chan struct{}) { go func() { // Setup stoppedChan defer close(stoppedChan) utils.Logger().Debug(). Msg("Waiting for Consensus ready") - // TODO: make local net start faster time.Sleep(30 * time.Second) // Wait for other nodes to be ready (test-only) for { @@ -48,20 +49,27 @@ func (node *Node) WaitForConsensusReadyV2(readySignal chan struct{}, stopChan ch Msg("PROPOSING NEW BLOCK ------------------------------------------------") // Prepare last commit signatures - commitSigs := make(chan []byte) - sigs, err := node.Consensus.BlockCommitSigs(node.Blockchain().CurrentBlock().NumberU64()) + newCommitSigsChan := make(chan []byte) - node.Consensus.StartFinalityCount() - if err != nil { - utils.Logger().Error().Err(err).Msg("[ProposeNewBlock] Cannot get commit signatures from last block") - break - } - // Currently the block proposal is not triggered asynchronously yet with last consensus. - // TODO: trigger block proposal when 66% commit, and feed and final commit sigs here. go func() { - commitSigs <- sigs + select { + case commitSigs := <-commitSigsChan: + if len(commitSigs) > bls.BLSSignatureSizeInBytes { + newCommitSigsChan <- commitSigs + } + case <-time.After(5 * time.Second): + sigs, err := node.Consensus.BlockCommitSigs(node.Blockchain().CurrentBlock().NumberU64()) + + if err != nil { + utils.Logger().Error().Err(err).Msg("[ProposeNewBlock] Cannot get commit signatures from last block") + } else { + newCommitSigsChan <- sigs + } + + } }() - newBlock, err := node.ProposeNewBlock(commitSigs) + node.Consensus.StartFinalityCount() + newBlock, err := node.ProposeNewBlock(newCommitSigsChan) if err == nil { utils.Logger().Info(). diff --git a/node/service_setup.go b/node/service_setup.go index 9b0000e38..ed55607db 100644 --- a/node/service_setup.go +++ b/node/service_setup.go @@ -30,7 +30,7 @@ func (node *Node) setupForValidator() { // Register new block service. node.serviceManager.RegisterService( service.BlockProposal, - blockproposal.New(node.Consensus.ReadySignal, node.WaitForConsensusReadyV2), + blockproposal.New(node.Consensus.ReadySignal, node.Consensus.CommitSigChannel, node.WaitForConsensusReadyV2), ) } diff --git a/shard/committee/assignment.go b/shard/committee/assignment.go index 32c1f9fa7..a2ee08c3c 100644 --- a/shard/committee/assignment.go +++ b/shard/committee/assignment.go @@ -227,7 +227,7 @@ func IsEligibleForEPoSAuction(snapshot *staking.ValidatorSnapshot, validator *st } } -// ChainReader is a subset of Engine.ChainReader, just enough to do assignment +// Blockchain is a subset of Engine.Blockchain, just enough to do assignment type ChainReader interface { // ReadShardState retrieves sharding state given the epoch number. // This api reads the shard state cached or saved on the chaindb. From ee3dd70676dd08c4da06347d30618c1859e2de8c Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Mon, 26 Oct 2020 18:02:26 -0700 Subject: [PATCH 02/26] merge --- consensus/consensus_msg_sender.go | 1 + consensus/consensus_v2.go | 4 ++ consensus/leader.go | 7 +- consensus/quorum/quorum.go | 19 +++++ consensus/validator.go | 6 ++ consensus/view_change.go | 116 ++++++++++++++++++++++++++++-- node/node_newblock.go | 3 +- 7 files changed, 146 insertions(+), 10 deletions(-) diff --git a/consensus/consensus_msg_sender.go b/consensus/consensus_msg_sender.go index bbd63a746..b43b69306 100644 --- a/consensus/consensus_msg_sender.go +++ b/consensus/consensus_msg_sender.go @@ -115,6 +115,7 @@ func (sender *MessageSender) StopRetry(msgType msg_pb.MessageType) { if ok { msgRetry := data.(*MessageRetry) atomic.StoreUint32(&msgRetry.isActive, 0) + utils.Logger().Info().Str("type", msgType.String()).Uint32("isActive", msgRetry.isActive).Msg("STOPPING RETRY") } } diff --git a/consensus/consensus_v2.go b/consensus/consensus_v2.go index 4b9459277..9b02635e7 100644 --- a/consensus/consensus_v2.go +++ b/consensus/consensus_v2.go @@ -142,6 +142,7 @@ func (consensus *Consensus) finalCommit() { return } + // if leader success finalize the block, send committed message to validators // TODO: once leader rotation is implemented, leader who is about to be switched out // needs to send the committed message immediately so the next leader can @@ -484,6 +485,9 @@ func (consensus *Consensus) preCommitAndPropose(blk *types.Block) error { return err } + consensus.mutex.Lock() + defer consensus.mutex.Unlock() + network, err := consensus.construct(msg_pb.MessageType_COMMITTED, nil, []*bls.PrivateKeyWrapper{leaderPriKey}) if err != nil { return errors.Wrap(err, "[preCommitAndPropose] Unable to construct Committed message") diff --git a/consensus/leader.go b/consensus/leader.go index d67301311..fa9da37c5 100644 --- a/consensus/leader.go +++ b/consensus/leader.go @@ -3,6 +3,8 @@ package consensus import ( "time" + "github.com/harmony-one/harmony/internal/utils" + "github.com/harmony-one/harmony/crypto/bls" nodeconfig "github.com/harmony-one/harmony/internal/configs/node" @@ -199,8 +201,9 @@ func (consensus *Consensus) onPrepare(msg *msg_pb.Message) { } func (consensus *Consensus) onCommit(msg *msg_pb.Message) { - if consensus.GetViewChangingID() == 10 { - return + utils.Logger().Info().Msgf("ViewChanging %d %d", consensus.GetCurBlockViewID(), consensus.GetViewChangingID()) + if consensus.GetCurBlockViewID() == 10 { + //return } recvMsg, err := consensus.ParseFBFTMessage(msg) if err != nil { diff --git a/consensus/quorum/quorum.go b/consensus/quorum/quorum.go index 6f9c68678..ddca4bcd9 100644 --- a/consensus/quorum/quorum.go +++ b/consensus/quorum/quorum.go @@ -2,6 +2,7 @@ package quorum import ( "fmt" + "github.com/harmony-one/harmony/internal/configs/sharding" "math/big" "github.com/harmony-one/harmony/crypto/bls" @@ -73,6 +74,7 @@ type ParticipantTracker interface { IndexOf(bls.SerializedPublicKey) int ParticipantsCount() int64 NthNext(*bls.PublicKeyWrapper, int) (bool, *bls.PublicKeyWrapper) + NthNextHmy(shardingconfig.Instance, *bls.PublicKeyWrapper, int) (bool, *bls.PublicKeyWrapper) UpdateParticipants(pubKeys []bls.PublicKeyWrapper) } @@ -218,6 +220,23 @@ func (s *cIdentities) NthNext(pubKey *bls.PublicKeyWrapper, next int) (bool, *bl return found, &s.publicKeys[idx] } +// NthNextHmy return the Nth next pubkey of Harmony nodes, next can be negative number +func (s *cIdentities) NthNextHmy(instance shardingconfig.Instance, pubKey *bls.PublicKeyWrapper, next int) (bool, *bls.PublicKeyWrapper) { + found := false + + idx := s.IndexOf(pubKey.Bytes) + if idx != -1 { + found = true + } + numNodes := instance.NumHarmonyOperatedNodesPerShard() + // sanity check to avoid out of bound access + if numNodes <= 0 || numNodes > len(s.publicKeys) { + numNodes = len(s.publicKeys) + } + idx = (idx + next) % numNodes + return found, &s.publicKeys[idx] +} + func (s *cIdentities) Participants() multibls.PublicKeys { return s.publicKeys } diff --git a/consensus/validator.go b/consensus/validator.go index 7bfc1ad1f..55f6c1f1d 100644 --- a/consensus/validator.go +++ b/consensus/validator.go @@ -227,6 +227,12 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { consensus.getLogger().Warn().Msg("[OnCommitted] unable to parse msg") return } + + consensus.getLogger().Info(). + Uint64("MsgBlockNum", recvMsg.BlockNum). + Uint64("MsgViewID", recvMsg.ViewID). + Msg("[OnCommitted] Received committed message") + // It's ok to receive committed message for last block due to pipelining. // The committed message for last block could include more signatures now. if recvMsg.BlockNum < consensus.blockNum-1 { diff --git a/consensus/view_change.go b/consensus/view_change.go index d9235adad..4c71d2197 100644 --- a/consensus/view_change.go +++ b/consensus/view_change.go @@ -1,6 +1,8 @@ package consensus import ( + "github.com/harmony-one/harmony/shard" + "math/big" "sync" "time" @@ -90,20 +92,122 @@ func (pm *State) GetViewChangeDuraion() time.Duration { return time.Duration(diff * diff * int64(viewChangeDuration)) } -// GetNextLeaderKey uniquely determine who is the leader for given viewID -func (consensus *Consensus) GetNextLeaderKey(viewID uint64) *bls.PublicKeyWrapper { +// fallbackNextViewID return the next view ID and duration when there is an exception +// to calculate the time-based viewId +func (consensus *Consensus) fallbackNextViewID() (uint64, time.Duration) { + diff := int64(consensus.GetViewChangingID() + 1 - consensus.GetCurBlockViewID()) + if diff <= 0 { + diff = int64(1) + } + consensus.getLogger().Error(). + Int64("diff", diff). + Msg("[fallbackNextViewID] use legacy viewID algorithm") + return consensus.GetViewChangingID() + 1, time.Duration(diff * diff * int64(viewChangeDuration)) +} + +// getNextViewID return the next view ID based on the timestamp +// The next view ID is calculated based on the difference of validator's timestamp +// and the block's timestamp. So that it can be deterministic to return the next view ID +// only based on the blockchain block and the validator's current timestamp. +// The next view ID is the single factor used to determine +// the next leader, so it is mod the number of nodes per shard. +// It returns the next viewID and duration of the view change +// The view change duration is a fixed duration now to avoid stuck into offline nodes during +// the view change. +// viewID is only used as the fallback mechansim to determine the nextViewID +func (consensus *Consensus) getNextViewID() (uint64, time.Duration) { + // handle corner case at first + if consensus.Blockchain == nil { + return consensus.fallbackNextViewID() + } + curHeader := consensus.Blockchain.CurrentHeader() + if curHeader == nil { + return consensus.fallbackNextViewID() + } + blockTimestamp := curHeader.Time().Int64() + curTimestamp := time.Now().Unix() + + // timestamp messed up in current validator node + if curTimestamp <= blockTimestamp { + return consensus.fallbackNextViewID() + } + totalNode := consensus.Decider.ParticipantsCount() + // diff is at least 1, and it won't exceed the totalNode + diff := uint64(((curTimestamp - blockTimestamp) / viewChangeTimeout) % int64(totalNode)) + nextViewID := diff + consensus.GetCurBlockViewID() + + consensus.getLogger().Info(). + Int64("curTimestamp", curTimestamp). + Int64("blockTimestamp", blockTimestamp). + Uint64("nextViewID", nextViewID). + Uint64("curViewID", consensus.GetCurBlockViewID()). + Msg("[getNextViewID]") + + // duration is always the fixed view change duration for synchronous view change + return nextViewID, viewChangeDuration +} + +// getNextLeaderKey uniquely determine who is the leader for given viewID +// It reads the current leader's pubkey based on the blockchain data and returns +// the next leader based on the gap of the viewID of the view change and the last +// know view id of the block. +func (consensus *Consensus) getNextLeaderKey(viewID uint64) *bls.PublicKeyWrapper { gap := 1 + + if viewID > consensus.GetCurBlockViewID() { + gap = int(viewID - consensus.GetCurBlockViewID()) + } + var lastLeaderPubKey *bls.PublicKeyWrapper + var err error + epoch := big.NewInt(0) + if consensus.Blockchain == nil { + consensus.getLogger().Error().Msg("[getNextLeaderKey] ChainReader is nil. Use consensus.LeaderPubKey") + lastLeaderPubKey = consensus.LeaderPubKey + } else { + curHeader := consensus.Blockchain.CurrentHeader() + if curHeader == nil { + consensus.getLogger().Error().Msg("[getNextLeaderKey] Failed to get current header from blockchain") + lastLeaderPubKey = consensus.LeaderPubKey + } else { + // this is the truth of the leader based on blockchain blocks + lastLeaderPubKey, err = consensus.getLeaderPubKeyFromCoinbase(curHeader) + if err != nil || lastLeaderPubKey == nil { + consensus.getLogger().Error().Err(err). + Msg("[getNextLeaderKey] Unable to get leaderPubKey from coinbase. Set it to consensus.LeaderPubKey") + lastLeaderPubKey = consensus.LeaderPubKey + } + epoch = curHeader.Epoch() + // viewchange happened at the first block of new epoch + // use the LeaderPubKey as the base of the next leader + // as we shouldn't use lastLeader from coinbase as the base. + // The LeaderPubKey should be updated to the index 0 of the committee + if curHeader.IsLastBlockInEpoch() { + consensus.getLogger().Info().Msg("[getNextLeaderKey] view change in the first block of new epoch") + lastLeaderPubKey = consensus.LeaderPubKey + } + } + } consensus.getLogger().Info(). + Str("lastLeaderPubKey", lastLeaderPubKey.Bytes.Hex()). Str("leaderPubKey", consensus.LeaderPubKey.Bytes.Hex()). + Int("gap", gap). Uint64("newViewID", viewID). Uint64("myCurBlockViewID", consensus.GetCurBlockViewID()). - Msg("[GetNextLeaderKey] got leaderPubKey from coinbase") - wasFound, next := consensus.Decider.NthNext(consensus.LeaderPubKey, gap) + Msg("[getNextLeaderKey] got leaderPubKey from coinbase") + // wasFound, next := consensus.Decider.NthNext(lastLeaderPubKey, gap) + // FIXME: rotate leader on harmony nodes only before fully externalization + wasFound, next := consensus.Decider.NthNextHmy( + shard.Schedule.InstanceForEpoch(epoch), + lastLeaderPubKey, + gap) if !wasFound { consensus.getLogger().Warn(). Str("key", consensus.LeaderPubKey.Bytes.Hex()). - Msg("GetNextLeaderKey: currentLeaderKey not found") + Msg("[getNextLeaderKey] currentLeaderKey not found") } + consensus.getLogger().Info(). + Str("nextLeader", next.Bytes.Hex()). + Msg("[getNextLeaderKey] next Leader") return next } @@ -125,7 +229,7 @@ func (consensus *Consensus) startViewChange(viewID uint64) { consensus.consensusTimeout[timeoutBootstrap].Stop() consensus.current.SetMode(ViewChanging) consensus.SetViewChangingID(viewID) - consensus.LeaderPubKey = consensus.GetNextLeaderKey(viewID) + consensus.LeaderPubKey = consensus.getNextLeaderKey(viewID) duration := consensus.current.GetViewChangeDuraion() consensus.getLogger().Warn(). diff --git a/node/node_newblock.go b/node/node_newblock.go index 26227d7fc..c9de062ea 100644 --- a/node/node_newblock.go +++ b/node/node_newblock.go @@ -57,7 +57,7 @@ func (node *Node) WaitForConsensusReadyV2(readySignal chan struct{}, commitSigsC if len(commitSigs) > bls.BLSSignatureSizeInBytes { newCommitSigsChan <- commitSigs } - case <-time.After(5 * time.Second): + case <-time.After(4 * time.Second): sigs, err := node.Consensus.BlockCommitSigs(node.Blockchain().CurrentBlock().NumberU64()) if err != nil { @@ -65,7 +65,6 @@ func (node *Node) WaitForConsensusReadyV2(readySignal chan struct{}, commitSigsC } else { newCommitSigsChan <- sigs } - } }() node.Consensus.StartFinalityCount() From 0819464f2bb07b6f256a6184929d2187b127837d Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Mon, 26 Oct 2020 18:06:33 -0700 Subject: [PATCH 03/26] fix build --- consensus/quorum/quorum.go | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/consensus/quorum/quorum.go b/consensus/quorum/quorum.go index 115791605..f41d47ec1 100644 --- a/consensus/quorum/quorum.go +++ b/consensus/quorum/quorum.go @@ -242,23 +242,6 @@ func (s *cIdentities) NthNextHmy(instance shardingconfig.Instance, pubKey *bls.P return found, &s.publicKeys[idx] } -// NthNextHmy return the Nth next pubkey of Harmony nodes, next can be negative number -func (s *cIdentities) NthNextHmy(instance shardingconfig.Instance, pubKey *bls.PublicKeyWrapper, next int) (bool, *bls.PublicKeyWrapper) { - found := false - - idx := s.IndexOf(pubKey.Bytes) - if idx != -1 { - found = true - } - numNodes := instance.NumHarmonyOperatedNodesPerShard() - // sanity check to avoid out of bound access - if numNodes <= 0 || numNodes > len(s.publicKeys) { - numNodes = len(s.publicKeys) - } - idx = (idx + next) % numNodes - return found, &s.publicKeys[idx] -} - func (s *cIdentities) Participants() multibls.PublicKeys { return s.publicKeys } From 8906679ad70db5dc33974d6a0653fea44e890773 Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Tue, 27 Oct 2020 17:40:14 -0700 Subject: [PATCH 04/26] stabalize consensus process with pipelining --- consensus/consensus_service.go | 2 +- consensus/consensus_v2.go | 21 ++++++++++++++++----- consensus/quorum/one-node-staked-vote.go | 3 ++- consensus/validator.go | 17 +++++++++-------- internal/utils/singleton.go | 2 +- node/node_cross_shard.go | 1 + 6 files changed, 30 insertions(+), 16 deletions(-) diff --git a/consensus/consensus_service.go b/consensus/consensus_service.go index 768fd99e8..e9968bc74 100644 --- a/consensus/consensus_service.go +++ b/consensus/consensus_service.go @@ -311,7 +311,7 @@ func (consensus *Consensus) UpdateConsensusInformation() Mode { } } - consensus.BlockPeriod = 3 * time.Second + consensus.BlockPeriod = 4 * time.Second // Enable aggregate sig at epoch 1000 for mainnet, at epoch 53000 for testnet, and always for other nets. if (consensus.Blockchain.Config().ChainID == params.MainnetChainID && curEpoch.Cmp(big.NewInt(1000)) > 0) || diff --git a/consensus/consensus_v2.go b/consensus/consensus_v2.go index a3d9642ba..d53291cfd 100644 --- a/consensus/consensus_v2.go +++ b/consensus/consensus_v2.go @@ -7,6 +7,8 @@ import ( "sync/atomic" "time" + "github.com/harmony-one/harmony/internal/utils" + "github.com/rs/zerolog" msg_pb "github.com/harmony-one/harmony/api/proto/message" @@ -134,6 +136,7 @@ func (consensus *Consensus) finalCommit() { } consensus.commitBlock(block, FBFTMsg) + consensus.Blockchain.WriteCommitSig(block.NumberU64(), commitSigAndBitmap) if consensus.blockNum-beforeCatchupNum != 1 { consensus.getLogger().Warn(). @@ -148,7 +151,7 @@ func (consensus *Consensus) finalCommit() { // have the full commit signatures for new block // For now, the leader don't need to send immediately as the committed sig will be // included in the next block and sent in next prepared message. - sendImmediately := false + sendImmediately := true if err := consensus.msgSender.SendWithRetry( block.NumberU64(), msg_pb.MessageType_COMMITTED, []nodeconfig.GroupID{ @@ -189,12 +192,17 @@ func (consensus *Consensus) finalCommit() { // Sleep to wait for the full block time consensus.getLogger().Info().Msg("[finalCommit] Waiting for Block Time") <-time.After(time.Until(consensus.NextBlockDue)) - - // Send commit sig/bitmap to finish the new block proposal - consensus.CommitSigChannel <- commitSigAndBitmap - // Update time due for next block consensus.NextBlockDue = time.Now().Add(consensus.BlockPeriod) + + // Send commit sig/bitmap to finish the new block proposal + go func() { + select { + case consensus.CommitSigChannel <- commitSigAndBitmap: + case <-time.After(6 * time.Second): + utils.Logger().Error().Err(err).Msg("[finalCommit] channel not received after 6s for commitSigAndBitmap") + } + }() } // BlockCommitSigs returns the byte array of aggregated @@ -517,6 +525,9 @@ func (consensus *Consensus) preCommitAndPropose(blk *types.Block) error { } // Send signal to Node to propose the new block for consensus + consensus.getLogger().Warn().Err(err).Msg("[preCommitAndPropose] sending block proposal signal") + + // TODO: make sure preCommit happens before finalCommit consensus.ReadySignal <- struct{}{} return nil } diff --git a/consensus/quorum/one-node-staked-vote.go b/consensus/quorum/one-node-staked-vote.go index 7b79f4de8..dc8dd2e11 100644 --- a/consensus/quorum/one-node-staked-vote.go +++ b/consensus/quorum/one-node-staked-vote.go @@ -188,7 +188,8 @@ func (v *stakedVoteWeight) QuorumThreshold() numeric.Dec { // IsAllSigsCollected .. func (v *stakedVoteWeight) IsAllSigsCollected() bool { - return v.SignersCount(Commit) == v.ParticipantsCount() + utils.Logger().Info().Msgf("ALL SIGS %s", v.voteTally.Commit.tally) + return v.voteTally.Commit.tally.Equal(numeric.NewDec(1)) } func (v *stakedVoteWeight) SetVoters( diff --git a/consensus/validator.go b/consensus/validator.go index 55f6c1f1d..d9d48bba2 100644 --- a/consensus/validator.go +++ b/consensus/validator.go @@ -114,7 +114,7 @@ func (consensus *Consensus) onPrepared(msg *msg_pb.Message) { Msg("Wrong BlockNum Received, ignoring!") return } - if recvMsg.BlockNum > consensus.blockNum { + if recvMsg.BlockNum > consensus.blockNum+1 { consensus.getLogger().Warn().Msgf("[OnPrepared] low consensus block number. Spin sync") consensus.spinUpStateSync() } @@ -156,12 +156,6 @@ func (consensus *Consensus) onPrepared(msg *msg_pb.Message) { consensus.mutex.Lock() defer consensus.mutex.Unlock() - // tryCatchup is also run in onCommitted(), so need to lock with commitMutex. - if consensus.current.Mode() != Normal { - // don't sign the block that is not verified - consensus.getLogger().Info().Msg("[OnPrepared] Not in normal mode, Exiting!!") - return - } if consensus.BlockVerifier == nil { consensus.getLogger().Debug().Msg("[onPrepared] consensus received message before init. Ignoring") return @@ -217,6 +211,13 @@ func (consensus *Consensus) onPrepared(msg *msg_pb.Message) { copy(consensus.blockHash[:], blockHash[:]) } + // tryCatchup is also run in onCommitted(), so need to lock with commitMutex. + if consensus.current.Mode() != Normal { + // don't sign the block that is not verified + consensus.getLogger().Info().Msg("[OnPrepared] Not in normal mode, Exiting!!") + return + } + consensus.sendCommitMessages(&blockObj) consensus.switchPhase("onPrepared", FBFTCommit) } @@ -241,7 +242,7 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { Msg("Wrong BlockNum Received, ignoring!") return } - if recvMsg.BlockNum > consensus.blockNum { + if recvMsg.BlockNum > consensus.blockNum+1 { consensus.getLogger().Info().Msg("[OnCommitted] low consensus block number. Spin up state sync") consensus.spinUpStateSync() } diff --git a/internal/utils/singleton.go b/internal/utils/singleton.go index 41fe4ebfd..e7773ae2e 100644 --- a/internal/utils/singleton.go +++ b/internal/utils/singleton.go @@ -45,7 +45,7 @@ func SetLogContext(_port, _ip string) { // SetLogVerbosity specifies the verbosity of global logger func SetLogVerbosity(verbosity log.Lvl) { - logVerbosity = verbosity + logVerbosity = 4 if glogger != nil { glogger.Verbosity(logVerbosity) } diff --git a/node/node_cross_shard.go b/node/node_cross_shard.go index 8738ecbb1..3e7878a7c 100644 --- a/node/node_cross_shard.go +++ b/node/node_cross_shard.go @@ -20,6 +20,7 @@ func (node *Node) BroadcastCXReceipts(newBlock *types.Block) { //#### Read payload data from committed msg if len(commitSigAndBitmap) <= 96 { utils.Logger().Debug().Int("commitSigAndBitmapLen", len(commitSigAndBitmap)).Msg("[BroadcastCXReceipts] commitSigAndBitmap Not Enough Length") + return } commitSig := make([]byte, 96) commitBitmap := make([]byte, len(commitSigAndBitmap)-96) From 49d7985f1495c4b37b14799afd65203f51c6c0bf Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Fri, 30 Oct 2020 11:52:25 -0700 Subject: [PATCH 05/26] some more logic cleanup --- api/service/blockproposal/service.go | 7 ++-- consensus/consensus.go | 15 +++++++-- consensus/consensus_service.go | 6 ++-- consensus/consensus_test.go | 2 +- consensus/consensus_v2.go | 20 ++++++++++-- consensus/leader.go | 5 +-- consensus/validator.go | 49 ++++++++++++++++++++++++++-- consensus/view_change.go | 2 +- internal/utils/singleton.go | 2 +- node/node.go | 2 +- node/node_newblock.go | 12 +++++-- 11 files changed, 99 insertions(+), 23 deletions(-) diff --git a/api/service/blockproposal/service.go b/api/service/blockproposal/service.go index 3ec4b398e..cfb4efa04 100644 --- a/api/service/blockproposal/service.go +++ b/api/service/blockproposal/service.go @@ -3,6 +3,7 @@ package blockproposal import ( "github.com/ethereum/go-ethereum/rpc" msg_pb "github.com/harmony-one/harmony/api/proto/message" + "github.com/harmony-one/harmony/consensus" "github.com/harmony-one/harmony/internal/utils" ) @@ -10,14 +11,14 @@ import ( type Service struct { stopChan chan struct{} stoppedChan chan struct{} - readySignal chan struct{} + readySignal chan consensus.ProposalType commitSigsChan chan []byte messageChan chan *msg_pb.Message - waitForConsensusReady func(readySignal chan struct{}, commitSigsChan chan []byte, stopChan chan struct{}, stoppedChan chan struct{}) + waitForConsensusReady func(readySignal chan consensus.ProposalType, commitSigsChan chan []byte, stopChan chan struct{}, stoppedChan chan struct{}) } // New returns a block proposal service. -func New(readySignal chan struct{}, commitSigsChan chan []byte, waitForConsensusReady func(readySignal chan struct{}, commitSigsChan chan []byte, stopChan chan struct{}, stoppedChan chan struct{})) *Service { +func New(readySignal chan consensus.ProposalType, commitSigsChan chan []byte, waitForConsensusReady func(readySignal chan consensus.ProposalType, commitSigsChan chan []byte, stopChan chan struct{}, stoppedChan chan struct{})) *Service { return &Service{readySignal: readySignal, commitSigsChan: commitSigsChan, waitForConsensusReady: waitForConsensusReady} } diff --git a/consensus/consensus.go b/consensus/consensus.go index b626bd324..21f544080 100644 --- a/consensus/consensus.go +++ b/consensus/consensus.go @@ -27,6 +27,15 @@ const ( var errLeaderPriKeyNotFound = errors.New("getting leader private key from consensus public keys failed") +// ProposalType is to indicate the type of signal for new block proposal +type ProposalType byte + +// Constant of the top level Message Type exchanged among nodes +const ( + SyncProposal ProposalType = iota + AsyncProposal +) + // BlockVerifierFunc is a function used to verify the block type BlockVerifierFunc func(*types.Block) error @@ -79,8 +88,8 @@ type Consensus struct { mutex sync.Mutex // ViewChange struct vc *viewChange - // Signal channel for starting a new consensus process - ReadySignal chan struct{} + // Signal channel for proposing a new block and start new consensus + ReadySignal chan ProposalType // Channel to send full commit signatures to finish new block proposal CommitSigChannel chan []byte // The post-consensus processing func passed from Node object @@ -208,7 +217,7 @@ func New( consensus.syncNotReadyChan = make(chan struct{}) consensus.SlashChan = make(chan slash.Record) consensus.commitFinishChan = make(chan uint64) - consensus.ReadySignal = make(chan struct{}) + consensus.ReadySignal = make(chan ProposalType) consensus.CommitSigChannel = make(chan []byte) // channel for receiving newly generated VDF consensus.RndChannel = make(chan [vdfAndSeedSize]byte) diff --git a/consensus/consensus_service.go b/consensus/consensus_service.go index e9968bc74..7b6fc7e01 100644 --- a/consensus/consensus_service.go +++ b/consensus/consensus_service.go @@ -451,7 +451,7 @@ func (consensus *Consensus) UpdateConsensusInformation() Mode { consensus.getLogger().Info(). Str("myKey", myPubKeys.SerializeToHexStr()). Msg("[UpdateConsensusInformation] I am the New Leader") - consensus.ReadySignal <- struct{}{} + consensus.ReadySignal <- SyncProposal }() } return Normal @@ -573,9 +573,9 @@ func (consensus *Consensus) selfCommit(payload []byte) error { continue } - if _, err := consensus.Decider.SubmitVote( + if _, err := consensus.Decider.AddNewVote( quorum.Commit, - []bls.SerializedPublicKey{key.Pub.Bytes}, + []*bls_cosi.PublicKeyWrapper{key.Pub}, key.Pri.SignHash(commitPayload), common.BytesToHash(consensus.blockHash[:]), block.NumberU64(), diff --git a/consensus/consensus_test.go b/consensus/consensus_test.go index 2cada40e4..43de0d5d8 100644 --- a/consensus/consensus_test.go +++ b/consensus/consensus_test.go @@ -72,7 +72,7 @@ func TestConsensusInitialization(t *testing.T) { assert.IsType(t, make(chan uint64), consensus.commitFinishChan) assert.NotNil(t, consensus.commitFinishChan) - assert.IsType(t, make(chan struct{}), consensus.ReadySignal) + assert.IsType(t, make(chan ProposalType), consensus.ReadySignal) assert.NotNil(t, consensus.ReadySignal) assert.IsType(t, make(chan [vdfAndSeedSize]byte), consensus.RndChannel) diff --git a/consensus/consensus_v2.go b/consensus/consensus_v2.go index d53291cfd..f97cfb663 100644 --- a/consensus/consensus_v2.go +++ b/consensus/consensus_v2.go @@ -123,6 +123,9 @@ func (consensus *Consensus) finalCommit() { msgToSend, FBFTMsg := network.Bytes, network.FBFTMsg + consensus.getLogger().Warn(). + Str("bitmap", hex.EncodeToString(FBFTMsg.Payload[:])). + Msg("[finalCommit] BITMAP") commitSigAndBitmap := FBFTMsg.Payload // this may not needed consensus.FBFTLog.AddMessage(FBFTMsg) // find correct block content @@ -151,7 +154,11 @@ func (consensus *Consensus) finalCommit() { // have the full commit signatures for new block // For now, the leader don't need to send immediately as the committed sig will be // included in the next block and sent in next prepared message. - sendImmediately := true + + sendImmediately := false + if !consensus.IsLeader() { + sendImmediately = true + } if err := consensus.msgSender.SendWithRetry( block.NumberU64(), msg_pb.MessageType_COMMITTED, []nodeconfig.GroupID{ @@ -246,7 +253,7 @@ func (consensus *Consensus) Start( <-startChannel toStart <- struct{}{} consensus.getLogger().Info().Time("time", time.Now()).Msg("[ConsensusMainLoop] Send ReadySignal") - consensus.ReadySignal <- struct{}{} + consensus.ReadySignal <- SyncProposal }() } consensus.getLogger().Info().Time("time", time.Now()).Msg("[ConsensusMainLoop] Consensus started") @@ -502,6 +509,9 @@ func (consensus *Consensus) preCommitAndPropose(blk *types.Block) error { network.Bytes, network.FBFTMsg consensus.FBFTLog.AddMessage(FBFTMsg) + consensus.getLogger().Warn(). + Str("bitmap", hex.EncodeToString(FBFTMsg.Payload[:])). + Msg("[finalCommit] BITMAP") blk.SetCurrentCommitSig(FBFTMsg.Payload) if err := consensus.OnConsensusDone(blk); err != nil { @@ -509,6 +519,8 @@ func (consensus *Consensus) preCommitAndPropose(blk *types.Block) error { return err } + // If I am still the leader + //if consensus.IsLeader() { // if leader success finalize the block, send committed message to validators if err := consensus.msgSender.SendWithRetry( blk.NumberU64(), @@ -528,7 +540,9 @@ func (consensus *Consensus) preCommitAndPropose(blk *types.Block) error { consensus.getLogger().Warn().Err(err).Msg("[preCommitAndPropose] sending block proposal signal") // TODO: make sure preCommit happens before finalCommit - consensus.ReadySignal <- struct{}{} + consensus.ReadySignal <- AsyncProposal + //} + consensus.getLogger().Warn().Msg("[preCommitAndPropose] FULLY FINISHED") return nil } diff --git a/consensus/leader.go b/consensus/leader.go index fa9da37c5..e5b614c8b 100644 --- a/consensus/leader.go +++ b/consensus/leader.go @@ -202,8 +202,8 @@ func (consensus *Consensus) onPrepare(msg *msg_pb.Message) { func (consensus *Consensus) onCommit(msg *msg_pb.Message) { utils.Logger().Info().Msgf("ViewChanging %d %d", consensus.GetCurBlockViewID(), consensus.GetViewChangingID()) - if consensus.GetCurBlockViewID() == 10 { - //return + if consensus.GetCurBlockViewID()%7 == 0 { + return } recvMsg, err := consensus.ParseFBFTMessage(msg) if err != nil { @@ -307,6 +307,7 @@ func (consensus *Consensus) onCommit(msg *msg_pb.Message) { logger.Info().Msg("[OnCommit] 2/3 Enough commits received") go func() { + // TODO: make it a channel consensus.preCommitAndPropose(blockObj) }() diff --git a/consensus/validator.go b/consensus/validator.go index d9d48bba2..dd805a313 100644 --- a/consensus/validator.go +++ b/consensus/validator.go @@ -153,8 +153,16 @@ func (consensus *Consensus) onPrepared(msg *msg_pb.Message) { if !consensus.onPreparedSanityChecks(&blockObj, recvMsg) { return } + consensus.getLogger().Info(). + Uint64("MsgBlockNum", recvMsg.BlockNum). + Uint64("MsgViewID", recvMsg.ViewID). + Msg("[OnPrepared] Received OnPrepared message11111111") consensus.mutex.Lock() defer consensus.mutex.Unlock() + consensus.getLogger().Info(). + Uint64("MsgBlockNum", recvMsg.BlockNum). + Uint64("MsgViewID", recvMsg.ViewID). + Msg("[OnPrepared] Received OnPrepared message222222") if consensus.BlockVerifier == nil { consensus.getLogger().Debug().Msg("[onPrepared] consensus received message before init. Ignoring") @@ -164,9 +172,21 @@ func (consensus *Consensus) onPrepared(msg *msg_pb.Message) { consensus.getLogger().Error().Err(err).Msg("[OnPrepared] Block verification failed") return } + consensus.getLogger().Info(). + Uint64("MsgBlockNum", recvMsg.BlockNum). + Uint64("MsgViewID", recvMsg.ViewID). + Msg("[OnPrepared] Received OnPrepared message3333") consensus.FBFTLog.MarkBlockVerified(&blockObj) + consensus.getLogger().Info(). + Uint64("MsgBlockNum", recvMsg.BlockNum). + Uint64("MsgViewID", recvMsg.ViewID). + Msg("[OnPrepared] Received OnPrepared message44444") consensus.FBFTLog.AddBlock(&blockObj) + consensus.getLogger().Info(). + Uint64("MsgBlockNum", recvMsg.BlockNum). + Uint64("MsgViewID", recvMsg.ViewID). + Msg("[OnPrepared] Received OnPrepared message555555") // add block field blockPayload := make([]byte, len(recvMsg.Block)) copy(blockPayload[:], recvMsg.Block[:]) @@ -247,6 +267,10 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { consensus.spinUpStateSync() } + consensus.getLogger().Info(). + Uint64("MsgBlockNum", recvMsg.BlockNum). + Uint64("MsgViewID", recvMsg.ViewID). + Msg("[OnCommitted] Received committed message11111111") aggSig, mask, err := consensus.ReadSignatureBitmapPayload(recvMsg.Payload, 0) if err != nil { consensus.getLogger().Error().Err(err).Msg("[OnCommitted] readSignatureBitmapPayload failed") @@ -257,6 +281,13 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { return } + consensus.mutex.Lock() + defer consensus.mutex.Unlock() + + consensus.getLogger().Info(). + Uint64("MsgBlockNum", recvMsg.BlockNum). + Uint64("MsgViewID", recvMsg.ViewID). + Msg("[OnCommitted] Received committed message222222") // Must have the corresponding block to verify committed message. blockObj := consensus.FBFTLog.GetBlockByHash(recvMsg.BlockHash) if blockObj == nil { @@ -267,6 +298,10 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { Msg("[OnCommitted] Failed finding a matching block for committed message") return } + consensus.getLogger().Info(). + Uint64("MsgBlockNum", recvMsg.BlockNum). + Uint64("MsgViewID", recvMsg.ViewID). + Msg("[OnCommitted] Received committed message333333") commitPayload := signature.ConstructCommitPayload(consensus.Blockchain, blockObj.Epoch(), blockObj.Hash(), blockObj.NumberU64(), blockObj.Header().ViewID().Uint64()) if !aggSig.VerifyHash(mask.AggregatePublic, commitPayload) { @@ -276,11 +311,21 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { return } + consensus.getLogger().Info(). + Uint64("MsgBlockNum", recvMsg.BlockNum). + Uint64("MsgViewID", recvMsg.ViewID). + Msg("[OnCommitted] Received committed message444444") consensus.FBFTLog.AddMessage(recvMsg) - consensus.mutex.Lock() - defer consensus.mutex.Unlock() + consensus.getLogger().Info(). + Uint64("MsgBlockNum", recvMsg.BlockNum). + Uint64("MsgViewID", recvMsg.ViewID). + Msg("[OnCommitted] Received committed message555555") + consensus.getLogger().Info(). + Uint64("MsgBlockNum", recvMsg.BlockNum). + Uint64("MsgViewID", recvMsg.ViewID). + Msg("[OnCommitted] Received committed message666666") consensus.aggregatedCommitSig = aggSig consensus.commitBitmap = mask diff --git a/consensus/view_change.go b/consensus/view_change.go index 63c8abfde..13f21916d 100644 --- a/consensus/view_change.go +++ b/consensus/view_change.go @@ -399,7 +399,7 @@ func (consensus *Consensus) onViewChange(msg *msg_pb.Message) { } go func() { - consensus.ReadySignal <- struct{}{} + consensus.ReadySignal <- SyncProposal }() return } diff --git a/internal/utils/singleton.go b/internal/utils/singleton.go index e7773ae2e..ea5ba8f9b 100644 --- a/internal/utils/singleton.go +++ b/internal/utils/singleton.go @@ -49,7 +49,7 @@ func SetLogVerbosity(verbosity log.Lvl) { if glogger != nil { glogger.Verbosity(logVerbosity) } - updateZeroLogLevel(int(verbosity)) + updateZeroLogLevel(int(4)) } // AddLogFile creates a StreamHandler that outputs JSON logs diff --git a/node/node.go b/node/node.go index 2111fa0ee..2574e80d5 100644 --- a/node/node.go +++ b/node/node.go @@ -846,7 +846,7 @@ func (node *Node) Start() error { } for e := range errChan { - utils.SampledLogger().Info(). + utils.Logger().Info(). Interface("item", e.payload). Msgf("[p2p]: issue while handling incoming p2p message: %v", e.err) } diff --git a/node/node_newblock.go b/node/node_newblock.go index c9de062ea..7e61a207c 100644 --- a/node/node_newblock.go +++ b/node/node_newblock.go @@ -6,6 +6,8 @@ import ( "strings" "time" + "github.com/harmony-one/harmony/consensus" + "github.com/harmony-one/harmony/crypto/bls" staking "github.com/harmony-one/harmony/staking/types" @@ -25,7 +27,7 @@ const ( // WaitForConsensusReadyV2 listen for the readiness signal from consensus and generate new block for consensus. // only leader will receive the ready signal -func (node *Node) WaitForConsensusReadyV2(readySignal chan struct{}, commitSigsChan chan []byte, stopChan chan struct{}, stoppedChan chan struct{}) { +func (node *Node) WaitForConsensusReadyV2(readySignal chan consensus.ProposalType, commitSigsChan chan []byte, stopChan chan struct{}, stoppedChan chan struct{}) { go func() { // Setup stoppedChan defer close(stoppedChan) @@ -41,7 +43,7 @@ func (node *Node) WaitForConsensusReadyV2(readySignal chan struct{}, commitSigsC utils.Logger().Debug(). Msg("Consensus new block proposal: STOPPED!") return - case <-readySignal: + case proposalType := <-readySignal: for node.Consensus != nil && node.Consensus.IsLeader() { time.Sleep(SleepPeriod) utils.Logger().Info(). @@ -52,12 +54,16 @@ func (node *Node) WaitForConsensusReadyV2(readySignal chan struct{}, commitSigsC newCommitSigsChan := make(chan []byte) go func() { + waitTime := 0 * time.Second + if proposalType == consensus.AsyncProposal { + waitTime = 4 * time.Second + } select { case commitSigs := <-commitSigsChan: if len(commitSigs) > bls.BLSSignatureSizeInBytes { newCommitSigsChan <- commitSigs } - case <-time.After(4 * time.Second): + case <-time.After(waitTime): sigs, err := node.Consensus.BlockCommitSigs(node.Blockchain().CurrentBlock().NumberU64()) if err != nil { From 5e90f2fc98cfa8bdc468c8744280e15039653c9b Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Sun, 1 Nov 2020 09:43:37 -0800 Subject: [PATCH 06/26] fixes --- consensus/consensus_v2.go | 28 +++++++++++++----------- consensus/leader.go | 2 +- consensus/quorum/one-node-staked-vote.go | 4 ++-- consensus/validator.go | 8 ++----- 4 files changed, 20 insertions(+), 22 deletions(-) diff --git a/consensus/consensus_v2.go b/consensus/consensus_v2.go index b2120a0e8..8e45c8c4f 100644 --- a/consensus/consensus_v2.go +++ b/consensus/consensus_v2.go @@ -156,7 +156,7 @@ func (consensus *Consensus) finalCommit() { // included in the next block and sent in next prepared message. sendImmediately := false - if !consensus.IsLeader() { + if !consensus.IsLeader() || block.IsLastBlockInEpoch() { sendImmediately = true } if err := consensus.msgSender.SendWithRetry( @@ -522,18 +522,20 @@ func (consensus *Consensus) preCommitAndPropose(blk *types.Block) error { // If I am still the leader //if consensus.IsLeader() { // if leader success finalize the block, send committed message to validators - if err := consensus.msgSender.SendWithRetry( - blk.NumberU64(), - msg_pb.MessageType_COMMITTED, []nodeconfig.GroupID{ - nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), - }, - p2p.ConstructMessage(msgToSend), true); err != nil { - consensus.getLogger().Warn().Err(err).Msg("[preCommitAndPropose] Cannot send committed message") - } else { - consensus.getLogger().Info(). - Str("blockHash", blk.Hash().Hex()). - Uint64("blockNum", consensus.blockNum). - Msg("[preCommitAndPropose] Sent Committed Message") + if !blk.IsLastBlockInEpoch() { + if err := consensus.msgSender.SendWithRetry( + blk.NumberU64(), + msg_pb.MessageType_COMMITTED, []nodeconfig.GroupID{ + nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), + }, + p2p.ConstructMessage(msgToSend), true); err != nil { + consensus.getLogger().Warn().Err(err).Msg("[preCommitAndPropose] Cannot send committed message") + } else { + consensus.getLogger().Info(). + Str("blockHash", blk.Hash().Hex()). + Uint64("blockNum", consensus.blockNum). + Msg("[preCommitAndPropose] Sent Committed Message") + } } // Send signal to Node to propose the new block for consensus diff --git a/consensus/leader.go b/consensus/leader.go index e5b614c8b..a03dab0f7 100644 --- a/consensus/leader.go +++ b/consensus/leader.go @@ -202,7 +202,7 @@ func (consensus *Consensus) onPrepare(msg *msg_pb.Message) { func (consensus *Consensus) onCommit(msg *msg_pb.Message) { utils.Logger().Info().Msgf("ViewChanging %d %d", consensus.GetCurBlockViewID(), consensus.GetViewChangingID()) - if consensus.GetCurBlockViewID()%7 == 0 { + if consensus.GetCurBlockViewID()%8== 0 { return } recvMsg, err := consensus.ParseFBFTMessage(msg) diff --git a/consensus/quorum/one-node-staked-vote.go b/consensus/quorum/one-node-staked-vote.go index 0f65257c9..f0099f968 100644 --- a/consensus/quorum/one-node-staked-vote.go +++ b/consensus/quorum/one-node-staked-vote.go @@ -112,12 +112,12 @@ func (v *stakedVoteWeight) AddNewVote( t := v.QuorumThreshold() - msg := "Attempt to reach quorum" + msg := "[AddNewVote] New Vote Added!" if !tallyQuorum.quorumAchieved { tallyQuorum.quorumAchieved = tallyQuorum.tally.GT(t) if tallyQuorum.quorumAchieved { - msg = "Quorum Achieved!" + msg = "[AddNewVote] Quorum Achieved!" } } utils.Logger().Info(). diff --git a/consensus/validator.go b/consensus/validator.go index 275421ad4..57e73cf80 100644 --- a/consensus/validator.go +++ b/consensus/validator.go @@ -1,12 +1,11 @@ package consensus import ( - "bytes" "encoding/hex" "time" "github.com/harmony-one/harmony/crypto/bls" - nodeconfig "github.com/harmony-one/harmony/internal/configs/node" + "github.com/harmony-one/harmony/internal/configs/node" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/rlp" @@ -224,10 +223,7 @@ func (consensus *Consensus) onPrepared(msg *msg_pb.Message) { consensus.prepareBitmap = mask // Optimistically add blockhash field of prepare message - emptyHash := [32]byte{} - if bytes.Equal(consensus.blockHash[:], emptyHash[:]) { - copy(consensus.blockHash[:], blockHash[:]) - } + copy(consensus.blockHash[:], blockHash[:]) // tryCatchup is also run in onCommitted(), so need to lock with commitMutex. if consensus.current.Mode() != Normal { From ae8da18230ca06386f226e455bfdb1d21cbaef88 Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Mon, 2 Nov 2020 15:52:51 -0800 Subject: [PATCH 07/26] Remove test code --- consensus/consensus_v2.go | 6 ------ consensus/leader.go | 8 +------- consensus/validator.go | 34 +--------------------------------- 3 files changed, 2 insertions(+), 46 deletions(-) diff --git a/consensus/consensus_v2.go b/consensus/consensus_v2.go index 8e45c8c4f..a3dd642ad 100644 --- a/consensus/consensus_v2.go +++ b/consensus/consensus_v2.go @@ -123,9 +123,6 @@ func (consensus *Consensus) finalCommit() { msgToSend, FBFTMsg := network.Bytes, network.FBFTMsg - consensus.getLogger().Warn(). - Str("bitmap", hex.EncodeToString(FBFTMsg.Payload[:])). - Msg("[finalCommit] BITMAP") commitSigAndBitmap := FBFTMsg.Payload // this may not needed consensus.FBFTLog.AddMessage(FBFTMsg) // find correct block content @@ -509,9 +506,6 @@ func (consensus *Consensus) preCommitAndPropose(blk *types.Block) error { network.Bytes, network.FBFTMsg consensus.FBFTLog.AddMessage(FBFTMsg) - consensus.getLogger().Warn(). - Str("bitmap", hex.EncodeToString(FBFTMsg.Payload[:])). - Msg("[finalCommit] BITMAP") blk.SetCurrentCommitSig(FBFTMsg.Payload) if err := consensus.OnConsensusDone(blk); err != nil { diff --git a/consensus/leader.go b/consensus/leader.go index a03dab0f7..fd5450ecc 100644 --- a/consensus/leader.go +++ b/consensus/leader.go @@ -3,10 +3,8 @@ package consensus import ( "time" - "github.com/harmony-one/harmony/internal/utils" - "github.com/harmony-one/harmony/crypto/bls" - nodeconfig "github.com/harmony-one/harmony/internal/configs/node" + "github.com/harmony-one/harmony/internal/configs/node" "github.com/harmony-one/harmony/consensus/signature" @@ -201,10 +199,6 @@ func (consensus *Consensus) onPrepare(msg *msg_pb.Message) { } func (consensus *Consensus) onCommit(msg *msg_pb.Message) { - utils.Logger().Info().Msgf("ViewChanging %d %d", consensus.GetCurBlockViewID(), consensus.GetViewChangingID()) - if consensus.GetCurBlockViewID()%8== 0 { - return - } recvMsg, err := consensus.ParseFBFTMessage(msg) if err != nil { consensus.getLogger().Debug().Err(err).Msg("[OnCommit] Parse pbft message failed") diff --git a/consensus/validator.go b/consensus/validator.go index 57e73cf80..8a1a3c74f 100644 --- a/consensus/validator.go +++ b/consensus/validator.go @@ -5,7 +5,7 @@ import ( "time" "github.com/harmony-one/harmony/crypto/bls" - "github.com/harmony-one/harmony/internal/configs/node" + nodeconfig "github.com/harmony-one/harmony/internal/configs/node" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/rlp" @@ -156,10 +156,6 @@ func (consensus *Consensus) onPrepared(msg *msg_pb.Message) { consensus.mutex.Lock() defer consensus.mutex.Unlock() - consensus.getLogger().Info(). - Uint64("MsgBlockNum", recvMsg.BlockNum). - Uint64("MsgViewID", recvMsg.ViewID). - Msg("[OnPrepared] Received OnPrepared message222222") if consensus.BlockVerifier == nil { consensus.getLogger().Debug().Msg("[onPrepared] consensus received message before init. Ignoring") @@ -169,21 +165,9 @@ func (consensus *Consensus) onPrepared(msg *msg_pb.Message) { consensus.getLogger().Error().Err(err).Msg("[OnPrepared] Block verification failed") return } - consensus.getLogger().Info(). - Uint64("MsgBlockNum", recvMsg.BlockNum). - Uint64("MsgViewID", recvMsg.ViewID). - Msg("[OnPrepared] Received OnPrepared message3333") consensus.FBFTLog.MarkBlockVerified(&blockObj) - consensus.getLogger().Info(). - Uint64("MsgBlockNum", recvMsg.BlockNum). - Uint64("MsgViewID", recvMsg.ViewID). - Msg("[OnPrepared] Received OnPrepared message44444") consensus.FBFTLog.AddBlock(&blockObj) - consensus.getLogger().Info(). - Uint64("MsgBlockNum", recvMsg.BlockNum). - Uint64("MsgViewID", recvMsg.ViewID). - Msg("[OnPrepared] Received OnPrepared message555555") // add block field blockPayload := make([]byte, len(recvMsg.Block)) copy(blockPayload[:], recvMsg.Block[:]) @@ -260,10 +244,6 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { consensus.spinUpStateSync() } - consensus.getLogger().Info(). - Uint64("MsgBlockNum", recvMsg.BlockNum). - Uint64("MsgViewID", recvMsg.ViewID). - Msg("[OnCommitted] Received committed message11111111") aggSig, mask, err := consensus.ReadSignatureBitmapPayload(recvMsg.Payload, 0) if err != nil { consensus.getLogger().Error().Err(err).Msg("[OnCommitted] readSignatureBitmapPayload failed") @@ -277,10 +257,6 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { consensus.mutex.Lock() defer consensus.mutex.Unlock() - consensus.getLogger().Info(). - Uint64("MsgBlockNum", recvMsg.BlockNum). - Uint64("MsgViewID", recvMsg.ViewID). - Msg("[OnCommitted] Received committed message222222") // Must have the corresponding block to verify committed message. blockObj := consensus.FBFTLog.GetBlockByHash(recvMsg.BlockHash) if blockObj == nil { @@ -300,10 +276,6 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { return } - consensus.getLogger().Info(). - Uint64("MsgBlockNum", recvMsg.BlockNum). - Uint64("MsgViewID", recvMsg.ViewID). - Msg("[OnCommitted] Received committed message444444") consensus.FBFTLog.AddMessage(recvMsg) if recvMsg.BlockNum > consensus.blockNum { @@ -311,10 +283,6 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { consensus.spinUpStateSync() } - consensus.getLogger().Info(). - Uint64("MsgBlockNum", recvMsg.BlockNum). - Uint64("MsgViewID", recvMsg.ViewID). - Msg("[OnCommitted] Received committed message666666") consensus.aggregatedCommitSig = aggSig consensus.commitBitmap = mask From 3ba4620431ee37e72503924aa16d1630d9a37b95 Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Mon, 2 Nov 2020 16:13:24 -0800 Subject: [PATCH 08/26] clean up code --- consensus/consensus.go | 2 +- consensus/consensus_msg_sender.go | 1 - consensus/consensus_service.go | 2 +- consensus/consensus_v2.go | 21 ++++++++------------- consensus/leader.go | 8 +++++++- consensus/validator.go | 4 ---- internal/utils/singleton.go | 4 ++-- node/node.go | 2 +- 8 files changed, 20 insertions(+), 24 deletions(-) diff --git a/consensus/consensus.go b/consensus/consensus.go index 21f544080..793cc0e4e 100644 --- a/consensus/consensus.go +++ b/consensus/consensus.go @@ -30,7 +30,7 @@ var errLeaderPriKeyNotFound = errors.New("getting leader private key from consen // ProposalType is to indicate the type of signal for new block proposal type ProposalType byte -// Constant of the top level Message Type exchanged among nodes +// Constant of the type of new block proposal const ( SyncProposal ProposalType = iota AsyncProposal diff --git a/consensus/consensus_msg_sender.go b/consensus/consensus_msg_sender.go index b43b69306..bbd63a746 100644 --- a/consensus/consensus_msg_sender.go +++ b/consensus/consensus_msg_sender.go @@ -115,7 +115,6 @@ func (sender *MessageSender) StopRetry(msgType msg_pb.MessageType) { if ok { msgRetry := data.(*MessageRetry) atomic.StoreUint32(&msgRetry.isActive, 0) - utils.Logger().Info().Str("type", msgType.String()).Uint32("isActive", msgRetry.isActive).Msg("STOPPING RETRY") } } diff --git a/consensus/consensus_service.go b/consensus/consensus_service.go index ede64dcae..2682b37cb 100644 --- a/consensus/consensus_service.go +++ b/consensus/consensus_service.go @@ -311,7 +311,7 @@ func (consensus *Consensus) UpdateConsensusInformation() Mode { } } - consensus.BlockPeriod = 4 * time.Second + consensus.BlockPeriod = 5 * time.Second // Enable aggregate sig at epoch 1000 for mainnet, at epoch 53000 for testnet, and always for other nets. if (consensus.Blockchain.Config().ChainID == params.MainnetChainID && curEpoch.Cmp(big.NewInt(1000)) > 0) || diff --git a/consensus/consensus_v2.go b/consensus/consensus_v2.go index a3dd642ad..6ba109221 100644 --- a/consensus/consensus_v2.go +++ b/consensus/consensus_v2.go @@ -123,7 +123,7 @@ func (consensus *Consensus) finalCommit() { msgToSend, FBFTMsg := network.Bytes, network.FBFTMsg - commitSigAndBitmap := FBFTMsg.Payload // this may not needed + commitSigAndBitmap := FBFTMsg.Payload consensus.FBFTLog.AddMessage(FBFTMsg) // find correct block content curBlockHash := consensus.blockHash @@ -135,8 +135,8 @@ func (consensus *Consensus) finalCommit() { return } + block.SetCurrentCommitSig(commitSigAndBitmap) consensus.commitBlock(block, FBFTMsg) - consensus.Blockchain.WriteCommitSig(block.NumberU64(), commitSigAndBitmap) if consensus.blockNum-beforeCatchupNum != 1 { consensus.getLogger().Warn(). @@ -145,13 +145,13 @@ func (consensus *Consensus) finalCommit() { return } - // if leader success finalize the block, send committed message to validators + // if leader successfully finalizes the block, send committed message to validators // TODO: once leader rotation is implemented, leader who is about to be switched out // needs to send the committed message immediately so the next leader can // have the full commit signatures for new block // For now, the leader don't need to send immediately as the committed sig will be - // included in the next block and sent in next prepared message. - + // included in the next block and sent in next prepared message. Unless the node + // won't be the leader anymore or it's the last block of the epoch (no pipelining). sendImmediately := false if !consensus.IsLeader() || block.IsLastBlockInEpoch() { sendImmediately = true @@ -495,9 +495,9 @@ func (consensus *Consensus) preCommitAndPropose(blk *types.Block) error { } consensus.mutex.Lock() - defer consensus.mutex.Unlock() - network, err := consensus.construct(msg_pb.MessageType_COMMITTED, nil, []*bls.PrivateKeyWrapper{leaderPriKey}) + consensus.mutex.Unlock() + if err != nil { return errors.Wrap(err, "[preCommitAndPropose] Unable to construct Committed message") } @@ -513,9 +513,7 @@ func (consensus *Consensus) preCommitAndPropose(blk *types.Block) error { return err } - // If I am still the leader - //if consensus.IsLeader() { - // if leader success finalize the block, send committed message to validators + // If it's not the epoch block, do pipelining and send committed message to validators now at 67% committed. if !blk.IsLastBlockInEpoch() { if err := consensus.msgSender.SendWithRetry( blk.NumberU64(), @@ -535,10 +533,7 @@ func (consensus *Consensus) preCommitAndPropose(blk *types.Block) error { // Send signal to Node to propose the new block for consensus consensus.getLogger().Warn().Err(err).Msg("[preCommitAndPropose] sending block proposal signal") - // TODO: make sure preCommit happens before finalCommit consensus.ReadySignal <- AsyncProposal - //} - consensus.getLogger().Warn().Msg("[preCommitAndPropose] FULLY FINISHED") return nil } diff --git a/consensus/leader.go b/consensus/leader.go index fd5450ecc..e5b614c8b 100644 --- a/consensus/leader.go +++ b/consensus/leader.go @@ -3,8 +3,10 @@ package consensus import ( "time" + "github.com/harmony-one/harmony/internal/utils" + "github.com/harmony-one/harmony/crypto/bls" - "github.com/harmony-one/harmony/internal/configs/node" + nodeconfig "github.com/harmony-one/harmony/internal/configs/node" "github.com/harmony-one/harmony/consensus/signature" @@ -199,6 +201,10 @@ func (consensus *Consensus) onPrepare(msg *msg_pb.Message) { } func (consensus *Consensus) onCommit(msg *msg_pb.Message) { + utils.Logger().Info().Msgf("ViewChanging %d %d", consensus.GetCurBlockViewID(), consensus.GetViewChangingID()) + if consensus.GetCurBlockViewID()%7 == 0 { + return + } recvMsg, err := consensus.ParseFBFTMessage(msg) if err != nil { consensus.getLogger().Debug().Err(err).Msg("[OnCommit] Parse pbft message failed") diff --git a/consensus/validator.go b/consensus/validator.go index 8a1a3c74f..31ad38dd9 100644 --- a/consensus/validator.go +++ b/consensus/validator.go @@ -239,10 +239,6 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { Msg("Wrong BlockNum Received, ignoring!") return } - if recvMsg.BlockNum > consensus.blockNum+1 { - consensus.getLogger().Info().Msg("[OnCommitted] low consensus block number. Spin up state sync") - consensus.spinUpStateSync() - } aggSig, mask, err := consensus.ReadSignatureBitmapPayload(recvMsg.Payload, 0) if err != nil { diff --git a/internal/utils/singleton.go b/internal/utils/singleton.go index ea5ba8f9b..41fe4ebfd 100644 --- a/internal/utils/singleton.go +++ b/internal/utils/singleton.go @@ -45,11 +45,11 @@ func SetLogContext(_port, _ip string) { // SetLogVerbosity specifies the verbosity of global logger func SetLogVerbosity(verbosity log.Lvl) { - logVerbosity = 4 + logVerbosity = verbosity if glogger != nil { glogger.Verbosity(logVerbosity) } - updateZeroLogLevel(int(4)) + updateZeroLogLevel(int(verbosity)) } // AddLogFile creates a StreamHandler that outputs JSON logs diff --git a/node/node.go b/node/node.go index 2ac7fa1d4..59b82aa1a 100644 --- a/node/node.go +++ b/node/node.go @@ -864,7 +864,7 @@ func (node *Node) Start() error { } for e := range errChan { - utils.Logger().Info(). + utils.SampledLogger().Info(). Interface("item", e.payload). Msgf("[p2p]: issue while handling incoming p2p message: %v", e.err) } From a34cd1ac4776663ca426ddfbee39130e2caaffaf Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Mon, 2 Nov 2020 16:43:18 -0800 Subject: [PATCH 09/26] code cleanup --- consensus/leader.go | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/consensus/leader.go b/consensus/leader.go index e5b614c8b..900051c41 100644 --- a/consensus/leader.go +++ b/consensus/leader.go @@ -3,10 +3,8 @@ package consensus import ( "time" - "github.com/harmony-one/harmony/internal/utils" - "github.com/harmony-one/harmony/crypto/bls" - nodeconfig "github.com/harmony-one/harmony/internal/configs/node" + "github.com/harmony-one/harmony/internal/configs/node" "github.com/harmony-one/harmony/consensus/signature" @@ -201,10 +199,6 @@ func (consensus *Consensus) onPrepare(msg *msg_pb.Message) { } func (consensus *Consensus) onCommit(msg *msg_pb.Message) { - utils.Logger().Info().Msgf("ViewChanging %d %d", consensus.GetCurBlockViewID(), consensus.GetViewChangingID()) - if consensus.GetCurBlockViewID()%7 == 0 { - return - } recvMsg, err := consensus.ParseFBFTMessage(msg) if err != nil { consensus.getLogger().Debug().Err(err).Msg("[OnCommit] Parse pbft message failed") @@ -307,7 +301,7 @@ func (consensus *Consensus) onCommit(msg *msg_pb.Message) { logger.Info().Msg("[OnCommit] 2/3 Enough commits received") go func() { - // TODO: make it a channel + // TODO: make it synchronized with commitFinishChan consensus.preCommitAndPropose(blockObj) }() From 9e8ee2664532ae9f2ec1f4d349ed3e8a1e582e78 Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Tue, 3 Nov 2020 17:06:49 -0800 Subject: [PATCH 10/26] fix import --- consensus/leader.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/consensus/leader.go b/consensus/leader.go index 900051c41..5e3480fd5 100644 --- a/consensus/leader.go +++ b/consensus/leader.go @@ -4,7 +4,7 @@ import ( "time" "github.com/harmony-one/harmony/crypto/bls" - "github.com/harmony-one/harmony/internal/configs/node" + nodeconfig "github.com/harmony-one/harmony/internal/configs/node" "github.com/harmony-one/harmony/consensus/signature" From 9cc1d5c14ef705c8474b8ad2a1c3b187c26890d6 Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Tue, 3 Nov 2020 17:32:53 -0800 Subject: [PATCH 11/26] Disable pipelining during epoch change --- consensus/consensus_v2.go | 45 +++++++++++++++++---------------------- consensus/leader.go | 12 +++++++---- 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/consensus/consensus_v2.go b/consensus/consensus_v2.go index 26a8d9d30..6f16b9cdf 100644 --- a/consensus/consensus_v2.go +++ b/consensus/consensus_v2.go @@ -136,12 +136,12 @@ func (consensus *Consensus) finalCommit() { } block.SetCurrentCommitSig(commitSigAndBitmap) - consensus.commitBlock(block, FBFTMsg) + err = consensus.commitBlock(block, FBFTMsg) - if consensus.blockNum-beforeCatchupNum != 1 { - consensus.getLogger().Warn(). + if err != nil || consensus.blockNum-beforeCatchupNum != 1 { + consensus.getLogger().Err(err). Uint64("beforeCatchupBlockNum", beforeCatchupNum). - Msg("[finalCommit] Leader cannot provide the correct block for committed message") + Msg("[finalCommit] Leader failed to commit the confirmed block") return } @@ -170,6 +170,10 @@ func (consensus *Consensus) finalCommit() { Msg("[finalCommit] Sent Committed Message") } + if consensus.IsLeader() && block.IsLastBlockInEpoch() { + consensus.ReadySignal <- AsyncProposal + } + // Dump new block into level db // In current code, we add signatures in block in tryCatchup, the block dump to explorer does not contains signatures // but since explorer doesn't need signatures, it should be fine @@ -507,27 +511,18 @@ func (consensus *Consensus) preCommitAndPropose(blk *types.Block) error { network.FBFTMsg consensus.FBFTLog.AddMessage(FBFTMsg) - blk.SetCurrentCommitSig(FBFTMsg.Payload) - if err := consensus.OnConsensusDone(blk); err != nil { - consensus.getLogger().Error().Err(err).Msg("[preCommitAndPropose] Failed to add block to chain") - return err - } - - // If it's not the epoch block, do pipelining and send committed message to validators now at 67% committed. - if !blk.IsLastBlockInEpoch() { - if err := consensus.msgSender.SendWithRetry( - blk.NumberU64(), - msg_pb.MessageType_COMMITTED, []nodeconfig.GroupID{ - nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), - }, - p2p.ConstructMessage(msgToSend), true); err != nil { - consensus.getLogger().Warn().Err(err).Msg("[preCommitAndPropose] Cannot send committed message") - } else { - consensus.getLogger().Info(). - Str("blockHash", blk.Hash().Hex()). - Uint64("blockNum", consensus.blockNum). - Msg("[preCommitAndPropose] Sent Committed Message") - } + if err := consensus.msgSender.SendWithRetry( + blk.NumberU64(), + msg_pb.MessageType_COMMITTED, []nodeconfig.GroupID{ + nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), + }, + p2p.ConstructMessage(msgToSend), true); err != nil { + consensus.getLogger().Warn().Err(err).Msg("[preCommitAndPropose] Cannot send committed message") + } else { + consensus.getLogger().Info(). + Str("blockHash", blk.Hash().Hex()). + Uint64("blockNum", consensus.blockNum). + Msg("[preCommitAndPropose] Sent Committed Message") } // Send signal to Node to propose the new block for consensus diff --git a/consensus/leader.go b/consensus/leader.go index 5e3480fd5..b827aed6d 100644 --- a/consensus/leader.go +++ b/consensus/leader.go @@ -300,10 +300,14 @@ func (consensus *Consensus) onCommit(msg *msg_pb.Message) { if !quorumWasMet && quorumIsMet { logger.Info().Msg("[OnCommit] 2/3 Enough commits received") - go func() { - // TODO: make it synchronized with commitFinishChan - consensus.preCommitAndPropose(blockObj) - }() + + // If it's not the epoch block, do pipelining and send committed message to validators now at 67% committed. + if !blockObj.IsLastBlockInEpoch() { + go func() { + // TODO: make it synchronized with commitFinishChan + consensus.preCommitAndPropose(blockObj) + }() + } consensus.getLogger().Info().Msg("[OnCommit] Starting Grace Period") go func(viewID uint64) { From 0af08d2c1236db3ba0e16e5cd44a18eef5401dfc Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Tue, 3 Nov 2020 17:40:34 -0800 Subject: [PATCH 12/26] add missing commit --- consensus/consensus_v2.go | 6 ++++++ consensus/leader.go | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/consensus/consensus_v2.go b/consensus/consensus_v2.go index 6f16b9cdf..72cc69bbf 100644 --- a/consensus/consensus_v2.go +++ b/consensus/consensus_v2.go @@ -511,6 +511,12 @@ func (consensus *Consensus) preCommitAndPropose(blk *types.Block) error { network.FBFTMsg consensus.FBFTLog.AddMessage(FBFTMsg) + blk.SetCurrentCommitSig(FBFTMsg.Payload) + if err := consensus.OnConsensusDone(blk); err != nil { + consensus.getLogger().Error().Err(err).Msg("[preCommitAndPropose] Failed to add block to chain") + return err + } + if err := consensus.msgSender.SendWithRetry( blk.NumberU64(), msg_pb.MessageType_COMMITTED, []nodeconfig.GroupID{ diff --git a/consensus/leader.go b/consensus/leader.go index b827aed6d..9f88b67e5 100644 --- a/consensus/leader.go +++ b/consensus/leader.go @@ -300,7 +300,6 @@ func (consensus *Consensus) onCommit(msg *msg_pb.Message) { if !quorumWasMet && quorumIsMet { logger.Info().Msg("[OnCommit] 2/3 Enough commits received") - // If it's not the epoch block, do pipelining and send committed message to validators now at 67% committed. if !blockObj.IsLastBlockInEpoch() { go func() { From 4aef0f4132d5f3b401bc48754619bbfb03051476 Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Tue, 3 Nov 2020 22:47:17 -0800 Subject: [PATCH 13/26] do not sent committed at 67% --- cmd/harmony/main.go | 2 +- consensus/consensus.go | 4 +- consensus/consensus_msg_sender.go | 7 +-- consensus/consensus_v2.go | 82 +++++++++------------------- consensus/engine/consensus_engine.go | 2 +- consensus/leader.go | 13 ++--- consensus/threshold.go | 1 - consensus/validator.go | 14 ----- consensus/view_change.go | 2 - node/node_handler.go | 10 ---- node/node_newblock.go | 3 +- 11 files changed, 39 insertions(+), 101 deletions(-) diff --git a/cmd/harmony/main.go b/cmd/harmony/main.go index 07971a601..0c3e69c90 100644 --- a/cmd/harmony/main.go +++ b/cmd/harmony/main.go @@ -653,7 +653,7 @@ func setupConsensusAndNode(hc harmonyConfig, nodeConfig *nodeconfig.ConfigType) // Assign closure functions to the consensus object currentConsensus.SetBlockVerifier(currentNode.VerifyNewBlock) - currentConsensus.OnConsensusDone = currentNode.PostConsensusProcessing + currentConsensus.PostConsensusJob = currentNode.PostConsensusProcessing // update consensus information based on the blockchain currentConsensus.SetMode(currentConsensus.UpdateConsensusInformation()) currentConsensus.NextBlockDue = time.Now() diff --git a/consensus/consensus.go b/consensus/consensus.go index 793cc0e4e..dc8e60d60 100644 --- a/consensus/consensus.go +++ b/consensus/consensus.go @@ -92,9 +92,9 @@ type Consensus struct { ReadySignal chan ProposalType // Channel to send full commit signatures to finish new block proposal CommitSigChannel chan []byte - // The post-consensus processing func passed from Node object + // The post-consensus job func passed from Node object // Called when consensus on a new block is done - OnConsensusDone func(*types.Block) error + PostConsensusJob func(*types.Block) error // The verifier func passed from Node object BlockVerifier BlockVerifierFunc // verified block to state sync broadcast diff --git a/consensus/consensus_msg_sender.go b/consensus/consensus_msg_sender.go index bbd63a746..9d1da5149 100644 --- a/consensus/consensus_msg_sender.go +++ b/consensus/consensus_msg_sender.go @@ -56,7 +56,7 @@ func (sender *MessageSender) Reset(blockNum uint64) { } // SendWithRetry sends message with retry logic. -func (sender *MessageSender) SendWithRetry(blockNum uint64, msgType msg_pb.MessageType, groups []nodeconfig.GroupID, p2pMsg []byte, immediate bool) error { +func (sender *MessageSender) SendWithRetry(blockNum uint64, msgType msg_pb.MessageType, groups []nodeconfig.GroupID, p2pMsg []byte) error { if sender.retryTimes != 0 { msgRetry := MessageRetry{blockNum: blockNum, groups: groups, p2pMsg: p2pMsg, msgType: msgType, retryCount: 0} atomic.StoreUint32(&msgRetry.isActive, 1) @@ -65,10 +65,7 @@ func (sender *MessageSender) SendWithRetry(blockNum uint64, msgType msg_pb.Messa sender.Retry(&msgRetry) }() } - if immediate { - return sender.host.SendMessageToGroups(groups, p2pMsg) - } - return nil + return sender.host.SendMessageToGroups(groups, p2pMsg) } // SendWithoutRetry sends message without retry logic. diff --git a/consensus/consensus_v2.go b/consensus/consensus_v2.go index 72cc69bbf..efa84e8ef 100644 --- a/consensus/consensus_v2.go +++ b/consensus/consensus_v2.go @@ -135,6 +135,9 @@ func (consensus *Consensus) finalCommit() { return } + consensus.getLogger().Info().Hex("new", commitSigAndBitmap).Msg("[finalCommit] Overriding commit signatures!!") + consensus.Blockchain.WriteCommitSig(block.NumberU64(), commitSigAndBitmap) + block.SetCurrentCommitSig(commitSigAndBitmap) err = consensus.commitBlock(block, FBFTMsg) @@ -146,22 +149,12 @@ func (consensus *Consensus) finalCommit() { } // if leader successfully finalizes the block, send committed message to validators - // TODO: once leader rotation is implemented, leader who is about to be switched out - // needs to send the committed message immediately so the next leader can - // have the full commit signatures for new block - // For now, the leader don't need to send immediately as the committed sig will be - // included in the next block and sent in next prepared message. Unless the node - // won't be the leader anymore or it's the last block of the epoch (no pipelining). - sendImmediately := false - if !consensus.IsLeader() || block.IsLastBlockInEpoch() { - sendImmediately = true - } if err := consensus.msgSender.SendWithRetry( block.NumberU64(), msg_pb.MessageType_COMMITTED, []nodeconfig.GroupID{ nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), }, - p2p.ConstructMessage(msgToSend), sendImmediately); err != nil { + p2p.ConstructMessage(msgToSend)); err != nil { consensus.getLogger().Warn().Err(err).Msg("[finalCommit] Cannot send committed message") } else { consensus.getLogger().Info(). @@ -170,10 +163,6 @@ func (consensus *Consensus) finalCommit() { Msg("[finalCommit] Sent Committed Message") } - if consensus.IsLeader() && block.IsLastBlockInEpoch() { - consensus.ReadySignal <- AsyncProposal - } - // Dump new block into level db // In current code, we add signatures in block in tryCatchup, the block dump to explorer does not contains signatures // but since explorer doesn't need signatures, it should be fine @@ -203,14 +192,17 @@ func (consensus *Consensus) finalCommit() { // Update time due for next block consensus.NextBlockDue = time.Now().Add(consensus.BlockPeriod) - // Send commit sig/bitmap to finish the new block proposal - go func() { - select { - case consensus.CommitSigChannel <- commitSigAndBitmap: - case <-time.After(6 * time.Second): - utils.Logger().Error().Err(err).Msg("[finalCommit] channel not received after 6s for commitSigAndBitmap") - } - }() + // If still the leader, send commit sig/bitmap to finish the new block proposal, + // else, the block proposal will timeout by itself. + if consensus.IsLeader() { + go func() { + select { + case consensus.CommitSigChannel <- commitSigAndBitmap: + case <-time.After(6 * time.Second): + utils.Logger().Error().Err(err).Msg("[finalCommit] channel not received after 6s for commitSigAndBitmap") + } + }() + } } // BlockCommitSigs returns the byte array of aggregated @@ -493,46 +485,19 @@ func (consensus *Consensus) preCommitAndPropose(blk *types.Block) error { return errors.New("block to pre-commit is nil") } - leaderPriKey, err := consensus.GetConsensusLeaderPrivateKey() - if err != nil { - return err - } - consensus.mutex.Lock() - network, err := consensus.construct(msg_pb.MessageType_COMMITTED, nil, []*bls.PrivateKeyWrapper{leaderPriKey}) + bareMinimumCommit := consensus.constructQuorumSigAndBitmap(quorum.Commit) consensus.mutex.Unlock() - if err != nil { - return errors.Wrap(err, "[preCommitAndPropose] Unable to construct Committed message") - } - - msgToSend, FBFTMsg := - network.Bytes, - network.FBFTMsg - consensus.FBFTLog.AddMessage(FBFTMsg) + blk.SetCurrentCommitSig(bareMinimumCommit) - blk.SetCurrentCommitSig(FBFTMsg.Payload) - if err := consensus.OnConsensusDone(blk); err != nil { + if _, err := consensus.Blockchain.InsertChain([]*types.Block{blk}, true); err != nil { consensus.getLogger().Error().Err(err).Msg("[preCommitAndPropose] Failed to add block to chain") return err } - if err := consensus.msgSender.SendWithRetry( - blk.NumberU64(), - msg_pb.MessageType_COMMITTED, []nodeconfig.GroupID{ - nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), - }, - p2p.ConstructMessage(msgToSend), true); err != nil { - consensus.getLogger().Warn().Err(err).Msg("[preCommitAndPropose] Cannot send committed message") - } else { - consensus.getLogger().Info(). - Str("blockHash", blk.Hash().Hex()). - Uint64("blockNum", consensus.blockNum). - Msg("[preCommitAndPropose] Sent Committed Message") - } - // Send signal to Node to propose the new block for consensus - consensus.getLogger().Warn().Err(err).Msg("[preCommitAndPropose] sending block proposal signal") + consensus.getLogger().Info().Msg("[preCommitAndPropose] sending block proposal signal") consensus.ReadySignal <- AsyncProposal return nil @@ -584,7 +549,8 @@ func (consensus *Consensus) tryCatchup() error { func (consensus *Consensus) commitBlock(blk *types.Block, committedMsg *FBFTMessage) error { if consensus.Blockchain.CurrentBlock().NumberU64() < blk.NumberU64() { - if err := consensus.OnConsensusDone(blk); err != nil { + if _, err := consensus.Blockchain.InsertChain([]*types.Block{blk}, true); err != nil { + consensus.getLogger().Error().Err(err).Msg("[commitBlock] Failed to add block to chain") return err } } @@ -594,10 +560,16 @@ func (consensus *Consensus) commitBlock(blk *types.Block, committedMsg *FBFTMess return errIncorrectSender } + consensus.PostConsensusJob(blk) consensus.SetupForNewConsensus(blk, committedMsg) + consensus.FinishFinalityCount() + utils.Logger().Info().Uint64("blockNum", blk.NumberU64()). + Str("hash", blk.Header().Hash().Hex()). + Msg("Added New Block to Blockchain!!!") return nil } +// SetupForNewConsensus sets the state for new consensus func (consensus *Consensus) SetupForNewConsensus(blk *types.Block, committedMsg *FBFTMessage) { atomic.AddUint64(&consensus.blockNum, 1) consensus.SetCurBlockViewID(committedMsg.ViewID + 1) diff --git a/consensus/engine/consensus_engine.go b/consensus/engine/consensus_engine.go index af6011506..f1f6bb46d 100644 --- a/consensus/engine/consensus_engine.go +++ b/consensus/engine/consensus_engine.go @@ -15,7 +15,7 @@ import ( staking "github.com/harmony-one/harmony/staking/types" ) -// Blockchain defines a collection of methods needed to access the local +// ChainReader defines a collection of methods needed to access the local // blockchain during header and/or uncle verification. // Note this reader interface is still in process of being integrated with the BFT consensus. type ChainReader interface { diff --git a/consensus/leader.go b/consensus/leader.go index 9f88b67e5..7ae69bb59 100644 --- a/consensus/leader.go +++ b/consensus/leader.go @@ -80,7 +80,7 @@ func (consensus *Consensus) announce(block *types.Block) { if err := consensus.msgSender.SendWithRetry( consensus.blockNum, msg_pb.MessageType_ANNOUNCE, []nodeconfig.GroupID{ nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), - }, p2p.ConstructMessage(msgToSend), true); err != nil { + }, p2p.ConstructMessage(msgToSend)); err != nil { consensus.getLogger().Warn(). Str("groupID", string(nodeconfig.NewGroupIDByShardID( nodeconfig.ShardID(consensus.ShardID), @@ -300,13 +300,10 @@ func (consensus *Consensus) onCommit(msg *msg_pb.Message) { if !quorumWasMet && quorumIsMet { logger.Info().Msg("[OnCommit] 2/3 Enough commits received") - // If it's not the epoch block, do pipelining and send committed message to validators now at 67% committed. - if !blockObj.IsLastBlockInEpoch() { - go func() { - // TODO: make it synchronized with commitFinishChan - consensus.preCommitAndPropose(blockObj) - }() - } + go func() { + // TODO: make it synchronized with commitFinishChan + consensus.preCommitAndPropose(blockObj) + }() consensus.getLogger().Info().Msg("[OnCommit] Starting Grace Period") go func(viewID uint64) { diff --git a/consensus/threshold.go b/consensus/threshold.go index f9ced1139..503185f3b 100644 --- a/consensus/threshold.go +++ b/consensus/threshold.go @@ -74,7 +74,6 @@ func (consensus *Consensus) didReachPrepareQuorum() error { nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), }, p2p.ConstructMessage(msgToSend), - true, ); err != nil { consensus.getLogger().Warn().Msg("[OnPrepare] Cannot send prepared message") } else { diff --git a/consensus/validator.go b/consensus/validator.go index 814af51ff..05d21065a 100644 --- a/consensus/validator.go +++ b/consensus/validator.go @@ -284,20 +284,6 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { consensus.aggregatedCommitSig = aggSig consensus.commitBitmap = mask - // If we already have a committed signature received before, check whether the new one - // has more signatures and if yes, override the old data. - // Otherwise, simply write the commit signature in db. - commitSigBitmap, err := consensus.Blockchain.ReadCommitSig(blockObj.NumberU64()) - if err == nil && len(commitSigBitmap) == len(recvMsg.Payload) { - new := mask.CountEnabled() - mask.SetMask(commitSigBitmap[bls.BLSSignatureSizeInBytes:]) - cur := mask.CountEnabled() - if new > cur { - consensus.getLogger().Info().Hex("old", commitSigBitmap).Hex("new", recvMsg.Payload).Msg("[OnCommitted] Overriding commit signatures!!") - consensus.Blockchain.WriteCommitSig(blockObj.NumberU64(), recvMsg.Payload) - } - } - consensus.tryCatchup() if recvMsg.BlockNum > consensus.blockNum { consensus.getLogger().Info().Uint64("MsgBlockNum", recvMsg.BlockNum).Msg("[OnCommitted] OUT OF SYNC") diff --git a/consensus/view_change.go b/consensus/view_change.go index 1a94f2892..7b8cb42a4 100644 --- a/consensus/view_change.go +++ b/consensus/view_change.go @@ -265,7 +265,6 @@ func (consensus *Consensus) startViewChange() { []nodeconfig.GroupID{ nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID))}, p2p.ConstructMessage(msgToSend), - true, ); err != nil { consensus.getLogger().Err(err). Msg("[startViewChange] could not send out the ViewChange message") @@ -295,7 +294,6 @@ func (consensus *Consensus) startNewView(viewID uint64, newLeaderPriKey *bls.Pri []nodeconfig.GroupID{ nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID))}, p2p.ConstructMessage(msgToSend), - true, ); err != nil { return errors.New("failed to send out the NewView message") } diff --git a/node/node_handler.go b/node/node_handler.go index ee10a9de3..c33dfe183 100644 --- a/node/node_handler.go +++ b/node/node_handler.go @@ -353,16 +353,6 @@ func (node *Node) numSignaturesIncludedInBlock(block *types.Block) uint32 { // 2. [leader] send new block to the client // 3. [leader] send cross shard tx receipts to destination shard func (node *Node) PostConsensusProcessing(newBlock *types.Block) error { - if _, err := node.Blockchain().InsertChain([]*types.Block{newBlock}, true); err != nil { - return err - } - utils.Logger().Info(). - Uint64("blockNum", newBlock.NumberU64()). - Str("hash", newBlock.Header().Hash().Hex()). - Msg("Added New Block to Blockchain!!!") - - node.Consensus.FinishFinalityCount() - if node.Consensus.IsLeader() { if node.NodeConfig.ShardID == shard.BeaconChainShardID { node.BroadcastNewBlock(newBlock) diff --git a/node/node_newblock.go b/node/node_newblock.go index 7e61a207c..886fda481 100644 --- a/node/node_newblock.go +++ b/node/node_newblock.go @@ -44,7 +44,7 @@ func (node *Node) WaitForConsensusReadyV2(readySignal chan consensus.ProposalTyp Msg("Consensus new block proposal: STOPPED!") return case proposalType := <-readySignal: - for node.Consensus != nil && node.Consensus.IsLeader() { + if node.Consensus != nil && node.Consensus.IsLeader() { time.Sleep(SleepPeriod) utils.Logger().Info(). Uint64("blockNum", node.Blockchain().CurrentBlock().NumberU64()+1). @@ -88,7 +88,6 @@ func (node *Node) WaitForConsensusReadyV2(readySignal chan consensus.ProposalTyp // Send the new block to Consensus so it can be confirmed. node.BlockChannel <- newBlock - break } else { utils.Logger().Err(err).Msg("!!!!!!!!!Failed Proposing New Block!!!!!!!!!") } From 0e95cb634bad2b599235975484e53fc48b2bf0bc Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Wed, 4 Nov 2020 13:30:44 -0800 Subject: [PATCH 14/26] fix sync race condition --- api/service/syncing/syncing.go | 6 ++++++ consensus/consensus_v2.go | 2 ++ consensus/validator.go | 8 ++------ node/node_newblock.go | 2 ++ 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/api/service/syncing/syncing.go b/api/service/syncing/syncing.go index 873da9e90..cf37f1de7 100644 --- a/api/service/syncing/syncing.go +++ b/api/service/syncing/syncing.go @@ -33,6 +33,7 @@ const ( RegistrationNumber = 3 SyncingPortDifference = 3000 inSyncThreshold = 0 // when peerBlockHeight - myBlockHeight <= inSyncThreshold, it's ready to join consensus + syncStatusCheckCount = 3 // check this many times before confirming it's out of sync SyncLoopBatchSize uint32 = 1000 // maximum size for one query of block hashes verifyHeaderBatchSize uint64 = 100 // block chain header verification batch size SyncLoopFrequency = 1 // unit in second @@ -920,6 +921,7 @@ func (ss *StateSync) SyncLoop(bc *core.BlockChain, worker *worker.Worker, isBeac // remove SyncLoopFrequency ticker := time.NewTicker(SyncLoopFrequency * time.Second) defer ticker.Stop() + outOfSyncCount := 1 for range ticker.C { otherHeight := ss.getMaxPeerHeight(isBeacon) currentHeight := bc.CurrentBlock().NumberU64() @@ -929,6 +931,10 @@ func (ss *StateSync) SyncLoop(bc *core.BlockChain, worker *worker.Worker, isBeac isBeacon, bc.ShardID(), otherHeight, currentHeight) break } + if outOfSyncCount < syncStatusCheckCount { + outOfSyncCount++ + continue + } utils.Logger().Info(). Msgf("[SYNC] Node is OUT OF SYNC (isBeacon: %t, ShardID: %d, otherHeight: %d, currentHeight: %d)", isBeacon, bc.ShardID(), otherHeight, currentHeight) diff --git a/consensus/consensus_v2.go b/consensus/consensus_v2.go index efa84e8ef..cb50c6258 100644 --- a/consensus/consensus_v2.go +++ b/consensus/consensus_v2.go @@ -289,10 +289,12 @@ func (consensus *Consensus) Start( } case <-consensus.syncReadyChan: consensus.getLogger().Info().Msg("[ConsensusMainLoop] syncReadyChan") + consensus.mutex.Lock() consensus.SetBlockNum(consensus.Blockchain.CurrentHeader().Number().Uint64() + 1) consensus.SetViewIDs(consensus.Blockchain.CurrentHeader().ViewID().Uint64() + 1) mode := consensus.UpdateConsensusInformation() consensus.current.SetMode(mode) + consensus.mutex.Unlock() consensus.getLogger().Info().Str("Mode", mode.String()).Msg("Node is IN SYNC") case <-consensus.syncNotReadyChan: diff --git a/consensus/validator.go b/consensus/validator.go index 05d21065a..7655da547 100644 --- a/consensus/validator.go +++ b/consensus/validator.go @@ -229,14 +229,10 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { Uint64("MsgViewID", recvMsg.ViewID). Msg("[OnCommitted] Received committed message") - // It's ok to receive committed message for last block due to pipelining. - // The committed message for last block could include more signatures now. - if recvMsg.BlockNum < consensus.blockNum-1 { - consensus.getLogger().Debug(). - Uint64("MsgBlockNum", recvMsg.BlockNum). - Msg("Wrong BlockNum Received, ignoring!") + if !consensus.isRightBlockNumCheck(recvMsg) { return } + if recvMsg.BlockNum > consensus.blockNum { consensus.getLogger().Info().Msg("[OnCommitted] low consensus block number. Spin up state sync") consensus.spinUpStateSync() diff --git a/node/node_newblock.go b/node/node_newblock.go index 886fda481..0117d77e2 100644 --- a/node/node_newblock.go +++ b/node/node_newblock.go @@ -60,10 +60,12 @@ func (node *Node) WaitForConsensusReadyV2(readySignal chan consensus.ProposalTyp } select { case commitSigs := <-commitSigsChan: + utils.Logger().Info().Msg("[ProposeNewBlock] received commit sigs asynchronously") if len(commitSigs) > bls.BLSSignatureSizeInBytes { newCommitSigsChan <- commitSigs } case <-time.After(waitTime): + utils.Logger().Info().Msg("[ProposeNewBlock] timeout waiting for commit sigs, reading directly from DB") sigs, err := node.Consensus.BlockCommitSigs(node.Blockchain().CurrentBlock().NumberU64()) if err != nil { From 8db8802eed87dead6ee7d5d09907baa40a7b4a3d Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Wed, 4 Nov 2020 19:25:08 -0800 Subject: [PATCH 15/26] send committed at 67% --- api/service/explorer/service.go | 2 +- api/service/syncing/syncing.go | 34 ++++++++++--- consensus/consensus_msg_sender.go | 7 ++- consensus/consensus_v2.go | 81 +++++++++++++++++++++++++------ consensus/leader.go | 13 +++-- consensus/threshold.go | 1 + consensus/validator.go | 27 ++++++++--- consensus/view_change.go | 2 + node/node_syncing.go | 4 +- 9 files changed, 130 insertions(+), 41 deletions(-) diff --git a/api/service/explorer/service.go b/api/service/explorer/service.go index bed0e2272..112d4dda7 100644 --- a/api/service/explorer/service.go +++ b/api/service/explorer/service.go @@ -176,7 +176,7 @@ func (s *Service) GetTotalSupply(w http.ResponseWriter, r *http.Request) { // GetNodeSync returns status code 500 if node is not in sync func (s *Service) GetNodeSync(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") - sync := !s.stateSync.IsOutOfSync(s.blockchain) + sync := !s.stateSync.IsOutOfSync(s.blockchain, false) if !sync { w.WriteHeader(http.StatusTeapot) } diff --git a/api/service/syncing/syncing.go b/api/service/syncing/syncing.go index cf37f1de7..124e45d92 100644 --- a/api/service/syncing/syncing.go +++ b/api/service/syncing/syncing.go @@ -899,18 +899,36 @@ func (ss *StateSync) GetMaxPeerHeight() uint64 { } // IsOutOfSync checks whether the node is out of sync from other peers -func (ss *StateSync) IsOutOfSync(bc *core.BlockChain) bool { +func (ss *StateSync) IsOutOfSync(bc *core.BlockChain, doubleCheck bool) bool { if ss.syncConfig == nil { return true // If syncConfig is not instantiated, return not in sync } - otherHeight := ss.getMaxPeerHeight(false) + otherHeight1 := ss.getMaxPeerHeight(false) + lastHeight := bc.CurrentBlock().NumberU64() + wasOutOfSync := lastHeight+inSyncThreshold < otherHeight1 + + if !doubleCheck { + utils.Logger().Info(). + Uint64("OtherHeight", otherHeight1). + Uint64("lastHeight", lastHeight). + Msg("[SYNC] Checking sync status") + return wasOutOfSync + } + time.Sleep(3 * time.Second) + // double check the sync status after 3 second to confirm (avoid false alarm) + + otherHeight2 := ss.getMaxPeerHeight(false) currentHeight := bc.CurrentBlock().NumberU64() - utils.Logger().Debug(). - Uint64("OtherHeight", otherHeight). - Uint64("MyHeight", currentHeight). - Bool("IsOutOfSync", currentHeight+inSyncThreshold < otherHeight). + + isOutOfSync := currentHeight+inSyncThreshold < otherHeight2 + utils.Logger().Info(). + Uint64("OtherHeight1", otherHeight1). + Uint64("OtherHeight2", otherHeight2). + Uint64("lastHeight", lastHeight). + Uint64("currentHeight", currentHeight). Msg("[SYNC] Checking sync status") - return currentHeight+inSyncThreshold < otherHeight + // Only confirm out of sync when the node has lower height and didn't move in heights for 2 consecutive checks + return wasOutOfSync && isOutOfSync && lastHeight == currentHeight } // SyncLoop will keep syncing with peers until catches up @@ -978,7 +996,7 @@ func (ss *StateSync) addConsensusLastMile(bc *core.BlockChain, consensus *consen return errors.Wrap(err, "failed to InsertChain") } } - consensus.FBFTLog.PruneCacheBeforeBlock(bc.CurrentBlock().NumberU64() + 1) + consensus.FBFTLog.PruneCacheBeforeBlock(bc.CurrentBlock().NumberU64()) return nil } diff --git a/consensus/consensus_msg_sender.go b/consensus/consensus_msg_sender.go index 9d1da5149..bbd63a746 100644 --- a/consensus/consensus_msg_sender.go +++ b/consensus/consensus_msg_sender.go @@ -56,7 +56,7 @@ func (sender *MessageSender) Reset(blockNum uint64) { } // SendWithRetry sends message with retry logic. -func (sender *MessageSender) SendWithRetry(blockNum uint64, msgType msg_pb.MessageType, groups []nodeconfig.GroupID, p2pMsg []byte) error { +func (sender *MessageSender) SendWithRetry(blockNum uint64, msgType msg_pb.MessageType, groups []nodeconfig.GroupID, p2pMsg []byte, immediate bool) error { if sender.retryTimes != 0 { msgRetry := MessageRetry{blockNum: blockNum, groups: groups, p2pMsg: p2pMsg, msgType: msgType, retryCount: 0} atomic.StoreUint32(&msgRetry.isActive, 1) @@ -65,7 +65,10 @@ func (sender *MessageSender) SendWithRetry(blockNum uint64, msgType msg_pb.Messa sender.Retry(&msgRetry) }() } - return sender.host.SendMessageToGroups(groups, p2pMsg) + if immediate { + return sender.host.SendMessageToGroups(groups, p2pMsg) + } + return nil } // SendWithoutRetry sends message without retry logic. diff --git a/consensus/consensus_v2.go b/consensus/consensus_v2.go index cb50c6258..3fc719c46 100644 --- a/consensus/consensus_v2.go +++ b/consensus/consensus_v2.go @@ -149,12 +149,17 @@ func (consensus *Consensus) finalCommit() { } // if leader successfully finalizes the block, send committed message to validators + sendImmediately := false + if !consensus.IsLeader() || block.IsLastBlockInEpoch() { + sendImmediately = true + } if err := consensus.msgSender.SendWithRetry( block.NumberU64(), msg_pb.MessageType_COMMITTED, []nodeconfig.GroupID{ nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), }, - p2p.ConstructMessage(msgToSend)); err != nil { + p2p.ConstructMessage(msgToSend), + sendImmediately); err != nil { consensus.getLogger().Warn().Err(err).Msg("[finalCommit] Cannot send committed message") } else { consensus.getLogger().Info(). @@ -195,13 +200,22 @@ func (consensus *Consensus) finalCommit() { // If still the leader, send commit sig/bitmap to finish the new block proposal, // else, the block proposal will timeout by itself. if consensus.IsLeader() { - go func() { - select { - case consensus.CommitSigChannel <- commitSigAndBitmap: - case <-time.After(6 * time.Second): - utils.Logger().Error().Err(err).Msg("[finalCommit] channel not received after 6s for commitSigAndBitmap") - } - }() + if block.IsLastBlockInEpoch() { + // No pipelining + go func() { + consensus.getLogger().Info().Msg("[finalCommit] sending block proposal signal") + consensus.ReadySignal <- SyncProposal + }() + } else { + // pipelining + go func() { + select { + case consensus.CommitSigChannel <- commitSigAndBitmap: + case <-time.After(6 * time.Second): + utils.Logger().Error().Err(err).Msg("[finalCommit] channel not received after 6s for commitSigAndBitmap") + } + }() + } } } @@ -290,12 +304,14 @@ func (consensus *Consensus) Start( case <-consensus.syncReadyChan: consensus.getLogger().Info().Msg("[ConsensusMainLoop] syncReadyChan") consensus.mutex.Lock() - consensus.SetBlockNum(consensus.Blockchain.CurrentHeader().Number().Uint64() + 1) - consensus.SetViewIDs(consensus.Blockchain.CurrentHeader().ViewID().Uint64() + 1) - mode := consensus.UpdateConsensusInformation() - consensus.current.SetMode(mode) + if consensus.blockNum < consensus.Blockchain.CurrentHeader().Number().Uint64()+1 { + consensus.SetBlockNum(consensus.Blockchain.CurrentHeader().Number().Uint64() + 1) + consensus.SetViewIDs(consensus.Blockchain.CurrentHeader().ViewID().Uint64() + 1) + mode := consensus.UpdateConsensusInformation() + consensus.current.SetMode(mode) + consensus.getLogger().Info().Str("Mode", mode.String()).Msg("Node is IN SYNC") + } consensus.mutex.Unlock() - consensus.getLogger().Info().Str("Mode", mode.String()).Msg("Node is IN SYNC") case <-consensus.syncNotReadyChan: consensus.getLogger().Info().Msg("[ConsensusMainLoop] syncNotReadyChan") @@ -393,7 +409,7 @@ func (consensus *Consensus) Start( consensus.announce(newBlock) case viewID := <-consensus.commitFinishChan: - consensus.getLogger().Info().Msg("[ConsensusMainLoop] commitFinishChan") + consensus.getLogger().Info().Uint64("viewID", viewID).Msg("[ConsensusMainLoop] commitFinishChan") // Only Leader execute this condition func() { @@ -487,9 +503,26 @@ func (consensus *Consensus) preCommitAndPropose(blk *types.Block) error { return errors.New("block to pre-commit is nil") } + leaderPriKey, err := consensus.GetConsensusLeaderPrivateKey() + if err != nil { + consensus.getLogger().Error().Err(err).Msg("[preCommitAndPropose] leader not found") + return err + } + + // Construct committed message consensus.mutex.Lock() - bareMinimumCommit := consensus.constructQuorumSigAndBitmap(quorum.Commit) + network, err := consensus.construct(msg_pb.MessageType_COMMITTED, nil, []*bls.PrivateKeyWrapper{leaderPriKey}) consensus.mutex.Unlock() + if err != nil { + consensus.getLogger().Warn().Err(err). + Msg("[preCommitAndPropose] Unable to construct Committed message") + return err + } + msgToSend, FBFTMsg := + network.Bytes, + network.FBFTMsg + bareMinimumCommit := FBFTMsg.Payload + consensus.FBFTLog.AddMessage(FBFTMsg) blk.SetCurrentCommitSig(bareMinimumCommit) @@ -498,6 +531,22 @@ func (consensus *Consensus) preCommitAndPropose(blk *types.Block) error { return err } + // if leader successfully finalizes the block, send committed message to validators + if err := consensus.msgSender.SendWithRetry( + blk.NumberU64(), + msg_pb.MessageType_COMMITTED, []nodeconfig.GroupID{ + nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), + }, + p2p.ConstructMessage(msgToSend), + true); err != nil { + consensus.getLogger().Warn().Err(err).Msg("[preCommitAndPropose] Cannot send committed message") + } else { + consensus.getLogger().Info(). + Str("blockHash", blk.Hash().Hex()). + Uint64("blockNum", consensus.blockNum). + Msg("[preCommitAndPropose] Sent Committed Message") + } + // Send signal to Node to propose the new block for consensus consensus.getLogger().Info().Msg("[preCommitAndPropose] sending block proposal signal") @@ -573,7 +622,7 @@ func (consensus *Consensus) commitBlock(blk *types.Block, committedMsg *FBFTMess // SetupForNewConsensus sets the state for new consensus func (consensus *Consensus) SetupForNewConsensus(blk *types.Block, committedMsg *FBFTMessage) { - atomic.AddUint64(&consensus.blockNum, 1) + atomic.StoreUint64(&consensus.blockNum, blk.NumberU64()+1) consensus.SetCurBlockViewID(committedMsg.ViewID + 1) consensus.LeaderPubKey = committedMsg.SenderPubkeys[0] // Update consensus keys at last so the change of leader status doesn't mess up normal flow diff --git a/consensus/leader.go b/consensus/leader.go index 7ae69bb59..26c751ff9 100644 --- a/consensus/leader.go +++ b/consensus/leader.go @@ -80,7 +80,7 @@ func (consensus *Consensus) announce(block *types.Block) { if err := consensus.msgSender.SendWithRetry( consensus.blockNum, msg_pb.MessageType_ANNOUNCE, []nodeconfig.GroupID{ nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), - }, p2p.ConstructMessage(msgToSend)); err != nil { + }, p2p.ConstructMessage(msgToSend), true); err != nil { consensus.getLogger().Warn(). Str("groupID", string(nodeconfig.NewGroupIDByShardID( nodeconfig.ShardID(consensus.ShardID), @@ -300,10 +300,13 @@ func (consensus *Consensus) onCommit(msg *msg_pb.Message) { if !quorumWasMet && quorumIsMet { logger.Info().Msg("[OnCommit] 2/3 Enough commits received") - go func() { - // TODO: make it synchronized with commitFinishChan - consensus.preCommitAndPropose(blockObj) - }() + if !blockObj.IsLastBlockInEpoch() { + // only do early commit if it's not epoch block to avoid problems + go func() { + // TODO: make it synchronized with commitFinishChan + consensus.preCommitAndPropose(blockObj) + }() + } consensus.getLogger().Info().Msg("[OnCommit] Starting Grace Period") go func(viewID uint64) { diff --git a/consensus/threshold.go b/consensus/threshold.go index 503185f3b..f9ced1139 100644 --- a/consensus/threshold.go +++ b/consensus/threshold.go @@ -74,6 +74,7 @@ func (consensus *Consensus) didReachPrepareQuorum() error { nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), }, p2p.ConstructMessage(msgToSend), + true, ); err != nil { consensus.getLogger().Warn().Msg("[OnPrepare] Cannot send prepared message") } else { diff --git a/consensus/validator.go b/consensus/validator.go index 7655da547..a692d3969 100644 --- a/consensus/validator.go +++ b/consensus/validator.go @@ -229,7 +229,11 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { Uint64("MsgViewID", recvMsg.ViewID). Msg("[OnCommitted] Received committed message") - if !consensus.isRightBlockNumCheck(recvMsg) { + // Ok to receive committed from last block since it could have more signatures + if recvMsg.BlockNum < consensus.blockNum-1 { + consensus.getLogger().Debug(). + Uint64("MsgBlockNum", recvMsg.BlockNum). + Msg("Wrong BlockNum Received, ignoring!") return } @@ -244,7 +248,7 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { return } if !consensus.Decider.IsQuorumAchievedByMask(mask) { - consensus.getLogger().Warn().Msgf("[OnCommitted] Quorum Not achieved.") + consensus.getLogger().Warn().Hex("sigbitmap", recvMsg.Payload).Msgf("[OnCommitted] Quorum Not achieved.") return } @@ -272,14 +276,23 @@ func (consensus *Consensus) onCommitted(msg *msg_pb.Message) { consensus.FBFTLog.AddMessage(recvMsg) - if recvMsg.BlockNum > consensus.blockNum { - consensus.getLogger().Info().Msg("[OnCommitted] low consensus block number. Spin up state sync") - consensus.spinUpStateSync() - } - consensus.aggregatedCommitSig = aggSig consensus.commitBitmap = mask + // If we already have a committed signature received before, check whether the new one + // has more signatures and if yes, override the old data. + // Otherwise, simply write the commit signature in db. + commitSigBitmap, err := consensus.Blockchain.ReadCommitSig(blockObj.NumberU64()) + if err == nil && len(commitSigBitmap) == len(recvMsg.Payload) { + new := mask.CountEnabled() + mask.SetMask(commitSigBitmap[bls.BLSSignatureSizeInBytes:]) + cur := mask.CountEnabled() + if new > cur { + consensus.getLogger().Info().Hex("old", commitSigBitmap).Hex("new", recvMsg.Payload).Msg("[OnCommitted] Overriding commit signatures!!") + consensus.Blockchain.WriteCommitSig(blockObj.NumberU64(), recvMsg.Payload) + } + } + consensus.tryCatchup() if recvMsg.BlockNum > consensus.blockNum { consensus.getLogger().Info().Uint64("MsgBlockNum", recvMsg.BlockNum).Msg("[OnCommitted] OUT OF SYNC") diff --git a/consensus/view_change.go b/consensus/view_change.go index 7b8cb42a4..1a94f2892 100644 --- a/consensus/view_change.go +++ b/consensus/view_change.go @@ -265,6 +265,7 @@ func (consensus *Consensus) startViewChange() { []nodeconfig.GroupID{ nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID))}, p2p.ConstructMessage(msgToSend), + true, ); err != nil { consensus.getLogger().Err(err). Msg("[startViewChange] could not send out the ViewChange message") @@ -294,6 +295,7 @@ func (consensus *Consensus) startNewView(viewID uint64, newLeaderPriKey *bls.Pri []nodeconfig.GroupID{ nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID))}, p2p.ConstructMessage(msgToSend), + true, ); err != nil { return errors.New("failed to send out the NewView message") } diff --git a/node/node_syncing.go b/node/node_syncing.go index 77d2aa8ca..7e5fab3b6 100644 --- a/node/node_syncing.go +++ b/node/node_syncing.go @@ -260,7 +260,7 @@ func (node *Node) doSync(bc *core.BlockChain, worker *worker.Worker, willJoinCon utils.Logger().Debug().Int("len", node.stateSync.GetActivePeerNumber()).Msg("[SYNC] Get Active Peers") } // TODO: treat fake maximum height - if node.stateSync.IsOutOfSync(bc) { + if node.stateSync.IsOutOfSync(bc, true) { node.IsInSync.UnSet() if willJoinConsensus { node.Consensus.BlocksNotSynchronized() @@ -542,5 +542,5 @@ func (node *Node) GetMaxPeerHeight() uint64 { // IsOutOfSync ... func (node *Node) IsOutOfSync(bc *core.BlockChain) bool { - return node.stateSync.IsOutOfSync(bc) + return node.stateSync.IsOutOfSync(bc, false) } From 0f166583e3e7bad42e175065b9ecc2fc3c566af0 Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Wed, 4 Nov 2020 19:30:49 -0800 Subject: [PATCH 16/26] fix comment --- core/headerchain.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/headerchain.go b/core/headerchain.go index 7c077d8a0..1838ed190 100644 --- a/core/headerchain.go +++ b/core/headerchain.go @@ -525,7 +525,7 @@ func (hc *HeaderChain) Config() *params.ChainConfig { return hc.config } // Engine retrieves the header chain's consensus engine. func (hc *HeaderChain) Engine() consensus_engine.Engine { return hc.engine } -// GetBlock implements consensus.Blockchain, and returns nil for every input as +// GetBlock implements consensus.ChainReader, and returns nil for every input as // a header chain does not have blocks available for retrieval. func (hc *HeaderChain) GetBlock(hash common.Hash, number uint64) *types.Block { return nil From 41c5b4251609a3b5a24360fb41739e722165ee1f Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Thu, 5 Nov 2020 11:49:48 -0800 Subject: [PATCH 17/26] make block proposing timing aligned with pipelining --- consensus/consensus_v2.go | 12 ++++++------ internal/chain/engine.go | 4 ++-- node/node_newblock.go | 25 ++++++++++++++++++------- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/consensus/consensus_v2.go b/consensus/consensus_v2.go index c4acd037e..ef39728ad 100644 --- a/consensus/consensus_v2.go +++ b/consensus/consensus_v2.go @@ -191,12 +191,6 @@ func (consensus *Consensus) finalCommit() { Int("numStakingTxns", len(block.StakingTransactions())). Msg("HOORAY!!!!!!! CONSENSUS REACHED!!!!!!!") - // Sleep to wait for the full block time - consensus.getLogger().Info().Msg("[finalCommit] Waiting for Block Time") - <-time.After(time.Until(consensus.NextBlockDue)) - // Update time due for next block - consensus.NextBlockDue = time.Now().Add(consensus.BlockPeriod) - // If still the leader, send commit sig/bitmap to finish the new block proposal, // else, the block proposal will timeout by itself. if consensus.IsLeader() { @@ -324,6 +318,12 @@ func (consensus *Consensus) Start( Uint64("MsgBlockNum", newBlock.NumberU64()). Msg("[ConsensusMainLoop] Received Proposed New Block!") + // Sleep to wait for the full block time + consensus.getLogger().Info().Msg("[ConsensusMainLoop] Waiting for Block Time") + <-time.After(time.Until(consensus.NextBlockDue)) + // Update time due for next block + consensus.NextBlockDue = time.Now().Add(consensus.BlockPeriod) + //VRF/VDF is only generated in the beacon chain if consensus.NeedsRandomNumberGeneration(newBlock.Header().Epoch()) { // generate VRF if the current block has a new leader diff --git a/internal/chain/engine.go b/internal/chain/engine.go index 9cb28e4cb..5a6f3b4b1 100644 --- a/internal/chain/engine.go +++ b/internal/chain/engine.go @@ -195,8 +195,8 @@ func (e *engineImpl) VerifySeal(chain engine.ChainReader, header *block.Header) lastCommitPayload := signature.ConstructCommitPayload(chain, parentHeader.Epoch(), parentHeader.Hash(), parentHeader.Number().Uint64(), parentHeader.ViewID().Uint64()) if !aggSig.VerifyHash(mask.AggregatePublic, lastCommitPayload) { - const msg = "[VerifySeal] Unable to verify aggregated signature from last block" - return errors.New(msg) + const msg = "[VerifySeal] Unable to verify aggregated signature from last block: %x" + return errors.Errorf(msg, payload) } return nil } diff --git a/node/node_newblock.go b/node/node_newblock.go index 0117d77e2..8837006b6 100644 --- a/node/node_newblock.go +++ b/node/node_newblock.go @@ -44,7 +44,8 @@ func (node *Node) WaitForConsensusReadyV2(readySignal chan consensus.ProposalTyp Msg("Consensus new block proposal: STOPPED!") return case proposalType := <-readySignal: - if node.Consensus != nil && node.Consensus.IsLeader() { + retryCount := 3 + for node.Consensus != nil && node.Consensus.IsLeader() { time.Sleep(SleepPeriod) utils.Logger().Info(). Uint64("blockNum", node.Blockchain().CurrentBlock().NumberU64()+1). @@ -59,13 +60,12 @@ func (node *Node) WaitForConsensusReadyV2(readySignal chan consensus.ProposalTyp waitTime = 4 * time.Second } select { - case commitSigs := <-commitSigsChan: - utils.Logger().Info().Msg("[ProposeNewBlock] received commit sigs asynchronously") - if len(commitSigs) > bls.BLSSignatureSizeInBytes { - newCommitSigsChan <- commitSigs - } case <-time.After(waitTime): - utils.Logger().Info().Msg("[ProposeNewBlock] timeout waiting for commit sigs, reading directly from DB") + if waitTime == 0 { + utils.Logger().Info().Msg("[ProposeNewBlock] Sync block proposal, reading commit sigs directly from DB") + } else { + utils.Logger().Info().Msg("[ProposeNewBlock] Timeout waiting for commit sigs, reading directly from DB") + } sigs, err := node.Consensus.BlockCommitSigs(node.Blockchain().CurrentBlock().NumberU64()) if err != nil { @@ -73,6 +73,11 @@ func (node *Node) WaitForConsensusReadyV2(readySignal chan consensus.ProposalTyp } else { newCommitSigsChan <- sigs } + case commitSigs := <-commitSigsChan: + utils.Logger().Info().Msg("[ProposeNewBlock] received commit sigs asynchronously") + if len(commitSigs) > bls.BLSSignatureSizeInBytes { + newCommitSigsChan <- commitSigs + } } }() node.Consensus.StartFinalityCount() @@ -92,6 +97,12 @@ func (node *Node) WaitForConsensusReadyV2(readySignal chan consensus.ProposalTyp node.BlockChannel <- newBlock } else { utils.Logger().Err(err).Msg("!!!!!!!!!Failed Proposing New Block!!!!!!!!!") + retryCount-- + if retryCount == 0 { + // break to avoid repeated failures + break + } + continue } } } From 628174b379d3f17e9978ae1f1fe045e363dd3974 Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Thu, 5 Nov 2020 12:21:39 -0800 Subject: [PATCH 18/26] fix block proposal --- consensus/consensus.go | 2 +- crypto/bls/bls.go | 2 +- node/node_newblock.go | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/consensus/consensus.go b/consensus/consensus.go index dc8e60d60..08a559b6a 100644 --- a/consensus/consensus.go +++ b/consensus/consensus.go @@ -25,7 +25,7 @@ const ( vdfAndSeedSize = 548 // size of VDF/Proof and Seed ) -var errLeaderPriKeyNotFound = errors.New("getting leader private key from consensus public keys failed") +var errLeaderPriKeyNotFound = errors.New("leader private key not found locally") // ProposalType is to indicate the type of signal for new block proposal type ProposalType byte diff --git a/crypto/bls/bls.go b/crypto/bls/bls.go index c8d0bddf9..2c84e0fc7 100644 --- a/crypto/bls/bls.go +++ b/crypto/bls/bls.go @@ -99,7 +99,7 @@ func (pk *SerializedPublicKey) FromLibBLSPublicKey(key *bls.PublicKey) error { // SeparateSigAndMask parse the commig signature data into signature and bitmap. func SeparateSigAndMask(commitSigs []byte) ([]byte, []byte, error) { if len(commitSigs) < BLSSignatureSizeInBytes { - return nil, nil, errors.New("no mask data found in commit sigs") + return nil, nil, errors.Errorf("no mask data found in commit sigs: %x", commitSigs) } //#### Read payload data from committed msg aggSig := make([]byte, BLSSignatureSizeInBytes) diff --git a/node/node_newblock.go b/node/node_newblock.go index 8837006b6..e7ffe5826 100644 --- a/node/node_newblock.go +++ b/node/node_newblock.go @@ -95,6 +95,7 @@ func (node *Node) WaitForConsensusReadyV2(readySignal chan consensus.ProposalTyp // Send the new block to Consensus so it can be confirmed. node.BlockChannel <- newBlock + break } else { utils.Logger().Err(err).Msg("!!!!!!!!!Failed Proposing New Block!!!!!!!!!") retryCount-- From 28487fa38b02282283bc2b7845f92753e2a45e02 Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Thu, 5 Nov 2020 12:28:27 -0800 Subject: [PATCH 19/26] Add log --- node/node_handler.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/node/node_handler.go b/node/node_handler.go index c33dfe183..5a6a72864 100644 --- a/node/node_handler.go +++ b/node/node_handler.go @@ -390,10 +390,12 @@ func (node *Node) PostConsensusProcessing(newBlock *types.Block) error { for _, addr := range node.GetAddresses(newBlock.Epoch()) { wrapper, err := node.Beaconchain().ReadValidatorInformation(addr) if err != nil { + utils.Logger().Err(err).Str("addr", addr.Hex()).Msg("failed reaching validator info") return nil } snapshot, err := node.Beaconchain().ReadValidatorSnapshot(addr) if err != nil { + utils.Logger().Err(err).Str("addr", addr.Hex()).Msg("failed reaching validator snapshot") return nil } computed := availability.ComputeCurrentSigning( From 2be83cede7026798db2a45aeab2dd8e73642d106 Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Thu, 5 Nov 2020 12:31:21 -0800 Subject: [PATCH 20/26] revert verify block --- consensus/validator.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/consensus/validator.go b/consensus/validator.go index a692d3969..e1615541f 100644 --- a/consensus/validator.go +++ b/consensus/validator.go @@ -155,16 +155,6 @@ func (consensus *Consensus) onPrepared(msg *msg_pb.Message) { consensus.mutex.Lock() defer consensus.mutex.Unlock() - if consensus.BlockVerifier == nil { - consensus.getLogger().Debug().Msg("[onPrepared] consensus received message before init. Ignoring") - return - } - if err := consensus.BlockVerifier(&blockObj); err != nil { - consensus.getLogger().Error().Err(err).Msg("[OnPrepared] Block verification failed") - return - } - consensus.FBFTLog.MarkBlockVerified(&blockObj) - consensus.FBFTLog.AddBlock(&blockObj) // add block field blockPayload := make([]byte, len(recvMsg.Block)) @@ -178,6 +168,16 @@ func (consensus *Consensus) onPrepared(msg *msg_pb.Message) { Hex("blockHash", recvMsg.BlockHash[:]). Msg("[OnPrepared] Prepared message and block added") + if consensus.BlockVerifier == nil { + consensus.getLogger().Debug().Msg("[onPrepared] consensus received message before init. Ignoring") + return + } + if err := consensus.BlockVerifier(&blockObj); err != nil { + consensus.getLogger().Error().Err(err).Msg("[OnPrepared] Block verification failed") + return + } + consensus.FBFTLog.MarkBlockVerified(&blockObj) + if consensus.checkViewID(recvMsg) != nil { if consensus.current.Mode() == Normal { consensus.getLogger().Debug(). From 0029ced6b2b0c9a800e1e48975d0357bc7bd3a72 Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Thu, 5 Nov 2020 14:02:00 -0800 Subject: [PATCH 21/26] Add constant for timeouts --- api/service/syncing/syncing.go | 14 +++----------- consensus/consensus_v2.go | 11 ++++++++++- internal/chain/reward.go | 8 +++++++- node/node_newblock.go | 2 +- node/worker/worker.go | 4 +++- 5 files changed, 24 insertions(+), 15 deletions(-) diff --git a/api/service/syncing/syncing.go b/api/service/syncing/syncing.go index 124e45d92..187501d5c 100644 --- a/api/service/syncing/syncing.go +++ b/api/service/syncing/syncing.go @@ -914,8 +914,8 @@ func (ss *StateSync) IsOutOfSync(bc *core.BlockChain, doubleCheck bool) bool { Msg("[SYNC] Checking sync status") return wasOutOfSync } - time.Sleep(3 * time.Second) - // double check the sync status after 3 second to confirm (avoid false alarm) + time.Sleep(1 * time.Second) + // double check the sync status after 1 second to confirm (avoid false alarm) otherHeight2 := ss.getMaxPeerHeight(false) currentHeight := bc.CurrentBlock().NumberU64() @@ -936,11 +936,7 @@ func (ss *StateSync) SyncLoop(bc *core.BlockChain, worker *worker.Worker, isBeac if !isBeacon { ss.RegisterNodeInfo() } - // remove SyncLoopFrequency - ticker := time.NewTicker(SyncLoopFrequency * time.Second) - defer ticker.Stop() - outOfSyncCount := 1 - for range ticker.C { + for { otherHeight := ss.getMaxPeerHeight(isBeacon) currentHeight := bc.CurrentBlock().NumberU64() if currentHeight >= otherHeight { @@ -949,10 +945,6 @@ func (ss *StateSync) SyncLoop(bc *core.BlockChain, worker *worker.Worker, isBeac isBeacon, bc.ShardID(), otherHeight, currentHeight) break } - if outOfSyncCount < syncStatusCheckCount { - outOfSyncCount++ - continue - } utils.Logger().Info(). Msgf("[SYNC] Node is OUT OF SYNC (isBeacon: %t, ShardID: %d, otherHeight: %d, currentHeight: %d)", isBeacon, bc.ShardID(), otherHeight, currentHeight) diff --git a/consensus/consensus_v2.go b/consensus/consensus_v2.go index ef39728ad..29e7f4433 100644 --- a/consensus/consensus_v2.go +++ b/consensus/consensus_v2.go @@ -29,6 +29,15 @@ var ( errVerifyMessageSignature = errors.New("verify message signature failed") ) +// timeout constant +const ( + // CommitSigSenderTimeout is the timeout for sending the commit sig to finish block proposal + CommitSigSenderTimeout = 6 * time.Second + // CommitSigReceiverTimeout is the timeout for the receiving side of the commit sig + // if timeout, the receiver should instead ready directly from db for the commit sig + CommitSigReceiverTimeout = 4 * time.Second +) + // IsViewChangingMode return true if curernt mode is viewchanging func (consensus *Consensus) IsViewChangingMode() bool { return consensus.current.Mode() == ViewChanging @@ -205,7 +214,7 @@ func (consensus *Consensus) finalCommit() { go func() { select { case consensus.CommitSigChannel <- commitSigAndBitmap: - case <-time.After(6 * time.Second): + case <-time.After(CommitSigSenderTimeout): utils.Logger().Error().Err(err).Msg("[finalCommit] channel not received after 6s for commitSigAndBitmap") } }() diff --git a/internal/chain/reward.go b/internal/chain/reward.go index 6b0a7d06e..eeaf1e873 100644 --- a/internal/chain/reward.go +++ b/internal/chain/reward.go @@ -28,6 +28,12 @@ import ( "github.com/pkg/errors" ) +// timeout constant +const ( + // AsyncBlockProposalTimeout is the timeout which will abort the async block proposal. + AsyncBlockProposalTimeout = 5 * time.Second +) + func ballotResultBeaconchain( bc engine.ChainReader, header *block.Header, ) (*big.Int, shard.SlotList, shard.SlotList, shard.SlotList, error) { @@ -473,7 +479,7 @@ func waitForCommitSigs(sigsReady chan bool) error { return errors.New("Failed to get commit sigs") } utils.Logger().Info().Msg("Commit sigs are ready") - case <-time.After(5 * time.Second): + case <-time.After(AsyncBlockProposalTimeout): return errors.New("Timeout waiting for commit sigs for reward calculation") } return nil diff --git a/node/node_newblock.go b/node/node_newblock.go index e7ffe5826..4e0b9f9da 100644 --- a/node/node_newblock.go +++ b/node/node_newblock.go @@ -57,7 +57,7 @@ func (node *Node) WaitForConsensusReadyV2(readySignal chan consensus.ProposalTyp go func() { waitTime := 0 * time.Second if proposalType == consensus.AsyncProposal { - waitTime = 4 * time.Second + waitTime = consensus.CommitSigReceiverTimeout } select { case <-time.After(waitTime): diff --git a/node/worker/worker.go b/node/worker/worker.go index 496c81091..2347f0f96 100644 --- a/node/worker/worker.go +++ b/node/worker/worker.go @@ -7,6 +7,8 @@ import ( "sort" "time" + "github.com/harmony-one/harmony/consensus" + "github.com/harmony-one/harmony/crypto/bls" "github.com/harmony-one/harmony/crypto/hash" @@ -504,7 +506,7 @@ func (w *Worker) FinalizeNewBlock( copyHeader.SetLastCommitBitmap(signers) } sigsReady <- true - case <-time.After(5 * time.Second): + case <-time.After(consensus.CommitSigReceiverTimeoutz): // Exit goroutine utils.Logger().Warn().Msg("Timeout waiting for commit sigs") } From 46bf183d1fb815e492502f276f8c7d2809d0bacb Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Thu, 5 Nov 2020 14:12:42 -0800 Subject: [PATCH 22/26] fix typo --- node/worker/worker.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/worker/worker.go b/node/worker/worker.go index 2347f0f96..78272fd67 100644 --- a/node/worker/worker.go +++ b/node/worker/worker.go @@ -506,7 +506,7 @@ func (w *Worker) FinalizeNewBlock( copyHeader.SetLastCommitBitmap(signers) } sigsReady <- true - case <-time.After(consensus.CommitSigReceiverTimeoutz): + case <-time.After(consensus.CommitSigReceiverTimeout): // Exit goroutine utils.Logger().Warn().Msg("Timeout waiting for commit sigs") } From 56d4bc2b1bb897fdc97fc5adee4caf11c6e968f3 Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Thu, 5 Nov 2020 14:40:23 -0800 Subject: [PATCH 23/26] fix comment --- api/service/syncing/syncing.go | 3 --- consensus/consensus_v2.go | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/api/service/syncing/syncing.go b/api/service/syncing/syncing.go index 187501d5c..167226f92 100644 --- a/api/service/syncing/syncing.go +++ b/api/service/syncing/syncing.go @@ -29,14 +29,11 @@ import ( // Constants for syncing. const ( downloadBlocksRetryLimit = 5 // downloadBlocks service retry limit - TimesToFail = 5 // downloadBlocks service retry limit RegistrationNumber = 3 SyncingPortDifference = 3000 inSyncThreshold = 0 // when peerBlockHeight - myBlockHeight <= inSyncThreshold, it's ready to join consensus - syncStatusCheckCount = 3 // check this many times before confirming it's out of sync SyncLoopBatchSize uint32 = 1000 // maximum size for one query of block hashes verifyHeaderBatchSize uint64 = 100 // block chain header verification batch size - SyncLoopFrequency = 1 // unit in second LastMileBlocksSize = 50 // after cutting off a number of connected peers, the result number of peers diff --git a/consensus/consensus_v2.go b/consensus/consensus_v2.go index 29e7f4433..067f980c2 100644 --- a/consensus/consensus_v2.go +++ b/consensus/consensus_v2.go @@ -158,6 +158,9 @@ func (consensus *Consensus) finalCommit() { } // if leader successfully finalizes the block, send committed message to validators + // Note: leader already sent 67% commit in preCommit. The 100% commit won't be sent immediately + // to save network traffic. It will only be sent in retry if consensus doesn't move forward. + // Or if the leader is changed for next block, the 100% committed sig will be sent to the next leader immediately. sendImmediately := false if !consensus.IsLeader() || block.IsLastBlockInEpoch() { sendImmediately = true From ceaf76e021446dc0bc719230ab44f248582b49fb Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Thu, 5 Nov 2020 14:42:44 -0800 Subject: [PATCH 24/26] prevent nil pointer crash --- consensus/quorum/one-node-staked-vote.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/consensus/quorum/one-node-staked-vote.go b/consensus/quorum/one-node-staked-vote.go index f0099f968..e18b99f99 100644 --- a/consensus/quorum/one-node-staked-vote.go +++ b/consensus/quorum/one-node-staked-vote.go @@ -173,9 +173,11 @@ func (v *stakedVoteWeight) computeTotalPowerByMask(mask *bls_cosi.Mask) *numeric for key, i := range mask.PublicsIndex { if enabled, err := mask.IndexEnabled(i); err == nil && enabled { - currentTotal = currentTotal.Add( - v.roster.Voters[key].OverallPercent, - ) + if voter, ok := v.roster.Voters[key]; ok { + currentTotal = currentTotal.Add( + voter.OverallPercent, + ) + } } } return ¤tTotal From f91d781fe449e3c92a39c3c4e5ea49742363cc0f Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Thu, 5 Nov 2020 22:23:47 -0800 Subject: [PATCH 25/26] add Delayed sending --- consensus/consensus_msg_sender.go | 17 ++++++++++---- consensus/consensus_v2.go | 37 +++++++++++++++++++------------ consensus/leader.go | 2 +- consensus/threshold.go | 1 - consensus/view_change.go | 2 -- node/node_newblock.go | 9 ++++---- 6 files changed, 42 insertions(+), 26 deletions(-) diff --git a/consensus/consensus_msg_sender.go b/consensus/consensus_msg_sender.go index bbd63a746..6712ea3e7 100644 --- a/consensus/consensus_msg_sender.go +++ b/consensus/consensus_msg_sender.go @@ -56,7 +56,7 @@ func (sender *MessageSender) Reset(blockNum uint64) { } // SendWithRetry sends message with retry logic. -func (sender *MessageSender) SendWithRetry(blockNum uint64, msgType msg_pb.MessageType, groups []nodeconfig.GroupID, p2pMsg []byte, immediate bool) error { +func (sender *MessageSender) SendWithRetry(blockNum uint64, msgType msg_pb.MessageType, groups []nodeconfig.GroupID, p2pMsg []byte) error { if sender.retryTimes != 0 { msgRetry := MessageRetry{blockNum: blockNum, groups: groups, p2pMsg: p2pMsg, msgType: msgType, retryCount: 0} atomic.StoreUint32(&msgRetry.isActive, 1) @@ -65,10 +65,19 @@ func (sender *MessageSender) SendWithRetry(blockNum uint64, msgType msg_pb.Messa sender.Retry(&msgRetry) }() } - if immediate { - return sender.host.SendMessageToGroups(groups, p2pMsg) + return sender.host.SendMessageToGroups(groups, p2pMsg) +} + +// DelayedSendWithRetry is similar to SendWithRetry but without the initial message sending but only retries. +func (sender *MessageSender) DelayedSendWithRetry(blockNum uint64, msgType msg_pb.MessageType, groups []nodeconfig.GroupID, p2pMsg []byte) { + if sender.retryTimes != 0 { + msgRetry := MessageRetry{blockNum: blockNum, groups: groups, p2pMsg: p2pMsg, msgType: msgType, retryCount: 0} + atomic.StoreUint32(&msgRetry.isActive, 1) + sender.messagesToRetry.Store(msgType, &msgRetry) + go func() { + sender.Retry(&msgRetry) + }() } - return nil } // SendWithoutRetry sends message without retry logic. diff --git a/consensus/consensus_v2.go b/consensus/consensus_v2.go index 067f980c2..ad01d0691 100644 --- a/consensus/consensus_v2.go +++ b/consensus/consensus_v2.go @@ -161,23 +161,33 @@ func (consensus *Consensus) finalCommit() { // Note: leader already sent 67% commit in preCommit. The 100% commit won't be sent immediately // to save network traffic. It will only be sent in retry if consensus doesn't move forward. // Or if the leader is changed for next block, the 100% committed sig will be sent to the next leader immediately. - sendImmediately := false if !consensus.IsLeader() || block.IsLastBlockInEpoch() { - sendImmediately = true - } - if err := consensus.msgSender.SendWithRetry( - block.NumberU64(), - msg_pb.MessageType_COMMITTED, []nodeconfig.GroupID{ - nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), - }, - p2p.ConstructMessage(msgToSend), - sendImmediately); err != nil { - consensus.getLogger().Warn().Err(err).Msg("[finalCommit] Cannot send committed message") + // send immediately + if err := consensus.msgSender.SendWithRetry( + block.NumberU64(), + msg_pb.MessageType_COMMITTED, []nodeconfig.GroupID{ + nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), + }, + p2p.ConstructMessage(msgToSend)); err != nil { + consensus.getLogger().Warn().Err(err).Msg("[finalCommit] Cannot send committed message") + } else { + consensus.getLogger().Info(). + Hex("blockHash", curBlockHash[:]). + Uint64("blockNum", consensus.blockNum). + Msg("[finalCommit] Sent Committed Message") + } } else { + // delayed send + consensus.msgSender.DelayedSendWithRetry( + block.NumberU64(), + msg_pb.MessageType_COMMITTED, []nodeconfig.GroupID{ + nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), + }, + p2p.ConstructMessage(msgToSend)) consensus.getLogger().Info(). Hex("blockHash", curBlockHash[:]). Uint64("blockNum", consensus.blockNum). - Msg("[finalCommit] Sent Committed Message") + Msg("[finalCommit] Queued Committed Message") } // Dump new block into level db @@ -550,8 +560,7 @@ func (consensus *Consensus) preCommitAndPropose(blk *types.Block) error { msg_pb.MessageType_COMMITTED, []nodeconfig.GroupID{ nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), }, - p2p.ConstructMessage(msgToSend), - true); err != nil { + p2p.ConstructMessage(msgToSend)); err != nil { consensus.getLogger().Warn().Err(err).Msg("[preCommitAndPropose] Cannot send committed message") } else { consensus.getLogger().Info(). diff --git a/consensus/leader.go b/consensus/leader.go index 26c751ff9..664054383 100644 --- a/consensus/leader.go +++ b/consensus/leader.go @@ -80,7 +80,7 @@ func (consensus *Consensus) announce(block *types.Block) { if err := consensus.msgSender.SendWithRetry( consensus.blockNum, msg_pb.MessageType_ANNOUNCE, []nodeconfig.GroupID{ nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), - }, p2p.ConstructMessage(msgToSend), true); err != nil { + }, p2p.ConstructMessage(msgToSend)); err != nil { consensus.getLogger().Warn(). Str("groupID", string(nodeconfig.NewGroupIDByShardID( nodeconfig.ShardID(consensus.ShardID), diff --git a/consensus/threshold.go b/consensus/threshold.go index f9ced1139..503185f3b 100644 --- a/consensus/threshold.go +++ b/consensus/threshold.go @@ -74,7 +74,6 @@ func (consensus *Consensus) didReachPrepareQuorum() error { nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), }, p2p.ConstructMessage(msgToSend), - true, ); err != nil { consensus.getLogger().Warn().Msg("[OnPrepare] Cannot send prepared message") } else { diff --git a/consensus/view_change.go b/consensus/view_change.go index 1a94f2892..7b8cb42a4 100644 --- a/consensus/view_change.go +++ b/consensus/view_change.go @@ -265,7 +265,6 @@ func (consensus *Consensus) startViewChange() { []nodeconfig.GroupID{ nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID))}, p2p.ConstructMessage(msgToSend), - true, ); err != nil { consensus.getLogger().Err(err). Msg("[startViewChange] could not send out the ViewChange message") @@ -295,7 +294,6 @@ func (consensus *Consensus) startNewView(viewID uint64, newLeaderPriKey *bls.Pri []nodeconfig.GroupID{ nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID))}, p2p.ConstructMessage(msgToSend), - true, ); err != nil { return errors.New("failed to send out the NewView message") } diff --git a/node/node_newblock.go b/node/node_newblock.go index 4e0b9f9da..e8f8698d9 100644 --- a/node/node_newblock.go +++ b/node/node_newblock.go @@ -44,7 +44,7 @@ func (node *Node) WaitForConsensusReadyV2(readySignal chan consensus.ProposalTyp Msg("Consensus new block proposal: STOPPED!") return case proposalType := <-readySignal: - retryCount := 3 + retryCount := 0 for node.Consensus != nil && node.Consensus.IsLeader() { time.Sleep(SleepPeriod) utils.Logger().Info(). @@ -97,9 +97,10 @@ func (node *Node) WaitForConsensusReadyV2(readySignal chan consensus.ProposalTyp node.BlockChannel <- newBlock break } else { - utils.Logger().Err(err).Msg("!!!!!!!!!Failed Proposing New Block!!!!!!!!!") - retryCount-- - if retryCount == 0 { + retryCount++ + utils.Logger().Err(err).Int("retryCount", retryCount). + Msg("!!!!!!!!!Failed Proposing New Block!!!!!!!!!") + if retryCount > 3 { // break to avoid repeated failures break } From 7ac47952e15f5151ea90255cbfab5cdb28992c4b Mon Sep 17 00:00:00 2001 From: Rongjian Lan Date: Thu, 5 Nov 2020 23:30:22 -0800 Subject: [PATCH 26/26] fix leader consensus timeout timing --- consensus/consensus_v2.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/consensus/consensus_v2.go b/consensus/consensus_v2.go index ad01d0691..f9a5dc910 100644 --- a/consensus/consensus_v2.go +++ b/consensus/consensus_v2.go @@ -176,6 +176,7 @@ func (consensus *Consensus) finalCommit() { Uint64("blockNum", consensus.blockNum). Msg("[finalCommit] Sent Committed Message") } + consensus.consensusTimeout[timeoutConsensus].Start() } else { // delayed send consensus.msgSender.DelayedSendWithRetry( @@ -202,7 +203,6 @@ func (consensus *Consensus) finalCommit() { } else { consensus.getLogger().Info().Msg("[finalCommit] Start consensus timer") } - consensus.consensusTimeout[timeoutConsensus].Start() consensus.getLogger().Info(). Uint64("blockNum", block.NumberU64()). @@ -568,6 +568,7 @@ func (consensus *Consensus) preCommitAndPropose(blk *types.Block) error { Uint64("blockNum", consensus.blockNum). Msg("[preCommitAndPropose] Sent Committed Message") } + consensus.consensusTimeout[timeoutConsensus].Start() // Send signal to Node to propose the new block for consensus consensus.getLogger().Info().Msg("[preCommitAndPropose] sending block proposal signal")