The core protocol of WoopChain
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
woop/consensus/consensus_v2.go

915 lines
31 KiB

package consensus
import (
"bytes"
"context"
"encoding/hex"
2 years ago
"fmt"
"math/big"
"sync/atomic"
"time"
"github.com/ethereum/go-ethereum/common"
bls2 "github.com/harmony-one/bls/ffi/go/bls"
"github.com/harmony-one/harmony/consensus/signature"
nodeconfig "github.com/harmony-one/harmony/internal/configs/node"
"github.com/harmony-one/harmony/internal/utils"
[consensus][sync] Better coordination between state sync and consensus module. (#3352) * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [core] switch back the batch write condition in InsertReceiptChain * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [consensus] refactored and optimized tryCatchup logic * [sync] added consensus last mile block in sync. * [consensus] remove time wait for consensus inform sync. Make block low chan a buffered chan * [consensus] fix rebase errors, and optimize one line code * [consensus][sync] fix golint error and added prune logic in sync * [consensus] move header verify after adding FBFT log in onPrepared * [consensus] more change on block verification logic * [consensus] fix the verified panic issue * [consensus][sync] add block verification in consensus last mile, change it to iterator * [consensus] fix two nil pointer references when running local node (Still cannot find the root cause for it) * remove coverage.txt and add to gitignore * [consensus] add leader key check. Move quorum check logic after tryCatchup and can spin state sync * [consensus] remove the leader sender check for now. Will add later * [consensus] refactor fbftlog to get rid of unsafe mapset module. Replace with map * [consensus] move the isQuorumAchived logic back. We surely need to check it before add to FBFTlog * [consensus] remove the redundant block nil check * [test] fix the consensus test * [consensus] rebase main and fix stuff. Removed isSendByLeader * [consensus] added logic to spin up sync when received message is greater than consensus block number * [consensus] more changes in consensus. Remove some spin sync logic. * fix error in main * [consensus] change the hash algorithm of the FBFTLog to get rid of rlp error * [consensus] use seperate mutex in FBFT message * [consensus] change fbft log id to a shorter form. Added unit test case
4 years ago
"github.com/rs/zerolog"
msg_pb "github.com/harmony-one/harmony/api/proto/message"
"github.com/harmony-one/harmony/block"
"github.com/harmony-one/harmony/consensus/quorum"
"github.com/harmony-one/harmony/core/types"
[consensus][sync] Better coordination between state sync and consensus module. (#3352) * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [core] switch back the batch write condition in InsertReceiptChain * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [consensus] refactored and optimized tryCatchup logic * [sync] added consensus last mile block in sync. * [consensus] remove time wait for consensus inform sync. Make block low chan a buffered chan * [consensus] fix rebase errors, and optimize one line code * [consensus][sync] fix golint error and added prune logic in sync * [consensus] move header verify after adding FBFT log in onPrepared * [consensus] more change on block verification logic * [consensus] fix the verified panic issue * [consensus][sync] add block verification in consensus last mile, change it to iterator * [consensus] fix two nil pointer references when running local node (Still cannot find the root cause for it) * remove coverage.txt and add to gitignore * [consensus] add leader key check. Move quorum check logic after tryCatchup and can spin state sync * [consensus] remove the leader sender check for now. Will add later * [consensus] refactor fbftlog to get rid of unsafe mapset module. Replace with map * [consensus] move the isQuorumAchived logic back. We surely need to check it before add to FBFTlog * [consensus] remove the redundant block nil check * [test] fix the consensus test * [consensus] rebase main and fix stuff. Removed isSendByLeader * [consensus] added logic to spin up sync when received message is greater than consensus block number * [consensus] more changes in consensus. Remove some spin sync logic. * fix error in main * [consensus] change the hash algorithm of the FBFTLog to get rid of rlp error * [consensus] use seperate mutex in FBFT message * [consensus] change fbft log id to a shorter form. Added unit test case
4 years ago
"github.com/harmony-one/harmony/crypto/bls"
vrf_bls "github.com/harmony-one/harmony/crypto/vrf/bls"
"github.com/harmony-one/harmony/p2p"
"github.com/harmony-one/harmony/shard"
"github.com/harmony-one/vdf/src/vdf_go"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
)
// Sentinel errors for consensus message handling.
var (
// errSenderPubKeyNotLeader is returned by HandleMessageUpdate when a
// leader-originated message type is signed by a key other than LeaderPubKey.
errSenderPubKeyNotLeader = errors.New("sender pubkey doesn't match leader")
// errVerifyMessageSignature is returned by HandleMessageUpdate when
// senderKeySanityChecks rejects the message.
errVerifyMessageSignature = errors.New("verify message signature failed")
// errParsingFBFTMessage signals that an FBFT message could not be parsed.
errParsingFBFTMessage = errors.New("failed parsing FBFT message")
)
// Timeout constants for handing the commit signature over to block proposal.
const (
// CommitSigSenderTimeout is the timeout for sending the commit sig to finish block proposal
CommitSigSenderTimeout = 10 * time.Second
// CommitSigReceiverTimeout is the timeout for the receiving side of the commit sig.
// If it times out, the receiver should instead read the commit sig directly from the db.
CommitSigReceiverTimeout = 8 * time.Second
)
// IsViewChangingMode reports whether the current consensus mode is
// ViewChanging. It is the exported entry point and delegates to
// isViewChangingMode.
func (consensus *Consensus) IsViewChangingMode() bool {
	inViewChange := consensus.isViewChangingMode()
	return inViewChange
}
// isViewChangingMode reports whether consensus.current is in ViewChanging mode.
func (consensus *Consensus) isViewChangingMode() bool {
	mode := consensus.current.Mode()
	return mode == ViewChanging
}
// HandleMessageUpdate will update the consensus state according to received message
func (consensus *Consensus) HandleMessageUpdate(ctx context.Context, msg *msg_pb.Message, senderKey *bls.SerializedPublicKey) error {
// when node is in ViewChanging mode, it still accepts normal messages into FBFTLog
// in order to avoid possible trap forever but drop PREPARE and COMMIT
// which are message types specifically for a node acting as leader
// so we just ignore those messages
if consensus.isViewChangingMode() &&
[slash][consensus] Notice double sign & broadcast, factor out tech debt of consensus (#2152) * [slash] Remove dead interface, associated piping * [slash] Expand out structs * [consensus] Write to a chan when find a case of double-signing, remove dead code * [slash] Broadcast the noticing of a double signing * [rawdb] CRUD for slashing candidates * [slashing][node][proto] Broadcast the slash record after receive from consensus, handle received proto message, persist in off-chain db while pending * [slash][node][propose-block] Add verified slashes proposed into the header in block proposal * [slash][shard] Factor out external validator as method on shard state, add double-signature field * [slash][engine] Apply slash, name boolean expression for sorts, use stable sort * [slash] Abstract Ballot results so keep track of both pre and post double sign event * [slash] Fix type errors on test code * [slash] Read from correct rawdb * [slash] Add epoch based guards in CRUD of slashing * [slash] Write to correct cache for slashing candidates * [shard] Use explicit named type of BLS Signature, use convention * [slash] Fix mistake done in refactor, improper header used. 
Factor out fromSlice to set * [slash][node] Restore newblock to master, try again minimial change * [cx-receipts] Break up one-liner, use SliceStable, not Slice * [network] Finish refactor that makes network message headers once * [network] Simplify creation further of headers write * [slash] Adjust data structure of slash after offline discussion with RJ, Chao * [slash] Still did need signature of the double signature * [consensus] Prepare message does not have block header * [consensus] Soft reset three files to 968517d~1 * [consensus] Begin factor consensus network intended message out with prepare first * [consensus] Factor out Prepared message * [consensus] Factor out announce message creation * [consensus] Committed Message, branch on verify sender key for clearer log * [consensus] Committed Message Factor out * [consensus] Do jenkins MVP of signatures adjustment * [main][slash] Provide YAML config as webhook config for double sign event * [consensus] Adjust signatures, whitespace, lessen GC pressure * [consensus] Remove dead code * [consensus] Factor out commit overloaded message, give commit payload override in construct * [consensus] Fix travis tests * [consensus] Provide block bytes in SubmitVote(quorum.Commit) * [consensus] Factor out noisy sanity checks in BFT, move existing commit check earlier as was before * [quorum] Adjust signatures in quorum * [staking] Adjust after merge from master * [consensus] Finish refactor of consensus * [node] Fix import * [consensus] Fix travis * [consensus] Use origin/master copy of block, fix mistake of pointer to empty byte * [consensus] Less verbose bools * [consensus] Remove unused trailing mutation hook in message construct * [consensus] Address some TODOs on err, comment out double sign
5 years ago
(msg.Type == msg_pb.MessageType_PREPARE ||
msg.Type == msg_pb.MessageType_COMMIT) {
return nil
}
// Do easier check before signature check
if msg.Type == msg_pb.MessageType_ANNOUNCE || msg.Type == msg_pb.MessageType_PREPARED || msg.Type == msg_pb.MessageType_COMMITTED {
// Only validator needs to check whether the message is from the correct leader
if !bytes.Equal(senderKey[:], consensus.LeaderPubKey.Bytes[:]) &&
consensus.current.Mode() == Normal && !consensus.IgnoreViewIDCheck.IsSet() {
return errSenderPubKeyNotLeader
}
}
if msg.Type != msg_pb.MessageType_PREPARE && msg.Type != msg_pb.MessageType_COMMIT {
// Leader doesn't need to check validator's message signature since the consensus signature will be checked
if !consensus.senderKeySanityChecks(msg, senderKey) {
return errVerifyMessageSignature
}
}
// Parse FBFT message
var fbftMsg *FBFTMessage
var err error
switch t := msg.Type; true {
case t == msg_pb.MessageType_VIEWCHANGE:
fbftMsg, err = ParseViewChangeMessage(msg)
case t == msg_pb.MessageType_NEWVIEW:
members := consensus.Decider.Participants()
fbftMsg, err = ParseNewViewMessage(msg, members)
default:
fbftMsg, err = consensus.ParseFBFTMessage(msg)
}
if err != nil || fbftMsg == nil {
return errors.Wrapf(err, "unable to parse consensus msg with type: %s", msg.Type)
}
3 years ago
canHandleViewChange := true
intendedForValidator, intendedForLeader :=
!consensus.IsLeader(),
consensus.IsLeader()
3 years ago
// if in backup normal mode, force ignore view change event and leader event.
if consensus.current.Mode() == NormalBackup {
canHandleViewChange = false
intendedForLeader = false
}
// Route message to handler
switch t := msg.Type; true {
// Handle validator intended messages first
case t == msg_pb.MessageType_ANNOUNCE && intendedForValidator:
consensus.onAnnounce(msg)
case t == msg_pb.MessageType_PREPARED && intendedForValidator:
consensus.onPrepared(fbftMsg)
case t == msg_pb.MessageType_COMMITTED && intendedForValidator:
consensus.onCommitted(fbftMsg)
[slash][consensus] Notice double sign & broadcast, factor out tech debt of consensus (#2152) * [slash] Remove dead interface, associated piping * [slash] Expand out structs * [consensus] Write to a chan when find a case of double-signing, remove dead code * [slash] Broadcast the noticing of a double signing * [rawdb] CRUD for slashing candidates * [slashing][node][proto] Broadcast the slash record after receive from consensus, handle received proto message, persist in off-chain db while pending * [slash][node][propose-block] Add verified slashes proposed into the header in block proposal * [slash][shard] Factor out external validator as method on shard state, add double-signature field * [slash][engine] Apply slash, name boolean expression for sorts, use stable sort * [slash] Abstract Ballot results so keep track of both pre and post double sign event * [slash] Fix type errors on test code * [slash] Read from correct rawdb * [slash] Add epoch based guards in CRUD of slashing * [slash] Write to correct cache for slashing candidates * [shard] Use explicit named type of BLS Signature, use convention * [slash] Fix mistake done in refactor, improper header used. 
Factor out fromSlice to set * [slash][node] Restore newblock to master, try again minimial change * [cx-receipts] Break up one-liner, use SliceStable, not Slice * [network] Finish refactor that makes network message headers once * [network] Simplify creation further of headers write * [slash] Adjust data structure of slash after offline discussion with RJ, Chao * [slash] Still did need signature of the double signature * [consensus] Prepare message does not have block header * [consensus] Soft reset three files to 968517d~1 * [consensus] Begin factor consensus network intended message out with prepare first * [consensus] Factor out Prepared message * [consensus] Factor out announce message creation * [consensus] Committed Message, branch on verify sender key for clearer log * [consensus] Committed Message Factor out * [consensus] Do jenkins MVP of signatures adjustment * [main][slash] Provide YAML config as webhook config for double sign event * [consensus] Adjust signatures, whitespace, lessen GC pressure * [consensus] Remove dead code * [consensus] Factor out commit overloaded message, give commit payload override in construct * [consensus] Fix travis tests * [consensus] Provide block bytes in SubmitVote(quorum.Commit) * [consensus] Factor out noisy sanity checks in BFT, move existing commit check earlier as was before * [quorum] Adjust signatures in quorum * [staking] Adjust after merge from master * [consensus] Finish refactor of consensus * [node] Fix import * [consensus] Fix travis * [consensus] Use origin/master copy of block, fix mistake of pointer to empty byte * [consensus] Less verbose bools * [consensus] Remove unused trailing mutation hook in message construct * [consensus] Address some TODOs on err, comment out double sign
5 years ago
// Handle leader intended messages now
case t == msg_pb.MessageType_PREPARE && intendedForLeader:
consensus.onPrepare(fbftMsg)
case t == msg_pb.MessageType_COMMIT && intendedForLeader:
consensus.onCommit(fbftMsg)
// Handle view change messages
3 years ago
case t == msg_pb.MessageType_VIEWCHANGE && canHandleViewChange:
consensus.onViewChange(fbftMsg)
3 years ago
case t == msg_pb.MessageType_NEWVIEW && canHandleViewChange:
consensus.onNewView(fbftMsg)
}
return nil
}
func (consensus *Consensus) finalCommit() {
2 years ago
// THIS IS NOT GOOD PLACE FOR LEADER SWITCHING
numCommits := consensus.Decider.SignersCount(quorum.Commit)
consensus.getLogger().Info().
Int64("NumCommits", numCommits).
Msg("[finalCommit] Finalizing Consensus")
beforeCatchupNum := consensus.BlockNum()
leaderPriKey, err := consensus.GetConsensusLeaderPrivateKey()
if err != nil {
consensus.getLogger().Error().Err(err).Msg("[finalCommit] leader not found")
return
}
// Construct committed message
network, err := consensus.construct(msg_pb.MessageType_COMMITTED, nil, []*bls.PrivateKeyWrapper{leaderPriKey})
if err != nil {
[slash][consensus] Notice double sign & broadcast, factor out tech debt of consensus (#2152) * [slash] Remove dead interface, associated piping * [slash] Expand out structs * [consensus] Write to a chan when find a case of double-signing, remove dead code * [slash] Broadcast the noticing of a double signing * [rawdb] CRUD for slashing candidates * [slashing][node][proto] Broadcast the slash record after receive from consensus, handle received proto message, persist in off-chain db while pending * [slash][node][propose-block] Add verified slashes proposed into the header in block proposal * [slash][shard] Factor out external validator as method on shard state, add double-signature field * [slash][engine] Apply slash, name boolean expression for sorts, use stable sort * [slash] Abstract Ballot results so keep track of both pre and post double sign event * [slash] Fix type errors on test code * [slash] Read from correct rawdb * [slash] Add epoch based guards in CRUD of slashing * [slash] Write to correct cache for slashing candidates * [shard] Use explicit named type of BLS Signature, use convention * [slash] Fix mistake done in refactor, improper header used. 
Factor out fromSlice to set * [slash][node] Restore newblock to master, try again minimial change * [cx-receipts] Break up one-liner, use SliceStable, not Slice * [network] Finish refactor that makes network message headers once * [network] Simplify creation further of headers write * [slash] Adjust data structure of slash after offline discussion with RJ, Chao * [slash] Still did need signature of the double signature * [consensus] Prepare message does not have block header * [consensus] Soft reset three files to 968517d~1 * [consensus] Begin factor consensus network intended message out with prepare first * [consensus] Factor out Prepared message * [consensus] Factor out announce message creation * [consensus] Committed Message, branch on verify sender key for clearer log * [consensus] Committed Message Factor out * [consensus] Do jenkins MVP of signatures adjustment * [main][slash] Provide YAML config as webhook config for double sign event * [consensus] Adjust signatures, whitespace, lessen GC pressure * [consensus] Remove dead code * [consensus] Factor out commit overloaded message, give commit payload override in construct * [consensus] Fix travis tests * [consensus] Provide block bytes in SubmitVote(quorum.Commit) * [consensus] Factor out noisy sanity checks in BFT, move existing commit check earlier as was before * [quorum] Adjust signatures in quorum * [staking] Adjust after merge from master * [consensus] Finish refactor of consensus * [node] Fix import * [consensus] Fix travis * [consensus] Use origin/master copy of block, fix mistake of pointer to empty byte * [consensus] Less verbose bools * [consensus] Remove unused trailing mutation hook in message construct * [consensus] Address some TODOs on err, comment out double sign
5 years ago
consensus.getLogger().Warn().Err(err).
Msg("[finalCommit] Unable to construct Committed message")
return
}
msgToSend, FBFTMsg :=
[slash][consensus] Notice double sign & broadcast, factor out tech debt of consensus (#2152) * [slash] Remove dead interface, associated piping * [slash] Expand out structs * [consensus] Write to a chan when find a case of double-signing, remove dead code * [slash] Broadcast the noticing of a double signing * [rawdb] CRUD for slashing candidates * [slashing][node][proto] Broadcast the slash record after receive from consensus, handle received proto message, persist in off-chain db while pending * [slash][node][propose-block] Add verified slashes proposed into the header in block proposal * [slash][shard] Factor out external validator as method on shard state, add double-signature field * [slash][engine] Apply slash, name boolean expression for sorts, use stable sort * [slash] Abstract Ballot results so keep track of both pre and post double sign event * [slash] Fix type errors on test code * [slash] Read from correct rawdb * [slash] Add epoch based guards in CRUD of slashing * [slash] Write to correct cache for slashing candidates * [shard] Use explicit named type of BLS Signature, use convention * [slash] Fix mistake done in refactor, improper header used. 
Factor out fromSlice to set * [slash][node] Restore newblock to master, try again minimial change * [cx-receipts] Break up one-liner, use SliceStable, not Slice * [network] Finish refactor that makes network message headers once * [network] Simplify creation further of headers write * [slash] Adjust data structure of slash after offline discussion with RJ, Chao * [slash] Still did need signature of the double signature * [consensus] Prepare message does not have block header * [consensus] Soft reset three files to 968517d~1 * [consensus] Begin factor consensus network intended message out with prepare first * [consensus] Factor out Prepared message * [consensus] Factor out announce message creation * [consensus] Committed Message, branch on verify sender key for clearer log * [consensus] Committed Message Factor out * [consensus] Do jenkins MVP of signatures adjustment * [main][slash] Provide YAML config as webhook config for double sign event * [consensus] Adjust signatures, whitespace, lessen GC pressure * [consensus] Remove dead code * [consensus] Factor out commit overloaded message, give commit payload override in construct * [consensus] Fix travis tests * [consensus] Provide block bytes in SubmitVote(quorum.Commit) * [consensus] Factor out noisy sanity checks in BFT, move existing commit check earlier as was before * [quorum] Adjust signatures in quorum * [staking] Adjust after merge from master * [consensus] Finish refactor of consensus * [node] Fix import * [consensus] Fix travis * [consensus] Use origin/master copy of block, fix mistake of pointer to empty byte * [consensus] Less verbose bools * [consensus] Remove unused trailing mutation hook in message construct * [consensus] Address some TODOs on err, comment out double sign
5 years ago
network.Bytes,
network.FBFTMsg
4 years ago
commitSigAndBitmap := FBFTMsg.Payload
consensus.FBFTLog.AddVerifiedMessage(FBFTMsg)
// find correct block content
curBlockHash := consensus.blockHash
block := consensus.FBFTLog.GetBlockByHash(curBlockHash)
if block == nil {
consensus.getLogger().Warn().
Str("blockHash", hex.EncodeToString(curBlockHash[:])).
Msg("[finalCommit] Cannot find block by hash")
return
}
if err := consensus.verifyLastCommitSig(commitSigAndBitmap, block); err != nil {
consensus.getLogger().Warn().Err(err).Msg("[finalCommit] failed verifying last commit sig")
return
}
consensus.getLogger().Info().Hex("new", commitSigAndBitmap).Msg("[finalCommit] Overriding commit signatures!!")
consensus.Blockchain().WriteCommitSig(block.NumberU64(), commitSigAndBitmap)
// Send committed message before block insertion.
4 years ago
// if leader successfully finalizes the block, send committed message to validators
4 years ago
// Note: leader already sent 67% commit in preCommit. The 100% commit won't be sent immediately
// to save network traffic. It will only be sent in retry if consensus doesn't move forward.
// Or if the leader is changed for next block, the 100% committed sig will be sent to the next leader immediately.
if !consensus.IsLeader() || block.IsLastBlockInEpoch() {
// send immediately
if err := consensus.msgSender.SendWithRetry(
block.NumberU64(),
msg_pb.MessageType_COMMITTED, []nodeconfig.GroupID{
nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)),
},
p2p.ConstructMessage(msgToSend)); err != nil {
consensus.getLogger().Warn().Err(err).Msg("[finalCommit] Cannot send committed message")
} else {
consensus.getLogger().Info().
Hex("blockHash", curBlockHash[:]).
Uint64("blockNum", consensus.BlockNum()).
Msg("[finalCommit] Sent Committed Message")
}
4 years ago
consensus.getLogger().Info().Msg("[finalCommit] Start consensus timer")
consensus.consensusTimeout[timeoutConsensus].Start()
} else {
// delayed send
consensus.msgSender.DelayedSendWithRetry(
block.NumberU64(),
msg_pb.MessageType_COMMITTED, []nodeconfig.GroupID{
nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)),
},
p2p.ConstructMessage(msgToSend))
consensus.getLogger().Info().
Hex("blockHash", curBlockHash[:]).
Uint64("blockNum", consensus.BlockNum()).
Hex("lastCommitSig", commitSigAndBitmap).
Msg("[finalCommit] Queued Committed Message")
}
block.SetCurrentCommitSig(commitSigAndBitmap)
err = consensus.commitBlock(block, FBFTMsg)
if err != nil || consensus.BlockNum()-beforeCatchupNum != 1 {
consensus.getLogger().Err(err).
Uint64("beforeCatchupBlockNum", beforeCatchupNum).
Msg("[finalCommit] Leader failed to commit the confirmed block")
}
// Dump new block into level db
// In current code, we add signatures in block in tryCatchup, the block dump to explorer does not contains signatures
// but since explorer doesn't need signatures, it should be fine
// in future, we will move signatures to next block
//explorer.GetStorageInstance(consensus.leader.IP, consensus.leader.Port, true).Dump(block, beforeCatchupNum)
if consensus.consensusTimeout[timeoutBootstrap].IsActive() {
consensus.consensusTimeout[timeoutBootstrap].Stop()
consensus.getLogger().Info().Msg("[finalCommit] stop bootstrap timer only once")
}
consensus.getLogger().Info().
Uint64("blockNum", block.NumberU64()).
Uint64("epochNum", block.Epoch().Uint64()).
Uint64("ViewId", block.Header().ViewID().Uint64()).
Str("blockHash", block.Hash().String()).
5 years ago
Int("numTxns", len(block.Transactions())).
Int("numStakingTxns", len(block.StakingTransactions())).
Msg("HOORAY!!!!!!! CONSENSUS REACHED!!!!!!!")
consensus.UpdateLeaderMetrics(float64(numCommits), float64(block.NumberU64()))
// If still the leader, send commit sig/bitmap to finish the new block proposal,
// else, the block proposal will timeout by itself.
if consensus.IsLeader() {
if block.IsLastBlockInEpoch() {
// No pipelining
go func() {
consensus.getLogger().Info().Msg("[finalCommit] sending block proposal signal")
consensus.ReadySignal <- SyncProposal
}()
} else {
// pipelining
go func() {
select {
case consensus.CommitSigChannel <- commitSigAndBitmap:
case <-time.After(CommitSigSenderTimeout):
utils.Logger().Error().Err(err).Msg("[finalCommit] channel not received after 6s for commitSigAndBitmap")
}
}()
}
}
}
4 years ago
// BlockCommitSigs returns the byte array of aggregated
// commit signature and bitmap signed on the block
func (consensus *Consensus) BlockCommitSigs(blockNum uint64) ([]byte, error) {
if consensus.BlockNum() <= 1 {
return nil, nil
}
lastCommits, err := consensus.Blockchain().ReadCommitSig(blockNum)
[double-sign] Provide proof of double sign in slash record sent to beaconchain (#2253) * [double-sign] Commit changes in consensus needed for double-sign * [double-sign] Leader captures when valdator double signs, broadcasts to beaconchain * [slash] Add quick iteration tool for testing double-signing * [slash] Add webhook example * [slash] Add http server for hook to trigger double sign behavior * [double-sign] Use bin/trigger-double-sign to cause a double-sign * [double-sign] Full feedback loop working * [slash] Thread through the slash records in the block proposal step * [slash] Compute the slashing rate * [double-sign] Generalize yaml malicious for many keys * [double-sign][slash] Modify data structures, verify via webhook handler * [slash][double-sign] Find one address of bls public key signer, seemingly settle on data structures * [slash] Apply to state slashing for double signing * [slash][double-sign] Checkpoint for working code that slashes on beaconchain * [slash] Keep track of the total slash and total reporters reward * [slash] Dump account state before and after the slash * [slash] Satisfy Travis * [slash][state] Apply slash to the snapshot at beginning of epoch, now need to capture also the new delegates * [slash] Capture the unique new delegations since snapshot as well * [slash] Filter undelegation by epoch of double sign * [slash] Add TODO of correctness needed in slash needs on off-chain data * [rpc] Fix closure issue on shardID * [slash] Add delegator to double-sign testing script * [slash] Expand crt-validator.sh with commenting printfs and make delegation * [slash] Finish track payment of leftover slash debt after undelegation runs out * [slash] Now be explicit about error wrt delegatorSlashApply * [slash] Capture specific sanity check on slash paidoff * [slash] Track slash from undelegation piecemeal * [slash][delegation] Named slice types, .String() * [slash] Do no RLP encode twice, once is enough * [slash] Remove special case of validators 
own delegation * [slash] Refactor approach to slash state application * [slash] Begin expanding out Verify * [slash] Slash on snapshot delegations, not current * [slash] Fix Epoch Cmp * [slash] Third iteration on slash logic * [slash] Use full slash amount * [slash] More log, whitespace * [slash] Remove Println, add log * [slash] Remove debug Println * [slash] Add record in unit test * [slash] Build Validator snapshot, current. Fill out slash record * [slash] Need to get RLP dump of a header to use in test * [slash] Factor out double sign test constants * [slash] Factor out common for validator, stub out slash application, finish out deserialization setup * [slash] Factor out data structure creation because of var lexical scoping * [slash] Seem to have pipeline of unit test e2e executing * [slash] Add expected snitch, slash amounts * [slash] Checkpoint * [slash] Unit test correctly checks case of validator own stake which could drop below 1 ONE in slashing * [config] add double-sign testnet config (#1) Signed-off-by: Leo Chen <leo@harmony.one> * [slash] Commit for as is code & data of current dump.json * [slash] Order of state operation not correct in test, hence bad results, thank you dlv * [slash] Add snapshot state dump * [slash] Pay off slash of validator own delegation correctly * [slash] Pay off slash debt with special case for min-self * [slash] Pass first scenario conclusively * [slash] 2% slash passes unit test for own delegation and external * [slash] Parameterize unit test to easily test .02 vs .80 slash * [slash] Handle own delegation correctly at 80% slash * [slash] Have 80% slash working with external delegator * [slash] Remove debug code from slash * [slash] Adjust Apply signature, test again for 2% slash * [slash] Factor out scenario in testing so can test 2% and 80% at same time * [slash] Correct balance deduction on plan delegation * [slash] Mock out ChainReader for TestVerify * [slash] Small surface area interface, now feedback loop for verify * 
[slash] Remove development json * [slash] trigger-double-sign consumes yaml * [slash] Remove dead code * [slash][test] Factor ValidatorWrapper into scenario * [slash][test] Add example from local-testing dump - caution might be off * [slash] Factor out mutation of slashDebt * [slash][test] Factor out tests so can easily load test-case from bytes * [slash] Fix payment mistake in validator own delegation wrt min-self-delgation respected * [slash] Satisfy Travis * [slash] Begin cleanup of PR * [slash] Apply slash from header to Finalize via state processor * [slash] Productionize code, Println => logs; adjust slash picked in newblock * [slash] Need pointer for rlp.Decode * [slash] ValidatorInformation use full wrapper * Fix median stake * [staking] Adjust MarshalJSON for Validator, Wrapper * Refactor offchain data commit; Make block onchain/offchain commit atomic (#2279) * Refactor offchain data; Add epoch to ValidatorSnapshot * Make block onchain/offchain data commit atomically * [slash][committee] Set .Active to false on double sign, do not consider banned or inactive for committee assignment * [effective] VC eligible.go * [consensus] Redundant field in printf * [docker] import-ks for a dev account * [slash] Create BLS key for dockerfile and crt-validator.sh * [slash][docker] Easy deployment of double-sign testing * [docker] Have slash work as single docker command * [rpc] Fix median-stake RPC * [slash] Update webhook with default docker BLS key * [docker][slash] Fresh yaml copy for docker build, remove dev code in main.go * [slash] Remove helper binary, commented out code, change to local config * [params] Factor out test genesis value * Add shard checking to Tx-Pool & correct blacklist (#2301) * [core] Fix blacklist & add shardID check * [staking + node + cmd] Fix blacklist & add shardID check * [slash] Adjust to PR comments part 1 * [docker] Use different throw away funded account * [docker] Create easier testing for delegation with private keys * [docker] Update 
yaml * [slash] Remove special case for slashing validator own delegation wrt min-self-delegate * [docker] Install nano as well * [slash] Early error if banned * [quorum] Expose earning account in decider marshal json * Revert "Refactor offchain data commit; Make block onchain/offchain commit atomic (#2279)" This reverts commit 9ffbf682c075b49188923c65a0bbf39ac188be00. * [slash] Add non-sanity check way to update validator * [reward] Increase percision on percentage in schedule * [slash] Adjust logs * [committee] Check eligibility of validator before doing sanity check * [slash] Update docker * [slash] Move create validator script to test * [slash] More log * [param] Make things faster * [slash][off-chain] Clear out slashes from pending in writeblockwithstate * [cross-link] Log is not error, just info * [blockchain] Not necessary to guard DeletePendingSlashingCandidates * [slash][consensus] Use plain []byte for signature b/c bls.Sign has private impl fields, rlp does not encode that * [slash][test] Use faucet as sender, assume user imported * [slash] Test setup * [slash] reserve error for real error in logs * [slash][availability] Apply availability correct, bump signing count each block * [slash][staking] Consider banned field in sanity check, pay snitch only half of what was actually slashed * [slash] Pay as much as can * [slash] use right nowAmt * [slash] Take away from rewards as well * [slash] iterate faster * [slash] Remove dev based timing * [slash] Add more log, sanity check incoming slash records, only count external for slash rate * [availability][state] Adjust signature of ValidatorWrapper wrt state, filter out for staked validators, correct availaibility measure on running counters * [availability] More log * [slash] Simply pre slash erra slashing * [slash] Remove development code * [slash] Use height from recvMsg, todo on epoch * [staking] Not necessary to touch LastEpochInCommittee in staking_verifier * [slash] Undo ds in endpoint pattern config * 
[slash] Add TODO and log when delegation becomes 0 b/c slash debt payment * [slash] Abstract staked validators from shard.State into type, set slash rate based BLSKey count Co-authored-by: Leo Chen <leo@harmony.one> Co-authored-by: flicker-harmony <52401354+flicker-harmony@users.noreply.github.com> Co-authored-by: Rongjian Lan <rongjian@harmony.one> Co-authored-by: Daniel Van Der Maden <daniel@harmony.one>
5 years ago
if err != nil ||
len(lastCommits) < bls.BLSSignatureSizeInBytes {
msgs := consensus.FBFTLog.GetMessagesByTypeSeq(
msg_pb.MessageType_COMMITTED, blockNum,
)
if len(msgs) != 1 {
consensus.getLogger().Error().
Int("numCommittedMsg", len(msgs)).
Msg("GetLastCommitSig failed with wrong number of committed message")
return nil, errors.Errorf(
"GetLastCommitSig failed with wrong number of committed message %d", len(msgs),
)
}
lastCommits = msgs[0].Payload
}
return lastCommits, nil
}
// Start kicks off the consensus background work and returns without waiting
// for it.
//
// A goroutine logs the start, spawns a ticker loop that calls tick() under the
// consensus mutex every 250ms until stopChan yields a value or is closed, and
// then, under the same mutex, starts the bootstrap timeout and sets
// NextBlockDue to now + BlockPeriod. Independently of that goroutine, the
// dHelper (when set) is started synchronously before Start returns.
func (consensus *Consensus) Start(
	stopChan chan struct{},
) {
	// tickLoop drives tick() until stopChan fires.
	tickLoop := func() {
		t := time.NewTicker(250 * time.Millisecond)
		defer t.Stop()
		for {
			select {
			case <-stopChan:
				return
			case <-t.C:
				consensus.mutex.Lock()
				consensus.tick()
				consensus.mutex.Unlock()
			}
		}
	}

	// bootstrap launches the tick loop and arms the one-shot bootstrap timer.
	bootstrap := func() {
		consensus.getLogger().Info().Time("time", time.Now()).Msg("[ConsensusMainLoop] Consensus started")
		go tickLoop()

		consensus.mutex.Lock()
		consensus.consensusTimeout[timeoutBootstrap].Start()
		consensus.getLogger().Info().Msg("[ConsensusMainLoop] Start bootstrap timeout (only once)")
		// Set up next block due time.
		consensus.NextBlockDue = time.Now().Add(consensus.BlockPeriod)
		consensus.mutex.Unlock()
	}
	go bootstrap()

	if helper := consensus.dHelper; helper != nil {
		helper.start()
	}
}
func (consensus *Consensus) StartChannel() {
2 years ago
consensus.mutex.Lock()
consensus.isInitialLeader = consensus.IsLeader()
if consensus.isInitialLeader {
consensus.start = true
consensus.getLogger().Info().Time("time", time.Now()).Msg("[ConsensusMainLoop] Send ReadySignal")
2 years ago
consensus.mutex.Unlock()
consensus.ReadySignal <- SyncProposal
2 years ago
return
}
2 years ago
consensus.mutex.Unlock()
}
// syncReadyChan brings the consensus state back in line with the local chain
// head.
//
// When the consensus block number is behind (current header number + 1), the
// block number and view IDs are reset from the current header, the consensus
// information is refreshed, the resulting mode is applied and the consensus
// timer is started. Otherwise, if the node is still in Syncing mode, only the
// consensus information/mode refresh and the timer start are performed. Both
// paths increment the "in_sync" counter; in any other situation nothing
// beyond the entry log line happens.
func (consensus *Consensus) syncReadyChan() {
	consensus.getLogger().Info().Msg("[ConsensusMainLoop] syncReadyChan")

	switch {
	case consensus.BlockNum() < consensus.Blockchain().CurrentHeader().Number().Uint64()+1:
		nextBlockNum := consensus.Blockchain().CurrentHeader().Number().Uint64() + 1
		consensus.SetBlockNum(nextBlockNum)
		nextViewID := consensus.Blockchain().CurrentHeader().ViewID().Uint64() + 1
		consensus.SetViewIDs(nextViewID)

		newMode := consensus.UpdateConsensusInformation()
		consensus.current.SetMode(newMode)
		consensus.getLogger().Info().Msg("[syncReadyChan] Start consensus timer")
		consensus.consensusTimeout[timeoutConsensus].Start()
		consensus.getLogger().Info().Str("Mode", newMode.String()).Msg("Node is IN SYNC")
		consensusSyncCounterVec.With(prometheus.Labels{"consensus": "in_sync"}).Inc()

	case consensus.Mode() == Syncing:
		// Corner case where sync is triggered before `onCommitted` and there is a race
		// for block insertion between consensus and downloader.
		newMode := consensus.UpdateConsensusInformation()
		consensus.SetMode(newMode)
		consensus.getLogger().Info().Msg("[syncReadyChan] Start consensus timer")
		consensus.consensusTimeout[timeoutConsensus].Start()
		consensusSyncCounterVec.With(prometheus.Labels{"consensus": "in_sync"}).Inc()
	}
}
// syncNotReadyChan marks the node as out of sync: the consensus block number
// is set to the current header number + 1, the mode is switched to Syncing and
// the "out_of_sync" counter is incremented.
func (consensus *Consensus) syncNotReadyChan() {
	logger := consensus.getLogger()
	logger.Info().Msg("[ConsensusMainLoop] syncNotReadyChan")

	headNum := consensus.Blockchain().CurrentHeader().Number().Uint64()
	consensus.SetBlockNum(headNum + 1)
	consensus.current.SetMode(Syncing)

	consensus.getLogger().Info().Msg("[ConsensusMainLoop] Node is OUT OF SYNC")
	consensusSyncCounterVec.With(prometheus.Labels{"consensus": "out_of_sync"}).Inc()
}
// tick inspects every consensus timeout once and starts a view change for the
// first expired one it meets.
//
// Nothing is done while this node was the initial leader but consensus has not
// been started yet. In Listening mode every timer is stopped. In Syncing mode
// every timer except the bootstrap one is stopped. Otherwise an expired timer
// triggers startViewChange and ends the scan.
func (consensus *Consensus) tick() {
	if consensus.isInitialLeader && !consensus.start {
		return
	}

	for kind, timer := range consensus.consensusTimeout {
		// stop timer in listening mode
		if consensus.current.Mode() == Listening {
			timer.Stop()
			continue
		}

		// never stop bootstrap timer here in syncing mode as it only starts once
		// if it is stopped, bootstrap will be stopped and nodes
		// can't start view change or join consensus
		// the bootstrap timer will be stopped once consensus is reached or view change
		// is succeeded
		if consensus.current.Mode() == Syncing && kind != timeoutBootstrap {
			consensus.getLogger().Debug().
				Str("k", kind.String()).
				Str("Mode", consensus.current.Mode().String()).
				Msg("[ConsensusMainLoop] consensusTimeout stopped!!!")
			timer.Stop()
			continue
		}

		if !timer.CheckExpire() {
			continue
		}

		// Both kinds of expiry lead to a view change; only the log text differs.
		warning := "[ConsensusMainLoop] Ops Consensus Timeout!!!"
		if kind == timeoutViewChange {
			warning = "[ConsensusMainLoop] Ops View Change Timeout!!!"
		}
		consensus.getLogger().Warn().Msg(warning)
		consensus.startViewChange()
		return
	}
}
// Close shuts the consensus down. The dHelper, when present, is closed first;
// then waitForCommit is given the chance to let an in-flight normal commit
// phase finish. The returned error is always nil.
func (consensus *Consensus) Close() error {
	if helper := consensus.dHelper; helper != nil {
		helper.close()
	}

	consensus.waitForCommit()
	return nil
}
// BlockChannel handles a newly proposed block.
//
// Blocks whose number is lower than the current consensus block number are
// logged and dropped. Otherwise a timer is scheduled to fire at NextBlockDue;
// when it fires, the finality count is started, NextBlockDue is moved forward
// by BlockPeriod, startTime is refreshed, the message sender is reset for the
// new block number and the block is announced. The dHelper, when set, is
// started before the method returns; the announcement itself happens later in
// the timer callback.
func (consensus *Consensus) BlockChannel(newBlock *types.Block) {
	//consensus.ReshardingNextLeader(newBlock)
	consensus.getLogger().Info().
		Uint64("MsgBlockNum", newBlock.NumberU64()).
		Msg("[ConsensusMainLoop] Received Proposed New Block!")

	if newBlock.NumberU64() < consensus.BlockNum() {
		consensus.getLogger().Warn().Uint64("newBlockNum", newBlock.NumberU64()).
			Msg("[ConsensusMainLoop] received old block, abort")
		return
	}

	// Sleep to wait for the full block time
	consensus.getLogger().Info().Msg("[ConsensusMainLoop] Waiting for Block Time")
	time.AfterFunc(time.Until(consensus.NextBlockDue), func() {
		consensus.StartFinalityCount()
		// Update time due for next block
		consensus.NextBlockDue = time.Now().Add(consensus.BlockPeriod)
		// startTime is not declared in this function; it is assigned here and
		// read back for the log line below.
		startTime = time.Now()
		consensus.msgSender.Reset(newBlock.NumberU64())
		consensus.getLogger().Info().
			Int("numTxs", len(newBlock.Transactions())).
			Int("numStakingTxs", len(newBlock.StakingTransactions())).
			Time("startTime", startTime).
			Int64("publicKeys", consensus.Decider.ParticipantsCount()).
			Msg("[ConsensusMainLoop] STARTING CONSENSUS")
		consensus.announce(newBlock)
	})

	if consensus.dHelper != nil {
		consensus.dHelper.start()
	}
}
2 years ago
// Close closes the consensus. If current is in normal commit phase, wait until the commit
// phase end.
//
// It closes the dHelper when one is set, then calls waitForCommit, and always
// returns nil.
//
// NOTE(review): an identical Close method on *Consensus is already declared
// earlier in this file (just before BlockChannel). Go does not allow the same
// method to be declared twice on one receiver type, so one of the two copies
// should be removed — confirm which one is meant to stay.
func (consensus *Consensus) Close() error {
	if consensus.dHelper != nil {
		consensus.dHelper.close()
	}
	consensus.waitForCommit()
	return nil
}
// waitForCommit blocks for at most twice the block period while consensus is
// in the commit phase of Normal mode, polling every 100ms. It returns
// immediately when the mode is not Normal or the phase is not FBFTCommit.
func (consensus *Consensus) waitForCommit() {
	inNormalCommit := consensus.Mode() == Normal && consensus.phase.Get() == FBFTCommit
	if !inNormalCommit {
		return
	}

	// We only need to wait consensus is in normal commit phase
	utils.Logger().Warn().Str("phase", consensus.phase.String()).Msg("[shutdown] commit phase has to wait")

	const pollInterval = time.Millisecond * 100
	deadline := time.Now().Add(2 * consensus.BlockPeriod)
	for {
		if !time.Now().Before(deadline) || consensus.GetConsensusPhase() != "Commit" {
			return
		}
		utils.Logger().Warn().Msg("[shutdown] wait for consensus finished")
		time.Sleep(pollInterval)
	}
}
[consensus][sync] Better coordination between state sync and consensus module. (#3352) * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [core] switch back the batch write condition in InsertReceiptChain * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [consensus] refactored and optimized tryCatchup logic * [sync] added consensus last mile block in sync. * [consensus] remove time wait for consensus inform sync. Make block low chan a buffered chan * [consensus] fix rebase errors, and optimize one line code * [consensus][sync] fix golint error and added prune logic in sync * [consensus] move header verify after adding FBFT log in onPrepared * [consensus] more change on block verification logic * [consensus] fix the verified panic issue * [consensus][sync] add block verification in consensus last mile, change it to iterator * [consensus] fix two nil pointer references when running local node (Still cannot find the root cause for it) * remove coverage.txt and add to gitignore * [consensus] add leader key check. Move quorum check logic after tryCatchup and can spin state sync * [consensus] remove the leader sender check for now. Will add later * [consensus] refactor fbftlog to get rid of unsafe mapset module. Replace with map * [consensus] move the isQuorumAchived logic back. We surely need to check it before add to FBFTlog * [consensus] remove the redundant block nil check * [test] fix the consensus test * [consensus] rebase main and fix stuff. Removed isSendByLeader * [consensus] added logic to spin up sync when received message is greater than consensus block number * [consensus] more changes in consensus. Remove some spin sync logic. * fix error in main * [consensus] change the hash algorithm of the FBFTLog to get rid of rlp error * [consensus] use seperate mutex in FBFT message * [consensus] change fbft log id to a shorter form. Added unit test case
4 years ago
// LastMileBlockIter is the iterator to iterate over the last mile blocks in consensus cache.
// All blocks returned are guaranteed to pass the verification.
type LastMileBlockIter struct {
blockCandidates []*types.Block
fbftLog *FBFTLog
[consensus][sync] Better coordination between state sync and consensus module. (#3352) * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [core] switch back the batch write condition in InsertReceiptChain * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [consensus] refactored and optimized tryCatchup logic * [sync] added consensus last mile block in sync. * [consensus] remove time wait for consensus inform sync. Make block low chan a buffered chan * [consensus] fix rebase errors, and optimize one line code * [consensus][sync] fix golint error and added prune logic in sync * [consensus] move header verify after adding FBFT log in onPrepared * [consensus] more change on block verification logic * [consensus] fix the verified panic issue * [consensus][sync] add block verification in consensus last mile, change it to iterator * [consensus] fix two nil pointer references when running local node (Still cannot find the root cause for it) * remove coverage.txt and add to gitignore * [consensus] add leader key check. Move quorum check logic after tryCatchup and can spin state sync * [consensus] remove the leader sender check for now. Will add later * [consensus] refactor fbftlog to get rid of unsafe mapset module. Replace with map * [consensus] move the isQuorumAchived logic back. We surely need to check it before add to FBFTlog * [consensus] remove the redundant block nil check * [test] fix the consensus test * [consensus] rebase main and fix stuff. Removed isSendByLeader * [consensus] added logic to spin up sync when received message is greater than consensus block number * [consensus] more changes in consensus. Remove some spin sync logic. * fix error in main * [consensus] change the hash algorithm of the FBFTLog to get rid of rlp error * [consensus] use seperate mutex in FBFT message * [consensus] change fbft log id to a shorter form. Added unit test case
4 years ago
verify func(*types.Block) error
curIndex int
logger *zerolog.Logger
}
// GetLastMileBlockIter get the iterator of the last mile blocks starting from number bnStart
func (consensus *Consensus) GetLastMileBlockIter(bnStart uint64) (*LastMileBlockIter, error) {
consensus.mutex.Lock()
defer consensus.mutex.Unlock()
if consensus.BlockVerifier == nil {
return nil, errors.New("consensus haven't initialized yet")
}
blocks, _, err := consensus.getLastMileBlocksAndMsg(bnStart)
if err != nil {
return nil, err
}
return &LastMileBlockIter{
blockCandidates: blocks,
fbftLog: consensus.FBFTLog,
[consensus][sync] Better coordination between state sync and consensus module. (#3352) * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [core] switch back the batch write condition in InsertReceiptChain * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [consensus] refactored and optimized tryCatchup logic * [sync] added consensus last mile block in sync. * [consensus] remove time wait for consensus inform sync. Make block low chan a buffered chan * [consensus] fix rebase errors, and optimize one line code * [consensus][sync] fix golint error and added prune logic in sync * [consensus] move header verify after adding FBFT log in onPrepared * [consensus] more change on block verification logic * [consensus] fix the verified panic issue * [consensus][sync] add block verification in consensus last mile, change it to iterator * [consensus] fix two nil pointer references when running local node (Still cannot find the root cause for it) * remove coverage.txt and add to gitignore * [consensus] add leader key check. Move quorum check logic after tryCatchup and can spin state sync * [consensus] remove the leader sender check for now. Will add later * [consensus] refactor fbftlog to get rid of unsafe mapset module. Replace with map * [consensus] move the isQuorumAchived logic back. We surely need to check it before add to FBFTlog * [consensus] remove the redundant block nil check * [test] fix the consensus test * [consensus] rebase main and fix stuff. Removed isSendByLeader * [consensus] added logic to spin up sync when received message is greater than consensus block number * [consensus] more changes in consensus. Remove some spin sync logic. * fix error in main * [consensus] change the hash algorithm of the FBFTLog to get rid of rlp error * [consensus] use seperate mutex in FBFT message * [consensus] change fbft log id to a shorter form. Added unit test case
4 years ago
verify: consensus.BlockVerifier,
curIndex: 0,
logger: consensus.getLogger(),
}, nil
}
// Next iterate to the next last mile block
func (iter *LastMileBlockIter) Next() *types.Block {
if iter.curIndex >= len(iter.blockCandidates) {
return nil
}
block := iter.blockCandidates[iter.curIndex]
iter.curIndex++
if !iter.fbftLog.IsBlockVerified(block.Hash()) {
[consensus][sync] Better coordination between state sync and consensus module. (#3352) * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [core] switch back the batch write condition in InsertReceiptChain * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [consensus] refactored and optimized tryCatchup logic * [sync] added consensus last mile block in sync. * [consensus] remove time wait for consensus inform sync. Make block low chan a buffered chan * [consensus] fix rebase errors, and optimize one line code * [consensus][sync] fix golint error and added prune logic in sync * [consensus] move header verify after adding FBFT log in onPrepared * [consensus] more change on block verification logic * [consensus] fix the verified panic issue * [consensus][sync] add block verification in consensus last mile, change it to iterator * [consensus] fix two nil pointer references when running local node (Still cannot find the root cause for it) * remove coverage.txt and add to gitignore * [consensus] add leader key check. Move quorum check logic after tryCatchup and can spin state sync * [consensus] remove the leader sender check for now. Will add later * [consensus] refactor fbftlog to get rid of unsafe mapset module. Replace with map * [consensus] move the isQuorumAchived logic back. We surely need to check it before add to FBFTlog * [consensus] remove the redundant block nil check * [test] fix the consensus test * [consensus] rebase main and fix stuff. Removed isSendByLeader * [consensus] added logic to spin up sync when received message is greater than consensus block number * [consensus] more changes in consensus. Remove some spin sync logic. * fix error in main * [consensus] change the hash algorithm of the FBFTLog to get rid of rlp error * [consensus] use seperate mutex in FBFT message * [consensus] change fbft log id to a shorter form. Added unit test case
4 years ago
if err := iter.verify(block); err != nil {
iter.logger.Debug().Err(err).Msg("block verification failed in consensus last mile block")
return nil
}
iter.fbftLog.MarkBlockVerified(block)
}
return block
}
// getLastMileBlocksAndMsg collects, for consecutive block numbers starting at
// bnStart, the committed block and its message from the FBFT log. Collection
// stops at the first number for which the log reports errFBFTLogNotFound; any
// other error aborts the call and is returned with nil slices.
func (consensus *Consensus) getLastMileBlocksAndMsg(bnStart uint64) ([]*types.Block, []*FBFTMessage, error) {
	var (
		collectedBlocks []*types.Block
		collectedMsgs   []*FBFTMessage
	)

	num := bnStart
	for {
		blk, msg, err := consensus.FBFTLog.GetCommittedBlockAndMsgsFromNumber(num, consensus.getLogger())
		if err == errFBFTLogNotFound {
			// Reached the end of the consecutive run held in the log.
			return collectedBlocks, collectedMsgs, nil
		}
		if err != nil {
			return nil, nil, err
		}
		collectedBlocks = append(collectedBlocks, blk)
		collectedMsgs = append(collectedMsgs, msg)
		num++
	}
}
// preCommitAndPropose commit the current block with 67% commit signatures and start
// proposing new block which will wait on the full commit signatures to finish
func (consensus *Consensus) preCommitAndPropose(blk *types.Block) error {
2 years ago
//fmt.Println("preCommitAndPropose", utils.GetPort(), blk.NumberU64())
if blk == nil {
return errors.New("block to pre-commit is nil")
}
leaderPriKey, err := consensus.GetConsensusLeaderPrivateKey()
if err != nil {
consensus.getLogger().Error().Err(err).Msg("[preCommitAndPropose] leader not found")
return err
}
// Construct committed message
network, err := consensus.construct(msg_pb.MessageType_COMMITTED, nil, []*bls.PrivateKeyWrapper{leaderPriKey})
if err != nil {
consensus.getLogger().Warn().Err(err).
Msg("[preCommitAndPropose] Unable to construct Committed message")
return err
}
4 years ago
msgToSend, FBFTMsg :=
network.Bytes,
network.FBFTMsg
bareMinimumCommit := FBFTMsg.Payload
consensus.FBFTLog.AddVerifiedMessage(FBFTMsg)
if err := consensus.verifyLastCommitSig(bareMinimumCommit, blk); err != nil {
return errors.Wrap(err, "[preCommitAndPropose] failed verifying last commit sig")
}
go func() {
blk.SetCurrentCommitSig(bareMinimumCommit)
// Send committed message to validators since 2/3 commit is already collected
if err := consensus.msgSender.SendWithRetry(
blk.NumberU64(),
msg_pb.MessageType_COMMITTED, []nodeconfig.GroupID{
nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)),
},
p2p.ConstructMessage(msgToSend)); err != nil {
consensus.getLogger().Warn().Err(err).Msg("[preCommitAndPropose] Cannot send committed message")
} else {
consensus.getLogger().Info().
Str("blockHash", blk.Hash().Hex()).
Uint64("blockNum", consensus.BlockNum()).
Hex("lastCommitSig", bareMinimumCommit).
Msg("[preCommitAndPropose] Sent Committed Message")
}
if _, err := consensus.Blockchain().InsertChain([]*types.Block{blk}, !consensus.FBFTLog.IsBlockVerified(blk.Hash())); err != nil {
consensus.getLogger().Error().Err(err).Msg("[preCommitAndPropose] Failed to add block to chain")
return
}
4 years ago
consensus.getLogger().Info().Msg("[preCommitAndPropose] Start consensus timer")
consensus.consensusTimeout[timeoutConsensus].Start()
// Send signal to Node to propose the new block for consensus
consensus.getLogger().Info().Msg("[preCommitAndPropose] sending block proposal signal")
consensus.ReadySignal <- AsyncProposal
}()
return nil
}
// verifyLastCommitSig checks that the leading bls.BLSSignatureSizeInBytes
// bytes of lastCommitSig form an aggregate signature that verifies, under the
// aggregate public key of the current commit bitmap, against the commit
// payload built for blk (epoch, hash, number and view ID). It returns an error
// when the input is too short, cannot be deserialized, or does not verify.
func (consensus *Consensus) verifyLastCommitSig(lastCommitSig []byte, blk *types.Block) error {
	sigLen := bls.BLSSignatureSizeInBytes
	if len(lastCommitSig) < sigLen {
		return errors.New("lastCommitSig not have enough length")
	}

	var aggSig bls2.Sign
	if err := aggSig.Deserialize(lastCommitSig[:sigLen]); err != nil {
		return errors.New("unable to deserialize multi-signature from payload")
	}

	signersKey := consensus.commitBitmap.AggregatePublic
	payload := signature.ConstructCommitPayload(
		consensus.Blockchain(),
		blk.Epoch(),
		blk.Hash(),
		blk.NumberU64(),
		blk.Header().ViewID().Uint64(),
	)
	if aggSig.VerifyHash(signersKey, payload) {
		return nil
	}
	return errors.New("Failed to verify the multi signature for last commit sig")
}
[consensus][sync] Better coordination between state sync and consensus module. (#3352) * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [core] switch back the batch write condition in InsertReceiptChain * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [consensus] refactored and optimized tryCatchup logic * [sync] added consensus last mile block in sync. * [consensus] remove time wait for consensus inform sync. Make block low chan a buffered chan * [consensus] fix rebase errors, and optimize one line code * [consensus][sync] fix golint error and added prune logic in sync * [consensus] move header verify after adding FBFT log in onPrepared * [consensus] more change on block verification logic * [consensus] fix the verified panic issue * [consensus][sync] add block verification in consensus last mile, change it to iterator * [consensus] fix two nil pointer references when running local node (Still cannot find the root cause for it) * remove coverage.txt and add to gitignore * [consensus] add leader key check. Move quorum check logic after tryCatchup and can spin state sync * [consensus] remove the leader sender check for now. Will add later * [consensus] refactor fbftlog to get rid of unsafe mapset module. Replace with map * [consensus] move the isQuorumAchived logic back. We surely need to check it before add to FBFTlog * [consensus] remove the redundant block nil check * [test] fix the consensus test * [consensus] rebase main and fix stuff. Removed isSendByLeader * [consensus] added logic to spin up sync when received message is greater than consensus block number * [consensus] more changes in consensus. Remove some spin sync logic. * fix error in main * [consensus] change the hash algorithm of the FBFTLog to get rid of rlp error * [consensus] use seperate mutex in FBFT message * [consensus] change fbft log id to a shorter form. Added unit test case
4 years ago
// tryCatchup add the last mile block in PBFT log memory cache to blockchain.
func (consensus *Consensus) tryCatchup() error {
// TODO: change this to a more systematic symbol
if consensus.BlockVerifier == nil {
return errors.New("consensus haven't finished initialization")
}
initBN := consensus.BlockNum()
[consensus][sync] Better coordination between state sync and consensus module. (#3352) * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [core] switch back the batch write condition in InsertReceiptChain * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [consensus] refactored and optimized tryCatchup logic * [sync] added consensus last mile block in sync. * [consensus] remove time wait for consensus inform sync. Make block low chan a buffered chan * [consensus] fix rebase errors, and optimize one line code * [consensus][sync] fix golint error and added prune logic in sync * [consensus] move header verify after adding FBFT log in onPrepared * [consensus] more change on block verification logic * [consensus] fix the verified panic issue * [consensus][sync] add block verification in consensus last mile, change it to iterator * [consensus] fix two nil pointer references when running local node (Still cannot find the root cause for it) * remove coverage.txt and add to gitignore * [consensus] add leader key check. Move quorum check logic after tryCatchup and can spin state sync * [consensus] remove the leader sender check for now. Will add later * [consensus] refactor fbftlog to get rid of unsafe mapset module. Replace with map * [consensus] move the isQuorumAchived logic back. We surely need to check it before add to FBFTlog * [consensus] remove the redundant block nil check * [test] fix the consensus test * [consensus] rebase main and fix stuff. Removed isSendByLeader * [consensus] added logic to spin up sync when received message is greater than consensus block number * [consensus] more changes in consensus. Remove some spin sync logic. * fix error in main * [consensus] change the hash algorithm of the FBFTLog to get rid of rlp error * [consensus] use seperate mutex in FBFT message * [consensus] change fbft log id to a shorter form. Added unit test case
4 years ago
defer consensus.postCatchup(initBN)
blks, msgs, err := consensus.getLastMileBlocksAndMsg(initBN)
if err != nil {
return errors.Wrapf(err, "[TryCatchup] Failed to get last mile blocks: %v", err)
}
for i := range blks {
blk, msg := blks[i], msgs[i]
if blk == nil {
return nil
}
blk.SetCurrentCommitSig(msg.Payload)
if err := consensus.VerifyBlock(blk); err != nil {
consensus.getLogger().Err(err).Msg("[TryCatchup] failed block verifier")
return err
[consensus][sync] Better coordination between state sync and consensus module. (#3352) * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [core] switch back the batch write condition in InsertReceiptChain * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [consensus] refactored and optimized tryCatchup logic * [sync] added consensus last mile block in sync. * [consensus] remove time wait for consensus inform sync. Make block low chan a buffered chan * [consensus] fix rebase errors, and optimize one line code * [consensus][sync] fix golint error and added prune logic in sync * [consensus] move header verify after adding FBFT log in onPrepared * [consensus] more change on block verification logic * [consensus] fix the verified panic issue * [consensus][sync] add block verification in consensus last mile, change it to iterator * [consensus] fix two nil pointer references when running local node (Still cannot find the root cause for it) * remove coverage.txt and add to gitignore * [consensus] add leader key check. Move quorum check logic after tryCatchup and can spin state sync * [consensus] remove the leader sender check for now. Will add later * [consensus] refactor fbftlog to get rid of unsafe mapset module. Replace with map * [consensus] move the isQuorumAchived logic back. We surely need to check it before add to FBFTlog * [consensus] remove the redundant block nil check * [test] fix the consensus test * [consensus] rebase main and fix stuff. Removed isSendByLeader * [consensus] added logic to spin up sync when received message is greater than consensus block number * [consensus] more changes in consensus. Remove some spin sync logic. * fix error in main * [consensus] change the hash algorithm of the FBFTLog to get rid of rlp error * [consensus] use seperate mutex in FBFT message * [consensus] change fbft log id to a shorter form. Added unit test case
4 years ago
}
consensus.getLogger().Info().Msg("[TryCatchup] Adding block to chain")
if err := consensus.commitBlock(blk, msgs[i]); err != nil {
consensus.getLogger().Error().Err(err).Msg("[TryCatchup] Failed to add block to chain")
return err
}
select {
// TODO: Remove this when removing dns sync and stream sync is fully up
[consensus][sync] Better coordination between state sync and consensus module. (#3352) * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [core] switch back the batch write condition in InsertReceiptChain * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [consensus] refactored and optimized tryCatchup logic * [sync] added consensus last mile block in sync. * [consensus] remove time wait for consensus inform sync. Make block low chan a buffered chan * [consensus] fix rebase errors, and optimize one line code * [consensus][sync] fix golint error and added prune logic in sync * [consensus] move header verify after adding FBFT log in onPrepared * [consensus] more change on block verification logic * [consensus] fix the verified panic issue * [consensus][sync] add block verification in consensus last mile, change it to iterator * [consensus] fix two nil pointer references when running local node (Still cannot find the root cause for it) * remove coverage.txt and add to gitignore * [consensus] add leader key check. Move quorum check logic after tryCatchup and can spin state sync * [consensus] remove the leader sender check for now. Will add later * [consensus] refactor fbftlog to get rid of unsafe mapset module. Replace with map * [consensus] move the isQuorumAchived logic back. We surely need to check it before add to FBFTlog * [consensus] remove the redundant block nil check * [test] fix the consensus test * [consensus] rebase main and fix stuff. Removed isSendByLeader * [consensus] added logic to spin up sync when received message is greater than consensus block number * [consensus] more changes in consensus. Remove some spin sync logic. * fix error in main * [consensus] change the hash algorithm of the FBFTLog to get rid of rlp error * [consensus] use seperate mutex in FBFT message * [consensus] change fbft log id to a shorter form. Added unit test case
4 years ago
case consensus.VerifiedNewBlock <- blk:
default:
consensus.getLogger().Info().
Str("blockHash", blk.Hash().String()).
Msg("[TryCatchup] consensus verified block send to chan failed")
continue
}
}
return nil
}
func (consensus *Consensus) commitBlock(blk *types.Block, committedMsg *FBFTMessage) error {
2 years ago
// this function evaluates for all, leader and validators.
if consensus.Blockchain().CurrentBlock().NumberU64() < blk.NumberU64() {
if _, err := consensus.Blockchain().InsertChain([]*types.Block{blk}, !consensus.FBFTLog.IsBlockVerified(blk.Hash())); err != nil {
consensus.getLogger().Error().Err(err).Msg("[commitBlock] Failed to add block to chain")
return err
}
[consensus][sync] Better coordination between state sync and consensus module. (#3352) * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [core] switch back the batch write condition in InsertReceiptChain * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [consensus] refactored and optimized tryCatchup logic * [sync] added consensus last mile block in sync. * [consensus] remove time wait for consensus inform sync. Make block low chan a buffered chan * [consensus] fix rebase errors, and optimize one line code * [consensus][sync] fix golint error and added prune logic in sync * [consensus] move header verify after adding FBFT log in onPrepared * [consensus] more change on block verification logic * [consensus] fix the verified panic issue * [consensus][sync] add block verification in consensus last mile, change it to iterator * [consensus] fix two nil pointer references when running local node (Still cannot find the root cause for it) * remove coverage.txt and add to gitignore * [consensus] add leader key check. Move quorum check logic after tryCatchup and can spin state sync * [consensus] remove the leader sender check for now. Will add later * [consensus] refactor fbftlog to get rid of unsafe mapset module. Replace with map * [consensus] move the isQuorumAchived logic back. We surely need to check it before add to FBFTlog * [consensus] remove the redundant block nil check * [test] fix the consensus test * [consensus] rebase main and fix stuff. Removed isSendByLeader * [consensus] added logic to spin up sync when received message is greater than consensus block number * [consensus] more changes in consensus. Remove some spin sync logic. * fix error in main * [consensus] change the hash algorithm of the FBFTLog to get rid of rlp error * [consensus] use seperate mutex in FBFT message * [consensus] change fbft log id to a shorter form. Added unit test case
4 years ago
}
if !committedMsg.HasSingleSender() {
4 years ago
consensus.getLogger().Error().Msg("[TryCatchup] Leader message can not have multiple sender keys")
4 years ago
return errIncorrectSender
4 years ago
}
[consensus][sync] Better coordination between state sync and consensus module. (#3352) * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [core] switch back the batch write condition in InsertReceiptChain * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [consensus] refactored and optimized tryCatchup logic * [sync] added consensus last mile block in sync. * [consensus] remove time wait for consensus inform sync. Make block low chan a buffered chan * [consensus] fix rebase errors, and optimize one line code * [consensus][sync] fix golint error and added prune logic in sync * [consensus] move header verify after adding FBFT log in onPrepared * [consensus] more change on block verification logic * [consensus] fix the verified panic issue * [consensus][sync] add block verification in consensus last mile, change it to iterator * [consensus] fix two nil pointer references when running local node (Still cannot find the root cause for it) * remove coverage.txt and add to gitignore * [consensus] add leader key check. Move quorum check logic after tryCatchup and can spin state sync * [consensus] remove the leader sender check for now. Will add later * [consensus] refactor fbftlog to get rid of unsafe mapset module. Replace with map * [consensus] move the isQuorumAchived logic back. We surely need to check it before add to FBFTlog * [consensus] remove the redundant block nil check * [test] fix the consensus test * [consensus] rebase main and fix stuff. Removed isSendByLeader * [consensus] added logic to spin up sync when received message is greater than consensus block number * [consensus] more changes in consensus. Remove some spin sync logic. * fix error in main * [consensus] change the hash algorithm of the FBFTLog to get rid of rlp error * [consensus] use seperate mutex in FBFT message * [consensus] change fbft log id to a shorter form. Added unit test case
4 years ago
consensus.FinishFinalityCount()
consensus.PostConsensusJob(blk)
consensus.SetupForNewConsensus(blk, committedMsg)
utils.Logger().Info().Uint64("blockNum", blk.NumberU64()).
Str("hash", blk.Header().Hash().Hex()).
Msg("Added New Block to Blockchain!!!")
return nil
}
// rotateLeader rotates the leader to the next leader in the committee.
// This function must be called with enabled leader rotation.
2 years ago
func (consensus *Consensus) rotateLeader(epoch *big.Int) {
prev := consensus.GetLeaderPubKey()
bc := consensus.Blockchain()
curNumber := bc.CurrentHeader().Number().Uint64()
utils.Logger().Info().Msgf("[Rotating leader] epoch: %v rotation:%v numblocks:%d", epoch.Uint64(), bc.Config().IsLeaderRotation(epoch), bc.Config().LeaderRotationBlocksCount)
leader := consensus.GetLeaderPubKey()
for i := 0; i < bc.Config().LeaderRotationBlocksCount; i++ {
header := bc.GetHeaderByNumber(curNumber - uint64(i))
if header == nil {
return
}
// Previous epoch, we should not change leader.
if header.Epoch().Uint64() != epoch.Uint64() {
return
2 years ago
}
// Check if the same leader.
pub, err := bc.GetLeaderPubKeyFromCoinbase(header)
if err != nil {
utils.Logger().Error().Err(err).Msg("Failed to get leader public key from coinbase")
return
}
if !pub.Object.IsEqual(leader.Object) {
// Another leader.
return
}
2 years ago
}
// Passed all checks, we can change leader.
wasFound, next := consensus.Decider.NthNextHmy(shard.Schedule.InstanceForEpoch(epoch), leader, 1)
if !wasFound {
utils.Logger().Error().Msg("Failed to get next leader")
return
} else {
consensus.SetLeaderPubKey(next)
}
if consensus.IsLeader() && !consensus.GetLeaderPubKey().Object.IsEqual(prev.Object) {
// leader changed
go func() {
consensus.ReadySignal <- SyncProposal
}()
}
2 years ago
}
// SetupForNewConsensus sets the state for new consensus
func (consensus *Consensus) SetupForNewConsensus(blk *types.Block, committedMsg *FBFTMessage) {
atomic.StoreUint64(&consensus.blockNum, blk.NumberU64()+1)
consensus.SetCurBlockViewID(committedMsg.ViewID + 1)
consensus.LeaderPubKey = committedMsg.SenderPubkeys[0]
var epoch *big.Int
if blk.IsLastBlockInEpoch() {
epoch = new(big.Int).Add(blk.Epoch(), common.Big1)
} else {
epoch = blk.Epoch()
}
if consensus.Blockchain.Config().IsLeaderRotation(epoch) {
consensus.rotateLeader(epoch)
2 years ago
}
// Update consensus keys at last so the change of leader status doesn't mess up normal flow
if blk.IsLastBlockInEpoch() {
consensus.SetMode(consensus.UpdateConsensusInformation())
}
consensus.FBFTLog.PruneCacheBeforeBlock(blk.NumberU64())
[consensus][sync] Better coordination between state sync and consensus module. (#3352) * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [core] switch back the batch write condition in InsertReceiptChain * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [consensus] refactored and optimized tryCatchup logic * [sync] added consensus last mile block in sync. * [consensus] remove time wait for consensus inform sync. Make block low chan a buffered chan * [consensus] fix rebase errors, and optimize one line code * [consensus][sync] fix golint error and added prune logic in sync * [consensus] move header verify after adding FBFT log in onPrepared * [consensus] more change on block verification logic * [consensus] fix the verified panic issue * [consensus][sync] add block verification in consensus last mile, change it to iterator * [consensus] fix two nil pointer references when running local node (Still cannot find the root cause for it) * remove coverage.txt and add to gitignore * [consensus] add leader key check. Move quorum check logic after tryCatchup and can spin state sync * [consensus] remove the leader sender check for now. Will add later * [consensus] refactor fbftlog to get rid of unsafe mapset module. Replace with map * [consensus] move the isQuorumAchived logic back. We surely need to check it before add to FBFTlog * [consensus] remove the redundant block nil check * [test] fix the consensus test * [consensus] rebase main and fix stuff. Removed isSendByLeader * [consensus] added logic to spin up sync when received message is greater than consensus block number * [consensus] more changes in consensus. Remove some spin sync logic. * fix error in main * [consensus] change the hash algorithm of the FBFTLog to get rid of rlp error * [consensus] use seperate mutex in FBFT message * [consensus] change fbft log id to a shorter form. Added unit test case
4 years ago
consensus.ResetState()
}
// getEpochFirstBlockViewID returns one more than the view ID of the block
// whose number is epoch-1. Epoch 0 yields 0. An error is returned when that
// block cannot be found.
//
// NOTE(review): the epoch value is used directly as a block number in the
// lookup - confirm this is intended.
func (consensus *Consensus) getEpochFirstBlockViewID(epoch *big.Int) (uint64, error) {
	epochNum := epoch.Uint64()
	if epochNum == 0 {
		return 0, nil
	}

	lookupNum := epochNum - 1
	if found := consensus.Blockchain.GetBlockByNumber(lookupNum); found != nil {
		viewID := found.Header().ViewID().Uint64()
		return viewID + 1, nil
	}
	return 0, errors.Errorf("block not found for number %d", lookupNum)
}
[consensus][sync] Better coordination between state sync and consensus module. (#3352) * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [core] switch back the batch write condition in InsertReceiptChain * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [consensus] refactored and optimized tryCatchup logic * [sync] added consensus last mile block in sync. * [consensus] remove time wait for consensus inform sync. Make block low chan a buffered chan * [consensus] fix rebase errors, and optimize one line code * [consensus][sync] fix golint error and added prune logic in sync * [consensus] move header verify after adding FBFT log in onPrepared * [consensus] more change on block verification logic * [consensus] fix the verified panic issue * [consensus][sync] add block verification in consensus last mile, change it to iterator * [consensus] fix two nil pointer references when running local node (Still cannot find the root cause for it) * remove coverage.txt and add to gitignore * [consensus] add leader key check. Move quorum check logic after tryCatchup and can spin state sync * [consensus] remove the leader sender check for now. Will add later * [consensus] refactor fbftlog to get rid of unsafe mapset module. Replace with map * [consensus] move the isQuorumAchived logic back. We surely need to check it before add to FBFTlog * [consensus] remove the redundant block nil check * [test] fix the consensus test * [consensus] rebase main and fix stuff. Removed isSendByLeader * [consensus] added logic to spin up sync when received message is greater than consensus block number * [consensus] more changes in consensus. Remove some spin sync logic. * fix error in main * [consensus] change the hash algorithm of the FBFTLog to get rid of rlp error * [consensus] use seperate mutex in FBFT message * [consensus] change fbft log id to a shorter form. Added unit test case
4 years ago
func (consensus *Consensus) postCatchup(initBN uint64) {
if initBN < consensus.BlockNum() {
[consensus][sync] Better coordination between state sync and consensus module. (#3352) * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [core] switch back the batch write condition in InsertReceiptChain * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [consensus] refactored and optimized tryCatchup logic * [sync] added consensus last mile block in sync. * [consensus] remove time wait for consensus inform sync. Make block low chan a buffered chan * [consensus] fix rebase errors, and optimize one line code * [consensus][sync] fix golint error and added prune logic in sync * [consensus] move header verify after adding FBFT log in onPrepared * [consensus] more change on block verification logic * [consensus] fix the verified panic issue * [consensus][sync] add block verification in consensus last mile, change it to iterator * [consensus] fix two nil pointer references when running local node (Still cannot find the root cause for it) * remove coverage.txt and add to gitignore * [consensus] add leader key check. Move quorum check logic after tryCatchup and can spin state sync * [consensus] remove the leader sender check for now. Will add later * [consensus] refactor fbftlog to get rid of unsafe mapset module. Replace with map * [consensus] move the isQuorumAchived logic back. We surely need to check it before add to FBFTlog * [consensus] remove the redundant block nil check * [test] fix the consensus test * [consensus] rebase main and fix stuff. Removed isSendByLeader * [consensus] added logic to spin up sync when received message is greater than consensus block number * [consensus] more changes in consensus. Remove some spin sync logic. * fix error in main * [consensus] change the hash algorithm of the FBFTLog to get rid of rlp error * [consensus] use seperate mutex in FBFT message * [consensus] change fbft log id to a shorter form. Added unit test case
4 years ago
consensus.getLogger().Info().
Uint64("From", initBN).
Uint64("To", consensus.BlockNum()).
[consensus][sync] Better coordination between state sync and consensus module. (#3352) * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [core] switch back the batch write condition in InsertReceiptChain * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [consensus] refactored and optimized tryCatchup logic * [sync] added consensus last mile block in sync. * [consensus] remove time wait for consensus inform sync. Make block low chan a buffered chan * [consensus] fix rebase errors, and optimize one line code * [consensus][sync] fix golint error and added prune logic in sync * [consensus] move header verify after adding FBFT log in onPrepared * [consensus] more change on block verification logic * [consensus] fix the verified panic issue * [consensus][sync] add block verification in consensus last mile, change it to iterator * [consensus] fix two nil pointer references when running local node (Still cannot find the root cause for it) * remove coverage.txt and add to gitignore * [consensus] add leader key check. Move quorum check logic after tryCatchup and can spin state sync * [consensus] remove the leader sender check for now. Will add later * [consensus] refactor fbftlog to get rid of unsafe mapset module. Replace with map * [consensus] move the isQuorumAchived logic back. We surely need to check it before add to FBFTlog * [consensus] remove the redundant block nil check * [test] fix the consensus test * [consensus] rebase main and fix stuff. Removed isSendByLeader * [consensus] added logic to spin up sync when received message is greater than consensus block number * [consensus] more changes in consensus. Remove some spin sync logic. * fix error in main * [consensus] change the hash algorithm of the FBFTLog to get rid of rlp error * [consensus] use seperate mutex in FBFT message * [consensus] change fbft log id to a shorter form. Added unit test case
4 years ago
Msg("[TryCatchup] Caught up!")
consensus.switchPhase("TryCatchup", FBFTAnnounce)
}
// catch up and skip from view change trap
if initBN < consensus.BlockNum() && consensus.isViewChangingMode() {
[consensus][sync] Better coordination between state sync and consensus module. (#3352) * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [core] switch back the batch write condition in InsertReceiptChain * [rawdb] add error handling to all rawdb write. Add fdlimit module. Fix the node stuck * [consensus] refactored and optimized tryCatchup logic * [sync] added consensus last mile block in sync. * [consensus] remove time wait for consensus inform sync. Make block low chan a buffered chan * [consensus] fix rebase errors, and optimize one line code * [consensus][sync] fix golint error and added prune logic in sync * [consensus] move header verify after adding FBFT log in onPrepared * [consensus] more change on block verification logic * [consensus] fix the verified panic issue * [consensus][sync] add block verification in consensus last mile, change it to iterator * [consensus] fix two nil pointer references when running local node (Still cannot find the root cause for it) * remove coverage.txt and add to gitignore * [consensus] add leader key check. Move quorum check logic after tryCatchup and can spin state sync * [consensus] remove the leader sender check for now. Will add later * [consensus] refactor fbftlog to get rid of unsafe mapset module. Replace with map * [consensus] move the isQuorumAchived logic back. We surely need to check it before add to FBFTlog * [consensus] remove the redundant block nil check * [test] fix the consensus test * [consensus] rebase main and fix stuff. Removed isSendByLeader * [consensus] added logic to spin up sync when received message is greater than consensus block number * [consensus] more changes in consensus. Remove some spin sync logic. * fix error in main * [consensus] change the hash algorithm of the FBFTLog to get rid of rlp error * [consensus] use seperate mutex in FBFT message * [consensus] change fbft log id to a shorter form. Added unit test case
4 years ago
consensus.current.SetMode(Normal)
consensus.consensusTimeout[timeoutViewChange].Stop()
}
}
// GenerateVrfAndProof generates new VRF/Proof from hash of previous block
func (consensus *Consensus) GenerateVrfAndProof(newHeader *block.Header) error {
key, err := consensus.GetConsensusLeaderPrivateKey()
if err != nil {
return errors.New("[GenerateVrfAndProof] no leader private key provided")
}
sk := vrf_bls.NewVRFSigner(key.Pri)
previousHeader := consensus.Blockchain().GetHeaderByNumber(
newHeader.Number().Uint64() - 1,
[double-sign] Provide proof of double sign in slash record sent to beaconchain (#2253) * [double-sign] Commit changes in consensus needed for double-sign * [double-sign] Leader captures when valdator double signs, broadcasts to beaconchain * [slash] Add quick iteration tool for testing double-signing * [slash] Add webhook example * [slash] Add http server for hook to trigger double sign behavior * [double-sign] Use bin/trigger-double-sign to cause a double-sign * [double-sign] Full feedback loop working * [slash] Thread through the slash records in the block proposal step * [slash] Compute the slashing rate * [double-sign] Generalize yaml malicious for many keys * [double-sign][slash] Modify data structures, verify via webhook handler * [slash][double-sign] Find one address of bls public key signer, seemingly settle on data structures * [slash] Apply to state slashing for double signing * [slash][double-sign] Checkpoint for working code that slashes on beaconchain * [slash] Keep track of the total slash and total reporters reward * [slash] Dump account state before and after the slash * [slash] Satisfy Travis * [slash][state] Apply slash to the snapshot at beginning of epoch, now need to capture also the new delegates * [slash] Capture the unique new delegations since snapshot as well * [slash] Filter undelegation by epoch of double sign * [slash] Add TODO of correctness needed in slash needs on off-chain data * [rpc] Fix closure issue on shardID * [slash] Add delegator to double-sign testing script * [slash] Expand crt-validator.sh with commenting printfs and make delegation * [slash] Finish track payment of leftover slash debt after undelegation runs out * [slash] Now be explicit about error wrt delegatorSlashApply * [slash] Capture specific sanity check on slash paidoff * [slash] Track slash from undelegation piecemeal * [slash][delegation] Named slice types, .String() * [slash] Do no RLP encode twice, once is enough * [slash] Remove special case of validators 
own delegation * [slash] Refactor approach to slash state application * [slash] Begin expanding out Verify * [slash] Slash on snapshot delegations, not current * [slash] Fix Epoch Cmp * [slash] Third iteration on slash logic * [slash] Use full slash amount * [slash] More log, whitespace * [slash] Remove Println, add log * [slash] Remove debug Println * [slash] Add record in unit test * [slash] Build Validator snapshot, current. Fill out slash record * [slash] Need to get RLP dump of a header to use in test * [slash] Factor out double sign test constants * [slash] Factor out common for validator, stub out slash application, finish out deserialization setup * [slash] Factor out data structure creation because of var lexical scoping * [slash] Seem to have pipeline of unit test e2e executing * [slash] Add expected snitch, slash amounts * [slash] Checkpoint * [slash] Unit test correctly checks case of validator own stake which could drop below 1 ONE in slashing * [config] add double-sign testnet config (#1) Signed-off-by: Leo Chen <leo@harmony.one> * [slash] Commit for as is code & data of current dump.json * [slash] Order of state operation not correct in test, hence bad results, thank you dlv * [slash] Add snapshot state dump * [slash] Pay off slash of validator own delegation correctly * [slash] Pay off slash debt with special case for min-self * [slash] Pass first scenario conclusively * [slash] 2% slash passes unit test for own delegation and external * [slash] Parameterize unit test to easily test .02 vs .80 slash * [slash] Handle own delegation correctly at 80% slash * [slash] Have 80% slash working with external delegator * [slash] Remove debug code from slash * [slash] Adjust Apply signature, test again for 2% slash * [slash] Factor out scenario in testing so can test 2% and 80% at same time * [slash] Correct balance deduction on plan delegation * [slash] Mock out ChainReader for TestVerify * [slash] Small surface area interface, now feedback loop for verify * 
[slash] Remove development json * [slash] trigger-double-sign consumes yaml * [slash] Remove dead code * [slash][test] Factor ValidatorWrapper into scenario * [slash][test] Add example from local-testing dump - caution might be off * [slash] Factor out mutation of slashDebt * [slash][test] Factor out tests so can easily load test-case from bytes * [slash] Fix payment mistake in validator own delegation wrt min-self-delgation respected * [slash] Satisfy Travis * [slash] Begin cleanup of PR * [slash] Apply slash from header to Finalize via state processor * [slash] Productionize code, Println => logs; adjust slash picked in newblock * [slash] Need pointer for rlp.Decode * [slash] ValidatorInformation use full wrapper * Fix median stake * [staking] Adjust MarshalJSON for Validator, Wrapper * Refactor offchain data commit; Make block onchain/offchain commit atomic (#2279) * Refactor offchain data; Add epoch to ValidatorSnapshot * Make block onchain/offchain data commit atomically * [slash][committee] Set .Active to false on double sign, do not consider banned or inactive for committee assignment * [effective] VC eligible.go * [consensus] Redundant field in printf * [docker] import-ks for a dev account * [slash] Create BLS key for dockerfile and crt-validator.sh * [slash][docker] Easy deployment of double-sign testing * [docker] Have slash work as single docker command * [rpc] Fix median-stake RPC * [slash] Update webhook with default docker BLS key * [docker][slash] Fresh yaml copy for docker build, remove dev code in main.go * [slash] Remove helper binary, commented out code, change to local config * [params] Factor out test genesis value * Add shard checking to Tx-Pool & correct blacklist (#2301) * [core] Fix blacklist & add shardID check * [staking + node + cmd] Fix blacklist & add shardID check * [slash] Adjust to PR comments part 1 * [docker] Use different throw away funded account * [docker] Create easier testing for delegation with private keys * [docker] Update 
yaml * [slash] Remove special case for slashing validator own delegation wrt min-self-delegate * [docker] Install nano as well * [slash] Early error if banned * [quorum] Expose earning account in decider marshal json * Revert "Refactor offchain data commit; Make block onchain/offchain commit atomic (#2279)" This reverts commit 9ffbf682c075b49188923c65a0bbf39ac188be00. * [slash] Add non-sanity check way to update validator * [reward] Increase percision on percentage in schedule * [slash] Adjust logs * [committee] Check eligibility of validator before doing sanity check * [slash] Update docker * [slash] Move create validator script to test * [slash] More log * [param] Make things faster * [slash][off-chain] Clear out slashes from pending in writeblockwithstate * [cross-link] Log is not error, just info * [blockchain] Not necessary to guard DeletePendingSlashingCandidates * [slash][consensus] Use plain []byte for signature b/c bls.Sign has private impl fields, rlp does not encode that * [slash][test] Use faucet as sender, assume user imported * [slash] Test setup * [slash] reserve error for real error in logs * [slash][availability] Apply availability correct, bump signing count each block * [slash][staking] Consider banned field in sanity check, pay snitch only half of what was actually slashed * [slash] Pay as much as can * [slash] use right nowAmt * [slash] Take away from rewards as well * [slash] iterate faster * [slash] Remove dev based timing * [slash] Add more log, sanity check incoming slash records, only count external for slash rate * [availability][state] Adjust signature of ValidatorWrapper wrt state, filter out for staked validators, correct availaibility measure on running counters * [availability] More log * [slash] Simply pre slash erra slashing * [slash] Remove development code * [slash] Use height from recvMsg, todo on epoch * [staking] Not necessary to touch LastEpochInCommittee in staking_verifier * [slash] Undo ds in endpoint pattern config * 
[slash] Add TODO and log when delegation becomes 0 b/c slash debt payment * [slash] Abstract staked validators from shard.State into type, set slash rate based BLSKey count Co-authored-by: Leo Chen <leo@harmony.one> Co-authored-by: flicker-harmony <52401354+flicker-harmony@users.noreply.github.com> Co-authored-by: Rongjian Lan <rongjian@harmony.one> Co-authored-by: Daniel Van Der Maden <daniel@harmony.one>
5 years ago
)
if previousHeader == nil {
return errors.New("[GenerateVrfAndProof] no parent header found")
}
previousHash := previousHeader.Hash()
vrf, proof := sk.Evaluate(previousHash[:])
if proof == nil {
return errors.New("[GenerateVrfAndProof] failed to generate vrf")
}
newHeader.SetVrf(append(vrf[:], proof...))
consensus.getLogger().Info().
Uint64("BlockNum", newHeader.Number().Uint64()).
Uint64("Epoch", newHeader.Epoch().Uint64()).
Hex("VRF+Proof", newHeader.Vrf()).
Msg("[GenerateVrfAndProof] Leader generated a VRF")
return nil
}
// GenerateVdfAndProof derives a VDF seed from VRFs generated in the current
// epoch and starts the VDF computation in a background goroutine.
//
// The seed is the byte-wise XOR of the first 32 bytes of the VRF looked up for
// each of the first VdfSeedSize() entries of vrfBlockNumbers. Once the
// computation finishes, a 548-byte value (516 bytes of VDF+proof followed by
// the 32-byte seed) is sent on consensus.RndChannel.
//
// If vrfBlockNumbers has fewer than VdfSeedSize() entries, or a looked-up VRF
// is shorter than the seed, the problem is logged and no computation is
// started; indexing would otherwise panic with an out-of-range error.
func (consensus *Consensus) GenerateVdfAndProof(newBlock *types.Block, vrfBlockNumbers []uint64) {
	// Read the seed size once so the bounds check and the loop agree.
	seedSize := consensus.VdfSeedSize()
	if seedSize > len(vrfBlockNumbers) {
		consensus.getLogger().Error().
			Uint64("MsgBlockNum", newBlock.NumberU64()).
			Int("Num of VRF", len(vrfBlockNumbers)).
			Int("VdfSeedSize", seedSize).
			Msg("[ConsensusMainLoop] not enough VRF block numbers to derive VDF seed")
		return
	}

	// derive VDF seed from VRFs generated in the current epoch
	seed := [32]byte{}
	for i := 0; i < seedSize; i++ {
		previousVrf := consensus.Blockchain().GetVrfByNumber(vrfBlockNumbers[i])
		// NOTE(review): presumably an absent VRF comes back shorter than 32
		// bytes — confirm against GetVrfByNumber. Refuse rather than panic.
		if len(previousVrf) < len(seed) {
			consensus.getLogger().Error().
				Uint64("MsgBlockNum", newBlock.NumberU64()).
				Uint64("VrfBlockNum", vrfBlockNumbers[i]).
				Int("VrfLen", len(previousVrf)).
				Msg("[ConsensusMainLoop] VRF is too short to derive VDF seed")
			return
		}
		for j := range seed {
			seed[j] ^= previousVrf[j]
		}
	}

	consensus.getLogger().Info().
		Uint64("MsgBlockNum", newBlock.NumberU64()).
		Uint64("Epoch", newBlock.Header().Epoch().Uint64()).
		Int("Num of VRF", len(vrfBlockNumbers)).
		Msg("[ConsensusMainLoop] VDF computation started")

	// TODO ek – limit concurrency
	go func() {
		vdf := vdf_go.New(shard.Schedule.VdfDifficulty(), seed)
		outputChannel := vdf.GetOutputChannel()
		start := time.Now()
		vdf.Execute()
		duration := time.Since(start)
		consensus.getLogger().Info().
			Dur("duration", duration).
			Msg("[ConsensusMainLoop] VDF computation finished")
		output := <-outputChannel

		// The first 516 bytes are the VDF+proof and the last 32 bytes are XORed VRF as seed
		rndBytes := [548]byte{}
		copy(rndBytes[:516], output[:])
		copy(rndBytes[516:], seed[:])
		consensus.RndChannel <- rndBytes
	}()
}
// ValidateVdfAndProof validates the VDF/proof carried by headerObj.
//
// The seed is rebuilt as the byte-wise XOR of the first 32 bytes of the VRF
// looked up for each of the first VdfSeedSize() block numbers recorded for the
// header's epoch; the first 516 bytes of headerObj.Vdf() are then checked
// against that seed.
//
// It returns true only if verification succeeds. It returns false when fewer
// than VdfSeedSize() VRF block numbers are available, when a looked-up VRF is
// shorter than the seed, or when the VDF proof does not verify.
func (consensus *Consensus) ValidateVdfAndProof(headerObj *block.Header) bool {
	vrfBlockNumbers, err := consensus.Blockchain().ReadEpochVrfBlockNums(headerObj.Epoch())
	if err != nil {
		// Logged only: the length check below decides whether validation
		// can proceed with whatever was returned.
		consensus.getLogger().Error().Err(err).
			Str("MsgBlockNum", headerObj.Number().String()).
			Msg("[OnAnnounce] failed to read VRF block numbers for VDF computation")
	}

	// Read the seed size once so the bounds check and the loop agree.
	seedSize := consensus.VdfSeedSize()

	//extra check to make sure there's no index out of range error
	//it can happen if epoch is messed up, i.e. VDF output is generated in the next epoch
	if seedSize > len(vrfBlockNumbers) {
		return false
	}

	seed := [32]byte{}
	for i := 0; i < seedSize; i++ {
		previousVrf := consensus.Blockchain().GetVrfByNumber(vrfBlockNumbers[i])
		// NOTE(review): presumably an absent VRF comes back shorter than 32
		// bytes — confirm against GetVrfByNumber. Reject rather than panic.
		if len(previousVrf) < len(seed) {
			consensus.getLogger().Error().
				Str("MsgBlockNum", headerObj.Number().String()).
				Uint64("VrfBlockNum", vrfBlockNumbers[i]).
				Int("VrfLen", len(previousVrf)).
				Msg("[OnAnnounce] VRF is too short to derive VDF seed")
			return false
		}
		for j := range seed {
			seed[j] ^= previousVrf[j]
		}
	}

	vdfObject := vdf_go.New(shard.Schedule.VdfDifficulty(), seed)
	// copy takes at most 516 bytes from the header's VDF field; if the field
	// is shorter, the remainder of vdfOutput stays zero.
	vdfOutput := [516]byte{}
	copy(vdfOutput[:], headerObj.Vdf())

	if !vdfObject.Verify(vdfOutput) {
		consensus.getLogger().Warn().
			Str("MsgBlockNum", headerObj.Number().String()).
			Uint64("Epoch", headerObj.Epoch().Uint64()).
			Int("Num of VRF", seedSize).
			Msg("[OnAnnounce] VDF proof is not valid")
		return false
	}

	consensus.getLogger().Info().
		Str("MsgBlockNum", headerObj.Number().String()).
		Int("Num of VRF", seedSize).
		Msg("[OnAnnounce] validated a new VDF")
	return true
}