|
|
|
package node
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"errors"
|
|
|
|
"math"
|
|
|
|
"math/big"
|
|
|
|
"os"
|
|
|
|
"os/exec"
|
|
|
|
"strconv"
|
|
|
|
"syscall"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/ethereum/go-ethereum/common"
|
|
|
|
"github.com/ethereum/go-ethereum/rlp"
|
|
|
|
"github.com/harmony-one/bls/ffi/go/bls"
|
|
|
|
proto_node "github.com/harmony-one/harmony/api/proto/node"
|
|
|
|
"github.com/harmony-one/harmony/core/types"
|
|
|
|
nodeconfig "github.com/harmony-one/harmony/internal/configs/node"
|
|
|
|
"github.com/harmony-one/harmony/internal/ctxerror"
|
|
|
|
"github.com/harmony-one/harmony/internal/utils"
|
|
|
|
"github.com/harmony-one/harmony/p2p/host"
|
|
|
|
"github.com/harmony-one/harmony/shard"
|
|
|
|
"github.com/harmony-one/harmony/shard/committee"
|
|
|
|
)
|
|
|
|
|
|
|
|
// validateNewShardState validate whether the new shard state root matches
|
|
|
|
func (node *Node) validateNewShardState(block *types.Block) error {
|
|
|
|
// Common case first – blocks without resharding proposal
|
|
|
|
header := block.Header()
|
|
|
|
if header.ShardStateHash() == (common.Hash{}) {
|
|
|
|
// No new shard state was proposed
|
|
|
|
if block.ShardID() == shard.BeaconChainShardID {
|
|
|
|
if shard.Schedule.IsLastBlock(block.Number().Uint64()) {
|
|
|
|
// TODO ek - invoke view change
|
|
|
|
return errors.New("beacon leader did not propose resharding")
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if node.nextShardState.master != nil &&
|
|
|
|
!time.Now().Before(node.nextShardState.proposeTime) {
|
|
|
|
// TODO ek – invoke view change
|
|
|
|
return errors.New("regular leader did not propose resharding")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// We aren't expecting to reshard, so proceed to sign
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
shardState := &shard.State{}
|
|
|
|
err := rlp.DecodeBytes(header.ShardState(), shardState)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
proposed := *shardState
|
|
|
|
if block.ShardID() == shard.BeaconChainShardID {
|
|
|
|
// Beacon validators independently recalculate the master state and
|
|
|
|
// compare it against the proposed copy.
|
|
|
|
// TODO ek – this may be called from regular shards,
|
|
|
|
// for vetting beacon chain blocks received during block syncing.
|
|
|
|
// DRand may or or may not get in the way. Test this out.
|
|
|
|
expected, err := committee.WithStakingEnabled.ReadFromDB(
|
|
|
|
new(big.Int).Sub(block.Header().Epoch(), common.Big1),
|
|
|
|
node.Beaconchain(),
|
|
|
|
)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
utils.Logger().Error().Err(err).Msg("cannot calculate expected shard state")
|
|
|
|
return ctxerror.New("cannot calculate expected shard state").
|
|
|
|
WithCause(err)
|
|
|
|
}
|
|
|
|
if shard.CompareShardState(expected, proposed) != 0 {
|
|
|
|
// TODO ek – log state proposal differences
|
|
|
|
// TODO ek – this error should trigger view change
|
|
|
|
err := errors.New("shard state proposal is different from expected")
|
|
|
|
// TODO ek/chao – calculated shard state is different even with the
|
|
|
|
// same input, i.e. it is nondeterministic.
|
|
|
|
// Don't treat this as a blocker until we fix the nondeterminism.
|
|
|
|
utils.Logger().Warn().Err(err).Msg("shard state proposal is different from expected")
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Regular validators fetch the local-shard copy on the beacon chain
|
|
|
|
// and compare it against the proposed copy.
|
|
|
|
//
|
|
|
|
// We trust the master proposal in our copy of beacon chain.
|
|
|
|
// The sanity check for the master proposal is done earlier,
|
|
|
|
// when the beacon block containing the master proposal is received
|
|
|
|
// and before it is admitted into the local beacon chain.
|
|
|
|
//
|
|
|
|
// TODO ek – fetch masterProposal from beaconchain instead
|
|
|
|
masterProposal := node.nextShardState.master.ShardState
|
|
|
|
expected := masterProposal.FindCommitteeByID(block.ShardID())
|
|
|
|
switch len(proposed) {
|
|
|
|
case 0:
|
|
|
|
// Proposal to discontinue shard
|
|
|
|
if expected != nil {
|
|
|
|
// TODO ek – invoke view change
|
|
|
|
utils.Logger().Error().Msg("leader proposed to disband against beacon decision")
|
|
|
|
return errors.New(
|
|
|
|
"leader proposed to disband against beacon decision")
|
|
|
|
}
|
|
|
|
case 1:
|
|
|
|
// Proposal to continue shard
|
|
|
|
proposed := proposed[0]
|
|
|
|
// Sanity check: Shard ID should match
|
|
|
|
if proposed.ShardID != block.ShardID() {
|
|
|
|
// TODO ek – invoke view change
|
|
|
|
utils.Logger().Error().
|
|
|
|
Uint32("proposedShard", proposed.ShardID).
|
|
|
|
Uint32("blockShard", block.ShardID()).
|
|
|
|
Msg("proposal has incorrect shard ID")
|
|
|
|
return ctxerror.New("proposal has incorrect shard ID",
|
|
|
|
"proposedShard", proposed.ShardID,
|
|
|
|
"blockShard", block.ShardID())
|
|
|
|
}
|
|
|
|
// Did beaconchain say we are no more?
|
|
|
|
if expected == nil {
|
|
|
|
// TODO ek – invoke view change
|
|
|
|
|
|
|
|
utils.Logger().Error().Msg("leader proposed to continue against beacon decision")
|
|
|
|
return errors.New(
|
|
|
|
"leader proposed to continue against beacon decision")
|
|
|
|
}
|
|
|
|
// Did beaconchain say the same proposal?
|
|
|
|
if shard.CompareCommittee(expected, &proposed) != 0 {
|
|
|
|
// TODO ek – log differences
|
|
|
|
// TODO ek – invoke view change
|
|
|
|
utils.Logger().Error().Msg("proposal differs from one in beacon chain")
|
|
|
|
return errors.New("proposal differs from one in beacon chain")
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
// TODO ek – invoke view change
|
|
|
|
utils.Logger().Error().
|
|
|
|
Int("numShards", len(proposed)).
|
|
|
|
Msg("regular resharding proposal has incorrect number of shards")
|
|
|
|
return ctxerror.New(
|
|
|
|
"regular resharding proposal has incorrect number of shards",
|
|
|
|
"numShards", len(proposed))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (node *Node) broadcastEpochShardState(newBlock *types.Block) error {
|
|
|
|
shardState, err := newBlock.Header().GetShardState()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
epochShardStateMessage := proto_node.ConstructEpochShardStateMessage(
|
|
|
|
shard.EpochShardState{
|
|
|
|
Epoch: newBlock.Header().Epoch().Uint64() + 1,
|
|
|
|
ShardState: shardState,
|
|
|
|
},
|
|
|
|
)
|
|
|
|
return node.host.SendMessageToGroups(
|
|
|
|
[]nodeconfig.GroupID{node.NodeConfig.GetClientGroupID()},
|
|
|
|
host.ConstructP2pMessage(byte(0), epochShardStateMessage))
|
|
|
|
}
|
|
|
|
|
|
|
|
func (node *Node) epochShardStateMessageHandler(msgPayload []byte) error {
|
|
|
|
epochShardState, err := proto_node.DeserializeEpochShardStateFromMessage(msgPayload)
|
|
|
|
if err != nil {
|
|
|
|
utils.Logger().Error().Err(err).Msg("Can't get shard state message")
|
|
|
|
return ctxerror.New("Can't get shard state message").WithCause(err)
|
|
|
|
}
|
|
|
|
if node.Consensus == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
receivedEpoch := big.NewInt(int64(epochShardState.Epoch))
|
|
|
|
utils.Logger().Info().
|
|
|
|
Int64("epoch", receivedEpoch.Int64()).
|
|
|
|
Msg("received new shard state")
|
|
|
|
|
|
|
|
node.nextShardState.master = epochShardState
|
|
|
|
if node.Consensus.IsLeader() {
|
|
|
|
// Wait a bit to allow the master table to reach other validators.
|
|
|
|
node.nextShardState.proposeTime = time.Now().Add(5 * time.Second)
|
|
|
|
} else {
|
|
|
|
// Wait a bit to allow the master table to reach the leader,
|
|
|
|
// and to allow the leader to propose next shard state based upon it.
|
|
|
|
node.nextShardState.proposeTime = time.Now().Add(15 * time.Second)
|
|
|
|
}
|
|
|
|
// TODO ek – this should be done from replaying beaconchain once
|
|
|
|
// beaconchain sync is fixed
|
|
|
|
err = node.Beaconchain().WriteShardState(
|
|
|
|
receivedEpoch, epochShardState.ShardState)
|
|
|
|
if err != nil {
|
|
|
|
utils.Logger().Error().
|
|
|
|
Uint64("epoch", receivedEpoch.Uint64()).
|
|
|
|
Err(err).Msg("cannot store shard state")
|
|
|
|
return ctxerror.New("cannot store shard state", "epoch", receivedEpoch).
|
|
|
|
WithCause(err)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
func (node *Node) transitionIntoNextEpoch(shardState types.State) {
|
|
|
|
logger = logger.New(
|
|
|
|
"blsPubKey", hex.EncodeToString(node.Consensus.PubKey.Serialize()),
|
|
|
|
"curShard", node.Blockchain().ShardID(),
|
|
|
|
"curLeader", node.Consensus.IsLeader())
|
|
|
|
for _, c := range shardState {
|
|
|
|
utils.Logger().Debug().
|
|
|
|
Uint32("shardID", c.ShardID).
|
|
|
|
Str("nodeList", c.Slots).
|
|
|
|
Msg("new shard information")
|
|
|
|
}
|
|
|
|
myShardID, isNextLeader := findRoleInShardState(
|
|
|
|
node.Consensus.PubKey, shardState)
|
|
|
|
logger = logger.New(
|
|
|
|
"nextShard", myShardID,
|
|
|
|
"nextLeader", isNextLeader)
|
|
|
|
|
|
|
|
if myShardID == math.MaxUint32 {
|
|
|
|
getLogger().Info("Somehow I got kicked out. Exiting")
|
|
|
|
os.Exit(8) // 8 represents it's a loop and the program restart itself
|
|
|
|
}
|
|
|
|
|
|
|
|
myShardState := shardState[myShardID]
|
|
|
|
|
|
|
|
// Update public keys
|
|
|
|
var publicKeys []*bls.PublicKey
|
|
|
|
for idx, nodeID := range myShardState.Slots {
|
|
|
|
key := &bls.PublicKey{}
|
|
|
|
err := key.Deserialize(nodeID.BlsPublicKey[:])
|
|
|
|
if err != nil {
|
|
|
|
getLogger().Error("Failed to deserialize BLS public key in shard state",
|
|
|
|
"idx", idx,
|
|
|
|
"error", err)
|
|
|
|
}
|
|
|
|
publicKeys = append(publicKeys, key)
|
|
|
|
}
|
|
|
|
node.Consensus.UpdatePublicKeys(publicKeys)
|
|
|
|
// node.DRand.UpdatePublicKeys(publicKeys)
|
|
|
|
|
|
|
|
if node.Blockchain().ShardID() == myShardID {
|
|
|
|
getLogger().Info("staying in the same shard")
|
|
|
|
} else {
|
|
|
|
getLogger().Info("moving to another shard")
|
|
|
|
if err := node.shardChains.Close(); err != nil {
|
|
|
|
getLogger().Error("cannot close shard chains", "error", err)
|
|
|
|
}
|
|
|
|
restartProcess(getRestartArguments(myShardID))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*/
|
|
|
|
|
|
|
|
func findRoleInShardState(
|
|
|
|
key *bls.PublicKey, state shard.State,
|
|
|
|
) (shardID uint32, isLeader bool) {
|
|
|
|
keyBytes := key.Serialize()
|
|
|
|
for idx, shard := range state {
|
|
|
|
for nodeIdx, nodeID := range shard.Slots {
|
|
|
|
if bytes.Compare(nodeID.BlsPublicKey[:], keyBytes) == 0 {
|
|
|
|
return uint32(idx), nodeIdx == 0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return math.MaxUint32, false
|
|
|
|
}
|
|
|
|
|
|
|
|
func restartProcess(args []string) {
|
|
|
|
execFile, err := getBinaryPath()
|
|
|
|
if err != nil {
|
|
|
|
utils.Logger().Error().
|
|
|
|
Err(err).
|
|
|
|
Str("file", execFile).
|
|
|
|
Msg("Failed to get program path when restarting program")
|
|
|
|
}
|
|
|
|
utils.Logger().Info().
|
|
|
|
Strs("args", args).
|
|
|
|
Strs("env", os.Environ()).
|
|
|
|
Msg("Restarting program")
|
|
|
|
err = syscall.Exec(execFile, args, os.Environ())
|
|
|
|
if err != nil {
|
|
|
|
utils.Logger().Error().
|
|
|
|
Err(err).
|
|
|
|
Msg("Failed to restart program after resharding")
|
|
|
|
}
|
|
|
|
panic("syscall.Exec() is not supposed to return")
|
|
|
|
}
|
|
|
|
|
|
|
|
func getRestartArguments(myShardID uint32) []string {
|
|
|
|
args := os.Args
|
|
|
|
hasShardID := false
|
|
|
|
shardIDFlag := "-shard_id"
|
|
|
|
// newNodeFlag := "-is_newnode"
|
|
|
|
for i, arg := range args {
|
|
|
|
if arg == shardIDFlag {
|
|
|
|
if i+1 < len(args) {
|
|
|
|
args[i+1] = strconv.Itoa(int(myShardID))
|
|
|
|
} else {
|
|
|
|
args = append(args, strconv.Itoa(int(myShardID)))
|
|
|
|
}
|
|
|
|
hasShardID = true
|
|
|
|
}
|
|
|
|
// TODO: enable this
|
|
|
|
//if arg == newNodeFlag {
|
|
|
|
// args[i] = "" // remove new node flag
|
|
|
|
//}
|
|
|
|
}
|
|
|
|
if !hasShardID {
|
|
|
|
args = append(args, shardIDFlag)
|
|
|
|
args = append(args, strconv.Itoa(int(myShardID)))
|
|
|
|
}
|
|
|
|
return args
|
|
|
|
}
|
|
|
|
|
|
|
|
// Gets the path of this currently running binary program.
|
|
|
|
func getBinaryPath() (argv0 string, err error) {
|
|
|
|
argv0, err = exec.LookPath(os.Args[0])
|
|
|
|
if nil != err {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if _, err = os.Stat(argv0); nil != err {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|