The core protocol of WoopChain
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
woop/node/node_syncing.go

739 lines
22 KiB

package node
import (
"fmt"
"math/rand"
"net"
"strconv"
"sync"
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/rlp"
lru "github.com/hashicorp/golang-lru"
"github.com/pkg/errors"
"github.com/harmony-one/harmony/api/service"
"github.com/harmony-one/harmony/api/service/legacysync"
legdownloader "github.com/harmony-one/harmony/api/service/legacysync/downloader"
downloader_pb "github.com/harmony-one/harmony/api/service/legacysync/downloader/proto"
"github.com/harmony-one/harmony/api/service/synchronize"
"github.com/harmony-one/harmony/core"
"github.com/harmony-one/harmony/core/types"
"github.com/harmony-one/harmony/hmy/downloader"
nodeconfig "github.com/harmony-one/harmony/internal/configs/node"
"github.com/harmony-one/harmony/internal/utils"
"github.com/harmony-one/harmony/node/worker"
"github.com/harmony-one/harmony/p2p"
"github.com/harmony-one/harmony/shard"
)
// Constants related to doing syncing.
const (
SyncFrequency = 60
)
var letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
// BeaconSyncHook is the hook function called after inserted beacon in downloader
// TODO: This is a small misc piece of consensus logic. Better put it to consensus module.
func (node *Node) BeaconSyncHook() {
if node.Consensus.IsLeader() || rand.Intn(100) == 0 {
// TODO: Instead of leader, it would better be validator do this broadcast since leader do
// not have much idle resources.
node.BroadcastCrossLink()
}
}
// GenerateRandomString generates a random string with given length
func GenerateRandomString(n int) string {
b := make([]rune, n)
for i := range b {
b[i] = letterRunes[rand.Intn(len(letterRunes))]
}
return string(b)
}
// getNeighborPeers is a helper function to return list of peers
// based on different neightbor map
func getNeighborPeers(neighbor *sync.Map) []p2p.Peer {
tmp := []p2p.Peer{}
neighbor.Range(func(k, v interface{}) bool {
p := v.(p2p.Peer)
t := p.Port
p.Port = legacysync.GetSyncingPort(t)
tmp = append(tmp, p)
return true
})
return tmp
}
// DoSyncWithoutConsensus gets sync-ed to blockchain without joining consensus
func (node *Node) DoSyncWithoutConsensus() {
go node.DoSyncing(node.Blockchain(), node.Worker, false) //Don't join consensus
}
// IsSameHeight tells whether node is at same bc height as a peer
func (node *Node) IsSameHeight() (uint64, bool) {
if node.stateSync == nil {
node.stateSync = node.createStateSync()
}
return node.stateSync.IsSameBlockchainHeight(node.Blockchain())
}
func (node *Node) createStateSync() *legacysync.StateSync {
// Temp hack: The actual port used in dns sync is node.downloaderServer.Port.
// But registration is done through an old way of port arithmetics (syncPort + 3000).
// Thus for compatibility, we are doing the arithmetics here, and not to change the
// protocol itself. This is just the temporary hack and will not be a concern after
// state sync.
syncPort := node.downloaderServer.Port
mutatedPort := strconv.Itoa(syncPort + legacysync.SyncingPortDifference)
return legacysync.CreateStateSync(node.SelfPeer.IP, mutatedPort,
node.GetSyncID(), node.NodeConfig.Role() == nodeconfig.ExplorerNode)
}
// SyncingPeerProvider is an interface for getting the peers in the given shard.
type SyncingPeerProvider interface {
SyncingPeers(shardID uint32) (peers []p2p.Peer, err error)
}
// LegacySyncingPeerProvider uses neighbor lists stored in a Node to serve
// syncing peer list query.
type LegacySyncingPeerProvider struct {
node *Node
shardID func() uint32
}
// NewLegacySyncingPeerProvider creates and returns a new node-based syncing
// peer provider.
func NewLegacySyncingPeerProvider(node *Node) *LegacySyncingPeerProvider {
var shardID func() uint32
if node.shardChains != nil {
shardID = node.Blockchain().ShardID
}
return &LegacySyncingPeerProvider{node: node, shardID: shardID}
}
// SyncingPeers returns peers stored in neighbor maps in the node structure.
func (p *LegacySyncingPeerProvider) SyncingPeers(shardID uint32) (peers []p2p.Peer, err error) {
switch shardID {
case p.shardID():
peers = getNeighborPeers(&p.node.Neighbors)
case 0:
peers = getNeighborPeers(&p.node.BeaconNeighbors)
default:
return nil, errors.Errorf("unsupported shard ID %v", shardID)
}
return peers, nil
}
// DNSSyncingPeerProvider uses the given DNS zone to resolve syncing peers.
type DNSSyncingPeerProvider struct {
zone, port string
lookupHost func(name string) (addrs []string, err error)
}
// NewDNSSyncingPeerProvider returns a provider that uses given DNS name and
// port number to resolve syncing peers.
func NewDNSSyncingPeerProvider(zone, port string) *DNSSyncingPeerProvider {
return &DNSSyncingPeerProvider{
zone: zone,
port: port,
lookupHost: net.LookupHost,
}
}
// SyncingPeers resolves DNS name into peers and returns them.
func (p *DNSSyncingPeerProvider) SyncingPeers(shardID uint32) (peers []p2p.Peer, err error) {
dns := fmt.Sprintf("s%d.%s", shardID, p.zone)
addrs, err := p.lookupHost(dns)
if err != nil {
return nil, errors.Wrapf(err,
"[SYNC] cannot find peers using DNS name %#v", dns)
}
for _, addr := range addrs {
peers = append(peers, p2p.Peer{IP: addr, Port: p.port})
}
return peers, nil
}
// LocalSyncingPeerProvider uses localnet deployment convention to synthesize
// syncing peers.
type LocalSyncingPeerProvider struct {
basePort, selfPort uint16
numShards, shardSize uint32
}
// NewLocalSyncingPeerProvider returns a provider that synthesizes syncing
// peers given the network configuration
func NewLocalSyncingPeerProvider(
basePort, selfPort uint16, numShards, shardSize uint32,
) *LocalSyncingPeerProvider {
return &LocalSyncingPeerProvider{
basePort: basePort,
selfPort: selfPort,
numShards: numShards,
shardSize: shardSize,
}
}
// SyncingPeers returns local syncing peers using the sharding configuration.
func (p *LocalSyncingPeerProvider) SyncingPeers(shardID uint32) (peers []p2p.Peer, err error) {
if shardID >= p.numShards {
return nil, errors.Errorf(
"shard ID %d out of range 0..%d", shardID, p.numShards-1)
}
firstPort := uint32(p.basePort) + shardID
endPort := uint32(p.basePort) + p.numShards*p.shardSize
for port := firstPort; port < endPort; port += p.numShards {
if port == uint32(p.selfPort) {
continue // do not sync from self
}
peers = append(peers, p2p.Peer{IP: "127.0.0.1", Port: fmt.Sprint(port)})
}
return peers, nil
}
// doBeaconSyncing update received beaconchain blocks and downloads missing beacon chain blocks
func (node *Node) doBeaconSyncing() {
if node.NodeConfig.IsOffline {
return
}
if !node.NodeConfig.Downloader {
// If Downloader is not working, we need also deal with blocks from beaconBlockChannel
go func(node *Node) {
// TODO ek – infinite loop; add shutdown/cleanup logic
for beaconBlock := range node.BeaconBlockChannel {
if node.beaconSync != nil {
err := node.beaconSync.UpdateBlockAndStatus(
beaconBlock, node.Beaconchain(), true,
)
if err != nil {
node.beaconSync.AddLastMileBlock(beaconBlock)
} else if node.Consensus.IsLeader() || rand.Intn(100) == 0 {
// Only leader or 1% of validators broadcast crosslink to avoid spamming p2p
node.BroadcastCrossLink()
}
}
}
}(node)
}
// TODO ek – infinite loop; add shutdown/cleanup logic
for {
if node.beaconSync == nil {
utils.Logger().Info().Msg("initializing beacon sync")
node.beaconSync = node.createStateSync()
}
if node.beaconSync.GetActivePeerNumber() == 0 {
utils.Logger().Info().Msg("no peers; bootstrapping beacon sync config")
// 0 means shardID=0 here
peers, err := node.SyncingPeerProvider.SyncingPeers(0)
if err != nil {
utils.Logger().Warn().
Err(err).
Msg("cannot retrieve beacon syncing peers")
continue
}
if err := node.beaconSync.CreateSyncConfig(peers, true); err != nil {
utils.Logger().Warn().Err(err).Msg("cannot create beacon sync config")
continue
}
}
node.beaconSync.SyncLoop(node.Beaconchain(), node.BeaconWorker, true, nil)
time.Sleep(time.Duration(SyncFrequency) * time.Second)
}
}
// DoSyncing keep the node in sync with other peers, willJoinConsensus means the node will try to join consensus after catch up
func (node *Node) DoSyncing(bc *core.BlockChain, worker *worker.Worker, willJoinConsensus bool) {
if node.NodeConfig.IsOffline {
return
}
ticker := time.NewTicker(time.Duration(SyncFrequency) * time.Second)
// TODO ek – infinite loop; add shutdown/cleanup logic
for {
select {
case <-ticker.C:
node.doSync(bc, worker, willJoinConsensus)
case <-node.Consensus.BlockNumLowChan:
node.doSync(bc, worker, willJoinConsensus)
}
}
}
// doSync keep the node in sync with other peers, willJoinConsensus means the node will try to join consensus after catch up
func (node *Node) doSync(bc *core.BlockChain, worker *worker.Worker, willJoinConsensus bool) {
if node.stateSync.GetActivePeerNumber() < legacysync.NumPeersLowBound {
shardID := bc.ShardID()
peers, err := node.SyncingPeerProvider.SyncingPeers(shardID)
if err != nil {
utils.Logger().Warn().
Err(err).
Uint32("shard_id", shardID).
Msg("cannot retrieve syncing peers")
return
}
if err := node.stateSync.CreateSyncConfig(peers, false); err != nil {
utils.Logger().Warn().
Err(err).
Interface("peers", peers).
Msg("[SYNC] create peers error")
return
}
utils.Logger().Debug().Int("len", node.stateSync.GetActivePeerNumber()).Msg("[SYNC] Get Active Peers")
}
// TODO: treat fake maximum height
if outOfSync, _ := node.stateSync.IsOutOfSync(bc, true); outOfSync {
node.IsInSync.UnSet()
if willJoinConsensus {
node.Consensus.BlocksNotSynchronized()
}
node.stateSync.SyncLoop(bc, worker, false, node.Consensus)
if willJoinConsensus {
node.IsInSync.Set()
node.Consensus.BlocksSynchronized()
}
}
node.IsInSync.Set()
}
// SupportGRPCSyncServer do gRPC sync server
func (node *Node) SupportGRPCSyncServer(port int) {
node.InitSyncingServer(port)
node.StartSyncingServer(port)
}
// StartGRPCSyncClient start the legacy gRPC sync process
func (node *Node) StartGRPCSyncClient() {
if node.Blockchain().ShardID() != shard.BeaconChainShardID {
utils.Logger().Info().
Uint32("shardID", node.Blockchain().ShardID()).
Msg("SupportBeaconSyncing")
go node.doBeaconSyncing()
}
node.supportSyncing()
}
// supportSyncing keeps sleeping until it's doing consensus or it's a leader.
func (node *Node) supportSyncing() {
joinConsensus := false
// Check if the current node is explorer node.
switch node.NodeConfig.Role() {
case nodeconfig.Validator:
joinConsensus = true
}
// Send new block to unsync node if the current node is not explorer node.
// TODO: leo this pushing logic has to be removed
if joinConsensus {
go node.SendNewBlockToUnsync()
}
if node.stateSync == nil {
node.stateSync = node.createStateSync()
utils.Logger().Debug().Msg("[SYNC] initialized state sync")
}
go node.DoSyncing(node.Blockchain(), node.Worker, joinConsensus)
}
// InitSyncingServer starts downloader server.
func (node *Node) InitSyncingServer(port int) {
if node.downloaderServer == nil {
node.downloaderServer = legdownloader.NewServer(node, port)
}
}
// StartSyncingServer starts syncing server.
func (node *Node) StartSyncingServer(port int) {
utils.Logger().Info().Msg("[SYNC] support_syncing: StartSyncingServer")
if node.downloaderServer.GrpcServer == nil {
node.downloaderServer.Start()
}
}
// SendNewBlockToUnsync send latest verified block to unsync, registered nodes
func (node *Node) SendNewBlockToUnsync() {
for {
block := <-node.Consensus.VerifiedNewBlock
blockBytes, err := rlp.EncodeToBytes(block)
if err != nil {
utils.Logger().Warn().Msg("[SYNC] unable to encode block to hashes")
continue
}
blockWithSigBytes, err := node.getEncodedBlockWithSigFromBlock(block)
if err != nil {
utils.Logger().Warn().Err(err).Msg("[SYNC] rlp encode BlockWithSig")
continue
}
node.stateMutex.Lock()
for peerID, config := range node.peerRegistrationRecord {
elapseTime := time.Now().UnixNano() - config.timestamp
if elapseTime > broadcastTimeout {
utils.Logger().Warn().Str("peerID", peerID).Msg("[SYNC] SendNewBlockToUnsync to peer timeout")
node.peerRegistrationRecord[peerID].client.Close()
delete(node.peerRegistrationRecord, peerID)
continue
}
sendBytes := blockBytes
if config.withSig {
sendBytes = blockWithSigBytes
}
response, err := config.client.PushNewBlock(node.GetSyncID(), sendBytes, false)
// close the connection if cannot push new block to unsync node
if err != nil {
node.peerRegistrationRecord[peerID].client.Close()
delete(node.peerRegistrationRecord, peerID)
}
if response != nil && response.Type == downloader_pb.DownloaderResponse_INSYNC {
node.peerRegistrationRecord[peerID].client.Close()
delete(node.peerRegistrationRecord, peerID)
}
}
node.stateMutex.Unlock()
}
}
// CalculateResponse implements DownloadInterface on Node object.
func (node *Node) CalculateResponse(request *downloader_pb.DownloaderRequest, incomingPeer string) (*downloader_pb.DownloaderResponse, error) {
response := &downloader_pb.DownloaderResponse{}
if node.NodeConfig.IsOffline {
return response, nil
}
switch request.Type {
case downloader_pb.DownloaderRequest_BLOCKHASH:
if request.BlockHash == nil {
return response, fmt.Errorf("[SYNC] GetBlockHashes Request BlockHash is NIL")
}
if request.Size == 0 || request.Size > legacysync.SyncLoopBatchSize {
return response, fmt.Errorf("[SYNC] GetBlockHashes Request contains invalid Size %v", request.Size)
}
size := uint64(request.Size)
var startHashHeader common.Hash
copy(startHashHeader[:], request.BlockHash[:])
startHeader := node.Blockchain().GetHeaderByHash(startHashHeader)
if startHeader == nil {
return response, fmt.Errorf("[SYNC] GetBlockHashes Request cannot find startHash %s", startHashHeader.Hex())
}
startHeight := startHeader.Number().Uint64()
endHeight := node.Blockchain().CurrentBlock().NumberU64()
if startHeight >= endHeight {
utils.Logger().
Debug().
Uint64("myHeight", endHeight).
Uint64("requestHeight", startHeight).
Str("incomingIP", request.Ip).
Str("incomingPort", request.Port).
Str("incomingPeer", incomingPeer).
Msg("[SYNC] GetBlockHashes Request: I am not higher than requested node")
return response, nil
}
for blockNum := startHeight; blockNum <= startHeight+size; blockNum++ {
header := node.Blockchain().GetHeaderByNumber(blockNum)
if header == nil {
break
}
blockHash := header.Hash()
response.Payload = append(response.Payload, blockHash[:])
}
case downloader_pb.DownloaderRequest_BLOCKHEADER:
var hash common.Hash
for _, bytes := range request.Hashes {
hash.SetBytes(bytes)
encodedBlockHeader, err := node.getEncodedBlockHeaderByHash(hash)
if err == nil {
response.Payload = append(response.Payload, encodedBlockHeader)
}
}
case downloader_pb.DownloaderRequest_BLOCK:
var hash common.Hash
withSig := request.GetBlocksWithSig
for _, bytes := range request.Hashes {
hash.SetBytes(bytes)
var (
encoded []byte
err error
)
if withSig {
encoded, err = node.getEncodedBlockWithSigByHash(hash)
} else {
encoded, err = node.getEncodedBlockByHash(hash)
}
if err == nil {
response.Payload = append(response.Payload, encoded)
}
}
case downloader_pb.DownloaderRequest_BLOCKHEIGHT:
response.BlockHeight = node.Blockchain().CurrentBlock().NumberU64()
// this is the out of sync node acts as grpc server side
case downloader_pb.DownloaderRequest_NEWBLOCK:
if node.IsInSync.IsSet() {
response.Type = downloader_pb.DownloaderResponse_INSYNC
return response, nil
}
block, err := legacysync.RlpDecodeBlockOrBlockWithSig(request.BlockHash)
if err != nil {
utils.Logger().Warn().Err(err).Msg("[SYNC] unable to decode received new block")
return response, err
}
node.stateSync.AddNewBlock(request.PeerHash, block)
case downloader_pb.DownloaderRequest_REGISTER:
peerID := string(request.PeerHash[:])
ip := request.Ip
port := request.Port
withSig := request.RegisterWithSig
node.stateMutex.Lock()
defer node.stateMutex.Unlock()
if _, ok := node.peerRegistrationRecord[peerID]; ok {
response.Type = downloader_pb.DownloaderResponse_FAIL
utils.Logger().Warn().
Interface("ip", ip).
Interface("port", port).
Msg("[SYNC] peerRegistration record already exists")
return response, nil
} else if len(node.peerRegistrationRecord) >= maxBroadcastNodes {
response.Type = downloader_pb.DownloaderResponse_FAIL
utils.Logger().Debug().
Str("ip", ip).
Str("port", port).
Msg("[SYNC] maximum registration limit exceeds")
return response, nil
} else {
response.Type = downloader_pb.DownloaderResponse_FAIL
syncPort := legacysync.GetSyncingPort(port)
client := legdownloader.ClientSetup(ip, syncPort)
if client == nil {
utils.Logger().Warn().
Str("ip", ip).
Str("port", port).
Msg("[SYNC] unable to setup client for peerID")
return response, nil
}
config := &syncConfig{timestamp: time.Now().UnixNano(), client: client, withSig: withSig}
node.peerRegistrationRecord[peerID] = config
utils.Logger().Debug().
Str("ip", ip).
Str("port", port).
Msg("[SYNC] register peerID success")
response.Type = downloader_pb.DownloaderResponse_SUCCESS
}
case downloader_pb.DownloaderRequest_REGISTERTIMEOUT:
if !node.IsInSync.IsSet() {
count := node.stateSync.RegisterNodeInfo()
utils.Logger().Debug().
Int("number", count).
Msg("[SYNC] extra node registered")
}
}
return response, nil
}
const (
headerCacheSize = 10000
blockCacheSize = 10000
blockWithSigCacheSize = 10000
)
var (
// Cached fields for block header and block requests
headerReqCache, _ = lru.New(headerCacheSize)
blockReqCache, _ = lru.New(blockCacheSize)
blockWithSigReqCache, _ = lru.New(blockWithSigCacheSize)
errHeaderNotExist = errors.New("header not exist")
errBlockNotExist = errors.New("block not exist")
)
func (node *Node) getEncodedBlockHeaderByHash(hash common.Hash) ([]byte, error) {
if b, ok := headerReqCache.Get(hash); ok {
return b.([]byte), nil
}
h := node.Blockchain().GetHeaderByHash(hash)
if h == nil {
return nil, errHeaderNotExist
}
b, err := rlp.EncodeToBytes(h)
if err != nil {
return nil, err
}
headerReqCache.Add(hash, b)
return b, nil
}
func (node *Node) getEncodedBlockByHash(hash common.Hash) ([]byte, error) {
if b, ok := blockReqCache.Get(hash); ok {
return b.([]byte), nil
}
blk := node.Blockchain().GetBlockByHash(hash)
if blk == nil {
return nil, errBlockNotExist
}
b, err := rlp.EncodeToBytes(blk)
if err != nil {
return nil, err
}
blockReqCache.Add(hash, b)
return b, nil
}
func (node *Node) getEncodedBlockWithSigByHash(hash common.Hash) ([]byte, error) {
if b, ok := blockWithSigReqCache.Get(hash); ok {
return b.([]byte), nil
}
blk := node.Blockchain().GetBlockByHash(hash)
if blk == nil {
return nil, errBlockNotExist
}
sab, err := node.getCommitSigAndBitmapFromChildOrDB(blk)
if err != nil {
return nil, err
}
bwh := legacysync.BlockWithSig{
Block: blk,
CommitSigAndBitmap: sab,
}
b, err := rlp.EncodeToBytes(bwh)
if err != nil {
return nil, err
}
blockWithSigReqCache.Add(hash, b)
return b, nil
}
func (node *Node) getEncodedBlockWithSigFromBlock(block *types.Block) ([]byte, error) {
bwh := legacysync.BlockWithSig{
Block: block,
CommitSigAndBitmap: block.GetCurrentCommitSig(),
}
return rlp.EncodeToBytes(bwh)
}
func (node *Node) getCommitSigAndBitmapFromChildOrDB(block *types.Block) ([]byte, error) {
child := node.Blockchain().GetBlockByNumber(block.NumberU64() + 1)
if child != nil {
return node.getCommitSigFromChild(block, child)
}
return node.getCommitSigFromDB(block)
}
func (node *Node) getCommitSigFromChild(parent, child *types.Block) ([]byte, error) {
if child.ParentHash() != parent.Hash() {
return nil, fmt.Errorf("child's parent hash unexpected: %v / %v",
child.ParentHash().String(), parent.Hash().String())
}
sig := child.Header().LastCommitSignature()
bitmap := child.Header().LastCommitBitmap()
return append(sig[:], bitmap...), nil
}
func (node *Node) getCommitSigFromDB(block *types.Block) ([]byte, error) {
return node.Blockchain().ReadCommitSig(block.NumberU64())
}
// SyncStatus return the syncing status, including whether node is syncing
// and the target block number.
func (node *Node) SyncStatus(shardID uint32) (bool, uint64) {
ds := node.getDownloaders()
if ds == nil || !ds.IsActive() {
// downloaders inactive. Ask DNS sync instead
return node.legacySyncStatus(shardID)
}
return ds.SyncStatus(shardID)
}
func (node *Node) legacySyncStatus(shardID uint32) (bool, uint64) {
switch shardID {
case node.NodeConfig.ShardID:
if node.stateSync == nil {
return false, 0
}
return node.stateSync.SyncStatus(node.Blockchain())
case shard.BeaconChainShardID:
if node.beaconSync == nil {
return false, 0
}
return node.beaconSync.SyncStatus(node.Beaconchain())
default:
// Shard node is not working on
return false, 0
}
}
// IsOutOfSync return whether the node is out of sync of the given hsardID
func (node *Node) IsOutOfSync(shardID uint32) bool {
ds := node.getDownloaders()
if ds == nil || !ds.IsActive() {
// downloaders inactive. Ask DNS sync instead
return node.legacyIsOutOfSync(shardID)
}
isSyncing, _ := ds.SyncStatus(shardID)
return !isSyncing
}
func (node *Node) legacyIsOutOfSync(shardID uint32) bool {
switch shardID {
case node.NodeConfig.ShardID:
if node.stateSync == nil {
return true
}
outOfSync, _ := node.stateSync.IsOutOfSync(node.Blockchain(), false)
return outOfSync
case shard.BeaconChainShardID:
if node.beaconSync == nil {
return true
}
outOfSync, _ := node.beaconSync.IsOutOfSync(node.Beaconchain(), false)
return outOfSync
default:
return true
}
}
// SyncPeers return connected sync peers for each shard
func (node *Node) SyncPeers() map[string]int {
ds := node.getDownloaders()
if ds == nil {
return nil
}
nums := ds.NumPeers()
res := make(map[string]int)
for sid, num := range nums {
s := fmt.Sprintf("shard-%v", sid)
res[s] = num
}
return res
}
func (node *Node) getDownloaders() *downloader.Downloaders {
syncService := node.serviceManager.GetService(service.Synchronize)
if syncService == nil {
return nil
}
dsService, ok := syncService.(*synchronize.Service)
if !ok {
return nil
}
return dsService.Downloaders
}