staged stream sync v1.0

pull/4377/head
GheisMohammadi 2 years ago, committed by Casey Gardiner
parent b242506df5
commit dc3ee197f9
  1. .gitignore (3)
  2. api/service/manager.go (3)
  3. api/service/stagedstreamsync/adapter.go (34)
  4. api/service/stagedstreamsync/beacon_helper.go (156)
  5. api/service/stagedstreamsync/block_by_hash_manager.go (133)
  6. api/service/stagedstreamsync/block_hash_result.go (75)
  7. api/service/stagedstreamsync/block_manager.go (172)
  8. api/service/stagedstreamsync/const.go (81)
  9. api/service/stagedstreamsync/default_stages.go (87)
  10. api/service/stagedstreamsync/downloader.go (303)
  11. api/service/stagedstreamsync/downloaders.go (96)
  12. api/service/stagedstreamsync/errors.go (56)
  13. api/service/stagedstreamsync/helpers.go (114)
  14. api/service/stagedstreamsync/metric.go (98)
  15. api/service/stagedstreamsync/service.go (30)
  16. api/service/stagedstreamsync/short_range_helper.go (221)
  17. api/service/stagedstreamsync/stage.go (112)
  18. api/service/stagedstreamsync/stage_bodies.go (420)
  19. api/service/stagedstreamsync/stage_epoch.go (198)
  20. api/service/stagedstreamsync/stage_finish.go (114)
  21. api/service/stagedstreamsync/stage_heads.go (157)
  22. api/service/stagedstreamsync/stage_short_range.go (205)
  23. api/service/stagedstreamsync/stage_state.go (295)
  24. api/service/stagedstreamsync/staged_stream_sync.go (597)
  25. api/service/stagedstreamsync/stages.go (71)
  26. api/service/stagedstreamsync/syncing.go (320)
  27. api/service/stagedstreamsync/types.go (287)
  28. api/service/stagedstreamsync/types_test.go (266)
  29. cmd/harmony/main.go (44)
  30. go.mod (1)
  31. go.sum (2)
  32. hmy/downloader/const.go (3)
  33. hmy/downloader/metric.go (14)
  34. hmy/downloader/shortrange.go (2)
  35. node/node_syncing.go (28)
  36. p2p/stream/common/requestmanager/interface_test.go (12)
  37. p2p/stream/common/streammanager/interface_test.go (12)
  38. p2p/stream/common/streammanager/streammanager.go (23)
  39. p2p/stream/protocols/sync/client.go (30)
  40. p2p/stream/protocols/sync/const.go (3)
  41. p2p/stream/protocols/sync/protocol.go (36)
  42. p2p/stream/types/stream.go (18)
  43. p2p/stream/types/utils.go (27)

.gitignore vendored (3)

@@ -94,3 +94,6 @@ explorer_storage_*
# pprof profiles
profiles/*.pb.gz
# cache db
cache_*_db

api/service/manager.go
@@ -23,6 +23,7 @@ const (
Prometheus
Synchronize
CrosslinkSending
StagedStreamSync
)
func (t Type) String() string {
@@ -45,6 +46,8 @@ func (t Type) String() string {
return "Synchronize"
case CrosslinkSending:
return "CrosslinkSending"
case StagedStreamSync:
return "StagedStreamSync"
default:
return "Unknown"
}

api/service/stagedstreamsync/adapter.go
@@ -0,0 +1,34 @@
package stagedstreamsync
import (
"context"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/event"
"github.com/harmony-one/harmony/consensus/engine"
"github.com/harmony-one/harmony/core/types"
"github.com/harmony-one/harmony/p2p/stream/common/streammanager"
syncproto "github.com/harmony-one/harmony/p2p/stream/protocols/sync"
sttypes "github.com/harmony-one/harmony/p2p/stream/types"
)
type syncProtocol interface {
GetCurrentBlockNumber(ctx context.Context, opts ...syncproto.Option) (uint64, sttypes.StreamID, error)
GetBlocksByNumber(ctx context.Context, bns []uint64, opts ...syncproto.Option) ([]*types.Block, sttypes.StreamID, error)
GetRawBlocksByNumber(ctx context.Context, bns []uint64, opts ...syncproto.Option) ([][]byte, [][]byte, sttypes.StreamID, error)
GetBlockHashes(ctx context.Context, bns []uint64, opts ...syncproto.Option) ([]common.Hash, sttypes.StreamID, error)
GetBlocksByHashes(ctx context.Context, hs []common.Hash, opts ...syncproto.Option) ([]*types.Block, sttypes.StreamID, error)
RemoveStream(stID sttypes.StreamID) // If a stream delivers invalid data, remove the stream
StreamFailed(stID sttypes.StreamID, reason string)
SubscribeAddStreamEvent(ch chan<- streammanager.EvtStreamAdded) event.Subscription
NumStreams() int
}
type blockChain interface {
engine.ChainReader
Engine() engine.Engine
InsertChain(chain types.Blocks, verifyHeaders bool) (int, error)
WriteCommitSig(blockNum uint64, lastCommits []byte) error
}
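For orientation, here is a minimal sketch of how a consumer might drive the syncProtocol interface; the helper name and the 5-second timeout are illustrative and not part of this commit (it assumes the context and time packages are imported).
// currentPeerHeight is an illustrative helper (not in this commit): it asks a peer
// for its current block number and marks the stream as failed on error.
func currentPeerHeight(ctx context.Context, p syncProtocol) (uint64, error) {
    ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
    defer cancel()
    bn, stid, err := p.GetCurrentBlockNumber(ctx)
    if err != nil {
        p.StreamFailed(stid, "GetCurrentBlockNumber failed")
        return 0, err
    }
    return bn, nil
}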

api/service/stagedstreamsync/beacon_helper.go
@@ -0,0 +1,156 @@
package stagedstreamsync
import (
"time"
"github.com/harmony-one/harmony/core/types"
"github.com/harmony-one/harmony/internal/utils"
"github.com/rs/zerolog"
)
// lastMileCap is the maximum number of last mile blocks kept in the in-memory cache
const lastMileCap = 50
type (
// beaconHelper is the helper for the beacon downloader. It is only started when the
// node is running on a side chain; it listens to beacon client pub-sub messages and
// inserts the latest blocks into the beacon chain.
beaconHelper struct {
bc blockChain
blockC <-chan *types.Block
// TODO: refactor this hook to consensus module. We'd better put it in
// consensus module under a subscription.
insertHook func()
lastMileCache *blocksByNumber
insertC chan insertTask
closeC chan struct{}
logger zerolog.Logger
}
insertTask struct {
doneC chan struct{}
}
)
func newBeaconHelper(bc blockChain, blockC <-chan *types.Block, insertHook func()) *beaconHelper {
return &beaconHelper{
bc: bc,
blockC: blockC,
insertHook: insertHook,
lastMileCache: newBlocksByNumber(lastMileCap),
insertC: make(chan insertTask, 1),
closeC: make(chan struct{}),
logger: utils.Logger().With().
Str("module", "downloader").
Str("sub-module", "beacon helper").
Logger(),
}
}
func (bh *beaconHelper) start() {
go bh.loop()
}
func (bh *beaconHelper) close() {
close(bh.closeC)
}
func (bh *beaconHelper) loop() {
t := time.NewTicker(10 * time.Second)
defer t.Stop()
for {
select {
case <-t.C:
bh.insertAsync()
case b, ok := <-bh.blockC:
if !ok {
return // blockC closed. Node exited
}
if b == nil {
continue
}
bh.lastMileCache.push(b)
bh.insertAsync()
case it := <-bh.insertC:
inserted, bn, err := bh.insertLastMileBlocks()
numBlocksInsertedBeaconHelperCounter.Add(float64(inserted))
if err != nil {
bh.logger.Error().Err(err).Msg(WrapStagedSyncMsg("insert last mile blocks error"))
continue
}
bh.logger.Info().Int("inserted", inserted).
Uint64("end height", bn).
Uint32("shard", bh.bc.ShardID()).
Msg(WrapStagedSyncMsg("insert last mile blocks"))
close(it.doneC)
case <-bh.closeC:
return
}
}
}
// insertAsync triggers an insert of the last mile blocks without blocking
func (bh *beaconHelper) insertAsync() {
select {
case bh.insertC <- insertTask{
doneC: make(chan struct{}),
}:
default:
}
}
// insertSync triggers an insert of the last mile blocks and blocks until it is done
func (bh *beaconHelper) insertSync() {
task := insertTask{
doneC: make(chan struct{}),
}
bh.insertC <- task
<-task.doneC
}
func (bh *beaconHelper) insertLastMileBlocks() (inserted int, bn uint64, err error) {
bn = bh.bc.CurrentBlock().NumberU64() + 1
for {
b := bh.getNextBlock(bn)
if b == nil {
bn--
return
}
// TODO: Instruct the beacon helper to verify signatures. This may require some forks
// in pub-sub message (add commit sigs in node.block.sync messages)
if _, err = bh.bc.InsertChain(types.Blocks{b}, true); err != nil {
bn--
return
}
bh.logger.Info().Uint64("number", b.NumberU64()).Msg(WrapStagedSyncMsg("Inserted block from beacon pub-sub"))
if bh.insertHook != nil {
bh.insertHook()
}
inserted++
bn++
}
}
func (bh *beaconHelper) getNextBlock(expBN uint64) *types.Block {
for bh.lastMileCache.len() > 0 {
b := bh.lastMileCache.pop()
if b == nil {
return nil
}
if b.NumberU64() < expBN {
continue
}
if b.NumberU64() > expBN {
bh.lastMileCache.push(b)
return nil
}
return b
}
return nil
}
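As a minimal wiring sketch (illustrative only; the real wiring happens in the downloader below via config.BHConfig), the helper is fed a pub-sub block channel and started once.
// runBeaconHelper is an illustrative helper, not part of this commit: it wires a
// beaconHelper to a pub-sub block channel and starts its loop.
func runBeaconHelper(bc blockChain, blockC <-chan *types.Block) *beaconHelper {
    bh := newBeaconHelper(bc, blockC, nil) // nil insertHook: no post-insert callback
    bh.start()
    return bh // the caller is expected to call bh.close() on shutdown
}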

api/service/stagedstreamsync/block_by_hash_manager.go
@@ -0,0 +1,133 @@
package stagedstreamsync
import (
"sync"
"github.com/ethereum/go-ethereum/common"
"github.com/harmony-one/harmony/core/types"
sttypes "github.com/harmony-one/harmony/p2p/stream/types"
"github.com/pkg/errors"
)
type getBlocksByHashManager struct {
hashes []common.Hash
pendings map[common.Hash]struct{}
results map[common.Hash]blockResult
whitelist []sttypes.StreamID
lock sync.Mutex
}
func newGetBlocksByHashManager(hashes []common.Hash, whitelist []sttypes.StreamID) *getBlocksByHashManager {
return &getBlocksByHashManager{
hashes: hashes,
pendings: make(map[common.Hash]struct{}),
results: make(map[common.Hash]blockResult),
whitelist: whitelist,
}
}
func (m *getBlocksByHashManager) getNextHashes() ([]common.Hash, []sttypes.StreamID, error) {
m.lock.Lock()
defer m.lock.Unlock()
num := m.numBlocksPerRequest()
hashes := make([]common.Hash, 0, num)
if len(m.whitelist) == 0 {
return nil, nil, errors.New("empty white list")
}
for _, hash := range m.hashes {
if len(hashes) == num {
break
}
_, ok1 := m.pendings[hash]
_, ok2 := m.results[hash]
if !ok1 && !ok2 {
hashes = append(hashes, hash)
}
}
sts := make([]sttypes.StreamID, len(m.whitelist))
copy(sts, m.whitelist)
return hashes, sts, nil
}
func (m *getBlocksByHashManager) numBlocksPerRequest() int {
val := divideCeil(len(m.hashes), len(m.whitelist))
if val < BlockByHashesLowerCap {
val = BlockByHashesLowerCap
}
if val > BlockByHashesUpperCap {
val = BlockByHashesUpperCap
}
return val
}
func (m *getBlocksByHashManager) numRequests() int {
return divideCeil(len(m.hashes), m.numBlocksPerRequest())
}
func (m *getBlocksByHashManager) addResult(hashes []common.Hash, blocks []*types.Block, stid sttypes.StreamID) {
m.lock.Lock()
defer m.lock.Unlock()
for i, hash := range hashes {
block := blocks[i]
delete(m.pendings, hash)
m.results[hash] = blockResult{
block: block,
stid: stid,
}
}
}
func (m *getBlocksByHashManager) handleResultError(hashes []common.Hash, stid sttypes.StreamID) {
m.lock.Lock()
defer m.lock.Unlock()
m.removeStreamID(stid)
for _, hash := range hashes {
delete(m.pendings, hash)
}
}
func (m *getBlocksByHashManager) getResults() ([]*types.Block, []sttypes.StreamID, error) {
m.lock.Lock()
defer m.lock.Unlock()
blocks := make([]*types.Block, 0, len(m.hashes))
stids := make([]sttypes.StreamID, 0, len(m.hashes))
for _, hash := range m.hashes {
if m.results[hash].block == nil {
return nil, nil, errors.New("SANITY: nil block found")
}
blocks = append(blocks, m.results[hash].block)
stids = append(stids, m.results[hash].stid)
}
return blocks, stids, nil
}
func (m *getBlocksByHashManager) isDone() bool {
m.lock.Lock()
defer m.lock.Unlock()
return len(m.results) == len(m.hashes)
}
func (m *getBlocksByHashManager) removeStreamID(target sttypes.StreamID) {
// O(n^2) complexity, but since the whitelist is small this should not cause a
// performance issue.
loop:
for i, stid := range m.whitelist {
if stid == target {
if i == len(m.whitelist) {
m.whitelist = m.whitelist[:i]
} else {
m.whitelist = append(m.whitelist[:i], m.whitelist[i+1:]...)
}
goto loop
}
}
return
}

api/service/stagedstreamsync/block_hash_result.go
@@ -0,0 +1,75 @@
package stagedstreamsync
import (
"sync"
"github.com/ethereum/go-ethereum/common"
sttypes "github.com/harmony-one/harmony/p2p/stream/types"
)
type (
blockHashResults struct {
bns []uint64
results []map[sttypes.StreamID]common.Hash
lock sync.Mutex
}
)
func newBlockHashResults(bns []uint64) *blockHashResults {
results := make([]map[sttypes.StreamID]common.Hash, 0, len(bns))
for range bns {
results = append(results, make(map[sttypes.StreamID]common.Hash))
}
return &blockHashResults{
bns: bns,
results: results,
}
}
func (res *blockHashResults) addResult(hashes []common.Hash, stid sttypes.StreamID) {
res.lock.Lock()
defer res.lock.Unlock()
for i, h := range hashes {
if h == emptyHash {
return // nil block hash reached
}
res.results[i][stid] = h
}
return
}
func (res *blockHashResults) computeLongestHashChain() ([]common.Hash, []sttypes.StreamID) {
var (
whitelist map[sttypes.StreamID]struct{}
hashChain []common.Hash
)
for _, result := range res.results {
hash, nextWl := countHashMaxVote(result, whitelist)
if hash == emptyHash {
break
}
hashChain = append(hashChain, hash)
whitelist = nextWl
}
sts := make([]sttypes.StreamID, 0, len(whitelist))
for st := range whitelist {
sts = append(sts, st)
}
return hashChain, sts
}
func (res *blockHashResults) numBlocksWithResults() int {
res.lock.Lock()
defer res.lock.Unlock()
cnt := 0
for _, result := range res.results {
if len(result) != 0 {
cnt++
}
}
return cnt
}

api/service/stagedstreamsync/block_manager.go
@@ -0,0 +1,172 @@
package stagedstreamsync
import (
"sync"
sttypes "github.com/harmony-one/harmony/p2p/stream/types"
"github.com/ledgerwatch/erigon-lib/kv"
"github.com/rs/zerolog"
)
type BlockDownloadDetails struct {
loopID int
streamID sttypes.StreamID
}
// blockDownloadManager is the helper structure for managing get-blocks requests
type blockDownloadManager struct {
chain blockChain
tx kv.RwTx
targetBN uint64
requesting map[uint64]struct{} // block numbers that have been assigned to workers but not yet received
processing map[uint64]struct{} // block numbers that have been received but not yet inserted
retries *prioritizedNumbers // block numbers whose requests ended in error
rq *resultQueue // queue of results waiting to be inserted into the blockchain
bdd map[uint64]BlockDownloadDetails // details about how this block was downloaded
logger zerolog.Logger
lock sync.Mutex
}
func newBlockDownloadManager(tx kv.RwTx, chain blockChain, targetBN uint64, logger zerolog.Logger) *blockDownloadManager {
return &blockDownloadManager{
chain: chain,
tx: tx,
targetBN: targetBN,
requesting: make(map[uint64]struct{}),
processing: make(map[uint64]struct{}),
retries: newPrioritizedNumbers(),
rq: newResultQueue(),
bdd: make(map[uint64]BlockDownloadDetails),
logger: logger,
}
}
// GetNextBatch gets the next batch of block numbers
func (gbm *blockDownloadManager) GetNextBatch() []uint64 {
gbm.lock.Lock()
defer gbm.lock.Unlock()
cap := BlocksPerRequest
bns := gbm.getBatchFromRetries(cap)
if len(bns) > 0 {
cap -= len(bns)
gbm.addBatchToRequesting(bns)
}
if gbm.availableForMoreTasks() {
addBNs := gbm.getBatchFromUnprocessed(cap)
gbm.addBatchToRequesting(addBNs)
bns = append(bns, addBNs...)
}
return bns
}
// HandleRequestError handles the error result
func (gbm *blockDownloadManager) HandleRequestError(bns []uint64, err error, streamID sttypes.StreamID) {
gbm.lock.Lock()
defer gbm.lock.Unlock()
// add requested block numbers to retries
for _, bn := range bns {
delete(gbm.requesting, bn)
gbm.retries.push(bn)
}
}
// HandleRequestResult handles get blocks result
func (gbm *blockDownloadManager) HandleRequestResult(bns []uint64, blockBytes [][]byte, sigBytes [][]byte, loopID int, streamID sttypes.StreamID) error {
gbm.lock.Lock()
defer gbm.lock.Unlock()
for i, bn := range bns {
delete(gbm.requesting, bn)
if len(blockBytes[i]) <= 1 {
gbm.retries.push(bn)
} else {
gbm.processing[bn] = struct{}{}
gbm.bdd[bn] = BlockDownloadDetails{
loopID: loopID,
streamID: streamID,
}
}
}
return nil
}
// SetDownloadDetails sets the download details for a batch of blocks
func (gbm *blockDownloadManager) SetDownloadDetails(bns []uint64, loopID int, streamID sttypes.StreamID) error {
gbm.lock.Lock()
defer gbm.lock.Unlock()
for _, bn := range bns {
gbm.bdd[bn] = BlockDownloadDetails{
loopID: loopID,
streamID: streamID,
}
}
return nil
}
// GetDownloadDetails returns the download details for a block
func (gbm *blockDownloadManager) GetDownloadDetails(blockNumber uint64) (loopID int, streamID sttypes.StreamID) {
gbm.lock.Lock()
defer gbm.lock.Unlock()
return gbm.bdd[blockNumber].loopID, gbm.bdd[blockNumber].streamID
}
// getBatchFromRetries gets the batch of block numbers to be re-requested from retries.
func (gbm *blockDownloadManager) getBatchFromRetries(cap int) []uint64 {
var (
requestBNs []uint64
curHeight = gbm.chain.CurrentBlock().NumberU64()
)
for cnt := 0; cnt < cap; cnt++ {
bn := gbm.retries.pop()
if bn == 0 {
break // no more retries
}
if bn <= curHeight {
continue
}
requestBNs = append(requestBNs, bn)
}
return requestBNs
}
// getBatchFromUnprocessed returns a batch of block numbers to be requested from unprocessed.
func (gbm *blockDownloadManager) getBatchFromUnprocessed(cap int) []uint64 {
var (
requestBNs []uint64
curHeight = gbm.chain.CurrentBlock().NumberU64()
)
bn := curHeight + 1
// TODO: this algorithm can be potentially optimized.
for cnt := 0; cnt < cap && bn <= gbm.targetBN; cnt++ {
for bn <= gbm.targetBN {
_, ok1 := gbm.requesting[bn]
_, ok2 := gbm.processing[bn]
if !ok1 && !ok2 {
requestBNs = append(requestBNs, bn)
bn++
break
}
bn++
}
}
return requestBNs
}
func (gbm *blockDownloadManager) availableForMoreTasks() bool {
return gbm.rq.results.Len() < SoftQueueCap
}
func (gbm *blockDownloadManager) addBatchToRequesting(bns []uint64) {
for _, bn := range bns {
gbm.requesting[bn] = struct{}{}
}
}
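To show how the manager is intended to be driven (stage_bodies.go further below implements the real loop), here is a minimal worker sketch; the function and its error handling are illustrative, not part of the commit, and assume the context package is imported.
// downloadWorker is an illustrative sketch of the request cycle around
// blockDownloadManager: pull a batch, fetch raw blocks, and report the outcome
// so that failed numbers are rescheduled through retries.
func downloadWorker(ctx context.Context, gbm *blockDownloadManager, p syncProtocol, loopID int) {
    for {
        batch := gbm.GetNextBatch()
        if len(batch) == 0 {
            return // nothing left to request for now
        }
        blockBytes, sigBytes, stid, err := p.GetRawBlocksByNumber(ctx, batch)
        if err != nil {
            gbm.HandleRequestError(batch, err, stid) // push failed numbers back to retries
            continue
        }
        // successful numbers move from requesting to processing
        _ = gbm.HandleRequestResult(batch, blockBytes, sigBytes, loopID, stid)
    }
}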

api/service/stagedstreamsync/const.go
@@ -0,0 +1,81 @@
package stagedstreamsync
import (
"time"
"github.com/harmony-one/harmony/core/types"
nodeconfig "github.com/harmony-one/harmony/internal/configs/node"
)
const (
BlocksPerRequest int = 10 // number of blocks for each request
BlocksPerInsertion int = 50 // number of blocks for each insert batch
BlockHashesPerRequest int = 20 // number of get block hashes for short range sync
BlockByHashesUpperCap int = 10 // number of get blocks by hashes upper cap
BlockByHashesLowerCap int = 3 // number of get blocks by hashes lower cap
LastMileBlocksThreshold int = 10
// SoftQueueCap is the soft cap of size in resultQueue. When the queue size is larger than this limit,
// no more request will be assigned to workers to wait for InsertChain to finish.
SoftQueueCap int = 100
// DefaultConcurrency is the default settings for concurrency
DefaultConcurrency = 4
// ShortRangeTimeout is the timeout for each short range sync, which allows short range sync
// to restart automatically when stuck in `getBlockHashes`
ShortRangeTimeout = 1 * time.Minute
)
type (
// Config is the downloader config
Config struct {
// Only run stream sync protocol as a server.
// TODO: remove this when stream sync is fully up.
ServerOnly bool
// parameters
Network nodeconfig.NetworkType
Concurrency int // Number of concurrent sync requests
MinStreams int // Minimum number of streams to do sync
InitStreams int // Number of streams requirement for initial bootstrap
// stream manager config
SmSoftLowCap int
SmHardLowCap int
SmHiCap int
SmDiscBatch int
// config for beacon helper
BHConfig *BeaconHelperConfig
// log the stage progress
LogProgress bool
}
// BeaconHelperConfig is the extra config used for beaconHelper which uses
// pub-sub block message to do sync.
BeaconHelperConfig struct {
BlockC <-chan *types.Block
InsertHook func()
}
)
func (c *Config) fixValues() {
if c.Concurrency == 0 {
c.Concurrency = DefaultConcurrency
}
if c.Concurrency > c.MinStreams {
c.MinStreams = c.Concurrency
}
if c.MinStreams > c.InitStreams {
c.InitStreams = c.MinStreams
}
if c.MinStreams > c.SmSoftLowCap {
c.SmSoftLowCap = c.MinStreams
}
if c.MinStreams > c.SmHardLowCap {
c.SmHardLowCap = c.MinStreams
}
}
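A small worked example of fixValues (illustrative; it assumes only the constants defined above): a sparsely filled Config gets the default concurrency, and the stream minimums are raised so they never fall below it.
// exampleFixValues is illustrative only: it shows how fixValues normalizes a config.
func exampleFixValues() Config {
    cfg := Config{MinStreams: 2, InitStreams: 1}
    cfg.fixValues()
    // cfg.Concurrency == 4 (DefaultConcurrency)
    // cfg.MinStreams == 4, cfg.InitStreams == 4
    // cfg.SmSoftLowCap == 4, cfg.SmHardLowCap == 4
    return cfg
}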

api/service/stagedstreamsync/default_stages.go
@@ -0,0 +1,87 @@
package stagedstreamsync
import (
"context"
)
type ForwardOrder []SyncStageID
type RevertOrder []SyncStageID
type CleanUpOrder []SyncStageID
var DefaultForwardOrder = ForwardOrder{
Heads,
SyncEpoch,
ShortRange,
BlockBodies,
// Stages below don't use Internet
States,
Finish,
}
var DefaultRevertOrder = RevertOrder{
Finish,
States,
BlockBodies,
ShortRange,
SyncEpoch,
Heads,
}
var DefaultCleanUpOrder = CleanUpOrder{
Finish,
States,
BlockBodies,
ShortRange,
SyncEpoch,
Heads,
}
func DefaultStages(ctx context.Context,
headsCfg StageHeadsCfg,
seCfg StageEpochCfg,
srCfg StageShortRangeCfg,
bodiesCfg StageBodiesCfg,
statesCfg StageStatesCfg,
finishCfg StageFinishCfg,
) []*Stage {
handlerStageHeads := NewStageHeads(headsCfg)
handlerStageShortRange := NewStageShortRange(srCfg)
handlerStageEpochSync := NewStageEpoch(seCfg)
handlerStageBodies := NewStageBodies(bodiesCfg)
handlerStageStates := NewStageStates(statesCfg)
handlerStageFinish := NewStageFinish(finishCfg)
return []*Stage{
{
ID: Heads,
Description: "Retrieve Chain Heads",
Handler: handlerStageHeads,
},
{
ID: SyncEpoch,
Description: "Sync only Last Block of Epoch",
Handler: handlerStageEpochSync,
},
{
ID: ShortRange,
Description: "Short Range Sync",
Handler: handlerStageShortRange,
},
{
ID: BlockBodies,
Description: "Retrieve Block Bodies",
Handler: handlerStageBodies,
},
{
ID: States,
Description: "Update Blockchain State",
Handler: handlerStageStates,
},
{
ID: Finish,
Description: "Finalize Changes",
Handler: handlerStageFinish,
},
}
}

api/service/stagedstreamsync/downloader.go
@@ -0,0 +1,303 @@
package stagedstreamsync
import (
"context"
"fmt"
"time"
"github.com/ethereum/go-ethereum/event"
"github.com/pkg/errors"
"github.com/rs/zerolog"
"github.com/harmony-one/harmony/core"
"github.com/harmony-one/harmony/core/types"
"github.com/harmony-one/harmony/crypto/bls"
"github.com/harmony-one/harmony/internal/chain"
nodeconfig "github.com/harmony-one/harmony/internal/configs/node"
"github.com/harmony-one/harmony/internal/utils"
"github.com/harmony-one/harmony/p2p"
"github.com/harmony-one/harmony/p2p/stream/common/streammanager"
"github.com/harmony-one/harmony/p2p/stream/protocols/sync"
"github.com/harmony-one/harmony/shard"
)
type (
// Downloader is responsible for sync task of one shard
Downloader struct {
bc blockChain
syncProtocol syncProtocol
bh *beaconHelper
stagedSyncInstance *StagedStreamSync
downloadC chan struct{}
closeC chan struct{}
ctx context.Context
cancel func()
config Config
logger zerolog.Logger
}
)
// NewDownloader creates a new downloader
func NewDownloader(host p2p.Host, bc core.BlockChain, config Config) *Downloader {
config.fixValues()
sp := sync.NewProtocol(sync.Config{
Chain: bc,
Host: host.GetP2PHost(),
Discovery: host.GetDiscovery(),
ShardID: nodeconfig.ShardID(bc.ShardID()),
Network: config.Network,
SmSoftLowCap: config.SmSoftLowCap,
SmHardLowCap: config.SmHardLowCap,
SmHiCap: config.SmHiCap,
DiscBatch: config.SmDiscBatch,
})
host.AddStreamProtocol(sp)
var bh *beaconHelper
if config.BHConfig != nil && bc.ShardID() == shard.BeaconChainShardID {
bh = newBeaconHelper(bc, config.BHConfig.BlockC, config.BHConfig.InsertHook)
}
logger := utils.Logger().With().Str("module", "StagedStreamSync").Uint32("ShardID", bc.ShardID()).Logger()
ctx, cancel := context.WithCancel(context.Background())
// TODO: whether to use an in-memory db should be set in the config file
stagedSyncInstance, err := CreateStagedSync(ctx, bc, false, sp, config, logger, config.LogProgress)
if err != nil {
return nil
}
return &Downloader{
bc: bc,
syncProtocol: sp,
bh: bh,
stagedSyncInstance: stagedSyncInstance,
downloadC: make(chan struct{}),
closeC: make(chan struct{}),
ctx: ctx,
cancel: cancel,
config: config,
logger: logger,
}
}
// Start starts the downloader
func (d *Downloader) Start() {
go func() {
d.waitForBootFinish()
fmt.Printf("boot completed for shard %d, %d streams are connected\n", d.bc.ShardID(), d.syncProtocol.NumStreams())
d.loop()
}()
if d.bh != nil {
d.bh.start()
}
}
// Close closes the downloader
func (d *Downloader) Close() {
close(d.closeC)
d.cancel()
if d.bh != nil {
d.bh.close()
}
}
// DownloadAsync triggers the download async.
func (d *Downloader) DownloadAsync() {
select {
case d.downloadC <- struct{}{}:
consensusTriggeredDownloadCounterVec.With(d.promLabels()).Inc()
case <-time.After(100 * time.Millisecond):
}
}
// NumPeers returns the number of peers connected of a specific shard.
func (d *Downloader) NumPeers() int {
return d.syncProtocol.NumStreams()
}
// SyncStatus returns the current sync status
func (d *Downloader) SyncStatus() (bool, uint64, uint64) {
syncing, target := d.stagedSyncInstance.status.get()
if !syncing {
target = d.bc.CurrentBlock().NumberU64()
}
return syncing, target, 0
}
// SubscribeDownloadStarted subscribes download started
func (d *Downloader) SubscribeDownloadStarted(ch chan struct{}) event.Subscription {
d.stagedSyncInstance.evtDownloadStartedSubscribed = true
return d.stagedSyncInstance.evtDownloadStarted.Subscribe(ch)
}
// SubscribeDownloadFinished subscribes the download finished
func (d *Downloader) SubscribeDownloadFinished(ch chan struct{}) event.Subscription {
d.stagedSyncInstance.evtDownloadFinishedSubscribed = true
return d.stagedSyncInstance.evtDownloadFinished.Subscribe(ch)
}
// waitForBootFinish waits for stream manager to finish the initial discovery and have
// enough peers to start downloader
func (d *Downloader) waitForBootFinish() {
evtCh := make(chan streammanager.EvtStreamAdded, 1)
sub := d.syncProtocol.SubscribeAddStreamEvent(evtCh)
defer sub.Unsubscribe()
checkCh := make(chan struct{}, 1)
trigger := func() {
select {
case checkCh <- struct{}{}:
default:
}
}
trigger()
t := time.NewTicker(10 * time.Second)
defer t.Stop()
for {
select {
case <-t.C:
trigger()
case <-evtCh:
trigger()
case <-checkCh:
if d.syncProtocol.NumStreams() >= d.config.InitStreams {
return
}
case <-d.closeC:
return
}
}
}
func (d *Downloader) loop() {
ticker := time.NewTicker(10 * time.Second)
defer ticker.Stop()
initSync := d.bc.ShardID() != shard.BeaconChainShardID
trigger := func() {
select {
case d.downloadC <- struct{}{}:
case <-time.After(100 * time.Millisecond):
}
}
go trigger()
for {
select {
case <-ticker.C:
go trigger()
case <-d.downloadC:
addedBN, err := d.stagedSyncInstance.doSync(d.ctx, initSync)
if err != nil {
//TODO: if there is a bad block which can't be resolved
if d.stagedSyncInstance.invalidBlock.Active {
numTriedStreams := len(d.stagedSyncInstance.invalidBlock.StreamID)
// if many streams couldn't solve it, then that's an unresolvable bad block
if numTriedStreams >= d.config.InitStreams {
if !d.stagedSyncInstance.invalidBlock.IsLogged {
fmt.Println("unresolvable bad block:", d.stagedSyncInstance.invalidBlock.Number)
d.stagedSyncInstance.invalidBlock.IsLogged = true
}
//TODO: if we don't have any new or untried stream in the list, sleep or panic
}
}
// If error happens, sleep 5 seconds and retry
d.logger.Error().
Err(err).
Bool("initSync", initSync).
Msg(WrapStagedSyncMsg("sync loop failed"))
go func() {
time.Sleep(5 * time.Second)
trigger()
}()
time.Sleep(1 * time.Second)
continue
}
d.logger.Info().Int("block added", addedBN).
Uint64("current height", d.bc.CurrentBlock().NumberU64()).
Bool("initSync", initSync).
Uint32("shard", d.bc.ShardID()).
Msg(WrapStagedSyncMsg("sync finished"))
if addedBN != 0 {
// If block number has been changed, trigger another sync
// and try to add last mile from pub-sub (blocking)
go trigger()
if d.bh != nil {
d.bh.insertSync()
}
}
d.stagedSyncInstance.initSync = false
initSync = false
case <-d.closeC:
return
}
}
}
var emptySigVerifyErr *sigVerifyErr
type sigVerifyErr struct {
err error
}
func (e *sigVerifyErr) Error() string {
return fmt.Sprintf("[VerifyHeaderSignature] %v", e.err.Error())
}
func verifyAndInsertBlocks(bc blockChain, blocks types.Blocks) (int, error) {
for i, block := range blocks {
if err := verifyAndInsertBlock(bc, block, blocks[i+1:]...); err != nil {
return i, err
}
}
return len(blocks), nil
}
func verifyAndInsertBlock(bc blockChain, block *types.Block, nextBlocks ...*types.Block) error {
var (
sigBytes bls.SerializedSignature
bitmap []byte
err error
)
if len(nextBlocks) > 0 {
// get commit sig from the next block
next := nextBlocks[0]
sigBytes = next.Header().LastCommitSignature()
bitmap = next.Header().LastCommitBitmap()
} else {
// get commit sig from current block
sigBytes, bitmap, err = chain.ParseCommitSigAndBitmap(block.GetCurrentCommitSig())
if err != nil {
return errors.Wrap(err, "parse commitSigAndBitmap")
}
}
if err := bc.Engine().VerifyHeaderSignature(bc, block.Header(), sigBytes, bitmap); err != nil {
return &sigVerifyErr{err}
}
if err := bc.Engine().VerifyHeader(bc, block.Header(), true); err != nil {
return errors.Wrap(err, "[VerifyHeader]")
}
if _, err := bc.InsertChain(types.Blocks{block}, false); err != nil {
return errors.Wrap(err, "[InsertChain]")
}
return nil
}

api/service/stagedstreamsync/downloaders.go
@@ -0,0 +1,96 @@
package stagedstreamsync
import (
"github.com/harmony-one/abool"
"github.com/harmony-one/harmony/core"
"github.com/harmony-one/harmony/p2p"
)
// Downloaders is the set of downloaders
type Downloaders struct {
ds map[uint32]*Downloader
active *abool.AtomicBool
config Config
}
// NewDownloaders creates Downloaders for sync of multiple blockchains
func NewDownloaders(host p2p.Host, bcs []core.BlockChain, config Config) *Downloaders {
ds := make(map[uint32]*Downloader)
for _, bc := range bcs {
if bc == nil {
continue
}
if _, ok := ds[bc.ShardID()]; ok {
continue
}
ds[bc.ShardID()] = NewDownloader(host, bc, config)
}
return &Downloaders{
ds: ds,
active: abool.New(),
config: config,
}
}
// Start starts the downloaders
func (ds *Downloaders) Start() {
if ds.config.ServerOnly {
// Run in server only mode. Do not start downloaders.
return
}
ds.active.Set()
for _, d := range ds.ds {
d.Start()
}
}
// Close closes the downloaders
func (ds *Downloaders) Close() {
if ds.config.ServerOnly {
// Run in server only mode. Downloaders not started.
return
}
ds.active.UnSet()
for _, d := range ds.ds {
d.Close()
}
}
// DownloadAsync triggers a download
func (ds *Downloaders) DownloadAsync(shardID uint32) {
d, ok := ds.ds[shardID]
if ok && d != nil {
d.DownloadAsync()
}
}
// GetShardDownloader returns the downloader with the given shard ID
func (ds *Downloaders) GetShardDownloader(shardID uint32) *Downloader {
return ds.ds[shardID]
}
// NumPeers returns the connected peers for each shard
func (ds *Downloaders) NumPeers() map[uint32]int {
res := make(map[uint32]int)
for sid, d := range ds.ds {
res[sid] = d.NumPeers()
}
return res
}
// SyncStatus returns whether the given shard is doing syncing task and the target block number
func (ds *Downloaders) SyncStatus(shardID uint32) (bool, uint64, uint64) {
d, ok := ds.ds[shardID]
if !ok {
return false, 0, 0
}
return d.SyncStatus()
}
// IsActive returns whether the downloader is active
func (ds *Downloaders) IsActive() bool {
return ds.active.IsSet()
}

api/service/stagedstreamsync/errors.go
@@ -0,0 +1,56 @@
package stagedstreamsync
import (
"fmt"
)
// Errors ...
var (
ErrRegistrationFail = WrapStagedSyncError("registration failed")
ErrGetBlock = WrapStagedSyncError("get block failed")
ErrGetBlockHash = WrapStagedSyncError("get block hash failed")
ErrGetConsensusHashes = WrapStagedSyncError("get consensus hashes failed")
ErrGenStateSyncTaskQueue = WrapStagedSyncError("generate state sync task queue failed")
ErrDownloadBlocks = WrapStagedSyncError("get download blocks failed")
ErrUpdateBlockAndStatus = WrapStagedSyncError("update block and status failed")
ErrGenerateNewState = WrapStagedSyncError("get generate new state failed")
ErrFetchBlockHashProgressFail = WrapStagedSyncError("fetch cache progress for block hashes stage failed")
ErrFetchCachedBlockHashFail = WrapStagedSyncError("fetch cached block hashes failed")
ErrNotEnoughBlockHashes = WrapStagedSyncError("peers haven't sent all requested block hashes")
ErrRetrieveCachedProgressFail = WrapStagedSyncError("retrieving cache progress for block hashes stage failed")
ErrRetrieveCachedHashProgressFail = WrapStagedSyncError("retrieving cache progress for block hashes stage failed")
ErrSaveBlockHashesProgressFail = WrapStagedSyncError("saving progress for block hashes stage failed")
ErrSaveCachedBlockHashesProgressFail = WrapStagedSyncError("saving cache progress for block hashes stage failed")
ErrSavingCacheLastBlockHashFail = WrapStagedSyncError("saving cache last block hash for block hashes stage failed")
ErrCachingBlockHashFail = WrapStagedSyncError("caching downloaded block hashes failed")
ErrCommitTransactionFail = WrapStagedSyncError("failed to write db commit")
ErrUnexpectedNumberOfBlocks = WrapStagedSyncError("unexpected number of block delivered")
ErrSavingBodiesProgressFail = WrapStagedSyncError("saving progress for block bodies stage failed")
ErrAddTasksToQueueFail = WrapStagedSyncError("cannot add task to queue")
ErrSavingCachedBodiesProgressFail = WrapStagedSyncError("saving cache progress for blocks stage failed")
ErrRetrievingCachedBodiesProgressFail = WrapStagedSyncError("retrieving cache progress for blocks stage failed")
ErrNoConnectedPeers = WrapStagedSyncError("haven't connected to any peer yet!")
ErrNotEnoughConnectedPeers = WrapStagedSyncError("not enough connected peers")
ErrSaveStateProgressFail = WrapStagedSyncError("saving progress for block States stage failed")
ErrPruningCursorCreationFail = WrapStagedSyncError("failed to create cursor for pruning")
ErrInvalidBlockNumber = WrapStagedSyncError("invalid block number")
ErrInvalidBlockBytes = WrapStagedSyncError("invalid block bytes to insert into chain")
ErrAddTaskFailed = WrapStagedSyncError("cannot add task to queue")
ErrNodeNotEnoughBlockHashes = WrapStagedSyncError("some of the nodes didn't provide all block hashes")
ErrCachingBlocksFail = WrapStagedSyncError("caching downloaded block bodies failed")
ErrSaveBlocksFail = WrapStagedSyncError("save downloaded block bodies failed")
ErrStageNotFound = WrapStagedSyncError("stage not found")
ErrSomeNodesNotReady = WrapStagedSyncError("some nodes are not ready")
ErrSomeNodesBlockHashFail = WrapStagedSyncError("some nodes failed to download block hashes")
ErrMaxPeerHeightFail = WrapStagedSyncError("get max peer height failed")
)
// WrapStagedSyncError wraps errors for staged sync and returns error object
func WrapStagedSyncError(context string) error {
return fmt.Errorf("[STAGED_STREAM_SYNC]: %s", context)
}
// WrapStagedSyncMsg wraps message for staged sync and returns string
func WrapStagedSyncMsg(context string) string {
return fmt.Sprintf("[STAGED_STREAM_SYNC]: %s", context)
}

api/service/stagedstreamsync/helpers.go
@@ -0,0 +1,114 @@
package stagedstreamsync
import (
"encoding/binary"
"fmt"
"math"
"github.com/ethereum/go-ethereum/common"
"github.com/harmony-one/harmony/core/types"
sttypes "github.com/harmony-one/harmony/p2p/stream/types"
"github.com/pkg/errors"
)
func marshalData(blockNumber uint64) []byte {
return encodeBigEndian(blockNumber)
}
func unmarshalData(data []byte) (uint64, error) {
if len(data) == 0 {
return 0, nil
}
if len(data) < 8 {
return 0, fmt.Errorf("value must be at least 8 bytes, got %d", len(data))
}
return binary.BigEndian.Uint64(data[:8]), nil
}
func encodeBigEndian(n uint64) []byte {
var v [8]byte
binary.BigEndian.PutUint64(v[:], n)
return v[:]
}
func divideCeil(x, y int) int {
fVal := float64(x) / float64(y)
return int(math.Ceil(fVal))
}
// computeBlockNumberByMaxVote computes the target block number by max vote.
func computeBlockNumberByMaxVote(votes map[sttypes.StreamID]uint64) uint64 {
var (
nm = make(map[uint64]int)
res uint64
maxCnt int
)
for _, bn := range votes {
_, ok := nm[bn]
if !ok {
nm[bn] = 0
}
nm[bn]++
cnt := nm[bn]
if cnt > maxCnt || (cnt == maxCnt && bn > res) {
res = bn
maxCnt = cnt
}
}
return res
}
func checkGetBlockByHashesResult(blocks []*types.Block, hashes []common.Hash) error {
if len(blocks) != len(hashes) {
return errors.New("unexpected number of getBlocksByHashes result")
}
for i, block := range blocks {
if block == nil {
return errors.New("nil block found")
}
if block.Hash() != hashes[i] {
return fmt.Errorf("unexpected block hash: %x / %x", block.Hash(), hashes[i])
}
}
return nil
}
func countHashMaxVote(m map[sttypes.StreamID]common.Hash, whitelist map[sttypes.StreamID]struct{}) (common.Hash, map[sttypes.StreamID]struct{}) {
var (
voteM = make(map[common.Hash]int)
res common.Hash
maxCnt = 0
)
for st, h := range m {
if len(whitelist) != 0 {
if _, ok := whitelist[st]; !ok {
continue
}
}
if _, ok := voteM[h]; !ok {
voteM[h] = 0
}
voteM[h]++
if voteM[h] > maxCnt {
maxCnt = voteM[h]
res = h
}
}
nextWl := make(map[sttypes.StreamID]struct{})
for st, h := range m {
if h != res {
continue
}
if len(whitelist) != 0 {
if _, ok := whitelist[st]; ok {
nextWl[st] = struct{}{}
}
} else {
nextWl[st] = struct{}{}
}
}
return res, nextWl
}
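A quick worked example of the max-vote helper (the stream IDs are made up): with two of three peers reporting the same height, that height wins, and ties are broken toward the larger block number.
// exampleMaxVote is illustrative only: a majority vote on the target block number.
func exampleMaxVote() uint64 {
    votes := map[sttypes.StreamID]uint64{
        "stream-a": 1024,
        "stream-b": 1024,
        "stream-c": 1023,
    }
    return computeBlockNumberByMaxVote(votes) // returns 1024
}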

api/service/stagedstreamsync/metric.go
@@ -0,0 +1,98 @@
package stagedstreamsync
import (
"fmt"
prom "github.com/harmony-one/harmony/api/service/prometheus"
"github.com/prometheus/client_golang/prometheus"
)
func init() {
prom.PromRegistry().MustRegister(
consensusTriggeredDownloadCounterVec,
longRangeSyncedBlockCounterVec,
longRangeFailInsertedBlockCounterVec,
numShortRangeCounterVec,
numFailedDownloadCounterVec,
numBlocksInsertedShortRangeHistogramVec,
numBlocksInsertedBeaconHelperCounter,
)
}
var (
consensusTriggeredDownloadCounterVec = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "hmy",
Subsystem: "downloader",
Name: "consensus_trigger",
Help: "number of times consensus triggered download task",
},
[]string{"ShardID"},
)
longRangeSyncedBlockCounterVec = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "hmy",
Subsystem: "downloader",
Name: "num_blocks_synced_long_range",
Help: "number of blocks synced in long range sync",
},
[]string{"ShardID"},
)
longRangeFailInsertedBlockCounterVec = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "hmy",
Subsystem: "downloader",
Name: "num_blocks_failed_long_range",
Help: "number of blocks failed to insert into change in long range sync",
},
[]string{"ShardID", "error"},
)
numShortRangeCounterVec = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "hmy",
Subsystem: "downloader",
Name: "num_short_range",
Help: "number of short range sync is triggered",
},
[]string{"ShardID"},
)
numFailedDownloadCounterVec = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "hmy",
Subsystem: "downloader",
Name: "failed_download",
Help: "number of downloading is failed",
},
[]string{"ShardID", "error"},
)
numBlocksInsertedShortRangeHistogramVec = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "hmy",
Subsystem: "downloader",
Name: "num_blocks_inserted_short_range",
Help: "number of blocks inserted for each short range sync",
// Buckets: 0.5, 1, 2, 4, 8 (and +Inf)
Buckets: prometheus.ExponentialBuckets(0.5, 2, 5),
},
[]string{"ShardID"},
)
numBlocksInsertedBeaconHelperCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: "hmy",
Subsystem: "downloader",
Name: "num_blocks_inserted_beacon_helper",
Help: "number of blocks inserted from beacon helper",
},
)
)
func (d *Downloader) promLabels() prometheus.Labels {
sid := d.bc.ShardID()
return prometheus.Labels{"ShardID": fmt.Sprintf("%d", sid)}
}

api/service/stagedstreamsync/service.go
@@ -0,0 +1,30 @@
package stagedstreamsync
import (
"github.com/harmony-one/harmony/core"
"github.com/harmony-one/harmony/p2p"
)
// StagedStreamSyncService is simply an adapter of Downloaders, which supports block synchronization
type StagedStreamSyncService struct {
Downloaders *Downloaders
}
// NewService creates a new downloader service
func NewService(host p2p.Host, bcs []core.BlockChain, config Config) *StagedStreamSyncService {
return &StagedStreamSyncService{
Downloaders: NewDownloaders(host, bcs, config),
}
}
// Start starts the service
func (s *StagedStreamSyncService) Start() error {
s.Downloaders.Start()
return nil
}
// Stop stops the service
func (s *StagedStreamSyncService) Stop() error {
s.Downloaders.Close()
return nil
}

api/service/stagedstreamsync/short_range_helper.go
@@ -0,0 +1,221 @@
package stagedstreamsync
import (
"context"
"sync"
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/harmony-one/harmony/core/types"
syncProto "github.com/harmony-one/harmony/p2p/stream/protocols/sync"
sttypes "github.com/harmony-one/harmony/p2p/stream/types"
"github.com/pkg/errors"
"github.com/rs/zerolog"
)
type srHelper struct {
syncProtocol syncProtocol
ctx context.Context
config Config
logger zerolog.Logger
}
func (sh *srHelper) getHashChain(bns []uint64) ([]common.Hash, []sttypes.StreamID, error) {
results := newBlockHashResults(bns)
var wg sync.WaitGroup
wg.Add(sh.config.Concurrency)
for i := 0; i != sh.config.Concurrency; i++ {
go func(index int) {
defer wg.Done()
hashes, stid, err := sh.doGetBlockHashesRequest(bns)
if err != nil {
sh.logger.Warn().Err(err).Str("StreamID", string(stid)).
Msg(WrapStagedSyncMsg("doGetBlockHashes return error"))
return
}
results.addResult(hashes, stid)
}(i)
}
wg.Wait()
select {
case <-sh.ctx.Done():
sh.logger.Info().Err(sh.ctx.Err()).Int("num blocks", results.numBlocksWithResults()).
Msg(WrapStagedSyncMsg("short range sync get hashes timed out"))
return nil, nil, sh.ctx.Err()
default:
}
hashChain, wl := results.computeLongestHashChain()
sh.logger.Info().Int("hashChain size", len(hashChain)).Int("whitelist", len(wl)).
Msg(WrapStagedSyncMsg("computeLongestHashChain result"))
return hashChain, wl, nil
}
func (sh *srHelper) getBlocksChain(bns []uint64) ([]*types.Block, sttypes.StreamID, error) {
return sh.doGetBlocksByNumbersRequest(bns)
}
func (sh *srHelper) getBlocksByHashes(hashes []common.Hash, whitelist []sttypes.StreamID) ([]*types.Block, []sttypes.StreamID, error) {
ctx, cancel := context.WithCancel(sh.ctx)
defer cancel()
m := newGetBlocksByHashManager(hashes, whitelist)
var (
wg sync.WaitGroup
gErr error
errLock sync.Mutex
)
concurrency := sh.config.Concurrency
if concurrency > m.numRequests() {
concurrency = m.numRequests()
}
wg.Add(concurrency)
for i := 0; i != concurrency; i++ {
go func(index int) {
defer wg.Done()
defer cancel() // it's ok to cancel context more than once
for {
if m.isDone() {
return
}
hashes, wl, err := m.getNextHashes()
if err != nil {
errLock.Lock()
gErr = err
errLock.Unlock()
return
}
if len(hashes) == 0 {
select {
case <-time.After(200 * time.Millisecond):
continue
case <-ctx.Done():
return
}
}
blocks, stid, err := sh.doGetBlocksByHashesRequest(ctx, hashes, wl)
if err != nil {
sh.logger.Warn().Err(err).
Str("StreamID", string(stid)).
Int("hashes", len(hashes)).
Int("index", index).
Msg(WrapStagedSyncMsg("getBlocksByHashes worker failed"))
m.handleResultError(hashes, stid)
} else {
m.addResult(hashes, blocks, stid)
}
}
}(i)
}
wg.Wait()
if gErr != nil {
return nil, nil, gErr
}
select {
case <-sh.ctx.Done():
res, _, _ := m.getResults()
sh.logger.Info().Err(sh.ctx.Err()).Int("num blocks", len(res)).
Msg(WrapStagedSyncMsg("short range sync get blocks timed out"))
return nil, nil, sh.ctx.Err()
default:
}
return m.getResults()
}
func (sh *srHelper) checkPrerequisites() error {
if sh.syncProtocol.NumStreams() < sh.config.Concurrency {
return errors.New("not enough streams")
}
return nil
}
func (sh *srHelper) prepareBlockHashNumbers(curNumber uint64, count int) []uint64 {
n := count
if count > BlockHashesPerRequest {
n = BlockHashesPerRequest
}
res := make([]uint64, 0, n)
for bn := curNumber + 1; bn <= curNumber+uint64(n); bn++ {
res = append(res, bn)
}
return res
}
func (sh *srHelper) doGetBlockHashesRequest(bns []uint64) ([]common.Hash, sttypes.StreamID, error) {
ctx, cancel := context.WithTimeout(sh.ctx, 1*time.Second)
defer cancel()
hashes, stid, err := sh.syncProtocol.GetBlockHashes(ctx, bns)
if err != nil {
sh.logger.Warn().Err(err).
Interface("block numbers", bns).
Str("stream", string(stid)).
Msg(WrapStagedSyncMsg("failed to doGetBlockHashesRequest"))
return nil, stid, err
}
if len(hashes) != len(bns) {
err := errors.New("unexpected get block hashes result delivered")
sh.logger.Warn().Err(err).
Str("stream", string(stid)).
Msg(WrapStagedSyncMsg("failed to doGetBlockHashesRequest"))
sh.syncProtocol.StreamFailed(stid, "unexpected get block hashes result delivered")
return nil, stid, err
}
return hashes, stid, nil
}
func (sh *srHelper) doGetBlocksByNumbersRequest(bns []uint64) ([]*types.Block, sttypes.StreamID, error) {
ctx, cancel := context.WithTimeout(sh.ctx, 10*time.Second)
defer cancel()
blocks, stid, err := sh.syncProtocol.GetBlocksByNumber(ctx, bns)
if err != nil {
sh.logger.Warn().Err(err).Str("stream", string(stid)).Msg(WrapStagedSyncMsg("failed to doGetBlockHashesRequest"))
return nil, stid, err
}
return blocks, stid, nil
}
func (sh *srHelper) doGetBlocksByHashesRequest(ctx context.Context, hashes []common.Hash, wl []sttypes.StreamID) ([]*types.Block, sttypes.StreamID, error) {
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()
blocks, stid, err := sh.syncProtocol.GetBlocksByHashes(ctx, hashes,
syncProto.WithWhitelist(wl))
if err != nil {
sh.logger.Warn().Err(err).Str("stream", string(stid)).Msg("failed to getBlockByHashes")
return nil, stid, err
}
if err := checkGetBlockByHashesResult(blocks, hashes); err != nil {
sh.logger.Warn().Err(err).Str("stream", string(stid)).Msg(WrapStagedSyncMsg("failed to getBlockByHashes"))
sh.syncProtocol.StreamFailed(stid, "failed to getBlockByHashes")
return nil, stid, err
}
return blocks, stid, nil
}
func (sh *srHelper) removeStreams(sts []sttypes.StreamID) {
for _, st := range sts {
sh.syncProtocol.RemoveStream(st)
}
}
// blameAllStreams reports whether all whitelisted streams should be blamed for a failed
// insertion; it returns false only when the signature verification of the last block failed.
func (sh *srHelper) blameAllStreams(blocks types.Blocks, errIndex int, err error) bool {
if errors.As(err, &emptySigVerifyErr) && errIndex == len(blocks)-1 {
return false
}
return true
}
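Putting the helper together (illustrative only; the actual flow lives in the short range stage, not in this file): compute the agreed hash chain from peers, then fetch the bodies only from the whitelisted streams that voted for it.
// exampleShortRangePass is an illustrative sketch, not part of this commit.
func (sh *srHelper) exampleShortRangePass(curBN uint64) ([]*types.Block, error) {
    bns := sh.prepareBlockHashNumbers(curBN, BlockHashesPerRequest)
    hashChain, whitelist, err := sh.getHashChain(bns)
    if err != nil || len(hashChain) == 0 {
        return nil, err
    }
    blocks, _, err := sh.getBlocksByHashes(hashChain, whitelist)
    return blocks, err
}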

api/service/stagedstreamsync/stage.go
@@ -0,0 +1,112 @@
package stagedstreamsync
import (
"context"
"errors"
"github.com/ethereum/go-ethereum/common"
sttypes "github.com/harmony-one/harmony/p2p/stream/types"
"github.com/ledgerwatch/erigon-lib/kv"
)
type ExecFunc func(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) error
type StageHandler interface {
// Exec is the execution function for the stage to move forward.
// * firstCycle - is it the first cycle of syncing.
// * invalidBlockRevert - whether the execution is to solve the invalid block
// * s - is the current state of the stage and contains stage data.
// * reverter - if the stage needs to cause reverting, `reverter` methods can be used.
Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) error
// Revert is the reverting logic of the stage.
// * firstCycle - is it the first cycle of syncing.
// * u - contains information about the revert itself.
// * s - represents the state of this stage at the beginning of revert.
Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) error
// CleanUp is the execution function for the stage to prune old data.
// * firstCycle - is it the first cycle of syncing.
// * p - is the current state of the stage and contains stage data.
CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) error
// SetStageContext updates the context for stage
SetStageContext(ctx context.Context)
}
// Stage is a single sync stage in staged sync.
type Stage struct {
// ID of the sync stage. Should not be empty and should be unique. It is recommended to prefix it with reverse domain to avoid clashes (`com.example.my-stage`).
ID SyncStageID
// Handler handles the logic for the stage
Handler StageHandler
// Description is a string that is shown in the logs.
Description string
// DisabledDescription is shown in the log with a message if the stage is disabled. Here, you can show which command line flags should be provided to enable the stage.
DisabledDescription string
// Disabled defines if the stage is disabled. It is set when the stage is built by its `StageBuilder`.
Disabled bool
}
var ErrStopped = errors.New("stopped")
var ErrRevert = errors.New("unwound")
// StageState is the state of the stage.
type StageState struct {
state *StagedStreamSync
ID SyncStageID
BlockNumber uint64 // BlockNumber is the current block number of the stage at the beginning of the state execution.
}
func (s *StageState) LogPrefix() string { return s.state.LogPrefix() }
func (s *StageState) CurrentStageProgress(db kv.Getter) (uint64, error) {
return GetStageProgress(db, s.ID, s.state.isBeacon)
}
func (s *StageState) StageProgress(db kv.Getter, id SyncStageID) (uint64, error) {
return GetStageProgress(db, id, s.state.isBeacon)
}
// Update updates the stage state (current block number) in the database. Can be called multiple times during stage execution.
func (s *StageState) Update(db kv.Putter, newBlockNum uint64) error {
return SaveStageProgress(db, s.ID, s.state.isBeacon, newBlockNum)
}
func (s *StageState) UpdateCleanUp(db kv.Putter, blockNum uint64) error {
return SaveStageCleanUpProgress(db, s.ID, s.state.isBeacon, blockNum)
}
// Reverter allows the stage to cause a revert.
type Reverter interface {
// RevertTo begins staged sync revert to the specified block.
RevertTo(revertPoint uint64, invalidBlockNumber uint64, invalidBlockHash common.Hash, invalidBlockStreamID sttypes.StreamID)
}
// RevertState contains the information about revert.
type RevertState struct {
ID SyncStageID
RevertPoint uint64 // RevertPoint is the block to revert to.
state *StagedStreamSync
}
func (u *RevertState) LogPrefix() string { return u.state.LogPrefix() }
// Done updates the DB state of the stage.
func (u *RevertState) Done(db kv.Putter) error {
return SaveStageProgress(db, u.ID, u.state.isBeacon, u.RevertPoint)
}
type CleanUpState struct {
ID SyncStageID
ForwardProgress uint64 // progress of stage forward move
CleanUpProgress uint64 // progress of stage prune move; after the sync cycle it becomes equal to ForwardProgress via the Done() method
state *StagedStreamSync
}
func (s *CleanUpState) LogPrefix() string { return s.state.LogPrefix() + " CleanUp" }
func (s *CleanUpState) Done(db kv.Putter) error {
return SaveStageCleanUpProgress(db, s.ID, s.state.isBeacon, s.ForwardProgress)
}
func (s *CleanUpState) DoneAt(db kv.Putter, blockNum uint64) error {
return SaveStageCleanUpProgress(db, s.ID, s.state.isBeacon, blockNum)
}
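To make the StageHandler contract concrete, a minimal no-op implementation might look like this (illustrative only; the real stages below follow the same shape).
// noopStage is an illustrative, do-nothing StageHandler implementation.
type noopStage struct {
    ctx context.Context
}

func (n *noopStage) Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) error {
    return nil // nothing to execute going forward
}

func (n *noopStage) Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) error {
    return u.Done(tx) // record the revert point as this stage's progress
}

func (n *noopStage) CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) error {
    return nil // nothing to prune
}

func (n *noopStage) SetStageContext(ctx context.Context) {
    n.ctx = ctx
}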

api/service/stagedstreamsync/stage_bodies.go
@@ -0,0 +1,420 @@
package stagedstreamsync
import (
"context"
"fmt"
"sync"
"time"
"github.com/harmony-one/harmony/core"
"github.com/harmony-one/harmony/core/types"
"github.com/harmony-one/harmony/internal/utils"
sttypes "github.com/harmony-one/harmony/p2p/stream/types"
"github.com/ledgerwatch/erigon-lib/kv"
"github.com/pkg/errors"
)
type StageBodies struct {
configs StageBodiesCfg
}
type StageBodiesCfg struct {
ctx context.Context
bc core.BlockChain
db kv.RwDB
blockDBs []kv.RwDB
concurrency int
protocol syncProtocol
isBeacon bool
logProgress bool
}
func NewStageBodies(cfg StageBodiesCfg) *StageBodies {
return &StageBodies{
configs: cfg,
}
}
func NewStageBodiesCfg(ctx context.Context, bc core.BlockChain, db kv.RwDB, blockDBs []kv.RwDB, concurrency int, protocol syncProtocol, isBeacon bool, logProgress bool) StageBodiesCfg {
return StageBodiesCfg{
ctx: ctx,
bc: bc,
db: db,
blockDBs: blockDBs,
concurrency: concurrency,
protocol: protocol,
isBeacon: isBeacon,
logProgress: logProgress,
}
}
func (b *StageBodies) SetStageContext(ctx context.Context) {
b.configs.ctx = ctx
}
// Exec progresses Bodies stage in the forward direction
func (b *StageBodies) Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) {
useInternalTx := tx == nil
if invalidBlockRevert {
return b.redownloadBadBlock(s)
}
// for short range sync, skip this stage
if !s.state.initSync {
return nil
}
maxHeight := s.state.status.targetBN
currentHead := b.configs.bc.CurrentBlock().NumberU64()
if currentHead >= maxHeight {
return nil
}
currProgress := uint64(0)
targetHeight := s.state.currentCycle.TargetHeight
// isBeacon := s.state.isBeacon
// isLastCycle := targetHeight >= maxHeight
if errV := CreateView(b.configs.ctx, b.configs.db, tx, func(etx kv.Tx) error {
if currProgress, err = s.CurrentStageProgress(etx); err != nil {
return err
}
return nil
}); errV != nil {
return errV
}
if currProgress == 0 {
if err := b.cleanAllBlockDBs(); err != nil {
return err
}
currProgress = currentHead
}
if currProgress >= targetHeight {
return nil
}
// size := uint64(0)
startTime := time.Now()
// startBlock := currProgress
if b.configs.logProgress {
fmt.Print("\033[s") // save the cursor position
}
if useInternalTx {
var err error
tx, err = b.configs.db.BeginRw(context.Background())
if err != nil {
return err
}
defer tx.Rollback()
}
// Fetch blocks from neighbors
s.state.gbm = newBlockDownloadManager(tx, b.configs.bc, targetHeight, s.state.logger)
// Setup workers to fetch blocks from remote node
var wg sync.WaitGroup
for i := 0; i != s.state.config.Concurrency; i++ {
wg.Add(1)
go b.runBlockWorkerLoop(s.state.gbm, &wg, i, startTime)
}
wg.Wait()
if useInternalTx {
if err := tx.Commit(); err != nil {
return err
}
}
return nil
}
// runBlockWorkerLoop creates a work loop for downloading blocks
func (b *StageBodies) runBlockWorkerLoop(gbm *blockDownloadManager, wg *sync.WaitGroup, loopID int, startTime time.Time) {
currentBlock := int(b.configs.bc.CurrentBlock().NumberU64())
defer wg.Done()
for {
select {
case <-b.configs.ctx.Done():
return
default:
}
batch := gbm.GetNextBatch()
if len(batch) == 0 {
select {
case <-b.configs.ctx.Done():
return
case <-time.After(100 * time.Millisecond):
return
}
}
blockBytes, sigBytes, stid, err := b.downloadRawBlocks(batch)
if err != nil {
if !errors.Is(err, context.Canceled) {
b.configs.protocol.StreamFailed(stid, "downloadRawBlocks failed")
}
utils.Logger().Error().
Err(err).
Str("stream", string(stid)).
Interface("block numbers", batch).
Msg(WrapStagedSyncMsg("downloadRawBlocks failed"))
err = errors.Wrap(err, "request error")
gbm.HandleRequestError(batch, err, stid)
} else {
if err = b.saveBlocks(gbm.tx, batch, blockBytes, sigBytes, loopID, stid); err != nil {
panic("[STAGED_STREAM_SYNC] saving downloaded blocks to db failed.")
}
gbm.HandleRequestResult(batch, blockBytes, sigBytes, loopID, stid)
if b.configs.logProgress {
//calculating block download speed
dt := time.Now().Sub(startTime).Seconds()
speed := float64(0)
if dt > 0 {
speed = float64(len(gbm.bdd)) / dt
}
blockSpeed := fmt.Sprintf("%.2f", speed)
fmt.Print("\033[u\033[K") // restore the cursor position and clear the line
fmt.Println("downloaded blocks:", currentBlock+len(gbm.bdd), "/", int(gbm.targetBN), "(", blockSpeed, "blocks/s", ")")
}
}
}
}
// redownloadBadBlock tries to redownload the bad block from other streams
func (b *StageBodies) redownloadBadBlock(s *StageState) error {
batch := []uint64{s.state.invalidBlock.Number}
for {
if b.configs.protocol.NumStreams() == 0 {
return errors.Errorf("re-download bad block from all streams failed")
}
blockBytes, sigBytes, stid, err := b.downloadRawBlocks(batch)
if err != nil {
if !errors.Is(err, context.Canceled) {
b.configs.protocol.StreamFailed(stid, "tried to re-download bad block from this stream, but downloadRawBlocks failed")
}
continue
}
isOneOfTheBadStreams := false
for _, id := range s.state.invalidBlock.StreamID {
if id == stid {
b.configs.protocol.RemoveStream(stid)
isOneOfTheBadStreams = true
break
}
}
if isOneOfTheBadStreams {
continue
}
s.state.gbm.SetDownloadDetails(batch, 0, stid)
if errU := b.configs.blockDBs[0].Update(context.Background(), func(tx kv.RwTx) error {
if err = b.saveBlocks(tx, batch, blockBytes, sigBytes, 0, stid); err != nil {
return errors.Errorf("[STAGED_STREAM_SYNC] saving re-downloaded bad block to db failed.")
}
return nil
}); errU != nil {
continue
}
break
}
return nil
}
func (b *StageBodies) downloadBlocks(bns []uint64) ([]*types.Block, sttypes.StreamID, error) {
ctx, cancel := context.WithTimeout(b.configs.ctx, 10*time.Second)
defer cancel()
blocks, stid, err := b.configs.protocol.GetBlocksByNumber(ctx, bns)
if err != nil {
return nil, stid, err
}
if err := validateGetBlocksResult(bns, blocks); err != nil {
return nil, stid, err
}
return blocks, stid, nil
}
func (b *StageBodies) downloadRawBlocks(bns []uint64) ([][]byte, [][]byte, sttypes.StreamID, error) {
ctx, cancel := context.WithTimeout(b.configs.ctx, 10*time.Second)
defer cancel()
return b.configs.protocol.GetRawBlocksByNumber(ctx, bns)
}
func validateGetBlocksResult(requested []uint64, result []*types.Block) error {
if len(result) != len(requested) {
return fmt.Errorf("unexpected number of blocks delivered: %v / %v", len(result), len(requested))
}
for i, block := range result {
if block != nil && block.NumberU64() != requested[i] {
return fmt.Errorf("block with unexpected number delivered: %v / %v", block.NumberU64(), requested[i])
}
}
return nil
}
// saveBlocks saves the blocks into db
func (b *StageBodies) saveBlocks(tx kv.RwTx, bns []uint64, blockBytes [][]byte, sigBytes [][]byte, loopID int, stid sttypes.StreamID) error {
tx, err := b.configs.blockDBs[loopID].BeginRw(context.Background())
if err != nil {
return err
}
defer tx.Rollback()
for i := uint64(0); i < uint64(len(blockBytes)); i++ {
block := blockBytes[i]
sig := sigBytes[i]
if block == nil {
continue
}
blkKey := marshalData(bns[i])
if err := tx.Put(BlocksBucket, blkKey, block); err != nil {
utils.Logger().Error().
Err(err).
Uint64("block height", bns[i]).
Msg("[STAGED_STREAM_SYNC] adding block to db failed")
return err
}
// sigKey := []byte("s" + string(bns[i]))
if err := tx.Put(BlockSignaturesBucket, blkKey, sig); err != nil {
utils.Logger().Error().
Err(err).
Uint64("block height", bns[i]).
Msg("[STAGED_STREAM_SYNC] adding block sig to db failed")
return err
}
}
if err := tx.Commit(); err != nil {
return err
}
return nil
}
func (b *StageBodies) saveProgress(s *StageState, progress uint64, tx kv.RwTx) (err error) {
useInternalTx := tx == nil
if useInternalTx {
var err error
tx, err = b.configs.db.BeginRw(context.Background())
if err != nil {
return err
}
defer tx.Rollback()
}
// save progress
if err = s.Update(tx, progress); err != nil {
utils.Logger().Error().
Err(err).
Msgf("[STAGED_SYNC] saving progress for block bodies stage failed")
return ErrSavingBodiesProgressFail
}
if useInternalTx {
if err := tx.Commit(); err != nil {
return err
}
}
return nil
}
func (b *StageBodies) cleanBlocksDB(loopID int) (err error) {
tx, errb := b.configs.blockDBs[loopID].BeginRw(b.configs.ctx)
if errb != nil {
return errb
}
defer tx.Rollback()
// clean block bodies db
if err = tx.ClearBucket(BlocksBucket); err != nil {
utils.Logger().Error().
Err(err).
Msgf("[STAGED_STREAM_SYNC] clear blocks bucket after revert failed")
return err
}
// clean block signatures db
if err = tx.ClearBucket(BlockSignaturesBucket); err != nil {
utils.Logger().Error().
Err(err).
Msgf("[STAGED_STREAM_SYNC] clear block signatures bucket after revert failed")
return err
}
if err = tx.Commit(); err != nil {
return err
}
return nil
}
func (b *StageBodies) cleanAllBlockDBs() (err error) {
//clean all blocks DBs
for i := 0; i < b.configs.concurrency; i++ {
if err := b.cleanBlocksDB(i); err != nil {
return err
}
}
return nil
}
func (b *StageBodies) Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) {
//clean all blocks DBs
if err := b.cleanAllBlockDBs(); err != nil {
return err
}
useInternalTx := tx == nil
if useInternalTx {
tx, err = b.configs.db.BeginRw(b.configs.ctx)
if err != nil {
return err
}
defer tx.Rollback()
}
// save progress
currentHead := b.configs.bc.CurrentBlock().NumberU64()
if err = s.Update(tx, currentHead); err != nil {
utils.Logger().Error().
Err(err).
Msgf("[STAGED_SYNC] saving progress for block bodies stage after revert failed")
return err
}
if err = u.Done(tx); err != nil {
return err
}
if useInternalTx {
if err = tx.Commit(); err != nil {
return err
}
}
return nil
}
func (b *StageBodies) CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) {
//clean all blocks DBs
if err := b.cleanAllBlockDBs(); err != nil {
return err
}
return nil
}

@ -0,0 +1,198 @@
package stagedstreamsync
import (
"context"
"github.com/harmony-one/harmony/core"
"github.com/harmony-one/harmony/internal/utils"
"github.com/harmony-one/harmony/shard"
"github.com/ledgerwatch/erigon-lib/kv"
"github.com/pkg/errors"
)
type StageEpoch struct {
configs StageEpochCfg
}
type StageEpochCfg struct {
ctx context.Context
bc core.BlockChain
db kv.RwDB
}
func NewStageEpoch(cfg StageEpochCfg) *StageEpoch {
return &StageEpoch{
configs: cfg,
}
}
func NewStageEpochCfg(ctx context.Context, bc core.BlockChain, db kv.RwDB) StageEpochCfg {
return StageEpochCfg{
ctx: ctx,
bc: bc,
db: db,
}
}
func (sr *StageEpoch) SetStageContext(ctx context.Context) {
sr.configs.ctx = ctx
}
func (sr *StageEpoch) Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) error {
// no need to update epoch chain if we are redoing the stages because of bad block
if invalidBlockRevert {
return nil
}
// for long range sync, skip this stage
if s.state.initSync {
return nil
}
if _, ok := sr.configs.bc.(*core.EpochChain); !ok {
return nil
}
// doShortRangeSyncForEpochSync
n, err := sr.doShortRangeSyncForEpochSync(s)
s.state.inserted = n
if err != nil {
return err
}
useInternalTx := tx == nil
if useInternalTx {
var err error
tx, err = sr.configs.db.BeginRw(sr.configs.ctx)
if err != nil {
return err
}
defer tx.Rollback()
}
if useInternalTx {
if err := tx.Commit(); err != nil {
return err
}
}
return nil
}
func (sr *StageEpoch) doShortRangeSyncForEpochSync(s *StageState) (int, error) {
numShortRangeCounterVec.With(s.state.promLabels()).Inc()
srCtx, cancel := context.WithTimeout(s.state.ctx, ShortRangeTimeout)
defer cancel()
//TODO: merge srHelper with StageEpochConfig
sh := &srHelper{
syncProtocol: s.state.protocol,
ctx: srCtx,
config: s.state.config,
logger: utils.Logger().With().Str("mode", "epoch chain short range").Logger(),
}
if err := sh.checkPrerequisites(); err != nil {
return 0, errors.Wrap(err, "prerequisite")
}
curBN := s.state.bc.CurrentBlock().NumberU64()
bns := make([]uint64, 0, BlocksPerRequest)
// in the epoch chain, we only have the last block of each epoch, so the current
// block's epoch number is the last epoch we already have; we should start
// from the next epoch
loopEpoch := s.state.bc.CurrentHeader().Epoch().Uint64() + 1
for len(bns) < BlocksPerRequest {
blockNum := shard.Schedule.EpochLastBlock(loopEpoch)
if blockNum > curBN {
bns = append(bns, blockNum)
}
loopEpoch = loopEpoch + 1
}
if len(bns) == 0 {
return 0, nil
}
////////////////////////////////////////////////////////
hashChain, whitelist, err := sh.getHashChain(bns)
if err != nil {
return 0, errors.Wrap(err, "getHashChain")
}
if len(hashChain) == 0 {
// short circuit for no sync is needed
return 0, nil
}
blocks, streamID, err := sh.getBlocksByHashes(hashChain, whitelist)
if err != nil {
utils.Logger().Warn().Err(err).Msg("epoch sync getBlocksByHashes failed")
if !errors.Is(err, context.Canceled) {
sh.removeStreams(whitelist) // Remote nodes cannot provide blocks with target hashes
}
return 0, errors.Wrap(err, "epoch sync getBlocksByHashes")
}
///////////////////////////////////////////////////////
// TODO: check this
// blocks, streamID, err := sh.getBlocksChain(bns)
// if err != nil {
// return 0, errors.Wrap(err, "getHashChain")
// }
///////////////////////////////////////////////////////
if len(blocks) == 0 {
// short circuit for no sync is needed
return 0, nil
}
n, err := s.state.bc.InsertChain(blocks, true)
numBlocksInsertedShortRangeHistogramVec.With(s.state.promLabels()).Observe(float64(n))
if err != nil {
utils.Logger().Info().Err(err).Int("blocks inserted", n).Msg("Insert block failed")
sh.removeStreams(streamID) // Data provided by remote nodes is corrupted
return n, err
}
if n > 0 {
utils.Logger().Info().Int("blocks inserted", n).Msg("Insert block success")
}
return n, nil
}
func (sr *StageEpoch) Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) {
useInternalTx := tx == nil
if useInternalTx {
tx, err = sr.configs.db.BeginRw(context.Background())
if err != nil {
return err
}
defer tx.Rollback()
}
if err = u.Done(tx); err != nil {
return err
}
if useInternalTx {
if err := tx.Commit(); err != nil {
return err
}
}
return nil
}
func (sr *StageEpoch) CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) {
useInternalTx := tx == nil
if useInternalTx {
tx, err = sr.configs.db.BeginRw(context.Background())
if err != nil {
return err
}
defer tx.Rollback()
}
if useInternalTx {
if err = tx.Commit(); err != nil {
return err
}
}
return nil
}

@ -0,0 +1,114 @@
package stagedstreamsync
import (
"context"
"github.com/ledgerwatch/erigon-lib/kv"
)
type StageFinish struct {
configs StageFinishCfg
}
type StageFinishCfg struct {
ctx context.Context
db kv.RwDB
}
func NewStageFinish(cfg StageFinishCfg) *StageFinish {
return &StageFinish{
configs: cfg,
}
}
func NewStageFinishCfg(ctx context.Context, db kv.RwDB) StageFinishCfg {
return StageFinishCfg{
ctx: ctx,
db: db,
}
}
func (finish *StageFinish) SetStageContext(ctx context.Context) {
finish.configs.ctx = ctx
}
func (finish *StageFinish) Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) error {
useInternalTx := tx == nil
if useInternalTx {
var err error
tx, err = finish.configs.db.BeginRw(context.Background())
if err != nil {
return err
}
defer tx.Rollback()
}
// TODO: prepare indices (useful for RPC) and finalize
if useInternalTx {
if err := tx.Commit(); err != nil {
return err
}
}
return nil
}
func (bh *StageFinish) clearBucket(tx kv.RwTx, isBeacon bool) error {
useInternalTx := tx == nil
if useInternalTx {
var err error
tx, err = bh.configs.db.BeginRw(context.Background())
if err != nil {
return err
}
defer tx.Rollback()
}
if useInternalTx {
if err := tx.Commit(); err != nil {
return err
}
}
return nil
}
func (finish *StageFinish) Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) {
useInternalTx := tx == nil
if useInternalTx {
tx, err = finish.configs.db.BeginRw(finish.configs.ctx)
if err != nil {
return err
}
defer tx.Rollback()
}
if err = u.Done(tx); err != nil {
return err
}
if useInternalTx {
if err = tx.Commit(); err != nil {
return err
}
}
return nil
}
func (finish *StageFinish) CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) {
useInternalTx := tx == nil
if useInternalTx {
tx, err = finish.configs.db.BeginRw(finish.configs.ctx)
if err != nil {
return err
}
defer tx.Rollback()
}
if useInternalTx {
if err = tx.Commit(); err != nil {
return err
}
}
return nil
}

@ -0,0 +1,157 @@
package stagedstreamsync
import (
"context"
"github.com/harmony-one/harmony/core"
"github.com/harmony-one/harmony/internal/utils"
"github.com/ledgerwatch/erigon-lib/kv"
)
type StageHeads struct {
configs StageHeadsCfg
}
type StageHeadsCfg struct {
ctx context.Context
bc core.BlockChain
db kv.RwDB
}
func NewStageHeads(cfg StageHeadsCfg) *StageHeads {
return &StageHeads{
configs: cfg,
}
}
func NewStageHeadersCfg(ctx context.Context, bc core.BlockChain, db kv.RwDB) StageHeadsCfg {
return StageHeadsCfg{
ctx: ctx,
bc: bc,
db: db,
}
}
func (heads *StageHeads) SetStageContext(ctx context.Context) {
heads.configs.ctx = ctx
}
func (heads *StageHeads) Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) error {
// no need to update target if we are redoing the stages because of bad block
if invalidBlockRevert {
return nil
}
// no need for short range sync
if !s.state.initSync {
return nil
}
useInternalTx := tx == nil
if useInternalTx {
var err error
tx, err = heads.configs.db.BeginRw(heads.configs.ctx)
if err != nil {
return err
}
defer tx.Rollback()
}
maxHeight := s.state.status.targetBN
maxBlocksPerSyncCycle := uint64(1024) // TODO: should be in config -> s.state.MaxBlocksPerSyncCycle
currentHeight := heads.configs.bc.CurrentBlock().NumberU64()
s.state.currentCycle.TargetHeight = maxHeight
targetHeight := uint64(0)
if errV := CreateView(heads.configs.ctx, heads.configs.db, tx, func(etx kv.Tx) (err error) {
if targetHeight, err = s.CurrentStageProgress(etx); err != nil {
return err
}
return nil
}); errV != nil {
return errV
}
if currentHeight >= maxHeight {
utils.Logger().Info().Uint64("current number", currentHeight).Uint64("target number", maxHeight).
Msg(WrapStagedSyncMsg("early return of long range sync"))
return nil
}
// if current height is ahead of target height, we need to recalculate the target height
if currentHeight >= targetHeight {
if maxHeight <= currentHeight {
return nil
}
utils.Logger().Info().
Uint64("max blocks per sync cycle", maxBlocksPerSyncCycle).
Uint64("maxPeersHeight", maxHeight).
Msgf(WrapStagedSyncMsg("current height is ahead of target height, target height is readjusted to max peers height"))
targetHeight = maxHeight
}
if targetHeight > maxHeight {
targetHeight = maxHeight
}
if maxBlocksPerSyncCycle > 0 && targetHeight-currentHeight > maxBlocksPerSyncCycle {
targetHeight = currentHeight + maxBlocksPerSyncCycle
}
s.state.currentCycle.TargetHeight = targetHeight
if err := s.Update(tx, targetHeight); err != nil {
utils.Logger().Error().
Err(err).
Msgf(WrapStagedSyncMsg("saving progress for headers stage failed"))
return err
}
if useInternalTx {
if err := tx.Commit(); err != nil {
return err
}
}
return nil
}
func (heads *StageHeads) Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) {
useInternalTx := tx == nil
if useInternalTx {
tx, err = heads.configs.db.BeginRw(context.Background())
if err != nil {
return err
}
defer tx.Rollback()
}
if err = u.Done(tx); err != nil {
return err
}
if useInternalTx {
if err := tx.Commit(); err != nil {
return err
}
}
return nil
}
func (heads *StageHeads) CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) {
useInternalTx := tx == nil
if useInternalTx {
tx, err = heads.configs.db.BeginRw(context.Background())
if err != nil {
return err
}
defer tx.Rollback()
}
if useInternalTx {
if err = tx.Commit(); err != nil {
return err
}
}
return nil
}

@ -0,0 +1,205 @@
package stagedstreamsync
import (
"context"
"github.com/harmony-one/harmony/core"
"github.com/harmony-one/harmony/internal/utils"
sttypes "github.com/harmony-one/harmony/p2p/stream/types"
"github.com/ledgerwatch/erigon-lib/kv"
"github.com/pkg/errors"
)
type StageShortRange struct {
configs StageShortRangeCfg
}
type StageShortRangeCfg struct {
ctx context.Context
bc core.BlockChain
db kv.RwDB
}
func NewStageShortRange(cfg StageShortRangeCfg) *StageShortRange {
return &StageShortRange{
configs: cfg,
}
}
func NewStageShortRangeCfg(ctx context.Context, bc core.BlockChain, db kv.RwDB) StageShortRangeCfg {
return StageShortRangeCfg{
ctx: ctx,
bc: bc,
db: db,
}
}
func (sr *StageShortRange) SetStageContext(ctx context.Context) {
sr.configs.ctx = ctx
}
func (sr *StageShortRange) Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) error {
// no need to do short range if we are redoing the stages because of bad block
if invalidBlockRevert {
return nil
}
// for long range sync, skip this stage
if s.state.initSync {
return nil
}
if _, ok := sr.configs.bc.(*core.EpochChain); ok {
return nil
}
curBN := sr.configs.bc.CurrentBlock().NumberU64()
if curBN >= s.state.status.targetBN {
return nil
}
// do short range sync
n, err := sr.doShortRangeSync(s)
s.state.inserted = n
if err != nil {
return err
}
useInternalTx := tx == nil
if useInternalTx {
var err error
tx, err = sr.configs.db.BeginRw(sr.configs.ctx)
if err != nil {
return err
}
defer tx.Rollback()
}
if useInternalTx {
if err := tx.Commit(); err != nil {
return err
}
}
return nil
}
// doShortRangeSync does the short range sync.
// Compared with long range sync, short range sync is more focused on syncing to the latest block.
// It consists of 3 steps:
// 1. Obtain the block hashes and compute the longest hash chain.
// 2. Get blocks by hashes from computed hash chain.
// 3. Insert the blocks to blockchain.
func (sr *StageShortRange) doShortRangeSync(s *StageState) (int, error) {
numShortRangeCounterVec.With(s.state.promLabels()).Inc()
srCtx, cancel := context.WithTimeout(s.state.ctx, ShortRangeTimeout)
defer cancel()
sh := &srHelper{
syncProtocol: s.state.protocol,
ctx: srCtx,
config: s.state.config,
logger: utils.Logger().With().Str("mode", "short range").Logger(),
}
if err := sh.checkPrerequisites(); err != nil {
return 0, errors.Wrap(err, "prerequisite")
}
curBN := sr.configs.bc.CurrentBlock().NumberU64()
blkCount := int(s.state.status.targetBN) - int(curBN)
blkNums := sh.prepareBlockHashNumbers(curBN, blkCount)
hashChain, whitelist, err := sh.getHashChain(blkNums)
if err != nil {
return 0, errors.Wrap(err, "getHashChain")
}
if len(hashChain) == 0 {
// short circuit for no sync is needed
return 0, nil
}
expEndBN := curBN + uint64(len(hashChain))
utils.Logger().Info().Uint64("current number", curBN).
Uint64("target number", expEndBN).
Interface("hashChain", hashChain).
Msg("short range start syncing")
s.state.status.setTargetBN(expEndBN)
s.state.status.startSyncing()
defer func() {
utils.Logger().Info().Msg("short range finished syncing")
s.state.status.finishSyncing()
}()
blocks, stids, err := sh.getBlocksByHashes(hashChain, whitelist)
if err != nil {
utils.Logger().Warn().Err(err).Msg("getBlocksByHashes failed")
if !errors.Is(err, context.Canceled) {
sh.removeStreams(whitelist) // Remote nodes cannot provide blocks with target hashes
}
return 0, errors.Wrap(err, "getBlocksByHashes")
}
utils.Logger().Info().Int("num blocks", len(blocks)).Msg("getBlockByHashes result")
n, err := verifyAndInsertBlocks(sr.configs.bc, blocks)
numBlocksInsertedShortRangeHistogramVec.With(s.state.promLabels()).Observe(float64(n))
if err != nil {
utils.Logger().Warn().Err(err).Int("blocks inserted", n).Msg("Insert block failed")
if sh.blameAllStreams(blocks, n, err) {
sh.removeStreams(whitelist) // Data provided by remote nodes is corrupted
} else {
// It is the last block that carries a wrong commit sig. Blame the provider of the last block.
st2Blame := stids[len(stids)-1]
sh.removeStreams([]sttypes.StreamID{st2Blame})
}
return n, err
}
utils.Logger().Info().Int("blocks inserted", n).Msg("Insert block success")
return n, nil
}
func (sr *StageShortRange) Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) {
useInternalTx := tx == nil
if useInternalTx {
tx, err = sr.configs.db.BeginRw(context.Background())
if err != nil {
return err
}
defer tx.Rollback()
}
if err = u.Done(tx); err != nil {
return err
}
if useInternalTx {
if err := tx.Commit(); err != nil {
return err
}
}
return nil
}
func (sr *StageShortRange) CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) {
useInternalTx := tx == nil
if useInternalTx {
tx, err = sr.configs.db.BeginRw(context.Background())
if err != nil {
return err
}
defer tx.Rollback()
}
if useInternalTx {
if err = tx.Commit(); err != nil {
return err
}
}
return nil
}

@ -0,0 +1,295 @@
package stagedstreamsync
import (
"context"
"fmt"
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/rlp"
"github.com/harmony-one/harmony/core"
"github.com/harmony-one/harmony/core/types"
"github.com/harmony-one/harmony/internal/utils"
"github.com/ledgerwatch/erigon-lib/kv"
"github.com/prometheus/client_golang/prometheus"
"github.com/rs/zerolog"
)
type StageStates struct {
configs StageStatesCfg
}
type StageStatesCfg struct {
ctx context.Context
bc core.BlockChain
db kv.RwDB
blockDBs []kv.RwDB
concurrency int
logger zerolog.Logger
logProgress bool
}
func NewStageStates(cfg StageStatesCfg) *StageStates {
return &StageStates{
configs: cfg,
}
}
func NewStageStatesCfg(ctx context.Context,
bc core.BlockChain,
db kv.RwDB,
blockDBs []kv.RwDB,
concurrency int,
logger zerolog.Logger,
logProgress bool) StageStatesCfg {
return StageStatesCfg{
ctx: ctx,
bc: bc,
db: db,
blockDBs: blockDBs,
concurrency: concurrency,
logger: logger,
logProgress: logProgress,
}
}
func (stg *StageStates) SetStageContext(ctx context.Context) {
stg.configs.ctx = ctx
}
// Exec progresses States stage in the forward direction
func (stg *StageStates) Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) {
// for short range sync, skip this step
if !s.state.initSync {
return nil
}
maxHeight := s.state.status.targetBN
currentHead := stg.configs.bc.CurrentBlock().NumberU64()
if currentHead >= maxHeight {
return nil
}
currProgress := stg.configs.bc.CurrentBlock().NumberU64()
targetHeight := s.state.currentCycle.TargetHeight
if currProgress >= targetHeight {
return nil
}
useInternalTx := tx == nil
if useInternalTx {
var err error
tx, err = stg.configs.db.BeginRw(stg.configs.ctx)
if err != nil {
return err
}
defer tx.Rollback()
}
// isLastCycle := targetHeight >= maxHeight
startTime := time.Now()
startBlock := currProgress
pl := s.state.promLabels()
gbm := s.state.gbm
// prepare db transactions
txs := make([]kv.RwTx, stg.configs.concurrency)
for i := 0; i < stg.configs.concurrency; i++ {
txs[i], err = stg.configs.blockDBs[i].BeginRw(context.Background())
if err != nil {
return err
}
}
defer func() {
for i := 0; i < stg.configs.concurrency; i++ {
txs[i].Rollback()
}
}()
if stg.configs.logProgress {
fmt.Print("\033[s") // save the cursor position
}
for i := currProgress + 1; i <= targetHeight; i++ {
blkKey := marshalData(i)
loopID, streamID := gbm.GetDownloadDetails(i)
blockBytes, err := txs[loopID].GetOne(BlocksBucket, blkKey)
if err != nil {
return err
}
sigBytes, err := txs[loopID].GetOne(BlockSignaturesBucket, blkKey)
if err != nil {
return err
}
// if the block size is invalid, we have to break the state update loop
// no rollback is needed, because the latest batch hasn't been added to the chain yet
sz := len(blockBytes)
if sz <= 1 {
utils.Logger().Error().
Uint64("block number", i).
Msg("block size invalid")
invalidBlockHash := common.Hash{}
s.state.protocol.StreamFailed(streamID, "zero bytes block is received from stream")
reverter.RevertTo(stg.configs.bc.CurrentBlock().NumberU64(), i, invalidBlockHash, streamID)
return ErrInvalidBlockBytes
}
var block *types.Block
if err := rlp.DecodeBytes(blockBytes, &block); err != nil {
utils.Logger().Error().
Err(err).
Uint64("block number", i).
Msg("block decoding failed")
s.state.protocol.StreamFailed(streamID, "invalid block is received from stream")
invalidBlockHash := common.Hash{}
reverter.RevertTo(stg.configs.bc.CurrentBlock().NumberU64(), i, invalidBlockHash, streamID)
return ErrInvalidBlockBytes
}
if sigBytes != nil {
block.SetCurrentCommitSig(sigBytes)
}
if block.NumberU64() != i {
s.state.protocol.StreamFailed(streamID, "invalid block with unmatched number is received from stream")
invalidBlockHash := block.Hash()
reverter.RevertTo(stg.configs.bc.CurrentBlock().NumberU64(), i, invalidBlockHash, streamID)
return ErrInvalidBlockNumber
}
if err := verifyAndInsertBlock(stg.configs.bc, block); err != nil {
stg.configs.logger.Warn().Err(err).Uint64("cycle target block", targetHeight).
Uint64("block number", block.NumberU64()).
Msg(WrapStagedSyncMsg("insert blocks failed in long range"))
s.state.protocol.StreamFailed(streamID, "unverifiable invalid block is received from stream")
invalidBlockHash := block.Hash()
reverter.RevertTo(stg.configs.bc.CurrentBlock().NumberU64(), block.NumberU64(), invalidBlockHash, streamID)
pl["error"] = err.Error()
longRangeFailInsertedBlockCounterVec.With(pl).Inc()
return err
}
if invalidBlockRevert {
if s.state.invalidBlock.Number == i {
s.state.invalidBlock.resolve()
}
}
s.state.inserted++
longRangeSyncedBlockCounterVec.With(pl).Inc()
utils.Logger().Info().
Uint64("blockHeight", block.NumberU64()).
Uint64("blockEpoch", block.Epoch().Uint64()).
Str("blockHex", block.Hash().Hex()).
Uint32("ShardID", block.ShardID()).
Msg("[STAGED_STREAM_SYNC] New Block Added to Blockchain")
// update cur progress
currProgress = stg.configs.bc.CurrentBlock().NumberU64()
for i, tx := range block.StakingTransactions() {
utils.Logger().Info().
Msgf(
"StakingTxn %d: %s, %v", i, tx.StakingType().String(), tx.StakingMessage(),
)
}
// log the stage progress in console
if stg.configs.logProgress {
//calculating block speed
dt := time.Since(startTime).Seconds()
speed := float64(0)
if dt > 0 {
speed = float64(currProgress-startBlock) / dt
}
blockSpeed := fmt.Sprintf("%.2f", speed)
fmt.Print("\033[u\033[K") // restore the cursor position and clear the line
fmt.Println("insert blocks progress:", currProgress, "/", targetHeight, "(", blockSpeed, "blocks/s", ")")
}
}
if useInternalTx {
if err := tx.Commit(); err != nil {
return err
}
}
return nil
}
func (stg *StageStates) insertChain(gbm *blockDownloadManager,
protocol syncProtocol,
lbls prometheus.Labels,
targetBN uint64) {
}
func (stg *StageStates) saveProgress(s *StageState, tx kv.RwTx) (err error) {
useInternalTx := tx == nil
if useInternalTx {
var err error
tx, err = stg.configs.db.BeginRw(context.Background())
if err != nil {
return err
}
defer tx.Rollback()
}
// save progress
if err = s.Update(tx, stg.configs.bc.CurrentBlock().NumberU64()); err != nil {
utils.Logger().Error().
Err(err).
Msgf("[STAGED_SYNC] saving progress for block States stage failed")
return ErrSaveStateProgressFail
}
if useInternalTx {
if err := tx.Commit(); err != nil {
return err
}
}
return nil
}
func (stg *StageStates) Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) {
useInternalTx := tx == nil
if useInternalTx {
tx, err = stg.configs.db.BeginRw(stg.configs.ctx)
if err != nil {
return err
}
defer tx.Rollback()
}
if err = u.Done(tx); err != nil {
return err
}
if useInternalTx {
if err = tx.Commit(); err != nil {
return err
}
}
return nil
}
func (stg *StageStates) CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) {
useInternalTx := tx == nil
if useInternalTx {
tx, err = stg.configs.db.BeginRw(stg.configs.ctx)
if err != nil {
return err
}
defer tx.Rollback()
}
if useInternalTx {
if err = tx.Commit(); err != nil {
return err
}
}
return nil
}

@ -0,0 +1,597 @@
package stagedstreamsync
import (
"context"
"fmt"
"sync"
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/event"
"github.com/harmony-one/harmony/core"
"github.com/harmony-one/harmony/core/types"
"github.com/harmony-one/harmony/internal/utils"
syncproto "github.com/harmony-one/harmony/p2p/stream/protocols/sync"
sttypes "github.com/harmony-one/harmony/p2p/stream/types"
"github.com/ledgerwatch/erigon-lib/kv"
"github.com/prometheus/client_golang/prometheus"
"github.com/rs/zerolog"
)
type InvalidBlock struct {
Active bool
Number uint64
Hash common.Hash
IsLogged bool
StreamID []sttypes.StreamID
}
func (ib *InvalidBlock) set(num uint64, hash common.Hash, resetBadStreams bool) {
ib.Active = true
ib.IsLogged = false
ib.Number = num
ib.Hash = hash
if resetBadStreams {
ib.StreamID = make([]sttypes.StreamID, 0)
}
}
func (ib *InvalidBlock) resolve() {
ib.Active = false
ib.IsLogged = false
ib.Number = 0
ib.Hash = common.Hash{}
ib.StreamID = ib.StreamID[:0]
}
func (ib *InvalidBlock) addBadStream(bsID sttypes.StreamID) {
// only add unique IDs
for _, stID := range ib.StreamID {
if stID == bsID {
return
}
}
ib.StreamID = append(ib.StreamID, bsID)
}
type StagedStreamSync struct {
ctx context.Context
bc core.BlockChain
isBeacon bool
isExplorer bool
db kv.RwDB
protocol syncProtocol
gbm *blockDownloadManager // initialized when finished get block number
inserted int
config Config
logger zerolog.Logger
status status //TODO: merge this with currentSyncCycle
initSync bool // if set to true, the node starts long range syncing
UseMemDB bool
revertPoint *uint64 // used to run stages
prevRevertPoint *uint64 // used to get value from outside of staged sync after cycle (for example to notify RPCDaemon)
invalidBlock InvalidBlock
currentStage uint
LogProgress bool
currentCycle SyncCycle // current cycle
stages []*Stage
revertOrder []*Stage
pruningOrder []*Stage
timings []Timing
logPrefixes []string
evtDownloadFinished event.Feed // channel for each download task finished
evtDownloadFinishedSubscribed bool
evtDownloadStarted event.Feed // channel for each download started
evtDownloadStartedSubscribed bool
}
// BlockWithSig is the serialization structure for the request DownloaderRequest_BLOCKWITHSIG
// The block is encoded as block + commit signature
type BlockWithSig struct {
Block *types.Block
CommitSigAndBitmap []byte
}
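// Illustrative only (an assumption, not part of this change): BlockWithSig travels
// as a single RLP payload of "block + commit signature", so encoding and decoding
// could look like the sketch below. It assumes the
// "github.com/ethereum/go-ethereum/rlp" import.
func encodeBlockWithSig(block *types.Block, commitSig []byte) ([]byte, error) {
    return rlp.EncodeToBytes(BlockWithSig{Block: block, CommitSigAndBitmap: commitSig})
}

func decodeBlockWithSig(raw []byte) (*types.Block, []byte, error) {
    var bws BlockWithSig
    if err := rlp.DecodeBytes(raw, &bws); err != nil {
        return nil, nil, err
    }
    return bws.Block, bws.CommitSigAndBitmap, nil
}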
type Timing struct {
isRevert bool
isCleanUp bool
stage SyncStageID
took time.Duration
}
type SyncCycle struct {
Number uint64
TargetHeight uint64
lock sync.RWMutex
}
func (s *StagedStreamSync) Len() int { return len(s.stages) }
func (s *StagedStreamSync) Context() context.Context { return s.ctx }
func (s *StagedStreamSync) Blockchain() core.BlockChain { return s.bc }
func (s *StagedStreamSync) DB() kv.RwDB { return s.db }
func (s *StagedStreamSync) IsBeacon() bool { return s.isBeacon }
func (s *StagedStreamSync) IsExplorer() bool { return s.isExplorer }
func (s *StagedStreamSync) LogPrefix() string {
if s == nil {
return ""
}
return s.logPrefixes[s.currentStage]
}
func (s *StagedStreamSync) PrevRevertPoint() *uint64 { return s.prevRevertPoint }
func (s *StagedStreamSync) NewRevertState(id SyncStageID, revertPoint uint64) *RevertState {
return &RevertState{id, revertPoint, s}
}
func (s *StagedStreamSync) CleanUpStageState(id SyncStageID, forwardProgress uint64, tx kv.Tx, db kv.RwDB) (*CleanUpState, error) {
var pruneProgress uint64
var err error
if errV := CreateView(context.Background(), db, tx, func(tx kv.Tx) error {
pruneProgress, err = GetStageCleanUpProgress(tx, id, s.isBeacon)
if err != nil {
return err
}
return nil
}); errV != nil {
return nil, errV
}
return &CleanUpState{id, forwardProgress, pruneProgress, s}, nil
}
func (s *StagedStreamSync) NextStage() {
if s == nil {
return
}
s.currentStage++
}
// IsBefore returns true if stage1 goes before stage2 in staged sync
func (s *StagedStreamSync) IsBefore(stage1, stage2 SyncStageID) bool {
idx1 := -1
idx2 := -1
for i, stage := range s.stages {
if stage.ID == stage1 {
idx1 = i
}
if stage.ID == stage2 {
idx2 = i
}
}
return idx1 < idx2
}
// IsAfter returns true if stage1 goes after stage2 in staged sync
func (s *StagedStreamSync) IsAfter(stage1, stage2 SyncStageID) bool {
idx1 := -1
idx2 := -1
for i, stage := range s.stages {
if stage.ID == stage1 {
idx1 = i
}
if stage.ID == stage2 {
idx2 = i
}
}
return idx1 > idx2
}
func (s *StagedStreamSync) RevertTo(revertPoint uint64, invalidBlockNumber uint64, invalidBlockHash common.Hash, invalidBlockStreamID sttypes.StreamID) {
utils.Logger().Info().
Interface("invalidBlockNumber", invalidBlockNumber).
Interface("invalidBlockHash", invalidBlockHash).
Interface("invalidBlockStreamID", invalidBlockStreamID).
Uint64("revertPoint", revertPoint).
Msgf(WrapStagedSyncMsg("Reverting blocks"))
s.revertPoint = &revertPoint
if invalidBlockNumber > 0 || invalidBlockHash != (common.Hash{}) {
resetBadStreams := !s.invalidBlock.Active
s.invalidBlock.set(invalidBlockNumber, invalidBlockHash, resetBadStreams)
s.invalidBlock.addBadStream(invalidBlockStreamID)
}
}
func (s *StagedStreamSync) Done() {
s.currentStage = uint(len(s.stages))
s.revertPoint = nil
}
func (s *StagedStreamSync) IsDone() bool {
return s.currentStage >= uint(len(s.stages)) && s.revertPoint == nil
}
func (s *StagedStreamSync) SetCurrentStage(id SyncStageID) error {
for i, stage := range s.stages {
if stage.ID == id {
s.currentStage = uint(i)
return nil
}
}
return ErrStageNotFound
}
func (s *StagedStreamSync) StageState(stage SyncStageID, tx kv.Tx, db kv.RwDB) (*StageState, error) {
var blockNum uint64
var err error
if errV := CreateView(context.Background(), db, tx, func(rtx kv.Tx) error {
blockNum, err = GetStageProgress(rtx, stage, s.isBeacon)
if err != nil {
return err
}
return nil
}); errV != nil {
return nil, errV
}
return &StageState{s, stage, blockNum}, nil
}
func (s *StagedStreamSync) cleanUp(fromStage int, db kv.RwDB, tx kv.RwTx, firstCycle bool) error {
found := false
for i := 0; i < len(s.pruningOrder); i++ {
if s.pruningOrder[i].ID == s.stages[fromStage].ID {
found = true
}
if !found || s.pruningOrder[i] == nil || s.pruningOrder[i].Disabled {
continue
}
if err := s.pruneStage(firstCycle, s.pruningOrder[i], db, tx); err != nil {
panic(err)
}
}
return nil
}
func New(ctx context.Context,
bc core.BlockChain,
db kv.RwDB,
stagesList []*Stage,
isBeacon bool,
protocol syncProtocol,
useMemDB bool,
config Config,
logger zerolog.Logger,
) *StagedStreamSync {
revertStages := make([]*Stage, len(stagesList))
for i, stageIndex := range DefaultRevertOrder {
for _, s := range stagesList {
if s.ID == stageIndex {
revertStages[i] = s
break
}
}
}
pruneStages := make([]*Stage, len(stagesList))
for i, stageIndex := range DefaultCleanUpOrder {
for _, s := range stagesList {
if s.ID == stageIndex {
pruneStages[i] = s
break
}
}
}
logPrefixes := make([]string, len(stagesList))
for i := range stagesList {
logPrefixes[i] = fmt.Sprintf("%d/%d %s", i+1, len(stagesList), stagesList[i].ID)
}
status := newStatus()
return &StagedStreamSync{
ctx: ctx,
bc: bc,
isBeacon: isBeacon,
db: db,
protocol: protocol,
gbm: nil,
status: status,
inserted: 0,
config: config,
logger: logger,
stages: stagesList,
currentStage: 0,
revertOrder: revertStages,
pruningOrder: pruneStages,
logPrefixes: logPrefixes,
UseMemDB: useMemDB,
}
}
func (s *StagedStreamSync) doGetCurrentNumberRequest() (uint64, sttypes.StreamID, error) {
ctx, cancel := context.WithTimeout(s.ctx, 10*time.Second)
defer cancel()
bn, stid, err := s.protocol.GetCurrentBlockNumber(ctx, syncproto.WithHighPriority())
if err != nil {
return 0, stid, err
}
return bn, stid, nil
}
func (s *StagedStreamSync) promLabels() prometheus.Labels {
sid := s.bc.ShardID()
return prometheus.Labels{"ShardID": fmt.Sprintf("%d", sid)}
}
func (s *StagedStreamSync) checkHaveEnoughStreams() error {
numStreams := s.protocol.NumStreams()
if numStreams < s.config.MinStreams {
return fmt.Errorf("number of streams smaller than minimum: %v < %v",
numStreams, s.config.MinStreams)
}
return nil
}
func (s *StagedStreamSync) SetNewContext(ctx context.Context) error {
for _, stg := range s.stages {
stg.Handler.SetStageContext(ctx)
}
return nil
}
func (s *StagedStreamSync) Run(db kv.RwDB, tx kv.RwTx, firstCycle bool) error {
s.prevRevertPoint = nil
s.timings = s.timings[:0]
for !s.IsDone() {
if s.revertPoint != nil {
s.prevRevertPoint = s.revertPoint
s.revertPoint = nil
if !s.invalidBlock.Active {
for j := 0; j < len(s.revertOrder); j++ {
if s.revertOrder[j] == nil || s.revertOrder[j].Disabled {
continue
}
if err := s.revertStage(firstCycle, s.revertOrder[j], db, tx); err != nil {
utils.Logger().Error().
Err(err).
Interface("stage id", s.revertOrder[j].ID).
Msgf(WrapStagedSyncMsg("revert stage failed"))
return err
}
}
}
if err := s.SetCurrentStage(s.stages[0].ID); err != nil {
return err
}
firstCycle = false
}
stage := s.stages[s.currentStage]
if stage.Disabled {
utils.Logger().Trace().
Msgf(WrapStagedSyncMsg(fmt.Sprintf("%s disabled. %s", stage.ID, stage.DisabledDescription)))
s.NextStage()
continue
}
if err := s.runStage(stage, db, tx, firstCycle, s.invalidBlock.Active); err != nil {
utils.Logger().Error().
Err(err).
Interface("stage id", stage.ID).
Msgf(WrapStagedSyncMsg("stage failed"))
return err
}
s.NextStage()
}
if err := s.cleanUp(0, db, tx, firstCycle); err != nil {
utils.Logger().Error().
Err(err).
Msgf(WrapStagedSyncMsg("stages cleanup failed"))
return err
}
if err := s.SetCurrentStage(s.stages[0].ID); err != nil {
return err
}
if err := printLogs(tx, s.timings); err != nil {
return err
}
s.currentStage = 0
return nil
}
func CreateView(ctx context.Context, db kv.RwDB, tx kv.Tx, f func(tx kv.Tx) error) error {
if tx != nil {
return f(tx)
}
return db.View(context.Background(), func(etx kv.Tx) error {
return f(etx)
})
}
func ByteCount(b uint64) string {
const unit = 1024
if b < unit {
return fmt.Sprintf("%dB", b)
}
div, exp := uint64(unit), 0
for n := b / unit; n >= unit; n /= unit {
div *= unit
exp++
}
return fmt.Sprintf("%.1f%cB",
float64(b)/float64(div), "KMGTPE"[exp])
}
func printLogs(tx kv.RwTx, timings []Timing) error {
var logCtx []interface{}
count := 0
for i := range timings {
if timings[i].took < 50*time.Millisecond {
continue
}
count++
if count == 50 {
break
}
if timings[i].isRevert {
logCtx = append(logCtx, "Revert "+string(timings[i].stage), timings[i].took.Truncate(time.Millisecond).String())
} else if timings[i].isCleanUp {
logCtx = append(logCtx, "CleanUp "+string(timings[i].stage), timings[i].took.Truncate(time.Millisecond).String())
} else {
logCtx = append(logCtx, string(timings[i].stage), timings[i].took.Truncate(time.Millisecond).String())
}
}
if len(logCtx) > 0 {
utils.Logger().Info().
Msgf(WrapStagedSyncMsg(fmt.Sprintf("Timings (slower than 50ms) %v", logCtx...)))
}
if tx == nil {
return nil
}
if len(logCtx) > 0 { // also don't print these logs if everything is fast
buckets := Buckets
bucketSizes := make([]interface{}, 0, 2*len(buckets))
for _, bucket := range buckets {
sz, err1 := tx.BucketSize(bucket)
if err1 != nil {
return err1
}
bucketSizes = append(bucketSizes, bucket, ByteCount(sz))
}
utils.Logger().Info().
Msgf(WrapStagedSyncMsg(fmt.Sprintf("Tables %v", bucketSizes...)))
}
tx.CollectMetrics()
return nil
}
func (s *StagedStreamSync) runStage(stage *Stage, db kv.RwDB, tx kv.RwTx, firstCycle bool, invalidBlockRevert bool) (err error) {
start := time.Now()
stageState, err := s.StageState(stage.ID, tx, db)
if err != nil {
return err
}
if err = stage.Handler.Exec(firstCycle, invalidBlockRevert, stageState, s, tx); err != nil {
utils.Logger().Error().
Err(err).
Interface("stage id", stage.ID).
Msgf(WrapStagedSyncMsg("stage failed"))
return fmt.Errorf("[%s] %w", s.LogPrefix(), err)
}
took := time.Since(start)
if took > 60*time.Second {
logPrefix := s.LogPrefix()
utils.Logger().Info().
Msgf(WrapStagedSyncMsg(fmt.Sprintf("%s: DONE in %d", logPrefix, took)))
}
s.timings = append(s.timings, Timing{stage: stage.ID, took: took})
return nil
}
func (s *StagedStreamSync) revertStage(firstCycle bool, stage *Stage, db kv.RwDB, tx kv.RwTx) error {
start := time.Now()
stageState, err := s.StageState(stage.ID, tx, db)
if err != nil {
return err
}
revert := s.NewRevertState(stage.ID, *s.revertPoint)
if stageState.BlockNumber <= revert.RevertPoint {
return nil
}
if err = s.SetCurrentStage(stage.ID); err != nil {
return err
}
err = stage.Handler.Revert(firstCycle, revert, stageState, tx)
if err != nil {
return fmt.Errorf("[%s] %w", s.LogPrefix(), err)
}
took := time.Since(start)
if took > 60*time.Second {
logPrefix := s.LogPrefix()
utils.Logger().Info().
Msgf(WrapStagedSyncMsg(fmt.Sprintf("%s: Revert done in %d", logPrefix, took)))
}
s.timings = append(s.timings, Timing{isRevert: true, stage: stage.ID, took: took})
return nil
}
func (s *StagedStreamSync) pruneStage(firstCycle bool, stage *Stage, db kv.RwDB, tx kv.RwTx) error {
start := time.Now()
stageState, err := s.StageState(stage.ID, tx, db)
if err != nil {
return err
}
prune, err := s.CleanUpStageState(stage.ID, stageState.BlockNumber, tx, db)
if err != nil {
return err
}
if err = s.SetCurrentStage(stage.ID); err != nil {
return err
}
err = stage.Handler.CleanUp(firstCycle, prune, tx)
if err != nil {
return fmt.Errorf("[%s] %w", s.LogPrefix(), err)
}
took := time.Since(start)
if took > 60*time.Second {
logPrefix := s.LogPrefix()
utils.Logger().Info().
Msgf(WrapStagedSyncMsg(fmt.Sprintf("%s: CleanUp done in %d", logPrefix, took)))
}
s.timings = append(s.timings, Timing{isCleanUp: true, stage: stage.ID, took: took})
return nil
}
// DisableAllStages disables all stages including their reverts
func (s *StagedStreamSync) DisableAllStages() []SyncStageID {
var backupEnabledIds []SyncStageID
for i := range s.stages {
if !s.stages[i].Disabled {
backupEnabledIds = append(backupEnabledIds, s.stages[i].ID)
}
}
for i := range s.stages {
s.stages[i].Disabled = true
}
return backupEnabledIds
}
func (s *StagedStreamSync) DisableStages(ids ...SyncStageID) {
for i := range s.stages {
for _, id := range ids {
if s.stages[i].ID != id {
continue
}
s.stages[i].Disabled = true
}
}
}
func (s *StagedStreamSync) EnableStages(ids ...SyncStageID) {
for i := range s.stages {
for _, id := range ids {
if s.stages[i].ID != id {
continue
}
s.stages[i].Disabled = false
}
}
}

@ -0,0 +1,71 @@
package stagedstreamsync
import (
"github.com/ledgerwatch/erigon-lib/kv"
)
// SyncStageID represents the stages in the Mode.StagedSync mode
type SyncStageID string
const (
Heads SyncStageID = "Heads" // Heads are downloaded
ShortRange SyncStageID = "ShortRange" // short range
SyncEpoch SyncStageID = "SyncEpoch" // epoch sync
BlockBodies SyncStageID = "BlockBodies" // Block bodies are downloaded, TxHash and UncleHash are getting verified
States SyncStageID = "States" // will construct most recent state from downloaded blocks
Finish SyncStageID = "Finish" // Nominal stage after all other stages
)
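// Illustrative only (an assumption — the canonical ordering lives in default_stages.go):
// the forward execution order implied by CreateStagedSync is
// Heads -> SyncEpoch -> ShortRange -> BlockBodies -> States -> Finish.
var exampleForwardOrder = []SyncStageID{Heads, SyncEpoch, ShortRange, BlockBodies, States, Finish}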
func GetStageName(stage string, isBeacon bool, prune bool) string {
name := stage
if isBeacon {
name = "beacon_" + name
}
if prune {
name = "prune_" + name
}
return name
}
func GetStageID(stage SyncStageID, isBeacon bool, prune bool) []byte {
return []byte(GetStageName(string(stage), isBeacon, prune))
}
func GetBucketName(bucketName string, isBeacon bool) string {
name := bucketName
if isBeacon {
name = "Beacon" + name
}
return name
}
// GetStageProgress retrieves saved progress of a given sync stage from the database
func GetStageProgress(db kv.Getter, stage SyncStageID, isBeacon bool) (uint64, error) {
stgID := GetStageID(stage, isBeacon, false)
v, err := db.GetOne(kv.SyncStageProgress, stgID)
if err != nil {
return 0, err
}
return unmarshalData(v)
}
// SaveStageProgress saves progress of given sync stage
func SaveStageProgress(db kv.Putter, stage SyncStageID, isBeacon bool, progress uint64) error {
stgID := GetStageID(stage, isBeacon, false)
return db.Put(kv.SyncStageProgress, stgID, marshalData(progress))
}
// GetStageCleanUpProgress retrieves saved progress of given sync stage from the database
func GetStageCleanUpProgress(db kv.Getter, stage SyncStageID, isBeacon bool) (uint64, error) {
stgID := GetStageID(stage, isBeacon, true)
v, err := db.GetOne(kv.SyncStageProgress, stgID)
if err != nil {
return 0, err
}
return unmarshalData(v)
}
func SaveStageCleanUpProgress(db kv.Putter, stage SyncStageID, isBeacon bool, progress uint64) error {
stgID := GetStageID(stage, isBeacon, true)
return db.Put(kv.SyncStageProgress, stgID, marshalData(progress))
}
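// Illustrative only (not part of this change): a kv.RwTx satisfies both kv.Putter
// and kv.Getter, so stage progress can be written and read back within a single
// Update call. This sketch assumes the "context" import and that the
// kv.SyncStageProgress bucket exists in the target database.
func saveAndLoadProgress(db kv.RwDB, progress uint64) (uint64, error) {
    var got uint64
    err := db.Update(context.Background(), func(tx kv.RwTx) error {
        if err := SaveStageProgress(tx, BlockBodies, false, progress); err != nil {
            return err
        }
        var errG error
        got, errG = GetStageProgress(tx, BlockBodies, false)
        return errG
    })
    return got, err
}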

@ -0,0 +1,320 @@
package stagedstreamsync
import (
"context"
"fmt"
"sync"
"time"
"github.com/harmony-one/harmony/core"
"github.com/harmony-one/harmony/internal/utils"
sttypes "github.com/harmony-one/harmony/p2p/stream/types"
"github.com/harmony-one/harmony/shard"
"github.com/ledgerwatch/erigon-lib/kv"
"github.com/ledgerwatch/erigon-lib/kv/mdbx"
"github.com/ledgerwatch/erigon-lib/kv/memdb"
"github.com/ledgerwatch/log/v3"
"github.com/pkg/errors"
"github.com/rs/zerolog"
)
const (
BlocksBucket = "BlockBodies"
BlockSignaturesBucket = "BlockSignatures"
StageProgressBucket = "StageProgress"
// cache db keys
LastBlockHeight = "LastBlockHeight"
LastBlockHash = "LastBlockHash"
)
var Buckets = []string{
BlocksBucket,
BlockSignaturesBucket,
StageProgressBucket,
}
// CreateStagedSync creates an instance of staged sync
func CreateStagedSync(ctx context.Context,
bc core.BlockChain,
UseMemDB bool,
protocol syncProtocol,
config Config,
logger zerolog.Logger,
logProgress bool,
) (*StagedStreamSync, error) {
isBeacon := bc.ShardID() == shard.BeaconChainShardID
var mainDB kv.RwDB
dbs := make([]kv.RwDB, config.Concurrency)
if UseMemDB {
mainDB = memdb.New()
for i := 0; i < config.Concurrency; i++ {
dbs[i] = memdb.New()
}
} else {
mainDB = mdbx.NewMDBX(log.New()).Path(GetBlockDbPath(isBeacon, -1)).MustOpen()
for i := 0; i < config.Concurrency; i++ {
dbPath := GetBlockDbPath(isBeacon, i)
dbs[i] = mdbx.NewMDBX(log.New()).Path(dbPath).MustOpen()
}
}
if errInitDB := initDB(ctx, mainDB, dbs, config.Concurrency); errInitDB != nil {
return nil, errInitDB
}
stageHeadsCfg := NewStageHeadersCfg(ctx, bc, mainDB)
stageShortRangeCfg := NewStageShortRangeCfg(ctx, bc, mainDB)
stageSyncEpochCfg := NewStageEpochCfg(ctx, bc, mainDB)
stageBodiesCfg := NewStageBodiesCfg(ctx, bc, mainDB, dbs, config.Concurrency, protocol, isBeacon, logProgress)
stageStatesCfg := NewStageStatesCfg(ctx, bc, mainDB, dbs, config.Concurrency, logger, logProgress)
stageFinishCfg := NewStageFinishCfg(ctx, mainDB)
stages := DefaultStages(ctx,
stageHeadsCfg,
stageSyncEpochCfg,
stageShortRangeCfg,
stageBodiesCfg,
stageStatesCfg,
stageFinishCfg,
)
return New(ctx,
bc,
mainDB,
stages,
isBeacon,
protocol,
UseMemDB,
config,
logger,
), nil
}
// initDB inits the sync loop main database and creates buckets
func initDB(ctx context.Context, mainDB kv.RwDB, dbs []kv.RwDB, concurrency int) error {
// create buckets for mainDB
tx, errRW := mainDB.BeginRw(ctx)
if errRW != nil {
return errRW
}
defer tx.Rollback()
for _, name := range Buckets {
if err := tx.CreateBucket(GetStageName(name, false, false)); err != nil {
return err
}
}
if err := tx.Commit(); err != nil {
return err
}
// create buckets for block cache DBs
for _, db := range dbs {
tx, errRW := db.BeginRw(ctx)
if errRW != nil {
return errRW
}
if err := tx.CreateBucket(BlocksBucket); err != nil {
return err
}
if err := tx.CreateBucket(BlockSignaturesBucket); err != nil {
return err
}
if err := tx.Commit(); err != nil {
return err
}
}
return nil
}
func GetBlockDbPath(beacon bool, loopID int) string {
if beacon {
if loopID >= 0 {
return fmt.Sprintf("%s_%d", "cache/beacon_blocks_db", loopID)
} else {
return "cache/beacon_blocks_db_main"
}
} else {
if loopID >= 0 {
return fmt.Sprintf("%s_%d", "cache/blocks_db", loopID)
} else {
return "cache/blocks_db_main"
}
}
}
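// For illustration, the layout above yields cache paths like:
//   beacon chain: cache/beacon_blocks_db_main, cache/beacon_blocks_db_0, cache/beacon_blocks_db_1, ...
//   shard chains: cache/blocks_db_main, cache/blocks_db_0, cache/blocks_db_1, ...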
// doSync does the long range sync.
// One LongRangeSync consists of several iterations.
// For each iteration, estimate the current block number, then fetch blocks and insert them into the blockchain
func (s *StagedStreamSync) doSync(downloaderContext context.Context, initSync bool) (int, error) {
var totalInserted int
s.initSync = initSync
if err := s.checkPrerequisites(); err != nil {
return 0, err
}
var estimatedHeight uint64
if initSync {
if h, err := s.estimateCurrentNumber(); err != nil {
return 0, err
} else {
estimatedHeight = h
//TODO: use directly currentCycle var
s.status.setTargetBN(estimatedHeight)
}
if curBN := s.bc.CurrentBlock().NumberU64(); estimatedHeight <= curBN {
s.logger.Info().Uint64("current number", curBN).Uint64("target number", estimatedHeight).
Msg(WrapStagedSyncMsg("early return of long range sync"))
return 0, nil
}
s.startSyncing()
defer s.finishSyncing()
}
for {
ctx, cancel := context.WithCancel(downloaderContext)
s.ctx = ctx
s.SetNewContext(ctx)
n, err := s.doSyncCycle(ctx, initSync)
if err != nil {
pl := s.promLabels()
pl["error"] = err.Error()
numFailedDownloadCounterVec.With(pl).Inc()
cancel()
return totalInserted + n, err
}
cancel()
totalInserted += n
// if it's not long range sync, skip loop
if n < LastMileBlocksThreshold || !initSync {
return totalInserted, nil
}
}
}
func (s *StagedStreamSync) doSyncCycle(ctx context.Context, initSync bool) (int, error) {
// TODO: initSync=true means currentCycleNumber==0, so we can remove initSync
var totalInserted int
s.inserted = 0
startHead := s.bc.CurrentBlock().NumberU64()
canRunCycleInOneTransaction := false
var tx kv.RwTx
if canRunCycleInOneTransaction {
var err error
if tx, err = s.DB().BeginRw(context.Background()); err != nil {
return totalInserted, err
}
defer tx.Rollback()
}
startTime := time.Now()
// Do one cycle of staged sync
initialCycle := s.currentCycle.Number == 0
if err := s.Run(s.DB(), tx, initialCycle); err != nil {
utils.Logger().Error().
Err(err).
Bool("isBeacon", s.isBeacon).
Uint32("shard", s.bc.ShardID()).
Uint64("currentHeight", startHead).
Msgf(WrapStagedSyncMsg("sync cycle failed"))
return totalInserted, err
}
totalInserted += s.inserted
s.currentCycle.lock.Lock()
s.currentCycle.Number++
s.currentCycle.lock.Unlock()
// calculating sync speed (blocks/second)
if s.LogProgress && s.inserted > 0 {
dt := time.Since(startTime).Seconds()
speed := float64(0)
if dt > 0 {
speed = float64(s.inserted) / dt
}
syncSpeed := fmt.Sprintf("%.2f", speed)
fmt.Println("sync speed:", syncSpeed, "blocks/s")
}
return totalInserted, nil
}
func (s *StagedStreamSync) startSyncing() {
s.status.startSyncing()
if s.evtDownloadStartedSubscribed {
s.evtDownloadStarted.Send(struct{}{})
}
}
func (s *StagedStreamSync) finishSyncing() {
s.status.finishSyncing()
if s.evtDownloadFinishedSubscribed {
s.evtDownloadFinished.Send(struct{}{})
}
}
func (s *StagedStreamSync) checkPrerequisites() error {
return s.checkHaveEnoughStreams()
}
// estimateCurrentNumber roughly estimates the current block number.
// The block number does not need to be exact, but just a temporary target of the iteration
func (s *StagedStreamSync) estimateCurrentNumber() (uint64, error) {
var (
cnResults = make(map[sttypes.StreamID]uint64)
lock sync.Mutex
wg sync.WaitGroup
)
wg.Add(s.config.Concurrency)
for i := 0; i != s.config.Concurrency; i++ {
go func() {
defer wg.Done()
bn, stid, err := s.doGetCurrentNumberRequest()
if err != nil {
s.logger.Err(err).Str("streamID", string(stid)).
Msg(WrapStagedSyncMsg("getCurrentNumber request failed"))
if !errors.Is(err, context.Canceled) {
s.protocol.StreamFailed(stid, "getCurrentNumber request failed")
}
return
}
lock.Lock()
cnResults[stid] = bn
lock.Unlock()
}()
}
wg.Wait()
if len(cnResults) == 0 {
select {
case <-s.ctx.Done():
return 0, s.ctx.Err()
default:
}
return 0, errors.New("zero block number response from remote nodes")
}
bn := computeBlockNumberByMaxVote(cnResults)
return bn, nil
}
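// Illustrative only (an assumption — computeBlockNumberByMaxVote is defined
// elsewhere in this package): a max-vote pick chooses the block number reported
// by the most streams, breaking ties toward the higher number.
func maxVoteBlockNumber(results map[sttypes.StreamID]uint64) uint64 {
    votes := make(map[uint64]int)
    for _, bn := range results {
        votes[bn]++
    }
    var bestBN uint64
    var bestCount int
    for bn, count := range votes {
        if count > bestCount || (count == bestCount && bn > bestBN) {
            bestCount = count
            bestBN = bn
        }
    }
    return bestBN
}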

@ -0,0 +1,287 @@
package stagedstreamsync
import (
"container/heap"
"sync"
"github.com/ethereum/go-ethereum/common"
"github.com/harmony-one/harmony/core/types"
sttypes "github.com/harmony-one/harmony/p2p/stream/types"
)
var (
emptyHash common.Hash
)
type status struct {
isSyncing bool
targetBN uint64
lock sync.Mutex
}
func newStatus() status {
return status{}
}
func (s *status) startSyncing() {
s.lock.Lock()
defer s.lock.Unlock()
s.isSyncing = true
}
func (s *status) setTargetBN(val uint64) {
s.lock.Lock()
defer s.lock.Unlock()
s.targetBN = val
}
func (s *status) finishSyncing() {
s.lock.Lock()
defer s.lock.Unlock()
s.isSyncing = false
s.targetBN = 0
}
func (s *status) get() (bool, uint64) {
s.lock.Lock()
defer s.lock.Unlock()
return s.isSyncing, s.targetBN
}
type getBlocksResult struct {
bns []uint64
blocks []*types.Block
stid sttypes.StreamID
}
type resultQueue struct {
results *priorityQueue
lock sync.Mutex
}
func newResultQueue() *resultQueue {
pq := make(priorityQueue, 0, 200) // 200 - rough estimate
heap.Init(&pq)
return &resultQueue{
results: &pq,
}
}
// addBlockResults adds the blocks to the result queue to be processed by insertChainLoop.
// Nil blocks in the list are skipped.
func (rq *resultQueue) addBlockResults(blocks []*types.Block, stid sttypes.StreamID) {
rq.lock.Lock()
defer rq.lock.Unlock()
for _, block := range blocks {
if block == nil {
continue
}
heap.Push(rq.results, &blockResult{
block: block,
stid: stid,
})
}
return
}
// popBlockResults pops a continuous list of blocks starting at expStartBN, with size capped by cap.
// It returns the stale block numbers as the second return value.
func (rq *resultQueue) popBlockResults(expStartBN uint64, cap int) ([]*blockResult, []uint64) {
rq.lock.Lock()
defer rq.lock.Unlock()
var (
res = make([]*blockResult, 0, cap)
stales []uint64
)
for cnt := 0; rq.results.Len() > 0 && cnt < cap; cnt++ {
br := heap.Pop(rq.results).(*blockResult)
// stale block number
if br.block.NumberU64() < expStartBN {
stales = append(stales, br.block.NumberU64())
continue
}
if br.block.NumberU64() != expStartBN {
heap.Push(rq.results, br)
return res, stales
}
res = append(res, br)
expStartBN++
}
return res, stales
}
// removeResultsByStreamID removes the block results of the given stream and returns the block
// numbers removed from the queue
func (rq *resultQueue) removeResultsByStreamID(stid sttypes.StreamID) []uint64 {
rq.lock.Lock()
defer rq.lock.Unlock()
var removed []uint64
Loop:
for {
for i, res := range *rq.results {
blockRes := res.(*blockResult)
if blockRes.stid == stid {
rq.removeByIndex(i)
removed = append(removed, blockRes.block.NumberU64())
goto Loop
}
}
break
}
return removed
}
func (rq *resultQueue) length() int {
return len(*rq.results)
}
func (rq *resultQueue) removeByIndex(index int) {
heap.Remove(rq.results, index)
}
// bnPrioritizedItem is the item which uses block number to determine its priority
type bnPrioritizedItem interface {
getBlockNumber() uint64
}
type blockResult struct {
block *types.Block
stid sttypes.StreamID
}
func (br *blockResult) getBlockNumber() uint64 {
return br.block.NumberU64()
}
func blockResultsToBlocks(results []*blockResult) []*types.Block {
blocks := make([]*types.Block, 0, len(results))
for _, result := range results {
blocks = append(blocks, result.block)
}
return blocks
}
type (
prioritizedNumber uint64
prioritizedNumbers struct {
q *priorityQueue
}
)
func (b prioritizedNumber) getBlockNumber() uint64 {
return uint64(b)
}
func newPrioritizedNumbers() *prioritizedNumbers {
pqs := make(priorityQueue, 0)
heap.Init(&pqs)
return &prioritizedNumbers{
q: &pqs,
}
}
func (pbs *prioritizedNumbers) push(bn uint64) {
heap.Push(pbs.q, prioritizedNumber(bn))
}
func (pbs *prioritizedNumbers) pop() uint64 {
if pbs.q.Len() == 0 {
return 0
}
item := heap.Pop(pbs.q)
return uint64(item.(prioritizedNumber))
}
func (pbs *prioritizedNumbers) length() int {
return len(*pbs.q)
}
type (
blockByNumber types.Block
// blocksByNumber is the priority queue ordered by number
blocksByNumber struct {
q *priorityQueue
cap int
}
)
func (b *blockByNumber) getBlockNumber() uint64 {
raw := (*types.Block)(b)
return raw.NumberU64()
}
func newBlocksByNumber(cap int) *blocksByNumber {
pqs := make(priorityQueue, 0)
heap.Init(&pqs)
return &blocksByNumber{
q: &pqs,
cap: cap,
}
}
func (bs *blocksByNumber) push(b *types.Block) {
heap.Push(bs.q, (*blockByNumber)(b))
for bs.q.Len() > bs.cap {
heap.Pop(bs.q)
}
}
func (bs *blocksByNumber) pop() *types.Block {
if bs.q.Len() == 0 {
return nil
}
item := heap.Pop(bs.q)
return (*types.Block)(item.(*blockByNumber))
}
func (bs *blocksByNumber) len() int {
return bs.q.Len()
}
// priorityQueue is a priority queue in which the lowest block number has the highest priority
type priorityQueue []bnPrioritizedItem
func (q priorityQueue) Len() int {
return len(q)
}
func (q priorityQueue) Less(i, j int) bool {
bn1 := q[i].getBlockNumber()
bn2 := q[j].getBlockNumber()
return bn1 < bn2 // small block number has higher priority
}
func (q priorityQueue) Swap(i, j int) {
q[i], q[j] = q[j], q[i]
}
func (q *priorityQueue) Push(x interface{}) {
item, ok := x.(bnPrioritizedItem)
if !ok {
panic("wrong type of getBlockNumber interface")
}
*q = append(*q, item)
}
func (q *priorityQueue) Pop() interface{} {
prev := *q
n := len(prev)
if n == 0 {
return nil
}
res := prev[n-1]
*q = prev[0 : n-1]
return res
}

@ -0,0 +1,266 @@
package stagedstreamsync
import (
"container/heap"
"fmt"
"math/big"
"strings"
"testing"
"github.com/harmony-one/harmony/block"
headerV3 "github.com/harmony-one/harmony/block/v3"
"github.com/harmony-one/harmony/core/types"
bls_cosi "github.com/harmony-one/harmony/crypto/bls"
sttypes "github.com/harmony-one/harmony/p2p/stream/types"
)
func TestResultQueue_AddBlockResults(t *testing.T) {
tests := []struct {
initBNs []uint64
addBNs []uint64
expSize int
}{
{
initBNs: []uint64{},
addBNs: []uint64{1, 2, 3, 4},
expSize: 4,
},
{
initBNs: []uint64{1, 2, 3, 4},
addBNs: []uint64{5, 6, 7, 8},
expSize: 8,
},
}
for i, test := range tests {
rq := makeTestResultQueue(test.initBNs)
rq.addBlockResults(makeTestBlocks(test.addBNs), "")
if rq.results.Len() != test.expSize {
t.Errorf("Test %v: unexpected size: %v / %v", i, rq.results.Len(), test.expSize)
}
}
}
func TestResultQueue_PopBlockResults(t *testing.T) {
tests := []struct {
initBNs []uint64
cap int
expStart uint64
expSize int
staleSize int
}{
{
initBNs: []uint64{1, 2, 3, 4, 5},
cap: 3,
expStart: 1,
expSize: 3,
staleSize: 0,
},
{
initBNs: []uint64{1, 2, 3, 4, 5},
cap: 10,
expStart: 1,
expSize: 5,
staleSize: 0,
},
{
initBNs: []uint64{1, 3, 4, 5},
cap: 10,
expStart: 1,
expSize: 1,
staleSize: 0,
},
{
initBNs: []uint64{1, 2, 3, 4, 5},
cap: 10,
expStart: 0,
expSize: 0,
staleSize: 0,
},
{
initBNs: []uint64{1, 1, 1, 1, 2},
cap: 10,
expStart: 1,
expSize: 2,
staleSize: 3,
},
{
initBNs: []uint64{1, 2, 3, 4, 5},
cap: 10,
expStart: 2,
expSize: 4,
staleSize: 1,
},
}
for i, test := range tests {
rq := makeTestResultQueue(test.initBNs)
res, stales := rq.popBlockResults(test.expStart, test.cap)
if len(res) != test.expSize {
t.Errorf("Test %v: unexpect size %v / %v", i, len(res), test.expSize)
}
if len(stales) != test.staleSize {
t.Errorf("Test %v: unexpect stale size %v / %v", i, len(stales), test.staleSize)
}
}
}
func TestResultQueue_RemoveResultsByStreamID(t *testing.T) {
tests := []struct {
rq *resultQueue
rmStreamID sttypes.StreamID
removed int
expSize int
}{
{
rq: makeTestResultQueue([]uint64{1, 2, 3, 4}),
rmStreamID: "test stream id",
removed: 4,
expSize: 0,
},
{
rq: func() *resultQueue {
rq := makeTestResultQueue([]uint64{2, 3, 4, 5})
rq.addBlockResults([]*types.Block{
makeTestBlock(1),
makeTestBlock(5),
makeTestBlock(6),
}, "another test stream id")
return rq
}(),
rmStreamID: "test stream id",
removed: 4,
expSize: 3,
},
{
rq: func() *resultQueue {
rq := makeTestResultQueue([]uint64{2, 3, 4, 5})
rq.addBlockResults([]*types.Block{
makeTestBlock(1),
makeTestBlock(5),
makeTestBlock(6),
}, "another test stream id")
return rq
}(),
rmStreamID: "another test stream id",
removed: 3,
expSize: 4,
},
}
for i, test := range tests {
res := test.rq.removeResultsByStreamID(test.rmStreamID)
if len(res) != test.removed {
t.Errorf("Test %v: unexpected number removed %v / %v", i, len(res), test.removed)
}
if gotSize := test.rq.results.Len(); gotSize != test.expSize {
t.Errorf("Test %v: unexpected number after removal %v / %v", i, gotSize, test.expSize)
}
}
}
func makeTestResultQueue(bns []uint64) *resultQueue {
rq := newResultQueue()
for _, bn := range bns {
heap.Push(rq.results, &blockResult{
block: makeTestBlock(bn),
stid: "test stream id",
})
}
return rq
}
func TestPrioritizedBlocks(t *testing.T) {
addBNs := []uint64{4, 7, 6, 9}
bns := newPrioritizedNumbers()
for _, bn := range addBNs {
bns.push(bn)
}
prevBN := uint64(0)
for len(*bns.q) > 0 {
b := bns.pop()
if b < prevBN {
t.Errorf("number not incrementing")
}
prevBN = b
}
if last := bns.pop(); last != 0 {
t.Errorf("last elem is not 0")
}
}
func TestBlocksByNumber(t *testing.T) {
addBNs := []uint64{4, 7, 6, 9}
bns := newBlocksByNumber(10)
for _, bn := range addBNs {
bns.push(makeTestBlock(bn))
}
if bns.len() != len(addBNs) {
t.Errorf("size unexpected: %v / %v", bns.len(), len(addBNs))
}
prevBN := uint64(0)
for len(*bns.q) > 0 {
b := bns.pop()
if b.NumberU64() < prevBN {
t.Errorf("number not incrementing")
}
prevBN = b.NumberU64()
}
if lastBlock := bns.pop(); lastBlock != nil {
t.Errorf("last block is not nil")
}
}
func TestPriorityQueue(t *testing.T) {
testBNs := []uint64{1, 9, 2, 4, 5, 12}
pq := make(priorityQueue, 0, 10)
heap.Init(&pq)
for _, bn := range testBNs {
heap.Push(&pq, &blockResult{
block: makeTestBlock(bn),
stid: "",
})
}
cmpBN := uint64(0)
for pq.Len() > 0 {
bn := heap.Pop(&pq).(*blockResult).block.NumberU64()
if bn < cmpBN {
t.Errorf("not incrementing")
}
cmpBN = bn
}
if pq.Len() != 0 {
t.Errorf("after poping, size not 0")
}
}
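The test above drives priorityQueue only through container/heap, so its expected behaviour is a min-heap of *blockResult ordered by block number. A minimal sketch consistent with that usage is shown below; it is inferred from the tests for illustration and is not necessarily the exact implementation in types.go.
// Minimal sketch (assumption): a min-heap of *blockResult ordered by block
// number, satisfying container/heap.Interface as the tests above require.
type priorityQueue []*blockResult

func (q priorityQueue) Len() int           { return len(q) }
func (q priorityQueue) Less(i, j int) bool { return q[i].block.NumberU64() < q[j].block.NumberU64() }
func (q priorityQueue) Swap(i, j int)      { q[i], q[j] = q[j], q[i] }

// Push and Pop use pointer receivers because they change the slice's length.
func (q *priorityQueue) Push(x interface{}) { *q = append(*q, x.(*blockResult)) }

func (q *priorityQueue) Pop() interface{} {
	old := *q
	n := len(old)
	last := old[n-1]
	*q = old[:n-1]
	return last
}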
func makeTestBlocks(bns []uint64) []*types.Block {
blocks := make([]*types.Block, 0, len(bns))
for _, bn := range bns {
blocks = append(blocks, makeTestBlock(bn))
}
return blocks
}
func makeTestBlock(bn uint64) *types.Block {
testHeader := &block.Header{Header: headerV3.NewHeader()}
testHeader.SetNumber(big.NewInt(int64(bn)))
testHeader.SetLastCommitSignature(bls_cosi.SerializedSignature{})
testHeader.SetLastCommitBitmap(make([]byte, 10))
block := types.NewBlockWithHeader(testHeader)
block.SetCurrentCommitSig(make([]byte, 106))
return block
}
func assertError(got, expect error) error {
if (got == nil) != (expect == nil) {
return fmt.Errorf("unexpected error [%v] / [%v]", got, expect)
}
if (got == nil) || (expect == nil) {
return nil
}
if !strings.Contains(got.Error(), expect.Error()) {
return fmt.Errorf("unexpected error [%v] / [%v]", got, expect)
}
return nil
}

@ -39,6 +39,7 @@ import (
"github.com/harmony-one/harmony/api/service" "github.com/harmony-one/harmony/api/service"
"github.com/harmony-one/harmony/api/service/pprof" "github.com/harmony-one/harmony/api/service/pprof"
"github.com/harmony-one/harmony/api/service/prometheus" "github.com/harmony-one/harmony/api/service/prometheus"
"github.com/harmony-one/harmony/api/service/stagedstreamsync"
"github.com/harmony-one/harmony/api/service/synchronize" "github.com/harmony-one/harmony/api/service/synchronize"
"github.com/harmony-one/harmony/common/fdlimit" "github.com/harmony-one/harmony/common/fdlimit"
"github.com/harmony-one/harmony/common/ntp" "github.com/harmony-one/harmony/common/ntp"
@ -415,8 +416,12 @@ func setupNodeAndRun(hc harmonyconfig.HarmonyConfig) {
// Setup services // Setup services
if hc.Sync.Enabled { if hc.Sync.Enabled {
if hc.Sync.StagedSync {
setupStagedSyncService(currentNode, myHost, hc)
} else {
setupSyncService(currentNode, myHost, hc) setupSyncService(currentNode, myHost, hc)
} }
}
if currentNode.NodeConfig.Role() == nodeconfig.Validator { if currentNode.NodeConfig.Role() == nodeconfig.Validator {
currentNode.RegisterValidatorServices() currentNode.RegisterValidatorServices()
} else if currentNode.NodeConfig.Role() == nodeconfig.ExplorerNode { } else if currentNode.NodeConfig.Role() == nodeconfig.ExplorerNode {
@ -906,6 +911,45 @@ func setupSyncService(node *node.Node, host p2p.Host, hc harmonyconfig.HarmonyCo
} }
} }
func setupStagedSyncService(node *node.Node, host p2p.Host, hc harmonyconfig.HarmonyConfig) {
blockchains := []core.BlockChain{node.Blockchain()}
if !node.IsRunningBeaconChain() {
blockchains = append(blockchains, node.Beaconchain())
}
sConfig := stagedstreamsync.Config{
ServerOnly: !hc.Sync.Downloader,
Network: nodeconfig.NetworkType(hc.Network.NetworkType),
Concurrency: hc.Sync.Concurrency,
MinStreams: hc.Sync.MinPeers,
InitStreams: hc.Sync.InitStreams,
SmSoftLowCap: hc.Sync.DiscSoftLowCap,
SmHardLowCap: hc.Sync.DiscHardLowCap,
SmHiCap: hc.Sync.DiscHighCap,
SmDiscBatch: hc.Sync.DiscBatch,
LogProgress: node.NodeConfig.LogProgress,
}
// If we are running a side chain, we need to do some extra work for beacon sync.
if !node.IsRunningBeaconChain() {
sConfig.BHConfig = &stagedstreamsync.BeaconHelperConfig{
BlockC: node.BeaconBlockChannel,
InsertHook: node.BeaconSyncHook,
}
}
// Setup staged stream sync service
s := stagedstreamsync.NewService(host, blockchains, sConfig)
node.RegisterService(service.StagedStreamSync, s)
d := s.Downloaders.GetShardDownloader(node.Blockchain().ShardID())
if hc.Sync.Downloader && hc.General.NodeType != nodeTypeExplorer {
node.Consensus.SetDownloader(d) // Set downloader when stream client is active
}
}
func setupBlacklist(hc harmonyconfig.HarmonyConfig) (map[ethCommon.Address]struct{}, error) { func setupBlacklist(hc harmonyconfig.HarmonyConfig) (map[ethCommon.Address]struct{}, error) {
rosetta_common.InitRosettaFile(hc.TxPool.RosettaFixFile) rosetta_common.InitRosettaFile(hc.TxPool.RosettaFixFile)

@ -165,6 +165,7 @@ require (
github.com/libp2p/go-cidranger v1.1.0 // indirect github.com/libp2p/go-cidranger v1.1.0 // indirect
github.com/libp2p/go-flow-metrics v0.1.0 // indirect github.com/libp2p/go-flow-metrics v0.1.0 // indirect
github.com/libp2p/go-libp2p-asn-util v0.2.0 // indirect github.com/libp2p/go-libp2p-asn-util v0.2.0 // indirect
github.com/libp2p/go-libp2p-core v0.20.1 // indirect
github.com/libp2p/go-libp2p-kbucket v0.5.0 // indirect github.com/libp2p/go-libp2p-kbucket v0.5.0 // indirect
github.com/libp2p/go-libp2p-record v0.2.0 // indirect github.com/libp2p/go-libp2p-record v0.2.0 // indirect
github.com/libp2p/go-msgio v0.2.0 // indirect github.com/libp2p/go-msgio v0.2.0 // indirect

@ -594,6 +594,8 @@ github.com/libp2p/go-libp2p v0.24.0 h1:DQk/5bBon+yUVIGTeRVBmOYpZzoBHx/VTC0xoLgJG
github.com/libp2p/go-libp2p v0.24.0/go.mod h1:28t24CYDlnBs23rIs1OclU89YbhgibrBq2LFbMe+cFw= github.com/libp2p/go-libp2p v0.24.0/go.mod h1:28t24CYDlnBs23rIs1OclU89YbhgibrBq2LFbMe+cFw=
github.com/libp2p/go-libp2p-asn-util v0.2.0 h1:rg3+Os8jbnO5DxkC7K/Utdi+DkY3q/d1/1q+8WeNAsw= github.com/libp2p/go-libp2p-asn-util v0.2.0 h1:rg3+Os8jbnO5DxkC7K/Utdi+DkY3q/d1/1q+8WeNAsw=
github.com/libp2p/go-libp2p-asn-util v0.2.0/go.mod h1:WoaWxbHKBymSN41hWSq/lGKJEca7TNm58+gGJi2WsLI= github.com/libp2p/go-libp2p-asn-util v0.2.0/go.mod h1:WoaWxbHKBymSN41hWSq/lGKJEca7TNm58+gGJi2WsLI=
github.com/libp2p/go-libp2p-core v0.20.1 h1:fQz4BJyIFmSZAiTbKV8qoYhEH5Dtv/cVhZbG3Ib/+Cw=
github.com/libp2p/go-libp2p-core v0.20.1/go.mod h1:6zR8H7CvQWgYLsbG4on6oLNSGcyKaYFSEYyDt51+bIY=
github.com/libp2p/go-libp2p-kad-dht v0.19.0 h1:2HuiInHZTm9ZvQajaqdaPLHr0PCKKigWiflakimttE0= github.com/libp2p/go-libp2p-kad-dht v0.19.0 h1:2HuiInHZTm9ZvQajaqdaPLHr0PCKKigWiflakimttE0=
github.com/libp2p/go-libp2p-kad-dht v0.19.0/go.mod h1:qPIXdiZsLczhV4/+4EO1jE8ae0YCW4ZOogc4WVIyTEU= github.com/libp2p/go-libp2p-kad-dht v0.19.0/go.mod h1:qPIXdiZsLczhV4/+4EO1jE8ae0YCW4ZOogc4WVIyTEU=
github.com/libp2p/go-libp2p-kbucket v0.5.0 h1:g/7tVm8ACHDxH29BGrpsQlnNeu+6OF1A9bno/4/U1oA= github.com/libp2p/go-libp2p-kbucket v0.5.0 h1:g/7tVm8ACHDxH29BGrpsQlnNeu+6OF1A9bno/4/U1oA=

@ -35,7 +35,8 @@ type (
// Only run stream sync protocol as a server. // Only run stream sync protocol as a server.
// TODO: remove this when stream sync is fully up. // TODO: remove this when stream sync is fully up.
ServerOnly bool ServerOnly bool
// use staged sync
Staged bool
// parameters // parameters
Network nodeconfig.NetworkType Network nodeconfig.NetworkType
Concurrency int // Number of concurrent sync requests Concurrency int // Number of concurrent sync requests

@ -23,7 +23,7 @@ var (
consensusTriggeredDownloadCounterVec = prometheus.NewCounterVec( consensusTriggeredDownloadCounterVec = prometheus.NewCounterVec(
prometheus.CounterOpts{ prometheus.CounterOpts{
Namespace: "hmy", Namespace: "hmy",
Subsystem: "downloader", Subsystem: "StreamSync",
Name: "consensus_trigger", Name: "consensus_trigger",
Help: "number of times consensus triggered download task", Help: "number of times consensus triggered download task",
}, },
@ -33,7 +33,7 @@ var (
longRangeSyncedBlockCounterVec = prometheus.NewCounterVec( longRangeSyncedBlockCounterVec = prometheus.NewCounterVec(
prometheus.CounterOpts{ prometheus.CounterOpts{
Namespace: "hmy", Namespace: "hmy",
Subsystem: "downloader", Subsystem: "StreamSync",
Name: "num_blocks_synced_long_range", Name: "num_blocks_synced_long_range",
Help: "number of blocks synced in long range sync", Help: "number of blocks synced in long range sync",
}, },
@ -43,7 +43,7 @@ var (
longRangeFailInsertedBlockCounterVec = prometheus.NewCounterVec( longRangeFailInsertedBlockCounterVec = prometheus.NewCounterVec(
prometheus.CounterOpts{ prometheus.CounterOpts{
Namespace: "hmy", Namespace: "hmy",
Subsystem: "downloader", Subsystem: "StreamSync",
Name: "num_blocks_failed_long_range", Name: "num_blocks_failed_long_range",
Help: "number of blocks failed to insert into change in long range sync", Help: "number of blocks failed to insert into change in long range sync",
}, },
@ -53,7 +53,7 @@ var (
numShortRangeCounterVec = prometheus.NewCounterVec( numShortRangeCounterVec = prometheus.NewCounterVec(
prometheus.CounterOpts{ prometheus.CounterOpts{
Namespace: "hmy", Namespace: "hmy",
Subsystem: "downloader", Subsystem: "StreamSync",
Name: "num_short_range", Name: "num_short_range",
Help: "number of short range sync is triggered", Help: "number of short range sync is triggered",
}, },
@ -63,7 +63,7 @@ var (
numFailedDownloadCounterVec = prometheus.NewCounterVec( numFailedDownloadCounterVec = prometheus.NewCounterVec(
prometheus.CounterOpts{ prometheus.CounterOpts{
Namespace: "hmy", Namespace: "hmy",
Subsystem: "downloader", Subsystem: "StreamSync",
Name: "failed_download", Name: "failed_download",
Help: "number of downloading is failed", Help: "number of downloading is failed",
}, },
@ -73,7 +73,7 @@ var (
numBlocksInsertedShortRangeHistogramVec = prometheus.NewHistogramVec( numBlocksInsertedShortRangeHistogramVec = prometheus.NewHistogramVec(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Namespace: "hmy", Namespace: "hmy",
Subsystem: "downloader", Subsystem: "StreamSync",
Name: "num_blocks_inserted_short_range", Name: "num_blocks_inserted_short_range",
Help: "number of blocks inserted for each short range sync", Help: "number of blocks inserted for each short range sync",
// Buckets: 0, 1, 2, 4, +INF (capped at 10) // Buckets: 0, 1, 2, 4, +INF (capped at 10)
@ -85,7 +85,7 @@ var (
numBlocksInsertedBeaconHelperCounter = prometheus.NewCounter( numBlocksInsertedBeaconHelperCounter = prometheus.NewCounter(
prometheus.CounterOpts{ prometheus.CounterOpts{
Namespace: "hmy", Namespace: "hmy",
Subsystem: "downloader", Subsystem: "StreamSync",
Name: "num_blocks_inserted_beacon_helper", Name: "num_blocks_inserted_beacon_helper",
Help: "number of blocks inserted from beacon helper", Help: "number of blocks inserted from beacon helper",
}, },

@ -20,7 +20,7 @@ import (
// doShortRangeSync does the short range sync. // doShortRangeSync does the short range sync.
// Compared with long range sync, short range sync is more focused on syncing to the latest block. // Compared with long range sync, short range sync is more focused on syncing to the latest block.
// It consist of 3 steps: // It consist of 3 steps:
// 1. Obtain the block hashes and ompute the longest hash chain.. // 1. Obtain the block hashes and compute the longest hash chain..
// 2. Get blocks by hashes from computed hash chain. // 2. Get blocks by hashes from computed hash chain.
// 3. Insert the blocks to blockchain. // 3. Insert the blocks to blockchain.
func (d *Downloader) doShortRangeSync() (int, error) { func (d *Downloader) doShortRangeSync() (int, error) {
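For illustration, the three steps read roughly as follows. This is a hedged sketch with hypothetical interfaces and helper names, not the body of doShortRangeSync, and it assumes the package's usual imports (context, common, types).
// Hedged sketch of the three-step flow described in the comment above.
// shortRangeHelper and chainInserter are hypothetical interfaces used only to
// keep the sketch self-contained; they do not mirror the real downloader types.
type shortRangeHelper interface {
	GetBlockHashesChain(ctx context.Context) ([]common.Hash, error)
	GetBlocksByHashes(ctx context.Context, hashes []common.Hash) ([]*types.Block, error)
}

type chainInserter interface {
	InsertChain(blocks []*types.Block) (int, error)
}

func shortRangeSyncSketch(ctx context.Context, h shortRangeHelper, c chainInserter) (int, error) {
	// 1. Obtain the block hashes and compute the longest hash chain.
	hashes, err := h.GetBlockHashesChain(ctx)
	if err != nil || len(hashes) == 0 {
		return 0, err
	}
	// 2. Get blocks by hashes from the computed hash chain.
	blocks, err := h.GetBlocksByHashes(ctx, hashes)
	if err != nil {
		return 0, err
	}
	// 3. Insert the blocks into the blockchain.
	return c.InsertChain(blocks)
}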

@ -22,11 +22,11 @@ import (
"github.com/harmony-one/harmony/api/service/legacysync" "github.com/harmony-one/harmony/api/service/legacysync"
legdownloader "github.com/harmony-one/harmony/api/service/legacysync/downloader" legdownloader "github.com/harmony-one/harmony/api/service/legacysync/downloader"
downloader_pb "github.com/harmony-one/harmony/api/service/legacysync/downloader/proto" downloader_pb "github.com/harmony-one/harmony/api/service/legacysync/downloader/proto"
"github.com/harmony-one/harmony/api/service/stagedstreamsync"
"github.com/harmony-one/harmony/api/service/stagedsync" "github.com/harmony-one/harmony/api/service/stagedsync"
"github.com/harmony-one/harmony/api/service/synchronize" "github.com/harmony-one/harmony/api/service/synchronize"
"github.com/harmony-one/harmony/core" "github.com/harmony-one/harmony/core"
"github.com/harmony-one/harmony/core/types" "github.com/harmony-one/harmony/core/types"
"github.com/harmony-one/harmony/hmy/downloader"
nodeconfig "github.com/harmony-one/harmony/internal/configs/node" nodeconfig "github.com/harmony-one/harmony/internal/configs/node"
"github.com/harmony-one/harmony/internal/utils" "github.com/harmony-one/harmony/internal/utils"
"github.com/harmony-one/harmony/node/worker" "github.com/harmony-one/harmony/node/worker"
@ -811,7 +811,7 @@ func (node *Node) legacySyncStatus(shardID uint32) (bool, uint64, uint64) {
} }
} }
// IsOutOfSync return whether the node is out of sync of the given hsardID // IsOutOfSync return whether the node is out of sync of the given shardID
func (node *Node) IsOutOfSync(shardID uint32) bool { func (node *Node) IsOutOfSync(shardID uint32) bool {
ds := node.getDownloaders() ds := node.getDownloaders()
if ds == nil || !ds.IsActive() { if ds == nil || !ds.IsActive() {
@ -859,7 +859,28 @@ func (node *Node) SyncPeers() map[string]int {
return res return res
} }
func (node *Node) getDownloaders() *downloader.Downloaders { type Downloaders interface {
Start()
Close()
DownloadAsync(shardID uint32)
// GetShardDownloader(shardID uint32) *Downloader
NumPeers() map[uint32]int
SyncStatus(shardID uint32) (bool, uint64, uint64)
IsActive() bool
}
func (node *Node) getDownloaders() Downloaders {
if node.NodeConfig.StagedSync {
syncService := node.serviceManager.GetService(service.StagedStreamSync)
if syncService == nil {
return nil
}
dsService, ok := syncService.(*stagedstreamsync.StagedStreamSyncService)
if !ok {
return nil
}
return dsService.Downloaders
} else {
syncService := node.serviceManager.GetService(service.Synchronize) syncService := node.serviceManager.GetService(service.Synchronize)
if syncService == nil { if syncService == nil {
return nil return nil
@ -869,4 +890,5 @@ func (node *Node) getDownloaders() *downloader.Downloaders {
return nil return nil
} }
return dsService.Downloaders return dsService.Downloaders
}
} }
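The Downloaders interface above is what lets existing call sites such as IsOutOfSync and SyncPeers behave the same whether the legacy stream downloader or the staged stream sync service is registered. A hedged sketch of an interface-only call site follows; activePeerCount is a hypothetical helper, not part of this change.
// Hedged sketch: a caller that depends only on the Downloaders interface, so it
// works for both the legacy downloader and staged stream sync.
func activePeerCount(ds Downloaders) int {
	if ds == nil || !ds.IsActive() {
		return 0
	}
	total := 0
	for _, numPeers := range ds.NumPeers() {
		total += numPeers
	}
	return total
}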

@ -114,6 +114,18 @@ func (st *testStream) CloseOnExit() error {
return nil return nil
} }
func (st *testStream) FailedTimes() int {
return 0
}
func (st *testStream) AddFailedTimes() {
return
}
func (st *testStream) ResetFailedRimes() {
return
}
func makeDummyTestStreams(indexes []int) []sttypes.Stream { func makeDummyTestStreams(indexes []int) []sttypes.Stream {
sts := make([]sttypes.Stream, 0, len(indexes)) sts := make([]sttypes.Stream, 0, len(indexes))

@ -70,6 +70,18 @@ func (st *testStream) ReadBytes() ([]byte, error) {
return nil, nil return nil, nil
} }
func (st *testStream) FailedTimes() int {
return 0
}
func (st *testStream) AddFailedTimes() {
return
}
func (st *testStream) ResetFailedRimes() {
return
}
func (st *testStream) Close() error { func (st *testStream) Close() error {
if st.closed { if st.closed {
return errors.New("already closed") return errors.New("already closed")

@ -10,6 +10,7 @@ import (
"github.com/harmony-one/abool" "github.com/harmony-one/abool"
"github.com/harmony-one/harmony/internal/utils" "github.com/harmony-one/harmony/internal/utils"
sttypes "github.com/harmony-one/harmony/p2p/stream/types" sttypes "github.com/harmony-one/harmony/p2p/stream/types"
"github.com/harmony-one/harmony/shard"
"github.com/libp2p/go-libp2p/core/network" "github.com/libp2p/go-libp2p/core/network"
libp2p_peer "github.com/libp2p/go-libp2p/core/peer" libp2p_peer "github.com/libp2p/go-libp2p/core/peer"
"github.com/libp2p/go-libp2p/core/protocol" "github.com/libp2p/go-libp2p/core/protocol"
@ -73,6 +74,9 @@ func newStreamManager(pid sttypes.ProtoID, host host, pf peerFinder, handleStrea
protoSpec, _ := sttypes.ProtoIDToProtoSpec(pid) protoSpec, _ := sttypes.ProtoIDToProtoSpec(pid)
fmt.Println("my peer id: ", host.ID().String())
fmt.Println("my proto id: ", pid)
return &streamManager{ return &streamManager{
myProtoID: pid, myProtoID: pid,
myProtoSpec: protoSpec, myProtoSpec: protoSpec,
@ -234,6 +238,9 @@ func (sm *streamManager) sanityCheckStream(st sttypes.Stream) error {
if mySpec.ShardID != rmSpec.ShardID { if mySpec.ShardID != rmSpec.ShardID {
return fmt.Errorf("unexpected shard ID: %v/%v", rmSpec.ShardID, mySpec.ShardID) return fmt.Errorf("unexpected shard ID: %v/%v", rmSpec.ShardID, mySpec.ShardID)
} }
if mySpec.ShardID == shard.BeaconChainShardID && !rmSpec.BeaconNode {
return fmt.Errorf("unexpected beacon node with shard ID: %v/%v", rmSpec.ShardID, mySpec.ShardID)
}
return nil return nil
} }
@ -323,7 +330,7 @@ func (sm *streamManager) discoverAndSetupStream(discCtx context.Context) (int, e
} }
func (sm *streamManager) discover(ctx context.Context) (<-chan libp2p_peer.AddrInfo, error) { func (sm *streamManager) discover(ctx context.Context) (<-chan libp2p_peer.AddrInfo, error) {
protoID := string(sm.myProtoID) protoID := sm.targetProtoID()
discBatch := sm.config.DiscBatch discBatch := sm.config.DiscBatch
if sm.config.HiCap-sm.streams.size() < sm.config.DiscBatch { if sm.config.HiCap-sm.streams.size() < sm.config.DiscBatch {
discBatch = sm.config.HiCap - sm.streams.size() discBatch = sm.config.HiCap - sm.streams.size()
@ -340,6 +347,14 @@ func (sm *streamManager) discover(ctx context.Context) (<-chan libp2p_peer.AddrI
return sm.pf.FindPeers(ctx2, protoID, discBatch) return sm.pf.FindPeers(ctx2, protoID, discBatch)
} }
func (sm *streamManager) targetProtoID() string {
targetSpec := sm.myProtoSpec
if targetSpec.ShardID == shard.BeaconChainShardID { // for beacon chain, only connect to beacon nodes
targetSpec.BeaconNode = true
}
return string(targetSpec.ToProtoID())
}
func (sm *streamManager) setupStreamWithPeer(ctx context.Context, pid libp2p_peer.ID) error { func (sm *streamManager) setupStreamWithPeer(ctx context.Context, pid libp2p_peer.ID) error {
timer := prometheus.NewTimer(setupStreamDuration.With(prometheus.Labels{"topic": string(sm.myProtoID)})) timer := prometheus.NewTimer(setupStreamDuration.With(prometheus.Labels{"topic": string(sm.myProtoID)}))
defer timer.ObserveDuration() defer timer.ObserveDuration()
@ -347,7 +362,7 @@ func (sm *streamManager) setupStreamWithPeer(ctx context.Context, pid libp2p_pee
nCtx, cancel := context.WithTimeout(ctx, connectTimeout) nCtx, cancel := context.WithTimeout(ctx, connectTimeout)
defer cancel() defer cancel()
st, err := sm.host.NewStream(nCtx, pid, protocol.ID(sm.myProtoID)) st, err := sm.host.NewStream(nCtx, pid, protocol.ID(sm.targetProtoID()))
if err != nil { if err != nil {
return err return err
} }
@ -392,6 +407,10 @@ func (ss *streamSet) get(id sttypes.StreamID) (sttypes.Stream, bool) {
ss.lock.RLock() ss.lock.RLock()
defer ss.lock.RUnlock() defer ss.lock.RUnlock()
if id == "" {
return nil, false
}
st, ok := ss.streams[id] st, ok := ss.streams[id]
return st, ok return st, ok
} }

@ -43,6 +43,36 @@ func (p *Protocol) GetBlocksByNumber(ctx context.Context, bns []uint64, opts ...
return return
} }
func (p *Protocol) GetRawBlocksByNumber(ctx context.Context, bns []uint64, opts ...Option) (blockBytes [][]byte, sigBytes [][]byte, stid sttypes.StreamID, err error) {
timer := p.doMetricClientRequest("getBlocksByNumber")
defer p.doMetricPostClientRequest("getBlocksByNumber", err, timer)
if len(bns) == 0 {
err = fmt.Errorf("zero block numbers requested")
return
}
if len(bns) > GetBlocksByNumAmountCap {
err = fmt.Errorf("number of blocks exceed cap of %v", GetBlocksByNumAmountCap)
return
}
req := newGetBlocksByNumberRequest(bns)
resp, stid, err := p.rm.DoRequest(ctx, req, opts...)
if err != nil {
// At this point, error can be context canceled, context timed out, or waiting queue
// is already full.
return
}
// Parse and return blocks
sResp, ok := resp.(*syncResponse)
if !ok || sResp == nil {
err = errors.New("not sync response")
return
}
blockBytes, sigBytes, err = req.parseBlockBytesAndSigs(sResp)
return
}
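GetRawBlocksByNumber hands back undecoded block and commit-signature bytes so callers can defer decoding. Below is a hedged sketch of how a caller might decode the result, assuming the blocks are RLP encoded and the signature is attached with SetCurrentCommitSig as in the test helper earlier in this change; decodeRawBlocks is hypothetical.
// Hedged sketch: decode raw block bytes returned by GetRawBlocksByNumber.
// Assumes RLP-encoded blocks; an empty entry means the peer lacked that block.
func decodeRawBlocks(blockBytes [][]byte, sigBytes [][]byte) ([]*types.Block, error) {
	blocks := make([]*types.Block, 0, len(blockBytes))
	for i, bb := range blockBytes {
		if len(bb) == 0 {
			continue // peer did not have this block
		}
		var blk *types.Block
		if err := rlp.DecodeBytes(bb, &blk); err != nil {
			return nil, err
		}
		if i < len(sigBytes) && len(sigBytes[i]) > 0 {
			blk.SetCurrentCommitSig(sigBytes[i])
		}
		blocks = append(blocks, blk)
	}
	return blocks, nil
}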
// GetCurrentBlockNumber get the current block number from remote node // GetCurrentBlockNumber get the current block number from remote node
func (p *Protocol) GetCurrentBlockNumber(ctx context.Context, opts ...Option) (bn uint64, stid sttypes.StreamID, err error) { func (p *Protocol) GetCurrentBlockNumber(ctx context.Context, opts ...Option) (bn uint64, stid sttypes.StreamID, err error) {
timer := p.doMetricClientRequest("getBlockNumber") timer := p.doMetricClientRequest("getBlockNumber")

@ -17,6 +17,9 @@ const (
// See comments for GetBlocksByNumAmountCap. // See comments for GetBlocksByNumAmountCap.
GetBlocksByHashesAmountCap = 10 GetBlocksByHashesAmountCap = 10
// MaxStreamFailures is the maximum number of failures allowed before a stream gets removed
MaxStreamFailures = 3
// minAdvertiseInterval is the minimum advertise interval // minAdvertiseInterval is the minimum advertise interval
minAdvertiseInterval = 1 * time.Minute minAdvertiseInterval = 1 * time.Minute

@ -2,11 +2,13 @@ package sync
import ( import (
"context" "context"
"fmt"
"strconv" "strconv"
"time" "time"
"github.com/ethereum/go-ethereum/event" "github.com/ethereum/go-ethereum/event"
"github.com/harmony-one/harmony/consensus/engine" "github.com/harmony-one/harmony/consensus/engine"
"github.com/harmony-one/harmony/core"
nodeconfig "github.com/harmony-one/harmony/internal/configs/node" nodeconfig "github.com/harmony-one/harmony/internal/configs/node"
shardingconfig "github.com/harmony-one/harmony/internal/configs/sharding" shardingconfig "github.com/harmony-one/harmony/internal/configs/sharding"
"github.com/harmony-one/harmony/internal/utils" "github.com/harmony-one/harmony/internal/utils"
@ -15,6 +17,7 @@ import (
"github.com/harmony-one/harmony/p2p/stream/common/requestmanager" "github.com/harmony-one/harmony/p2p/stream/common/requestmanager"
"github.com/harmony-one/harmony/p2p/stream/common/streammanager" "github.com/harmony-one/harmony/p2p/stream/common/streammanager"
sttypes "github.com/harmony-one/harmony/p2p/stream/types" sttypes "github.com/harmony-one/harmony/p2p/stream/types"
"github.com/harmony-one/harmony/shard"
"github.com/hashicorp/go-version" "github.com/hashicorp/go-version"
libp2p_host "github.com/libp2p/go-libp2p/core/host" libp2p_host "github.com/libp2p/go-libp2p/core/host"
libp2p_network "github.com/libp2p/go-libp2p/core/network" libp2p_network "github.com/libp2p/go-libp2p/core/network"
@ -40,6 +43,7 @@ type (
// Protocol is the protocol for sync streaming // Protocol is the protocol for sync streaming
Protocol struct { Protocol struct {
chain engine.ChainReader // provide SYNC data chain engine.ChainReader // provide SYNC data
beaconNode bool // is beacon node or shard chain node
schedule shardingconfig.Schedule // provide schedule information schedule shardingconfig.Schedule // provide schedule information
rl ratelimiter.RateLimiter // limit the incoming request rate rl ratelimiter.RateLimiter // limit the incoming request rate
sm streammanager.StreamManager // stream management sm streammanager.StreamManager // stream management
@ -74,8 +78,13 @@ type (
func NewProtocol(config Config) *Protocol { func NewProtocol(config Config) *Protocol {
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
isBeaconNode := config.Chain.ShardID() == shard.BeaconChainShardID
if _, ok := config.Chain.(*core.EpochChain); ok {
isBeaconNode = false
}
sp := &Protocol{ sp := &Protocol{
chain: config.Chain, chain: config.Chain,
beaconNode: isBeaconNode,
disc: config.Discovery, disc: config.Discovery,
config: config, config: config,
ctx: ctx, ctx: ctx,
@ -162,6 +171,7 @@ func (p *Protocol) HandleStream(raw libp2p_network.Stream) {
Msg("failed to add new stream") Msg("failed to add new stream")
return return
} }
fmt.Println("Node connected to", raw.Conn().RemotePeer().String(), "(", st.ProtoID(), ")")
st.run() st.run()
} }
@ -219,18 +229,40 @@ func (p *Protocol) protoIDByVersion(v *version.Version) sttypes.ProtoID {
NetworkType: p.config.Network, NetworkType: p.config.Network,
ShardID: p.config.ShardID, ShardID: p.config.ShardID,
Version: v, Version: v,
BeaconNode: p.beaconNode,
} }
return spec.ToProtoID() return spec.ToProtoID()
} }
// RemoveStream removes the stream of the given stream ID // RemoveStream removes the stream of the given stream ID
// TODO: add reason to parameters
func (p *Protocol) RemoveStream(stID sttypes.StreamID) { func (p *Protocol) RemoveStream(stID sttypes.StreamID) {
if stID == "" { st, exist := p.sm.GetStreamByID(stID)
return if exist && st != nil {
//TODO: log this incident with reason
st.Close()
// stream manager removes this stream from the list and triggers discovery if the number of streams is not enough
p.sm.RemoveStream(stID) //TODO: double check to see if this part is needed
} }
}
func (p *Protocol) StreamFailed(stID sttypes.StreamID, reason string) {
st, exist := p.sm.GetStreamByID(stID) st, exist := p.sm.GetStreamByID(stID)
if exist && st != nil { if exist && st != nil {
st.AddFailedTimes()
p.logger.Info().
Str("stream ID", string(st.ID())).
Int("num failures", st.FailedTimes()).
Str("reason", reason).
Msg("stream failed")
if st.FailedTimes() >= MaxStreamFailures {
st.Close() st.Close()
// stream manager removes this stream from the list and triggers discovery if the number of streams is not enough
p.sm.RemoveStream(stID) //TODO: double check to see if this part is needed
p.logger.Warn().
Str("stream ID", string(st.ID())).
Msg("stream removed")
}
} }
} }
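StreamFailed, together with MaxStreamFailures from const.go above, evicts streams that keep failing instead of dropping them on the first error. A hedged sketch of how a request path might report failures follows; it assumes GetBlocksByNumber returns the blocks, the serving stream ID, and an error like the other client calls, and fetchWithFailureTracking is hypothetical.
// Hedged sketch: report a failed request so the protocol closes and removes the
// stream once it has failed MaxStreamFailures times. Hypothetical caller shape.
func fetchWithFailureTracking(ctx context.Context, p *Protocol, bns []uint64) ([]*types.Block, error) {
	blocks, stid, err := p.GetBlocksByNumber(ctx, bns)
	if err != nil {
		if stid != "" {
			p.StreamFailed(stid, "getBlocksByNumber failed")
		}
		return nil, err
	}
	return blocks, nil
}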

@ -21,6 +21,9 @@ type Stream interface {
ReadBytes() ([]byte, error) ReadBytes() ([]byte, error)
Close() error Close() error
CloseOnExit() error CloseOnExit() error
FailedTimes() int
AddFailedTimes()
ResetFailedRimes()
} }
// BaseStream is the wrapper around // BaseStream is the wrapper around
@ -34,6 +37,8 @@ type BaseStream struct {
spec ProtoSpec spec ProtoSpec
specErr error specErr error
specOnce sync.Once specOnce sync.Once
failedTimes int
} }
// NewBaseStream creates BaseStream as the wrapper of libp2p Stream // NewBaseStream creates BaseStream as the wrapper of libp2p Stream
@ -42,6 +47,7 @@ func NewBaseStream(st libp2p_network.Stream) *BaseStream {
return &BaseStream{ return &BaseStream{
raw: st, raw: st,
reader: reader, reader: reader,
failedTimes: 0,
} }
} }
@ -72,6 +78,18 @@ func (st *BaseStream) Close() error {
return st.raw.Reset() return st.raw.Reset()
} }
func (st *BaseStream) FailedTimes() int {
return st.failedTimes
}
func (st *BaseStream) AddFailedTimes() {
st.failedTimes++
}
func (st *BaseStream) ResetFailedRimes() {
st.failedTimes = 0
}
const ( const (
maxMsgBytes = 20 * 1024 * 1024 // 20MB maxMsgBytes = 20 * 1024 * 1024 // 20MB
sizeBytes = 4 // uint32 sizeBytes = 4 // uint32

@ -11,7 +11,7 @@ import (
nodeconfig "github.com/harmony-one/harmony/internal/configs/node" nodeconfig "github.com/harmony-one/harmony/internal/configs/node"
"github.com/hashicorp/go-version" "github.com/hashicorp/go-version"
libp2p_proto "github.com/libp2p/go-libp2p/core/protocol" libp2p_proto "github.com/libp2p/go-libp2p-core/protocol"
"github.com/pkg/errors" "github.com/pkg/errors"
) )
@ -20,10 +20,10 @@ const (
ProtoIDCommonPrefix = "harmony" ProtoIDCommonPrefix = "harmony"
// ProtoIDFormat is the format of stream protocol ID // ProtoIDFormat is the format of stream protocol ID
ProtoIDFormat = "%s/%s/%s/%d/%s" ProtoIDFormat = "%s/%s/%s/%d/%s/%d"
// protoIDNumElem is the number of elements of the ProtoID. See comments in ProtoID // protoIDNumElem is the number of elements of the ProtoID. See comments in ProtoID
protoIDNumElem = 5 protoIDNumElem = 6
) )
// ProtoID is the protocol id for streaming, an alias of libp2p stream protocol ID。 // ProtoID is the protocol id for streaming, an alias of libp2p stream protocol ID。
@ -32,6 +32,7 @@ const (
// 2. NetworkType - mainnet, testnet, stn, e.t.c. // 2. NetworkType - mainnet, testnet, stn, e.t.c.
// 3. ShardID - shard ID of the current protocol. // 3. ShardID - shard ID of the current protocol.
// 4. Version - Stream protocol version for backward compatibility. // 4. Version - Stream protocol version for backward compatibility.
// 5. BeaconNode - whether stream is from a beacon chain node or shard chain node
type ProtoID libp2p_proto.ID type ProtoID libp2p_proto.ID
// ProtoSpec is the un-serialized stream proto id specification // ProtoSpec is the un-serialized stream proto id specification
@ -43,12 +44,13 @@ type ProtoSpec struct {
NetworkType nodeconfig.NetworkType NetworkType nodeconfig.NetworkType
ShardID nodeconfig.ShardID ShardID nodeconfig.ShardID
Version *version.Version Version *version.Version
BeaconNode bool
} }
// ToProtoID convert a ProtoSpec to ProtoID. // ToProtoID convert a ProtoSpec to ProtoID.
func (spec ProtoSpec) ToProtoID() ProtoID { func (spec ProtoSpec) ToProtoID() ProtoID {
s := fmt.Sprintf(ProtoIDFormat, ProtoIDCommonPrefix, spec.Service, s := fmt.Sprintf(ProtoIDFormat, ProtoIDCommonPrefix, spec.Service,
spec.NetworkType, spec.ShardID, spec.Version.String()) spec.NetworkType, spec.ShardID, spec.Version.String(), bool2int(spec.BeaconNode))
return ProtoID(s) return ProtoID(s)
} }
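With the extra element, a protocol ID now ends in a 0/1 beacon-node flag. A hedged example of the resulting string; the field values are illustrative only and the snippet assumes the package's fmt, nodeconfig, and go-version imports.
// Hedged example: illustrative values only.
func exampleProtoID() {
	spec := ProtoSpec{
		Service:     "sync",
		NetworkType: nodeconfig.Mainnet,
		ShardID:     nodeconfig.ShardID(0),
		Version:     version.Must(version.NewVersion("1.0.0")),
		BeaconNode:  true,
	}
	fmt.Println(spec.ToProtoID()) // prints something like "harmony/sync/mainnet/0/1.0.0/1"
}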
@ -64,6 +66,7 @@ func ProtoIDToProtoSpec(id ProtoID) (ProtoSpec, error) {
networkType = comps[2] networkType = comps[2]
shardIDStr = comps[3] shardIDStr = comps[3]
versionStr = comps[4] versionStr = comps[4]
beaconnodeStr = comps[5]
) )
shardID, err := strconv.Atoi(shardIDStr) shardID, err := strconv.Atoi(shardIDStr)
if err != nil { if err != nil {
@ -76,11 +79,16 @@ func ProtoIDToProtoSpec(id ProtoID) (ProtoSpec, error) {
if err != nil { if err != nil {
return ProtoSpec{}, errors.Wrap(err, "unexpected version string") return ProtoSpec{}, errors.Wrap(err, "unexpected version string")
} }
isBeaconNode, err := strconv.Atoi(beaconnodeStr)
if err != nil {
return ProtoSpec{}, errors.Wrap(err, "invalid beacon node flag")
}
return ProtoSpec{ return ProtoSpec{
Service: service, Service: service,
NetworkType: nodeconfig.NetworkType(networkType), NetworkType: nodeconfig.NetworkType(networkType),
ShardID: nodeconfig.ShardID(uint32(shardID)), ShardID: nodeconfig.ShardID(uint32(shardID)),
Version: version, Version: version,
BeaconNode: int2bool(isBeaconNode),
}, nil }, nil
} }
@ -90,3 +98,14 @@ func GenReqID() uint64 {
rand.Read(rnd[:]) rand.Read(rnd[:])
return binary.BigEndian.Uint64(rnd[:]) return binary.BigEndian.Uint64(rnd[:])
} }
func bool2int(b bool) int {
if b {
return 1
}
return 0
}
func int2bool(i int) bool {
return i > 0
}
