diff --git a/api/service/legacysync/downloader/client.go b/api/service/legacysync/downloader/client.go index 42d5954b8..cb2ea3f4d 100644 --- a/api/service/legacysync/downloader/client.go +++ b/api/service/legacysync/downloader/client.go @@ -8,6 +8,7 @@ import ( pb "github.com/harmony-one/harmony/api/service/legacysync/downloader/proto" "github.com/harmony-one/harmony/internal/utils" "google.golang.org/grpc" + "google.golang.org/grpc/connectivity" ) // Client is the client model for downloader package. @@ -18,9 +19,12 @@ type Client struct { } // ClientSetup setups a Client given ip and port. -func ClientSetup(ip, port string) *Client { +func ClientSetup(ip, port string, withBlock bool) *Client { client := Client{} client.opts = append(client.opts, grpc.WithInsecure()) + if withBlock { + client.opts = append(client.opts, grpc.WithBlock()) + } ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() @@ -35,6 +39,37 @@ func ClientSetup(ip, port string) *Client { return &client } +// IsReady returns true if client is ready +func (client *Client) IsReady() bool { + return client.conn.GetState() == connectivity.Ready +} + +// IsConnecting returns true if client is connecting +func (client *Client) IsConnecting() bool { + return client.conn.GetState() == connectivity.Connecting +} + +// State returns current Connecting state +func (client *Client) State() connectivity.State { + return client.conn.GetState() +} + +// WaitForConnection waits for client to connect +func (client *Client) WaitForConnection(t time.Duration) bool { + ctx, cancel := context.WithTimeout(context.Background(), t) + defer cancel() + + if client.conn.GetState() == connectivity.Ready { + return true + } + + if ready := client.conn.WaitForStateChange(ctx, client.conn.GetState()); !ready { + return false + } else { + return client.conn.GetState() == connectivity.Ready + } +} + // Close closes the Client. func (client *Client) Close() { err := client.conn.Close() diff --git a/api/service/legacysync/epoch_syncing.go b/api/service/legacysync/epoch_syncing.go index 6697c90c2..f2d222b29 100644 --- a/api/service/legacysync/epoch_syncing.go +++ b/api/service/legacysync/epoch_syncing.go @@ -37,13 +37,13 @@ type EpochSync struct { // If the last result is expired, ask the remote DNS nodes for latest height and return the result. func (ss *EpochSync) GetSyncStatus() SyncCheckResult { return ss.syncStatus.Get(func() SyncCheckResult { - return ss.isInSync(false) + return ss.isSynchronized(false) }) } -// isInSync query the remote DNS node for the latest height to check what is the current +// isSynchronized query the remote DNS node for the latest height to check what is the current // sync status -func (ss *EpochSync) isInSync(_ bool) SyncCheckResult { +func (ss *EpochSync) isSynchronized(_ bool) SyncCheckResult { if ss.syncConfig == nil { return SyncCheckResult{} // If syncConfig is not instantiated, return not in sync } @@ -70,9 +70,9 @@ func (ss *EpochSync) isInSync(_ bool) SyncCheckResult { Uint64("CurrentEpoch", curEpoch). 
Msg("[EPOCHSYNC] Checking sync status") return SyncCheckResult{ - IsInSync: inSync, - OtherHeight: otherHeight1, - HeightDiff: epochDiff, + IsSynchronized: inSync, + OtherHeight: otherHeight1, + HeightDiff: epochDiff, } } diff --git a/api/service/legacysync/helpers.go b/api/service/legacysync/helpers.go index 90a5dd174..a54b3cd18 100644 --- a/api/service/legacysync/helpers.go +++ b/api/service/legacysync/helpers.go @@ -3,7 +3,6 @@ package legacysync import ( "fmt" "sync" - "time" "github.com/ethereum/go-ethereum/common/math" "github.com/harmony-one/harmony/api/service/legacysync/downloader" @@ -56,9 +55,6 @@ func createSyncConfig(syncConfig *SyncConfig, peers []p2p.Peer, shardID uint32) if err := checkPeersDuplicity(peers); err != nil { return syncConfig, err } - // limit the number of dns peers to connect - randSeed := time.Now().UnixNano() - peers = limitNumPeers(peers, randSeed) utils.Logger().Debug(). Int("len", len(peers)). @@ -78,7 +74,7 @@ func createSyncConfig(syncConfig *SyncConfig, peers []p2p.Peer, shardID uint32) wg.Add(1) go func(peer p2p.Peer) { defer wg.Done() - client := downloader.ClientSetup(peer.IP, peer.Port) + client := downloader.ClientSetup(peer.IP, peer.Port, false) if client == nil { return } diff --git a/api/service/legacysync/syncing.go b/api/service/legacysync/syncing.go index e3068e125..025ea5902 100644 --- a/api/service/legacysync/syncing.go +++ b/api/service/legacysync/syncing.go @@ -44,6 +44,9 @@ const ( numPeersHighBound = 5 downloadTaskBatch = 5 + + //LoopMinTime sync loop must take at least as this value, otherwise it waits for it + LoopMinTime = 0 ) // SyncPeerConfig is peer config to sync. @@ -588,7 +591,7 @@ func (ss *StateSync) downloadBlocks(bc core.BlockChain) { ss.syncConfig.RemovePeer(peerConfig, fmt.Sprintf("StateSync %d: error returned for GetBlocks: %s", ss.blockChain.ShardID(), err.Error())) return } - if err != nil || len(payload) == 0 { + if len(payload) == 0 { count++ utils.Logger().Error().Int("failNumber", count). Msg("[SYNC] downloadBlocks: no more retrievable blocks") @@ -855,7 +858,7 @@ func (ss *StateSync) UpdateBlockAndStatus(block *types.Block, bc core.BlockChain haveCurrentSig := len(block.GetCurrentCommitSig()) != 0 // Verify block signatures if block.NumberU64() > 1 { - // Verify signature every 100 blocks + // Verify signature every N blocks (which N is verifyHeaderBatchSize and can be adjusted in configs) verifySeal := block.NumberU64()%verifyHeaderBatchSize == 0 || verifyAllSig verifyCurrentSig := verifyAllSig && haveCurrentSig if verifyCurrentSig { @@ -894,7 +897,7 @@ func (ss *StateSync) UpdateBlockAndStatus(block *types.Block, bc core.BlockChain utils.Logger().Error(). Err(err). 
Msgf( - "[SYNC] UpdateBlockAndStatus: Error adding newck to blockchain %d %d", + "[SYNC] UpdateBlockAndStatus: Error adding new block to blockchain %d %d", block.NumberU64(), block.ShardID(), ) @@ -1058,11 +1061,14 @@ func (ss *StateSync) GetMaxPeerHeight() uint64 { } // SyncLoop will keep syncing with peers until catches up -func (ss *StateSync) SyncLoop(bc core.BlockChain, worker *worker.Worker, isBeacon bool, consensus *consensus.Consensus) { +func (ss *StateSync) SyncLoop(bc core.BlockChain, worker *worker.Worker, isBeacon bool, consensus *consensus.Consensus, loopMinTime time.Duration) { + utils.Logger().Info().Msgf("legacy sync is executing ...") if !isBeacon { ss.RegisterNodeInfo() } + for { + start := time.Now() otherHeight := getMaxPeerHeight(ss.syncConfig) currentHeight := bc.CurrentBlock().NumberU64() if currentHeight >= otherHeight { @@ -1089,6 +1095,14 @@ func (ss *StateSync) SyncLoop(bc core.BlockChain, worker *worker.Worker, isBeaco break } ss.purgeOldBlocksFromCache() + + if loopMinTime != 0 { + waitTime := loopMinTime - time.Since(start) + c := time.After(waitTime) + select { + case <-c: + } + } } if consensus != nil { if err := ss.addConsensusLastMile(bc, consensus); err != nil { @@ -1099,6 +1113,7 @@ func (ss *StateSync) SyncLoop(bc core.BlockChain, worker *worker.Worker, isBeaco consensus.UpdateConsensusInformation() } } + utils.Logger().Info().Msgf("legacy sync is executed") ss.purgeAllBlocksFromCache() } @@ -1149,12 +1164,19 @@ type ( } SyncCheckResult struct { - IsInSync bool - OtherHeight uint64 - HeightDiff uint64 + IsSynchronized bool + OtherHeight uint64 + HeightDiff uint64 } ) +func ParseResult(res SyncCheckResult) (IsSynchronized bool, OtherHeight uint64, HeightDiff uint64) { + IsSynchronized = res.IsSynchronized + OtherHeight = res.OtherHeight + HeightDiff = res.HeightDiff + return IsSynchronized, OtherHeight, HeightDiff +} + func newSyncStatus(role nodeconfig.Role) syncStatus { expiration := getSyncStatusExpiration(role) return syncStatus{ @@ -1200,6 +1222,11 @@ func (status *syncStatus) Clone() syncStatus { } } +func (ss *StateSync) IsSynchronized() bool { + result := ss.GetSyncStatus() + return result.IsSynchronized +} + func (status *syncStatus) expired() bool { return time.Since(status.lastUpdateTime) > status.expiration } @@ -1214,20 +1241,32 @@ func (status *syncStatus) update(result SyncCheckResult) { // If the last result is expired, ask the remote DNS nodes for latest height and return the result. func (ss *StateSync) GetSyncStatus() SyncCheckResult { return ss.syncStatus.Get(func() SyncCheckResult { - return ss.isInSync(false) + return ss.isSynchronized(false) }) } +func (ss *StateSync) GetParsedSyncStatus() (IsSynchronized bool, OtherHeight uint64, HeightDiff uint64) { + res := ss.syncStatus.Get(func() SyncCheckResult { + return ss.isSynchronized(false) + }) + return ParseResult(res) +} + // GetSyncStatusDoubleChecked return the sync status when enforcing a immediate query on DNS nodes // with a double check to avoid false alarm. 
func (ss *StateSync) GetSyncStatusDoubleChecked() SyncCheckResult { - result := ss.isInSync(true) + result := ss.isSynchronized(true) return result } -// isInSync query the remote DNS node for the latest height to check what is the current +func (ss *StateSync) GetParsedSyncStatusDoubleChecked() (IsSynchronized bool, OtherHeight uint64, HeightDiff uint64) { + result := ss.isSynchronized(true) + return ParseResult(result) +} + +// isSynchronized query the remote DNS node for the latest height to check what is the current // sync status -func (ss *StateSync) isInSync(doubleCheck bool) SyncCheckResult { +func (ss *StateSync) isSynchronized(doubleCheck bool) SyncCheckResult { if ss.syncConfig == nil { return SyncCheckResult{} // If syncConfig is not instantiated, return not in sync } @@ -1245,9 +1284,9 @@ func (ss *StateSync) isInSync(doubleCheck bool) SyncCheckResult { Uint64("lastHeight", lastHeight). Msg("[SYNC] Checking sync status") return SyncCheckResult{ - IsInSync: !wasOutOfSync, - OtherHeight: otherHeight1, - HeightDiff: heightDiff, + IsSynchronized: !wasOutOfSync, + OtherHeight: otherHeight1, + HeightDiff: heightDiff, } } // double check the sync status after 1 second to confirm (avoid false alarm) @@ -1269,8 +1308,8 @@ func (ss *StateSync) isInSync(doubleCheck bool) SyncCheckResult { heightDiff = 0 // overflow } return SyncCheckResult{ - IsInSync: !(wasOutOfSync && isOutOfSync && lastHeight == currentHeight), - OtherHeight: otherHeight2, - HeightDiff: heightDiff, + IsSynchronized: !(wasOutOfSync && isOutOfSync && lastHeight == currentHeight), + OtherHeight: otherHeight2, + HeightDiff: heightDiff, } } diff --git a/api/service/legacysync/syncing_test.go b/api/service/legacysync/syncing_test.go index ecccabd15..954c757d1 100644 --- a/api/service/legacysync/syncing_test.go +++ b/api/service/legacysync/syncing_test.go @@ -280,7 +280,7 @@ func TestSyncStatus_Get_Concurrency(t *testing.T) { fb := func() SyncCheckResult { time.Sleep(1 * time.Second) atomic.AddInt32(&updated, 1) - return SyncCheckResult{IsInSync: true} + return SyncCheckResult{IsSynchronized: true} } for i := 0; i != 20; i++ { wg.Add(1) diff --git a/api/service/stagedsync/default_stages.go b/api/service/stagedsync/default_stages.go new file mode 100644 index 000000000..0521f30f5 --- /dev/null +++ b/api/service/stagedsync/default_stages.go @@ -0,0 +1,86 @@ +package stagedsync + +import ( + "context" +) + +type ForwardOrder []SyncStageID +type RevertOrder []SyncStageID +type CleanUpOrder []SyncStageID + +var DefaultForwardOrder = ForwardOrder{ + Heads, + BlockHashes, + BlockBodies, + // Stages below don't use Internet + States, + LastMile, + Finish, +} + +var DefaultRevertOrder = RevertOrder{ + Finish, + LastMile, + States, + BlockBodies, + BlockHashes, + Heads, +} + +var DefaultCleanUpOrder = CleanUpOrder{ + Finish, + LastMile, + States, + BlockBodies, + BlockHashes, + Heads, +} + +func DefaultStages(ctx context.Context, + headsCfg StageHeadsCfg, + blockHashesCfg StageBlockHashesCfg, + bodiesCfg StageBodiesCfg, + statesCfg StageStatesCfg, + lastMileCfg StageLastMileCfg, + finishCfg StageFinishCfg) []*Stage { + + handlerStageHeads := NewStageHeads(headsCfg) + handlerStageBlockHashes := NewStageBlockHashes(blockHashesCfg) + handlerStageBodies := NewStageBodies(bodiesCfg) + handleStageStates := NewStageStates(statesCfg) + handlerStageLastMile := NewStageLastMile(lastMileCfg) + handlerStageFinish := NewStageFinish(finishCfg) + + return []*Stage{ + { + ID: Heads, + Description: "Retrieve Chain Heads", + Handler: handlerStageHeads, 
+ }, + { + ID: BlockHashes, + Description: "Download block hashes", + Handler: handlerStageBlockHashes, + }, + { + ID: BlockBodies, + Description: "Download block bodies", + Handler: handlerStageBodies, + }, + { + ID: States, + Description: "Insert new blocks and update blockchain states", + Handler: handleStageStates, + }, + { + ID: LastMile, + Description: "update status for blocks after sync and update last mile blocks as well", + Handler: handlerStageLastMile, + }, + { + ID: Finish, + Description: "Final stage to update current block for the RPC API", + Handler: handlerStageFinish, + }, + } +} diff --git a/api/service/stagedsync/errors.go b/api/service/stagedsync/errors.go new file mode 100644 index 000000000..148a89890 --- /dev/null +++ b/api/service/stagedsync/errors.go @@ -0,0 +1,51 @@ +package stagedsync + +import ( + "fmt" +) + +// Errors ... +var ( + ErrRegistrationFail = WrapStagedSyncError("registration failed") + ErrGetBlock = WrapStagedSyncError("get block failed") + ErrGetBlockHash = WrapStagedSyncError("get block hash failed") + ErrGetConsensusHashes = WrapStagedSyncError("get consensus hashes failed") + ErrGenStateSyncTaskQueue = WrapStagedSyncError("generate state sync task queue failed") + ErrDownloadBlocks = WrapStagedSyncError("get download blocks failed") + ErrUpdateBlockAndStatus = WrapStagedSyncError("update block and status failed") + ErrGenerateNewState = WrapStagedSyncError("get generate new state failed") + ErrFetchBlockHashProgressFail = WrapStagedSyncError("fetch cache progress for block hashes stage failed") + ErrFetchCachedBlockHashFail = WrapStagedSyncError("fetch cached block hashes failed") + ErrNotEnoughBlockHashes = WrapStagedSyncError("peers haven't sent all requested block hashes") + ErrRetrieveCachedProgressFail = WrapStagedSyncError("retrieving cache progress for block hashes stage failed") + ErrRetrieveCachedHashProgressFail = WrapStagedSyncError("retrieving cache progress for block hashes stage failed") + ErrSaveBlockHashesProgressFail = WrapStagedSyncError("saving progress for block hashes stage failed") + ErrSaveCachedBlockHashesProgressFail = WrapStagedSyncError("saving cache progress for block hashes stage failed") + ErrSavingCacheLastBlockHashFail = WrapStagedSyncError("saving cache last block hash for block hashes stage failed") + ErrCachingBlockHashFail = WrapStagedSyncError("caching downloaded block hashes failed") + ErrCommitTransactionFail = WrapStagedSyncError("failed to write db commit") + ErrUnexpectedNumberOfBlocks = WrapStagedSyncError("unexpected number of block delivered") + ErrSavingBodiesProgressFail = WrapStagedSyncError("saving progress for block bodies stage failed") + ErrAddTasksToQueueFail = WrapStagedSyncError("cannot add task to queue") + ErrSavingCachedBodiesProgressFail = WrapStagedSyncError("saving cache progress for blocks stage failed") + ErrRetrievingCachedBodiesProgressFail = WrapStagedSyncError("retrieving cache progress for blocks stage failed") + ErrNoConnectedPeers = WrapStagedSyncError("haven't connected to any peer yet!") + ErrNotEnoughConnectedPeers = WrapStagedSyncError("not enough connected peers") + ErrSaveStateProgressFail = WrapStagedSyncError("saving progress for block States stage failed") + ErrPruningCursorCreationFail = WrapStagedSyncError("failed to create cursor for pruning") + ErrInvalidBlockNumber = WrapStagedSyncError("invalid block number") + ErrInvalidBlockBytes = WrapStagedSyncError("invalid block bytes to insert into chain") + ErrAddTaskFailed = WrapStagedSyncError("cannot add task to 
queue") + ErrNodeNotEnoughBlockHashes = WrapStagedSyncError("some of the nodes didn't provide all block hashes") + ErrCachingBlocksFail = WrapStagedSyncError("caching downloaded block bodies failed") + ErrSaveBlocksFail = WrapStagedSyncError("save downloaded block bodies failed") + ErrStageNotFound = WrapStagedSyncError("stage not found") + ErrSomeNodesNotReady = WrapStagedSyncError("some nodes are not ready") + ErrSomeNodesBlockHashFail = WrapStagedSyncError("some nodes failed to download block hashes") + ErrMaxPeerHeightFail = WrapStagedSyncError("get max peer height failed") +) + +// WrapStagedSyncError wraps errors for staged sync and returns error object +func WrapStagedSyncError(context string) error { + return fmt.Errorf("[STAGED_SYNC]: %s", context) +} diff --git a/api/service/stagedsync/stage.go b/api/service/stagedsync/stage.go new file mode 100644 index 000000000..74fb83616 --- /dev/null +++ b/api/service/stagedsync/stage.go @@ -0,0 +1,106 @@ +package stagedsync + +import ( + "github.com/ethereum/go-ethereum/common" + "github.com/ledgerwatch/erigon-lib/kv" +) + +type ExecFunc func(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) error + +type StageHandler interface { + // Exec is the execution function for the stage to move forward. + // * firstCycle - is it the first cycle of syncing. + // * invalidBlockRevert - whether the execution is to solve the invalid block + // * s - is the current state of the stage and contains stage data. + // * reverter - if the stage needs to cause reverting, `reverter` methods can be used. + Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) error + + // Revert is the reverting logic of the stage. + // * firstCycle - is it the first cycle of syncing. + // * u - contains information about the revert itself. + // * s - represents the state of this stage at the beginning of revert. + Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) error + + // CleanUp is the execution function for the stage to prune old data. + // * firstCycle - is it the first cycle of syncing. + // * p - is the current state of the stage and contains stage data. + CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) error +} + +// Stage is a single sync stage in staged sync. +type Stage struct { + // ID of the sync stage. Should not be empty and should be unique. It is recommended to prefix it with reverse domain to avoid clashes (`com.example.my-stage`). + ID SyncStageID + // Handler handles the logic for the stage + Handler StageHandler + // Description is a string that is shown in the logs. + Description string + // DisabledDescription shows in the log with a message if the stage is disabled. Here, you can show which command line flags should be provided to enable the page. + DisabledDescription string + // Disabled defines if the stage is disabled. It sets up when the stage is build by its `StageBuilder`. + Disabled bool +} + +// StageState is the state of the stage. +type StageState struct { + state *StagedSync + ID SyncStageID + BlockNumber uint64 // BlockNumber is the current block number of the stage at the beginning of the state execution. 
+} + +func (s *StageState) LogPrefix() string { return s.state.LogPrefix() } + +func (s *StageState) CurrentStageProgress(db kv.Getter) (uint64, error) { + return GetStageProgress(db, s.ID, s.state.isBeacon) +} + +func (s *StageState) StageProgress(db kv.Getter, id SyncStageID) (uint64, error) { + return GetStageProgress(db, id, s.state.isBeacon) +} + +// Update updates the stage state (current block number) in the database. Can be called multiple times during stage execution. +func (s *StageState) Update(db kv.Putter, newBlockNum uint64) error { + return SaveStageProgress(db, s.ID, s.state.isBeacon, newBlockNum) +} +func (s *StageState) UpdateCleanUp(db kv.Putter, blockNum uint64) error { + return SaveStageCleanUpProgress(db, s.ID, s.state.isBeacon, blockNum) +} + +// Reverter allows the stage to cause an revert. +type Reverter interface { + // RevertTo begins staged sync revert to the specified block. + RevertTo(revertPoint uint64, invalidBlock common.Hash) +} + +// RevertState contains the information about revert. +type RevertState struct { + ID SyncStageID + // RevertPoint is the block to revert to. + RevertPoint uint64 + CurrentBlockNumber uint64 + // If revert is caused by a bad block, this hash is not empty + InvalidBlock common.Hash + state *StagedSync +} + +func (u *RevertState) LogPrefix() string { return u.state.LogPrefix() } + +// Done updates the DB state of the stage. +func (u *RevertState) Done(db kv.Putter) error { + return SaveStageProgress(db, u.ID, u.state.isBeacon, u.RevertPoint) +} + +type CleanUpState struct { + ID SyncStageID + ForwardProgress uint64 // progress of stage forward move + CleanUpProgress uint64 // progress of stage prune move. after sync cycle it become equal to ForwardProgress by Done() method + state *StagedSync +} + +func (s *CleanUpState) LogPrefix() string { return s.state.LogPrefix() + " CleanUp" } +func (s *CleanUpState) Done(db kv.Putter) error { + return SaveStageCleanUpProgress(db, s.ID, s.state.isBeacon, s.ForwardProgress) +} +func (s *CleanUpState) DoneAt(db kv.Putter, blockNum uint64) error { + return SaveStageCleanUpProgress(db, s.ID, s.state.isBeacon, blockNum) +} diff --git a/api/service/stagedsync/stage_blockhashes.go b/api/service/stagedsync/stage_blockhashes.go new file mode 100644 index 000000000..1f049d52a --- /dev/null +++ b/api/service/stagedsync/stage_blockhashes.go @@ -0,0 +1,698 @@ +package stagedsync + +import ( + "context" + "encoding/hex" + "fmt" + "strconv" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/internal/utils" + "github.com/ledgerwatch/erigon-lib/kv" + "github.com/ledgerwatch/erigon-lib/kv/mdbx" + "github.com/ledgerwatch/log/v3" +) + +type StageBlockHashes struct { + configs StageBlockHashesCfg +} + +type StageBlockHashesCfg struct { + ctx context.Context + bc core.BlockChain + db kv.RwDB + turbo bool + turboModeCh chan struct{} + bgProcRunning bool + isBeacon bool + cachedb kv.RwDB + logProgress bool +} + +func NewStageBlockHashes(cfg StageBlockHashesCfg) *StageBlockHashes { + return &StageBlockHashes{ + configs: cfg, + } +} + +func NewStageBlockHashesCfg(ctx context.Context, bc core.BlockChain, db kv.RwDB, isBeacon bool, turbo bool, logProgress bool) StageBlockHashesCfg { + cachedb, err := initHashesCacheDB(ctx, isBeacon) + if err != nil { + panic("can't initialize sync caches") + } + return StageBlockHashesCfg{ + ctx: ctx, + bc: bc, + db: db, + turbo: turbo, + isBeacon: isBeacon, + cachedb: cachedb, + logProgress: logProgress, + } 
+} + +func initHashesCacheDB(ctx context.Context, isBeacon bool) (db kv.RwDB, err error) { + // create caches db + cachedbName := BlockHashesCacheDB + if isBeacon { + cachedbName = "beacon_" + cachedbName + } + cachedb := mdbx.NewMDBX(log.New()).Path(cachedbName).MustOpen() + // create transaction on cachedb + tx, errRW := cachedb.BeginRw(ctx) + if errRW != nil { + utils.Logger().Error(). + Err(errRW). + Msg("[STAGED_SYNC] initializing sync caches failed") + return nil, errRW + } + defer tx.Rollback() + if err := tx.CreateBucket(BlockHashesBucket); err != nil { + utils.Logger().Error(). + Err(err). + Msg("[STAGED_SYNC] creating cache bucket failed") + return nil, err + } + if err := tx.CreateBucket(StageProgressBucket); err != nil { + utils.Logger().Error(). + Err(err). + Msg("[STAGED_SYNC] creating progress bucket failed") + return nil, err + } + if err := tx.Commit(); err != nil { + return nil, err + } + return cachedb, nil +} + +func (bh *StageBlockHashes) Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { + + if len(s.state.syncConfig.peers) < NumPeersLowBound { + return ErrNotEnoughConnectedPeers + } + + maxPeersHeight := s.state.syncStatus.MaxPeersHeight + currentHead := bh.configs.bc.CurrentBlock().NumberU64() + if currentHead >= maxPeersHeight { + return nil + } + currProgress := uint64(0) + targetHeight := s.state.syncStatus.currentCycle.TargetHeight + isBeacon := s.state.isBeacon + startHash := bh.configs.bc.CurrentBlock().Hash() + isLastCycle := targetHeight >= maxPeersHeight + canRunInTurboMode := bh.configs.turbo && !isLastCycle + // retrieve the progress + if errV := CreateView(bh.configs.ctx, bh.configs.db, tx, func(etx kv.Tx) error { + if currProgress, err = s.CurrentStageProgress(etx); err != nil { //GetStageProgress(etx, BlockHashes, isBeacon); err != nil { + return err + } + if currProgress > 0 { + key := strconv.FormatUint(currProgress, 10) + bucketName := GetBucketName(BlockHashesBucket, isBeacon) + currHash := []byte{} + if currHash, err = etx.GetOne(bucketName, []byte(key)); err != nil || len(currHash[:]) == 0 { + //TODO: currProgress and DB don't match. Either re-download all or verify db and set currProgress to last + return err + } + startHash.SetBytes(currHash[:]) + } + return nil + }); errV != nil { + return errV + } + + if currProgress == 0 { + if err := bh.clearBlockHashesBucket(tx, s.state.isBeacon); err != nil { + return err + } + startHash = bh.configs.bc.CurrentBlock().Hash() + currProgress = currentHead + } + + if currProgress >= targetHeight { + if canRunInTurboMode && currProgress < maxPeersHeight { + bh.configs.turboModeCh = make(chan struct{}) + go bh.runBackgroundProcess(nil, s, isBeacon, currProgress, maxPeersHeight, startHash) + } + return nil + } + + // check whether any block hashes after curr height is cached + if bh.configs.turbo && !firstCycle { + var cacheHash []byte + if cacheHash, err = bh.getHashFromCache(currProgress + 1); err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_SYNC] fetch cache progress for block hashes stage failed") + } else { + if len(cacheHash[:]) > 0 { + // get blocks from cached db rather than calling peers, and update current progress + newProgress, newStartHash, err := bh.loadBlockHashesFromCache(s, cacheHash, currProgress, targetHeight, tx) + if err != nil { + utils.Logger().Error(). + Err(err). 
+ Msgf("[STAGED_SYNC] fetch cached block hashes failed") + bh.clearCache() + bh.clearBlockHashesBucket(tx, isBeacon) + } else { + currProgress = newProgress + startHash.SetBytes(newStartHash[:]) + } + } + } + } + + if currProgress >= targetHeight { + if canRunInTurboMode && currProgress < maxPeersHeight { + bh.configs.turboModeCh = make(chan struct{}) + go bh.runBackgroundProcess(nil, s, isBeacon, currProgress, maxPeersHeight, startHash) + } + return nil + } + + size := uint32(0) + + startTime := time.Now() + startBlock := currProgress + if bh.configs.logProgress { + fmt.Print("\033[s") // save the cursor position + } + + for ok := true; ok; ok = currProgress < targetHeight { + size = uint32(targetHeight - currProgress) + if size > SyncLoopBatchSize { + size = SyncLoopBatchSize + } + // Gets consensus hashes. + if err := s.state.getConsensusHashes(startHash[:], size, false); err != nil { + return err + } + // selects the most common peer config based on their block hashes and doing the clean up + if err := s.state.syncConfig.GetBlockHashesConsensusAndCleanUp(false); err != nil { + return err + } + // double check block hashes + if s.state.DoubleCheckBlockHashes { + invalidPeersMap, validBlockHashes, err := s.state.getInvalidPeersByBlockHashes(tx) + if err != nil { + return err + } + if validBlockHashes < int(size) { + return ErrNotEnoughBlockHashes + } + s.state.syncConfig.cleanUpInvalidPeers(invalidPeersMap) + } + // save the downloaded files to db + if currProgress, startHash, err = bh.saveDownloadedBlockHashes(s, currProgress, startHash, tx); err != nil { + return err + } + // log the stage progress in console + if bh.configs.logProgress { + //calculating block speed + dt := time.Now().Sub(startTime).Seconds() + speed := float64(0) + if dt > 0 { + speed = float64(currProgress-startBlock) / dt + } + blockSpeed := fmt.Sprintf("%.2f", speed) + fmt.Print("\033[u\033[K") // restore the cursor position and clear the line + fmt.Println("downloading block hash progress:", currProgress, "/", targetHeight, "(", blockSpeed, "blocks/s", ")") + } + } + + // continue downloading in background + if canRunInTurboMode && currProgress < maxPeersHeight { + bh.configs.turboModeCh = make(chan struct{}) + go bh.runBackgroundProcess(nil, s, isBeacon, currProgress, maxPeersHeight, startHash) + } + return nil +} + +// runBackgroundProcess continues downloading block hashes in the background and caching them on disk while next stages are running. +// In the next sync cycle, this stage will use cached block hashes rather than download them from peers. +// This helps performance and reduces stage duration. It also helps to use the resources more efficiently. +func (bh *StageBlockHashes) runBackgroundProcess(tx kv.RwTx, s *StageState, isBeacon bool, startHeight uint64, targetHeight uint64, startHash common.Hash) error { + size := uint32(0) + currProgress := startHeight + currHash := startHash + bh.configs.bgProcRunning = true + + defer func() { + if bh.configs.bgProcRunning { + close(bh.configs.turboModeCh) + bh.configs.bgProcRunning = false + } + }() + + // retrieve bg progress and last hash + errV := bh.configs.cachedb.View(context.Background(), func(rtx kv.Tx) error { + + if progressBytes, err := rtx.GetOne(StageProgressBucket, []byte(LastBlockHeight)); err != nil { + utils.Logger().Error(). + Err(err). 
+ Msgf("[STAGED_SYNC] retrieving cache progress for block hashes stage failed") + return ErrRetrieveCachedProgressFail + } else { + if len(progressBytes[:]) > 0 { + savedProgress, _ := unmarshalData(progressBytes) + if savedProgress > startHeight { + currProgress = savedProgress + // retrieve start hash + if lastBlockHash, err := rtx.GetOne(StageProgressBucket, []byte(LastBlockHash)); err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_SYNC] retrieving cache progress for block hashes stage failed") + return ErrRetrieveCachedHashProgressFail + } else { + currHash.SetBytes(lastBlockHash[:]) + } + } + } + } + return nil + + }) + if errV != nil { + return errV + } + + for { + select { + case <-bh.configs.turboModeCh: + return nil + default: + if currProgress >= targetHeight { + return nil + } + + size = uint32(targetHeight - currProgress) + if size > SyncLoopBatchSize { + size = SyncLoopBatchSize + } + + // Gets consensus hashes. + if err := s.state.getConsensusHashes(currHash[:], size, true); err != nil { + return err + } + + // selects the most common peer config based on their block hashes and doing the clean up + if err := s.state.syncConfig.GetBlockHashesConsensusAndCleanUp(true); err != nil { + return err + } + + // save the downloaded files to db + var err error + if currProgress, currHash, err = bh.saveBlockHashesInCacheDB(s, currProgress, currHash); err != nil { + return err + } + } + //TODO: do we need sleep a few milliseconds? ex: time.Sleep(1 * time.Millisecond) + } +} + +func (bh *StageBlockHashes) clearBlockHashesBucket(tx kv.RwTx, isBeacon bool) error { + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = bh.configs.db.BeginRw(context.Background()) + if err != nil { + return err + } + defer tx.Rollback() + } + bucketName := GetBucketName(BlockHashesBucket, isBeacon) + if err := tx.ClearBucket(bucketName); err != nil { + return err + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + return nil +} + +// saveDownloadedBlockHashes saves block hashes to db (map from block heigh to block hash) +func (bh *StageBlockHashes) saveDownloadedBlockHashes(s *StageState, progress uint64, startHash common.Hash, tx kv.RwTx) (p uint64, h common.Hash, err error) { + p = progress + h.SetBytes(startHash.Bytes()) + lastAddedID := int(0) // the first id won't be added + saved := false + + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = bh.configs.db.BeginRw(context.Background()) + if err != nil { + return p, h, err + } + defer tx.Rollback() + } + + s.state.syncConfig.ForEachPeer(func(configPeer *SyncPeerConfig) (brk bool) { + if len(configPeer.blockHashes) == 0 { + return //fetch the rest from other peer + } + + for id := 0; id < len(configPeer.blockHashes); id++ { + if id <= lastAddedID { + continue + } + blockHash := configPeer.blockHashes[id] + if len(blockHash) == 0 { + return //fetch the rest from other peer + } + key := strconv.FormatUint(p+1, 10) + bucketName := GetBucketName(BlockHashesBucket, s.state.isBeacon) + if err := tx.Put(bucketName, []byte(key), blockHash); err != nil { + utils.Logger().Error(). + Err(err). + Int("block hash index", id). + Str("block hash", hex.EncodeToString(blockHash)). 
+ Msg("[STAGED_SYNC] adding block hash to db failed") + return + } + p++ + h.SetBytes(blockHash[:]) + lastAddedID = id + } + // check if all block hashes are added to db break the loop + if lastAddedID == len(configPeer.blockHashes)-1 { + saved = true + brk = true + } + return + }) + + // save progress + if err = s.Update(tx, p); err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_SYNC] saving progress for block hashes stage failed") + return progress, startHash, ErrSaveBlockHashesProgressFail + } + + if len(s.state.syncConfig.peers) > 0 && len(s.state.syncConfig.peers[0].blockHashes) > 0 && !saved { + return progress, startHash, ErrSaveBlockHashesProgressFail + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return progress, startHash, err + } + } + return p, h, nil +} + +// saveBlockHashesInCacheDB saves block hashes to cache db (map from block heigh to block hash) +func (bh *StageBlockHashes) saveBlockHashesInCacheDB(s *StageState, progress uint64, startHash common.Hash) (p uint64, h common.Hash, err error) { + p = progress + h.SetBytes(startHash[:]) + lastAddedID := int(0) // the first id won't be added + saved := false + + etx, err := bh.configs.cachedb.BeginRw(context.Background()) + if err != nil { + return p, h, err + } + defer etx.Rollback() + + s.state.syncConfig.ForEachPeer(func(configPeer *SyncPeerConfig) (brk bool) { + for id, blockHash := range configPeer.blockHashes { + if id <= lastAddedID { + continue + } + key := strconv.FormatUint(p+1, 10) + if err := etx.Put(BlockHashesBucket, []byte(key), blockHash); err != nil { + utils.Logger().Error(). + Err(err). + Int("block hash index", id). + Str("block hash", hex.EncodeToString(blockHash)). + Msg("[STAGED_SYNC] adding block hash to db failed") + return + } + p++ + h.SetBytes(blockHash[:]) + lastAddedID = id + } + + // check if all block hashes are added to db break the loop + if lastAddedID == len(configPeer.blockHashes)-1 { + saved = true + brk = true + } + return + }) + + // save cache progress (last block height) + if err = etx.Put(StageProgressBucket, []byte(LastBlockHeight), marshalData(p)); err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_SYNC] saving cache progress for block hashes stage failed") + return p, h, ErrSaveCachedBlockHashesProgressFail + } + + // save cache progress + if err = etx.Put(StageProgressBucket, []byte(LastBlockHash), h.Bytes()); err != nil { + utils.Logger().Error(). + Err(err). 
+ Msgf("[STAGED_SYNC] saving cache last block hash for block hashes stage failed") + return p, h, ErrSavingCacheLastBlockHashFail + } + + // if node was connected to other peers and had some hashes to store in db, but it failed to save the blocks, return error + if len(s.state.syncConfig.peers) > 0 && len(s.state.syncConfig.peers[0].blockHashes) > 0 && !saved { + return p, h, ErrCachingBlockHashFail + } + + // commit transaction to db to cache all downloaded blocks + if err := etx.Commit(); err != nil { + return p, h, err + } + + // it cached block hashes successfully, so, it returns the cache progress and last cached block hash + return p, h, nil +} + +// clearCache removes block hashes from cache db +func (bh *StageBlockHashes) clearCache() error { + tx, err := bh.configs.cachedb.BeginRw(context.Background()) + if err != nil { + return nil + } + defer tx.Rollback() + if err := tx.ClearBucket(BlockHashesBucket); err != nil { + return nil + } + + if err := tx.Commit(); err != nil { + return err + } + + return nil +} + +// getHashFromCache fetches block hashes from cache db +func (bh *StageBlockHashes) getHashFromCache(height uint64) (h []byte, err error) { + + tx, err := bh.configs.cachedb.BeginRw(context.Background()) + if err != nil { + return nil, err + } + defer tx.Rollback() + + var cacheHash []byte + key := strconv.FormatUint(height, 10) + if exist, err := tx.Has(BlockHashesBucket, []byte(key)); !exist || err != nil { + return nil, ErrFetchBlockHashProgressFail + } + if cacheHash, err = tx.GetOne(BlockHashesBucket, []byte(key)); err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_SYNC] fetch cache progress for block hashes stage failed") + return nil, ErrFetchBlockHashProgressFail + } + hv, _ := unmarshalData(cacheHash) + if len(cacheHash) <= 1 || hv == 0 { + return nil, ErrFetchBlockHashProgressFail + } + if err := tx.Commit(); err != nil { + return nil, err + } + + return cacheHash[:], nil +} + +// loadBlockHashesFromCache loads block hashes from cache db to main sync db and update the progress +func (bh *StageBlockHashes) loadBlockHashesFromCache(s *StageState, startHash []byte, startHeight uint64, targetHeight uint64, tx kv.RwTx) (p uint64, h common.Hash, err error) { + + p = startHeight + h.SetBytes(startHash[:]) + useInternalTx := tx == nil + if useInternalTx { + tx, err = bh.configs.db.BeginRw(bh.configs.ctx) + if err != nil { + return p, h, err + } + defer tx.Rollback() + } + + if errV := bh.configs.cachedb.View(context.Background(), func(rtx kv.Tx) error { + // load block hashes from cache db and copy them to main sync db + for ok := true; ok; ok = p < targetHeight { + key := strconv.FormatUint(p+1, 10) + lastHash, err := rtx.GetOne(BlockHashesBucket, []byte(key)) + if err != nil { + utils.Logger().Error(). + Err(err). + Str("block height", key). 
+ Msg("[STAGED_SYNC] retrieve block hash from cache failed") + return err + } + if len(lastHash[:]) == 0 { + return nil + } + bucketName := GetBucketName(BlockHashesBucket, s.state.isBeacon) + if err = tx.Put(bucketName, []byte(key), lastHash); err != nil { + return err + } + h.SetBytes(lastHash[:]) + p++ + } + // load extra block hashes from cache db and copy them to bg db to be downloaded in background by block stage + s.state.syncStatus.currentCycle.lock.Lock() + defer s.state.syncStatus.currentCycle.lock.Unlock() + pExtraHashes := p + s.state.syncStatus.currentCycle.ExtraHashes = make(map[uint64][]byte) + for ok := true; ok; ok = pExtraHashes < p+s.state.MaxBackgroundBlocks { + key := strconv.FormatUint(pExtraHashes+1, 10) + newHash, err := rtx.GetOne(BlockHashesBucket, []byte(key)) + if err != nil { + utils.Logger().Error(). + Err(err). + Str("block height", key). + Msg("[STAGED_SYNC] retrieve extra block hashes for background process failed") + break + } + if len(newHash[:]) == 0 { + return nil + } + s.state.syncStatus.currentCycle.ExtraHashes[pExtraHashes+1] = newHash + pExtraHashes++ + } + return nil + }); errV != nil { + return startHeight, h, errV + } + + // save progress + if err = s.Update(tx, p); err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_SYNC] saving retrieved cached progress for block hashes stage failed") + h.SetBytes(startHash[:]) + return startHeight, h, err + } + + // update the progress + if useInternalTx { + if err := tx.Commit(); err != nil { + h.SetBytes(startHash[:]) + return startHeight, h, err + } + } + return p, h, nil +} + +func (bh *StageBlockHashes) Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = bh.configs.db.BeginRw(bh.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + // terminate background process in turbo mode + if bh.configs.bgProcRunning { + bh.configs.bgProcRunning = false + bh.configs.turboModeCh <- struct{}{} + close(bh.configs.turboModeCh) + } + + // clean block hashes db + hashesBucketName := GetBucketName(BlockHashesBucket, bh.configs.isBeacon) + if err = tx.ClearBucket(hashesBucketName); err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_SYNC] clear block hashes bucket after revert failed") + return err + } + + // clean cache db as well + if err := bh.clearCache(); err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_SYNC] clear block hashes cache failed") + return err + } + + // clear extra block hashes + s.state.syncStatus.currentCycle.ExtraHashes = make(map[uint64][]byte) + + // save progress + currentHead := bh.configs.bc.CurrentBlock().NumberU64() + if err = s.Update(tx, currentHead); err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_SYNC] saving progress for block hashes stage after revert failed") + return err + } + + if err = u.Done(tx); err != nil { + utils.Logger().Error(). + Err(err). 
+ Msgf("[STAGED_SYNC] reset after revert failed") + return err + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return ErrCommitTransactionFail + } + } + return nil +} + +func (bh *StageBlockHashes) CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = bh.configs.db.BeginRw(bh.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + // terminate background process in turbo mode + if bh.configs.bgProcRunning { + bh.configs.bgProcRunning = false + bh.configs.turboModeCh <- struct{}{} + close(bh.configs.turboModeCh) + } + + hashesBucketName := GetBucketName(BlockHashesBucket, bh.configs.isBeacon) + tx.ClearBucket(hashesBucketName) + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} diff --git a/api/service/stagedsync/stage_bodies.go b/api/service/stagedsync/stage_bodies.go new file mode 100644 index 000000000..5f0d4a582 --- /dev/null +++ b/api/service/stagedsync/stage_bodies.go @@ -0,0 +1,784 @@ +package stagedsync + +import ( + "context" + "encoding/hex" + "fmt" + "strconv" + "sync" + "time" + + "github.com/Workiva/go-datastructures/queue" + "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/internal/utils" + "github.com/ledgerwatch/erigon-lib/kv" + "github.com/ledgerwatch/erigon-lib/kv/mdbx" + "github.com/ledgerwatch/log/v3" +) + +type StageBodies struct { + configs StageBodiesCfg +} +type StageBodiesCfg struct { + ctx context.Context + bc core.BlockChain + db kv.RwDB + turbo bool + turboModeCh chan struct{} + bgProcRunning bool + isBeacon bool + cachedb kv.RwDB + logProgress bool +} + +func NewStageBodies(cfg StageBodiesCfg) *StageBodies { + return &StageBodies{ + configs: cfg, + } +} + +func NewStageBodiesCfg(ctx context.Context, bc core.BlockChain, db kv.RwDB, isBeacon bool, turbo bool, logProgress bool) StageBodiesCfg { + cachedb, err := initBlocksCacheDB(ctx, isBeacon) + if err != nil { + panic("can't initialize sync caches") + } + return StageBodiesCfg{ + ctx: ctx, + bc: bc, + db: db, + turbo: turbo, + isBeacon: isBeacon, + cachedb: cachedb, + logProgress: logProgress, + } +} + +func initBlocksCacheDB(ctx context.Context, isBeacon bool) (db kv.RwDB, err error) { + // create caches db + cachedbName := BlockCacheDB + if isBeacon { + cachedbName = "beacon_" + cachedbName + } + cachedb := mdbx.NewMDBX(log.New()).Path(cachedbName).MustOpen() + tx, errRW := cachedb.BeginRw(ctx) + if errRW != nil { + utils.Logger().Error(). + Err(errRW). + Msg("[STAGED_SYNC] initializing sync caches failed") + return nil, errRW + } + defer tx.Rollback() + if err := tx.CreateBucket(DownloadedBlocksBucket); err != nil { + utils.Logger().Error(). + Err(err). + Msg("[STAGED_SYNC] creating cache bucket failed") + return nil, err + } + if err := tx.CreateBucket(StageProgressBucket); err != nil { + utils.Logger().Error(). + Err(err). 
+ Msg("[STAGED_SYNC] creating progress bucket failed") + return nil, err + } + if err := tx.Commit(); err != nil { + return nil, err + } + return cachedb, nil +} + +// Exec progresses Bodies stage in the forward direction +func (b *StageBodies) Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { + + maxPeersHeight := s.state.syncStatus.MaxPeersHeight + currentHead := b.configs.bc.CurrentBlock().NumberU64() + if currentHead >= maxPeersHeight { + return nil + } + currProgress := uint64(0) + targetHeight := s.state.syncStatus.currentCycle.TargetHeight + isBeacon := s.state.isBeacon + isLastCycle := targetHeight >= maxPeersHeight + canRunInTurboMode := b.configs.turbo && !isLastCycle + + if errV := CreateView(b.configs.ctx, b.configs.db, tx, func(etx kv.Tx) error { + if currProgress, err = s.CurrentStageProgress(etx); err != nil { + return err + } + return nil + }); errV != nil { + return errV + } + + if currProgress == 0 { + if err := b.clearBlocksBucket(tx, s.state.isBeacon); err != nil { + return err + } + currProgress = currentHead + } + + if currProgress >= targetHeight { + return nil + } + + // load cached blocks to main sync db + if b.configs.turbo && !firstCycle { + if currProgress, err = b.loadBlocksFromCache(s, currProgress, tx); err != nil { + return err + } + } + + if currProgress >= targetHeight { + return nil + } + + size := uint64(0) + startTime := time.Now() + startBlock := currProgress + if b.configs.logProgress { + fmt.Print("\033[s") // save the cursor position + } + + for ok := true; ok; ok = currProgress < targetHeight { + maxSize := targetHeight - currProgress + size = uint64(downloadTaskBatch * len(s.state.syncConfig.peers)) + if size > maxSize { + size = maxSize + } + if err = b.loadBlockHashesToTaskQueue(s, currProgress+1, size, tx); err != nil { + s.state.RevertTo(b.configs.bc.CurrentBlock().NumberU64(), b.configs.bc.CurrentBlock().Hash()) + return err + } + + // Download blocks. + verifyAllSig := true //TODO: move it to configs + if err = b.downloadBlocks(s, verifyAllSig, tx); err != nil { + return nil + } + // save blocks and update current progress + if currProgress, err = b.saveDownloadedBlocks(s, currProgress, tx); err != nil { + return err + } + // log the stage progress in console + if b.configs.logProgress { + //calculating block speed + dt := time.Now().Sub(startTime).Seconds() + speed := float64(0) + if dt > 0 { + speed = float64(currProgress-startBlock) / dt + } + blockSpeed := fmt.Sprintf("%.2f", speed) + fmt.Print("\033[u\033[K") // restore the cursor position and clear the line + fmt.Println("downloading blocks progress:", currProgress, "/", targetHeight, "(", blockSpeed, "blocks/s", ")") + } + } + + // Run background process in turbo mode + if canRunInTurboMode && currProgress < maxPeersHeight { + b.configs.turboModeCh = make(chan struct{}) + go b.runBackgroundProcess(tx, s, isBeacon, currProgress, currProgress+s.state.MaxBackgroundBlocks) + } + return nil +} + +// runBackgroundProcess continues downloading blocks in the background and caching them on disk while next stages are running. +// In the next sync cycle, this stage will use cached blocks rather than download them from peers. +// This helps performance and reduces stage duration. It also helps to use the resources more efficiently. 
+func (b *StageBodies) runBackgroundProcess(tx kv.RwTx, s *StageState, isBeacon bool, startHeight uint64, targetHeight uint64) error { + + s.state.syncStatus.currentCycle.lock.RLock() + defer s.state.syncStatus.currentCycle.lock.RUnlock() + + if s.state.syncStatus.currentCycle.Number == 0 || len(s.state.syncStatus.currentCycle.ExtraHashes) == 0 { + return nil + } + currProgress := startHeight + var err error + size := uint64(0) + b.configs.bgProcRunning = true + + defer func() { + if b.configs.bgProcRunning { + close(b.configs.turboModeCh) + b.configs.bgProcRunning = false + } + }() + + for ok := true; ok; ok = currProgress < targetHeight { + select { + case <-b.configs.turboModeCh: + return nil + default: + if currProgress >= targetHeight { + return nil + } + + maxSize := targetHeight - currProgress + size = uint64(downloadTaskBatch * len(s.state.syncConfig.peers)) + if size > maxSize { + size = maxSize + } + if err = b.loadExtraBlockHashesToTaskQueue(s, currProgress+1, size); err != nil { + return err + } + // Download blocks. + verifyAllSig := true //TODO: move it to configs + if err = b.downloadBlocks(s, verifyAllSig, nil); err != nil { + return nil + } + // save blocks and update current progress + if currProgress, err = b.cacheBlocks(s, currProgress); err != nil { + return err + } + } + } + return nil +} + +func (b *StageBodies) clearBlocksBucket(tx kv.RwTx, isBeacon bool) error { + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = b.configs.db.BeginRw(context.Background()) + if err != nil { + return err + } + defer tx.Rollback() + } + bucketName := GetBucketName(DownloadedBlocksBucket, isBeacon) + if err := tx.ClearBucket(bucketName); err != nil { + return err + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + return nil +} + +// downloadBlocks downloads blocks from state sync task queue. +func (b *StageBodies) downloadBlocks(s *StageState, verifyAllSig bool, tx kv.RwTx) (err error) { + ss := s.state + var wg sync.WaitGroup + taskQueue := downloadTaskQueue{ss.stateSyncTaskQueue} + s.state.InitDownloadedBlocksMap() + + ss.syncConfig.ForEachPeer(func(peerConfig *SyncPeerConfig) (brk bool) { + wg.Add(1) + go func() { + defer wg.Done() + if !peerConfig.client.IsReady() { + // try to connect + if ready := peerConfig.client.WaitForConnection(1000 * time.Millisecond); !ready { + if !peerConfig.client.IsConnecting() { // if it's idle or closed then remove it + ss.syncConfig.RemovePeer(peerConfig, "not ready to download blocks") + } + return + } + } + for !taskQueue.empty() { + tasks, err := taskQueue.poll(downloadTaskBatch, time.Millisecond) + if err != nil || len(tasks) == 0 { + if err == queue.ErrDisposed { + continue + } + utils.Logger().Error(). + Err(err). + Msg("[STAGED_SYNC] downloadBlocks: ss.stateSyncTaskQueue poll timeout") + break + } + payload, err := peerConfig.GetBlocks(tasks.blockHashes()) + if err != nil { + isBrokenPeer := peerConfig.AddFailedTime(downloadBlocksRetryLimit) + utils.Logger().Error(). + Err(err). + Str("peerID", peerConfig.ip). + Str("port", peerConfig.port). + Msg("[STAGED_SYNC] downloadBlocks: GetBlocks failed") + if err := taskQueue.put(tasks); err != nil { + utils.Logger().Error(). + Err(err). + Interface("taskIndexes", tasks.indexes()). + Msg("cannot add task back to queue") + } + if isBrokenPeer { + ss.syncConfig.RemovePeer(peerConfig, "get blocks failed") + } + return + } + if len(payload) == 0 { + isBrokenPeer := peerConfig.AddFailedTime(downloadBlocksRetryLimit) + utils.Logger().Error(). 
+ Str("peerID", peerConfig.ip). + Str("port", peerConfig.port). + Msg("[STAGED_SYNC] downloadBlocks: no more retrievable blocks") + if err := taskQueue.put(tasks); err != nil { + utils.Logger().Error(). + Err(err). + Interface("taskIndexes", tasks.indexes()). + Interface("taskBlockes", tasks.blockHashesStr()). + Msg("downloadBlocks: cannot add task") + } + if isBrokenPeer { + ss.syncConfig.RemovePeer(peerConfig, "no blocks in payload") + } + return + } + // node received blocks from peer, so it is working now + peerConfig.failedTimes = 0 + + failedTasks, err := b.handleBlockSyncResult(s, payload, tasks, verifyAllSig, tx) + if err != nil { + isBrokenPeer := peerConfig.AddFailedTime(downloadBlocksRetryLimit) + utils.Logger().Error(). + Err(err). + Str("peerID", peerConfig.ip). + Str("port", peerConfig.port). + Msg("[STAGED_SYNC] downloadBlocks: handleBlockSyncResult failed") + if err := taskQueue.put(tasks); err != nil { + utils.Logger().Error(). + Err(err). + Interface("taskIndexes", tasks.indexes()). + Interface("taskBlockes", tasks.blockHashesStr()). + Msg("downloadBlocks: cannot add task") + } + if isBrokenPeer { + ss.syncConfig.RemovePeer(peerConfig, "handleBlockSyncResult failed") + } + return + } + + if len(failedTasks) != 0 { + isBrokenPeer := peerConfig.AddFailedTime(downloadBlocksRetryLimit) + utils.Logger().Error(). + Str("peerID", peerConfig.ip). + Str("port", peerConfig.port). + Msg("[STAGED_SYNC] downloadBlocks: some tasks failed") + if err := taskQueue.put(failedTasks); err != nil { + utils.Logger().Error(). + Err(err). + Interface("task Indexes", failedTasks.indexes()). + Interface("task Blocks", tasks.blockHashesStr()). + Msg("cannot add task") + } + if isBrokenPeer { + ss.syncConfig.RemovePeer(peerConfig, "some blocks failed to handle") + } + return + } + } + }() + return + }) + wg.Wait() + return nil +} + +func (b *StageBodies) handleBlockSyncResult(s *StageState, payload [][]byte, tasks syncBlockTasks, verifyAllSig bool, tx kv.RwTx) (syncBlockTasks, error) { + if len(payload) > len(tasks) { + utils.Logger().Error(). + Err(ErrUnexpectedNumberOfBlocks). + Int("expect", len(tasks)). + Int("got", len(payload)) + return tasks, ErrUnexpectedNumberOfBlocks + } + + var failedTasks syncBlockTasks + if len(payload) < len(tasks) { + utils.Logger().Warn(). + Err(ErrUnexpectedNumberOfBlocks). + Int("expect", len(tasks)). + Int("got", len(payload)) + failedTasks = append(failedTasks, tasks[len(payload):]...) + } + + s.state.lockBlocks.Lock() + defer s.state.lockBlocks.Unlock() + + for i, blockBytes := range payload { + if len(blockBytes[:]) <= 1 { + failedTasks = append(failedTasks, tasks[i]) + continue + } + k := uint64(tasks[i].index) // fmt.Sprintf("%d", tasks[i].index) //fmt.Sprintf("%020d", tasks[i].index) + s.state.downloadedBlocks[k] = make([]byte, len(blockBytes)) + copy(s.state.downloadedBlocks[k], blockBytes[:]) + } + + return failedTasks, nil +} + +func (b *StageBodies) saveProgress(s *StageState, progress uint64, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = b.configs.db.BeginRw(context.Background()) + if err != nil { + return err + } + defer tx.Rollback() + } + + // save progress + if err = s.Update(tx, progress); err != nil { + utils.Logger().Error(). + Err(err). 
+ Msgf("[STAGED_SYNC] saving progress for block bodies stage failed") + return ErrSavingBodiesProgressFail + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (b *StageBodies) loadBlockHashesToTaskQueue(s *StageState, startIndex uint64, size uint64, tx kv.RwTx) error { + s.state.stateSyncTaskQueue = queue.New(0) + if errV := CreateView(b.configs.ctx, b.configs.db, tx, func(etx kv.Tx) error { + + for i := startIndex; i < startIndex+size; i++ { + key := strconv.FormatUint(i, 10) + id := int(i - startIndex) + bucketName := GetBucketName(BlockHashesBucket, s.state.isBeacon) + blockHash, err := etx.GetOne(bucketName, []byte(key)) + if err != nil { + return err + } + if blockHash == nil || len(blockHash) == 0 { + break + } + if err := s.state.stateSyncTaskQueue.Put(SyncBlockTask{index: id, blockHash: blockHash}); err != nil { + s.state.stateSyncTaskQueue = queue.New(0) + utils.Logger().Error(). + Err(err). + Int("taskIndex", id). + Str("taskBlock", hex.EncodeToString(blockHash)). + Msg("[STAGED_SYNC] loadBlockHashesToTaskQueue: cannot add task") + break + } + } + return nil + + }); errV != nil { + return errV + } + + if s.state.stateSyncTaskQueue.Len() != int64(size) { + return ErrAddTaskFailed + } + return nil +} + +func (b *StageBodies) loadExtraBlockHashesToTaskQueue(s *StageState, startIndex uint64, size uint64) error { + + s.state.stateSyncTaskQueue = queue.New(0) + + for i := startIndex; i < startIndex+size; i++ { + id := int(i - startIndex) + blockHash := s.state.syncStatus.currentCycle.ExtraHashes[i] + if len(blockHash[:]) == 0 { + break + } + if err := s.state.stateSyncTaskQueue.Put(SyncBlockTask{index: id, blockHash: blockHash}); err != nil { + s.state.stateSyncTaskQueue = queue.New(0) + utils.Logger().Warn(). + Err(err). + Int("taskIndex", id). + Str("taskBlock", hex.EncodeToString(blockHash)). + Msg("[STAGED_SYNC] loadBlockHashesToTaskQueue: cannot add task") + break + } + } + + if s.state.stateSyncTaskQueue.Len() != int64(size) { + return ErrAddTasksToQueueFail + } + return nil +} + +func (b *StageBodies) saveDownloadedBlocks(s *StageState, progress uint64, tx kv.RwTx) (p uint64, err error) { + p = progress + + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = b.configs.db.BeginRw(context.Background()) + if err != nil { + return p, err + } + defer tx.Rollback() + } + + downloadedBlocks := s.state.GetDownloadedBlocks() + + for i := uint64(0); i < uint64(len(downloadedBlocks)); i++ { + blockBytes := downloadedBlocks[i] + n := progress + i + 1 + blkNumber := marshalData(n) + bucketName := GetBucketName(DownloadedBlocksBucket, s.state.isBeacon) + if err := tx.Put(bucketName, blkNumber, blockBytes); err != nil { + utils.Logger().Error(). + Err(err). + Uint64("block height", n). + Msg("[STAGED_SYNC] adding block to db failed") + return p, err + } + p++ + } + // check if all block hashes are added to db break the loop + if p-progress != uint64(len(downloadedBlocks)) { + return progress, ErrSaveBlocksFail + } + // save progress + if err = s.Update(tx, p); err != nil { + utils.Logger().Error(). + Err(err). 
+ Msgf("[STAGED_SYNC] saving progress for block bodies stage failed") + return progress, ErrSavingBodiesProgressFail + } + // if it's using its own transaction, commit transaction to db to cache all downloaded blocks + if useInternalTx { + if err := tx.Commit(); err != nil { + return progress, err + } + } + // it cached blocks successfully, so, it returns the cache progress + return p, nil +} + +func (b *StageBodies) cacheBlocks(s *StageState, progress uint64) (p uint64, err error) { + p = progress + + tx, err := b.configs.cachedb.BeginRw(context.Background()) + if err != nil { + return p, err + } + defer tx.Rollback() + + downloadedBlocks := s.state.GetDownloadedBlocks() + + for i := uint64(0); i < uint64(len(downloadedBlocks)); i++ { + blockBytes := downloadedBlocks[i] + n := progress + i + 1 + blkNumber := marshalData(n) // fmt.Sprintf("%020d", p+1) + if err := tx.Put(DownloadedBlocksBucket, blkNumber, blockBytes); err != nil { + utils.Logger().Error(). + Err(err). + Uint64("block height", p). + Msg("[STAGED_SYNC] caching block failed") + return p, err + } + p++ + } + // check if all block hashes are added to db break the loop + if p-progress != uint64(len(downloadedBlocks)) { + return p, ErrCachingBlocksFail + } + + // save progress + if err = tx.Put(StageProgressBucket, []byte(LastBlockHeight), marshalData(p)); err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_SYNC] saving cache progress for blocks stage failed") + return p, ErrSavingCachedBodiesProgressFail + } + + if err := tx.Commit(); err != nil { + return p, err + } + + return p, nil +} + +// clearCache removes block hashes from cache db +func (b *StageBodies) clearCache() error { + tx, err := b.configs.cachedb.BeginRw(context.Background()) + if err != nil { + return nil + } + defer tx.Rollback() + + if err := tx.ClearBucket(DownloadedBlocksBucket); err != nil { + return nil + } + + if err := tx.Commit(); err != nil { + return err + } + + return nil +} + +// load blocks from cache db to main sync db and update the progress +func (b *StageBodies) loadBlocksFromCache(s *StageState, startHeight uint64, tx kv.RwTx) (p uint64, err error) { + + p = startHeight + + useInternalTx := tx == nil + if useInternalTx { + tx, err = b.configs.db.BeginRw(b.configs.ctx) + if err != nil { + return p, err + } + defer tx.Rollback() + } + + defer func() { + // Clear cache db + b.configs.cachedb.Update(context.Background(), func(etx kv.RwTx) error { + if err := etx.ClearBucket(DownloadedBlocksBucket); err != nil { + return err + } + return nil + }) + }() + + errV := b.configs.cachedb.View(context.Background(), func(rtx kv.Tx) error { + lastCachedHeightBytes, err := rtx.GetOne(StageProgressBucket, []byte(LastBlockHeight)) + if err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_SYNC] retrieving cache progress for blocks stage failed") + return ErrRetrievingCachedBodiesProgressFail + } + lastHeight, err := unmarshalData(lastCachedHeightBytes) + if err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_SYNC] retrieving cache progress for blocks stage failed") + return ErrRetrievingCachedBodiesProgressFail + } + + if startHeight >= lastHeight { + return nil + } + + // load block hashes from cache db snd copy them to main sync db + for ok := true; ok; ok = p < lastHeight { + key := marshalData(p + 1) + blkBytes, err := rtx.GetOne(DownloadedBlocksBucket, []byte(key)) + if err != nil { + utils.Logger().Error(). + Err(err). + Uint64("block height", p+1). 
+ Msg("[STAGED_SYNC] retrieve block from cache failed") + return err + } + if len(blkBytes[:]) <= 1 { + break + } + bucketName := GetBucketName(DownloadedBlocksBucket, s.state.isBeacon) + if err = tx.Put(bucketName, []byte(key), blkBytes); err != nil { + return err + } + p++ + } + return nil + }) + if errV != nil { + return startHeight, errV + } + + // save progress + if err = s.Update(tx, p); err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_SYNC] saving retrieved cached progress for blocks stage failed") + return startHeight, ErrSavingCachedBodiesProgressFail + } + + // update the progress + if useInternalTx { + if err := tx.Commit(); err != nil { + return startHeight, err + } + } + + return p, nil +} + +func (b *StageBodies) Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = b.configs.db.BeginRw(b.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + // terminate background process in turbo mode + if b.configs.bgProcRunning { + b.configs.bgProcRunning = false + b.configs.turboModeCh <- struct{}{} + close(b.configs.turboModeCh) + } + + // clean block hashes db + blocksBucketName := GetBucketName(DownloadedBlocksBucket, b.configs.isBeacon) + if err = tx.ClearBucket(blocksBucketName); err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_SYNC] clear blocks bucket after revert failed") + return err + } + + // clean cache db as well + if err := b.clearCache(); err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_SYNC] clear blocks cache failed") + return err + } + + // save progress + currentHead := b.configs.bc.CurrentBlock().NumberU64() + if err = s.Update(tx, currentHead); err != nil { + utils.Logger().Error(). + Err(err). 
+ Msgf("[STAGED_SYNC] saving progress for block bodies stage after revert failed") + return err + } + + if err = u.Done(tx); err != nil { + return err + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (b *StageBodies) CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = b.configs.db.BeginRw(b.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + // terminate background process in turbo mode + if b.configs.bgProcRunning { + b.configs.bgProcRunning = false + b.configs.turboModeCh <- struct{}{} + close(b.configs.turboModeCh) + } + blocksBucketName := GetBucketName(DownloadedBlocksBucket, b.configs.isBeacon) + tx.ClearBucket(blocksBucketName) + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} diff --git a/api/service/stagedsync/stage_finish.go b/api/service/stagedsync/stage_finish.go new file mode 100644 index 000000000..68396d3ab --- /dev/null +++ b/api/service/stagedsync/stage_finish.go @@ -0,0 +1,114 @@ +package stagedsync + +import ( + "context" + + "github.com/ledgerwatch/erigon-lib/kv" +) + +type StageFinish struct { + configs StageFinishCfg +} + +type StageFinishCfg struct { + ctx context.Context + db kv.RwDB +} + +func NewStageFinish(cfg StageFinishCfg) *StageFinish { + return &StageFinish{ + configs: cfg, + } +} + +func NewStageFinishCfg(ctx context.Context, db kv.RwDB) StageFinishCfg { + return StageFinishCfg{ + ctx: ctx, + db: db, + } +} + +func (finish *StageFinish) Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) error { + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = finish.configs.db.BeginRw(context.Background()) + if err != nil { + return err + } + defer tx.Rollback() + } + + // TODO: prepare indices (useful for RPC) and finalize + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + + return nil +} + +func (bh *StageFinish) clearBucket(tx kv.RwTx, isBeacon bool) error { + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = bh.configs.db.BeginRw(context.Background()) + if err != nil { + return err + } + defer tx.Rollback() + } + bucketName := GetBucketName(BlockHashesBucket, isBeacon) + if err := tx.ClearBucket(bucketName); err != nil { + return err + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (finish *StageFinish) Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = finish.configs.db.BeginRw(finish.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + if err = u.Done(tx); err != nil { + return err + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (finish *StageFinish) CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = finish.configs.db.BeginRw(finish.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} diff --git a/api/service/stagedsync/stage_heads.go b/api/service/stagedsync/stage_heads.go new file mode 100644 index 000000000..6dcde6246 --- /dev/null +++ b/api/service/stagedsync/stage_heads.go @@ -0,0 +1,146 @@ +package 
stagedsync + +import ( + "context" + + "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/internal/utils" + "github.com/ledgerwatch/erigon-lib/kv" +) + +type StageHeads struct { + configs StageHeadsCfg +} + +type StageHeadsCfg struct { + ctx context.Context + bc core.BlockChain + db kv.RwDB +} + +func NewStageHeads(cfg StageHeadsCfg) *StageHeads { + return &StageHeads{ + configs: cfg, + } +} + +func NewStageHeadersCfg(ctx context.Context, bc core.BlockChain, db kv.RwDB) StageHeadsCfg { + return StageHeadsCfg{ + ctx: ctx, + bc: bc, + db: db, + } +} + +func (heads *StageHeads) Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) error { + + if len(s.state.syncConfig.peers) < NumPeersLowBound { + return ErrNotEnoughConnectedPeers + } + + // no need to update target if we are redoing the stages because of bad block + if invalidBlockRevert { + return nil + } + + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = heads.configs.db.BeginRw(heads.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + maxPeersHeight := s.state.syncStatus.MaxPeersHeight + maxBlocksPerSyncCycle := s.state.MaxBlocksPerSyncCycle + currentHeight := heads.configs.bc.CurrentBlock().NumberU64() + s.state.syncStatus.currentCycle.TargetHeight = maxPeersHeight + targetHeight := uint64(0) + if errV := CreateView(heads.configs.ctx, heads.configs.db, tx, func(etx kv.Tx) (err error) { + if targetHeight, err = s.CurrentStageProgress(etx); err != nil { + return err + } + return nil + }); errV != nil { + return errV + } + + // if current height is ahead of target height, we need recalculate target height + if targetHeight <= currentHeight { + if maxPeersHeight <= currentHeight { + return nil + } + utils.Logger().Info(). + Uint64("max blocks per sync cycle", maxBlocksPerSyncCycle). + Uint64("maxPeersHeight", maxPeersHeight). + Msgf("[STAGED_SYNC] current height is ahead of target height, target height is readjusted to max peers height") + targetHeight = maxPeersHeight + } + + if targetHeight > maxPeersHeight { + targetHeight = maxPeersHeight + } + + if maxBlocksPerSyncCycle > 0 && targetHeight-currentHeight > maxBlocksPerSyncCycle { + targetHeight = currentHeight + maxBlocksPerSyncCycle + } + + s.state.syncStatus.currentCycle.TargetHeight = targetHeight + + if err := s.Update(tx, targetHeight); err != nil { + utils.Logger().Error(). + Err(err). 
+ Msgf("[STAGED_SYNC] saving progress for headers stage failed") + return err + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + + return nil +} + +func (heads *StageHeads) Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = heads.configs.db.BeginRw(context.Background()) + if err != nil { + return err + } + defer tx.Rollback() + } + + if err = u.Done(tx); err != nil { + return err + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (heads *StageHeads) CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = heads.configs.db.BeginRw(context.Background()) + if err != nil { + return err + } + defer tx.Rollback() + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} diff --git a/api/service/stagedsync/stage_lastmile.go b/api/service/stagedsync/stage_lastmile.go new file mode 100644 index 000000000..df6079bd0 --- /dev/null +++ b/api/service/stagedsync/stage_lastmile.go @@ -0,0 +1,121 @@ +package stagedsync + +import ( + "context" + + "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/core/types" + "github.com/ledgerwatch/erigon-lib/kv" +) + +type StageLastMile struct { + configs StageLastMileCfg +} + +type StageLastMileCfg struct { + ctx context.Context + bc core.BlockChain + db kv.RwDB +} + +func NewStageLastMile(cfg StageLastMileCfg) *StageLastMile { + return &StageLastMile{ + configs: cfg, + } +} + +func NewStageLastMileCfg(ctx context.Context, bc core.BlockChain, db kv.RwDB) StageLastMileCfg { + return StageLastMileCfg{ + ctx: ctx, + bc: bc, + db: db, + } +} + +func (lm *StageLastMile) Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { + + maxPeersHeight := s.state.syncStatus.MaxPeersHeight + targetHeight := s.state.syncStatus.currentCycle.TargetHeight + isLastCycle := targetHeight >= maxPeersHeight + if !isLastCycle { + return nil + } + + bc := lm.configs.bc + // update blocks after node start sync + parentHash := bc.CurrentBlock().Hash() + for { + block := s.state.getMaxConsensusBlockFromParentHash(parentHash) + if block == nil { + break + } + err = s.state.UpdateBlockAndStatus(block, bc, true) + if err != nil { + break + } + parentHash = block.Hash() + } + // TODO ek – Do we need to hold syncMux now that syncConfig has its own mutex? 
+ s.state.syncMux.Lock() + s.state.syncConfig.ForEachPeer(func(peer *SyncPeerConfig) (brk bool) { + peer.newBlocks = []*types.Block{} + return + }) + s.state.syncMux.Unlock() + + // update last mile blocks if any + parentHash = bc.CurrentBlock().Hash() + for { + block := s.state.getBlockFromLastMileBlocksByParentHash(parentHash) + if block == nil { + break + } + err = s.state.UpdateBlockAndStatus(block, bc, false) + if err != nil { + break + } + parentHash = block.Hash() + } + + return nil +} + +func (lm *StageLastMile) Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = lm.configs.db.BeginRw(lm.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + if err = u.Done(tx); err != nil { + return err + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (lm *StageLastMile) CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = lm.configs.db.BeginRw(lm.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} diff --git a/api/service/stagedsync/stage_state.go b/api/service/stagedsync/stage_state.go new file mode 100644 index 000000000..70e64516b --- /dev/null +++ b/api/service/stagedsync/stage_state.go @@ -0,0 +1,330 @@ +package stagedsync + +import ( + "context" + "fmt" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/core/types" + "github.com/harmony-one/harmony/internal/chain" + "github.com/harmony-one/harmony/internal/utils" + "github.com/ledgerwatch/erigon-lib/kv" + "github.com/pkg/errors" +) + +type StageStates struct { + configs StageStatesCfg +} +type StageStatesCfg struct { + ctx context.Context + bc core.BlockChain + db kv.RwDB + logProgress bool +} + +func NewStageStates(cfg StageStatesCfg) *StageStates { + return &StageStates{ + configs: cfg, + } +} + +func NewStageStatesCfg(ctx context.Context, bc core.BlockChain, db kv.RwDB, logProgress bool) StageStatesCfg { + return StageStatesCfg{ + ctx: ctx, + bc: bc, + db: db, + logProgress: logProgress, + } +} + +func getBlockHashByHeight(h uint64, isBeacon bool, tx kv.RwTx) common.Hash { + var invalidBlockHash common.Hash + hashesBucketName := GetBucketName(BlockHashesBucket, isBeacon) + blockHeight := marshalData(h) + if invalidBlockHashBytes, err := tx.GetOne(hashesBucketName, blockHeight); err == nil { + invalidBlockHash.SetBytes(invalidBlockHashBytes) + } + return invalidBlockHash +} + +// Exec progresses States stage in the forward direction +func (stg *StageStates) Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { + + maxPeersHeight := s.state.syncStatus.MaxPeersHeight + currentHead := stg.configs.bc.CurrentBlock().NumberU64() + if currentHead >= maxPeersHeight { + return nil + } + currProgress := stg.configs.bc.CurrentBlock().NumberU64() + targetHeight := s.state.syncStatus.currentCycle.TargetHeight + if currProgress >= targetHeight { + return nil + } + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = stg.configs.db.BeginRw(stg.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + blocksBucketName := GetBucketName(DownloadedBlocksBucket, s.state.isBeacon) + isLastCycle := targetHeight >= maxPeersHeight 
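+ // on the last cycle the node catches up to the network tip, so every remaining block
+ // signature is verified instead of sampling one block per VerifyHeaderBatchSize blocks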
+ verifyAllSig := s.state.VerifyAllSig || isLastCycle //if it's last cycle, we have to check all signatures + startTime := time.Now() + startBlock := currProgress + var newBlocks types.Blocks + nBlock := int(0) + + if stg.configs.logProgress { + fmt.Print("\033[s") // save the cursor position + } + + for i := currProgress + 1; i <= targetHeight; i++ { + key := marshalData(i) + blockBytes, err := tx.GetOne(blocksBucketName, key) + if err != nil { + return err + } + + // if block size is invalid, we have to break the updating state loop + // we don't need to do rollback, because the latest batch haven't added to chain yet + sz := len(blockBytes) + if sz <= 1 { + utils.Logger().Error(). + Uint64("block number", i). + Msg("block size invalid") + invalidBlockHash := getBlockHashByHeight(i, s.state.isBeacon, tx) + s.state.RevertTo(stg.configs.bc.CurrentBlock().NumberU64(), invalidBlockHash) + return ErrInvalidBlockBytes + } + + block, err := RlpDecodeBlockOrBlockWithSig(blockBytes) + if err != nil { + utils.Logger().Error(). + Err(err). + Uint64("block number", i). + Msg("block RLP decode failed") + invalidBlockHash := getBlockHashByHeight(i, s.state.isBeacon, tx) + s.state.RevertTo(stg.configs.bc.CurrentBlock().NumberU64(), invalidBlockHash) + return err + } + + /* + // TODO: use hash as key and here check key (which is hash) against block.header.hash + gotHash := block.Hash() + if !bytes.Equal(gotHash[:], tasks[i].blockHash) { + utils.Logger().Warn(). + Err(errors.New("wrong block delivery")). + Str("expectHash", hex.EncodeToString(tasks[i].blockHash)). + Str("gotHash", hex.EncodeToString(gotHash[:])) + continue + } + */ + if block.NumberU64() != i { + invalidBlockHash := getBlockHashByHeight(i, s.state.isBeacon, tx) + s.state.RevertTo(stg.configs.bc.CurrentBlock().NumberU64(), invalidBlockHash) + return ErrInvalidBlockNumber + } + if block.NumberU64() <= currProgress { + continue + } + + // Verify block signatures + if block.NumberU64() > 1 { + // Verify signature every N blocks (which N is verifyHeaderBatchSize and can be adjusted in configs) + haveCurrentSig := len(block.GetCurrentCommitSig()) != 0 + verifySeal := block.NumberU64()%s.state.VerifyHeaderBatchSize == 0 || verifyAllSig + verifyCurrentSig := verifyAllSig && haveCurrentSig + bc := stg.configs.bc + if err = stg.verifyBlockSignatures(bc, block, verifyCurrentSig, verifySeal, verifyAllSig); err != nil { + invalidBlockHash := getBlockHashByHeight(i, s.state.isBeacon, tx) + s.state.RevertTo(stg.configs.bc.CurrentBlock().NumberU64(), invalidBlockHash) + return err + } + + /* + //TODO: we are handling the bad blocks and already blocks are verified, so do we need verify header? 
+ err := stg.configs.bc.Engine().VerifyHeader(stg.configs.bc, block.Header(), verifySeal) + if err == engine.ErrUnknownAncestor { + return err + } else if err != nil { + utils.Logger().Error().Err(err).Msgf("[STAGED_SYNC] failed verifying signatures for new block %d", block.NumberU64()) + if !verifyAllSig { + utils.Logger().Info().Interface("block", stg.configs.bc.CurrentBlock()).Msg("[STAGED_SYNC] Rolling back last 99 blocks!") + for i := uint64(0); i < s.state.VerifyHeaderBatchSize-1; i++ { + if rbErr := stg.configs.bc.Rollback([]common.Hash{stg.configs.bc.CurrentBlock().Hash()}); rbErr != nil { + utils.Logger().Err(rbErr).Msg("[STAGED_SYNC] UpdateBlockAndStatus: failed to rollback") + return err + } + } + currProgress = stg.configs.bc.CurrentBlock().NumberU64() + } + return err + } + */ + } + + newBlocks = append(newBlocks, block) + if nBlock < s.state.InsertChainBatchSize-1 && block.NumberU64() < targetHeight { + nBlock++ + continue + } + + // insert downloaded block into chain + headBeforeNewBlocks := stg.configs.bc.CurrentBlock().NumberU64() + headHashBeforeNewBlocks := stg.configs.bc.CurrentBlock().Hash() + _, err = stg.configs.bc.InsertChain(newBlocks, false) //TODO: verifyHeaders can be done here + if err != nil { + // TODO: handle chain rollback because of bad block + utils.Logger().Error(). + Err(err). + Uint64("block number", block.NumberU64()). + Uint32("shard", block.ShardID()). + Msgf("[STAGED_SYNC] UpdateBlockAndStatus: Error adding new block to blockchain") + // rollback bc + utils.Logger().Info(). + Interface("block", stg.configs.bc.CurrentBlock()). + Msg("[STAGED_SYNC] Rolling back last added blocks!") + if rbErr := stg.configs.bc.Rollback([]common.Hash{headHashBeforeNewBlocks}); rbErr != nil { + utils.Logger().Error(). + Err(rbErr). + Msg("[STAGED_SYNC] UpdateBlockAndStatus: failed to rollback") + return err + } + s.state.RevertTo(headBeforeNewBlocks, headHashBeforeNewBlocks) + return err + } + utils.Logger().Info(). + Uint64("blockHeight", block.NumberU64()). + Uint64("blockEpoch", block.Epoch().Uint64()). + Str("blockHex", block.Hash().Hex()). + Uint32("ShardID", block.ShardID()). + Msg("[STAGED_SYNC] UpdateBlockAndStatus: New Block Added to Blockchain") + + // update cur progress + currProgress = stg.configs.bc.CurrentBlock().NumberU64() + + for i, tx := range block.StakingTransactions() { + utils.Logger().Info(). 
+ Msgf( + "StakingTxn %d: %s, %v", i, tx.StakingType().String(), tx.StakingMessage(), + ) + } + + nBlock = 0 + newBlocks = newBlocks[:0] + // log the stage progress in console + if stg.configs.logProgress { + //calculating block speed + dt := time.Now().Sub(startTime).Seconds() + speed := float64(0) + if dt > 0 { + speed = float64(currProgress-startBlock) / dt + } + blockSpeed := fmt.Sprintf("%.2f", speed) + fmt.Print("\033[u\033[K") // restore the cursor position and clear the line + fmt.Println("insert blocks progress:", currProgress, "/", targetHeight, "(", blockSpeed, "blocks/s", ")") + } + + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + + return nil +} + +//verifyBlockSignatures verifies block signatures +func (stg *StageStates) verifyBlockSignatures(bc core.BlockChain, block *types.Block, verifyCurrentSig bool, verifySeal bool, verifyAllSig bool) (err error) { + if verifyCurrentSig { + sig, bitmap, err := chain.ParseCommitSigAndBitmap(block.GetCurrentCommitSig()) + if err != nil { + return errors.Wrap(err, "parse commitSigAndBitmap") + } + + startTime := time.Now() + if err := bc.Engine().VerifyHeaderSignature(bc, block.Header(), sig, bitmap); err != nil { + return errors.Wrapf(err, "verify header signature %v", block.Hash().String()) + } + utils.Logger().Debug(). + Int64("elapsed time", time.Now().Sub(startTime).Milliseconds()). + Msg("[STAGED_SYNC] VerifyHeaderSignature") + } + return nil +} + +// saveProgress saves the stage progress +func (stg *StageStates) saveProgress(s *StageState, tx kv.RwTx) (err error) { + + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = stg.configs.db.BeginRw(context.Background()) + if err != nil { + return err + } + defer tx.Rollback() + } + + // save progress + if err = s.Update(tx, stg.configs.bc.CurrentBlock().NumberU64()); err != nil { + utils.Logger().Error(). + Err(err). 
+ Msgf("[STAGED_SYNC] saving progress for block States stage failed") + return ErrSaveStateProgressFail + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (stg *StageStates) Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = stg.configs.db.BeginRw(stg.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + if err = u.Done(tx); err != nil { + return err + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (stg *StageStates) CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = stg.configs.db.BeginRw(stg.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} diff --git a/api/service/stagedsync/stagedsync.go b/api/service/stagedsync/stagedsync.go new file mode 100644 index 000000000..67369c0fa --- /dev/null +++ b/api/service/stagedsync/stagedsync.go @@ -0,0 +1,1316 @@ +package stagedsync + +import ( + "bytes" + "context" + "encoding/hex" + "fmt" + "math" + "sort" + "strconv" + "sync" + "time" + + "github.com/pkg/errors" + + "github.com/Workiva/go-datastructures/queue" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/rlp" + "github.com/harmony-one/harmony/api/service/legacysync/downloader" + "github.com/harmony-one/harmony/consensus" + "github.com/harmony-one/harmony/consensus/engine" + "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/core/types" + "github.com/harmony-one/harmony/internal/chain" + nodeconfig "github.com/harmony-one/harmony/internal/configs/node" + "github.com/harmony-one/harmony/internal/utils" + "github.com/harmony-one/harmony/p2p" + "github.com/ledgerwatch/erigon-lib/kv" +) + +type StagedSync struct { + selfip string + selfport string + selfPeerHash [20]byte // hash of ip and address combination + commonBlocks map[int]*types.Block + downloadedBlocks map[uint64][]byte + lastMileBlocks []*types.Block // last mile blocks to catch up with the consensus + syncConfig *SyncConfig + isExplorer bool + stateSyncTaskQueue *queue.Queue + syncMux sync.Mutex + lastMileMux sync.Mutex + syncStatus syncStatus + lockBlocks sync.RWMutex + + ctx context.Context + bc core.BlockChain + isBeacon bool + db kv.RwDB + + revertPoint *uint64 // used to run stages + prevRevertPoint *uint64 // used to get value from outside of staged sync after cycle (for example to notify RPCDaemon) + invalidBlock common.Hash + + stages []*Stage + revertOrder []*Stage + pruningOrder []*Stage + currentStage uint + timings []Timing + logPrefixes []string + + // if set to true, it will double check the block hashes + // so, only blocks are sent by 2/3 of peers are considered as valid + DoubleCheckBlockHashes bool + // Maximum number of blocks per each cycle. if set to zero, all blocks will be + // downloaded and synced in one full cycle. + MaxBlocksPerSyncCycle uint64 + // maximum number of blocks which can be downloaded in background. 
+ MaxBackgroundBlocks uint64 + // max number of blocks to use a single transaction for staged sync + MaxMemSyncCycleSize uint64 + // number of blocks to build a batch and insert to chain in staged sync + InsertChainBatchSize int + // verify signature for all blocks regardless of height or batch size + VerifyAllSig bool + // batch size to verify header before insert to chain + VerifyHeaderBatchSize uint64 + // use mem db for staged sync, set to false to use disk + UseMemDB bool + // use turbo mode for staged sync + StagedSyncTurboMode bool + // log the full sync progress in console + LogProgress bool +} + +// BlockWithSig the serialization structure for request DownloaderRequest_BLOCKWITHSIG +// The block is encoded as block + commit signature +type BlockWithSig struct { + Block *types.Block + CommitSigAndBitmap []byte +} + +type Timing struct { + isRevert bool + isCleanUp bool + stage SyncStageID + took time.Duration +} + +func (s *StagedSync) Len() int { return len(s.stages) } +func (s *StagedSync) Context() context.Context { return s.ctx } +func (s *StagedSync) IsBeacon() bool { return s.isBeacon } +func (s *StagedSync) IsExplorer() bool { return s.isExplorer } +func (s *StagedSync) Blockchain() core.BlockChain { return s.bc } +func (s *StagedSync) DB() kv.RwDB { return s.db } +func (s *StagedSync) PrevRevertPoint() *uint64 { return s.prevRevertPoint } + +func (s *StagedSync) InitDownloadedBlocksMap() error { + s.lockBlocks.Lock() + defer s.lockBlocks.Unlock() + s.downloadedBlocks = make(map[uint64][]byte) + return nil +} + +func (s *StagedSync) AddBlocks(blks map[uint64][]byte) error { + s.lockBlocks.Lock() + defer s.lockBlocks.Unlock() + for k, blkBytes := range blks { + s.downloadedBlocks[k] = make([]byte, len(blkBytes)) + copy(s.downloadedBlocks[k], blkBytes[:]) + } + return nil +} + +func (s *StagedSync) GetDownloadedBlocks() map[uint64][]byte { + d := make(map[uint64][]byte) + s.lockBlocks.RLock() + defer s.lockBlocks.RUnlock() + for k, blkBytes := range s.downloadedBlocks { + d[k] = make([]byte, len(blkBytes)) + copy(d[k], blkBytes[:]) + } + return d +} + +func (s *StagedSync) NewRevertState(id SyncStageID, revertPoint, currentProgress uint64) *RevertState { + return &RevertState{id, revertPoint, currentProgress, common.Hash{}, s} +} + +func (s *StagedSync) CleanUpStageState(id SyncStageID, forwardProgress uint64, tx kv.Tx, db kv.RwDB) (*CleanUpState, error) { + var pruneProgress uint64 + var err error + + if errV := CreateView(context.Background(), db, tx, func(tx kv.Tx) error { + pruneProgress, err = GetStageCleanUpProgress(tx, id, s.isBeacon) + if err != nil { + return err + } + return nil + }); errV != nil { + return nil, errV + } + + return &CleanUpState{id, forwardProgress, pruneProgress, s}, nil +} + +func (s *StagedSync) NextStage() { + if s == nil { + return + } + s.currentStage++ +} + +// IsBefore returns true if stage1 goes before stage2 in staged sync +func (s *StagedSync) IsBefore(stage1, stage2 SyncStageID) bool { + idx1 := -1 + idx2 := -1 + for i, stage := range s.stages { + if stage.ID == stage1 { + idx1 = i + } + + if stage.ID == stage2 { + idx2 = i + } + } + + return idx1 < idx2 +} + +// IsAfter returns true if stage1 goes after stage2 in staged sync +func (s *StagedSync) IsAfter(stage1, stage2 SyncStageID) bool { + idx1 := -1 + idx2 := -1 + for i, stage := range s.stages { + if stage.ID == stage1 { + idx1 = i + } + + if stage.ID == stage2 { + idx2 = i + } + } + + return idx1 > idx2 +} + +// RevertTo reverts the stage to a specific height +func (s *StagedSync) 
RevertTo(revertPoint uint64, invalidBlock common.Hash) { + utils.Logger().Info(). + Interface("invalidBlock", invalidBlock). + Uint64("revertPoint", revertPoint). + Msgf("[STAGED_SYNC] Reverting blocks") + s.revertPoint = &revertPoint + s.invalidBlock = invalidBlock +} + +func (s *StagedSync) Done() { + s.currentStage = uint(len(s.stages)) + s.revertPoint = nil +} + +func (s *StagedSync) IsDone() bool { + return s.currentStage >= uint(len(s.stages)) && s.revertPoint == nil +} + +func (s *StagedSync) LogPrefix() string { + if s == nil { + return "" + } + return s.logPrefixes[s.currentStage] +} + +func (s *StagedSync) SetCurrentStage(id SyncStageID) error { + for i, stage := range s.stages { + if stage.ID == id { + s.currentStage = uint(i) + return nil + } + } + utils.Logger().Error(). + Interface("stage id", id). + Msgf("[STAGED_SYNC] stage not found") + + return ErrStageNotFound +} + +func New(ctx context.Context, + ip string, + port string, + peerHash [20]byte, + bc core.BlockChain, + role nodeconfig.Role, + isBeacon bool, + isExplorer bool, + db kv.RwDB, + stagesList []*Stage, + revertOrder RevertOrder, + pruneOrder CleanUpOrder, + TurboMode bool, + UseMemDB bool, + doubleCheckBlockHashes bool, + maxBlocksPerCycle uint64, + maxBackgroundBlocks uint64, + maxMemSyncCycleSize uint64, + verifyAllSig bool, + verifyHeaderBatchSize uint64, + insertChainBatchSize int, + logProgress bool) *StagedSync { + + revertStages := make([]*Stage, len(stagesList)) + for i, stageIndex := range revertOrder { + for _, s := range stagesList { + if s.ID == stageIndex { + revertStages[i] = s + break + } + } + } + pruneStages := make([]*Stage, len(stagesList)) + for i, stageIndex := range pruneOrder { + for _, s := range stagesList { + if s.ID == stageIndex { + pruneStages[i] = s + break + } + } + } + logPrefixes := make([]string, len(stagesList)) + for i := range stagesList { + logPrefixes[i] = fmt.Sprintf("%d/%d %s", i+1, len(stagesList), stagesList[i].ID) + } + + return &StagedSync{ + ctx: ctx, + selfip: ip, + selfport: port, + selfPeerHash: peerHash, + bc: bc, + isBeacon: isBeacon, + isExplorer: isExplorer, + db: db, + stages: stagesList, + currentStage: 0, + revertOrder: revertStages, + pruningOrder: pruneStages, + logPrefixes: logPrefixes, + syncStatus: NewSyncStatus(role), + commonBlocks: make(map[int]*types.Block), + downloadedBlocks: make(map[uint64][]byte), + lastMileBlocks: []*types.Block{}, + syncConfig: &SyncConfig{}, + StagedSyncTurboMode: TurboMode, + UseMemDB: UseMemDB, + DoubleCheckBlockHashes: doubleCheckBlockHashes, + MaxBlocksPerSyncCycle: maxBlocksPerCycle, + MaxBackgroundBlocks: maxBackgroundBlocks, + MaxMemSyncCycleSize: maxMemSyncCycleSize, + VerifyAllSig: verifyAllSig, + VerifyHeaderBatchSize: verifyHeaderBatchSize, + InsertChainBatchSize: insertChainBatchSize, + LogProgress: logProgress, + } +} + +func (s *StagedSync) StageState(stage SyncStageID, tx kv.Tx, db kv.RwDB) (*StageState, error) { + var blockNum uint64 + var err error + if errV := CreateView(context.Background(), db, tx, func(rtx kv.Tx) error { + blockNum, err = GetStageProgress(rtx, stage, s.isBeacon) + if err != nil { + return err + } + return nil + }); errV != nil { + return nil, errV + } + + return &StageState{s, stage, blockNum}, nil +} + +func (s *StagedSync) cleanUp(fromStage int, db kv.RwDB, tx kv.RwTx, firstCycle bool) error { + found := false + for i := 0; i < len(s.pruningOrder); i++ { + if s.pruningOrder[i].ID == s.stages[fromStage].ID { + found = true + } + if !found || s.pruningOrder[i] == nil || 
s.pruningOrder[i].Disabled { + continue + } + if err := s.pruneStage(firstCycle, s.pruningOrder[i], db, tx); err != nil { + panic(err) + } + } + return nil +} + +func (s *StagedSync) Run(db kv.RwDB, tx kv.RwTx, firstCycle bool) error { + s.prevRevertPoint = nil + s.timings = s.timings[:0] + + for !s.IsDone() { + var invalidBlockRevert bool + if s.revertPoint != nil { + for j := 0; j < len(s.revertOrder); j++ { + if s.revertOrder[j] == nil || s.revertOrder[j].Disabled { + continue + } + if err := s.revertStage(firstCycle, s.revertOrder[j], db, tx); err != nil { + return err + } + } + s.prevRevertPoint = s.revertPoint + s.revertPoint = nil + if s.invalidBlock != (common.Hash{}) { + invalidBlockRevert = true + } + s.invalidBlock = common.Hash{} + if err := s.SetCurrentStage(s.stages[0].ID); err != nil { + return err + } + firstCycle = false + } + + stage := s.stages[s.currentStage] + + if stage.Disabled { + utils.Logger().Trace(). + Msgf("[STAGED_SYNC] %s disabled. %s", stage.ID, stage.DisabledDescription) + + s.NextStage() + continue + } + + if err := s.runStage(stage, db, tx, firstCycle, invalidBlockRevert); err != nil { + return err + } + + s.NextStage() + } + + if err := s.cleanUp(0, db, tx, firstCycle); err != nil { + return err + } + if err := s.SetCurrentStage(s.stages[0].ID); err != nil { + return err + } + if err := printLogs(tx, s.timings); err != nil { + return err + } + s.currentStage = 0 + return nil +} + +func CreateView(ctx context.Context, db kv.RwDB, tx kv.Tx, f func(tx kv.Tx) error) error { + if tx != nil { + return f(tx) + } + return db.View(ctx, func(etx kv.Tx) error { + return f(etx) + }) +} + +func ByteCount(b uint64) string { + const unit = 1024 + if b < unit { + return fmt.Sprintf("%dB", b) + } + div, exp := uint64(unit), 0 + for n := b / unit; n >= unit; n /= unit { + div *= unit + exp++ + } + return fmt.Sprintf("%.1f%cB", + float64(b)/float64(div), "KMGTPE"[exp]) +} + +func printLogs(tx kv.RwTx, timings []Timing) error { + var logCtx []interface{} + count := 0 + for i := range timings { + if timings[i].took < 50*time.Millisecond { + continue + } + count++ + if count == 50 { + break + } + if timings[i].isRevert { + logCtx = append(logCtx, "Revert "+string(timings[i].stage), timings[i].took.Truncate(time.Millisecond).String()) + } else if timings[i].isCleanUp { + logCtx = append(logCtx, "CleanUp "+string(timings[i].stage), timings[i].took.Truncate(time.Millisecond).String()) + } else { + logCtx = append(logCtx, string(timings[i].stage), timings[i].took.Truncate(time.Millisecond).String()) + } + } + if len(logCtx) > 0 { + utils.Logger().Info(). + Msgf("[STAGED_SYNC] Timings (slower than 50ms) %v", logCtx...) + } + + if tx == nil { + return nil + } + + if len(logCtx) > 0 { // also don't print this logs if everything is fast + buckets := Buckets + bucketSizes := make([]interface{}, 0, 2*len(buckets)) + for _, bucket := range buckets { + sz, err1 := tx.BucketSize(bucket) + if err1 != nil { + return err1 + } + bucketSizes = append(bucketSizes, bucket, ByteCount(sz)) + } + utils.Logger().Info(). + Msgf("[STAGED_SYNC] Tables %v", bucketSizes...) + } + tx.CollectMetrics() + return nil +} + +func (s *StagedSync) runStage(stage *Stage, db kv.RwDB, tx kv.RwTx, firstCycle bool, invalidBlockRevert bool) (err error) { + start := time.Now() + stageState, err := s.StageState(stage.ID, tx, db) + if err != nil { + return err + } + + if err = stage.Handler.Exec(firstCycle, invalidBlockRevert, stageState, s, tx); err != nil { + utils.Logger().Error(). + Err(err). 
+ Interface("stage id", stage.ID). + Msgf("[STAGED_SYNC] stage failed") + return fmt.Errorf("[%s] %w", s.LogPrefix(), err) + } + utils.Logger().Info(). + Msgf("[STAGED_SYNC] stage %s executed successfully", stage.ID) + + took := time.Since(start) + if took > 60*time.Second { + logPrefix := s.LogPrefix() + utils.Logger().Info(). + Msgf("[STAGED_SYNC] [%s] DONE in %d", logPrefix, took) + + } + s.timings = append(s.timings, Timing{stage: stage.ID, took: took}) + return nil +} + +func (s *StagedSync) revertStage(firstCycle bool, stage *Stage, db kv.RwDB, tx kv.RwTx) error { + start := time.Now() + utils.Logger().Trace(). + Msgf("[STAGED_SYNC] Revert... stage %s", stage.ID) + stageState, err := s.StageState(stage.ID, tx, db) + if err != nil { + return err + } + + revert := s.NewRevertState(stage.ID, *s.revertPoint, stageState.BlockNumber) + revert.InvalidBlock = s.invalidBlock + + if stageState.BlockNumber <= revert.RevertPoint { + return nil + } + + if err = s.SetCurrentStage(stage.ID); err != nil { + return err + } + + err = stage.Handler.Revert(firstCycle, revert, stageState, tx) + if err != nil { + return fmt.Errorf("[%s] %w", s.LogPrefix(), err) + } + + took := time.Since(start) + if took > 60*time.Second { + logPrefix := s.LogPrefix() + utils.Logger().Info(). + Msgf("[STAGED_SYNC] [%s] Revert done in %d", logPrefix, took) + } + s.timings = append(s.timings, Timing{isRevert: true, stage: stage.ID, took: took}) + return nil +} + +func (s *StagedSync) pruneStage(firstCycle bool, stage *Stage, db kv.RwDB, tx kv.RwTx) error { + start := time.Now() + utils.Logger().Info(). + Msgf("[STAGED_SYNC] CleanUp... stage %s", stage.ID) + + stageState, err := s.StageState(stage.ID, tx, db) + if err != nil { + return err + } + + prune, err := s.CleanUpStageState(stage.ID, stageState.BlockNumber, tx, db) + if err != nil { + return err + } + if err = s.SetCurrentStage(stage.ID); err != nil { + return err + } + + err = stage.Handler.CleanUp(firstCycle, prune, tx) + if err != nil { + return fmt.Errorf("[%s] %w", s.LogPrefix(), err) + } + + took := time.Since(start) + if took > 60*time.Second { + logPrefix := s.LogPrefix() + utils.Logger().Trace(). + Msgf("[STAGED_SYNC] [%s] CleanUp done in %d", logPrefix, took) + + utils.Logger().Info(). 
+ Msgf("[STAGED_SYNC] [%s] CleanUp done in %d", logPrefix, took) + } + s.timings = append(s.timings, Timing{isCleanUp: true, stage: stage.ID, took: took}) + return nil +} + +// DisableAllStages - including their reverts +func (s *StagedSync) DisableAllStages() []SyncStageID { + var backupEnabledIds []SyncStageID + for i := range s.stages { + if !s.stages[i].Disabled { + backupEnabledIds = append(backupEnabledIds, s.stages[i].ID) + } + } + for i := range s.stages { + s.stages[i].Disabled = true + } + return backupEnabledIds +} + +func (s *StagedSync) DisableStages(ids ...SyncStageID) { + for i := range s.stages { + for _, id := range ids { + if s.stages[i].ID != id { + continue + } + s.stages[i].Disabled = true + } + } +} + +func (s *StagedSync) EnableStages(ids ...SyncStageID) { + for i := range s.stages { + for _, id := range ids { + if s.stages[i].ID != id { + continue + } + s.stages[i].Disabled = false + } + } +} + +func (ss *StagedSync) purgeAllBlocksFromCache() { + ss.lastMileMux.Lock() + ss.lastMileBlocks = nil + ss.lastMileMux.Unlock() + + ss.syncMux.Lock() + defer ss.syncMux.Unlock() + ss.commonBlocks = make(map[int]*types.Block) + + ss.syncConfig.ForEachPeer(func(configPeer *SyncPeerConfig) (brk bool) { + configPeer.blockHashes = nil + configPeer.newBlocks = nil + return + }) +} + +func (ss *StagedSync) purgeOldBlocksFromCache() { + ss.syncMux.Lock() + defer ss.syncMux.Unlock() + ss.commonBlocks = make(map[int]*types.Block) + ss.syncConfig.ForEachPeer(func(configPeer *SyncPeerConfig) (brk bool) { + configPeer.blockHashes = nil + return + }) +} + +// AddLastMileBlock adds the latest a few block into queue for syncing +// only keep the latest blocks with size capped by LastMileBlocksSize +func (ss *StagedSync) AddLastMileBlock(block *types.Block) { + ss.lastMileMux.Lock() + defer ss.lastMileMux.Unlock() + if ss.lastMileBlocks != nil { + if len(ss.lastMileBlocks) >= LastMileBlocksSize { + ss.lastMileBlocks = ss.lastMileBlocks[1:] + } + ss.lastMileBlocks = append(ss.lastMileBlocks, block) + } +} + +// AddNewBlock will add newly received block into state syncing queue +func (ss *StagedSync) AddNewBlock(peerHash []byte, block *types.Block) { + pc := ss.syncConfig.FindPeerByHash(peerHash) + if pc == nil { + // Received a block with no active peer; just ignore. + return + } + // TODO ek – we shouldn't mess with SyncPeerConfig's mutex. + // Factor this into a method, like pc.AddNewBlock(block) + pc.mux.Lock() + defer pc.mux.Unlock() + pc.newBlocks = append(pc.newBlocks, block) + utils.Logger().Debug(). + Int("total", len(pc.newBlocks)). + Uint64("blockHeight", block.NumberU64()). + Msg("[STAGED_SYNC] new block received") +} + +// CreateSyncConfig creates SyncConfig for StateSync object. +func (ss *StagedSync) CreateSyncConfig(peers []p2p.Peer, shardID uint32) error { + // sanity check to ensure no duplicate peers + if err := checkPeersDuplicity(peers); err != nil { + return err + } + + // limit the number of dns peers to connect + randSeed := time.Now().UnixNano() + targetSize := ss.syncConfig.SelectRandomPeers(peers, randSeed) + + if len(peers) == 0 || targetSize == 0 { + return errors.New("[STAGED_SYNC] no peers to connect to") + } + + utils.Logger().Debug(). + Int("peers count", len(peers)). + Int("target size", targetSize). 
+ Msg("[STAGED_SYNC] CreateSyncConfig: len of peers") + + if ss.syncConfig != nil { + ss.syncConfig.CloseConnections() + } + ss.syncConfig = &SyncConfig{} + + var connectedPeers int + for _, peer := range peers { + client := downloader.ClientSetup(peer.IP, peer.Port, true) + if client == nil { + continue + } + peerConfig := &SyncPeerConfig{ + ip: peer.IP, + port: peer.Port, + client: client, + } + ss.syncConfig.AddPeer(peerConfig) + connectedPeers++ + if connectedPeers >= targetSize+NumPeersReserved { + break + } + } + + if connectedPeers == 0 { + return errors.New("[STAGED_SYNC] CreateSyncConfig: no ready peers to connect") + } + + // select reserved peers + if connectedPeers > targetSize { + ss.syncConfig.reservedPeers = make([]*SyncPeerConfig, connectedPeers-targetSize) + copy(ss.syncConfig.reservedPeers, ss.syncConfig.peers[targetSize:]) + } + // select main peers + ss.syncConfig.peers = ss.syncConfig.peers[:targetSize] + + utils.Logger().Info(). + Int("len", len(ss.syncConfig.peers)). + Msg("[STAGED_SYNC] Finished making connection to peers") + + return nil +} + +// checkPeersDuplicity checks whether there are duplicates in p2p.Peer +func checkPeersDuplicity(ps []p2p.Peer) error { + type peerDupID struct { + ip string + port string + } + m := make(map[peerDupID]struct{}) + for _, p := range ps { + dip := peerDupID{p.IP, p.Port} + if _, ok := m[dip]; ok { + return fmt.Errorf("duplicate peer [%v:%v]", p.IP, p.Port) + } + m[dip] = struct{}{} + } + return nil +} + +// GetActivePeerNumber returns the number of active peers +func (ss *StagedSync) GetActivePeerNumber() int { + if ss.syncConfig == nil { + return 0 + } + // len() is atomic; no need to hold mutex. + return len(ss.syncConfig.peers) +} + +// getConsensusHashes gets all hashes needed to download. +func (ss *StagedSync) getConsensusHashes(startHash []byte, size uint32, bgMode bool) error { + var bgModeError error + + var wg sync.WaitGroup + ss.syncConfig.ForEachPeer(func(peerConfig *SyncPeerConfig) (brk bool) { + wg.Add(1) + go func() { + defer wg.Done() + if !peerConfig.client.IsReady() { + // try to connect + if ready := peerConfig.client.WaitForConnection(1000 * time.Millisecond); !ready { + // replace it with reserved peer (in bg mode don't replace because maybe other stages still are using this node) + if bgMode { + bgModeError = ErrSomeNodesNotReady + brk = true //finish whole peers loop + } else { + if !peerConfig.client.IsConnecting() { + ss.syncConfig.ReplacePeerWithReserved(peerConfig, "not ready to download consensus hashes") + } + } + return + } + } + response := peerConfig.client.GetBlockHashes(startHash, size, ss.selfip, ss.selfport) + if response == nil { + utils.Logger().Warn(). + Str("peerIP", peerConfig.ip). + Str("peerPort", peerConfig.port). + Msg("[STAGED_SYNC] getConsensusHashes Nil Response, will be replaced with reserved node (if any)") + // replace it with reserved peer (in bg mode don't replace because maybe other stages still are using this node) + if bgMode { + bgModeError = ErrSomeNodesBlockHashFail + brk = true //finish whole peers loop + } else { + isBrokenPeer := peerConfig.AddFailedTime(downloadBlocksRetryLimit) + if isBrokenPeer { + ss.syncConfig.ReplacePeerWithReserved(peerConfig, "receiving nil response for block hashes") + } + } + return + } + utils.Logger().Info(). + Uint32("queried blockHash size", size). + Int("got blockHashSize", len(response.Payload)). + Str("PeerIP", peerConfig.ip). + Bool("background Mode", bgMode). 
+ Msg("[STAGED_SYNC] GetBlockHashes") + + if len(response.Payload) > int(size+1) { + utils.Logger().Warn(). + Uint32("requestSize", size). + Int("respondSize", len(response.Payload)). + Msg("[STAGED_SYNC] getConsensusHashes: receive more blockHashes than requested!") + peerConfig.blockHashes = response.Payload[:size+1] + } else { + peerConfig.blockHashes = response.Payload + } + + }() + return + }) + wg.Wait() + + return bgModeError +} + +// getInvalidPeersByBlockHashes analyzes block hashes and detects invalid peers +func (ss *StagedSync) getInvalidPeersByBlockHashes(tx kv.RwTx) (map[string]bool, int, error) { + invalidPeers := make(map[string]bool) + if len(ss.syncConfig.peers) < 3 { + lb := len(ss.syncConfig.peers[0].blockHashes) + return invalidPeers, lb, nil + } + + // confirmations threshold to consider as valid block hash + th := 2 * int(len(ss.syncConfig.peers)/3) + if len(ss.syncConfig.peers) == 4 { + th = 3 + } + + type BlockHashMap struct { + peers map[string]bool + isValid bool + } + + // populate the block hashes map + bhm := make(map[string]*BlockHashMap) + ss.syncConfig.ForEachPeer(func(peerConfig *SyncPeerConfig) (brk bool) { + for _, blkHash := range peerConfig.blockHashes { + k := string(blkHash) + if _, ok := bhm[k]; !ok { + bhm[k] = &BlockHashMap{ + peers: make(map[string]bool), + } + } + peerHash := string(peerConfig.peerHash) + bhm[k].peers[peerHash] = true + bhm[k].isValid = true + } + return + }) + + var validBlockHashes int + + for blkHash, hmap := range bhm { + + // if block is not confirmed by th% of peers, it is considered as invalid block + // So, any peer with that block hash will be considered as invalid peer + if len(hmap.peers) < th { + bhm[blkHash].isValid = false + for _, p := range ss.syncConfig.peers { + hasBlockHash := hmap.peers[string(p.peerHash)] + if hasBlockHash { + invalidPeers[string(p.peerHash)] = true + } + } + continue + } + + // so, block hash is valid, because have been sent by more than th number of peers + validBlockHashes++ + + // if all peers already sent this block hash, then it is considered as valid + if len(hmap.peers) == len(ss.syncConfig.peers) { + continue + } + + //consider invalid peer if it hasn't sent this block hash + for _, p := range ss.syncConfig.peers { + hasBlockHash := hmap.peers[string(p.peerHash)] + if !hasBlockHash { + invalidPeers[string(p.peerHash)] = true + } + } + + } + fmt.Printf("%d out of %d peers have missed blocks or sent invalid blocks\n", len(invalidPeers), len(ss.syncConfig.peers)) + return invalidPeers, validBlockHashes, nil +} + +func (ss *StagedSync) generateStateSyncTaskQueue(bc core.BlockChain, tx kv.RwTx) error { + ss.stateSyncTaskQueue = queue.New(0) + allTasksAddedToQueue := false + ss.syncConfig.ForEachPeer(func(configPeer *SyncPeerConfig) (brk bool) { + for id, blockHash := range configPeer.blockHashes { + if err := ss.stateSyncTaskQueue.Put(SyncBlockTask{index: id, blockHash: blockHash}); err != nil { + ss.stateSyncTaskQueue = queue.New(0) + utils.Logger().Error(). + Err(err). + Int("taskIndex", id). + Str("taskBlock", hex.EncodeToString(blockHash)). + Msg("[STAGED_SYNC] generateStateSyncTaskQueue: cannot add task") + break + } + } + // check if all block hashes added to task queue + if ss.stateSyncTaskQueue.Len() == int64(len(configPeer.blockHashes)) { + allTasksAddedToQueue = true + brk = true + } + return + }) + + if !allTasksAddedToQueue { + return ErrAddTaskFailed + } + utils.Logger().Info(). + Int64("length", ss.stateSyncTaskQueue.Len()). 
+ Msg("[STAGED_SYNC] generateStateSyncTaskQueue: finished") + return nil +} + +// RlpDecodeBlockOrBlockWithSig decodes payload to types.Block or BlockWithSig. +// Return the block with commitSig if set. +func RlpDecodeBlockOrBlockWithSig(payload []byte) (*types.Block, error) { + var block *types.Block + if err := rlp.DecodeBytes(payload, &block); err == nil { + // received payload as *types.Block + return block, nil + } + + var bws BlockWithSig + if err := rlp.DecodeBytes(payload, &bws); err == nil { + block := bws.Block + block.SetCurrentCommitSig(bws.CommitSigAndBitmap) + return block, nil + } + return nil, errors.New("failed to decode to either types.Block or BlockWithSig") +} + +// CompareBlockByHash compares two block by hash, it will be used in sort the blocks +func CompareBlockByHash(a *types.Block, b *types.Block) int { + ha := a.Hash() + hb := b.Hash() + return bytes.Compare(ha[:], hb[:]) +} + +// GetHowManyMaxConsensus will get the most common blocks and the first such blockID +func GetHowManyMaxConsensus(blocks []*types.Block) (int, int) { + // As all peers are sorted by their blockHashes, all equal blockHashes should come together and consecutively. + curCount := 0 + curFirstID := -1 + maxCount := 0 + maxFirstID := -1 + for i := range blocks { + if curFirstID == -1 || CompareBlockByHash(blocks[curFirstID], blocks[i]) != 0 { + curCount = 1 + curFirstID = i + } else { + curCount++ + } + if curCount > maxCount { + maxCount = curCount + maxFirstID = curFirstID + } + } + return maxFirstID, maxCount +} + +func (ss *StagedSync) getMaxConsensusBlockFromParentHash(parentHash common.Hash) *types.Block { + var candidateBlocks []*types.Block + + ss.syncConfig.ForEachPeer(func(peerConfig *SyncPeerConfig) (brk bool) { + peerConfig.mux.Lock() + defer peerConfig.mux.Unlock() + + for _, block := range peerConfig.newBlocks { + ph := block.ParentHash() + if bytes.Equal(ph[:], parentHash[:]) { + candidateBlocks = append(candidateBlocks, block) + break + } + } + return + }) + if len(candidateBlocks) == 0 { + return nil + } + // Sort by blockHashes. + sort.Slice(candidateBlocks, func(i, j int) bool { + return CompareBlockByHash(candidateBlocks[i], candidateBlocks[j]) == -1 + }) + maxFirstID, maxCount := GetHowManyMaxConsensus(candidateBlocks) + hash := candidateBlocks[maxFirstID].Hash() + utils.Logger().Debug(). + Hex("parentHash", parentHash[:]). + Hex("hash", hash[:]). + Int("maxCount", maxCount). + Msg("[STAGED_SYNC] Find block with matching parent hash") + return candidateBlocks[maxFirstID] +} + +func (ss *StagedSync) getBlockFromOldBlocksByParentHash(parentHash common.Hash) *types.Block { + for _, block := range ss.commonBlocks { + ph := block.ParentHash() + if bytes.Equal(ph[:], parentHash[:]) { + return block + } + } + return nil +} + +func (ss *StagedSync) getBlockFromLastMileBlocksByParentHash(parentHash common.Hash) *types.Block { + for _, block := range ss.lastMileBlocks { + ph := block.ParentHash() + if bytes.Equal(ph[:], parentHash[:]) { + return block + } + } + return nil +} + +// UpdateBlockAndStatus updates block and its status in db +func (ss *StagedSync) UpdateBlockAndStatus(block *types.Block, bc core.BlockChain, verifyAllSig bool) error { + if block.NumberU64() != bc.CurrentBlock().NumberU64()+1 { + utils.Logger().Debug(). + Uint64("curBlockNum", bc.CurrentBlock().NumberU64()). + Uint64("receivedBlockNum", block.NumberU64()). 
+ Msg("[STAGED_SYNC] Inappropriate block number, ignore!") + return nil + } + + haveCurrentSig := len(block.GetCurrentCommitSig()) != 0 + // Verify block signatures + if block.NumberU64() > 1 { + // Verify signature every N blocks (which N is verifyHeaderBatchSize and can be adjusted in configs) + verifySeal := block.NumberU64()%ss.VerifyHeaderBatchSize == 0 || verifyAllSig + verifyCurrentSig := verifyAllSig && haveCurrentSig + if verifyCurrentSig { + sig, bitmap, err := chain.ParseCommitSigAndBitmap(block.GetCurrentCommitSig()) + if err != nil { + return errors.Wrap(err, "parse commitSigAndBitmap") + } + + startTime := time.Now() + if err := bc.Engine().VerifyHeaderSignature(bc, block.Header(), sig, bitmap); err != nil { + return errors.Wrapf(err, "verify header signature %v", block.Hash().String()) + } + utils.Logger().Debug(). + Int64("elapsed time", time.Now().Sub(startTime).Milliseconds()). + Msg("[STAGED_SYNC] VerifyHeaderSignature") + } + err := bc.Engine().VerifyHeader(bc, block.Header(), verifySeal) + if err == engine.ErrUnknownAncestor { + return err + } else if err != nil { + utils.Logger().Error(). + Err(err). + Uint64("block number", block.NumberU64()). + Msgf("[STAGED_SYNC] UpdateBlockAndStatus: failed verifying signatures for new block") + + if !verifyAllSig { + utils.Logger().Info(). + Interface("block", bc.CurrentBlock()). + Msg("[STAGED_SYNC] UpdateBlockAndStatus: Rolling back last 99 blocks!") + for i := uint64(0); i < ss.VerifyHeaderBatchSize-1; i++ { + if rbErr := bc.Rollback([]common.Hash{bc.CurrentBlock().Hash()}); rbErr != nil { + utils.Logger().Error(). + Err(rbErr). + Msg("[STAGED_SYNC] UpdateBlockAndStatus: failed to rollback") + return err + } + } + } + return err + } + } + + _, err := bc.InsertChain([]*types.Block{block}, false /* verifyHeaders */) + if err != nil { + utils.Logger().Error(). + Err(err). + Uint64("block number", block.NumberU64()). + Uint32("shard", block.ShardID()). + Msgf("[STAGED_SYNC] UpdateBlockAndStatus: Error adding new block to blockchain") + return err + } + utils.Logger().Info(). + Uint64("blockHeight", block.NumberU64()). + Uint64("blockEpoch", block.Epoch().Uint64()). + Str("blockHex", block.Hash().Hex()). + Uint32("ShardID", block.ShardID()). + Msg("[STAGED_SYNC] UpdateBlockAndStatus: New Block Added to Blockchain") + + for i, tx := range block.StakingTransactions() { + utils.Logger().Info(). + Msgf( + "StakingTxn %d: %s, %v", i, tx.StakingType().String(), tx.StakingMessage(), + ) + } + return nil +} + +// RegisterNodeInfo will register node to peers to accept future new block broadcasting +// return number of successful registration +func (ss *StagedSync) RegisterNodeInfo() int { + registrationNumber := RegistrationNumber + utils.Logger().Debug(). + Int("registrationNumber", registrationNumber). + Int("activePeerNumber", len(ss.syncConfig.peers)). + Msg("[STAGED_SYNC] node registration to peers") + + count := 0 + ss.syncConfig.ForEachPeer(func(peerConfig *SyncPeerConfig) (brk bool) { + logger := utils.Logger().With().Str("peerPort", peerConfig.port).Str("peerIP", peerConfig.ip).Logger() + if count >= registrationNumber { + brk = true + return + } + if peerConfig.ip == ss.selfip && peerConfig.port == GetSyncingPort(ss.selfport) { + logger.Debug(). + Str("selfport", ss.selfport). + Str("selfsyncport", GetSyncingPort(ss.selfport)). + Msg("[STAGED_SYNC] skip self") + return + } + err := peerConfig.registerToBroadcast(ss.selfPeerHash[:], ss.selfip, ss.selfport) + if err != nil { + logger.Debug(). 
+ Hex("selfPeerHash", ss.selfPeerHash[:]). + Msg("[STAGED_SYNC] register failed to peer") + return + } + + logger.Debug().Msg("[STAGED_SYNC] register success") + count++ + return + }) + return count +} + +// getMaxPeerHeight returns the maximum blockchain heights from peers +func (ss *StagedSync) getMaxPeerHeight() (uint64, error) { + maxHeight := uint64(math.MaxUint64) + var ( + wg sync.WaitGroup + lock sync.Mutex + ) + + ss.syncConfig.ForEachPeer(func(peerConfig *SyncPeerConfig) (brk bool) { + wg.Add(1) + go func() { + defer wg.Done() + response, err := peerConfig.client.GetBlockChainHeight() + if err != nil { + utils.Logger().Error(). + Err(err). + Str("peerIP", peerConfig.ip). + Str("peerPort", peerConfig.port). + Msg("[STAGED_SYNC]GetBlockChainHeight failed") + ss.syncConfig.RemovePeer(peerConfig, "GetBlockChainHeight failed") + return + } + utils.Logger().Info(). + Str("peerIP", peerConfig.ip). + Uint64("blockHeight", response.BlockHeight). + Msg("[STAGED_SYNC] getMaxPeerHeight") + lock.Lock() + if response != nil { + if maxHeight == uint64(math.MaxUint64) || maxHeight < response.BlockHeight { + maxHeight = response.BlockHeight + } + } + lock.Unlock() + }() + return + }) + wg.Wait() + + if maxHeight == uint64(math.MaxUint64) { + return 0, ErrMaxPeerHeightFail + } + + return maxHeight, nil +} + +// IsSameBlockchainHeight checks whether the node is out of sync from other peers +func (ss *StagedSync) IsSameBlockchainHeight(bc core.BlockChain) (uint64, bool) { + otherHeight, _ := ss.getMaxPeerHeight() + currentHeight := bc.CurrentBlock().NumberU64() + return otherHeight, currentHeight == otherHeight +} + +// GetMaxPeerHeight returns maximum block height of connected peers +func (ss *StagedSync) GetMaxPeerHeight() uint64 { + mph, _ := ss.getMaxPeerHeight() + return mph +} + +func (ss *StagedSync) addConsensusLastMile(bc core.BlockChain, consensus *consensus.Consensus) error { + curNumber := bc.CurrentBlock().NumberU64() + blockIter, err := consensus.GetLastMileBlockIter(curNumber + 1) + if err != nil { + return err + } + for { + block := blockIter.Next() + if block == nil { + break + } + if _, err := bc.InsertChain(types.Blocks{block}, true); err != nil { + return errors.Wrap(err, "failed to InsertChain") + } + } + return nil +} + +// GetSyncingPort returns the syncing port. +func GetSyncingPort(nodePort string) string { + if port, err := strconv.Atoi(nodePort); err == nil { + return fmt.Sprintf("%d", port-SyncingPortDifference) + } + return "" +} + +func ParseResult(res SyncCheckResult) (IsSynchronized bool, OtherHeight uint64, HeightDiff uint64) { + IsSynchronized = res.IsSynchronized + OtherHeight = res.OtherHeight + HeightDiff = res.HeightDiff + return IsSynchronized, OtherHeight, HeightDiff +} + +// GetSyncStatus get the last sync status for other modules (E.g. RPC, explorer). +// If the last sync result is not expired, return the sync result immediately. +// If the last result is expired, ask the remote DNS nodes for latest height and return the result. 
+// GetSyncStatus gets the last sync status for other modules (e.g. RPC, explorer).
+// If the last sync result is not expired, return the sync result immediately.
+// If the last result is expired, ask the remote DNS nodes for latest height and return the result.
+func (ss *StagedSync) GetSyncStatus() SyncCheckResult {
+	return ss.syncStatus.Get(func() SyncCheckResult {
+		return ss.isSynchronized(false)
+	})
+}
+
+func (ss *StagedSync) GetParsedSyncStatus() (IsSynchronized bool, OtherHeight uint64, HeightDiff uint64) {
+	res := ss.syncStatus.Get(func() SyncCheckResult {
+		return ss.isSynchronized(false)
+	})
+	return ParseResult(res)
+}
+
+func (ss *StagedSync) IsSynchronized() bool {
+	result := ss.GetSyncStatus()
+	return result.IsSynchronized
+}
+
+// GetSyncStatusDoubleChecked returns the sync status while enforcing an immediate query on DNS nodes,
+// with a double check to avoid false alarms.
+func (ss *StagedSync) GetSyncStatusDoubleChecked() SyncCheckResult {
+	result := ss.isSynchronized(true)
+	return result
+}
+
+func (ss *StagedSync) GetParsedSyncStatusDoubleChecked() (IsSynchronized bool, OtherHeight uint64, HeightDiff uint64) {
+	result := ss.isSynchronized(true)
+	return ParseResult(result)
+}
+
+// isSynchronized queries the remote DNS node for the latest height to determine the current sync status
+func (ss *StagedSync) isSynchronized(doubleCheck bool) SyncCheckResult {
+	if ss.syncConfig == nil {
+		return SyncCheckResult{} // If syncConfig is not instantiated, return not in sync
+	}
+	otherHeight1, _ := ss.getMaxPeerHeight()
+	lastHeight := ss.Blockchain().CurrentBlock().NumberU64()
+	wasOutOfSync := lastHeight+inSyncThreshold < otherHeight1
+
+	if !doubleCheck {
+		heightDiff := otherHeight1 - lastHeight
+		if otherHeight1 < lastHeight {
+			heightDiff = 0 // overflow
+		}
+		utils.Logger().Info().
+			Uint64("OtherHeight", otherHeight1).
+			Uint64("lastHeight", lastHeight).
+			Msg("[STAGED_SYNC] Checking sync status")
+		return SyncCheckResult{
+			IsSynchronized: !wasOutOfSync,
+			OtherHeight:    otherHeight1,
+			HeightDiff:     heightDiff,
+		}
+	}
+	// double check the sync status after 1 second to confirm (avoid false alarms)
+	time.Sleep(1 * time.Second)
+
+	otherHeight2, _ := ss.getMaxPeerHeight()
+	currentHeight := ss.Blockchain().CurrentBlock().NumberU64()
+
+	isOutOfSync := currentHeight+inSyncThreshold < otherHeight2
+	utils.Logger().Info().
+		Uint64("OtherHeight1", otherHeight1).
+		Uint64("OtherHeight2", otherHeight2).
+		Uint64("lastHeight", lastHeight).
+		Uint64("currentHeight", currentHeight).
+ Msg("[STAGED_SYNC] Checking sync status") + // Only confirm out of sync when the node has lower height and didn't move in heights for 2 consecutive checks + heightDiff := otherHeight2 - lastHeight + if otherHeight2 < lastHeight { + heightDiff = 0 // overflow + } + return SyncCheckResult{ + IsSynchronized: !(wasOutOfSync && isOutOfSync && lastHeight == currentHeight), + OtherHeight: otherHeight2, + HeightDiff: heightDiff, + } +} diff --git a/api/service/stagedsync/stages.go b/api/service/stagedsync/stages.go new file mode 100644 index 000000000..3afff20a4 --- /dev/null +++ b/api/service/stagedsync/stages.go @@ -0,0 +1,94 @@ +package stagedsync + +import ( + "encoding/binary" + "fmt" + + "github.com/ledgerwatch/erigon-lib/kv" +) + +// SyncStageID represents the stages in the Mode.StagedSync mode +type SyncStageID string + +const ( + Heads SyncStageID = "Heads" // Heads are downloaded + BlockHashes SyncStageID = "BlockHashes" // block hashes are downloaded from peers + BlockBodies SyncStageID = "BlockBodies" // Block bodies are downloaded, TxHash and UncleHash are getting verified + States SyncStageID = "States" // will construct most recent state from downloaded blocks + LastMile SyncStageID = "LastMile" // update blocks after sync and update last mile blocks as well + Finish SyncStageID = "Finish" // Nominal stage after all other stages +) + +func GetStageName(stage string, isBeacon bool, prune bool) string { + name := stage + if isBeacon { + name = "beacon_" + name + } + if prune { + name = "prune_" + name + } + return name +} + +func GetStageID(stage SyncStageID, isBeacon bool, prune bool) []byte { + return []byte(GetStageName(string(stage), isBeacon, prune)) +} + +func GetBucketName(bucketName string, isBeacon bool) string { + name := bucketName + if isBeacon { + name = "Beacon" + name + } + return name +} + +// GetStageProgress retrieves saved progress of given sync stage from the database +func GetStageProgress(db kv.Getter, stage SyncStageID, isBeacon bool) (uint64, error) { + stgID := GetStageID(stage, isBeacon, false) + v, err := db.GetOne(kv.SyncStageProgress, stgID) + if err != nil { + return 0, err + } + return unmarshalData(v) +} + +// SaveStageProgress saves progress of given sync stage +func SaveStageProgress(db kv.Putter, stage SyncStageID, isBeacon bool, progress uint64) error { + stgID := GetStageID(stage, isBeacon, false) + return db.Put(kv.SyncStageProgress, stgID, marshalData(progress)) +} + +// GetStageCleanUpProgress retrieves saved progress of given sync stage from the database +func GetStageCleanUpProgress(db kv.Getter, stage SyncStageID, isBeacon bool) (uint64, error) { + stgID := GetStageID(stage, isBeacon, true) + v, err := db.GetOne(kv.SyncStageProgress, stgID) + if err != nil { + return 0, err + } + return unmarshalData(v) +} + +func SaveStageCleanUpProgress(db kv.Putter, stage SyncStageID, isBeacon bool, progress uint64) error { + stgID := GetStageID(stage, isBeacon, true) + return db.Put(kv.SyncStageProgress, stgID, marshalData(progress)) +} + +func marshalData(blockNumber uint64) []byte { + return encodeBigEndian(blockNumber) +} + +func unmarshalData(data []byte) (uint64, error) { + if len(data) == 0 { + return 0, nil + } + if len(data) < 8 { + return 0, fmt.Errorf("value must be at least 8 bytes, got %d", len(data)) + } + return binary.BigEndian.Uint64(data[:8]), nil +} + +func encodeBigEndian(n uint64) []byte { + var v [8]byte + binary.BigEndian.PutUint64(v[:], n) + return v[:] +} diff --git a/api/service/stagedsync/sync_config.go 
b/api/service/stagedsync/sync_config.go new file mode 100644 index 000000000..55ea3a3ac --- /dev/null +++ b/api/service/stagedsync/sync_config.go @@ -0,0 +1,401 @@ +package stagedsync + +import ( + "bytes" + "encoding/hex" + "errors" + "math/rand" + "reflect" + "sort" + "sync" + + "github.com/harmony-one/harmony/api/service/legacysync/downloader" + pb "github.com/harmony-one/harmony/api/service/legacysync/downloader/proto" + "github.com/harmony-one/harmony/core/types" + "github.com/harmony-one/harmony/internal/utils" + "github.com/harmony-one/harmony/p2p" +) + +// Constants for syncing. +const ( + downloadBlocksRetryLimit = 3 // downloadBlocks service retry limit + RegistrationNumber = 3 + SyncingPortDifference = 3000 + inSyncThreshold = 0 // when peerBlockHeight - myBlockHeight <= inSyncThreshold, it's ready to join consensus + SyncLoopBatchSize uint32 = 30 // maximum size for one query of block hashes + verifyHeaderBatchSize uint64 = 100 // block chain header verification batch size (not used for now) + LastMileBlocksSize = 50 + + // after cutting off a number of connected peers, the result number of peers + // shall be between numPeersLowBound and numPeersHighBound + NumPeersLowBound = 3 + numPeersHighBound = 5 + + // NumPeersReserved is the number reserved peers which will be replaced with any broken peer + NumPeersReserved = 2 + + // downloadTaskBatch is the number of tasks per each downloader request + downloadTaskBatch = 5 +) + +// SyncPeerConfig is peer config to sync. +type SyncPeerConfig struct { + ip string + port string + peerHash []byte + client *downloader.Client + blockHashes [][]byte // block hashes before node doing sync + newBlocks []*types.Block // blocks after node doing sync + mux sync.RWMutex + failedTimes uint64 +} + +// CreateTestSyncPeerConfig used for testing. +func CreateTestSyncPeerConfig(client *downloader.Client, blockHashes [][]byte) *SyncPeerConfig { + return &SyncPeerConfig{ + client: client, + blockHashes: blockHashes, + } +} + +// GetClient returns client pointer of downloader.Client +func (peerConfig *SyncPeerConfig) GetClient() *downloader.Client { + return peerConfig.client +} + +// AddFailedTime considers one more peer failure and checks against max allowed failed times +func (peerConfig *SyncPeerConfig) AddFailedTime(maxFailures uint64) (mustStop bool) { + peerConfig.mux.Lock() + defer peerConfig.mux.Unlock() + peerConfig.failedTimes++ + if peerConfig.failedTimes > maxFailures { + return true + } + return false +} + +// IsEqual checks the equality between two sync peers +func (peerConfig *SyncPeerConfig) IsEqual(pc2 *SyncPeerConfig) bool { + return peerConfig.ip == pc2.ip && peerConfig.port == pc2.port +} + +// GetBlocks gets blocks by calling grpc request to the corresponding peer. +func (peerConfig *SyncPeerConfig) GetBlocks(hashes [][]byte) ([][]byte, error) { + response := peerConfig.client.GetBlocksAndSigs(hashes) + if response == nil { + return nil, ErrGetBlock + } + return response.Payload, nil +} + +func (peerConfig *SyncPeerConfig) registerToBroadcast(peerHash []byte, ip, port string) error { + response := peerConfig.client.Register(peerHash, ip, port) + if response == nil || response.Type == pb.DownloaderResponse_FAIL { + return ErrRegistrationFail + } else if response.Type == pb.DownloaderResponse_SUCCESS { + return nil + } + return ErrRegistrationFail +} + +// CompareSyncPeerConfigByblockHashes compares two SyncPeerConfig by blockHashes. 
+func CompareSyncPeerConfigByblockHashes(a *SyncPeerConfig, b *SyncPeerConfig) int {
+	if len(a.blockHashes) != len(b.blockHashes) {
+		if len(a.blockHashes) < len(b.blockHashes) {
+			return -1
+		}
+		return 1
+	}
+	for id := range a.blockHashes {
+		if !reflect.DeepEqual(a.blockHashes[id], b.blockHashes[id]) {
+			return bytes.Compare(a.blockHashes[id], b.blockHashes[id])
+		}
+	}
+	return 0
+}
+
+// SyncBlockTask is the task struct to sync a specific block.
+type SyncBlockTask struct {
+	index     int
+	blockHash []byte
+}
+
+type syncBlockTasks []SyncBlockTask
+
+func (tasks syncBlockTasks) blockHashes() [][]byte {
+	hashes := make([][]byte, 0, len(tasks))
+	for _, task := range tasks {
+		hash := make([]byte, len(task.blockHash))
+		copy(hash, task.blockHash)
+		// append the defensive copy rather than the task's own slice
+		hashes = append(hashes, hash)
+	}
+	return hashes
+}
+
+func (tasks syncBlockTasks) blockHashesStr() []string {
+	hashes := make([]string, 0, len(tasks))
+	for _, task := range tasks {
+		hash := hex.EncodeToString(task.blockHash)
+		hashes = append(hashes, hash)
+	}
+	return hashes
+}
+
+func (tasks syncBlockTasks) indexes() []int {
+	indexes := make([]int, 0, len(tasks))
+	for _, task := range tasks {
+		indexes = append(indexes, task.index)
+	}
+	return indexes
+}
+
+// SyncConfig contains an array of SyncPeerConfig.
+type SyncConfig struct {
+	// mtx locks peers, and *SyncPeerConfig pointers in peers.
+	// SyncPeerConfig itself is guarded by its own mutex.
+	mtx           sync.RWMutex
+	reservedPeers []*SyncPeerConfig
+	peers         []*SyncPeerConfig
+}
+
+// AddPeer adds the given sync peer.
+func (sc *SyncConfig) AddPeer(peer *SyncPeerConfig) {
+	sc.mtx.Lock()
+	defer sc.mtx.Unlock()
+
+	// Ensure no duplicate peers
+	for _, p2 := range sc.peers {
+		if peer.IsEqual(p2) {
+			return
+		}
+	}
+	sc.peers = append(sc.peers, peer)
+}
+
+// SelectRandomPeers limits the number of peers in order to release some server-side resources.
+func (sc *SyncConfig) SelectRandomPeers(peers []p2p.Peer, randSeed int64) int {
+	numPeers := len(peers)
+	targetSize := calcNumPeersWithBound(numPeers, NumPeersLowBound, numPeersHighBound)
+	// if number of peers is less than required number, keep all in list
+	if numPeers <= targetSize {
+		utils.Logger().Warn().
+			Int("num connected peers", numPeers).
+			Msg("[STAGED_SYNC] not enough connected peers to sync; syncing will continue with all of them")
+		return numPeers
+	}
+	// shuffle peers list
+	r := rand.New(rand.NewSource(randSeed))
+	r.Shuffle(numPeers, func(i, j int) { peers[i], peers[j] = peers[j], peers[i] })
+
+	return targetSize
+}
+
+// calcNumPeersWithBound calculates how many of the connected peers to use:
+// roughly half of them, capped between lowBound and highBound.
+func calcNumPeersWithBound(size int, lowBound, highBound int) int {
+	if size < lowBound {
+		return size
+	}
+	expLen := size / 2
+	if expLen < lowBound {
+		expLen = lowBound
+	}
+	if expLen > highBound {
+		expLen = highBound
+	}
+	return expLen
+}
+
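A standalone sketch (illustrative only, not part of the patch) of how the bound above plays out for a few peer counts, using the NumPeersLowBound (3) and numPeersHighBound (5) values defined earlier in this file:

package main

import "fmt"

// keepPeers mirrors calcNumPeersWithBound: keep roughly half of the peers,
// clamped between the low and high bounds.
func keepPeers(size, lowBound, highBound int) int {
	if size < lowBound {
		return size
	}
	expLen := size / 2
	if expLen < lowBound {
		expLen = lowBound
	}
	if expLen > highBound {
		expLen = highBound
	}
	return expLen
}

func main() {
	for _, size := range []int{2, 3, 5, 8, 20} {
		fmt.Printf("connected=%d -> keep %d\n", size, keepPeers(size, 3, 5))
	}
	// connected=2 -> 2, 3 -> 3, 5 -> 3, 8 -> 4, 20 -> 5
}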
+// ForEachPeer calls the given function with each peer.
+// It breaks the iteration iff the function returns true.
+func (sc *SyncConfig) ForEachPeer(f func(peer *SyncPeerConfig) (brk bool)) {
+	sc.mtx.RLock()
+	peers := make([]*SyncPeerConfig, len(sc.peers))
+	copy(peers, sc.peers)
+	sc.mtx.RUnlock()
+
+	for _, peer := range peers {
+		if f(peer) {
+			break
+		}
+	}
+}
+
+// RemovePeer removes a peer from SyncConfig
+func (sc *SyncConfig) RemovePeer(peer *SyncPeerConfig, reason string) {
+	sc.mtx.Lock()
+	defer sc.mtx.Unlock()
+
+	peer.client.Close()
+	for i, p := range sc.peers {
+		if p == peer {
+			sc.peers = append(sc.peers[:i], sc.peers[i+1:]...)
+			break
+		}
+	}
+	utils.Logger().Info().
+		Str("peerIP", peer.ip).
+		Str("peerPort", peer.port).
+		Str("reason", reason).
+		Msg("[STAGED_SYNC] remove GRPC peer")
+}
+
+// ReplacePeerWithReserved tries to replace a peer with one from the reserved peer list
+func (sc *SyncConfig) ReplacePeerWithReserved(peer *SyncPeerConfig, reason string) {
+	sc.mtx.Lock()
+	defer sc.mtx.Unlock()
+
+	peer.client.Close()
+	for i, p := range sc.peers {
+		if p == peer {
+			if len(sc.reservedPeers) > 0 {
+				sc.peers = append(sc.peers[:i], sc.peers[i+1:]...)
+				sc.peers = append(sc.peers, sc.reservedPeers[0])
+				utils.Logger().Info().
+					Str("peerIP", peer.ip).
+					Str("peerPort", peer.port).
+					Str("reservedPeerIP", sc.reservedPeers[0].ip).
+					Str("reservedPeerPort", sc.reservedPeers[0].port).
+					Str("reason", reason).
+					Msg("[STAGED_SYNC] replaced GRPC peer by reserved")
+				sc.reservedPeers = sc.reservedPeers[1:]
+			} else {
+				sc.peers = append(sc.peers[:i], sc.peers[i+1:]...)
+				utils.Logger().Info().
+					Str("peerIP", peer.ip).
+					Str("peerPort", peer.port).
+					Str("reason", reason).
+					Msg("[STAGED_SYNC] remove GRPC peer without replacement")
+			}
+			break
+		}
+	}
+}
+
+// CloseConnections closes gRPC connections for state sync clients
+func (sc *SyncConfig) CloseConnections() {
+	sc.mtx.RLock()
+	defer sc.mtx.RUnlock()
+	for _, pc := range sc.peers {
+		pc.client.Close()
+	}
+}
+
+// FindPeerByHash returns the peer with the given hash, or nil if not found.
+func (sc *SyncConfig) FindPeerByHash(peerHash []byte) *SyncPeerConfig {
+	sc.mtx.RLock()
+	defer sc.mtx.RUnlock()
+	for _, pc := range sc.peers {
+		if bytes.Equal(pc.peerHash, peerHash) {
+			return pc
+		}
+	}
+	return nil
+}
+
+// getHowManyMaxConsensus returns the first index of the consensus group and the number of peers in it.
+// Assumption: all peers are sorted by CompareSyncPeerConfigByblockHashes first.
+// Caller shall ensure mtx is locked for reading.
+func (sc *SyncConfig) getHowManyMaxConsensus() (int, int) {
+	// As all peers are sorted by their blockHashes, all equal blockHashes should come together and consecutively.
+	if len(sc.peers) == 0 {
+		return -1, 0
+	} else if len(sc.peers) == 1 {
+		return 0, 1
+	}
+	maxFirstID := len(sc.peers) - 1
+	for i := maxFirstID - 1; i >= 0; i-- {
+		if CompareSyncPeerConfigByblockHashes(sc.peers[maxFirstID], sc.peers[i]) != 0 {
+			break
+		}
+		maxFirstID = i
+	}
+	maxCount := len(sc.peers) - maxFirstID
+	return maxFirstID, maxCount
+}
+
+// InitForTesting is used for testing.
+func (sc *SyncConfig) InitForTesting(client *downloader.Client, blockHashes [][]byte) {
+	sc.mtx.RLock()
+	defer sc.mtx.RUnlock()
+	for i := range sc.peers {
+		sc.peers[i].blockHashes = blockHashes
+		sc.peers[i].client = client
+	}
+}
+
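Since getHowManyMaxConsensus only scans backwards from the last element of an already sorted slice, the consensus group is the run of identical entries at the tail. A standalone sketch (illustrative only, not part of the patch) of that scan over plain strings:

package main

import "fmt"

// tailRun returns the first index and length of the run of equal values at the
// end of a sorted slice, mirroring getHowManyMaxConsensus.
func tailRun(sorted []string) (firstID, count int) {
	if len(sorted) == 0 {
		return -1, 0
	}
	firstID = len(sorted) - 1
	for i := firstID - 1; i >= 0; i-- {
		if sorted[i] != sorted[firstID] {
			break
		}
		firstID = i
	}
	return firstID, len(sorted) - firstID
}

func main() {
	fmt.Println(tailRun([]string{"a", "b", "b", "c", "c", "c"})) // 3 3
	fmt.Println(tailRun([]string{"a"}))                          // 0 1
}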
+// cleanUpPeers cleans up all peers whose blockHashes are not equal to
+// consensus block hashes. Caller shall ensure mtx is locked for RW.
+func (sc *SyncConfig) cleanUpPeers(maxFirstID int) {
+	fixedPeer := sc.peers[maxFirstID]
+	countBeforeCleanUp := len(sc.peers)
+	for i := 0; i < len(sc.peers); i++ {
+		if CompareSyncPeerConfigByblockHashes(fixedPeer, sc.peers[i]) != 0 {
+			// TODO: move it into a util delete func.
+			// See tip https://github.com/golang/go/wiki/SliceTricks
+			// Close the client and remove the peer from the peers list.
+			sc.peers[i].client.Close()
+			copy(sc.peers[i:], sc.peers[i+1:])
+			sc.peers[len(sc.peers)-1] = nil
+			sc.peers = sc.peers[:len(sc.peers)-1]
+			i-- // stay at the same index; the next element has shifted into this slot
+		}
+	}
+	if len(sc.peers) < countBeforeCleanUp {
+		utils.Logger().Debug().
+			Int("removed peers", countBeforeCleanUp-len(sc.peers)).
+			Msg("[STAGED_SYNC] cleanUpPeers: a few peers removed")
+	}
+}
+
+// cleanUpInvalidPeers cleans up all peers that missed any required block hash or sent any invalid block hash
+// Caller shall ensure mtx is locked for RW.
+func (sc *SyncConfig) cleanUpInvalidPeers(ipm map[string]bool) {
+	sc.mtx.Lock()
+	defer sc.mtx.Unlock()
+	countBeforeCleanUp := len(sc.peers)
+	for i := 0; i < len(sc.peers); i++ {
+		if ipm[string(sc.peers[i].peerHash)] {
+			sc.peers[i].client.Close()
+			copy(sc.peers[i:], sc.peers[i+1:])
+			sc.peers[len(sc.peers)-1] = nil
+			sc.peers = sc.peers[:len(sc.peers)-1]
+			i-- // stay at the same index; the next element has shifted into this slot
+		}
+	}
+	if len(sc.peers) < countBeforeCleanUp {
+		utils.Logger().Debug().
+			Int("removed peers", countBeforeCleanUp-len(sc.peers)).
+			Msg("[STAGED_SYNC] cleanUpInvalidPeers: a few peers removed")
+	}
+}
+
+// GetBlockHashesConsensusAndCleanUp selects the most common peer config based on their block hashes to download/sync.
+// Note that choosing the most common peer config does not guarantee that the blocks to be downloaded are the correct ones.
+// The subsequent node syncing steps of verifying the block header chain will give such confirmation later.
+// If later block header verification fails with the sync peer config chosen here, the entire sync loop gets retried with a new peer set.
+func (sc *SyncConfig) GetBlockHashesConsensusAndCleanUp(bgMode bool) error {
+	sc.mtx.Lock()
+	defer sc.mtx.Unlock()
+	// Sort all peers by the blockHashes.
+	sort.Slice(sc.peers, func(i, j int) bool {
+		return CompareSyncPeerConfigByblockHashes(sc.peers[i], sc.peers[j]) == -1
+	})
+	maxFirstID, maxCount := sc.getHowManyMaxConsensus()
+	if maxFirstID == -1 {
+		return errors.New("invalid peer index -1 for block hashes query")
+	}
+	utils.Logger().Info().
+		Int("maxFirstID", maxFirstID).
+		Str("targetPeerIP", sc.peers[maxFirstID].ip).
+		Int("maxCount", maxCount).
+		Int("hashSize", len(sc.peers[maxFirstID].blockHashes)).
+		Msg("[STAGED_SYNC] block consensus hashes")
+
+	if bgMode {
+		if maxCount != len(sc.peers) {
+			return ErrNodeNotEnoughBlockHashes
+		}
+	} else {
+		sc.cleanUpPeers(maxFirstID)
+	}
+	return nil
+}
diff --git a/api/service/stagedsync/sync_status.go b/api/service/stagedsync/sync_status.go
new file mode 100644
index 000000000..556f1058b
--- /dev/null
+++ b/api/service/stagedsync/sync_status.go
@@ -0,0 +1,90 @@
+package stagedsync
+
+import (
+	"sync"
+	"time"
+
+	nodeconfig "github.com/harmony-one/harmony/internal/configs/node"
+)
+
+const (
+	// syncStatusExpiration is the expiration timeout of a sync status.
+	// If the last sync result in memory is older than this expiration, the sync
+	// status will be refreshed on the next query.
+	syncStatusExpiration = 6 * time.Second
+
+	// syncStatusExpirationNonValidator is the expiration of sync cache for non-validators.
+	// Compared with validator nodes, the sync check for non-validators is not as strict.
+ // TODO: add this field to harmony config + syncStatusExpirationNonValidator = 12 * time.Second +) + +type ( + syncStatus struct { + lastResult SyncCheckResult + MaxPeersHeight uint64 + currentCycle SyncCycle + lastUpdateTime time.Time + lock sync.RWMutex + expiration time.Duration + } + + SyncCheckResult struct { + IsSynchronized bool + OtherHeight uint64 + HeightDiff uint64 + } + + SyncCycle struct { + Number uint64 + StartHash []byte + TargetHeight uint64 + ExtraHashes map[uint64][]byte + lock sync.RWMutex + } +) + +func NewSyncStatus(role nodeconfig.Role) syncStatus { + expiration := getSyncStatusExpiration(role) + return syncStatus{ + expiration: expiration, + } +} + +func getSyncStatusExpiration(role nodeconfig.Role) time.Duration { + switch role { + case nodeconfig.Validator: + return syncStatusExpiration + case nodeconfig.ExplorerNode: + return syncStatusExpirationNonValidator + default: + return syncStatusExpirationNonValidator + } +} + +func (status *syncStatus) Get(fallback func() SyncCheckResult) SyncCheckResult { + status.lock.RLock() + if !status.expired() { + result := status.lastResult + status.lock.RUnlock() + return result + } + status.lock.RUnlock() + + status.lock.Lock() + defer status.lock.Unlock() + if status.expired() { + result := fallback() + status.update(result) + } + return status.lastResult +} + +func (status *syncStatus) expired() bool { + return time.Since(status.lastUpdateTime) > status.expiration +} + +func (status *syncStatus) update(result SyncCheckResult) { + status.lastUpdateTime = time.Now() + status.lastResult = result +} diff --git a/api/service/stagedsync/syncing.go b/api/service/stagedsync/syncing.go new file mode 100644 index 000000000..d20497157 --- /dev/null +++ b/api/service/stagedsync/syncing.go @@ -0,0 +1,292 @@ +package stagedsync + +import ( + "context" + "fmt" + "time" + + "github.com/c2h5oh/datasize" + "github.com/harmony-one/harmony/consensus" + "github.com/harmony-one/harmony/core" + nodeconfig "github.com/harmony-one/harmony/internal/configs/node" + "github.com/harmony-one/harmony/internal/utils" + "github.com/harmony-one/harmony/node/worker" + "github.com/harmony-one/harmony/shard" + "github.com/ledgerwatch/erigon-lib/kv" + + "github.com/ledgerwatch/erigon-lib/kv/mdbx" + "github.com/ledgerwatch/log/v3" +) + +const ( + BlockHashesBucket = "BlockHashes" + BeaconBlockHashesBucket = "BeaconBlockHashes" + DownloadedBlocksBucket = "BlockBodies" + BeaconDownloadedBlocksBucket = "BeaconBlockBodies" // Beacon Block bodies are downloaded, TxHash and UncleHash are getting verified + LastMileBlocksBucket = "LastMileBlocks" // last mile blocks to catch up with the consensus + StageProgressBucket = "StageProgress" + + // cache db keys + LastBlockHeight = "LastBlockHeight" + LastBlockHash = "LastBlockHash" + + // cache db names + BlockHashesCacheDB = "cache_block_hashes" + BlockCacheDB = "cache_blocks" +) + +var Buckets = []string{ + BlockHashesBucket, + BeaconBlockHashesBucket, + DownloadedBlocksBucket, + BeaconDownloadedBlocksBucket, + LastMileBlocksBucket, + StageProgressBucket, +} + +// CreateStagedSync creates an instance of staged sync +func CreateStagedSync( + ip string, + port string, + peerHash [20]byte, + bc core.BlockChain, + role nodeconfig.Role, + isExplorer bool, + TurboMode bool, + UseMemDB bool, + doubleCheckBlockHashes bool, + maxBlocksPerCycle uint64, + maxBackgroundBlocks uint64, + maxMemSyncCycleSize uint64, + verifyAllSig bool, + verifyHeaderBatchSize uint64, + insertChainBatchSize int, + logProgress bool, +) (*StagedSync, 
error) { + + ctx := context.Background() + isBeacon := bc.ShardID() == shard.BeaconChainShardID + + var db kv.RwDB + if UseMemDB { + // maximum Blocks in memory is maxMemSyncCycleSize + maxBackgroundBlocks + var dbMapSize datasize.ByteSize + if isBeacon { + // for memdb, maximum 512 kb for beacon chain each block (in average) should be enough + dbMapSize = datasize.ByteSize(maxMemSyncCycleSize+maxBackgroundBlocks) * 512 * datasize.KB + } else { + // for memdb, maximum 256 kb for each shard chains block (in average) should be enough + dbMapSize = datasize.ByteSize(maxMemSyncCycleSize+maxBackgroundBlocks) * 256 * datasize.KB + } + // we manually create memory db because "db = memdb.New()" sets the default map size (64 MB) which is not enough for some cases + db = mdbx.NewMDBX(log.New()).MapSize(dbMapSize).InMem("cache_db").MustOpen() + } else { + if isBeacon { + db = mdbx.NewMDBX(log.New()).Path("cache_beacon_db").MustOpen() + } else { + db = mdbx.NewMDBX(log.New()).Path("cache_shard_db").MustOpen() + } + } + + if errInitDB := initDB(ctx, db); errInitDB != nil { + return nil, errInitDB + } + + headsCfg := NewStageHeadersCfg(ctx, bc, db) + blockHashesCfg := NewStageBlockHashesCfg(ctx, bc, db, isBeacon, TurboMode, logProgress) + bodiesCfg := NewStageBodiesCfg(ctx, bc, db, isBeacon, TurboMode, logProgress) + statesCfg := NewStageStatesCfg(ctx, bc, db, logProgress) + lastMileCfg := NewStageLastMileCfg(ctx, bc, db) + finishCfg := NewStageFinishCfg(ctx, db) + + stages := DefaultStages(ctx, + headsCfg, + blockHashesCfg, + bodiesCfg, + statesCfg, + lastMileCfg, + finishCfg, + ) + + return New(ctx, + ip, + port, + peerHash, + bc, + role, + isBeacon, + isExplorer, + db, + stages, + DefaultRevertOrder, + DefaultCleanUpOrder, + TurboMode, + UseMemDB, + doubleCheckBlockHashes, + maxBlocksPerCycle, + maxBackgroundBlocks, + maxMemSyncCycleSize, + verifyAllSig, + verifyHeaderBatchSize, + insertChainBatchSize, + logProgress, + ), nil +} + +// initDB inits sync loop main database and create buckets +func initDB(ctx context.Context, db kv.RwDB) error { + tx, errRW := db.BeginRw(ctx) + if errRW != nil { + return errRW + } + defer tx.Rollback() + for _, name := range Buckets { + // create bucket + if err := tx.CreateBucket(GetStageName(name, false, false)); err != nil { + return err + } + // create bucket for beacon + if err := tx.CreateBucket(GetStageName(name, true, false)); err != nil { + return err + } + } + if err := tx.Commit(); err != nil { + return err + } + return nil +} + +// SyncLoop will keep syncing with peers until catches up +func (s *StagedSync) SyncLoop(bc core.BlockChain, worker *worker.Worker, isBeacon bool, consensus *consensus.Consensus, loopMinTime time.Duration) { + + utils.Logger().Info(). + Uint64("current height", bc.CurrentBlock().NumberU64()). + Msgf("staged sync is executing ... ") + + if !s.IsBeacon() { + s.RegisterNodeInfo() + } + + // get max peers height + maxPeersHeight, err := s.getMaxPeerHeight() + if err != nil { + return + } + utils.Logger().Info(). + Uint64("maxPeersHeight", maxPeersHeight). + Msgf("[STAGED_SYNC] max peers height") + s.syncStatus.MaxPeersHeight = maxPeersHeight + + for { + if len(s.syncConfig.peers) < NumPeersLowBound { + // TODO: try to use reserved nodes + utils.Logger().Warn(). + Int("num peers", len(s.syncConfig.peers)). + Msgf("[STAGED_SYNC] Not enough connected peers") + break + } + startHead := bc.CurrentBlock().NumberU64() + + if startHead >= maxPeersHeight { + utils.Logger().Info(). + Bool("isBeacon", isBeacon). + Uint32("shard", bc.ShardID()). 
+ Uint64("maxPeersHeight", maxPeersHeight). + Uint64("currentHeight", startHead). + Msgf("[STAGED_SYNC] Node is now IN SYNC!") + break + } + startTime := time.Now() + + if err := s.runSyncCycle(bc, worker, isBeacon, consensus, maxPeersHeight); err != nil { + utils.Logger().Error(). + Err(err). + Bool("isBeacon", isBeacon). + Uint32("shard", bc.ShardID()). + Uint64("currentHeight", startHead). + Msgf("[STAGED_SYNC] sync cycle failed") + break + } + + if loopMinTime != 0 { + waitTime := loopMinTime - time.Since(startTime) + utils.Logger().Debug(). + Bool("isBeacon", isBeacon). + Uint32("shard", bc.ShardID()). + Interface("duration", waitTime). + Msgf("[STAGED SYNC] Node is syncing ..., it's waiting a few seconds until next loop") + c := time.After(waitTime) + select { + case <-s.Context().Done(): + return + case <-c: + } + } + + // calculating sync speed (blocks/second) + currHead := bc.CurrentBlock().NumberU64() + if s.LogProgress && currHead-startHead > 0 { + dt := time.Now().Sub(startTime).Seconds() + speed := float64(0) + if dt > 0 { + speed = float64(currHead-startHead) / dt + } + syncSpeed := fmt.Sprintf("%.2f", speed) + fmt.Println("sync speed:", syncSpeed, "blocks/s (", currHead, "/", maxPeersHeight, ")") + } + + s.syncStatus.currentCycle.lock.Lock() + s.syncStatus.currentCycle.Number++ + s.syncStatus.currentCycle.lock.Unlock() + + } + + if consensus != nil { + if err := s.addConsensusLastMile(s.Blockchain(), consensus); err != nil { + utils.Logger().Error(). + Err(err). + Msg("[STAGED_SYNC] Add consensus last mile") + } + // TODO: move this to explorer handler code. + if s.isExplorer { + consensus.UpdateConsensusInformation() + } + } + s.purgeAllBlocksFromCache() + utils.Logger().Info(). + Uint64("new height", bc.CurrentBlock().NumberU64()). + Msgf("staged sync is executed") + return +} + +// runSyncCycle will run one cycle of staged syncing +func (s *StagedSync) runSyncCycle(bc core.BlockChain, worker *worker.Worker, isBeacon bool, consensus *consensus.Consensus, maxPeersHeight uint64) error { + canRunCycleInOneTransaction := s.MaxBlocksPerSyncCycle > 0 && s.MaxBlocksPerSyncCycle <= s.MaxMemSyncCycleSize + var tx kv.RwTx + if canRunCycleInOneTransaction { + var err error + if tx, err = s.DB().BeginRw(context.Background()); err != nil { + return err + } + defer tx.Rollback() + } + // Do one cycle of staged sync + initialCycle := s.syncStatus.currentCycle.Number == 0 + syncErr := s.Run(s.DB(), tx, initialCycle) + if syncErr != nil { + utils.Logger().Error(). + Err(syncErr). + Bool("isBeacon", s.IsBeacon()). + Uint32("shard", s.Blockchain().ShardID()). 
+ Msgf("[STAGED_SYNC] Sync loop failed") + s.purgeOldBlocksFromCache() + return syncErr + } + if tx != nil { + errTx := tx.Commit() + if errTx != nil { + return errTx + } + } + return nil +} diff --git a/api/service/stagedsync/task_queue.go b/api/service/stagedsync/task_queue.go new file mode 100644 index 000000000..8802ca839 --- /dev/null +++ b/api/service/stagedsync/task_queue.go @@ -0,0 +1,38 @@ +package stagedsync + +import ( + "time" + + "github.com/Workiva/go-datastructures/queue" +) + +// downloadTaskQueue is wrapper around Queue with item to be SyncBlockTask +type downloadTaskQueue struct { + q *queue.Queue +} + +func (queue downloadTaskQueue) poll(num int64, timeOut time.Duration) (syncBlockTasks, error) { + items, err := queue.q.Poll(num, timeOut) + if err != nil { + return nil, err + } + tasks := make(syncBlockTasks, 0, len(items)) + for _, item := range items { + task := item.(SyncBlockTask) + tasks = append(tasks, task) + } + return tasks, nil +} + +func (queue downloadTaskQueue) put(tasks syncBlockTasks) error { + for _, task := range tasks { + if err := queue.q.Put(task); err != nil { + return err + } + } + return nil +} + +func (queue downloadTaskQueue) empty() bool { + return queue.q.Empty() +} diff --git a/cmd/harmony/config_migrations.go b/cmd/harmony/config_migrations.go index 813c9045e..c5d86bdd0 100644 --- a/cmd/harmony/config_migrations.go +++ b/cmd/harmony/config_migrations.go @@ -260,6 +260,7 @@ func init() { confTree.Set("Version", "2.5.3") return confTree } + migrations["2.5.3"] = func(confTree *toml.Tree) *toml.Tree { if confTree.Get("TxPool.AllowedTxsFile") == nil { confTree.Set("TxPool.AllowedTxsFile", defaultConfig.TxPool.AllowedTxsFile) @@ -267,6 +268,7 @@ func init() { confTree.Set("Version", "2.5.4") return confTree } + migrations["2.5.4"] = func(confTree *toml.Tree) *toml.Tree { if confTree.Get("TxPool.GlobalSlots") == nil { confTree.Set("TxPool.GlobalSlots", defaultConfig.TxPool.GlobalSlots) @@ -274,6 +276,7 @@ func init() { confTree.Set("Version", "2.5.5") return confTree } + migrations["2.5.5"] = func(confTree *toml.Tree) *toml.Tree { if confTree.Get("Log.Console") == nil { confTree.Set("Log.Console", defaultConfig.Log.Console) @@ -281,6 +284,7 @@ func init() { confTree.Set("Version", "2.5.6") return confTree } + migrations["2.5.6"] = func(confTree *toml.Tree) *toml.Tree { if confTree.Get("P2P.MaxPeers") == nil { confTree.Set("P2P.MaxPeers", defaultConfig.P2P.MaxPeers) @@ -295,6 +299,15 @@ func init() { return confTree } + migrations["2.5.8"] = func(confTree *toml.Tree) *toml.Tree { + if confTree.Get("Sync.StagedSync") == nil { + confTree.Set("Sync.StagedSync", defaultConfig.Sync.StagedSync) + confTree.Set("Sync.StagedSyncCfg", defaultConfig.Sync.StagedSyncCfg) + } + confTree.Set("Version", "2.5.9") + return confTree + } + // check that the latest version here is the same as in default.go largestKey := getNextVersion(migrations) if largestKey != tomlConfigVersion { diff --git a/cmd/harmony/default.go b/cmd/harmony/default.go index 64c60365d..84295eb87 100644 --- a/cmd/harmony/default.go +++ b/cmd/harmony/default.go @@ -5,7 +5,7 @@ import ( nodeconfig "github.com/harmony-one/harmony/internal/configs/node" ) -const tomlConfigVersion = "2.5.8" +const tomlConfigVersion = "2.5.9" const ( defNetworkType = nodeconfig.Mainnet @@ -143,10 +143,25 @@ var defaultPrometheusConfig = harmonyconfig.PrometheusConfig{ Gateway: "https://gateway.harmony.one", } +var defaultStagedSyncConfig = harmonyconfig.StagedSyncConfig{ + TurboMode: true, + DoubleCheckBlockHashes: 
false, + MaxBlocksPerSyncCycle: 512, // sync new blocks in each cycle, if set to zero means all blocks in one full cycle + MaxBackgroundBlocks: 512, // max blocks to be downloaded at background process in turbo mode + InsertChainBatchSize: 128, // number of blocks to build a batch and insert to chain in staged sync + VerifyAllSig: false, // whether it should verify signatures for all blocks + VerifyHeaderBatchSize: 100, // batch size to verify block header before insert to chain + MaxMemSyncCycleSize: 1024, // max number of blocks to use a single transaction for staged sync + UseMemDB: true, // it uses memory by default. set it to false to use disk + LogProgress: false, // log the full sync progress in console +} + var ( defaultMainnetSyncConfig = harmonyconfig.SyncConfig{ Enabled: false, Downloader: false, + StagedSync: false, + StagedSyncCfg: defaultStagedSyncConfig, Concurrency: 6, MinPeers: 6, InitStreams: 8, @@ -159,6 +174,8 @@ var ( defaultTestNetSyncConfig = harmonyconfig.SyncConfig{ Enabled: true, Downloader: false, + StagedSync: false, + StagedSyncCfg: defaultStagedSyncConfig, Concurrency: 2, MinPeers: 2, InitStreams: 2, @@ -171,6 +188,8 @@ var ( defaultLocalNetSyncConfig = harmonyconfig.SyncConfig{ Enabled: true, Downloader: true, + StagedSync: false, + StagedSyncCfg: defaultStagedSyncConfig, Concurrency: 4, MinPeers: 5, InitStreams: 5, @@ -183,6 +202,8 @@ var ( defaultElseSyncConfig = harmonyconfig.SyncConfig{ Enabled: true, Downloader: true, + StagedSync: false, + StagedSyncCfg: defaultStagedSyncConfig, Concurrency: 4, MinPeers: 4, InitStreams: 4, diff --git a/cmd/harmony/flags.go b/cmd/harmony/flags.go index 02e8989da..d2c458149 100644 --- a/cmd/harmony/flags.go +++ b/cmd/harmony/flags.go @@ -218,6 +218,7 @@ var ( syncFlags = []cli.Flag{ syncStreamEnabledFlag, syncDownloaderFlag, + syncStagedSyncFlag, syncConcurrencyFlag, syncMinPeersFlag, syncInitStreamsFlag, @@ -1661,6 +1662,12 @@ var ( Hidden: true, DefValue: false, } + syncStagedSyncFlag = cli.BoolFlag{ + Name: "sync.stagedsync", + Usage: "Enable the staged sync", + Hidden: false, + DefValue: false, + } syncConcurrencyFlag = cli.IntFlag{ Name: "sync.concurrency", Usage: "Concurrency when doing p2p sync requests", @@ -1708,6 +1715,10 @@ func applySyncFlags(cmd *cobra.Command, config *harmonyconfig.HarmonyConfig) { config.Sync.Downloader = cli.GetBoolFlagValue(cmd, syncDownloaderFlag) } + if cli.IsFlagChanged(cmd, syncStagedSyncFlag) { + config.Sync.StagedSync = cli.GetBoolFlagValue(cmd, syncStagedSyncFlag) + } + if cli.IsFlagChanged(cmd, syncConcurrencyFlag) { config.Sync.Concurrency = cli.GetIntFlagValue(cmd, syncConcurrencyFlag) } diff --git a/cmd/harmony/flags_test.go b/cmd/harmony/flags_test.go index 2f69ccdc2..d47a1712f 100644 --- a/cmd/harmony/flags_test.go +++ b/cmd/harmony/flags_test.go @@ -1345,6 +1345,7 @@ func TestSyncFlags(t *testing.T) { cfgSync := defaultMainnetSyncConfig cfgSync.Enabled = true cfgSync.Downloader = true + cfgSync.StagedSync = false cfgSync.Concurrency = 10 cfgSync.MinPeers = 10 cfgSync.InitStreams = 10 diff --git a/cmd/harmony/main.go b/cmd/harmony/main.go index b972fd36a..bf5ebdcdf 100644 --- a/cmd/harmony/main.go +++ b/cmd/harmony/main.go @@ -314,6 +314,7 @@ func setupNodeAndRun(hc harmonyconfig.HarmonyConfig) { nodeconfig.GetDefaultConfig().ShardID = nodeConfig.ShardID nodeconfig.GetDefaultConfig().IsOffline = nodeConfig.IsOffline nodeconfig.GetDefaultConfig().Downloader = nodeConfig.Downloader + nodeconfig.GetDefaultConfig().StagedSync = nodeConfig.StagedSync // Check NTP configuration 
accurate, err := ntp.CheckLocalTimeAccurate(nodeConfig.NtpServer) @@ -599,7 +600,17 @@ func createGlobalConfig(hc harmonyconfig.HarmonyConfig) (*nodeconfig.ConfigType, nodeConfig.SetArchival(hc.General.IsBeaconArchival, hc.General.IsArchival) nodeConfig.IsOffline = hc.General.IsOffline nodeConfig.Downloader = hc.Sync.Downloader - + nodeConfig.StagedSync = hc.Sync.StagedSync + nodeConfig.StagedSyncTurboMode = hc.Sync.StagedSyncCfg.TurboMode + nodeConfig.UseMemDB = hc.Sync.StagedSyncCfg.UseMemDB + nodeConfig.DoubleCheckBlockHashes = hc.Sync.StagedSyncCfg.DoubleCheckBlockHashes + nodeConfig.MaxBlocksPerSyncCycle = hc.Sync.StagedSyncCfg.MaxBlocksPerSyncCycle + nodeConfig.MaxBackgroundBlocks = hc.Sync.StagedSyncCfg.MaxBackgroundBlocks + nodeConfig.MaxMemSyncCycleSize = hc.Sync.StagedSyncCfg.MaxMemSyncCycleSize + nodeConfig.VerifyAllSig = hc.Sync.StagedSyncCfg.VerifyAllSig + nodeConfig.VerifyHeaderBatchSize = hc.Sync.StagedSyncCfg.VerifyHeaderBatchSize + nodeConfig.InsertChainBatchSize = hc.Sync.StagedSyncCfg.InsertChainBatchSize + nodeConfig.LogProgress = hc.Sync.StagedSyncCfg.LogProgress // P2P private key is used for secure message transfer between p2p nodes. nodeConfig.P2PPriKey, _, err = utils.LoadKeyFromFile(hc.P2P.KeyFile) if err != nil { diff --git a/go.mod b/go.mod index d852ad38c..bc0c0c539 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/harmony-one/harmony go 1.19 require ( - github.com/RoaringBitmap/roaring v1.1.0 + github.com/RoaringBitmap/roaring v1.2.1 github.com/VictoriaMetrics/fastcache v1.5.7 github.com/Workiva/go-datastructures v1.0.50 github.com/allegro/bigcache v1.2.1 @@ -57,9 +57,9 @@ require ( golang.org/x/net v0.3.0 // indirect golang.org/x/sync v0.1.0 golang.org/x/sys v0.3.0 // indirect - golang.org/x/time v0.0.0-20220609170525-579cf78fd858 + golang.org/x/time v0.2.0 golang.org/x/tools v0.3.0 // indirect - google.golang.org/grpc v1.50.1 + google.golang.org/grpc v1.51.0 google.golang.org/protobuf v1.28.1 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c gopkg.in/natefinch/npipe.v2 v2.0.0-20160621034901-c1b8fa8bdcce @@ -67,14 +67,21 @@ require ( gopkg.in/yaml.v2 v2.4.0 ) +require ( + github.com/c2h5oh/datasize v0.0.0-20220606134207-859f65c6625b + github.com/ledgerwatch/erigon-lib v0.0.0-20221218022306-0f8fdd40c2db + github.com/ledgerwatch/log/v3 v3.6.0 +) + require ( github.com/AndreasBriese/bbloom v0.0.0-20190825152654-46b345b51c96 // indirect github.com/BurntSushi/toml v0.3.1 // indirect github.com/OpenPeeDeeP/depguard v1.0.1 // indirect + github.com/VictoriaMetrics/metrics v1.23.0 // indirect github.com/aristanetworks/goarista v0.0.0-20190607111240-52c2a7864a08 // indirect github.com/benbjohnson/clock v1.3.0 // indirect github.com/beorn7/perks v1.0.1 // indirect - github.com/bits-and-blooms/bitset v1.2.0 // indirect + github.com/bits-and-blooms/bitset v1.2.2 // indirect github.com/bombsimon/wsl/v2 v2.0.0 // indirect github.com/btcsuite/btcd v0.21.0-beta // indirect github.com/cespare/xxhash v1.1.0 // indirect @@ -88,7 +95,7 @@ require ( github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/docker/go-units v0.5.0 // indirect github.com/dustin/go-humanize v1.0.0 // indirect - github.com/edsrzf/mmap-go v1.0.0 // indirect + github.com/edsrzf/mmap-go v1.1.0 // indirect github.com/elastic/gosigar v0.14.2 // indirect github.com/fatih/color v1.13.0 // indirect github.com/flynn/noise v1.0.0 // indirect @@ -125,7 +132,7 @@ require ( github.com/golangci/prealloc v0.0.0-20180630174525-215b22d4de21 // indirect 
github.com/golangci/revgrep v0.0.0-20180526074752-d9c87f5ffaf0 // indirect github.com/golangci/unconvert v0.0.0-20180507085042-28b1c447d1f4 // indirect - github.com/google/btree v1.0.1 // indirect + github.com/google/btree v1.1.2 // indirect github.com/google/gopacket v1.1.19 // indirect github.com/google/pprof v0.0.0-20221203041831-ce31453925ec // indirect github.com/google/uuid v1.3.0 // indirect @@ -230,10 +237,13 @@ require ( github.com/tikv/pd/client v0.0.0-20220216070739-26c668271201 // indirect github.com/timakin/bodyclose v0.0.0-20190930140734-f7f2e9bca95e // indirect github.com/tommy-muehle/go-mnd v1.1.1 // indirect + github.com/torquem-ch/mdbx-go v0.27.0 // indirect github.com/tyler-smith/go-bip39 v1.0.2 // indirect github.com/ultraware/funlen v0.0.2 // indirect github.com/ultraware/whitespace v0.0.4 // indirect github.com/uudashr/gocognit v1.0.1 // indirect + github.com/valyala/fastrand v1.1.0 // indirect + github.com/valyala/histogram v1.2.0 // indirect github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1 // indirect github.com/wsddn/go-ecdh v0.0.0-20161211032359-48726bab9208 // indirect go.opencensus.io v0.24.0 // indirect diff --git a/go.sum b/go.sum index 4aadb7a65..b227df3a4 100644 --- a/go.sum +++ b/go.sum @@ -68,12 +68,14 @@ github.com/OneOfOne/xxhash v1.2.5 h1:zl/OfRA6nftbBK9qTohYBJ5xvw6C/oNKizR7cZGl3cI github.com/OneOfOne/xxhash v1.2.5/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q= github.com/OpenPeeDeeP/depguard v1.0.1 h1:VlW4R6jmBIv3/u1JNlawEvJMM4J+dPORPaZasQee8Us= github.com/OpenPeeDeeP/depguard v1.0.1/go.mod h1:xsIw86fROiiwelg+jB2uM9PiKihMMmUx/1V+TNhjQvM= -github.com/RoaringBitmap/roaring v1.1.0 h1:b10lZrZXaY6Q6EKIRrmOF519FIyQQ5anPgGr3niw2yY= -github.com/RoaringBitmap/roaring v1.1.0/go.mod h1:icnadbWcNyfEHlYdr+tDlOTih1Bf/h+rzPpv4sbomAA= +github.com/RoaringBitmap/roaring v1.2.1 h1:58/LJlg/81wfEHd5L9qsHduznOIhyv4qb1yWcSvVq9A= +github.com/RoaringBitmap/roaring v1.2.1/go.mod h1:icnadbWcNyfEHlYdr+tDlOTih1Bf/h+rzPpv4sbomAA= github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg= github.com/VictoriaMetrics/fastcache v1.5.3/go.mod h1:+jv9Ckb+za/P1ZRg/sulP5Ni1v49daAVERr0H3CuscE= github.com/VictoriaMetrics/fastcache v1.5.7 h1:4y6y0G8PRzszQUYIQHHssv/jgPHAb5qQuuDNdCbyAgw= github.com/VictoriaMetrics/fastcache v1.5.7/go.mod h1:ptDBkNMQI4RtmVo8VS/XwRY6RoTu1dAWCbrk+6WsEM8= +github.com/VictoriaMetrics/metrics v1.23.0 h1:WzfqyzCaxUZip+OBbg1+lV33WChDSu4ssYII3nxtpeA= +github.com/VictoriaMetrics/metrics v1.23.0/go.mod h1:rAr/llLpEnAdTehiNlUxKgnjcOuROSzpw0GvjpEbvFc= github.com/Workiva/go-datastructures v1.0.50 h1:slDmfW6KCHcC7U+LP3DDBbm4fqTwZGn1beOFPfGaLvo= github.com/Workiva/go-datastructures v1.0.50/go.mod h1:Z+F2Rca0qCsVYDS8z7bAGm8f3UkzuWYS/oBZz5a7VVA= github.com/Zilliqa/gozilliqa-sdk v1.2.1-0.20201201074141-dd0ecada1be6/go.mod h1:eSYp2T6f0apnuW8TzhV3f6Aff2SE8Dwio++U4ha4yEM= @@ -104,8 +106,9 @@ github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24 github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA= github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= +github.com/bits-and-blooms/bitset v1.2.2 h1:J5gbX05GpMdBjCvQ9MteIg2KKDExr7DrgK+Yc15FvIk= 
+github.com/bits-and-blooms/bitset v1.2.2/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= github.com/bombsimon/wsl v1.2.5/go.mod h1:43lEF/i0kpXbLCeDXL9LMT8c92HyBywXb0AsgMHYngM= github.com/bombsimon/wsl/v2 v2.0.0 h1:+Vjcn+/T5lSrO8Bjzhk4v14Un/2UyCA1E3V5j9nwTkQ= github.com/bombsimon/wsl/v2 v2.0.0/go.mod h1:mf25kr/SqFEPhhcxW1+7pxzGlW+hIl/hYTKY95VwV8U= @@ -129,6 +132,8 @@ github.com/btcsuite/snappy-go v1.0.0/go.mod h1:8woku9dyThutzjeg+3xrA5iCpBRH8XEEg github.com/btcsuite/websocket v0.0.0-20150119174127-31079b680792/go.mod h1:ghJtEyQwv5/p4Mg4C0fgbePVuGr935/5ddU9Z3TmDRY= github.com/btcsuite/winsvc v1.0.0/go.mod h1:jsenWakMcC0zFBFurPLEAyrnc/teJEM1O46fmI40EZs= github.com/buger/jsonparser v0.0.0-20181115193947-bf1c66bbce23/go.mod h1:bbYlZJ7hK1yFx9hf58LP0zeX7UjIGs20ufpu3evjr+s= +github.com/c2h5oh/datasize v0.0.0-20220606134207-859f65c6625b h1:6+ZFm0flnudZzdSE0JxlhR2hKnGPcNB35BjQf4RYQDY= +github.com/c2h5oh/datasize v0.0.0-20220606134207-859f65c6625b/go.mod h1:S/7n9copUssQ56c7aAgHqftWO4LTf4xY6CGWt8Bc+3M= github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/cp v0.1.0/go.mod h1:SOGHArjBr4JWaSDEVpWpo/hNg6RoKrls6Oh40hiwW+s= @@ -211,8 +216,8 @@ github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDD github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/edsrzf/mmap-go v0.0.0-20160512033002-935e0e8a636c/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= -github.com/edsrzf/mmap-go v1.0.0 h1:CEBF7HpRnUCSJgGUb5h1Gm7e3VkmVDrR8lvWVLtrOFw= -github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= +github.com/edsrzf/mmap-go v1.1.0 h1:6EUwBLQ/Mcr1EYLE4Tn1VdW1A4ckqCQWZBw8Hr0kjpQ= +github.com/edsrzf/mmap-go v1.1.0/go.mod h1:19H/e8pUPLicwkyNgOykDXkJ9F0MHE+Z52B8EIth78Q= github.com/elastic/gosigar v0.8.1-0.20180330100440-37f05ff46ffa/go.mod h1:cdorVVzy1fhmEqmtgqkoE3bYtCfSCkVyjTyCIo22xvs= github.com/elastic/gosigar v0.12.0/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs= github.com/elastic/gosigar v0.14.2 h1:Dg80n8cr90OZ7x+bAax/QjoW/XqTI11RmA79ZwIm9/4= @@ -384,8 +389,8 @@ github.com/golangci/unconvert v0.0.0-20180507085042-28b1c447d1f4 h1:zwtduBRr5SSW github.com/golangci/unconvert v0.0.0-20180507085042-28b1c447d1f4/go.mod h1:Izgrg8RkN3rCIMLGE9CyYmU9pY2Jer6DgANEnZ/L/cQ= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= -github.com/google/btree v1.0.1 h1:gK4Kx5IaGY9CD5sPJ36FHiBJ6ZXl0kilRiiCj+jdYp4= -github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9uaxA= +github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU= +github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= @@ -475,6 +480,7 @@ github.com/hashicorp/golang-lru v0.5.5-0.20210104140557-80c98217689d h1:dg1dEPuW github.com/hashicorp/golang-lru v0.5.5-0.20210104140557-80c98217689d/go.mod 
h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/holiman/uint256 v1.2.1 h1:XRtyuda/zw2l+Bq/38n5XUoEF72aSOu/77Thd9pPp2o= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/huin/goupnp v0.0.0-20161224104101-679507af18f3/go.mod h1:MZ2ZmwcBpvOoJ22IJsc7va19ZwoheaBk43rKg12SKag= github.com/huin/goupnp v1.0.0/go.mod h1:n9v9KO1tAxYH82qOn+UTIFQDmx5n1Zxd/ClZDMX7Bnc= @@ -574,6 +580,10 @@ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/ledgerwatch/erigon-lib v0.0.0-20221218022306-0f8fdd40c2db h1:wV9YkkYQArbUdTdlPxXi5BW6H9ovYbyUT8Af7foetvQ= +github.com/ledgerwatch/erigon-lib v0.0.0-20221218022306-0f8fdd40c2db/go.mod h1:5GCPOzxAshLF7f0wrMZu2Bdq0qqIiMcIubM9n+25gGo= +github.com/ledgerwatch/log/v3 v3.6.0 h1:JBUSK1epPyutUrz7KYDTcJtQLEHnehECRpKbM1ugy5M= +github.com/ledgerwatch/log/v3 v3.6.0/go.mod h1:L+Sp+ma/h205EdCjviZECjGEvYUYEyXSdiuHNZzg+xQ= github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/libp2p/go-buffer-pool v0.1.0 h1:oK4mSFcQz7cTQIfqbe4MIj9gLW+mnanjyFtc6cdF0Y8= github.com/libp2p/go-buffer-pool v0.1.0/go.mod h1:N+vh8gMqimBzdKkSMVuydVDq+UV5QTWy5HSiZacSbPg= @@ -971,6 +981,8 @@ github.com/timakin/bodyclose v0.0.0-20190930140734-f7f2e9bca95e/go.mod h1:Qimiff github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tommy-muehle/go-mnd v1.1.1 h1:4D0wuPKjOTiK2garzuPGGvm4zZ/wLYDOH8TJSABC7KU= github.com/tommy-muehle/go-mnd v1.1.1/go.mod h1:dSUh0FtTP8VhvkL1S+gUR1OKd9ZnSaozuI6r3m6wOig= +github.com/torquem-ch/mdbx-go v0.27.0 h1:FquhRvKL2zweMdk1R6UdOx3h6DiHgJ0+P9yQvSouURI= +github.com/torquem-ch/mdbx-go v0.27.0/go.mod h1:T2fsoJDVppxfAPTLd1svUgH1kpPmeXdPESmroSHcL1E= github.com/twmb/murmur3 v1.1.3/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ= github.com/tyler-smith/go-bip39 v1.0.1-0.20181017060643-dbb3b84ba2ef/go.mod h1:sJ5fKU0s6JVwZjjcUEX2zFOnvq0ASQ2K9Zr6cf67kNs= github.com/tyler-smith/go-bip39 v1.0.2 h1:+t3w+KwLXO6154GNJY+qUtIxLTmFjfUmpguQT1OlOT8= @@ -988,6 +1000,10 @@ github.com/uudashr/gocognit v1.0.1 h1:MoG2fZ0b/Eo7NXoIwCVFLG5JED3qgQz5/NEE+rOsjP github.com/uudashr/gocognit v1.0.1/go.mod h1:j44Ayx2KW4+oB6SWMv8KsmHzZrOInQav7D3cQMJ5JUM= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.2.0/go.mod h1:4vX61m6KN+xDduDNwXrhIAVZaZaZiQ1luJk8LWSxF3s= +github.com/valyala/fastrand v1.1.0 h1:f+5HkLW4rsgzdNoleUOB69hyT9IlD2ZQh9GyDMfb5G8= +github.com/valyala/fastrand v1.1.0/go.mod h1:HWqCzkrkg6QXT8V2EXWvXCoow7vLwOFN002oeRzjapQ= +github.com/valyala/histogram v1.2.0 h1:wyYGAZZt3CpwUiIb9AU/Zbllg1llXyrtApRS815OLoQ= +github.com/valyala/histogram v1.2.0/go.mod h1:Hb4kBwb4UxsaNbbbh+RRz8ZR6pdodR57tzWUS3BUzXY= github.com/valyala/quicktemplate v1.2.0/go.mod h1:EH+4AkTd43SvgIbQHYu59/cJyxDoOVRUAfrukLPuGJ4= github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio= github.com/viant/assertly v0.4.8/go.mod h1:aGifi++jvCrUaklKEKT0BU95igDNaqkvz+49uaYMPRU= @@ -1291,6 +1307,7 @@ 
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.3.0 h1:w8ZOecv6NaNa/zC8944JTU3vz4u6Lagfk4RPQxv92NQ= golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= @@ -1313,8 +1330,8 @@ golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxb golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20220609170525-579cf78fd858 h1:Dpdu/EMxGMFgq0CeYMh4fazTD2vtlZRYE7wyynxJb9U= -golang.org/x/time v0.0.0-20220609170525-579cf78fd858/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.2.0 h1:52I/1L54xyEQAYdtcSuxtiT84KGYTBGXwayxmIpNJhE= +golang.org/x/time v0.2.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -1500,8 +1517,8 @@ google.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAG google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= google.golang.org/grpc v1.43.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= -google.golang.org/grpc v1.50.1 h1:DS/BukOZWp8s6p4Dt/tOaJaTQyPyOoCcrjroHuCeLzY= -google.golang.org/grpc v1.50.1/go.mod h1:ZgQEeidpAuNRZ8iRrlBKXZQP1ghovWIVhdJRyCDK+GI= +google.golang.org/grpc v1.51.0 h1:E1eGv1FTqoLIdnBCZufiSHgKjlqG6fKFf6pPWtMTh8U= +google.golang.org/grpc v1.51.0/go.mod h1:wgNDFcnuBGmxLKI/qn4T+m5BtEBYXJPvibbUPsAIPww= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= diff --git a/internal/configs/harmony/harmony.go b/internal/configs/harmony/harmony.go index da37011e0..35a32825e 100644 --- a/internal/configs/harmony/harmony.go +++ b/internal/configs/harmony/harmony.go @@ -221,13 +221,28 @@ type PrometheusConfig struct { type SyncConfig struct { // TODO: Remove this bool after stream sync is fully up. - Enabled bool // enable the stream sync protocol - Downloader bool // start the sync downloader client - Concurrency int // concurrency used for stream sync protocol - MinPeers int // minimum streams to start a sync task. - InitStreams int // minimum streams in bootstrap to start sync loop. 
- DiscSoftLowCap int // when number of streams is below this value, spin discover during check - DiscHardLowCap int // when removing stream, num is below this value, spin discovery immediately - DiscHighCap int // upper limit of streams in one sync protocol - DiscBatch int // size of each discovery + Enabled bool // enable the stream sync protocol + Downloader bool // start the sync downloader client + StagedSync bool // use staged sync + StagedSyncCfg StagedSyncConfig // staged sync configurations + Concurrency int // concurrency used for stream sync protocol + MinPeers int // minimum streams to start a sync task. + InitStreams int // minimum streams in bootstrap to start sync loop. + DiscSoftLowCap int // when number of streams is below this value, spin discover during check + DiscHardLowCap int // when removing stream, num is below this value, spin discovery immediately + DiscHighCap int // upper limit of streams in one sync protocol + DiscBatch int // size of each discovery +} + +type StagedSyncConfig struct { + TurboMode bool // turn on turbo mode + DoubleCheckBlockHashes bool // double check all block hashes before download blocks + MaxBlocksPerSyncCycle uint64 // max number of blocks per each sync cycle, if set to zero, all blocks will be synced in one full cycle + MaxBackgroundBlocks uint64 // max number of background blocks in turbo mode + InsertChainBatchSize int // number of blocks to build a batch and insert to chain in staged sync + MaxMemSyncCycleSize uint64 // max number of blocks to use a single transaction for staged sync + VerifyAllSig bool // verify signatures for all blocks regardless of height and batch size + VerifyHeaderBatchSize uint64 // batch size to verify header before insert to chain + UseMemDB bool // it uses memory by default. set it to false to use disk + LogProgress bool // log the full sync progress in console } diff --git a/internal/configs/node/config.go b/internal/configs/node/config.go index 8c335ed23..9a0e950ec 100644 --- a/internal/configs/node/config.go +++ b/internal/configs/node/config.go @@ -69,22 +69,33 @@ var peerID peer.ID // PeerID of the node // ConfigType is the structure of all node related configuration variables type ConfigType struct { // The three groupID design, please refer to https://github.com/harmony-one/harmony/blob/master/node/node.md#libp2p-integration - beacon GroupID // the beacon group ID - group GroupID // the group ID of the shard (note: for beacon chain node, the beacon and shard group are the same) - client GroupID // the client group ID of the shard - isClient bool // whether this node is a client node, such as wallet - ShardID uint32 // ShardID of this node; TODO ek – revisit when resharding - role Role // Role of the node - Port string // Port of the node. - IP string // IP of the node. 
diff --git a/internal/configs/node/config.go b/internal/configs/node/config.go
index 8c335ed23..9a0e950ec 100644
--- a/internal/configs/node/config.go
+++ b/internal/configs/node/config.go
@@ -69,22 +69,33 @@ var peerID peer.ID // PeerID of the node
 
 // ConfigType is the structure of all node related configuration variables
 type ConfigType struct {
 	// The three groupID design, please refer to https://github.com/harmony-one/harmony/blob/master/node/node.md#libp2p-integration
-	beacon   GroupID // the beacon group ID
-	group    GroupID // the group ID of the shard (note: for beacon chain node, the beacon and shard group are the same)
-	client   GroupID // the client group ID of the shard
-	isClient bool    // whether this node is a client node, such as wallet
-	ShardID  uint32  // ShardID of this node; TODO ek – revisit when resharding
-	role     Role    // Role of the node
-	Port     string  // Port of the node.
-	IP       string  // IP of the node.
-	RPCServer     RPCServerConfig     // RPC server port and ip
-	RosettaServer RosettaServerConfig // rosetta server port and ip
-	IsOffline     bool
-	Downloader    bool // Whether stream downloader is running; TODO: remove this after sync up
-	NtpServer       string
-	StringRole      string
-	P2PPriKey       p2p_crypto.PrivKey   `json:"-"`
-	ConsensusPriKey multibls.PrivateKeys `json:"-"`
+	beacon   GroupID // the beacon group ID
+	group    GroupID // the group ID of the shard (note: for beacon chain node, the beacon and shard group are the same)
+	client   GroupID // the client group ID of the shard
+	isClient bool    // whether this node is a client node, such as wallet
+	ShardID  uint32  // ShardID of this node; TODO ek – revisit when resharding
+	role     Role    // Role of the node
+	Port     string  // Port of the node.
+	IP       string  // IP of the node.
+	RPCServer     RPCServerConfig     // RPC server port and ip
+	RosettaServer RosettaServerConfig // rosetta server port and ip
+	IsOffline     bool
+	Downloader    bool // Whether stream downloader is running; TODO: remove this after sync up
+	StagedSync             bool   // use staged sync
+	StagedSyncTurboMode    bool   // use turbo mode for staged sync
+	UseMemDB               bool   // use an in-memory DB for staged sync; set to false to use disk
+	DoubleCheckBlockHashes bool   // double-check all block hashes before downloading blocks
+	MaxBlocksPerSyncCycle  uint64 // max number of blocks per sync cycle; if set to zero, all blocks are downloaded and synced in one full cycle
+	MaxMemSyncCycleSize    uint64 // max number of blocks to sync within a single transaction in staged sync
+	MaxBackgroundBlocks    uint64 // max number of background blocks in turbo mode
+	InsertChainBatchSize   int    // number of blocks to batch together when inserting into the chain in staged sync
+	VerifyAllSig           bool   // verify signatures for all blocks regardless of height and batch size
+	VerifyHeaderBatchSize  uint64 // batch size for verifying headers before inserting into the chain
+	LogProgress            bool   // log the full sync progress to the console
+	NtpServer       string
+	StringRole      string
+	P2PPriKey       p2p_crypto.PrivKey   `json:"-"`
+	ConsensusPriKey multibls.PrivateKeys `json:"-"`
 	// Database directory
 	DBDir       string
 	networkType NetworkType
diff --git a/node/node.go b/node/node.go
index 9e6ed72cb..c50e74e1f 100644
--- a/node/node.go
+++ b/node/node.go
@@ -39,6 +39,7 @@ import (
 	"github.com/harmony-one/harmony/api/service"
 	"github.com/harmony-one/harmony/api/service/legacysync"
 	"github.com/harmony-one/harmony/api/service/legacysync/downloader"
+	"github.com/harmony-one/harmony/api/service/stagedsync"
 	"github.com/harmony-one/harmony/consensus"
 	"github.com/harmony-one/harmony/core"
 	"github.com/harmony-one/harmony/core/rawdb"
@@ -81,6 +82,20 @@ type syncConfig struct {
 	withSig bool
 }
 
+type ISync interface {
+	UpdateBlockAndStatus(block *types.Block, bc core.BlockChain, verifyAllSig bool) error
+	AddLastMileBlock(block *types.Block)
+	GetActivePeerNumber() int
+	CreateSyncConfig(peers []p2p.Peer, shardID uint32) error
+	SyncLoop(bc core.BlockChain, worker *worker.Worker, isBeacon bool, consensus *consensus.Consensus, loopMinTime time.Duration)
+	IsSynchronized() bool
+	IsSameBlockchainHeight(bc core.BlockChain) (uint64, bool)
+	AddNewBlock(peerHash []byte, block *types.Block)
+	RegisterNodeInfo() int
+	GetParsedSyncStatus() (IsSynchronized bool, OtherHeight uint64, HeightDiff uint64)
+	GetParsedSyncStatusDoubleChecked() (IsSynchronized bool, OtherHeight uint64, HeightDiff uint64)
+}
+
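Because both the legacy legacysync.StateSync and the new stagedsync.StagedSync are expected to satisfy ISync, call sites can be written once against the interface. A minimal sketch, assuming it sits in the node package next to the code above; the helper name and log fields are illustrative, not part of this change.

// reportSyncStatus is an illustrative helper, not code from this PR: it works
// with either sync implementation because it only relies on the ISync interface.
func reportSyncStatus(s ISync) {
	isSynchronized, otherHeight, heightDiff := s.GetParsedSyncStatus()
	utils.Logger().Info().
		Bool("isSynchronized", isSynchronized).
		Uint64("otherHeight", otherHeight).
		Uint64("heightDiff", heightDiff).
		Int("activePeers", s.GetActivePeerNumber()).
		Msg("[SYNC] status report")
}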
 // Node represents a protocol-participating node in the network
 type Node struct {
 	Consensus *consensus.Consensus // Consensus object containing all Consensus related data (e.g. committee members, signatures, commits)
@@ -102,6 +117,7 @@ type Node struct {
 	syncID    [SyncIDLength]byte // a unique ID for the node during the state syncing process with peers
 	stateSync *legacysync.StateSync
 	epochSync *legacysync.EpochSync
+	stateStagedSync *stagedsync.StagedSync
 	peerRegistrationRecord map[string]*syncConfig // record registration time (unixtime) of peers begin in syncing
 	SyncingPeerProvider    SyncingPeerProvider
 	// The p2p host used to send/receive p2p messages
@@ -127,8 +143,8 @@
 	// BroadcastInvalidTx flag is considered when adding pending tx to tx-pool
 	BroadcastInvalidTx bool
 	// InSync flag indicates the node is in-sync or not
-	IsInSync      *abool.AtomicBool
-	proposedBlock map[uint64]*types.Block
+	IsSynchronized *abool.AtomicBool
+	proposedBlock  map[uint64]*types.Block
 
 	deciderCache   *lru.Cache
 	committeeCache *lru.Cache
@@ -153,7 +169,32 @@ func (node *Node) Blockchain() core.BlockChain {
 	return bc
 }
 
-// Beaconchain returns the beaconchain from node.
+func (node *Node) SyncInstance() ISync {
+	return node.GetOrCreateSyncInstance(true)
+}
+
+func (node *Node) CurrentSyncInstance() bool {
+	return node.GetOrCreateSyncInstance(false) != nil
+}
+
+// GetOrCreateSyncInstance returns the node's state sync instance, either legacy or staged.
+// If initiate is set to true, the instance is created on first use.
+func (node *Node) GetOrCreateSyncInstance(initiate bool) ISync {
+	if node.NodeConfig.StagedSync {
+		if initiate && node.stateStagedSync == nil {
+			utils.Logger().Info().Msg("initializing staged state sync")
+			node.stateStagedSync = node.createStagedSync(node.Blockchain())
+		}
+		return node.stateStagedSync
+	}
+	if initiate && node.stateSync == nil {
+		utils.Logger().Info().Msg("initializing legacy state sync")
+		node.stateSync = node.createStateSync(node.Beaconchain())
+	}
+	return node.stateSync
+}
+
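The accessor pair gives callers two levels of intent: SyncInstance creates the (legacy or staged) instance on demand, while CurrentSyncInstance only probes whether one already exists. An illustrative call site, not part of this PR, that avoids force-creating a sync instance on a node that never started syncing:

// syncPeersIfRunning is a hypothetical helper showing the intended usage of the
// two accessors above: probe first, only dereference when an instance exists.
func (node *Node) syncPeersIfRunning() int {
	if !node.CurrentSyncInstance() {
		return 0 // neither legacy nor staged sync has been created yet
	}
	return node.SyncInstance().GetActivePeerNumber()
}

Note that the lazy initialization in GetOrCreateSyncInstance is not guarded by a lock, so it assumes the first initiating call happens from a single setup path rather than from concurrent goroutines.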
+// Beaconchain returns the beacon chain from node.
 func (node *Node) Beaconchain() core.BlockChain {
 	// tikv mode not have the BeaconChain storage
 	if node.HarmonyConfig != nil && node.HarmonyConfig.General.RunElasticMode && node.HarmonyConfig.General.ShardID != shard.BeaconChainShardID {
@@ -1019,7 +1060,7 @@ func New(
 		}
 	}
 	node.shardChains = collection
-	node.IsInSync = abool.NewBool(false)
+	node.IsSynchronized = abool.NewBool(false)
 
 	if host != nil && consensusObj != nil {
 		// Consensus and associated channel to communicate blocks
diff --git a/node/node_syncing.go b/node/node_syncing.go
index f10128385..1b4a1e17c 100644
--- a/node/node_syncing.go
+++ b/node/node_syncing.go
@@ -22,6 +22,7 @@ import (
 	"github.com/harmony-one/harmony/api/service/legacysync"
 	legdownloader "github.com/harmony-one/harmony/api/service/legacysync/downloader"
 	downloader_pb "github.com/harmony-one/harmony/api/service/legacysync/downloader/proto"
+	"github.com/harmony-one/harmony/api/service/stagedsync"
 	"github.com/harmony-one/harmony/api/service/synchronize"
 	"github.com/harmony-one/harmony/core"
 	"github.com/harmony-one/harmony/core/types"
@@ -84,10 +85,7 @@ func (node *Node) DoSyncWithoutConsensus() {
 
 // IsSameHeight tells whether node is at same bc height as a peer
 func (node *Node) IsSameHeight() (uint64, bool) {
-	if node.stateSync == nil {
-		node.stateSync = node.createStateSync(node.Blockchain())
-	}
-	return node.stateSync.IsSameBlockchainHeight(node.Blockchain())
+	return node.SyncInstance().IsSameBlockchainHeight(node.Blockchain())
 }
 
 func (node *Node) createStateSync(bc core.BlockChain) *legacysync.StateSync {
@@ -110,6 +108,42 @@
 		node.GetSyncID(), node.NodeConfig.Role() == nodeconfig.ExplorerNode, role)
 }
 
+func (node *Node) createStagedSync(bc core.BlockChain) *stagedsync.StagedSync {
+	// Temp hack: The actual port used in DNS sync is node.downloaderServer.Port.
+	// But registration is done through an old way of port arithmetics (syncPort + 3000).
+	// Thus, for compatibility, we do the arithmetic here rather than change the
+	// protocol itself. This is just a temporary hack and will not be a concern after
+	// state sync.
+	var mySyncPort int
+	if node.downloaderServer != nil {
+		mySyncPort = node.downloaderServer.Port
+	} else {
+		// If the local sync server is not started, the port field in the protocol is
+		// not actually functional, so simply set it to the default value.
+		mySyncPort = nodeconfig.DefaultDNSPort
+	}
+	mutatedPort := strconv.Itoa(mySyncPort + legacysync.SyncingPortDifference)
+	role := node.NodeConfig.Role()
+	isExplorer := node.NodeConfig.Role() == nodeconfig.ExplorerNode
+
+	if s, err := stagedsync.CreateStagedSync(node.SelfPeer.IP, mutatedPort,
+		node.GetSyncID(), bc, role, isExplorer,
+		node.NodeConfig.StagedSyncTurboMode,
+		node.NodeConfig.UseMemDB,
+		node.NodeConfig.DoubleCheckBlockHashes,
+		node.NodeConfig.MaxBlocksPerSyncCycle,
+		node.NodeConfig.MaxBackgroundBlocks,
+		node.NodeConfig.MaxMemSyncCycleSize,
+		node.NodeConfig.VerifyAllSig,
+		node.NodeConfig.VerifyHeaderBatchSize,
+		node.NodeConfig.InsertChainBatchSize,
+		node.NodeConfig.LogProgress); err != nil {
+		return nil
+	} else {
+		return s
+	}
+}
+
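The port gymnastics above mirror the legacy DNS-sync convention: the advertised gRPC sync port is derived from the local port by a fixed offset (the "+3000" mentioned in the comment). A small illustrative restatement of that arithmetic, assuming legacysync.SyncingPortDifference carries that offset:

// deriveLegacySyncPort is an illustrative helper, not part of this PR,
// re-stating the port derivation used when registering with peers.
func deriveLegacySyncPort(localPort int) string {
	// e.g. a local port of 6000 with a 3000 offset would advertise "9000"
	return strconv.Itoa(localPort + legacysync.SyncingPortDifference)
}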
 // SyncingPeerProvider is an interface for getting the peers in the given shard.
 type SyncingPeerProvider interface {
 	SyncingPeers(shardID uint32) (peers []p2p.Peer, err error)
@@ -250,7 +284,9 @@ func (node *Node) DoSyncing(bc core.BlockChain, worker *worker.Worker, willJoinC
 
 // doSync keep the node in sync with other peers, willJoinConsensus means the node will try to join consensus after catch up
 func (node *Node) doSync(bc core.BlockChain, worker *worker.Worker, willJoinConsensus bool) {
-	if node.stateSync.GetActivePeerNumber() < legacysync.NumPeersLowBound {
+
+	syncInstance := node.SyncInstance()
+	if syncInstance.GetActivePeerNumber() < legacysync.NumPeersLowBound {
 		shardID := bc.ShardID()
 		peers, err := node.SyncingPeerProvider.SyncingPeers(shardID)
 		if err != nil {
@@ -260,28 +296,28 @@ func (node *Node) doSync(bc core.BlockChain, worker *worker.Worker, willJoinCons
 				Msg("cannot retrieve syncing peers")
 			return
 		}
-		if err := node.stateSync.CreateSyncConfig(peers, shardID); err != nil {
+		if err := syncInstance.CreateSyncConfig(peers, shardID); err != nil {
 			utils.Logger().Warn().
 				Err(err).
 				Interface("peers", peers).
 				Msg("[SYNC] create peers error")
 			return
 		}
-		utils.Logger().Debug().Int("len", node.stateSync.GetActivePeerNumber()).Msg("[SYNC] Get Active Peers")
+		utils.Logger().Debug().Int("len", syncInstance.GetActivePeerNumber()).Msg("[SYNC] Get Active Peers")
 	}
 	// TODO: treat fake maximum height
-	if result := node.stateSync.GetSyncStatusDoubleChecked(); !result.IsInSync {
-		node.IsInSync.UnSet()
+	if isSynchronized, _, _ := syncInstance.GetParsedSyncStatusDoubleChecked(); !isSynchronized {
+		node.IsSynchronized.UnSet()
 		if willJoinConsensus {
 			node.Consensus.BlocksNotSynchronized()
 		}
-		node.stateSync.SyncLoop(bc, worker, false, node.Consensus)
+		syncInstance.SyncLoop(bc, worker, false, node.Consensus, legacysync.LoopMinTime)
 		if willJoinConsensus {
-			node.IsInSync.Set()
+			node.IsSynchronized.Set()
 			node.Consensus.BlocksSynchronized()
 		}
 	}
-	node.IsInSync.Set()
+	node.IsSynchronized.Set()
 }
 
 // SupportGRPCSyncServer do gRPC sync server
@@ -331,11 +367,16 @@ func (node *Node) supportSyncing() {
 		go node.SendNewBlockToUnsync()
 	}
 
-	if node.stateSync == nil {
+	if !node.NodeConfig.StagedSync && node.stateSync == nil {
 		node.stateSync = node.createStateSync(node.Blockchain())
 		utils.Logger().Debug().Msg("[SYNC] initialized state sync")
 	}
 
+	if node.NodeConfig.StagedSync && node.stateStagedSync == nil {
+		node.stateStagedSync = node.createStagedSync(node.Blockchain())
+		utils.Logger().Debug().Msg("[SYNC] initialized state for staged sync")
+	}
+
 	go node.DoSyncing(node.Blockchain(), node.Worker, joinConsensus)
 }
 
@@ -356,6 +397,7 @@ func (node *Node) StartSyncingServer(port int) {
 
 // SendNewBlockToUnsync send latest verified block to unsync, registered nodes
 func (node *Node) SendNewBlockToUnsync() {
+
 	for {
 		block := <-node.Consensus.VerifiedNewBlock
 		blockBytes, err := rlp.EncodeToBytes(block)
@@ -403,7 +445,6 @@ func (node *Node) CalculateResponse(request *downloader_pb.DownloaderRequest, in
 	if node.NodeConfig.IsOffline {
 		return response, nil
 	}
-
 	switch request.Type {
 	case downloader_pb.DownloaderRequest_BLOCKHASH:
 		dnsServerRequestCounterVec.With(dnsReqMetricLabel("block_hash")).Inc()
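SyncLoop now receives a loopMinTime duration (legacysync.LoopMinTime at this call site). Based only on the parameter name and type, a plausible reading is that each loop iteration should not complete faster than this lower bound; the sketch below shows that generic pattern. It is purely illustrative, and doOneSyncIteration is a hypothetical placeholder rather than a function from this PR.

// runWithMinimumDuration illustrates how a minimum per-iteration duration can be
// honoured: do the work, then sleep for whatever time remains of the lower bound.
func runWithMinimumDuration(minDuration time.Duration, doOneSyncIteration func()) {
	start := time.Now()
	doOneSyncIteration()
	if elapsed := time.Since(start); elapsed < minDuration {
		time.Sleep(minDuration - elapsed)
	}
}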
@@ -493,7 +534,7 @@ func (node *Node) CalculateResponse(request *downloader_pb.DownloaderRequest, in
 	// this is the out of sync node acts as grpc server side
 	case downloader_pb.DownloaderRequest_NEWBLOCK:
 		dnsServerRequestCounterVec.With(dnsReqMetricLabel("new block")).Inc()
-		if node.IsInSync.IsSet() {
+		if node.IsSynchronized.IsSet() {
 			response.Type = downloader_pb.DownloaderResponse_INSYNC
 			return response, nil
 		}
@@ -502,7 +543,7 @@ func (node *Node) CalculateResponse(request *downloader_pb.DownloaderRequest, in
 			utils.Logger().Warn().Err(err).Msg("[SYNC] unable to decode received new block")
 			return response, err
 		}
-		node.stateSync.AddNewBlock(request.PeerHash, block)
+		node.SyncInstance().AddNewBlock(request.PeerHash, block)
 
 	case downloader_pb.DownloaderRequest_REGISTER:
 		peerID := string(request.PeerHash[:])
@@ -528,7 +569,7 @@ func (node *Node) CalculateResponse(request *downloader_pb.DownloaderRequest, in
 		} else {
 			response.Type = downloader_pb.DownloaderResponse_FAIL
 			syncPort := legacysync.GetSyncingPort(port)
-			client := legdownloader.ClientSetup(ip, syncPort)
+			client := legdownloader.ClientSetup(ip, syncPort, false)
 			if client == nil {
 				utils.Logger().Warn().
 					Str("ip", ip).
@@ -546,8 +587,8 @@ func (node *Node) CalculateResponse(request *downloader_pb.DownloaderRequest, in
 		}
 
 	case downloader_pb.DownloaderRequest_REGISTERTIMEOUT:
-		if !node.IsInSync.IsSet() {
-			count := node.stateSync.RegisterNodeInfo()
+		if !node.IsSynchronized.IsSet() {
+			count := node.SyncInstance().RegisterNodeInfo()
 			utils.Logger().Debug().
 				Int("number", count).
 				Msg("[SYNC] extra node registered")
@@ -752,18 +793,17 @@ func (node *Node) SyncStatus(shardID uint32) (bool, uint64, uint64) {
 func (node *Node) legacySyncStatus(shardID uint32) (bool, uint64, uint64) {
 	switch shardID {
 	case node.NodeConfig.ShardID:
-		if node.stateSync == nil {
+		if node.SyncInstance() == nil {
 			return false, 0, 0
 		}
-		result := node.stateSync.GetSyncStatus()
-		return result.IsInSync, result.OtherHeight, result.HeightDiff
+		return node.SyncInstance().GetParsedSyncStatus()
 
 	case shard.BeaconChainShardID:
 		if node.epochSync == nil {
 			return false, 0, 0
 		}
 		result := node.epochSync.GetSyncStatus()
-		return result.IsInSync, result.OtherHeight, result.HeightDiff
+		return result.IsSynchronized, result.OtherHeight, result.HeightDiff
 
 	default:
 		// Shard node is not working on
@@ -785,18 +825,19 @@ func (node *Node) IsOutOfSync(shardID uint32) bool {
 func (node *Node) legacyIsOutOfSync(shardID uint32) bool {
 	switch shardID {
 	case node.NodeConfig.ShardID:
-		if node.stateSync == nil {
+		if !node.NodeConfig.StagedSync && node.stateSync == nil {
+			return true
+		} else if node.NodeConfig.StagedSync && node.stateStagedSync == nil {
 			return true
 		}
-		result := node.stateSync.GetSyncStatus()
-		return !result.IsInSync
+		return !node.SyncInstance().IsSynchronized()
 
 	case shard.BeaconChainShardID:
 		if node.epochSync == nil {
 			return true
 		}
 		result := node.epochSync.GetSyncStatus()
-		return !result.IsInSync
+		return !result.IsSynchronized
 
 	default:
 		return true
diff --git a/rosetta/infra/harmony-mainnet.conf b/rosetta/infra/harmony-mainnet.conf
index b2000fd4d..534b80887 100644
--- a/rosetta/infra/harmony-mainnet.conf
+++ b/rosetta/infra/harmony-mainnet.conf
@@ -1,4 +1,4 @@
-Version = "2.5.8"
+Version = "2.5.9"
 
 [BLSKeys]
   KMSConfigFile = ""
@@ -100,6 +100,7 @@ Version = "2.5.8"
   DiscHighCap = 128
   DiscSoftLowCap = 8
   Downloader = false
+  StagedSync = false
   Enabled = false
   InitStreams = 8
   MinPeers = 5
diff --git a/rosetta/infra/harmony-pstn.conf b/rosetta/infra/harmony-pstn.conf
index 18d083052..ed4c116c6 100644
--- a/rosetta/infra/harmony-pstn.conf
+++ b/rosetta/infra/harmony-pstn.conf
@@ -1,4 +1,4 @@
-Version = "2.5.8"
+Version = "2.5.9"
 
 [BLSKeys]
   KMSConfigFile = ""
@@ -100,6 +100,7 @@ Version = "2.5.8"
   DiscHighCap = 128
   DiscSoftLowCap = 8
   Downloader = false
+  StagedSync = false
   Enabled = false
   InitStreams = 8
   MinPeers = 2
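The parsed (isSynchronized, otherHeight, heightDiff) tuple returned by SyncStatus and GetParsedSyncStatus is what ultimately surfaces to operators and RPC consumers. A hedged sketch of a consumer, assuming it lives in the node package with the usual fmt import; the function name and wording are illustrative, not part of this PR.

// describeSyncStatus is an illustrative consumer of the parsed status tuple.
func describeSyncStatus(node *Node, shardID uint32) string {
	inSync, otherHeight, heightDiff := node.SyncStatus(shardID)
	if inSync {
		return fmt.Sprintf("in sync with peers at height %d", otherHeight)
	}
	return fmt.Sprintf("behind by %d blocks (peer height %d)", heightDiff, otherHeight)
}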