From ca05f3f10f0c20a05e88698a97f08fc21a9c4b29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Sun, 25 Jun 2023 14:25:56 +0800 Subject: [PATCH 01/56] add statesync as a new state to staged sync --- .../stagedstreamsync/default_stages.go | 10 + .../stagedstreamsync/stage_statesync.go | 215 ++++++++++++++++++ api/service/stagedstreamsync/stages.go | 1 + api/service/stagedstreamsync/syncing.go | 4 + 4 files changed, 230 insertions(+) create mode 100644 api/service/stagedstreamsync/stage_statesync.go diff --git a/api/service/stagedstreamsync/default_stages.go b/api/service/stagedstreamsync/default_stages.go index 55986ff6e..4a1e719f2 100644 --- a/api/service/stagedstreamsync/default_stages.go +++ b/api/service/stagedstreamsync/default_stages.go @@ -13,6 +13,7 @@ var DefaultForwardOrder = ForwardOrder{ SyncEpoch, ShortRange, BlockBodies, + StateSync, // Stages below don't use Internet States, LastMile, @@ -23,6 +24,7 @@ var DefaultRevertOrder = RevertOrder{ Finish, LastMile, States, + StateSync, BlockBodies, ShortRange, SyncEpoch, @@ -33,6 +35,7 @@ var DefaultCleanUpOrder = CleanUpOrder{ Finish, LastMile, States, + StateSync, BlockBodies, ShortRange, SyncEpoch, @@ -44,6 +47,7 @@ func DefaultStages(ctx context.Context, seCfg StageEpochCfg, srCfg StageShortRangeCfg, bodiesCfg StageBodiesCfg, + stateSyncCfg StageStateSyncCfg, statesCfg StageStatesCfg, lastMileCfg StageLastMileCfg, finishCfg StageFinishCfg, @@ -53,6 +57,7 @@ func DefaultStages(ctx context.Context, handlerStageShortRange := NewStageShortRange(srCfg) handlerStageEpochSync := NewStageEpoch(seCfg) handlerStageBodies := NewStageBodies(bodiesCfg) + handlerStageStateSync := NewStageStateSync(stateSyncCfg) handlerStageStates := NewStageStates(statesCfg) handlerStageLastMile := NewStageLastMile(lastMileCfg) handlerStageFinish := NewStageFinish(finishCfg) @@ -78,6 +83,11 @@ func DefaultStages(ctx context.Context, Description: "Retrieve Block Bodies", Handler: handlerStageBodies, }, + { + ID: StateSync, + Description: "Retrieve States", + Handler: handlerStageStateSync, + }, { ID: States, Description: "Update Blockchain State", diff --git a/api/service/stagedstreamsync/stage_statesync.go b/api/service/stagedstreamsync/stage_statesync.go new file mode 100644 index 000000000..10cce8462 --- /dev/null +++ b/api/service/stagedstreamsync/stage_statesync.go @@ -0,0 +1,215 @@ +package stagedstreamsync + +import ( + "context" + "fmt" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/trie" + "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/core/rawdb" + "github.com/harmony-one/harmony/core/state" + "github.com/harmony-one/harmony/internal/utils" + "github.com/ledgerwatch/erigon-lib/kv" + "github.com/prometheus/client_golang/prometheus" + "github.com/rs/zerolog" + "golang.org/x/crypto/sha3" +) + +type StageStateSync struct { + configs StageStateSyncCfg +} + +// trieTask represents a single trie node download task, containing a set of +// peers already attempted retrieval from to detect stalled syncs and abort. +type trieTask struct { + hash common.Hash + path [][]byte + attempts map[string]struct{} +} + +// codeTask represents a single byte code download task, containing a set of +// peers already attempted retrieval from to detect stalled syncs and abort. 
+type codeTask struct { + attempts map[string]struct{} +} + +type StageStateSyncCfg struct { + bc core.BlockChain + protocol syncProtocol + db kv.RwDB + root common.Hash // State root currently being synced + sched *trie.Sync // State trie sync scheduler defining the tasks + keccak crypto.KeccakState // Keccak256 hasher to verify deliveries with + trieTasks map[string]*trieTask // Set of trie node tasks currently queued for retrieval, indexed by path + codeTasks map[common.Hash]*codeTask // Set of byte code tasks currently queued for retrieval, indexed by hash + concurrency int + logger zerolog.Logger + logProgress bool +} + +func NewStageStateSync(cfg StageStateSyncCfg) *StageStateSync { + return &StageStateSync{ + configs: cfg, + } +} + +func NewStageStateSyncCfg(bc core.BlockChain, + db kv.RwDB, + root common.Hash, + concurrency int, + protocol syncProtocol, + logger zerolog.Logger, + logProgress bool) StageStateSyncCfg { + + return StageStateSyncCfg{ + bc: bc, + db: db, + root: root, + sched: state.NewStateSync(root, bc.ChainDb(), nil, rawdb.HashScheme), + keccak: sha3.NewLegacyKeccak256().(crypto.KeccakState), + trieTasks: make(map[string]*trieTask), + codeTasks: make(map[common.Hash]*codeTask), + concurrency: concurrency, + logger: logger, + logProgress: logProgress, + } +} + +// Exec progresses States stage in the forward direction +func (stg *StageStateSync) Exec(ctx context.Context, bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { + + // for short range sync, skip this step + if !s.state.initSync { + return nil + } + + maxHeight := s.state.status.targetBN + currentHead := stg.configs.bc.CurrentBlock().NumberU64() + if currentHead >= maxHeight { + return nil + } + currProgress := stg.configs.bc.CurrentBlock().NumberU64() + targetHeight := s.state.currentCycle.TargetHeight + if currProgress >= targetHeight { + return nil + } + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = stg.configs.db.BeginRw(ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + // isLastCycle := targetHeight >= maxHeight + startTime := time.Now() + startBlock := currProgress + + if stg.configs.logProgress { + fmt.Print("\033[s") // save the cursor position + } + + for i := currProgress + 1; i <= targetHeight; i++ { + // log the stage progress in console + if stg.configs.logProgress { + //calculating block speed + dt := time.Now().Sub(startTime).Seconds() + speed := float64(0) + if dt > 0 { + speed = float64(currProgress-startBlock) / dt + } + blockSpeed := fmt.Sprintf("%.2f", speed) + fmt.Print("\033[u\033[K") // restore the cursor position and clear the line + fmt.Println("insert blocks progress:", currProgress, "/", targetHeight, "(", blockSpeed, "blocks/s", ")") + } + + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + + return nil +} + +func (stg *StageStateSync) insertChain(gbm *blockDownloadManager, + protocol syncProtocol, + lbls prometheus.Labels, + targetBN uint64) { + +} + +func (stg *StageStateSync) saveProgress(s *StageState, tx kv.RwTx) (err error) { + + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = stg.configs.db.BeginRw(context.Background()) + if err != nil { + return err + } + defer tx.Rollback() + } + + // save progress + if err = s.Update(tx, stg.configs.bc.CurrentBlock().NumberU64()); err != nil { + utils.Logger().Error(). + Err(err). 
+ Msgf("[STAGED_SYNC] saving progress for block States stage failed") + return ErrSaveStateProgressFail + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (stg *StageStateSync) Revert(ctx context.Context, firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = stg.configs.db.BeginRw(ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + if err = u.Done(tx); err != nil { + return err + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (stg *StageStateSync) CleanUp(ctx context.Context, firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = stg.configs.db.BeginRw(ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} diff --git a/api/service/stagedstreamsync/stages.go b/api/service/stagedstreamsync/stages.go index 6a21fe707..cb6efa0cd 100644 --- a/api/service/stagedstreamsync/stages.go +++ b/api/service/stagedstreamsync/stages.go @@ -12,6 +12,7 @@ const ( ShortRange SyncStageID = "ShortRange" // short range SyncEpoch SyncStageID = "SyncEpoch" // epoch sync BlockBodies SyncStageID = "BlockBodies" // Block bodies are downloaded, TxHash and UncleHash are getting verified + StateSync SyncStageID = "StateSync" // State sync States SyncStageID = "States" // will construct most recent state from downloaded blocks LastMile SyncStageID = "LastMile" // update blocks after sync and update last mile blocks as well Finish SyncStageID = "Finish" // Nominal stage after all other stages diff --git a/api/service/stagedstreamsync/syncing.go b/api/service/stagedstreamsync/syncing.go index 9e8926468..ba5ab3a20 100644 --- a/api/service/stagedstreamsync/syncing.go +++ b/api/service/stagedstreamsync/syncing.go @@ -10,6 +10,7 @@ import ( "time" "github.com/harmony-one/harmony/consensus" + "github.com/ethereum/go-ethereum/common" "github.com/harmony-one/harmony/core" "github.com/harmony-one/harmony/internal/utils" sttypes "github.com/harmony-one/harmony/p2p/stream/types" @@ -84,9 +85,11 @@ func CreateStagedSync(ctx context.Context, stageHeadsCfg := NewStageHeadersCfg(bc, mainDB) stageShortRangeCfg := NewStageShortRangeCfg(bc, mainDB) stageSyncEpochCfg := NewStageEpochCfg(bc, mainDB) + stageBodiesCfg := NewStageBodiesCfg(bc, mainDB, dbs, config.Concurrency, protocol, isBeaconNode, config.LogProgress) stageStatesCfg := NewStageStatesCfg(bc, mainDB, dbs, config.Concurrency, logger, config.LogProgress) lastMileCfg := NewStageLastMileCfg(ctx, bc, mainDB) + stageStateSyncCfg := NewStageStateSyncCfg(bc, mainDB, common.Hash{}, config.Concurrency, protocol, logger, config.LogProgress) stageFinishCfg := NewStageFinishCfg(mainDB) stages := DefaultStages(ctx, @@ -94,6 +97,7 @@ func CreateStagedSync(ctx context.Context, stageSyncEpochCfg, stageShortRangeCfg, stageBodiesCfg, + stageStateSyncCfg, stageStatesCfg, lastMileCfg, stageFinishCfg, From 9ec0272261aad303d53106e3679970a0e84b3363 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Mon, 26 Jun 2023 15:43:21 +0800 Subject: [PATCH 02/56] add initial state download manager to stream sync --- .../state_download_manager.go | 127 ++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 
api/service/stagedstreamsync/state_download_manager.go diff --git a/api/service/stagedstreamsync/state_download_manager.go b/api/service/stagedstreamsync/state_download_manager.go new file mode 100644 index 000000000..3ad5bd1e7 --- /dev/null +++ b/api/service/stagedstreamsync/state_download_manager.go @@ -0,0 +1,127 @@ +package stagedstreamsync + +import ( + "sync" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/trie" + "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/core/rawdb" + "github.com/harmony-one/harmony/core/state" + sttypes "github.com/harmony-one/harmony/p2p/stream/types" + "github.com/ledgerwatch/erigon-lib/kv" + "github.com/rs/zerolog" + "golang.org/x/crypto/sha3" +) + +// trieTask represents a single trie node download task, containing a set of +// peers already attempted retrieval from to detect stalled syncs and abort. +type trieTask struct { + hash common.Hash + path [][]byte + attempts map[string]struct{} +} + +// codeTask represents a single byte code download task, containing a set of +// peers already attempted retrieval from to detect stalled syncs and abort. +type codeTask struct { + attempts map[string]struct{} +} + +type task struct { + trieTasks map[string]*trieTask // Set of trie node tasks currently queued for retrieval, indexed by path + codeTasks map[common.Hash]*codeTask // Set of byte code tasks currently queued for retrieval, indexed by hash +} + +func newTask() *task { + return &task{ + trieTasks: make(map[string]*trieTask), + codeTasks: make(map[common.Hash]*codeTask), + } +} + +func (t *task) addCodeTask(h common.Hash, ct *codeTask) { + t.codeTasks[h] = &codeTask{ + attempts: ct.attempts, + } +} + +func (t *task) getCodeTask(h common.Hash) *codeTask { + return t.codeTasks[h] +} + +func (t *task) addNewCodeTask(h common.Hash) { + t.codeTasks[h] = &codeTask{ + attempts: make(map[string]struct{}), + } +} + +func (t *task) deleteCodeTask(hash common.Hash) { + delete(t.codeTasks, hash) +} + +func (t *task) addTrieTask(hash common.Hash, path string) { + t.trieTasks[path] = &trieTask{ + hash: hash, + path: trie.NewSyncPath([]byte(path)), + attempts: make(map[string]struct{}), + } +} + +func (t *task) setTrieTask(path string, tt *trieTask) { + t.trieTasks[path] = &trieTask{ + hash: tt.hash, + path: tt.path, + attempts: tt.attempts, + } +} + +func (t *task) getTrieTask(path string) *trieTask { + return t.trieTasks[path] +} + +func (t *task) deleteTrieTask(path string) { + delete(t.trieTasks, path) +} + +// StateDownloadManager is the helper structure for get blocks request management +type StateDownloadManager struct { + bc core.BlockChain + tx kv.RwTx + + protocol syncProtocol + root common.Hash // State root currently being synced + sched *trie.Sync // State trie sync scheduler defining the tasks + keccak crypto.KeccakState // Keccak256 hasher to verify deliveries with + concurrency int + logger zerolog.Logger + lock sync.Mutex + + tasks *task + requesting *task + processing *task + retries *task +} + +func newStateDownloadManager(tx kv.RwTx, + bc core.BlockChain, + root common.Hash, + concurrency int, + logger zerolog.Logger) *StateDownloadManager { + + return &StateDownloadManager{ + bc: bc, + tx: tx, + root: root, + sched: state.NewStateSync(root, bc.ChainDb(), nil, rawdb.HashScheme), + keccak: sha3.NewLegacyKeccak256().(crypto.KeccakState), + concurrency: concurrency, + logger: logger, + tasks: newTask(), + requesting: newTask(), + processing: newTask(), + 
retries: newTask(), + } +} + From 2064cfd62ce753c601759d786634c2a8f6755873 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Mon, 26 Jun 2023 15:56:36 +0800 Subject: [PATCH 03/56] add protocol to stage statesync --- .../stagedstreamsync/stage_statesync.go | 34 ++----------------- 1 file changed, 2 insertions(+), 32 deletions(-) diff --git a/api/service/stagedstreamsync/stage_statesync.go b/api/service/stagedstreamsync/stage_statesync.go index 10cce8462..9391944b7 100644 --- a/api/service/stagedstreamsync/stage_statesync.go +++ b/api/service/stagedstreamsync/stage_statesync.go @@ -5,47 +5,22 @@ import ( "fmt" "time" - "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/crypto" - "github.com/ethereum/go-ethereum/trie" "github.com/harmony-one/harmony/core" - "github.com/harmony-one/harmony/core/rawdb" - "github.com/harmony-one/harmony/core/state" "github.com/harmony-one/harmony/internal/utils" "github.com/ledgerwatch/erigon-lib/kv" "github.com/prometheus/client_golang/prometheus" "github.com/rs/zerolog" - "golang.org/x/crypto/sha3" ) type StageStateSync struct { configs StageStateSyncCfg } -// trieTask represents a single trie node download task, containing a set of -// peers already attempted retrieval from to detect stalled syncs and abort. -type trieTask struct { - hash common.Hash - path [][]byte - attempts map[string]struct{} -} - -// codeTask represents a single byte code download task, containing a set of -// peers already attempted retrieval from to detect stalled syncs and abort. -type codeTask struct { - attempts map[string]struct{} -} - type StageStateSyncCfg struct { bc core.BlockChain - protocol syncProtocol db kv.RwDB - root common.Hash // State root currently being synced - sched *trie.Sync // State trie sync scheduler defining the tasks - keccak crypto.KeccakState // Keccak256 hasher to verify deliveries with - trieTasks map[string]*trieTask // Set of trie node tasks currently queued for retrieval, indexed by path - codeTasks map[common.Hash]*codeTask // Set of byte code tasks currently queued for retrieval, indexed by hash concurrency int + protocol syncProtocol logger zerolog.Logger logProgress bool } @@ -58,7 +33,6 @@ func NewStageStateSync(cfg StageStateSyncCfg) *StageStateSync { func NewStageStateSyncCfg(bc core.BlockChain, db kv.RwDB, - root common.Hash, concurrency int, protocol syncProtocol, logger zerolog.Logger, @@ -67,12 +41,8 @@ func NewStageStateSyncCfg(bc core.BlockChain, return StageStateSyncCfg{ bc: bc, db: db, - root: root, - sched: state.NewStateSync(root, bc.ChainDb(), nil, rawdb.HashScheme), - keccak: sha3.NewLegacyKeccak256().(crypto.KeccakState), - trieTasks: make(map[string]*trieTask), - codeTasks: make(map[common.Hash]*codeTask), concurrency: concurrency, + protocol: protocol, logger: logger, logProgress: logProgress, } From 702eb5e1fb54b90b7715ef6a66404d50d12889b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Mon, 26 Jun 2023 23:01:11 +0800 Subject: [PATCH 04/56] add task management logic to state download manager in stream sync --- api/service/stagedstreamsync/const.go | 8 + .../state_download_manager.go | 272 ++++++++++++++++-- 2 files changed, 251 insertions(+), 29 deletions(-) diff --git a/api/service/stagedstreamsync/const.go b/api/service/stagedstreamsync/const.go index 048b5d812..d7b6510a4 100644 --- a/api/service/stagedstreamsync/const.go +++ 
b/api/service/stagedstreamsync/const.go @@ -23,6 +23,14 @@ const ( // no more request will be assigned to workers to wait for InsertChain to finish. SoftQueueCap int = 100 + StatesPerRequest int = 10 // number of get nodes by hashes for each request + + // DefaultConcurrency is the default settings for concurrency + DefaultConcurrency int = 4 + + // MaxTriesToFetchNodeData is the maximum number of tries to fetch node data + MaxTriesToFetchNodeData int = 5 + // ShortRangeTimeout is the timeout for each short range sync, which allow short range sync // to restart automatically when stuck in `getBlockHashes` ShortRangeTimeout time.Duration = 1 * time.Minute diff --git a/api/service/stagedstreamsync/state_download_manager.go b/api/service/stagedstreamsync/state_download_manager.go index 3ad5bd1e7..295cc5023 100644 --- a/api/service/stagedstreamsync/state_download_manager.go +++ b/api/service/stagedstreamsync/state_download_manager.go @@ -1,6 +1,7 @@ package stagedstreamsync import ( + "fmt" "sync" "github.com/ethereum/go-ethereum/common" @@ -15,30 +16,10 @@ import ( "golang.org/x/crypto/sha3" ) -// trieTask represents a single trie node download task, containing a set of -// peers already attempted retrieval from to detect stalled syncs and abort. -type trieTask struct { - hash common.Hash - path [][]byte - attempts map[string]struct{} -} - // codeTask represents a single byte code download task, containing a set of // peers already attempted retrieval from to detect stalled syncs and abort. type codeTask struct { - attempts map[string]struct{} -} - -type task struct { - trieTasks map[string]*trieTask // Set of trie node tasks currently queued for retrieval, indexed by path - codeTasks map[common.Hash]*codeTask // Set of byte code tasks currently queued for retrieval, indexed by hash -} - -func newTask() *task { - return &task{ - trieTasks: make(map[string]*trieTask), - codeTasks: make(map[common.Hash]*codeTask), - } + attempts map[sttypes.StreamID]int } func (t *task) addCodeTask(h common.Hash, ct *codeTask) { @@ -53,7 +34,7 @@ func (t *task) getCodeTask(h common.Hash) *codeTask { func (t *task) addNewCodeTask(h common.Hash) { t.codeTasks[h] = &codeTask{ - attempts: make(map[string]struct{}), + attempts: make(map[sttypes.StreamID]int), } } @@ -61,15 +42,15 @@ func (t *task) deleteCodeTask(hash common.Hash) { delete(t.codeTasks, hash) } -func (t *task) addTrieTask(hash common.Hash, path string) { - t.trieTasks[path] = &trieTask{ - hash: hash, - path: trie.NewSyncPath([]byte(path)), - attempts: make(map[string]struct{}), - } +// trieTask represents a single trie node download task, containing a set of +// peers already attempted retrieval from to detect stalled syncs and abort. 
+type trieTask struct { + hash common.Hash + path [][]byte + attempts map[sttypes.StreamID]int } -func (t *task) setTrieTask(path string, tt *trieTask) { +func (t *task) addTrieTask(path string, tt *trieTask) { t.trieTasks[path] = &trieTask{ hash: tt.hash, path: tt.path, @@ -81,10 +62,30 @@ func (t *task) getTrieTask(path string) *trieTask { return t.trieTasks[path] } +func (t *task) addNewTrieTask(hash common.Hash, path string) { + t.trieTasks[path] = &trieTask{ + hash: hash, + path: trie.NewSyncPath([]byte(path)), + attempts: make(map[sttypes.StreamID]int), + } +} + func (t *task) deleteTrieTask(path string) { delete(t.trieTasks, path) } +type task struct { + trieTasks map[string]*trieTask // Set of trie node tasks currently queued for retrieval, indexed by path + codeTasks map[common.Hash]*codeTask // Set of byte code tasks currently queued for retrieval, indexed by hash +} + +func newTask() *task { + return &task{ + trieTasks: make(map[string]*trieTask), + codeTasks: make(map[common.Hash]*codeTask), + } +} + // StateDownloadManager is the helper structure for get blocks request management type StateDownloadManager struct { bc core.BlockChain @@ -125,3 +126,216 @@ func newStateDownloadManager(tx kv.RwTx, } } +// fillTasks fills the tasks to send to the remote peer. +func (s *StateDownloadManager) fillTasks(n int) error { + // Refill available tasks from the scheduler. + if fill := n - (len(s.tasks.trieTasks) + len(s.tasks.codeTasks)); fill > 0 { + paths, hashes, codes := s.sched.Missing(fill) + for i, path := range paths { + s.tasks.addNewTrieTask(hashes[i], path) + } + for _, hash := range codes { + s.tasks.addNewCodeTask(hash) + } + } + return nil +} + +// getNextBatch returns objects with a maximum of n state download +// tasks to send to the remote peer. +func (s *StateDownloadManager) GetNextBatch() (nodes []common.Hash, paths []trie.SyncPath, codes []common.Hash) { + s.lock.Lock() + defer s.lock.Unlock() + + cap := StatesPerRequest + + nodes, paths, codes = s.getBatchFromRetries(cap) + nItems := len(nodes) + len(codes) + cap -= nItems + + if cap > 0 { + newNodes, newPaths, newCodes := s.getBatchFromUnprocessed(cap) + nodes = append(nodes, newNodes...) + paths = append(paths, newPaths...) + codes = append(codes, newCodes...) + } + + return nodes, paths, codes +} + +// getNextBatch returns objects with a maximum of n state download +// tasks to send to the remote peer. +func (s *StateDownloadManager) getBatchFromUnprocessed(n int) (nodes []common.Hash, paths []trie.SyncPath, codes []common.Hash) { + // over trie nodes as those can be written to disk and forgotten about. + nodes = make([]common.Hash, 0, n) + paths = make([]trie.SyncPath, 0, n) + codes = make([]common.Hash, 0, n) + + for hash, t := range s.tasks.codeTasks { + // Stop when we've gathered enough requests + if len(nodes)+len(codes) == n { + break + } + codes = append(codes, hash) + s.requesting.addCodeTask(hash, t) + s.tasks.deleteCodeTask(hash) + } + for path, t := range s.tasks.trieTasks { + // Stop when we've gathered enough requests + if len(nodes)+len(codes) == n { + break + } + nodes = append(nodes, t.hash) + paths = append(paths, t.path) + s.requesting.addTrieTask(path, t) + s.tasks.deleteTrieTask(path) + } + return nodes, paths, codes +} + +// getBatchFromRetries get the block number batch to be requested from retries. 
+func (s *StateDownloadManager) getBatchFromRetries(n int) (nodes []common.Hash, paths []trie.SyncPath, codes []common.Hash) { + // over trie nodes as those can be written to disk and forgotten about. + nodes = make([]common.Hash, 0, n) + paths = make([]trie.SyncPath, 0, n) + codes = make([]common.Hash, 0, n) + + for hash, t := range s.retries.codeTasks { + // Stop when we've gathered enough requests + if len(nodes)+len(codes) == n { + break + } + codes = append(codes, hash) + s.requesting.addCodeTask(hash, t) + s.retries.deleteCodeTask(hash) + } + for path, t := range s.retries.trieTasks { + // Stop when we've gathered enough requests + if len(nodes)+len(codes) == n { + break + } + nodes = append(nodes, t.hash) + paths = append(paths, t.path) + s.requesting.addTrieTask(path, t) + s.retries.deleteTrieTask(path) + } + return nodes, paths, codes +} + +// HandleRequestError handles the error result +func (s *StateDownloadManager) HandleRequestError(codeHashes []common.Hash, triePaths []string, streamID sttypes.StreamID, err error) { + s.lock.Lock() + defer s.lock.Unlock() + + // add requested code hashes to retries + for _, h := range codeHashes { + s.retries.codeTasks[h] = &codeTask{ + attempts: s.requesting.codeTasks[h].attempts, + } + delete(s.requesting.codeTasks, h) + } + + // add requested trie paths to retries + for _, p := range triePaths { + s.retries.trieTasks[p] = &trieTask{ + hash: s.requesting.trieTasks[p].hash, + path: s.requesting.trieTasks[p].path, + attempts: s.requesting.trieTasks[p].attempts, + } + delete(s.requesting.trieTasks, p) + } +} + +// HandleRequestResult handles get trie paths and code hashes result +func (s *StateDownloadManager) HandleRequestResult(trieTasks map[string]*trieTask, codeTasks map[common.Hash]*codeTask, response [][]byte, loopID int, streamID sttypes.StreamID) error { + s.lock.Lock() + defer s.lock.Unlock() + + // Collect processing stats and update progress if valid data was received + duplicate, unexpected, successful, numUncommitted, bytesUncommitted := 0, 0, 0, 0, 0 + + for _, blob := range response { + hash, err := s.processNodeData(trieTasks, codeTasks, blob) + switch err { + case nil: + numUncommitted++ + bytesUncommitted += len(blob) + successful++ + case trie.ErrNotRequested: + unexpected++ + case trie.ErrAlreadyProcessed: + duplicate++ + default: + return fmt.Errorf("invalid state node %s: %v", hash.TerminalString(), err) + } + } + + //TODO: remove successful tasks from requesting + + for path, task := range trieTasks { + // If the node did deliver something, missing items may be due to a protocol + // limit or a previous timeout + delayed delivery. Both cases should permit + // the node to retry the missing items (to avoid single-peer stalls). + if len(response) > 0 { //TODO: if timeout also do same + delete(task.attempts, streamID) + } else if task.attempts[streamID] >= MaxTriesToFetchNodeData { + // If we've requested the node too many times already, it may be a malicious + // sync where nobody has the right data. Abort. + return fmt.Errorf("trie node %s failed with peer %s", task.hash.TerminalString(), task.attempts[streamID]) + } + // Missing item, place into the retry queue. + s.retries.addTrieTask(path, task) + } + + for hash, task := range codeTasks { + // If the node did deliver something, missing items may be due to a protocol + // limit or a previous timeout + delayed delivery. Both cases should permit + // the node to retry the missing items (to avoid single-peer stalls). 
+ if len(response) > 0 { //TODO: if timeout also do same + delete(task.attempts, streamID) + } else if task.attempts[streamID] >= MaxTriesToFetchNodeData { + // If we've requested the node too many times already, it may be a malicious + // sync where nobody has the right data. Abort. + return fmt.Errorf("byte code %s failed with peer %s", hash.TerminalString(), task.attempts[streamID]) + } + // Missing item, place into the retry queue. + s.retries.addCodeTask(hash, task) + } + + return nil +} + +// processNodeData tries to inject a trie node data blob delivered from a remote +// peer into the state trie, returning whether anything useful was written or any +// error occurred. +// +// If multiple requests correspond to the same hash, this method will inject the +// blob as a result for the first one only, leaving the remaining duplicates to +// be fetched again. +func (s *StateDownloadManager) processNodeData(nodeTasks map[string]*trieTask, codeTasks map[common.Hash]*codeTask, responseData []byte) (common.Hash, error) { + var hash common.Hash + s.keccak.Reset() + s.keccak.Write(responseData) + s.keccak.Read(hash[:]) + + //TODO: remove from requesting + if _, present := codeTasks[hash]; present { + err := s.sched.ProcessCode(trie.CodeSyncResult{ + Hash: hash, + Data: responseData, + }) + delete(codeTasks, hash) + return hash, err + } + for path, task := range nodeTasks { + if task.hash == hash { + err := s.sched.ProcessNode(trie.NodeSyncResult{ + Path: path, + Data: responseData, + }) + delete(nodeTasks, path) + return hash, err + } + } + return common.Hash{}, trie.ErrNotRequested +} From 4629fda90bc7d9789b1c1465adcf088391cd1cc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Mon, 26 Jun 2023 23:18:05 +0800 Subject: [PATCH 05/56] fix statesync config --- api/service/stagedstreamsync/syncing.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/api/service/stagedstreamsync/syncing.go b/api/service/stagedstreamsync/syncing.go index ba5ab3a20..6e0d01a4b 100644 --- a/api/service/stagedstreamsync/syncing.go +++ b/api/service/stagedstreamsync/syncing.go @@ -10,7 +10,6 @@ import ( "time" "github.com/harmony-one/harmony/consensus" - "github.com/ethereum/go-ethereum/common" "github.com/harmony-one/harmony/core" "github.com/harmony-one/harmony/internal/utils" sttypes "github.com/harmony-one/harmony/p2p/stream/types" @@ -85,11 +84,11 @@ func CreateStagedSync(ctx context.Context, stageHeadsCfg := NewStageHeadersCfg(bc, mainDB) stageShortRangeCfg := NewStageShortRangeCfg(bc, mainDB) stageSyncEpochCfg := NewStageEpochCfg(bc, mainDB) - + stageBodiesCfg := NewStageBodiesCfg(bc, mainDB, dbs, config.Concurrency, protocol, isBeaconNode, config.LogProgress) stageStatesCfg := NewStageStatesCfg(bc, mainDB, dbs, config.Concurrency, logger, config.LogProgress) + stageStateSyncCfg := NewStageStateSyncCfg(bc, mainDB, config.Concurrency, protocol, logger, config.LogProgress) lastMileCfg := NewStageLastMileCfg(ctx, bc, mainDB) - stageStateSyncCfg := NewStageStateSyncCfg(bc, mainDB, common.Hash{}, config.Concurrency, protocol, logger, config.LogProgress) stageFinishCfg := NewStageFinishCfg(mainDB) stages := DefaultStages(ctx, From 9e1249a836df6de6279ff7f785a9fd67ffdcf32c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Wed, 28 Jun 2023 15:42:48 +0800 Subject: [PATCH 06/56] refactor state download manager --- 
.../state_download_manager.go | 48 ++++++++++--------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/api/service/stagedstreamsync/state_download_manager.go b/api/service/stagedstreamsync/state_download_manager.go index 295cc5023..8c136aebd 100644 --- a/api/service/stagedstreamsync/state_download_manager.go +++ b/api/service/stagedstreamsync/state_download_manager.go @@ -143,7 +143,7 @@ func (s *StateDownloadManager) fillTasks(n int) error { // getNextBatch returns objects with a maximum of n state download // tasks to send to the remote peer. -func (s *StateDownloadManager) GetNextBatch() (nodes []common.Hash, paths []trie.SyncPath, codes []common.Hash) { +func (s *StateDownloadManager) GetNextBatch() (nodes []common.Hash, paths []string, codes []common.Hash) { s.lock.Lock() defer s.lock.Unlock() @@ -165,10 +165,10 @@ func (s *StateDownloadManager) GetNextBatch() (nodes []common.Hash, paths []trie // getNextBatch returns objects with a maximum of n state download // tasks to send to the remote peer. -func (s *StateDownloadManager) getBatchFromUnprocessed(n int) (nodes []common.Hash, paths []trie.SyncPath, codes []common.Hash) { +func (s *StateDownloadManager) getBatchFromUnprocessed(n int) (nodes []common.Hash, paths []string, codes []common.Hash) { // over trie nodes as those can be written to disk and forgotten about. nodes = make([]common.Hash, 0, n) - paths = make([]trie.SyncPath, 0, n) + paths = make([]string, 0, n) codes = make([]common.Hash, 0, n) for hash, t := range s.tasks.codeTasks { @@ -186,7 +186,7 @@ func (s *StateDownloadManager) getBatchFromUnprocessed(n int) (nodes []common.Ha break } nodes = append(nodes, t.hash) - paths = append(paths, t.path) + paths = append(paths, path) s.requesting.addTrieTask(path, t) s.tasks.deleteTrieTask(path) } @@ -194,10 +194,10 @@ func (s *StateDownloadManager) getBatchFromUnprocessed(n int) (nodes []common.Ha } // getBatchFromRetries get the block number batch to be requested from retries. -func (s *StateDownloadManager) getBatchFromRetries(n int) (nodes []common.Hash, paths []trie.SyncPath, codes []common.Hash) { +func (s *StateDownloadManager) getBatchFromRetries(n int) (nodes []common.Hash, paths []string, codes []common.Hash) { // over trie nodes as those can be written to disk and forgotten about. 
nodes = make([]common.Hash, 0, n) - paths = make([]trie.SyncPath, 0, n) + paths = make([]string, 0, n) codes = make([]common.Hash, 0, n) for hash, t := range s.retries.codeTasks { @@ -215,7 +215,7 @@ func (s *StateDownloadManager) getBatchFromRetries(n int) (nodes []common.Hash, break } nodes = append(nodes, t.hash) - paths = append(paths, t.path) + paths = append(paths, path) s.requesting.addTrieTask(path, t) s.retries.deleteTrieTask(path) } @@ -236,18 +236,18 @@ func (s *StateDownloadManager) HandleRequestError(codeHashes []common.Hash, trie } // add requested trie paths to retries - for _, p := range triePaths { - s.retries.trieTasks[p] = &trieTask{ - hash: s.requesting.trieTasks[p].hash, - path: s.requesting.trieTasks[p].path, - attempts: s.requesting.trieTasks[p].attempts, + for _, path := range triePaths { + s.retries.trieTasks[path] = &trieTask{ + hash: s.requesting.trieTasks[path].hash, + path: s.requesting.trieTasks[path].path, + attempts: s.requesting.trieTasks[path].attempts, } - delete(s.requesting.trieTasks, p) + delete(s.requesting.trieTasks, path) } } // HandleRequestResult handles get trie paths and code hashes result -func (s *StateDownloadManager) HandleRequestResult(trieTasks map[string]*trieTask, codeTasks map[common.Hash]*codeTask, response [][]byte, loopID int, streamID sttypes.StreamID) error { +func (s *StateDownloadManager) HandleRequestResult(codeHashes []common.Hash, triePaths []string, response [][]byte, loopID int, streamID sttypes.StreamID) error { s.lock.Lock() defer s.lock.Unlock() @@ -255,7 +255,7 @@ func (s *StateDownloadManager) HandleRequestResult(trieTasks map[string]*trieTas duplicate, unexpected, successful, numUncommitted, bytesUncommitted := 0, 0, 0, 0, 0 for _, blob := range response { - hash, err := s.processNodeData(trieTasks, codeTasks, blob) + hash, err := s.processNodeData(codeHashes, triePaths, blob) switch err { case nil: numUncommitted++ @@ -271,8 +271,7 @@ func (s *StateDownloadManager) HandleRequestResult(trieTasks map[string]*trieTas } //TODO: remove successful tasks from requesting - - for path, task := range trieTasks { + for path, task := range s.requesting.trieTasks { // If the node did deliver something, missing items may be due to a protocol // limit or a previous timeout + delayed delivery. Both cases should permit // the node to retry the missing items (to avoid single-peer stalls). @@ -285,9 +284,10 @@ func (s *StateDownloadManager) HandleRequestResult(trieTasks map[string]*trieTas } // Missing item, place into the retry queue. s.retries.addTrieTask(path, task) + s.requesting.deleteTrieTask(path) } - for hash, task := range codeTasks { + for hash, task := range s.requesting.codeTasks { // If the node did deliver something, missing items may be due to a protocol // limit or a previous timeout + delayed delivery. Both cases should permit // the node to retry the missing items (to avoid single-peer stalls). @@ -300,6 +300,7 @@ func (s *StateDownloadManager) HandleRequestResult(trieTasks map[string]*trieTas } // Missing item, place into the retry queue. s.retries.addCodeTask(hash, task) + s.requesting.deleteCodeTask(hash) } return nil @@ -312,28 +313,29 @@ func (s *StateDownloadManager) HandleRequestResult(trieTasks map[string]*trieTas // If multiple requests correspond to the same hash, this method will inject the // blob as a result for the first one only, leaving the remaining duplicates to // be fetched again. 
-func (s *StateDownloadManager) processNodeData(nodeTasks map[string]*trieTask, codeTasks map[common.Hash]*codeTask, responseData []byte) (common.Hash, error) { +func (s *StateDownloadManager) processNodeData(codeHashes []common.Hash, triePaths []string, responseData []byte) (common.Hash, error) { var hash common.Hash s.keccak.Reset() s.keccak.Write(responseData) s.keccak.Read(hash[:]) //TODO: remove from requesting - if _, present := codeTasks[hash]; present { + if _, present := s.requesting.codeTasks[hash]; present { err := s.sched.ProcessCode(trie.CodeSyncResult{ Hash: hash, Data: responseData, }) - delete(codeTasks, hash) + s.requesting.deleteCodeTask(hash) return hash, err } - for path, task := range nodeTasks { + for _, path := range triePaths { + task := s.requesting.getTrieTask(path) if task.hash == hash { err := s.sched.ProcessNode(trie.NodeSyncResult{ Path: path, Data: responseData, }) - delete(nodeTasks, path) + s.requesting.deleteTrieTask(path) return hash, err } } From 841073da60cf260162eb00ce49ec4b8976c7333e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Wed, 28 Jun 2023 16:46:53 +0800 Subject: [PATCH 07/56] refactor stage state sync --- .../stagedstreamsync/stage_statesync.go | 123 +++++++++++++++--- 1 file changed, 105 insertions(+), 18 deletions(-) diff --git a/api/service/stagedstreamsync/stage_statesync.go b/api/service/stagedstreamsync/stage_statesync.go index 9391944b7..75326b6ac 100644 --- a/api/service/stagedstreamsync/stage_statesync.go +++ b/api/service/stagedstreamsync/stage_statesync.go @@ -3,11 +3,15 @@ package stagedstreamsync import ( "context" "fmt" + "sync" "time" + "github.com/ethereum/go-ethereum/common" "github.com/harmony-one/harmony/core" "github.com/harmony-one/harmony/internal/utils" + sttypes "github.com/harmony-one/harmony/p2p/stream/types" "github.com/ledgerwatch/erigon-lib/kv" + "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/rs/zerolog" ) @@ -49,7 +53,7 @@ func NewStageStateSyncCfg(bc core.BlockChain, } // Exec progresses States stage in the forward direction -func (stg *StageStateSync) Exec(ctx context.Context, bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { +func (sss *StageStateSync) Exec(ctx context.Context, bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { // for short range sync, skip this step if !s.state.initSync { @@ -57,19 +61,29 @@ func (stg *StageStateSync) Exec(ctx context.Context, bool, invalidBlockRevert bo } maxHeight := s.state.status.targetBN - currentHead := stg.configs.bc.CurrentBlock().NumberU64() + currentHead := sss.configs.bc.CurrentBlock().NumberU64() if currentHead >= maxHeight { return nil } - currProgress := stg.configs.bc.CurrentBlock().NumberU64() + currProgress := sss.configs.bc.CurrentBlock().NumberU64() targetHeight := s.state.currentCycle.TargetHeight + + if errV := CreateView(ctx, sss.configs.db, tx, func(etx kv.Tx) error { + if currProgress, err = s.CurrentStageProgress(etx); err != nil { + return err + } + return nil + }); errV != nil { + return errV + } + if currProgress >= targetHeight { return nil } useInternalTx := tx == nil if useInternalTx { var err error - tx, err = stg.configs.db.BeginRw(ctx) + tx, err = sss.configs.db.BeginRw(ctx) if err != nil { return err } @@ -78,34 +92,107 @@ func (stg *StageStateSync) Exec(ctx context.Context, bool, invalidBlockRevert bo // isLastCycle := targetHeight >= 
maxHeight startTime := time.Now() - startBlock := currProgress - if stg.configs.logProgress { + if sss.configs.logProgress { fmt.Print("\033[s") // save the cursor position } - for i := currProgress + 1; i <= targetHeight; i++ { - // log the stage progress in console - if stg.configs.logProgress { - //calculating block speed + // Fetch blocks from neighbors + root := sss.configs.bc.CurrentBlock().Root() + sdm := newStateDownloadManager(tx, sss.configs.bc, root, sss.configs.concurrency, s.state.logger) + + // Setup workers to fetch blocks from remote node + var wg sync.WaitGroup + + for i := 0; i != s.state.config.Concurrency; i++ { + wg.Add(1) + go sss.runStateWorkerLoop(ctx, sdm, &wg, i, startTime) + } + + wg.Wait() + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + + return nil +} + +// runStateWorkerLoop creates a work loop for download states +func (sss *StageStateSync) runStateWorkerLoop(ctx context.Context, sdm *StateDownloadManager, wg *sync.WaitGroup, loopID int, startTime time.Time) { + defer wg.Done() + + for { + select { + case <-ctx.Done(): + return + default: + } + nodes, paths, codes := sdm.GetNextBatch() + if len(nodes)+len(codes) == 0 { + select { + case <-ctx.Done(): + return + case <-time.After(100 * time.Millisecond): + return + } + } + + data, stid, err := sss.downloadStates(ctx, nodes, codes) + if err != nil { + if !errors.Is(err, context.Canceled) { + sss.configs.protocol.StreamFailed(stid, "downloadStates failed") + } + utils.Logger().Error(). + Err(err). + Str("stream", string(stid)). + Msg(WrapStagedSyncMsg("downloadStates failed")) + err = errors.Wrap(err, "request error") + sdm.HandleRequestError(codes, paths, stid, err) + } else if data == nil || len(data) == 0 { + utils.Logger().Warn(). + Str("stream", string(stid)). + Msg(WrapStagedSyncMsg("downloadStates failed, received empty data bytes")) + err := errors.New("downloadStates received empty data bytes") + sdm.HandleRequestError(codes, paths, stid, err) + } + sdm.HandleRequestResult(nodes, paths, data, loopID, stid) + if sss.configs.logProgress { + //calculating block download speed dt := time.Now().Sub(startTime).Seconds() speed := float64(0) if dt > 0 { - speed = float64(currProgress-startBlock) / dt + speed = float64(len(data)) / dt } - blockSpeed := fmt.Sprintf("%.2f", speed) + stateDownloadSpeed := fmt.Sprintf("%.2f", speed) + fmt.Print("\033[u\033[K") // restore the cursor position and clear the line - fmt.Println("insert blocks progress:", currProgress, "/", targetHeight, "(", blockSpeed, "blocks/s", ")") + fmt.Println("state download speed:", stateDownloadSpeed, "states/s") } - } +} - if useInternalTx { - if err := tx.Commit(); err != nil { - return err - } +func (sss *StageStateSync) downloadStates(ctx context.Context, nodes []common.Hash, codes []common.Hash) ([][]byte, sttypes.StreamID, error) { + ctx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + + hashes := append(codes, nodes...) 
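+	// Descriptive note: byte-code hashes and trie-node hashes are combined into a
+	// single batch (codes first, then nodes) and fetched with one GetNodeData call;
+	// the response below is only checked for item count against the requested hashes.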
+ data, stid, err := sss.configs.protocol.GetNodeData(ctx, hashes) + if err != nil { + return nil, stid, err + } + if err := validateGetNodeDataResult(hashes, data); err != nil { + return nil, stid, err } + return data, stid, nil +} +func validateGetNodeDataResult(requested []common.Hash, result [][]byte) error { + if len(result) != len(requested) { + return fmt.Errorf("unexpected number of nodes delivered: %v / %v", len(result), len(requested)) + } return nil } From 975857fc85ec28e445141dca56238a38e9301acf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Thu, 29 Jun 2023 11:47:11 +0800 Subject: [PATCH 08/56] fix state download manager tasks issue --- .../state_download_manager.go | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/api/service/stagedstreamsync/state_download_manager.go b/api/service/stagedstreamsync/state_download_manager.go index 8c136aebd..f06ec9cb6 100644 --- a/api/service/stagedstreamsync/state_download_manager.go +++ b/api/service/stagedstreamsync/state_download_manager.go @@ -163,7 +163,7 @@ func (s *StateDownloadManager) GetNextBatch() (nodes []common.Hash, paths []stri return nodes, paths, codes } -// getNextBatch returns objects with a maximum of n state download +// getBatchFromUnprocessed returns objects with a maximum of n unprocessed state download // tasks to send to the remote peer. func (s *StateDownloadManager) getBatchFromUnprocessed(n int) (nodes []common.Hash, paths []string, codes []common.Hash) { // over trie nodes as those can be written to disk and forgotten about. @@ -229,19 +229,13 @@ func (s *StateDownloadManager) HandleRequestError(codeHashes []common.Hash, trie // add requested code hashes to retries for _, h := range codeHashes { - s.retries.codeTasks[h] = &codeTask{ - attempts: s.requesting.codeTasks[h].attempts, - } + s.retries.addCodeTask(h,s.requesting.codeTasks[h]) delete(s.requesting.codeTasks, h) } // add requested trie paths to retries for _, path := range triePaths { - s.retries.trieTasks[path] = &trieTask{ - hash: s.requesting.trieTasks[path].hash, - path: s.requesting.trieTasks[path].path, - attempts: s.requesting.trieTasks[path].attempts, - } + s.retries.addTrieTask(path,s.requesting.trieTasks[path]) delete(s.requesting.trieTasks, path) } } @@ -270,13 +264,13 @@ func (s *StateDownloadManager) HandleRequestResult(codeHashes []common.Hash, tri } } - //TODO: remove successful tasks from requesting - for path, task := range s.requesting.trieTasks { + for _, path := range triePaths { + task := s.requesting.getTrieTask(path) // If the node did deliver something, missing items may be due to a protocol // limit or a previous timeout + delayed delivery. Both cases should permit // the node to retry the missing items (to avoid single-peer stalls). if len(response) > 0 { //TODO: if timeout also do same - delete(task.attempts, streamID) + delete(s.requesting.trieTasks[path].attempts, streamID) } else if task.attempts[streamID] >= MaxTriesToFetchNodeData { // If we've requested the node too many times already, it may be a malicious // sync where nobody has the right data. Abort. 
@@ -287,12 +281,13 @@ func (s *StateDownloadManager) HandleRequestResult(codeHashes []common.Hash, tri s.requesting.deleteTrieTask(path) } - for hash, task := range s.requesting.codeTasks { + for _, hash := range codeHashes { + task:= s.requesting.getCodeTask(hash) // If the node did deliver something, missing items may be due to a protocol // limit or a previous timeout + delayed delivery. Both cases should permit // the node to retry the missing items (to avoid single-peer stalls). if len(response) > 0 { //TODO: if timeout also do same - delete(task.attempts, streamID) + delete(s.requesting.codeTasks[hash].attempts, streamID) //TODO: do we need delete attempts??? } else if task.attempts[streamID] >= MaxTriesToFetchNodeData { // If we've requested the node too many times already, it may be a malicious // sync where nobody has the right data. Abort. From 0da96b942e2a1ff3a019c506bd24c18f6fa47f79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Thu, 29 Jun 2023 11:57:50 +0800 Subject: [PATCH 09/56] add receipt download manager --- .../receipt_download_manager.go | 172 ++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 api/service/stagedstreamsync/receipt_download_manager.go diff --git a/api/service/stagedstreamsync/receipt_download_manager.go b/api/service/stagedstreamsync/receipt_download_manager.go new file mode 100644 index 000000000..7b486a9aa --- /dev/null +++ b/api/service/stagedstreamsync/receipt_download_manager.go @@ -0,0 +1,172 @@ +package stagedstreamsync + +import ( + "sync" + + sttypes "github.com/harmony-one/harmony/p2p/stream/types" + "github.com/ledgerwatch/erigon-lib/kv" + "github.com/rs/zerolog" +) + +type ReceiptDownloadDetails struct { + loopID int + streamID sttypes.StreamID +} + +// receiptDownloadManager is the helper structure for get receipts request management +type receiptDownloadManager struct { + chain blockChain + tx kv.RwTx + + targetBN uint64 + requesting map[uint64]struct{} // receipt numbers that have been assigned to workers but not received + processing map[uint64]struct{} // receipt numbers received requests but not inserted + retries *prioritizedNumbers // requests where error happens + rq *resultQueue // result queue wait to be inserted into blockchain + rdd map[uint64]ReceiptDownloadDetails // details about how this receipt was downloaded + + logger zerolog.Logger + lock sync.Mutex +} + +func newReceiptDownloadManager(tx kv.RwTx, chain blockChain, targetBN uint64, logger zerolog.Logger) *receiptDownloadManager { + return &receiptDownloadManager{ + chain: chain, + tx: tx, + targetBN: targetBN, + requesting: make(map[uint64]struct{}), + processing: make(map[uint64]struct{}), + retries: newPrioritizedNumbers(), + rq: newResultQueue(), + rdd: make(map[uint64]ReceiptDownloadDetails), + logger: logger, + } +} + +// GetNextBatch get the next receipt numbers batch +func (rdm *receiptDownloadManager) GetNextBatch() []uint64 { + rdm.lock.Lock() + defer rdm.lock.Unlock() + + cap := ReceiptsPerRequest + + bns := rdm.getBatchFromRetries(cap) + if len(bns) > 0 { + cap -= len(bns) + rdm.addBatchToRequesting(bns) + } + + if rdm.availableForMoreTasks() { + addBNs := rdm.getBatchFromUnprocessed(cap) + rdm.addBatchToRequesting(addBNs) + bns = append(bns, addBNs...) 
+ } + + return bns +} + +// HandleRequestError handles the error result +func (rdm *receiptDownloadManager) HandleRequestError(bns []uint64, err error, streamID sttypes.StreamID) { + rdm.lock.Lock() + defer rdm.lock.Unlock() + + // add requested receipt numbers to retries + for _, bn := range bns { + delete(rdm.requesting, bn) + rdm.retries.push(bn) + } +} + +// HandleRequestResult handles get blocks result +func (rdm *receiptDownloadManager) HandleRequestResult(bns []uint64, blockBytes [][]byte, sigBytes [][]byte, loopID int, streamID sttypes.StreamID) error { + rdm.lock.Lock() + defer rdm.lock.Unlock() + + for i, bn := range bns { + delete(rdm.requesting, bn) + if indexExists(blockBytes, i) && len(blockBytes[i]) <= 1 { + rdm.retries.push(bn) + } else { + rdm.processing[bn] = struct{}{} + rdm.rdd[bn] = ReceiptDownloadDetails{ + loopID: loopID, + streamID: streamID, + } + } + } + return nil +} + +// SetDownloadDetails sets the download details for a batch of blocks +func (rdm *receiptDownloadManager) SetDownloadDetails(bns []uint64, loopID int, streamID sttypes.StreamID) error { + rdm.lock.Lock() + defer rdm.lock.Unlock() + + for _, bn := range bns { + rdm.rdd[bn] = ReceiptDownloadDetails{ + loopID: loopID, + streamID: streamID, + } + } + return nil +} + +// GetDownloadDetails returns the download details for a receipt +func (rdm *receiptDownloadManager) GetDownloadDetails(blockNumber uint64) (loopID int, streamID sttypes.StreamID) { + rdm.lock.Lock() + defer rdm.lock.Unlock() + + return rdm.rdd[blockNumber].loopID, rdm.rdd[blockNumber].streamID +} + +// getBatchFromRetries get the receipt number batch to be requested from retries. +func (rdm *receiptDownloadManager) getBatchFromRetries(cap int) []uint64 { + var ( + requestBNs []uint64 + curHeight = rdm.chain.CurrentBlock().NumberU64() + ) + for cnt := 0; cnt < cap; cnt++ { + bn := rdm.retries.pop() + if bn == 0 { + break // no more retries + } + if bn <= curHeight { + continue + } + requestBNs = append(requestBNs, bn) + } + return requestBNs +} + +// getBatchFromUnprocessed returns a batch of receipt numbers to be requested from unprocessed. +func (rdm *receiptDownloadManager) getBatchFromUnprocessed(cap int) []uint64 { + var ( + requestBNs []uint64 + curHeight = rdm.chain.CurrentBlock().NumberU64() + ) + bn := curHeight + 1 + // TODO: this algorithm can be potentially optimized. 
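+	// Descriptive note: scan forward from the current head, skipping numbers that are
+	// already being requested or already downloaded but not yet inserted (processing),
+	// and collect up to `cap` block numbers whose receipts still need fetching,
+	// bounded by targetBN.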
+ for cnt := 0; cnt < cap && bn <= rdm.targetBN; cnt++ { + for bn <= rdm.targetBN { + _, ok1 := rdm.requesting[bn] + _, ok2 := rdm.processing[bn] + if !ok1 && !ok2 { + requestBNs = append(requestBNs, bn) + bn++ + break + } + bn++ + } + } + return requestBNs +} + +func (rdm *receiptDownloadManager) availableForMoreTasks() bool { + return rdm.rq.results.Len() < SoftQueueCap +} + +func (rdm *receiptDownloadManager) addBatchToRequesting(bns []uint64) { + for _, bn := range bns { + rdm.requesting[bn] = struct{}{} + } +} From 12d930fd97d942f87820cd0352986ed130a20e72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Thu, 29 Jun 2023 12:09:13 +0800 Subject: [PATCH 10/56] fix receipt download manager result queue --- api/service/stagedstreamsync/const.go | 6 +++++- .../stagedstreamsync/receipt_download_manager.go | 12 +++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/api/service/stagedstreamsync/const.go b/api/service/stagedstreamsync/const.go index d7b6510a4..f82bff572 100644 --- a/api/service/stagedstreamsync/const.go +++ b/api/service/stagedstreamsync/const.go @@ -23,7 +23,11 @@ const ( // no more request will be assigned to workers to wait for InsertChain to finish. SoftQueueCap int = 100 - StatesPerRequest int = 10 // number of get nodes by hashes for each request + // number of get nodes by hashes for each request + StatesPerRequest int = 100 + + // maximum number of blocks for get receipts request + ReceiptsPerRequest int = 10 // DefaultConcurrency is the default settings for concurrency DefaultConcurrency int = 4 diff --git a/api/service/stagedstreamsync/receipt_download_manager.go b/api/service/stagedstreamsync/receipt_download_manager.go index 7b486a9aa..ffe20d0be 100644 --- a/api/service/stagedstreamsync/receipt_download_manager.go +++ b/api/service/stagedstreamsync/receipt_download_manager.go @@ -22,7 +22,6 @@ type receiptDownloadManager struct { requesting map[uint64]struct{} // receipt numbers that have been assigned to workers but not received processing map[uint64]struct{} // receipt numbers received requests but not inserted retries *prioritizedNumbers // requests where error happens - rq *resultQueue // result queue wait to be inserted into blockchain rdd map[uint64]ReceiptDownloadDetails // details about how this receipt was downloaded logger zerolog.Logger @@ -37,7 +36,6 @@ func newReceiptDownloadManager(tx kv.RwTx, chain blockChain, targetBN uint64, lo requesting: make(map[uint64]struct{}), processing: make(map[uint64]struct{}), retries: newPrioritizedNumbers(), - rq: newResultQueue(), rdd: make(map[uint64]ReceiptDownloadDetails), logger: logger, } @@ -77,14 +75,14 @@ func (rdm *receiptDownloadManager) HandleRequestError(bns []uint64, err error, s } } -// HandleRequestResult handles get blocks result -func (rdm *receiptDownloadManager) HandleRequestResult(bns []uint64, blockBytes [][]byte, sigBytes [][]byte, loopID int, streamID sttypes.StreamID) error { +// HandleRequestResult handles get receipts result +func (rdm *receiptDownloadManager) HandleRequestResult(bns []uint64, receiptBytes [][]byte, sigBytes [][]byte, loopID int, streamID sttypes.StreamID) error { rdm.lock.Lock() defer rdm.lock.Unlock() for i, bn := range bns { delete(rdm.requesting, bn) - if indexExists(blockBytes, i) && len(blockBytes[i]) <= 1 { + if indexExists(receiptBytes, i) && len(receiptBytes[i]) <= 1 { rdm.retries.push(bn) } else { rdm.processing[bn] = struct{}{} @@ -111,7 +109,7 @@ func (rdm 
*receiptDownloadManager) SetDownloadDetails(bns []uint64, loopID int, return nil } -// GetDownloadDetails returns the download details for a receipt +// GetDownloadDetails returns the download details for a certain block number func (rdm *receiptDownloadManager) GetDownloadDetails(blockNumber uint64) (loopID int, streamID sttypes.StreamID) { rdm.lock.Lock() defer rdm.lock.Unlock() @@ -162,7 +160,7 @@ func (rdm *receiptDownloadManager) getBatchFromUnprocessed(cap int) []uint64 { } func (rdm *receiptDownloadManager) availableForMoreTasks() bool { - return rdm.rq.results.Len() < SoftQueueCap + return len(rdm.requesting) < SoftQueueCap } func (rdm *receiptDownloadManager) addBatchToRequesting(bns []uint64) { From 6f3aa67b88d4e026fc1117b7c2db88b0fc81a1ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Thu, 29 Jun 2023 18:43:05 +0800 Subject: [PATCH 11/56] refactor stage receipts and change the stages sorting --- .../stagedstreamsync/default_stages.go | 25 +- .../receipt_download_manager.go | 5 +- .../stagedstreamsync/stage_receipts.go | 327 ++++++++++++++++++ api/service/stagedstreamsync/stages.go | 3 +- 4 files changed, 349 insertions(+), 11 deletions(-) create mode 100644 api/service/stagedstreamsync/stage_receipts.go diff --git a/api/service/stagedstreamsync/default_stages.go b/api/service/stagedstreamsync/default_stages.go index 4a1e719f2..e1bb8578d 100644 --- a/api/service/stagedstreamsync/default_stages.go +++ b/api/service/stagedstreamsync/default_stages.go @@ -13,9 +13,9 @@ var DefaultForwardOrder = ForwardOrder{ SyncEpoch, ShortRange, BlockBodies, - StateSync, - // Stages below don't use Internet States, + StateSync, + Receipts, LastMile, Finish, } @@ -23,8 +23,9 @@ var DefaultForwardOrder = ForwardOrder{ var DefaultRevertOrder = RevertOrder{ Finish, LastMile, - States, + Receipts, StateSync, + States, BlockBodies, ShortRange, SyncEpoch, @@ -34,8 +35,9 @@ var DefaultRevertOrder = RevertOrder{ var DefaultCleanUpOrder = CleanUpOrder{ Finish, LastMile, - States, + Receipts, StateSync, + States, BlockBodies, ShortRange, SyncEpoch, @@ -49,6 +51,7 @@ func DefaultStages(ctx context.Context, bodiesCfg StageBodiesCfg, stateSyncCfg StageStateSyncCfg, statesCfg StageStatesCfg, + receiptsCfg StageReceiptsCfg, lastMileCfg StageLastMileCfg, finishCfg StageFinishCfg, ) []*Stage { @@ -57,8 +60,9 @@ func DefaultStages(ctx context.Context, handlerStageShortRange := NewStageShortRange(srCfg) handlerStageEpochSync := NewStageEpoch(seCfg) handlerStageBodies := NewStageBodies(bodiesCfg) - handlerStageStateSync := NewStageStateSync(stateSyncCfg) handlerStageStates := NewStageStates(statesCfg) + handlerStageStateSync := NewStageStateSync(stateSyncCfg) + handlerStageReceipts := NewStageReceipts(receiptsCfg) handlerStageLastMile := NewStageLastMile(lastMileCfg) handlerStageFinish := NewStageFinish(finishCfg) @@ -83,15 +87,20 @@ func DefaultStages(ctx context.Context, Description: "Retrieve Block Bodies", Handler: handlerStageBodies, }, + { + ID: States, + Description: "Update Blockchain State", + Handler: handlerStageStates, + }, { ID: StateSync, Description: "Retrieve States", Handler: handlerStageStateSync, }, { - ID: States, - Description: "Update Blockchain State", - Handler: handlerStageStates, + ID: Receipts, + Description: "Retrieve Receipts", + Handler: handlerStageReceipts, }, { ID: LastMile, diff --git a/api/service/stagedstreamsync/receipt_download_manager.go b/api/service/stagedstreamsync/receipt_download_manager.go 
index ffe20d0be..2eaa3ca45 100644 --- a/api/service/stagedstreamsync/receipt_download_manager.go +++ b/api/service/stagedstreamsync/receipt_download_manager.go @@ -3,6 +3,7 @@ package stagedstreamsync import ( "sync" + "github.com/harmony-one/harmony/core/types" sttypes "github.com/harmony-one/harmony/p2p/stream/types" "github.com/ledgerwatch/erigon-lib/kv" "github.com/rs/zerolog" @@ -76,13 +77,13 @@ func (rdm *receiptDownloadManager) HandleRequestError(bns []uint64, err error, s } // HandleRequestResult handles get receipts result -func (rdm *receiptDownloadManager) HandleRequestResult(bns []uint64, receiptBytes [][]byte, sigBytes [][]byte, loopID int, streamID sttypes.StreamID) error { +func (rdm *receiptDownloadManager) HandleRequestResult(bns []uint64, receipts []*types.Receipt, loopID int, streamID sttypes.StreamID) error { rdm.lock.Lock() defer rdm.lock.Unlock() for i, bn := range bns { delete(rdm.requesting, bn) - if indexExists(receiptBytes, i) && len(receiptBytes[i]) <= 1 { + if indexExists(receipts, i) { rdm.retries.push(bn) } else { rdm.processing[bn] = struct{}{} diff --git a/api/service/stagedstreamsync/stage_receipts.go b/api/service/stagedstreamsync/stage_receipts.go new file mode 100644 index 000000000..a9bffa30f --- /dev/null +++ b/api/service/stagedstreamsync/stage_receipts.go @@ -0,0 +1,327 @@ +package stagedstreamsync + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/core/types" + "github.com/harmony-one/harmony/internal/utils" + sttypes "github.com/harmony-one/harmony/p2p/stream/types" + "github.com/ledgerwatch/erigon-lib/kv" + "github.com/pkg/errors" +) + +type StageReceipts struct { + configs StageReceiptsCfg +} + +type StageReceiptsCfg struct { + bc core.BlockChain + db kv.RwDB + blockDBs []kv.RwDB + concurrency int + protocol syncProtocol + isBeacon bool + logProgress bool +} + +func NewStageReceipts(cfg StageReceiptsCfg) *StageReceipts { + return &StageReceipts{ + configs: cfg, + } +} + +func NewStageReceiptsCfg(bc core.BlockChain, db kv.RwDB, blockDBs []kv.RwDB, concurrency int, protocol syncProtocol, isBeacon bool, logProgress bool) StageReceiptsCfg { + return StageReceiptsCfg{ + bc: bc, + db: db, + blockDBs: blockDBs, + concurrency: concurrency, + protocol: protocol, + isBeacon: isBeacon, + logProgress: logProgress, + } +} + +// Exec progresses Bodies stage in the forward direction +func (b *StageReceipts) Exec(ctx context.Context, firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { + + useInternalTx := tx == nil + + if invalidBlockRevert { + return nil + } + + // for short range sync, skip this stage + if !s.state.initSync { + return nil + } + + maxHeight := s.state.status.targetBN + currentHead := b.configs.bc.CurrentBlock().NumberU64() + if currentHead >= maxHeight { + return nil + } + currProgress := uint64(0) + targetHeight := s.state.currentCycle.TargetHeight + + if errV := CreateView(ctx, b.configs.db, tx, func(etx kv.Tx) error { + if currProgress, err = s.CurrentStageProgress(etx); err != nil { + return err + } + return nil + }); errV != nil { + return errV + } + + if currProgress == 0 { + currProgress = currentHead + } + + if currProgress >= targetHeight { + return nil + } + + // size := uint64(0) + startTime := time.Now() + // startBlock := currProgress + if b.configs.logProgress { + fmt.Print("\033[s") // save the cursor position + } + + if useInternalTx { + var err error + tx, err = 
b.configs.db.BeginRw(ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + // Fetch blocks from neighbors + s.state.rdm = newReceiptDownloadManager(tx, b.configs.bc, targetHeight, s.state.logger) + + // Setup workers to fetch blocks from remote node + var wg sync.WaitGroup + + for i := 0; i != s.state.config.Concurrency; i++ { + wg.Add(1) + go b.runReceiptWorkerLoop(ctx, s.state.rdm, &wg, i, startTime) + } + + wg.Wait() + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + + return nil +} + +// runReceiptWorkerLoop creates a work loop for download receipts +func (b *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDownloadManager, wg *sync.WaitGroup, loopID int, startTime time.Time) { + + currentBlock := int(b.configs.bc.CurrentBlock().NumberU64()) + + defer wg.Done() + + for { + select { + case <-ctx.Done(): + return + default: + } + batch := rdm.GetNextBatch() + if len(batch) == 0 { + select { + case <-ctx.Done(): + return + case <-time.After(100 * time.Millisecond): + return + } + } + var hashes []common.Hash + for _, bn := range batch { + header := b.configs.bc.GetHeaderByNumber(bn) + hashes = append(hashes, header.ReceiptHash()) + } + receipts, stid, err := b.downloadReceipts(ctx, hashes) + if err != nil { + if !errors.Is(err, context.Canceled) { + b.configs.protocol.StreamFailed(stid, "downloadRawBlocks failed") + } + utils.Logger().Error(). + Err(err). + Str("stream", string(stid)). + Interface("block numbers", batch). + Msg(WrapStagedSyncMsg("downloadRawBlocks failed")) + err = errors.Wrap(err, "request error") + rdm.HandleRequestError(batch, err, stid) + } else if receipts == nil || len(receipts) == 0 { + utils.Logger().Warn(). + Str("stream", string(stid)). + Interface("block numbers", batch). 
+ Msg(WrapStagedSyncMsg("downloadRawBlocks failed, received empty reciptBytes")) + err := errors.New("downloadRawBlocks received empty reciptBytes") + rdm.HandleRequestError(batch, err, stid) + } else { + rdm.HandleRequestResult(batch, receipts, loopID, stid) + if b.configs.logProgress { + //calculating block download speed + dt := time.Now().Sub(startTime).Seconds() + speed := float64(0) + if dt > 0 { + speed = float64(len(rdm.rdd)) / dt + } + blockSpeed := fmt.Sprintf("%.2f", speed) + + fmt.Print("\033[u\033[K") // restore the cursor position and clear the line + fmt.Println("downloaded blocks:", currentBlock+len(rdm.rdd), "/", int(rdm.targetBN), "(", blockSpeed, "blocks/s", ")") + } + } + } +} + +func (b *StageReceipts) downloadReceipts(ctx context.Context, hs []common.Hash) ([]*types.Receipt, sttypes.StreamID, error) { + ctx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + + receipts, stid, err := b.configs.protocol.GetReceipts(ctx, hs) + if err != nil { + return nil, stid, err + } + if err := validateGetReceiptsResult(hs, receipts); err != nil { + return nil, stid, err + } + return receipts, stid, nil +} + +func (b *StageReceipts) downloadRawBlocks(ctx context.Context, bns []uint64) ([][]byte, [][]byte, sttypes.StreamID, error) { + ctx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + + return b.configs.protocol.GetRawBlocksByNumber(ctx, bns) +} + +func validateGetReceiptsResult(requested []common.Hash, result []*types.Receipt) error { + // TODO: validate each receipt here + + return nil +} + +func (b *StageReceipts) saveProgress(ctx context.Context, s *StageState, progress uint64, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = b.configs.db.BeginRw(ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + // save progress + if err = s.Update(tx, progress); err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_SYNC] saving progress for block bodies stage failed") + return ErrSavingBodiesProgressFail + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (b *StageReceipts) cleanBlocksDB(ctx context.Context, loopID int) (err error) { + tx, errb := b.configs.blockDBs[loopID].BeginRw(ctx) + if errb != nil { + return errb + } + defer tx.Rollback() + + // clean block bodies db + if err = tx.ClearBucket(BlocksBucket); err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_STREAM_SYNC] clear blocks bucket after revert failed") + return err + } + // clean block signatures db + if err = tx.ClearBucket(BlockSignaturesBucket); err != nil { + utils.Logger().Error(). + Err(err). 
+ Msgf("[STAGED_STREAM_SYNC] clear block signatures bucket after revert failed") + return err + } + + if err = tx.Commit(); err != nil { + return err + } + + return nil +} + +func (b *StageReceipts) cleanAllBlockDBs(ctx context.Context) (err error) { + //clean all blocks DBs + for i := 0; i < b.configs.concurrency; i++ { + if err := b.cleanBlocksDB(ctx, i); err != nil { + return err + } + } + return nil +} + +func (b *StageReceipts) Revert(ctx context.Context, firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) { + + //clean all blocks DBs + if err := b.cleanAllBlockDBs(ctx); err != nil { + return err + } + + useInternalTx := tx == nil + if useInternalTx { + tx, err = b.configs.db.BeginRw(ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + // save progress + currentHead := b.configs.bc.CurrentBlock().NumberU64() + if err = s.Update(tx, currentHead); err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_SYNC] saving progress for block bodies stage after revert failed") + return err + } + + if err = u.Done(tx); err != nil { + return err + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (b *StageReceipts) CleanUp(ctx context.Context, firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) { + //clean all blocks DBs + if err := b.cleanAllBlockDBs(ctx); err != nil { + return err + } + + return nil +} diff --git a/api/service/stagedstreamsync/stages.go b/api/service/stagedstreamsync/stages.go index cb6efa0cd..909bb25c0 100644 --- a/api/service/stagedstreamsync/stages.go +++ b/api/service/stagedstreamsync/stages.go @@ -12,8 +12,9 @@ const ( ShortRange SyncStageID = "ShortRange" // short range SyncEpoch SyncStageID = "SyncEpoch" // epoch sync BlockBodies SyncStageID = "BlockBodies" // Block bodies are downloaded, TxHash and UncleHash are getting verified - StateSync SyncStageID = "StateSync" // State sync States SyncStageID = "States" // will construct most recent state from downloaded blocks + StateSync SyncStageID = "StateSync" // State sync + Receipts SyncStageID = "Receipts" // Receipts LastMile SyncStageID = "LastMile" // update blocks after sync and update last mile blocks as well Finish SyncStageID = "Finish" // Nominal stage after all other stages ) From e11b6ef1227c11b028acb4bf7227428c903465d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Thu, 29 Jun 2023 18:45:47 +0800 Subject: [PATCH 12/56] goimports staged stream sync --- api/service/stagedstreamsync/const.go | 2 +- .../stagedstreamsync/staged_stream_sync.go | 34 +++++++++---------- api/service/stagedstreamsync/stages.go | 2 +- .../state_download_manager.go | 6 ++-- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/api/service/stagedstreamsync/const.go b/api/service/stagedstreamsync/const.go index f82bff572..a41d2e859 100644 --- a/api/service/stagedstreamsync/const.go +++ b/api/service/stagedstreamsync/const.go @@ -27,7 +27,7 @@ const ( StatesPerRequest int = 100 // maximum number of blocks for get receipts request - ReceiptsPerRequest int = 10 + ReceiptsPerRequest int = 10 // DefaultConcurrency is the default settings for concurrency DefaultConcurrency int = 4 diff --git a/api/service/stagedstreamsync/staged_stream_sync.go b/api/service/stagedstreamsync/staged_stream_sync.go index 0a14d0cb3..fea59b02a 100644 --- a/api/service/stagedstreamsync/staged_stream_sync.go +++ b/api/service/stagedstreamsync/staged_stream_sync.go @@ 
-59,23 +59,23 @@ func (ib *InvalidBlock) addBadStream(bsID sttypes.StreamID) { } type StagedStreamSync struct { - bc core.BlockChain - consensus *consensus.Consensus - isBeacon bool - isExplorer bool - db kv.RwDB - protocol syncProtocol - isBeaconNode bool - gbm *blockDownloadManager // initialized when finished get block number - lastMileBlocks []*types.Block // last mile blocks to catch up with the consensus - lastMileMux sync.Mutex - inserted int - config Config - logger zerolog.Logger - status *status //TODO: merge this with currentSyncCycle - initSync bool // if sets to true, node start long range syncing - UseMemDB bool - + bc core.BlockChain + consensus *consensus.Consensus + isBeacon bool + isExplorer bool + db kv.RwDB + protocol syncProtocol + isBeaconNode bool + gbm *blockDownloadManager // initialized when finished get block number + rdm *receiptDownloadManager + lastMileBlocks []*types.Block // last mile blocks to catch up with the consensus + lastMileMux sync.Mutex + inserted int + config Config + logger zerolog.Logger + status *status //TODO: merge this with currentSyncCycle + initSync bool // if sets to true, node start long range syncing + UseMemDB bool revertPoint *uint64 // used to run stages prevRevertPoint *uint64 // used to get value from outside of staged sync after cycle (for example to notify RPCDaemon) invalidBlock InvalidBlock diff --git a/api/service/stagedstreamsync/stages.go b/api/service/stagedstreamsync/stages.go index 909bb25c0..6ad9e4519 100644 --- a/api/service/stagedstreamsync/stages.go +++ b/api/service/stagedstreamsync/stages.go @@ -13,7 +13,7 @@ const ( SyncEpoch SyncStageID = "SyncEpoch" // epoch sync BlockBodies SyncStageID = "BlockBodies" // Block bodies are downloaded, TxHash and UncleHash are getting verified States SyncStageID = "States" // will construct most recent state from downloaded blocks - StateSync SyncStageID = "StateSync" // State sync + StateSync SyncStageID = "StateSync" // State sync Receipts SyncStageID = "Receipts" // Receipts LastMile SyncStageID = "LastMile" // update blocks after sync and update last mile blocks as well Finish SyncStageID = "Finish" // Nominal stage after all other stages diff --git a/api/service/stagedstreamsync/state_download_manager.go b/api/service/stagedstreamsync/state_download_manager.go index f06ec9cb6..1cd414757 100644 --- a/api/service/stagedstreamsync/state_download_manager.go +++ b/api/service/stagedstreamsync/state_download_manager.go @@ -229,13 +229,13 @@ func (s *StateDownloadManager) HandleRequestError(codeHashes []common.Hash, trie // add requested code hashes to retries for _, h := range codeHashes { - s.retries.addCodeTask(h,s.requesting.codeTasks[h]) + s.retries.addCodeTask(h, s.requesting.codeTasks[h]) delete(s.requesting.codeTasks, h) } // add requested trie paths to retries for _, path := range triePaths { - s.retries.addTrieTask(path,s.requesting.trieTasks[path]) + s.retries.addTrieTask(path, s.requesting.trieTasks[path]) delete(s.requesting.trieTasks, path) } } @@ -282,7 +282,7 @@ func (s *StateDownloadManager) HandleRequestResult(codeHashes []common.Hash, tri } for _, hash := range codeHashes { - task:= s.requesting.getCodeTask(hash) + task := s.requesting.getCodeTask(hash) // If the node did deliver something, missing items may be due to a protocol // limit or a previous timeout + delayed delivery. Both cases should permit // the node to retry the missing items (to avoid single-peer stalls). 
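The state download manager hunks above are formatting-only, but their surrounding context lines show the retry flow the later state-sync patches rely on: when a request errors out, every in-flight trie path and code hash is moved from the requesting set back into the retries set, and when a stream delivers only part of a request the missing items are re-queued rather than failing the whole sync on a single peer, subject to a per-stream attempt cap (the package's MaxTriesToFetchNodeData). The sketch below illustrates that bookkeeping pattern in isolation; it is not code from these patches, and the manager/task types, the plain string keys, the handleRequestError/handleMissing names and the cap value are simplified assumptions standing in for the real stream-typed structures.

package main

import (
	"errors"
	"fmt"
)

// Placeholder for the package's per-stream retry cap; the value here is arbitrary.
const maxTriesToFetchNodeData = 5

// task is a simplified stand-in for the manager's trie/code tasks: it only
// counts how many times each stream has been asked for the item.
type task struct {
	attempts map[string]int
}

// manager keeps items that are in flight (requesting) and items waiting to be
// re-requested (retries), keyed by trie path or code hash.
type manager struct {
	requesting map[string]*task
	retries    map[string]*task
}

// handleRequestError returns every in-flight item of a failed request to the
// retry set so another stream can be asked in the next round.
func (m *manager) handleRequestError(keys []string) {
	for _, k := range keys {
		if t, ok := m.requesting[k]; ok {
			m.retries[k] = t
			delete(m.requesting, k)
		}
	}
}

// handleMissing re-queues an item a stream did not deliver. Partial deliveries
// are tolerated (protocol limits, delayed responses), but once the same stream
// has failed to produce the item too many times the item is treated as
// unavailable and the sync aborts.
func (m *manager) handleMissing(key, streamID string) error {
	t, ok := m.requesting[key]
	if !ok {
		return nil
	}
	t.attempts[streamID]++
	if t.attempts[streamID] >= maxTriesToFetchNodeData {
		return errors.New("item failed with too many attempts, likely unavailable on the network")
	}
	m.retries[key] = t
	delete(m.requesting, key)
	return nil
}

func main() {
	m := &manager{
		requesting: map[string]*task{
			"path-0x01": {attempts: map[string]int{}},
			"code-0xaa": {attempts: map[string]int{}},
		},
		retries: map[string]*task{},
	}
	// A stream answered but left path-0x01 out of its response.
	if err := m.handleMissing("path-0x01", "stream-a"); err != nil {
		fmt.Println("abort:", err)
		return
	}
	// A whole request to another stream failed outright.
	m.handleRequestError([]string{"code-0xaa"})
	fmt.Println("items queued for retry:", len(m.retries)) // 2
}

Keeping retries separate from requesting means one failed or slow stream never blocks the queue: the same items can be handed to a different stream in the next batch.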
From 91034682c79f4e92fc9aad434506952342d70eee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Mon, 3 Jul 2023 12:32:33 +0800 Subject: [PATCH 13/56] add block insertion without execution to blockchain implementation --- core/blockchain.go | 18 +++- core/blockchain_impl.go | 147 ++++++++++++++++++++++++++++++++- core/blockchain_stub.go | 4 +- core/epochchain.go | 2 +- hmy/downloader/adapter.go | 2 +- hmy/downloader/beaconhelper.go | 2 +- hmy/downloader/downloader.go | 8 +- hmy/downloader/longrange.go | 2 +- hmy/downloader/shortrange.go | 4 +- node/node_handler_test.go | 2 +- 10 files changed, 171 insertions(+), 20 deletions(-) diff --git a/core/blockchain.go b/core/blockchain.go index 41f72a9a2..a286af117 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -100,6 +100,18 @@ type BlockChain interface { // Rollback is designed to remove a chain of links from the database that aren't // certain enough to be valid. Rollback(chain []common.Hash) error + // WriteBlockWithoutState writes only the block and its metadata to the database, + // but does not write any state. This is used to construct competing side forks + // up to the point where they exceed the canonical total difficulty. + WriteBlockWithoutState(block *types.Block) (err error) + // WriteBlockWithState writes the block and all associated state to the database. + WriteBlockWithState( + block *types.Block, receipts []*types.Receipt, + cxReceipts []*types.CXReceipt, + stakeMsgs []types2.StakeMsg, + paid reward.Reader, + state *state.DB, + ) (status WriteStatus, err error) // GetMaxGarbageCollectedBlockNumber .. GetMaxGarbageCollectedBlockNumber() int64 // InsertChain attempts to insert the given batch of blocks in to the canonical @@ -108,9 +120,9 @@ type BlockChain interface { // wrong. // // After insertion is done, all accumulated events will be fired. - InsertChain(chain types.Blocks, verifyHeaders bool) (int, error) - // LeaderRotationMeta returns info about leader rotation. - LeaderRotationMeta() LeaderRotationMeta + InsertChain(chain types.Blocks, verifyHeaders bool, blockExecution bool) (int, error) + // LeaderRotationMeta returns the number of continuous blocks by the leader. + LeaderRotationMeta() (publicKeyBytes []byte, epoch, count, shifts uint64, err error) // BadBlocks returns a list of the last 'bad blocks' that // the client has seen on the network. 
BadBlocks() []BadBlock diff --git a/core/blockchain_impl.go b/core/blockchain_impl.go index cc3031567..8d9de8dbc 100644 --- a/core/blockchain_impl.go +++ b/core/blockchain_impl.go @@ -1194,7 +1194,18 @@ func (bc *BlockChainImpl) Rollback(chain []common.Hash) error { var lastWrite uint64 -func (bc *BlockChainImpl) writeBlockWithState( +func (bc *BlockChainImpl) WriteBlockWithoutState(block *types.Block) (err error) { + bc.chainmu.Lock() + defer bc.chainmu.Unlock() + + if err := rawdb.WriteBlock(bc.db, block); err != nil { + return err + } + + return nil +} + +func (bc *BlockChainImpl) WriteBlockWithState( block *types.Block, receipts []*types.Receipt, cxReceipts []*types.CXReceipt, stakeMsgs []staking.StakeMsg, @@ -1348,7 +1359,7 @@ func (bc *BlockChainImpl) GetMaxGarbageCollectedBlockNumber() int64 { return bc.maxGarbCollectedBlkNum } -func (bc *BlockChainImpl) InsertChain(chain types.Blocks, verifyHeaders bool) (int, error) { +func (bc *BlockChainImpl) InsertChain(chain types.Blocks, verifyHeaders bool, blockExecution bool) (int, error) { // if in tikv mode, writer node need preempt master or come be a follower if bc.isInitTiKV() && !bc.tikvPreemptMaster(bc.rangeBlock(chain)) { return len(chain), nil @@ -1392,10 +1403,17 @@ func (bc *BlockChainImpl) LeaderRotationMeta() LeaderRotationMeta { return bc.leaderRotationMeta.Clone() } +func (bc *BlockChainImpl) insertChain(chain types.Blocks, verifyHeaders bool, blockExecution bool) (int, []interface{}, []*types.Log, error) { + if blockExecution { + return bc.insertChainWithBlockExecution(chain, verifyHeaders) + } + return bc.insertChainWithoutBlockExecution(chain, verifyHeaders) +} + // insertChain will execute the actual chain insertion and event aggregation. The // only reason this method exists as a separate one is to make locking cleaner // with deferred statements. -func (bc *BlockChainImpl) insertChain(chain types.Blocks, verifyHeaders bool) (int, []interface{}, []*types.Log, error) { +func (bc *BlockChainImpl) insertChainWithBlockExecution(chain types.Blocks, verifyHeaders bool) (int, []interface{}, []*types.Log, error) { // Sanity check that we have something meaningful to import if len(chain) == 0 { return 0, nil, nil, ErrEmptyChain @@ -1506,7 +1524,9 @@ func (bc *BlockChainImpl) insertChain(chain types.Blocks, verifyHeaders bool) (i // Prune in case non-empty winner chain if len(winner) > 0 { // Import all the pruned blocks to make the state available - _, evs, logs, err := bc.insertChain(winner, true /* verifyHeaders */) + bc.chainmu.Unlock() + _, evs, logs, err := bc.insertChainWithBlockExecution(winner, true /* verifyHeaders */) + bc.chainmu.Lock() events, coalescedLogs = evs, logs if err != nil { @@ -1639,6 +1659,125 @@ func (bc *BlockChainImpl) insertChain(chain types.Blocks, verifyHeaders bool) (i return 0, events, coalescedLogs, nil } +//receiptChain []types.Receipts, +func (bc *BlockChainImpl) insertChainWithoutBlockExecution(chain types.Blocks, verifyHeaders bool) (int, []interface{}, []*types.Log, error) { + // Sanity check that we have something meaningful to import + if len(chain) == 0 { + return 0, nil, nil, nil + } + // Do a sanity check that the provided chain is actually ordered and linked + for i := 1; i < len(chain); i++ { + if chain[i].NumberU64() != chain[i-1].NumberU64()+1 || chain[i].ParentHash() != chain[i-1].Hash() { + // Chain broke ancestry, log a message (programming error) and skip insertion + utils.Logger().Error(). + Str("number", chain[i].Number().String()). + Str("hash", chain[i].Hash().Hex()). 
+ Str("parent", chain[i].ParentHash().Hex()). + Str("prevnumber", chain[i-1].Number().String()). + Str("prevhash", chain[i-1].Hash().Hex()). + Msg("insertChain: non contiguous block insert") + + return 0, nil, nil, fmt.Errorf("non contiguous insert: item %d is #%d [%x…], item %d is #%d [%x…] (parent [%x…])", i-1, chain[i-1].NumberU64(), + chain[i-1].Hash().Bytes()[:4], i, chain[i].NumberU64(), chain[i].Hash().Bytes()[:4], chain[i].ParentHash().Bytes()[:4]) + } + } + + bc.chainmu.Lock() + defer bc.chainmu.Unlock() + + var verifyHeadersResults <-chan error + + // If the block header chain has not been verified, conduct header verification here. + if verifyHeaders { + headers := make([]*block.Header, len(chain)) + seals := make([]bool, len(chain)) + + for i, block := range chain { + headers[i] = block.Header() + seals[i] = true + } + // Note that VerifyHeaders verifies headers in the chain in parallel + abort, results := bc.Engine().VerifyHeaders(bc, headers, seals) + verifyHeadersResults = results + defer close(abort) + } + + // Start a parallel signature recovery (signer will fluke on fork transition, minimal perf loss) + //senderCacher.recoverFromBlocks(types.MakeSigner(bc.chainConfig, chain[0].Number()), chain) + + // Iterate over the blocks and insert when the verifier permits + for i, block := range chain { + // If the chain is terminating, stop processing blocks + if atomic.LoadInt32(&bc.procInterrupt) == 1 { + utils.Logger().Debug().Msg("Premature abort during blocks processing") + break + } + + var err error + if verifyHeaders { + err = <-verifyHeadersResults + } + if err == nil { + err = bc.Validator().ValidateBody(block) + } + switch { + case err == ErrKnownBlock: + // Block and state both already known. However if the current block is below + // this number we did a rollback and we should reimport it nonetheless. + if bc.CurrentBlock().NumberU64() >= block.NumberU64() { + continue + } + + case err == consensus_engine.ErrFutureBlock: + // Allow up to MaxFuture second in the future blocks. If this limit is exceeded + // the chain is discarded and processed at a later time if given. + max := big.NewInt(time.Now().Unix() + maxTimeFutureBlocks) + if block.Time().Cmp(max) > 0 { + return i, nil, nil, fmt.Errorf("future block: %v > %v", block.Time(), max) + } + bc.futureBlocks.Add(block.Hash(), block) + continue + + case err == consensus_engine.ErrUnknownAncestor && bc.futureBlocks.Contains(block.ParentHash()): + bc.futureBlocks.Add(block.Hash(), block) + continue + + case err == consensus_engine.ErrPrunedAncestor: + var winner []*types.Block + parent := bc.GetBlock(block.ParentHash(), block.NumberU64()-1) + for parent != nil && !bc.HasState(parent.Root()) { + winner = append(winner, parent) + parent = bc.GetBlock(parent.ParentHash(), parent.NumberU64()-1) + } + for j := 0; j < len(winner)/2; j++ { + winner[j], winner[len(winner)-1-j] = winner[len(winner)-1-j], winner[j] + } + // Prune in case non-empty winner chain + if len(winner) > 0 { + // Import all the pruned blocks to make the state available + bc.chainmu.Unlock() + _, _, _, err := bc.insertChainWithoutBlockExecution(winner, true /* verifyHeaders */) + bc.chainmu.Lock() + if err != nil { + return i, nil, nil, err + } + } + + case err != nil: + bc.reportBlock(block, nil, err) + return i, nil, nil, err + } + + // Create a new statedb using the parent block and report an + // error if it fails. 
+ if err = bc.WriteBlockWithoutState(block); err != nil { + return i, nil, nil, err + } + } + + return 0, nil, nil, nil +} + // insertStats tracks and reports on block insertion. type insertStats struct { queued, processed, ignored int diff --git a/core/blockchain_stub.go b/core/blockchain_stub.go index e9ef10ce9..32a0b1c19 100644 --- a/core/blockchain_stub.go +++ b/core/blockchain_stub.go @@ -120,7 +120,7 @@ func (a Stub) Rollback(chain []common.Hash) error { return errors.Errorf("method Rollback not implemented for %s", a.Name) } -func (a Stub) WriteBlockWithoutState(block *types.Block, td *big.Int) (err error) { +func (a Stub) WriteBlockWithoutState(block *types.Block) (err error) { return errors.Errorf("method WriteBlockWithoutState not implemented for %s", a.Name) } @@ -132,7 +132,7 @@ func (a Stub) GetMaxGarbageCollectedBlockNumber() int64 { return 0 } -func (a Stub) InsertChain(chain types.Blocks, verifyHeaders bool) (int, error) { +func (a Stub) InsertChain(chain types.Blocks, verifyHeaders bool, blockExecution bool) (int, error) { return 0, errors.Errorf("method InsertChain not implemented for %s", a.Name) } diff --git a/core/epochchain.go b/core/epochchain.go index 2dab28471..3df271b11 100644 --- a/core/epochchain.go +++ b/core/epochchain.go @@ -114,7 +114,7 @@ func (bc *EpochChain) Stop() { }) } -func (bc *EpochChain) InsertChain(blocks types.Blocks, _ bool) (int, error) { +func (bc *EpochChain) InsertChain(blocks types.Blocks, _ bool, _ bool) (int, error) { if len(blocks) == 0 { return 0, nil } diff --git a/hmy/downloader/adapter.go b/hmy/downloader/adapter.go index c8758b506..70e4ca325 100644 --- a/hmy/downloader/adapter.go +++ b/hmy/downloader/adapter.go @@ -27,6 +27,6 @@ type blockChain interface { engine.ChainReader Engine() engine.Engine - InsertChain(chain types.Blocks, verifyHeaders bool) (int, error) + InsertChain(chain types.Blocks, verifyHeaders bool, blockExecution bool) (int, error) WriteCommitSig(blockNum uint64, lastCommits []byte) error } diff --git a/hmy/downloader/beaconhelper.go b/hmy/downloader/beaconhelper.go index 96d06ebf8..2c7f05675 100644 --- a/hmy/downloader/beaconhelper.go +++ b/hmy/downloader/beaconhelper.go @@ -123,7 +123,7 @@ func (bh *beaconHelper) insertLastMileBlocks() (inserted int, bn uint64, err err } // TODO: Instruct the beacon helper to verify signatures. 
This may require some forks // in pub-sub message (add commit sigs in node.block.sync messages) - if _, err = bh.bc.InsertChain(types.Blocks{b}, true); err != nil { + if _, err = bh.bc.InsertChain(types.Blocks{b}, true, true); err != nil { bn-- return } diff --git a/hmy/downloader/downloader.go b/hmy/downloader/downloader.go index 01ec242ab..378b1e630 100644 --- a/hmy/downloader/downloader.go +++ b/hmy/downloader/downloader.go @@ -280,16 +280,16 @@ func (e *sigVerifyErr) Error() string { return fmt.Sprintf("[VerifyHeaderSignature] %v", e.err.Error()) } -func verifyAndInsertBlocks(bc blockChain, blocks types.Blocks) (int, error) { +func verifyAndInsertBlocks(bc blockChain, blockExecution bool, blocks types.Blocks) (int, error) { for i, block := range blocks { - if err := verifyAndInsertBlock(bc, block, blocks[i+1:]...); err != nil { + if err := verifyAndInsertBlock(bc, block, blockExecution, blocks[i+1:]...); err != nil { return i, err } } return len(blocks), nil } -func verifyAndInsertBlock(bc blockChain, block *types.Block, nextBlocks ...*types.Block) error { +func verifyAndInsertBlock(bc blockChain, block *types.Block, blockExecution bool, nextBlocks ...*types.Block) error { var ( sigBytes bls.SerializedSignature bitmap []byte @@ -314,7 +314,7 @@ func verifyAndInsertBlock(bc blockChain, block *types.Block, nextBlocks ...*type if err := bc.Engine().VerifyHeader(bc, block.Header(), true); err != nil { return errors.Wrap(err, "[VerifyHeader]") } - if _, err := bc.InsertChain(types.Blocks{block}, false); err != nil { + if _, err := bc.InsertChain(types.Blocks{block}, false, blockExecution); err != nil { return errors.Wrap(err, "[InsertChain]") } return nil diff --git a/hmy/downloader/longrange.go b/hmy/downloader/longrange.go index 4d4935b8f..fc4d4962f 100644 --- a/hmy/downloader/longrange.go +++ b/hmy/downloader/longrange.go @@ -210,7 +210,7 @@ func (lsi *lrSyncIter) processBlocks(results []*blockResult, targetBN uint64) { blocks := blockResultsToBlocks(results) for i, block := range blocks { - if err := verifyAndInsertBlock(lsi.bc, block); err != nil { + if err := verifyAndInsertBlock(lsi.bc, block, true); err != nil { lsi.logger.Warn().Err(err).Uint64("target block", targetBN). Uint64("block number", block.NumberU64()). 
Msg("insert blocks failed in long range") diff --git a/hmy/downloader/shortrange.go b/hmy/downloader/shortrange.go index 8276911d4..2a705f99a 100644 --- a/hmy/downloader/shortrange.go +++ b/hmy/downloader/shortrange.go @@ -74,7 +74,7 @@ func (d *Downloader) doShortRangeSync() (int, error) { } d.logger.Info().Int("num blocks", len(blocks)).Msg("getBlockByHashes result") - n, err := verifyAndInsertBlocks(d.bc, blocks) + n, err := verifyAndInsertBlocks(d.bc, true, blocks) numBlocksInsertedShortRangeHistogramVec.With(d.promLabels()).Observe(float64(n)) if err != nil { d.logger.Warn().Err(err).Int("blocks inserted", n).Msg("Insert block failed") @@ -131,7 +131,7 @@ func (d *Downloader) doShortRangeSyncForEpochSync() (int, error) { // short circuit for no sync is needed return 0, nil } - n, err := d.bc.InsertChain(blocks, true) + n, err := d.bc.InsertChain(blocks, true, true) numBlocksInsertedShortRangeHistogramVec.With(d.promLabels()).Observe(float64(n)) if err != nil { sh.removeStreams([]sttypes.StreamID{streamID}) // Data provided by remote nodes is corrupted diff --git a/node/node_handler_test.go b/node/node_handler_test.go index 867a9616d..23c5498fe 100644 --- a/node/node_handler_test.go +++ b/node/node_handler_test.go @@ -69,7 +69,7 @@ func TestAddNewBlock(t *testing.T) { commitSigs, func() uint64 { return 0 }, common.Address{}, nil, nil, ) - _, err = node.Blockchain().InsertChain([]*types.Block{block}, true) + _, err = node.Blockchain().InsertChain([]*types.Block{block}, true, true) if err != nil { t.Errorf("error when adding new block %v", err) } From cfc94bb4e148700019a17bfd0695c38da1c0cbc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Mon, 3 Jul 2023 12:33:17 +0800 Subject: [PATCH 14/56] fix tests for new block insertion --- hmy/downloader/adapter_test.go | 6 +++--- node/node_newblock_test.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hmy/downloader/adapter_test.go b/hmy/downloader/adapter_test.go index 4bc023b5c..3b7664007 100644 --- a/hmy/downloader/adapter_test.go +++ b/hmy/downloader/adapter_test.go @@ -60,7 +60,7 @@ func (bc *testBlockChain) currentBlockNumber() uint64 { return bc.curBN } -func (bc *testBlockChain) InsertChain(chain types.Blocks, verifyHeaders bool) (int, error) { +func (bc *testBlockChain) InsertChain(chain types.Blocks, verifyHeaders bool, blockExecution bool) (int, error) { bc.lock.Lock() defer bc.lock.Unlock() @@ -169,11 +169,11 @@ type testInsertHelper struct { } func (ch *testInsertHelper) verifyAndInsertBlock(block *types.Block) error { - _, err := ch.bc.InsertChain(types.Blocks{block}, true) + _, err := ch.bc.InsertChain(types.Blocks{block}, true, true) return err } func (ch *testInsertHelper) verifyAndInsertBlocks(blocks types.Blocks) (int, error) { - return ch.bc.InsertChain(blocks, true) + return ch.bc.InsertChain(blocks, true, true) } const ( diff --git a/node/node_newblock_test.go b/node/node_newblock_test.go index 5780b7cda..b8ca6c9e0 100644 --- a/node/node_newblock_test.go +++ b/node/node_newblock_test.go @@ -78,7 +78,7 @@ func TestFinalizeNewBlockAsync(t *testing.T) { t.Error("New block is not verified successfully:", err) } - node.Blockchain().InsertChain(types.Blocks{block}, false) + node.Blockchain().InsertChain(types.Blocks{block}, false, true) node.Worker.UpdateCurrent() From 9954a904aeb00684812016e26e20c48e61d4558a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= 
<36589218+GheisMohammadi@users.noreply.github.com> Date: Mon, 3 Jul 2023 15:16:27 +0800 Subject: [PATCH 15/56] refactor staged stream sync to process fast sync and new block insertion --- api/service/stagedstreamsync/adapter.go | 2 +- api/service/stagedstreamsync/const.go | 13 +++++++ api/service/stagedstreamsync/sig_verify.go | 6 ++-- .../stagedstreamsync/stage_short_range.go | 4 ++- api/service/stagedstreamsync/stage_state.go | 35 ++++++++++++------- .../stagedstreamsync/staged_stream_sync.go | 7 ++-- api/service/stagedstreamsync/syncing.go | 8 +++-- 7 files changed, 52 insertions(+), 23 deletions(-) diff --git a/api/service/stagedstreamsync/adapter.go b/api/service/stagedstreamsync/adapter.go index ca9c6a678..9988ccc51 100644 --- a/api/service/stagedstreamsync/adapter.go +++ b/api/service/stagedstreamsync/adapter.go @@ -31,6 +31,6 @@ type blockChain interface { engine.ChainReader Engine() engine.Engine - InsertChain(chain types.Blocks, verifyHeaders bool) (int, error) + InsertChain(chain types.Blocks, verifyHeaders bool, blockExecution bool) (int, error) WriteCommitSig(blockNum uint64, lastCommits []byte) error } diff --git a/api/service/stagedstreamsync/const.go b/api/service/stagedstreamsync/const.go index a41d2e859..c87932bf7 100644 --- a/api/service/stagedstreamsync/const.go +++ b/api/service/stagedstreamsync/const.go @@ -40,6 +40,16 @@ const ( ShortRangeTimeout time.Duration = 1 * time.Minute ) +// SyncMode represents the synchronization mode of the downloader. +// It is a uint32 as it is used with atomic operations. +type SyncMode uint32 + +const ( + FullSync SyncMode = iota // Synchronize the entire blockchain history from full blocks + FastSync // Download all blocks and states + SnapSync // Download the chain and the state via compact snapshots +) + type ( // Config is the downloader config Config struct { @@ -47,6 +57,9 @@ type ( // TODO: remove this when stream sync is fully up. 
ServerOnly bool + // Synchronization mode of the downloader + SyncMode SyncMode + // parameters Network nodeconfig.NetworkType Concurrency int // Number of concurrent sync requests diff --git a/api/service/stagedstreamsync/sig_verify.go b/api/service/stagedstreamsync/sig_verify.go index 8de71effc..dc0b4cf4c 100644 --- a/api/service/stagedstreamsync/sig_verify.go +++ b/api/service/stagedstreamsync/sig_verify.go @@ -20,16 +20,16 @@ func (e *sigVerifyErr) Error() string { return fmt.Sprintf("[VerifyHeaderSignature] %v", e.err.Error()) } -func verifyAndInsertBlocks(bc blockChain, blocks types.Blocks) (int, error) { +func verifyAndInsertBlocks(bc blockChain, blocks types.Blocks, blockExecution bool) (int, error) { for i, block := range blocks { - if err := verifyAndInsertBlock(bc, block, blocks[i+1:]...); err != nil { + if err := verifyAndInsertBlock(bc, block, blockExecution, blocks[i+1:]...); err != nil { return i, err } } return len(blocks), nil } -func verifyAndInsertBlock(bc blockChain, block *types.Block, nextBlocks ...*types.Block) error { +func verifyAndInsertBlock(bc blockChain, block *types.Block, blockExecution bool, nextBlocks ...*types.Block) error { var ( sigBytes bls.SerializedSignature bitmap []byte diff --git a/api/service/stagedstreamsync/stage_short_range.go b/api/service/stagedstreamsync/stage_short_range.go index ce6cdf36b..a651490eb 100644 --- a/api/service/stagedstreamsync/stage_short_range.go +++ b/api/service/stagedstreamsync/stage_short_range.go @@ -136,7 +136,9 @@ func (sr *StageShortRange) doShortRangeSync(ctx context.Context, s *StageState) sh.streamsFailed(whitelist, "remote nodes cannot provide blocks with target hashes") } - n, err := verifyAndInsertBlocks(sr.configs.bc, blocks) + utils.Logger().Info().Int("num blocks", len(blocks)).Msg("getBlockByHashes result") + + n, err := verifyAndInsertBlocks(sr.configs.bc, blocks, true) numBlocksInsertedShortRangeHistogramVec.With(s.state.promLabels()).Observe(float64(n)) if err != nil { utils.Logger().Warn().Err(err).Int("blocks inserted", n).Msg("Insert block failed") diff --git a/api/service/stagedstreamsync/stage_state.go b/api/service/stagedstreamsync/stage_state.go index b8dfb1828..ea4775d1f 100644 --- a/api/service/stagedstreamsync/stage_state.go +++ b/api/service/stagedstreamsync/stage_state.go @@ -19,12 +19,13 @@ type StageStates struct { configs StageStatesCfg } type StageStatesCfg struct { - bc core.BlockChain - db kv.RwDB - blockDBs []kv.RwDB - concurrency int - logger zerolog.Logger - logProgress bool + bc core.BlockChain + db kv.RwDB + blockDBs []kv.RwDB + concurrency int + blockExecution bool + logger zerolog.Logger + logProgress bool } func NewStageStates(cfg StageStatesCfg) *StageStates { @@ -38,16 +39,18 @@ func NewStageStatesCfg( db kv.RwDB, blockDBs []kv.RwDB, concurrency int, + blockExecution bool, logger zerolog.Logger, logProgress bool) StageStatesCfg { return StageStatesCfg{ - bc: bc, - db: db, - blockDBs: blockDBs, - concurrency: concurrency, - logger: logger, - logProgress: logProgress, + bc: bc, + db: db, + blockDBs: blockDBs, + concurrency: concurrency, + blockExecution: blockExecution, + logger: logger, + logProgress: logProgress, } } @@ -108,6 +111,8 @@ func (stg *StageStates) Exec(ctx context.Context, firstCycle bool, invalidBlockR fmt.Print("\033[s") // save the cursor position } + s.state.currentCycle.ReceiptHashes = make(map[uint64]common.Hash) + for i := currProgress + 1; i <= targetHeight; i++ { blkKey := marshalData(i) loopID, streamID := gbm.GetDownloadDetails(i) @@ -157,7 +162,7 @@ 
func (stg *StageStates) Exec(ctx context.Context, firstCycle bool, invalidBlockR return ErrInvalidBlockNumber } - if err := verifyAndInsertBlock(stg.configs.bc, block); err != nil { + if err := verifyAndInsertBlock(stg.configs.bc, block, stg.configs.blockExecution); err != nil { stg.configs.logger.Warn().Err(err).Uint64("cycle target block", targetHeight). Uint64("block number", block.NumberU64()). Msg(WrapStagedSyncMsg("insert blocks failed in long range")) @@ -169,6 +174,10 @@ func (stg *StageStates) Exec(ctx context.Context, firstCycle bool, invalidBlockR return err } + // TODO: only for fast sync + // add receipt hash for next stage + s.state.currentCycle.ReceiptHashes[block.NumberU64()]=block.Header().ReceiptHash() + if invalidBlockRevert { if s.state.invalidBlock.Number == i { s.state.invalidBlock.resolve() diff --git a/api/service/stagedstreamsync/staged_stream_sync.go b/api/service/stagedstreamsync/staged_stream_sync.go index fea59b02a..c5a201ef7 100644 --- a/api/service/stagedstreamsync/staged_stream_sync.go +++ b/api/service/stagedstreamsync/staged_stream_sync.go @@ -102,9 +102,10 @@ type Timing struct { } type SyncCycle struct { - Number uint64 - TargetHeight uint64 - lock sync.RWMutex + Number uint64 + TargetHeight uint64 + ReceiptHashes map[uint64]common.Hash + lock sync.RWMutex } func (s *StagedStreamSync) Len() int { return len(s.stages) } diff --git a/api/service/stagedstreamsync/syncing.go b/api/service/stagedstreamsync/syncing.go index 6e0d01a4b..c741151cd 100644 --- a/api/service/stagedstreamsync/syncing.go +++ b/api/service/stagedstreamsync/syncing.go @@ -81,14 +81,17 @@ func CreateStagedSync(ctx context.Context, return nil, errInitDB } + fastSync := config.SyncMode == FastSync + stageHeadsCfg := NewStageHeadersCfg(bc, mainDB) stageShortRangeCfg := NewStageShortRangeCfg(bc, mainDB) stageSyncEpochCfg := NewStageEpochCfg(bc, mainDB) stageBodiesCfg := NewStageBodiesCfg(bc, mainDB, dbs, config.Concurrency, protocol, isBeaconNode, config.LogProgress) - stageStatesCfg := NewStageStatesCfg(bc, mainDB, dbs, config.Concurrency, logger, config.LogProgress) + stageStatesCfg := NewStageStatesCfg(bc, mainDB, dbs, config.Concurrency, !fastSync, logger, config.LogProgress) stageStateSyncCfg := NewStageStateSyncCfg(bc, mainDB, config.Concurrency, protocol, logger, config.LogProgress) - lastMileCfg := NewStageLastMileCfg(ctx, bc, mainDB) + stageReceiptsCfg := NewStageReceiptsCfg(bc, mainDB, dbs, config.Concurrency, protocol, isBeaconNode, config.LogProgress) + lastMileCfg := NewStageLastMileCfg(ctx, bc, mainDB) stageFinishCfg := NewStageFinishCfg(mainDB) stages := DefaultStages(ctx, @@ -98,6 +101,7 @@ func CreateStagedSync(ctx context.Context, stageBodiesCfg, stageStateSyncCfg, stageStatesCfg, + stageReceiptsCfg, lastMileCfg, stageFinishCfg, ) From 352212744de65cd94cad254f27a7ebffc1870f67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Mon, 3 Jul 2023 15:16:56 +0800 Subject: [PATCH 16/56] refactor stage receipts --- api/service/stagedstreamsync/stage_receipts.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/api/service/stagedstreamsync/stage_receipts.go b/api/service/stagedstreamsync/stage_receipts.go index a9bffa30f..71a46e0d2 100644 --- a/api/service/stagedstreamsync/stage_receipts.go +++ b/api/service/stagedstreamsync/stage_receipts.go @@ -110,7 +110,7 @@ func (b *StageReceipts) Exec(ctx context.Context, firstCycle bool, invalidBlockR for i := 0; i != 
s.state.config.Concurrency; i++ { wg.Add(1) - go b.runReceiptWorkerLoop(ctx, s.state.rdm, &wg, i, startTime) + go b.runReceiptWorkerLoop(ctx, s.state.rdm, &wg, i, s, startTime) } wg.Wait() @@ -125,7 +125,7 @@ func (b *StageReceipts) Exec(ctx context.Context, firstCycle bool, invalidBlockR } // runReceiptWorkerLoop creates a work loop for download receipts -func (b *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDownloadManager, wg *sync.WaitGroup, loopID int, startTime time.Time) { +func (b *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDownloadManager, wg *sync.WaitGroup, loopID int, s *StageState, startTime time.Time) { currentBlock := int(b.configs.bc.CurrentBlock().NumberU64()) @@ -148,8 +148,13 @@ func (b *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDo } var hashes []common.Hash for _, bn := range batch { + /* + // TODO: check if we can directly use bc rather than receipt hashes map header := b.configs.bc.GetHeaderByNumber(bn) hashes = append(hashes, header.ReceiptHash()) + */ + receiptHash := s.state.currentCycle.ReceiptHashes[bn] + hashes = append(hashes, receiptHash) } receipts, stid, err := b.downloadReceipts(ctx, hashes) if err != nil { From 591f223e84d40f3e0aa2be43d8f70b5caa6552bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Mon, 3 Jul 2023 15:18:12 +0800 Subject: [PATCH 17/56] fix block insertion in main.go --- test/chain/main.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/chain/main.go b/test/chain/main.go index 4b935292f..d3f518dc2 100644 --- a/test/chain/main.go +++ b/test/chain/main.go @@ -134,7 +134,7 @@ func fundFaucetContract(chain core.BlockChain) { }() block, _ := contractworker. 
FinalizeNewBlock(commitSigs, func() uint64 { return 0 }, common.Address{}, nil, nil) - _, err = chain.InsertChain(types.Blocks{block}, true /* verifyHeaders */) + _, err = chain.InsertChain(types.Blocks{block}, true /* verifyHeaders */, true) if err != nil { fmt.Println(err) } @@ -184,7 +184,7 @@ func callFaucetContractToFundAnAddress(chain core.BlockChain) { block, _ := contractworker.FinalizeNewBlock( commitSigs, func() uint64 { return 0 }, common.Address{}, nil, nil, ) - _, err = chain.InsertChain(types.Blocks{block}, true /* verifyHeaders */) + _, err = chain.InsertChain(types.Blocks{block}, true /* verifyHeaders */, true) if err != nil { fmt.Println(err) } @@ -227,7 +227,7 @@ func main() { gen.SetShardID(0) gen.AddTx(pendingTxs[i].(*types.Transaction)) }) - if _, err := chain.InsertChain(blocks, true /* verifyHeaders */); err != nil { + if _, err := chain.InsertChain(blocks, true /* verifyHeaders */, true); err != nil { log.Fatal(err) } } From 7006e1568dca7128954cda88d4fc526f577b7704 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Mon, 3 Jul 2023 15:22:23 +0800 Subject: [PATCH 18/56] goimports staged sync files --- api/service/stagedstreamsync/stage_receipts.go | 6 +++--- api/service/stagedstreamsync/stage_state.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/api/service/stagedstreamsync/stage_receipts.go b/api/service/stagedstreamsync/stage_receipts.go index 71a46e0d2..692222c6c 100644 --- a/api/service/stagedstreamsync/stage_receipts.go +++ b/api/service/stagedstreamsync/stage_receipts.go @@ -149,9 +149,9 @@ func (b *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDo var hashes []common.Hash for _, bn := range batch { /* - // TODO: check if we can directly use bc rather than receipt hashes map - header := b.configs.bc.GetHeaderByNumber(bn) - hashes = append(hashes, header.ReceiptHash()) + // TODO: check if we can directly use bc rather than receipt hashes map + header := b.configs.bc.GetHeaderByNumber(bn) + hashes = append(hashes, header.ReceiptHash()) */ receiptHash := s.state.currentCycle.ReceiptHashes[bn] hashes = append(hashes, receiptHash) diff --git a/api/service/stagedstreamsync/stage_state.go b/api/service/stagedstreamsync/stage_state.go index ea4775d1f..e5cab702f 100644 --- a/api/service/stagedstreamsync/stage_state.go +++ b/api/service/stagedstreamsync/stage_state.go @@ -176,7 +176,7 @@ func (stg *StageStates) Exec(ctx context.Context, firstCycle bool, invalidBlockR // TODO: only for fast sync // add receipt hash for next stage - s.state.currentCycle.ReceiptHashes[block.NumberU64()]=block.Header().ReceiptHash() + s.state.currentCycle.ReceiptHashes[block.NumberU64()] = block.Header().ReceiptHash() if invalidBlockRevert { if s.state.invalidBlock.Number == i { From 30de7c27e349595a74a9478dbd9c28f8b454d66a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Mon, 3 Jul 2023 15:50:36 +0800 Subject: [PATCH 19/56] refactor stages list initialization based on the sync mode --- .../stagedstreamsync/default_stages.go | 111 +++++++++++++----- .../stagedstreamsync/staged_stream_sync.go | 18 ++- 2 files changed, 97 insertions(+), 32 deletions(-) diff --git a/api/service/stagedstreamsync/default_stages.go b/api/service/stagedstreamsync/default_stages.go index e1bb8578d..3bebe8a00 100644 --- a/api/service/stagedstreamsync/default_stages.go +++ 
b/api/service/stagedstreamsync/default_stages.go @@ -8,40 +8,91 @@ type ForwardOrder []SyncStageID type RevertOrder []SyncStageID type CleanUpOrder []SyncStageID -var DefaultForwardOrder = ForwardOrder{ - Heads, - SyncEpoch, - ShortRange, - BlockBodies, - States, - StateSync, - Receipts, - LastMile, - Finish, +var ( + StagesForwardOrder ForwardOrder + StagesRevertOrder RevertOrder + StagesCleanUpOrder CleanUpOrder +) + +func initStagesOrder(syncMode SyncMode) { + switch syncMode { + case FullSync: + initFullSyncStagesOrder() + case FastSync: + initFastSyncStagesOrder() + default: + panic("not supported sync mode") + } } -var DefaultRevertOrder = RevertOrder{ - Finish, - LastMile, - Receipts, - StateSync, - States, - BlockBodies, - ShortRange, - SyncEpoch, - Heads, +func initFullSyncStagesOrder() { + StagesForwardOrder = ForwardOrder{ + Heads, + SyncEpoch, + ShortRange, + BlockBodies, + States, + LastMile, + Finish, + } + + StagesRevertOrder = RevertOrder{ + Finish, + LastMile, + States, + BlockBodies, + ShortRange, + SyncEpoch, + Heads, + } + + StagesCleanUpOrder = CleanUpOrder{ + Finish, + LastMile, + States, + BlockBodies, + ShortRange, + SyncEpoch, + Heads, + } } -var DefaultCleanUpOrder = CleanUpOrder{ - Finish, - LastMile, - Receipts, - StateSync, - States, - BlockBodies, - ShortRange, - SyncEpoch, - Heads, +func initFastSyncStagesOrder() { + StagesForwardOrder = ForwardOrder{ + Heads, + SyncEpoch, + ShortRange, + BlockBodies, + States, + StateSync, + Receipts, + LastMile, + Finish, + } + + StagesRevertOrder = RevertOrder{ + Finish, + LastMile, + Receipts, + StateSync, + States, + BlockBodies, + ShortRange, + SyncEpoch, + Heads, + } + + StagesCleanUpOrder = CleanUpOrder{ + Finish, + LastMile, + Receipts, + StateSync, + States, + BlockBodies, + ShortRange, + SyncEpoch, + Heads, + } } func DefaultStages(ctx context.Context, diff --git a/api/service/stagedstreamsync/staged_stream_sync.go b/api/service/stagedstreamsync/staged_stream_sync.go index c5a201ef7..48a47f28d 100644 --- a/api/service/stagedstreamsync/staged_stream_sync.go +++ b/api/service/stagedstreamsync/staged_stream_sync.go @@ -268,8 +268,21 @@ func New( logger zerolog.Logger, ) *StagedStreamSync { + // init stages order based on sync mode + initStagesOrder(config.SyncMode) + + forwardStages := make([]*Stage, len(stagesList)) + for i, stageIndex := range StagesForwardOrder { + for _, s := range stagesList { + if s.ID == stageIndex { + forwardStages[i] = s + break + } + } + } + revertStages := make([]*Stage, len(stagesList)) - for i, stageIndex := range DefaultRevertOrder { + for i, stageIndex := range StagesRevertOrder { for _, s := range stagesList { if s.ID == stageIndex { revertStages[i] = s @@ -277,8 +290,9 @@ func New( } } } + pruneStages := make([]*Stage, len(stagesList)) - for i, stageIndex := range DefaultCleanUpOrder { + for i, stageIndex := range StagesCleanUpOrder { for _, s := range stagesList { if s.ID == stageIndex { pruneStages[i] = s From f10dd1eec2dde9133b6e427ea35d48a42f94b1a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Mon, 3 Jul 2023 20:08:42 +0800 Subject: [PATCH 20/56] add SyncMode to configs --- cmd/harmony/default.go | 5 +++++ cmd/harmony/main.go | 1 + internal/configs/harmony/harmony.go | 1 + 3 files changed, 7 insertions(+) diff --git a/cmd/harmony/default.go b/cmd/harmony/default.go index 986a2f7f6..86ed4226a 100644 --- a/cmd/harmony/default.go +++ b/cmd/harmony/default.go @@ -192,6 +192,7 @@ var 
defaultStagedSyncConfig = harmonyconfig.StagedSyncConfig{ var ( defaultMainnetSyncConfig = harmonyconfig.SyncConfig{ Enabled: false, + SyncMode: 0, Downloader: false, StagedSync: false, StagedSyncCfg: defaultStagedSyncConfig, @@ -207,6 +208,7 @@ var ( defaultTestNetSyncConfig = harmonyconfig.SyncConfig{ Enabled: true, + SyncMode: 0, Downloader: false, StagedSync: false, StagedSyncCfg: defaultStagedSyncConfig, @@ -222,6 +224,7 @@ var ( defaultLocalNetSyncConfig = harmonyconfig.SyncConfig{ Enabled: true, + SyncMode: 0, Downloader: true, StagedSync: true, StagedSyncCfg: defaultStagedSyncConfig, @@ -237,6 +240,7 @@ var ( defaultPartnerSyncConfig = harmonyconfig.SyncConfig{ Enabled: true, + SyncMode: 0, Downloader: true, StagedSync: false, StagedSyncCfg: defaultStagedSyncConfig, @@ -252,6 +256,7 @@ var ( defaultElseSyncConfig = harmonyconfig.SyncConfig{ Enabled: true, + SyncMode: 0, Downloader: true, StagedSync: false, StagedSyncCfg: defaultStagedSyncConfig, diff --git a/cmd/harmony/main.go b/cmd/harmony/main.go index 549237d1c..a29698f40 100644 --- a/cmd/harmony/main.go +++ b/cmd/harmony/main.go @@ -1005,6 +1005,7 @@ func setupStagedSyncService(node *node.Node, host p2p.Host, hc harmonyconfig.Har sConfig := stagedstreamsync.Config{ ServerOnly: !hc.Sync.Downloader, + SyncMode: stagedstreamsync.SyncMode(hc.Sync.SyncMode), Network: nodeconfig.NetworkType(hc.Network.NetworkType), Concurrency: hc.Sync.Concurrency, MinStreams: hc.Sync.MinPeers, diff --git a/internal/configs/harmony/harmony.go b/internal/configs/harmony/harmony.go index 2fcb200c4..7ff250148 100644 --- a/internal/configs/harmony/harmony.go +++ b/internal/configs/harmony/harmony.go @@ -329,6 +329,7 @@ type PrometheusConfig struct { type SyncConfig struct { // TODO: Remove this bool after stream sync is fully up. Enabled bool // enable the stream sync protocol + SyncMode uint32 // sync mode (default:Full sync, 1: Fast Sync, 2: Snap Sync(not implemented yet)) Downloader bool // start the sync downloader client StagedSync bool // use staged sync StagedSyncCfg StagedSyncConfig // staged sync configurations From 498bcc0416ace1113486217f4fc85d6b124ac26e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Tue, 4 Jul 2023 15:01:32 +0800 Subject: [PATCH 21/56] fix state download manager failure message --- api/service/stagedstreamsync/state_download_manager.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/service/stagedstreamsync/state_download_manager.go b/api/service/stagedstreamsync/state_download_manager.go index 1cd414757..80a758388 100644 --- a/api/service/stagedstreamsync/state_download_manager.go +++ b/api/service/stagedstreamsync/state_download_manager.go @@ -274,7 +274,7 @@ func (s *StateDownloadManager) HandleRequestResult(codeHashes []common.Hash, tri } else if task.attempts[streamID] >= MaxTriesToFetchNodeData { // If we've requested the node too many times already, it may be a malicious // sync where nobody has the right data. Abort. - return fmt.Errorf("trie node %s failed with peer %s", task.hash.TerminalString(), task.attempts[streamID]) + return fmt.Errorf("trie node %s failed with peer %s (%d tries)", task.hash.TerminalString(), streamID, task.attempts[streamID]) } // Missing item, place into the retry queue. 
s.retries.addTrieTask(path, task) @@ -291,7 +291,7 @@ func (s *StateDownloadManager) HandleRequestResult(codeHashes []common.Hash, tri } else if task.attempts[streamID] >= MaxTriesToFetchNodeData { // If we've requested the node too many times already, it may be a malicious // sync where nobody has the right data. Abort. - return fmt.Errorf("byte code %s failed with peer %s", hash.TerminalString(), task.attempts[streamID]) + return fmt.Errorf("byte code %s failed with peer %s (%d tries)", hash.TerminalString(), streamID, task.attempts[streamID]) } // Missing item, place into the retry queue. s.retries.addCodeTask(hash, task) From 1f26944a33be0280ade1817c67396a310b0480f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Wed, 5 Jul 2023 19:12:47 +0800 Subject: [PATCH 22/56] split verifyAndInsertBlock function to be able to reuse verification part --- api/service/stagedstreamsync/sig_verify.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/api/service/stagedstreamsync/sig_verify.go b/api/service/stagedstreamsync/sig_verify.go index dc0b4cf4c..bc204fb1a 100644 --- a/api/service/stagedstreamsync/sig_verify.go +++ b/api/service/stagedstreamsync/sig_verify.go @@ -29,7 +29,7 @@ func verifyAndInsertBlocks(bc blockChain, blocks types.Blocks, blockExecution bo return len(blocks), nil } -func verifyAndInsertBlock(bc blockChain, block *types.Block, blockExecution bool, nextBlocks ...*types.Block) error { +func verifyBlock(bc blockChain, block *types.Block, nextBlocks ...*types.Block) error { var ( sigBytes bls.SerializedSignature bitmap []byte @@ -61,7 +61,18 @@ func verifyAndInsertBlock(bc blockChain, block *types.Block, blockExecution bool case err != nil: return errors.Wrap(err, "[InsertChain]") default: + } + return nil +} +func verifyAndInsertBlock(bc blockChain, block *types.Block, blockExecution bool, nextBlocks ...*types.Block) error { + //verify block + if err := verifyBlock(bc, block, nextBlocks...); err != nil { + return err + } + // insert block + if _, err := bc.InsertChain(types.Blocks{block}, false, blockExecution); err != nil { + return errors.Wrap(err, "[InsertChain]") } return nil } From 7c3807a525882354b7f777b9497f182a6ccc0c09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Wed, 5 Jul 2023 19:14:13 +0800 Subject: [PATCH 23/56] refactor stage bodies to extract receip hashes in this stage rather than stage state --- .../stagedstreamsync/default_stages.go | 9 ++-- api/service/stagedstreamsync/stage_bodies.go | 46 +++++++++++++++++-- api/service/stagedstreamsync/syncing.go | 8 ++-- 3 files changed, 50 insertions(+), 13 deletions(-) diff --git a/api/service/stagedstreamsync/default_stages.go b/api/service/stagedstreamsync/default_stages.go index 3bebe8a00..60e9f4962 100644 --- a/api/service/stagedstreamsync/default_stages.go +++ b/api/service/stagedstreamsync/default_stages.go @@ -63,9 +63,8 @@ func initFastSyncStagesOrder() { SyncEpoch, ShortRange, BlockBodies, - States, - StateSync, Receipts, + StateSync, LastMile, Finish, } @@ -73,9 +72,8 @@ func initFastSyncStagesOrder() { StagesRevertOrder = RevertOrder{ Finish, LastMile, - Receipts, StateSync, - States, + Receipts, BlockBodies, ShortRange, SyncEpoch, @@ -85,9 +83,8 @@ func initFastSyncStagesOrder() { StagesCleanUpOrder = CleanUpOrder{ Finish, LastMile, - Receipts, StateSync, - States, + Receipts, BlockBodies, ShortRange, SyncEpoch, 
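With this reordering the fast sync forward order becomes Heads, SyncEpoch, ShortRange, BlockBodies, Receipts, StateSync, LastMile, Finish, the block-executing States stage is reserved for full sync, and the revert and clean-up orders walk the same list backwards. The sketch below shows, in isolation, how such an ordered ID list can drive a cycle; it is illustrative only, the stage struct and runForward helper are simplified assumptions, and the real scheduler in staged_stream_sync.go additionally tracks per-stage progress and revert points rather than just returning the first error.

package main

import (
	"context"
	"fmt"
)

// stage is a simplified stand-in for the package's Stage/handler pair:
// just an ID and a forward function.
type stage struct {
	id  string
	run func(ctx context.Context) error
}

// runForward executes the registered stages in the configured forward order.
// On failure the real scheduler would consult the revert order; here we only
// surface the error.
func runForward(ctx context.Context, order []string, stages map[string]stage) error {
	for _, id := range order {
		s, ok := stages[id]
		if !ok {
			return fmt.Errorf("stage %q not registered", id)
		}
		if err := s.run(ctx); err != nil {
			return fmt.Errorf("stage %q failed: %w", id, err)
		}
	}
	return nil
}

func main() {
	// Fast sync forward order after this patch: receipts are fetched right
	// after bodies, and state download replaces block execution.
	fastSyncOrder := []string{
		"Heads", "SyncEpoch", "ShortRange", "BlockBodies",
		"Receipts", "StateSync", "LastMile", "Finish",
	}
	stages := make(map[string]stage, len(fastSyncOrder))
	for _, id := range fastSyncOrder {
		id := id // capture loop variable for the closure
		stages[id] = stage{id: id, run: func(ctx context.Context) error {
			fmt.Println("running stage:", id)
			return nil
		}}
	}
	if err := runForward(context.Background(), fastSyncOrder, stages); err != nil {
		fmt.Println(err)
	}
}

Running Receipts immediately after BlockBodies is what lets the bodies stage hand the receipt hashes it extracts (see the stage_bodies.go changes that follow) straight to the receipt download, without waiting on block execution.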
diff --git a/api/service/stagedstreamsync/stage_bodies.go b/api/service/stagedstreamsync/stage_bodies.go index b5d92e3a1..401a8bc6c 100644 --- a/api/service/stagedstreamsync/stage_bodies.go +++ b/api/service/stagedstreamsync/stage_bodies.go @@ -6,6 +6,7 @@ import ( "sync" "time" + "github.com/ethereum/go-ethereum/rlp" "github.com/harmony-one/harmony/core" "github.com/harmony-one/harmony/core/types" "github.com/harmony-one/harmony/internal/utils" @@ -26,6 +27,7 @@ type StageBodiesCfg struct { concurrency int protocol syncProtocol isBeacon bool + extractReceiptHashes bool logProgress bool } @@ -35,7 +37,7 @@ func NewStageBodies(cfg StageBodiesCfg) *StageBodies { } } -func NewStageBodiesCfg(bc core.BlockChain, db kv.RwDB, blockDBs []kv.RwDB, concurrency int, protocol syncProtocol, isBeacon bool, logProgress bool) StageBodiesCfg { +func NewStageBodiesCfg(bc core.BlockChain, db kv.RwDB, blockDBs []kv.RwDB, concurrency int, protocol syncProtocol, isBeacon bool, extractReceiptHashes bool, logProgress bool) StageBodiesCfg { return StageBodiesCfg{ bc: bc, db: db, @@ -43,6 +45,7 @@ func NewStageBodiesCfg(bc core.BlockChain, db kv.RwDB, blockDBs []kv.RwDB, concu concurrency: concurrency, protocol: protocol, isBeacon: isBeacon, + extractReceiptHashes: extractReceiptHashes, logProgress: logProgress, } } @@ -118,7 +121,7 @@ func (b *StageBodies) Exec(ctx context.Context, firstCycle bool, invalidBlockRev for i := 0; i != s.state.config.Concurrency; i++ { wg.Add(1) - go b.runBlockWorkerLoop(ctx, s.state.gbm, &wg, i, startTime) + go b.runBlockWorkerLoop(ctx, s.state.gbm, &wg, i, s, startTime) } wg.Wait() @@ -133,7 +136,7 @@ func (b *StageBodies) Exec(ctx context.Context, firstCycle bool, invalidBlockRev } // runBlockWorkerLoop creates a work loop for download blocks -func (b *StageBodies) runBlockWorkerLoop(ctx context.Context, gbm *blockDownloadManager, wg *sync.WaitGroup, loopID int, startTime time.Time) { +func (b *StageBodies) runBlockWorkerLoop(ctx context.Context, gbm *blockDownloadManager, wg *sync.WaitGroup, loopID int, s *StageState, startTime time.Time) { currentBlock := int(b.configs.bc.CurrentBlock().NumberU64()) @@ -184,6 +187,12 @@ func (b *StageBodies) runBlockWorkerLoop(ctx context.Context, gbm *blockDownload gbm.HandleRequestError(batch, err, stid) b.configs.protocol.RemoveStream(stid) } else { + if b.configs.extractReceiptHashes { + if err = b.verifyBlockAndExtractReceiptsData(blockBytes, sigBytes, s); err != nil { + gbm.HandleRequestError(batch, err, stid) + continue + } + } if err = b.saveBlocks(ctx, gbm.tx, batch, blockBytes, sigBytes, loopID, stid); err != nil { panic(ErrSaveBlocksToDbFailed) } @@ -204,6 +213,37 @@ func (b *StageBodies) runBlockWorkerLoop(ctx context.Context, gbm *blockDownload } } +func (b *StageBodies) verifyBlockAndExtractReceiptsData(batchBlockBytes [][]byte, batchSigBytes [][]byte, s *StageState) error { + var block *types.Block + for i := uint64(0); i < uint64(len(batchBlockBytes)); i++ { + blockBytes := batchBlockBytes[i] + sigBytes := batchSigBytes[i] + if blockBytes == nil { + continue + } + if err := rlp.DecodeBytes(blockBytes, &block); err != nil { + utils.Logger().Error(). + Uint64("block number", i). 
+ Msg("block size invalid") + return ErrInvalidBlockBytes + } + if sigBytes != nil { + block.SetCurrentCommitSig(sigBytes) + } + + if block.NumberU64() != i { + return ErrInvalidBlockNumber + } + + if err := verifyBlock(b.configs.bc, block); err != nil { + return err + } + // add receipt hash for next stage + s.state.currentCycle.ReceiptHashes[block.NumberU64()] = block.Header().ReceiptHash() + } + return nil +} + // redownloadBadBlock tries to redownload the bad block from other streams func (b *StageBodies) redownloadBadBlock(ctx context.Context, s *StageState) error { diff --git a/api/service/stagedstreamsync/syncing.go b/api/service/stagedstreamsync/syncing.go index c741151cd..7abf00e7a 100644 --- a/api/service/stagedstreamsync/syncing.go +++ b/api/service/stagedstreamsync/syncing.go @@ -81,14 +81,14 @@ func CreateStagedSync(ctx context.Context, return nil, errInitDB } - fastSync := config.SyncMode == FastSync + blockExecution := config.SyncMode == FullSync + extractReceiptHashes := config.SyncMode == FastSync || config.SyncMode == SnapSync stageHeadsCfg := NewStageHeadersCfg(bc, mainDB) stageShortRangeCfg := NewStageShortRangeCfg(bc, mainDB) stageSyncEpochCfg := NewStageEpochCfg(bc, mainDB) - - stageBodiesCfg := NewStageBodiesCfg(bc, mainDB, dbs, config.Concurrency, protocol, isBeaconNode, config.LogProgress) - stageStatesCfg := NewStageStatesCfg(bc, mainDB, dbs, config.Concurrency, !fastSync, logger, config.LogProgress) + stageBodiesCfg := NewStageBodiesCfg(bc, mainDB, dbs, config.Concurrency, protocol, isBeaconNode, extractReceiptHashes, config.LogProgress) + stageStatesCfg := NewStageStatesCfg(bc, mainDB, dbs, config.Concurrency, blockExecution, logger, config.LogProgress) stageStateSyncCfg := NewStageStateSyncCfg(bc, mainDB, config.Concurrency, protocol, logger, config.LogProgress) stageReceiptsCfg := NewStageReceiptsCfg(bc, mainDB, dbs, config.Concurrency, protocol, isBeaconNode, config.LogProgress) lastMileCfg := NewStageLastMileCfg(ctx, bc, mainDB) From d4c85772e268445fd107e9e64509d89e12c8023b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Wed, 5 Jul 2023 19:18:35 +0800 Subject: [PATCH 24/56] goimports --- api/service/stagedstreamsync/stage_bodies.go | 28 ++++++++++---------- core/blockchain_impl.go | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/api/service/stagedstreamsync/stage_bodies.go b/api/service/stagedstreamsync/stage_bodies.go index 401a8bc6c..6b2fb65ed 100644 --- a/api/service/stagedstreamsync/stage_bodies.go +++ b/api/service/stagedstreamsync/stage_bodies.go @@ -21,14 +21,14 @@ type StageBodies struct { } type StageBodiesCfg struct { - bc core.BlockChain - db kv.RwDB - blockDBs []kv.RwDB - concurrency int - protocol syncProtocol - isBeacon bool + bc core.BlockChain + db kv.RwDB + blockDBs []kv.RwDB + concurrency int + protocol syncProtocol + isBeacon bool extractReceiptHashes bool - logProgress bool + logProgress bool } func NewStageBodies(cfg StageBodiesCfg) *StageBodies { @@ -39,14 +39,14 @@ func NewStageBodies(cfg StageBodiesCfg) *StageBodies { func NewStageBodiesCfg(bc core.BlockChain, db kv.RwDB, blockDBs []kv.RwDB, concurrency int, protocol syncProtocol, isBeacon bool, extractReceiptHashes bool, logProgress bool) StageBodiesCfg { return StageBodiesCfg{ - bc: bc, - db: db, - blockDBs: blockDBs, - concurrency: concurrency, - protocol: protocol, - isBeacon: isBeacon, + bc: bc, + db: db, + blockDBs: blockDBs, + concurrency: concurrency, + protocol: 
protocol, + isBeacon: isBeacon, extractReceiptHashes: extractReceiptHashes, - logProgress: logProgress, + logProgress: logProgress, } } diff --git a/core/blockchain_impl.go b/core/blockchain_impl.go index 8d9de8dbc..efb26df07 100644 --- a/core/blockchain_impl.go +++ b/core/blockchain_impl.go @@ -1659,7 +1659,7 @@ func (bc *BlockChainImpl) insertChainWithBlockExecution(chain types.Blocks, veri return 0, events, coalescedLogs, nil } -//receiptChain []types.Receipts, +// insertChainWithoutBlockExecution adds a set of blocks to blockchain without adding states func (bc *BlockChainImpl) insertChainWithoutBlockExecution(chain types.Blocks, verifyHeaders bool) (int, []interface{}, []*types.Log, error) { // Sanity check that we have something meaningful to import if len(chain) == 0 { From 8f818100a77be15ba97039836d5e9b881a9da503 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Wed, 5 Jul 2023 19:31:25 +0800 Subject: [PATCH 25/56] add InsertReceiptChain to blockchain interface --- core/blockchain.go | 3 + core/blockchain_impl.go | 165 ++++++++++++++++++++++++++++++++++++++++ core/blockchain_stub.go | 4 + 3 files changed, 172 insertions(+) diff --git a/core/blockchain.go b/core/blockchain.go index a286af117..c66f26ed3 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -121,6 +121,9 @@ type BlockChain interface { // // After insertion is done, all accumulated events will be fired. InsertChain(chain types.Blocks, verifyHeaders bool, blockExecution bool) (int, error) + // InsertReceiptChain attempts to complete an already existing header chain with + // transaction and receipt data. + InsertReceiptChain(blockChain types.Blocks, receiptChain []types.Receipts) (int, error) // LeaderRotationMeta returns the number of continuous blocks by the leader. 
LeaderRotationMeta() (publicKeyBytes []byte, epoch, count, shifts uint64, err error) // BadBlocks returns a list of the last 'bad blocks' that diff --git a/core/blockchain_impl.go b/core/blockchain_impl.go index efb26df07..c91c322b8 100644 --- a/core/blockchain_impl.go +++ b/core/blockchain_impl.go @@ -1192,6 +1192,171 @@ func (bc *BlockChainImpl) Rollback(chain []common.Hash) error { return bc.removeInValidatorList(valsToRemove) } +// SetReceiptsData computes all the non-consensus fields of the receipts +func SetReceiptsData(config *params.ChainConfig, block *types.Block, receipts types.Receipts) error { + signer := types.MakeSigner(config, block.Epoch()) + ethSigner := types.NewEIP155Signer(config.EthCompatibleChainID) + + transactions, stakingTransactions, logIndex := block.Transactions(), block.StakingTransactions(), uint(0) + if len(transactions)+len(stakingTransactions) != len(receipts) { + return errors.New("transaction+stakingTransactions and receipt count mismatch") + } + + // The used gas can be calculated based on previous receipts + if len(receipts) > 0 && len(transactions) > 0 { + receipts[0].GasUsed = receipts[0].CumulativeGasUsed + } + for j := 1; j < len(transactions); j++ { + // The transaction hash can be retrieved from the transaction itself + receipts[j].TxHash = transactions[j].Hash() + receipts[j].GasUsed = receipts[j].CumulativeGasUsed - receipts[j-1].CumulativeGasUsed + // The contract address can be derived from the transaction itself + if transactions[j].To() == nil { + // Deriving the signer is expensive, only do if it's actually needed + var from common.Address + if transactions[j].IsEthCompatible() { + from, _ = types.Sender(ethSigner, transactions[j]) + } else { + from, _ = types.Sender(signer, transactions[j]) + } + receipts[j].ContractAddress = crypto.CreateAddress(from, transactions[j].Nonce()) + } + // The derived log fields can simply be set from the block and transaction + for k := 0; k < len(receipts[j].Logs); k++ { + receipts[j].Logs[k].BlockNumber = block.NumberU64() + receipts[j].Logs[k].BlockHash = block.Hash() + receipts[j].Logs[k].TxHash = receipts[j].TxHash + receipts[j].Logs[k].TxIndex = uint(j) + receipts[j].Logs[k].Index = logIndex + logIndex++ + } + } + + // The used gas can be calculated based on previous receipts + if len(receipts) > len(transactions) && len(stakingTransactions) > 0 { + receipts[len(transactions)].GasUsed = receipts[len(transactions)].CumulativeGasUsed + } + // in a block, txns are processed before staking txns + for j := len(transactions) + 1; j < len(transactions)+len(stakingTransactions); j++ { + // The transaction hash can be retrieved from the staking transaction itself + receipts[j].TxHash = stakingTransactions[j].Hash() + receipts[j].GasUsed = receipts[j].CumulativeGasUsed - receipts[j-1].CumulativeGasUsed + // The derived log fields can simply be set from the block and transaction + for k := 0; k < len(receipts[j].Logs); k++ { + receipts[j].Logs[k].BlockNumber = block.NumberU64() + receipts[j].Logs[k].BlockHash = block.Hash() + receipts[j].Logs[k].TxHash = receipts[j].TxHash + receipts[j].Logs[k].TxIndex = uint(j) + uint(len(transactions)) + receipts[j].Logs[k].Index = logIndex + logIndex++ + } + } + return nil +} + +// InsertReceiptChain attempts to complete an already existing header chain with +// transaction and receipt data. 
+func (bc *BlockChainImpl) InsertReceiptChain(blockChain types.Blocks, receiptChain []types.Receipts) (int, error) { + // Do a sanity check that the provided chain is actually ordered and linked + for i := 1; i < len(blockChain); i++ { + if blockChain[i].NumberU64() != blockChain[i-1].NumberU64()+1 || blockChain[i].ParentHash() != blockChain[i-1].Hash() { + utils.Logger().Error(). + Str("number", blockChain[i].Number().String()). + Str("hash", blockChain[i].Hash().Hex()). + Str("parent", blockChain[i].ParentHash().Hex()). + Str("prevnumber", blockChain[i-1].Number().String()). + Str("prevhash", blockChain[i-1].Hash().Hex()). + Msg("Non contiguous receipt insert") + return 0, fmt.Errorf("non contiguous insert: item %d is #%d [%x…], item %d is #%d [%x…] (parent [%x…])", i-1, blockChain[i-1].NumberU64(), + blockChain[i-1].Hash().Bytes()[:4], i, blockChain[i].NumberU64(), blockChain[i].Hash().Bytes()[:4], blockChain[i].ParentHash().Bytes()[:4]) + } + } + + bc.chainmu.Lock() + defer bc.chainmu.Unlock() + + var ( + stats = struct{ processed, ignored int32 }{} + start = time.Now() + bytes = 0 + batch = bc.db.NewBatch() + ) + for i, block := range blockChain { + receipts := receiptChain[i] + // Short circuit insertion if shutting down or processing failed + if atomic.LoadInt32(&bc.procInterrupt) == 1 { + return 0, nil + } + // Short circuit if the owner header is unknown + if !bc.HasHeader(block.Hash(), block.NumberU64()) { + return 0, fmt.Errorf("containing header #%d [%x…] unknown", block.Number(), block.Hash().Bytes()[:4]) + } + // Skip if the entire data is already known + if bc.HasBlock(block.Hash(), block.NumberU64()) { + stats.ignored++ + continue + } + // Compute all the non-consensus fields of the receipts + if err := SetReceiptsData(bc.chainConfig, block, receipts); err != nil { + return 0, fmt.Errorf("failed to set receipts data: %v", err) + } + // Write all the data out into the database + if err := rawdb.WriteBody(batch, block.Hash(), block.NumberU64(), block.Body()); err != nil { + return 0, err + } + if err := rawdb.WriteReceipts(batch, block.Hash(), block.NumberU64(), receipts); err != nil { + return 0, err + } + if err := rawdb.WriteBlockTxLookUpEntries(batch, block); err != nil { + return 0, err + } + if err := rawdb.WriteBlockStxLookUpEntries(batch, block); err != nil { + return 0, err + } + + stats.processed++ + + if batch.ValueSize() >= ethdb.IdealBatchSize { + if err := batch.Write(); err != nil { + return 0, err + } + bytes += batch.ValueSize() + batch.Reset() + } + } + if batch.ValueSize() > 0 { + bytes += batch.ValueSize() + if err := batch.Write(); err != nil { + return 0, err + } + } + + // Update the head fast sync block if better + bc.mu.Lock() + head := blockChain[len(blockChain)-1] + if td := bc.GetTd(head.Hash(), head.NumberU64()); td != nil { // Rewind may have occurred, skip in that case + currentFastBlock := bc.CurrentFastBlock() + if bc.GetTd(currentFastBlock.Hash(), currentFastBlock.NumberU64()).Cmp(td) < 0 { + rawdb.WriteHeadFastBlockHash(bc.db, head.Hash()) + bc.currentFastBlock.Store(head) + headFastBlockGauge.Update(int64(head.NumberU64())) + } + } + bc.mu.Unlock() + + utils.Logger().Info(). + Int32("count", stats.processed). + Str("elapsed", common.PrettyDuration(time.Since(start)).String()). + Str("age", common.PrettyAge(time.Unix(head.Time().Int64(), 0)).String()). + Str("head", head.Number().String()). + Str("hash", head.Hash().Hex()). + Str("size", common.StorageSize(bytes).String()). + Int32("ignored", stats.ignored). 
+ Msg("Imported new block receipts") + + return 0, nil +} + var lastWrite uint64 func (bc *BlockChainImpl) WriteBlockWithoutState(block *types.Block) (err error) { diff --git a/core/blockchain_stub.go b/core/blockchain_stub.go index 32a0b1c19..a1eb92a05 100644 --- a/core/blockchain_stub.go +++ b/core/blockchain_stub.go @@ -136,6 +136,10 @@ func (a Stub) InsertChain(chain types.Blocks, verifyHeaders bool, blockExecution return 0, errors.Errorf("method InsertChain not implemented for %s", a.Name) } +func (a Stub) InsertReceiptChain(blockChain types.Blocks, receiptChain []types.Receipts) (int, error) { + return 0, errors.Errorf("method InsertReceiptChain not implemented for %s", a.Name) +} + func (a Stub) BadBlocks() []BadBlock { return nil } From 57a77ab0f1c4c3cb0d0bd86b5167f4b4eaff3f1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Thu, 6 Jul 2023 23:17:15 +0800 Subject: [PATCH 26/56] refactor get receipts stage to use insertReceiptsChain --- .../receipt_download_manager.go | 2 +- .../stagedstreamsync/stage_receipts.go | 85 +++++++++++++++---- api/service/stagedstreamsync/stage_state.go | 4 - 3 files changed, 68 insertions(+), 23 deletions(-) diff --git a/api/service/stagedstreamsync/receipt_download_manager.go b/api/service/stagedstreamsync/receipt_download_manager.go index 2eaa3ca45..2bab10ade 100644 --- a/api/service/stagedstreamsync/receipt_download_manager.go +++ b/api/service/stagedstreamsync/receipt_download_manager.go @@ -77,7 +77,7 @@ func (rdm *receiptDownloadManager) HandleRequestError(bns []uint64, err error, s } // HandleRequestResult handles get receipts result -func (rdm *receiptDownloadManager) HandleRequestResult(bns []uint64, receipts []*types.Receipt, loopID int, streamID sttypes.StreamID) error { +func (rdm *receiptDownloadManager) HandleRequestResult(bns []uint64, receipts []types.Receipts, loopID int, streamID sttypes.StreamID) error { rdm.lock.Lock() defer rdm.lock.Unlock() diff --git a/api/service/stagedstreamsync/stage_receipts.go b/api/service/stagedstreamsync/stage_receipts.go index 692222c6c..5faa553f8 100644 --- a/api/service/stagedstreamsync/stage_receipts.go +++ b/api/service/stagedstreamsync/stage_receipts.go @@ -7,6 +7,7 @@ import ( "time" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/rlp" "github.com/harmony-one/harmony/core" "github.com/harmony-one/harmony/core/types" "github.com/harmony-one/harmony/internal/utils" @@ -89,6 +90,22 @@ func (b *StageReceipts) Exec(ctx context.Context, firstCycle bool, invalidBlockR // size := uint64(0) startTime := time.Now() // startBlock := currProgress + + // prepare db transactions + txs := make([]kv.RwTx, b.configs.concurrency) + for i := 0; i < b.configs.concurrency; i++ { + txs[i], err = b.configs.blockDBs[i].BeginRw(ctx) + if err != nil { + return err + } + } + + defer func() { + for i := 0; i < b.configs.concurrency; i++ { + txs[i].Rollback() + } + }() + if b.configs.logProgress { fmt.Print("\033[s") // save the cursor position } @@ -110,7 +127,7 @@ func (b *StageReceipts) Exec(ctx context.Context, firstCycle bool, invalidBlockR for i := 0; i != s.state.config.Concurrency; i++ { wg.Add(1) - go b.runReceiptWorkerLoop(ctx, s.state.rdm, &wg, i, s, startTime) + go b.runReceiptWorkerLoop(ctx, s.state.rdm, &wg, i, s, txs, startTime) } wg.Wait() @@ -125,9 +142,10 @@ func (b *StageReceipts) Exec(ctx context.Context, firstCycle bool, invalidBlockR } // runReceiptWorkerLoop creates a work loop for download 
receipts -func (b *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDownloadManager, wg *sync.WaitGroup, loopID int, s *StageState, startTime time.Time) { +func (b *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDownloadManager, wg *sync.WaitGroup, loopID int, s *StageState, txs []kv.RwTx, startTime time.Time) { currentBlock := int(b.configs.bc.CurrentBlock().NumberU64()) + gbm := s.state.gbm defer wg.Done() @@ -137,6 +155,7 @@ func (b *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDo return default: } + // get next batch of block numbers batch := rdm.GetNextBatch() if len(batch) == 0 { select { @@ -146,16 +165,43 @@ func (b *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDo return } } + // retrieve corresponding blocks from cache db var hashes []common.Hash + var blocks []*types.Block for _, bn := range batch { - /* - // TODO: check if we can directly use bc rather than receipt hashes map - header := b.configs.bc.GetHeaderByNumber(bn) - hashes = append(hashes, header.ReceiptHash()) - */ - receiptHash := s.state.currentCycle.ReceiptHashes[bn] - hashes = append(hashes, receiptHash) + blkKey := marshalData(bn) + loopID, _ := gbm.GetDownloadDetails(bn) + blockBytes, err := txs[loopID].GetOne(BlocksBucket, blkKey) + if err != nil { + return + } + sigBytes, err := txs[loopID].GetOne(BlockSignaturesBucket, blkKey) + if err != nil { + return + } + sz := len(blockBytes) + if sz <= 1 { + return + } + var block *types.Block + if err := rlp.DecodeBytes(blockBytes, &block); err != nil { + return + } + if sigBytes != nil { + block.SetCurrentCommitSig(sigBytes) + } + if block.NumberU64() != bn { + return + } + if block.Header().ReceiptHash() == emptyHash { + return + } + // receiptHash := s.state.currentCycle.ReceiptHashes[bn] + hashes = append(hashes, block.Header().ReceiptHash()) + blocks = append(blocks, block) } + + // download receipts receipts, stid, err := b.downloadReceipts(ctx, hashes) if err != nil { if !errors.Is(err, context.Canceled) { @@ -176,7 +222,17 @@ func (b *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDo err := errors.New("downloadRawBlocks received empty reciptBytes") rdm.HandleRequestError(batch, err, stid) } else { + // insert block and receipts to chain + if inserted, err := b.configs.bc.InsertReceiptChain(blocks, receipts); err != nil { + + } else { + if inserted != len(blocks) { + + } + } + rdm.HandleRequestResult(batch, receipts, loopID, stid) + if b.configs.logProgress { //calculating block download speed dt := time.Now().Sub(startTime).Seconds() @@ -193,7 +249,7 @@ func (b *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDo } } -func (b *StageReceipts) downloadReceipts(ctx context.Context, hs []common.Hash) ([]*types.Receipt, sttypes.StreamID, error) { +func (b *StageReceipts) downloadReceipts(ctx context.Context, hs []common.Hash) ([]types.Receipts, sttypes.StreamID, error) { ctx, cancel := context.WithTimeout(ctx, 10*time.Second) defer cancel() @@ -207,14 +263,7 @@ func (b *StageReceipts) downloadReceipts(ctx context.Context, hs []common.Hash) return receipts, stid, nil } -func (b *StageReceipts) downloadRawBlocks(ctx context.Context, bns []uint64) ([][]byte, [][]byte, sttypes.StreamID, error) { - ctx, cancel := context.WithTimeout(ctx, 10*time.Second) - defer cancel() - - return b.configs.protocol.GetRawBlocksByNumber(ctx, bns) -} - -func validateGetReceiptsResult(requested []common.Hash, result []*types.Receipt) error { +func 
validateGetReceiptsResult(requested []common.Hash, result []types.Receipts) error { // TODO: validate each receipt here return nil diff --git a/api/service/stagedstreamsync/stage_state.go b/api/service/stagedstreamsync/stage_state.go index e5cab702f..a5297b903 100644 --- a/api/service/stagedstreamsync/stage_state.go +++ b/api/service/stagedstreamsync/stage_state.go @@ -174,10 +174,6 @@ func (stg *StageStates) Exec(ctx context.Context, firstCycle bool, invalidBlockR return err } - // TODO: only for fast sync - // add receipt hash for next stage - s.state.currentCycle.ReceiptHashes[block.NumberU64()] = block.Header().ReceiptHash() - if invalidBlockRevert { if s.state.invalidBlock.Number == i { s.state.invalidBlock.resolve() From bcf1b770a1ede44e3e66212c7bacefa0e592d67c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Fri, 7 Jul 2023 00:24:33 +0800 Subject: [PATCH 27/56] remove using currentCycle, cleanup stage receipts --- api/service/stagedstreamsync/stage_bodies.go | 2 - .../stagedstreamsync/stage_receipts.go | 137 +++++++----------- api/service/stagedstreamsync/stage_state.go | 2 - .../stagedstreamsync/staged_stream_sync.go | 1 - 4 files changed, 51 insertions(+), 91 deletions(-) diff --git a/api/service/stagedstreamsync/stage_bodies.go b/api/service/stagedstreamsync/stage_bodies.go index 6b2fb65ed..d2ed95c9d 100644 --- a/api/service/stagedstreamsync/stage_bodies.go +++ b/api/service/stagedstreamsync/stage_bodies.go @@ -238,8 +238,6 @@ func (b *StageBodies) verifyBlockAndExtractReceiptsData(batchBlockBytes [][]byte if err := verifyBlock(b.configs.bc, block); err != nil { return err } - // add receipt hash for next stage - s.state.currentCycle.ReceiptHashes[block.NumberU64()] = block.Header().ReceiptHash() } return nil } diff --git a/api/service/stagedstreamsync/stage_receipts.go b/api/service/stagedstreamsync/stage_receipts.go index 5faa553f8..fb0af99bc 100644 --- a/api/service/stagedstreamsync/stage_receipts.go +++ b/api/service/stagedstreamsync/stage_receipts.go @@ -48,8 +48,8 @@ func NewStageReceiptsCfg(bc core.BlockChain, db kv.RwDB, blockDBs []kv.RwDB, con } } -// Exec progresses Bodies stage in the forward direction -func (b *StageReceipts) Exec(ctx context.Context, firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { +// Exec progresses receipts stage in the forward direction +func (r *StageReceipts) Exec(ctx context.Context, firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { useInternalTx := tx == nil @@ -63,14 +63,14 @@ func (b *StageReceipts) Exec(ctx context.Context, firstCycle bool, invalidBlockR } maxHeight := s.state.status.targetBN - currentHead := b.configs.bc.CurrentBlock().NumberU64() + currentHead := r.configs.bc.CurrentBlock().NumberU64() if currentHead >= maxHeight { return nil } currProgress := uint64(0) targetHeight := s.state.currentCycle.TargetHeight - if errV := CreateView(ctx, b.configs.db, tx, func(etx kv.Tx) error { + if errV := CreateView(ctx, r.configs.db, tx, func(etx kv.Tx) error { if currProgress, err = s.CurrentStageProgress(etx); err != nil { return err } @@ -92,27 +92,27 @@ func (b *StageReceipts) Exec(ctx context.Context, firstCycle bool, invalidBlockR // startBlock := currProgress // prepare db transactions - txs := make([]kv.RwTx, b.configs.concurrency) - for i := 0; i < b.configs.concurrency; i++ { - txs[i], err = b.configs.blockDBs[i].BeginRw(ctx) + txs := 
make([]kv.RwTx, r.configs.concurrency) + for i := 0; i < r.configs.concurrency; i++ { + txs[i], err = r.configs.blockDBs[i].BeginRw(ctx) if err != nil { return err } } defer func() { - for i := 0; i < b.configs.concurrency; i++ { + for i := 0; i < r.configs.concurrency; i++ { txs[i].Rollback() } }() - if b.configs.logProgress { + if r.configs.logProgress { fmt.Print("\033[s") // save the cursor position } if useInternalTx { var err error - tx, err = b.configs.db.BeginRw(ctx) + tx, err = r.configs.db.BeginRw(ctx) if err != nil { return err } @@ -120,14 +120,14 @@ func (b *StageReceipts) Exec(ctx context.Context, firstCycle bool, invalidBlockR } // Fetch blocks from neighbors - s.state.rdm = newReceiptDownloadManager(tx, b.configs.bc, targetHeight, s.state.logger) + s.state.rdm = newReceiptDownloadManager(tx, r.configs.bc, targetHeight, s.state.logger) // Setup workers to fetch blocks from remote node var wg sync.WaitGroup for i := 0; i != s.state.config.Concurrency; i++ { wg.Add(1) - go b.runReceiptWorkerLoop(ctx, s.state.rdm, &wg, i, s, txs, startTime) + go r.runReceiptWorkerLoop(ctx, s.state.rdm, &wg, i, s, txs, startTime) } wg.Wait() @@ -142,9 +142,9 @@ func (b *StageReceipts) Exec(ctx context.Context, firstCycle bool, invalidBlockR } // runReceiptWorkerLoop creates a work loop for download receipts -func (b *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDownloadManager, wg *sync.WaitGroup, loopID int, s *StageState, txs []kv.RwTx, startTime time.Time) { +func (r *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDownloadManager, wg *sync.WaitGroup, loopID int, s *StageState, txs []kv.RwTx, startTime time.Time) { - currentBlock := int(b.configs.bc.CurrentBlock().NumberU64()) + currentBlock := int(r.configs.bc.CurrentBlock().NumberU64()) gbm := s.state.gbm defer wg.Done() @@ -202,10 +202,10 @@ func (b *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDo } // download receipts - receipts, stid, err := b.downloadReceipts(ctx, hashes) + receipts, stid, err := r.downloadReceipts(ctx, hashes) if err != nil { if !errors.Is(err, context.Canceled) { - b.configs.protocol.StreamFailed(stid, "downloadRawBlocks failed") + r.configs.protocol.StreamFailed(stid, "downloadRawBlocks failed") } utils.Logger().Error(). Err(err). @@ -223,37 +223,46 @@ func (b *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDo rdm.HandleRequestError(batch, err, stid) } else { // insert block and receipts to chain - if inserted, err := b.configs.bc.InsertReceiptChain(blocks, receipts); err != nil { - + if inserted, err := r.configs.bc.InsertReceiptChain(blocks, receipts); err != nil { + utils.Logger().Err(err). + Str("stream", string(stid)). + Interface("block numbers", batch). + Msg(WrapStagedSyncMsg("InsertReceiptChain failed")) + err := errors.New("InsertReceiptChain failed") + rdm.HandleRequestError(batch, err, stid) } else { if inserted != len(blocks) { - + utils.Logger().Warn(). + Interface("block numbers", batch). + Int("inserted", inserted). + Int("blocks to insert", len(blocks)). 
+ Msg(WrapStagedSyncMsg("InsertReceiptChain couldn't insert all downloaded blocks/receipts")) } } - + // handle request result rdm.HandleRequestResult(batch, receipts, loopID, stid) - - if b.configs.logProgress { + // log progress + if r.configs.logProgress { //calculating block download speed dt := time.Now().Sub(startTime).Seconds() speed := float64(0) if dt > 0 { speed = float64(len(rdm.rdd)) / dt } - blockSpeed := fmt.Sprintf("%.2f", speed) + blockReceiptSpeed := fmt.Sprintf("%.2f", speed) fmt.Print("\033[u\033[K") // restore the cursor position and clear the line - fmt.Println("downloaded blocks:", currentBlock+len(rdm.rdd), "/", int(rdm.targetBN), "(", blockSpeed, "blocks/s", ")") + fmt.Println("downloaded blocks and receipts:", currentBlock+len(rdm.rdd), "/", int(rdm.targetBN), "(", blockReceiptSpeed, "BlocksAndReceipts/s", ")") } } } } -func (b *StageReceipts) downloadReceipts(ctx context.Context, hs []common.Hash) ([]types.Receipts, sttypes.StreamID, error) { +func (r *StageReceipts) downloadReceipts(ctx context.Context, hs []common.Hash) ([]types.Receipts, sttypes.StreamID, error) { ctx, cancel := context.WithTimeout(ctx, 10*time.Second) defer cancel() - receipts, stid, err := b.configs.protocol.GetReceipts(ctx, hs) + receipts, stid, err := r.configs.protocol.GetReceipts(ctx, hs) if err != nil { return nil, stid, err } @@ -269,11 +278,11 @@ func validateGetReceiptsResult(requested []common.Hash, result []types.Receipts) return nil } -func (b *StageReceipts) saveProgress(ctx context.Context, s *StageState, progress uint64, tx kv.RwTx) (err error) { +func (r *StageReceipts) saveProgress(ctx context.Context, s *StageState, progress uint64, tx kv.RwTx) (err error) { useInternalTx := tx == nil if useInternalTx { var err error - tx, err = b.configs.db.BeginRw(ctx) + tx, err = r.configs.db.BeginRw(ctx) if err != nil { return err } @@ -284,7 +293,7 @@ func (b *StageReceipts) saveProgress(ctx context.Context, s *StageState, progres if err = s.Update(tx, progress); err != nil { utils.Logger().Error(). Err(err). - Msgf("[STAGED_SYNC] saving progress for block bodies stage failed") + Msgf("[STAGED_SYNC] saving progress for receipt stage failed") return ErrSavingBodiesProgressFail } @@ -296,72 +305,37 @@ func (b *StageReceipts) saveProgress(ctx context.Context, s *StageState, progres return nil } -func (b *StageReceipts) cleanBlocksDB(ctx context.Context, loopID int) (err error) { - tx, errb := b.configs.blockDBs[loopID].BeginRw(ctx) - if errb != nil { - return errb - } - defer tx.Rollback() - - // clean block bodies db - if err = tx.ClearBucket(BlocksBucket); err != nil { - utils.Logger().Error(). - Err(err). - Msgf("[STAGED_STREAM_SYNC] clear blocks bucket after revert failed") - return err - } - // clean block signatures db - if err = tx.ClearBucket(BlockSignaturesBucket); err != nil { - utils.Logger().Error(). - Err(err). 
- Msgf("[STAGED_STREAM_SYNC] clear block signatures bucket after revert failed") - return err +func (r *StageReceipts) Revert(ctx context.Context, firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = r.configs.db.BeginRw(ctx) + if err != nil { + return err + } + defer tx.Rollback() } - if err = tx.Commit(); err != nil { + if err = u.Done(tx); err != nil { return err } - return nil -} - -func (b *StageReceipts) cleanAllBlockDBs(ctx context.Context) (err error) { - //clean all blocks DBs - for i := 0; i < b.configs.concurrency; i++ { - if err := b.cleanBlocksDB(ctx, i); err != nil { + if useInternalTx { + if err = tx.Commit(); err != nil { return err } } return nil } -func (b *StageReceipts) Revert(ctx context.Context, firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) { - - //clean all blocks DBs - if err := b.cleanAllBlockDBs(ctx); err != nil { - return err - } - +func (r *StageReceipts) CleanUp(ctx context.Context, firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) { useInternalTx := tx == nil if useInternalTx { - tx, err = b.configs.db.BeginRw(ctx) + tx, err = r.configs.db.BeginRw(ctx) if err != nil { return err } defer tx.Rollback() } - // save progress - currentHead := b.configs.bc.CurrentBlock().NumberU64() - if err = s.Update(tx, currentHead); err != nil { - utils.Logger().Error(). - Err(err). - Msgf("[STAGED_SYNC] saving progress for block bodies stage after revert failed") - return err - } - - if err = u.Done(tx); err != nil { - return err - } if useInternalTx { if err = tx.Commit(); err != nil { @@ -370,12 +344,3 @@ func (b *StageReceipts) Revert(ctx context.Context, firstCycle bool, u *RevertSt } return nil } - -func (b *StageReceipts) CleanUp(ctx context.Context, firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) { - //clean all blocks DBs - if err := b.cleanAllBlockDBs(ctx); err != nil { - return err - } - - return nil -} diff --git a/api/service/stagedstreamsync/stage_state.go b/api/service/stagedstreamsync/stage_state.go index a5297b903..f579019f6 100644 --- a/api/service/stagedstreamsync/stage_state.go +++ b/api/service/stagedstreamsync/stage_state.go @@ -111,8 +111,6 @@ func (stg *StageStates) Exec(ctx context.Context, firstCycle bool, invalidBlockR fmt.Print("\033[s") // save the cursor position } - s.state.currentCycle.ReceiptHashes = make(map[uint64]common.Hash) - for i := currProgress + 1; i <= targetHeight; i++ { blkKey := marshalData(i) loopID, streamID := gbm.GetDownloadDetails(i) diff --git a/api/service/stagedstreamsync/staged_stream_sync.go b/api/service/stagedstreamsync/staged_stream_sync.go index 48a47f28d..786f59779 100644 --- a/api/service/stagedstreamsync/staged_stream_sync.go +++ b/api/service/stagedstreamsync/staged_stream_sync.go @@ -104,7 +104,6 @@ type Timing struct { type SyncCycle struct { Number uint64 TargetHeight uint64 - ReceiptHashes map[uint64]common.Hash lock sync.RWMutex } From 6f9a1ecda1c0003a2095ceb95c1258b661e24586 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Fri, 7 Jul 2023 00:25:12 +0800 Subject: [PATCH 28/56] goimports --- api/service/stagedstreamsync/staged_stream_sync.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api/service/stagedstreamsync/staged_stream_sync.go b/api/service/stagedstreamsync/staged_stream_sync.go index 786f59779..5abe45b54 100644 --- a/api/service/stagedstreamsync/staged_stream_sync.go 
+++ b/api/service/stagedstreamsync/staged_stream_sync.go @@ -102,9 +102,9 @@ type Timing struct { } type SyncCycle struct { - Number uint64 - TargetHeight uint64 - lock sync.RWMutex + Number uint64 + TargetHeight uint64 + lock sync.RWMutex } func (s *StagedStreamSync) Len() int { return len(s.stages) } From 12235f5c5c81c89b7e74216c1c5ff9159a9de1ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Mon, 10 Jul 2023 17:38:14 +0800 Subject: [PATCH 29/56] fix stages forward order for staged stream sync --- .../stagedstreamsync/staged_stream_sync.go | 3 --- api/service/stagedstreamsync/syncing.go | 18 ++++++++++++++++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/api/service/stagedstreamsync/staged_stream_sync.go b/api/service/stagedstreamsync/staged_stream_sync.go index 5abe45b54..29832ef18 100644 --- a/api/service/stagedstreamsync/staged_stream_sync.go +++ b/api/service/stagedstreamsync/staged_stream_sync.go @@ -267,9 +267,6 @@ func New( logger zerolog.Logger, ) *StagedStreamSync { - // init stages order based on sync mode - initStagesOrder(config.SyncMode) - forwardStages := make([]*Stage, len(stagesList)) for i, stageIndex := range StagesForwardOrder { for _, s := range stagesList { diff --git a/api/service/stagedstreamsync/syncing.go b/api/service/stagedstreamsync/syncing.go index 7abf00e7a..b2c1aacdf 100644 --- a/api/service/stagedstreamsync/syncing.go +++ b/api/service/stagedstreamsync/syncing.go @@ -91,10 +91,13 @@ func CreateStagedSync(ctx context.Context, stageStatesCfg := NewStageStatesCfg(bc, mainDB, dbs, config.Concurrency, blockExecution, logger, config.LogProgress) stageStateSyncCfg := NewStageStateSyncCfg(bc, mainDB, config.Concurrency, protocol, logger, config.LogProgress) stageReceiptsCfg := NewStageReceiptsCfg(bc, mainDB, dbs, config.Concurrency, protocol, isBeaconNode, config.LogProgress) - lastMileCfg := NewStageLastMileCfg(ctx, bc, mainDB) + lastMileCfg := NewStageLastMileCfg(ctx, bc, mainDB) stageFinishCfg := NewStageFinishCfg(mainDB) - stages := DefaultStages(ctx, + // init stages order based on sync mode + initStagesOrder(config.SyncMode) + + defaultStages := DefaultStages(ctx, stageHeadsCfg, stageSyncEpochCfg, stageShortRangeCfg, @@ -115,6 +118,17 @@ func CreateStagedSync(ctx context.Context, Int("minStreams", config.MinStreams). 
Msg(WrapStagedSyncMsg("staged sync created successfully")) + var stages []*Stage + // if any of the default stages doesn't exist in forward order, delete it from the list of stages + for _, stg := range defaultStages { + for _, stageID := range StagesForwardOrder { + if stg.ID == stageID { + stages = append(stages, stg) + break + } + } + } + return New( bc, consensus, From f6b89516568fba4064f60756acac98a99c8e1894 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Thu, 13 Jul 2023 19:31:58 +0800 Subject: [PATCH 30/56] add SyncMode to flags --- cmd/harmony/flags.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cmd/harmony/flags.go b/cmd/harmony/flags.go index 46a1decb0..2af21cb24 100644 --- a/cmd/harmony/flags.go +++ b/cmd/harmony/flags.go @@ -238,6 +238,7 @@ var ( syncFlags = []cli.Flag{ syncStreamEnabledFlag, + syncModeFlag, syncDownloaderFlag, syncStagedSyncFlag, syncConcurrencyFlag, @@ -1876,6 +1877,13 @@ var ( Usage: "Enable the stream sync protocol (experimental feature)", DefValue: false, } + + syncModeFlag = cli.IntFlag{ + Name: "sync.mode", + Usage: "synchronization mode of the downloader (0=FullSync, 1=FastSync, 2=SnapSync)", + DefValue: 0, + } + // TODO: Deprecate this flag, and always set to true after stream sync is fully up. syncDownloaderFlag = cli.BoolFlag{ Name: "sync.downloader", @@ -1937,6 +1945,10 @@ func applySyncFlags(cmd *cobra.Command, config *harmonyconfig.HarmonyConfig) { config.Sync.Enabled = cli.GetBoolFlagValue(cmd, syncStreamEnabledFlag) } + if cli.IsFlagChanged(cmd, syncModeFlag) { + config.Sync.SyncMode = uint32(cli.GetIntFlagValue(cmd, syncModeFlag)) + } + if cli.IsFlagChanged(cmd, syncDownloaderFlag) { config.Sync.Downloader = cli.GetBoolFlagValue(cmd, syncDownloaderFlag) } From cd7ccbe827eb86e70cb972a6e82b68fbe29d28ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Wed, 16 Aug 2023 18:11:51 +0800 Subject: [PATCH 31/56] fix stages and replace with forward stages --- api/service/stagedstreamsync/staged_stream_sync.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/service/stagedstreamsync/staged_stream_sync.go b/api/service/stagedstreamsync/staged_stream_sync.go index 29832ef18..7d94126b0 100644 --- a/api/service/stagedstreamsync/staged_stream_sync.go +++ b/api/service/stagedstreamsync/staged_stream_sync.go @@ -317,7 +317,7 @@ func New( inserted: 0, config: config, logger: logger, - stages: stagesList, + stages: forwardStages, currentStage: 0, revertOrder: revertStages, pruningOrder: pruneStages, From 772d8658e1ed1b3c6b2a8132806e4eef34b687ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Wed, 6 Sep 2023 23:14:56 +0800 Subject: [PATCH 32/56] fix block validation in stage bodies --- api/service/stagedstreamsync/stage_bodies.go | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/api/service/stagedstreamsync/stage_bodies.go b/api/service/stagedstreamsync/stage_bodies.go index d2ed95c9d..8cce5207c 100644 --- a/api/service/stagedstreamsync/stage_bodies.go +++ b/api/service/stagedstreamsync/stage_bodies.go @@ -187,12 +187,6 @@ func (b *StageBodies) runBlockWorkerLoop(ctx context.Context, gbm *blockDownload gbm.HandleRequestError(batch, err, stid) b.configs.protocol.RemoveStream(stid) } else { - if b.configs.extractReceiptHashes { - if err = 
b.verifyBlockAndExtractReceiptsData(blockBytes, sigBytes, s); err != nil {
-					gbm.HandleRequestError(batch, err, stid)
-					continue
-				}
-			}
 			if err = b.saveBlocks(ctx, gbm.tx, batch, blockBytes, sigBytes, loopID, stid); err != nil {
 				panic(ErrSaveBlocksToDbFailed)
 			}
@@ -231,10 +225,9 @@ func (b *StageBodies) verifyBlockAndExtractReceiptsData(batchBlockBytes [][]byte
 			block.SetCurrentCommitSig(sigBytes)
 		}
 
-		if block.NumberU64() != i {
-			return ErrInvalidBlockNumber
-		}
-
+		// if block.NumberU64() != i {
+		// 	return ErrInvalidBlockNumber
+		// }
 		if err := verifyBlock(b.configs.bc, block); err != nil {
 			return err
 		}

From c1d352b8955a9250cf43fc9a3a5f7b1052fdbd46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com>
Date: Wed, 27 Sep 2023 00:35:40 +0800
Subject: [PATCH 33/56] add pivot to chain accessor, add CurrentFastBlock to blockchain_impl, fix receipt insertion

---
 core/blockchain.go                 |  5 +++++
 core/blockchain_impl.go            | 23 +++++++++--------------
 core/blockchain_stub.go            |  4 ++++
 core/rawdb/accessors_chain.go      |  5 ++++-
 p2p/stream/protocols/sync/chain.go |  5 ++++-
 5 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/core/blockchain.go b/core/blockchain.go
index c66f26ed3..766bfb668 100644
--- a/core/blockchain.go
+++ b/core/blockchain.go
@@ -52,6 +52,11 @@ type BlockChain interface {
 	// CurrentBlock retrieves the current head block of the canonical chain. The
 	// block is retrieved from the blockchain's internal cache.
 	CurrentBlock() *types.Block
+	// CurrentFastBlock retrieves the current fast-sync head block of the canonical
+	// chain. The block is retrieved from the blockchain's internal cache.
+	CurrentFastBlock() *types.Block
+	// Validator returns the current validator.
+	Validator() Validator
 	// Processor returns the current processor.
 	Processor() Processor
 	// State returns a new mutable state based on the current HEAD block.
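The two heads serve different purposes during fast sync: CurrentBlock still tracks the last fully executed block, while CurrentFastBlock is advanced by InsertReceiptChain (see the blockchain_impl.go changes below) as bodies and receipts are imported. A hedged sketch, assuming only the interface methods added here, of how a caller could pick where to resume receipt import (the helper name nextReceiptImportBlock is illustrative, not part of this patch):

// nextReceiptImportBlock returns the first block number whose body and receipts
// still need to be imported, based on whichever head is further along.
func nextReceiptImportBlock(bc core.BlockChain) uint64 {
	next := bc.CurrentBlock().NumberU64() // last fully executed block
	if fast := bc.CurrentFastBlock(); fast != nil && fast.NumberU64() > next {
		next = fast.NumberU64() // receipts already imported up to this height
	}
	return next + 1
}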
diff --git a/core/blockchain_impl.go b/core/blockchain_impl.go index c91c322b8..e9360dc15 100644 --- a/core/blockchain_impl.go +++ b/core/blockchain_impl.go @@ -1285,11 +1285,14 @@ func (bc *BlockChainImpl) InsertReceiptChain(blockChain types.Blocks, receiptCha receipts := receiptChain[i] // Short circuit insertion if shutting down or processing failed if atomic.LoadInt32(&bc.procInterrupt) == 1 { - return 0, nil + return 0, fmt.Errorf("Premature abort during blocks processing") } - // Short circuit if the owner header is unknown + // Add header if the owner header is unknown if !bc.HasHeader(block.Hash(), block.NumberU64()) { - return 0, fmt.Errorf("containing header #%d [%x…] unknown", block.Number(), block.Hash().Bytes()[:4]) + if err := rawdb.WriteHeader(batch, block.Header()); err != nil { + return 0, err + } + // return 0, fmt.Errorf("containing header #%d [%x…] unknown", block.Number(), block.Hash().Bytes()[:4]) } // Skip if the entire data is already known if bc.HasBlock(block.Hash(), block.NumberU64()) { @@ -1332,17 +1335,9 @@ func (bc *BlockChainImpl) InsertReceiptChain(blockChain types.Blocks, receiptCha } // Update the head fast sync block if better - bc.mu.Lock() head := blockChain[len(blockChain)-1] - if td := bc.GetTd(head.Hash(), head.NumberU64()); td != nil { // Rewind may have occurred, skip in that case - currentFastBlock := bc.CurrentFastBlock() - if bc.GetTd(currentFastBlock.Hash(), currentFastBlock.NumberU64()).Cmp(td) < 0 { - rawdb.WriteHeadFastBlockHash(bc.db, head.Hash()) - bc.currentFastBlock.Store(head) - headFastBlockGauge.Update(int64(head.NumberU64())) - } - } - bc.mu.Unlock() + rawdb.WriteHeadFastBlockHash(bc.db, head.Hash()) + bc.currentFastBlock.Store(head) utils.Logger().Info(). Int32("count", stats.processed). @@ -1354,7 +1349,7 @@ func (bc *BlockChainImpl) InsertReceiptChain(blockChain types.Blocks, receiptCha Int32("ignored", stats.ignored). Msg("Imported new block receipts") - return 0, nil + return int(stats.processed), nil } var lastWrite uint64 diff --git a/core/blockchain_stub.go b/core/blockchain_stub.go index a1eb92a05..3b9713804 100644 --- a/core/blockchain_stub.go +++ b/core/blockchain_stub.go @@ -49,6 +49,10 @@ func (a Stub) CurrentBlock() *types.Block { return nil } +func (a Stub) CurrentFastBlock() *types.Block { + return nil +} + func (a Stub) Validator() Validator { return nil } diff --git a/core/rawdb/accessors_chain.go b/core/rawdb/accessors_chain.go index 72ce358e2..b01dc0965 100644 --- a/core/rawdb/accessors_chain.go +++ b/core/rawdb/accessors_chain.go @@ -597,14 +597,17 @@ func ReadLastPivotNumber(db ethdb.KeyValueReader) *uint64 { } // WriteLastPivotNumber stores the number of the last pivot block. 
-func WriteLastPivotNumber(db ethdb.KeyValueWriter, pivot uint64) { +func WriteLastPivotNumber(db ethdb.KeyValueWriter, pivot uint64) error { enc, err := rlp.EncodeToBytes(pivot) if err != nil { utils.Logger().Error().Err(err).Msg("Failed to encode pivot block number") + return err } if err := db.Put(lastPivotKey, enc); err != nil { utils.Logger().Error().Err(err).Msg("Failed to store pivot block number") + return err } + return nil } // ReadTxIndexTail retrieves the number of oldest indexed block diff --git a/p2p/stream/protocols/sync/chain.go b/p2p/stream/protocols/sync/chain.go index efabd9307..aa4dced3f 100644 --- a/p2p/stream/protocols/sync/chain.go +++ b/p2p/stream/protocols/sync/chain.go @@ -171,7 +171,10 @@ func (ch *chainHelperImpl) getNodeData(hs []common.Hash) ([][]byte, error) { entry, err = ch.chain.ValidatorCode(hash) } } - if err == nil && len(entry) > 0 { + if err != nil { + return nil, err + } + if len(entry) > 0 { nodes = append(nodes, entry) bytes += len(entry) } From 8d66bdf078d51e5b00b56156033b359bb84e4198 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Thu, 28 Sep 2023 00:24:16 +0800 Subject: [PATCH 34/56] add getBlockByMaxVote to sync helper --- api/service/stagedstreamsync/helpers.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/api/service/stagedstreamsync/helpers.go b/api/service/stagedstreamsync/helpers.go index 75e504214..96c1c22b0 100644 --- a/api/service/stagedstreamsync/helpers.go +++ b/api/service/stagedstreamsync/helpers.go @@ -73,6 +73,27 @@ func checkGetBlockByHashesResult(blocks []*types.Block, hashes []common.Hash) er return nil } +func getBlockByMaxVote(blocks []*types.Block) (*types.Block, error) { + hashesVote := make(map[common.Hash]int) + maxVote := int(-1) + maxVotedBlockIndex := int(0) + + for i, block := range blocks { + if block == nil { + continue + } + hashesVote[block.Header().Hash()]++ + if hashesVote[block.Header().Hash()] > maxVote { + maxVote = hashesVote[block.Header().Hash()] + maxVotedBlockIndex = i + } + } + if maxVote < 0 { + return nil, ErrInvalidBlockBytes + } + return blocks[maxVotedBlockIndex], nil +} + func countHashMaxVote(m map[sttypes.StreamID]common.Hash, whitelist map[sttypes.StreamID]struct{}) (common.Hash, map[sttypes.StreamID]struct{}) { var ( voteM = make(map[common.Hash]int) From 917a3019e244b3d3705c87152316b27d3ce9ef5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Tue, 3 Oct 2023 14:35:40 +0800 Subject: [PATCH 35/56] add tests for node data request --- p2p/stream/protocols/sync/stream_test.go | 25 ++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/p2p/stream/protocols/sync/stream_test.go b/p2p/stream/protocols/sync/stream_test.go index 3b538c14b..cd3a3f061 100644 --- a/p2p/stream/protocols/sync/stream_test.go +++ b/p2p/stream/protocols/sync/stream_test.go @@ -51,14 +51,14 @@ var ( testGetReceiptsRequest = syncpb.MakeGetReceiptsRequest(testGetReceipts) testGetReceiptsRequestMsg = syncpb.MakeMessageFromRequest(testGetReceiptsRequest) - testGetNodeData = []common.Hash{ + testGetNodes = []common.Hash{ numberToHash(1), numberToHash(2), numberToHash(3), numberToHash(4), numberToHash(5), } - testGetNodeDataRequest = syncpb.MakeGetNodeDataRequest(testGetNodeData) + testGetNodeDataRequest = syncpb.MakeGetNodeDataRequest(testGetNodes) testGetNodeDataRequestMsg = 
syncpb.MakeMessageFromRequest(testGetNodeDataRequest) maxBytes = uint64(500) @@ -296,6 +296,27 @@ func TestSyncStream_HandleGetTrieNodes(t *testing.T) { } } +func TestSyncStream_HandleGetNodeData(t *testing.T) { + st, remoteSt := makeTestSyncStream() + + go st.run() + defer close(st.closeC) + + req := testGetNodeDataRequestMsg + b, _ := protobuf.Marshal(req) + err := remoteSt.WriteBytes(b) + if err != nil { + t.Fatal(err) + } + + time.Sleep(200 * time.Millisecond) + receivedBytes, _ := remoteSt.ReadBytes() + + if err := checkGetNodeDataResult(receivedBytes, testGetBlockByHashes); err != nil { + t.Fatal(err) + } +} + func makeTestSyncStream() (*syncStream, *testRemoteBaseStream) { localRaw, remoteRaw := makePairP2PStreams() remote := newTestRemoteBaseStream(remoteRaw) From 7c21eef5ef8b50d482f7c42fa6b840aad4c6baa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Tue, 3 Oct 2023 17:20:45 +0800 Subject: [PATCH 36/56] fix stream tests --- p2p/stream/protocols/sync/stream_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/p2p/stream/protocols/sync/stream_test.go b/p2p/stream/protocols/sync/stream_test.go index cd3a3f061..9511de2ce 100644 --- a/p2p/stream/protocols/sync/stream_test.go +++ b/p2p/stream/protocols/sync/stream_test.go @@ -51,14 +51,14 @@ var ( testGetReceiptsRequest = syncpb.MakeGetReceiptsRequest(testGetReceipts) testGetReceiptsRequestMsg = syncpb.MakeMessageFromRequest(testGetReceiptsRequest) - testGetNodes = []common.Hash{ + testGetNodeData = []common.Hash{ numberToHash(1), numberToHash(2), numberToHash(3), numberToHash(4), numberToHash(5), } - testGetNodeDataRequest = syncpb.MakeGetNodeDataRequest(testGetNodes) + testGetNodeDataRequest = syncpb.MakeGetNodeDataRequest(testGetNodeData) testGetNodeDataRequestMsg = syncpb.MakeMessageFromRequest(testGetNodeDataRequest) maxBytes = uint64(500) @@ -312,7 +312,7 @@ func TestSyncStream_HandleGetNodeData(t *testing.T) { time.Sleep(200 * time.Millisecond) receivedBytes, _ := remoteSt.ReadBytes() - if err := checkGetNodeDataResult(receivedBytes, testGetBlockByHashes); err != nil { + if err := checkGetNodeDataResult(receivedBytes, testGetNodeData); err != nil { t.Fatal(err) } } From d534fea261fa9b9ba7403b478592b05260cc7aff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Tue, 3 Oct 2023 17:22:18 +0800 Subject: [PATCH 37/56] add Validator method to blockchain to fix the interface --- core/blockchain_impl.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/core/blockchain_impl.go b/core/blockchain_impl.go index e9360dc15..2fbdb92d7 100644 --- a/core/blockchain_impl.go +++ b/core/blockchain_impl.go @@ -685,6 +685,17 @@ func (bc *BlockChainImpl) CurrentBlock() *types.Block { return bc.currentBlock.Load().(*types.Block) } +// CurrentFastBlock retrieves the current fast-sync head block of the canonical +// chain. The block is retrieved from the blockchain's internal cache. +func (bc *BlockChainImpl) CurrentFastBlock() *types.Block { + return bc.currentFastBlock.Load().(*types.Block) +} + +// Validator returns the current validator. 
+func (bc *BlockChainImpl) Validator() Validator { + return bc.validator +} + func (bc *BlockChainImpl) Processor() Processor { return bc.processor } From e96855b6ff7523d04a37f44cdbf82065abf609df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Tue, 3 Oct 2023 17:22:46 +0800 Subject: [PATCH 38/56] fix shard chain test --- core_test/shardchain_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core_test/shardchain_test.go b/core_test/shardchain_test.go index a6a9238ba..ad091482b 100644 --- a/core_test/shardchain_test.go +++ b/core_test/shardchain_test.go @@ -72,7 +72,7 @@ func TestAddNewBlock(t *testing.T) { nn := node.Blockchain().CurrentBlock() t.Log("[*]", nn.NumberU64(), nn.Hash().Hex(), nn.ParentHash()) - _, err = blockchain.InsertChain([]*types.Block{block}, false) + _, err = blockchain.InsertChain([]*types.Block{block}, false, true) require.NoError(t, err, "error when adding new block") meta := blockchain.LeaderRotationMeta() From ebd689f1a0b6297e9b906487ced785a5fc4eaccf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Wed, 4 Oct 2023 00:51:58 +0800 Subject: [PATCH 39/56] remove blockExecution option from insertChain --- cmd/harmony/main.go | 2 +- core/blockchain.go | 2 +- core/blockchain_impl.go | 13 +++---------- core/blockchain_stub.go | 2 +- core/epochchain.go | 2 +- core_test/shardchain_test.go | 2 +- hmy/downloader/adapter.go | 2 +- hmy/downloader/adapter_test.go | 6 +++--- hmy/downloader/beaconhelper.go | 2 +- hmy/downloader/downloader.go | 2 +- hmy/downloader/shortrange.go | 2 +- node/node_handler_test.go | 2 +- node/node_newblock_test.go | 2 +- test/chain/main.go | 6 +++--- 14 files changed, 20 insertions(+), 27 deletions(-) diff --git a/cmd/harmony/main.go b/cmd/harmony/main.go index a29698f40..ec05e2419 100644 --- a/cmd/harmony/main.go +++ b/cmd/harmony/main.go @@ -1017,7 +1017,7 @@ func setupStagedSyncService(node *node.Node, host p2p.Host, hc harmonyconfig.Har SmDiscBatch: hc.Sync.DiscBatch, UseMemDB: hc.Sync.StagedSyncCfg.UseMemDB, LogProgress: hc.Sync.StagedSyncCfg.LogProgress, - DebugMode: hc.Sync.StagedSyncCfg.DebugMode, + DebugMode: true, // hc.Sync.StagedSyncCfg.DebugMode, } // If we are running side chain, we will need to do some extra works for beacon diff --git a/core/blockchain.go b/core/blockchain.go index 766bfb668..f6f50e71f 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -125,7 +125,7 @@ type BlockChain interface { // wrong. // // After insertion is done, all accumulated events will be fired. - InsertChain(chain types.Blocks, verifyHeaders bool, blockExecution bool) (int, error) + InsertChain(chain types.Blocks, verifyHeaders bool) (int, error) // InsertReceiptChain attempts to complete an already existing header chain with // transaction and receipt data. 
InsertReceiptChain(blockChain types.Blocks, receiptChain []types.Receipts) (int, error) diff --git a/core/blockchain_impl.go b/core/blockchain_impl.go index 2fbdb92d7..97660544d 100644 --- a/core/blockchain_impl.go +++ b/core/blockchain_impl.go @@ -1530,7 +1530,7 @@ func (bc *BlockChainImpl) GetMaxGarbageCollectedBlockNumber() int64 { return bc.maxGarbCollectedBlkNum } -func (bc *BlockChainImpl) InsertChain(chain types.Blocks, verifyHeaders bool, blockExecution bool) (int, error) { +func (bc *BlockChainImpl) InsertChain(chain types.Blocks, verifyHeaders bool) (int, error) { // if in tikv mode, writer node need preempt master or come be a follower if bc.isInitTiKV() && !bc.tikvPreemptMaster(bc.rangeBlock(chain)) { return len(chain), nil @@ -1574,17 +1574,10 @@ func (bc *BlockChainImpl) LeaderRotationMeta() LeaderRotationMeta { return bc.leaderRotationMeta.Clone() } -func (bc *BlockChainImpl) insertChain(chain types.Blocks, verifyHeaders bool, blockExecution bool) (int, []interface{}, []*types.Log, error) { - if blockExecution { - return bc.insertChainWithBlockExecution(chain, verifyHeaders) - } - return bc.insertChainWithoutBlockExecution(chain, verifyHeaders) -} - // insertChain will execute the actual chain insertion and event aggregation. The // only reason this method exists as a separate one is to make locking cleaner // with deferred statements. -func (bc *BlockChainImpl) insertChainWithBlockExecution(chain types.Blocks, verifyHeaders bool) (int, []interface{}, []*types.Log, error) { +func (bc *BlockChainImpl) insertChain(chain types.Blocks, verifyHeaders bool) (int, []interface{}, []*types.Log, error) { // Sanity check that we have something meaningful to import if len(chain) == 0 { return 0, nil, nil, ErrEmptyChain @@ -1696,7 +1689,7 @@ func (bc *BlockChainImpl) insertChainWithBlockExecution(chain types.Blocks, veri if len(winner) > 0 { // Import all the pruned blocks to make the state available bc.chainmu.Unlock() - _, evs, logs, err := bc.insertChainWithBlockExecution(winner, true /* verifyHeaders */) + _, evs, logs, err := bc.insertChain(winner, true /* verifyHeaders */) bc.chainmu.Lock() events, coalescedLogs = evs, logs diff --git a/core/blockchain_stub.go b/core/blockchain_stub.go index 3b9713804..437bc32e7 100644 --- a/core/blockchain_stub.go +++ b/core/blockchain_stub.go @@ -136,7 +136,7 @@ func (a Stub) GetMaxGarbageCollectedBlockNumber() int64 { return 0 } -func (a Stub) InsertChain(chain types.Blocks, verifyHeaders bool, blockExecution bool) (int, error) { +func (a Stub) InsertChain(chain types.Blocks, verifyHeaders bool) (int, error) { return 0, errors.Errorf("method InsertChain not implemented for %s", a.Name) } diff --git a/core/epochchain.go b/core/epochchain.go index 3df271b11..2dab28471 100644 --- a/core/epochchain.go +++ b/core/epochchain.go @@ -114,7 +114,7 @@ func (bc *EpochChain) Stop() { }) } -func (bc *EpochChain) InsertChain(blocks types.Blocks, _ bool, _ bool) (int, error) { +func (bc *EpochChain) InsertChain(blocks types.Blocks, _ bool) (int, error) { if len(blocks) == 0 { return 0, nil } diff --git a/core_test/shardchain_test.go b/core_test/shardchain_test.go index ad091482b..a6a9238ba 100644 --- a/core_test/shardchain_test.go +++ b/core_test/shardchain_test.go @@ -72,7 +72,7 @@ func TestAddNewBlock(t *testing.T) { nn := node.Blockchain().CurrentBlock() t.Log("[*]", nn.NumberU64(), nn.Hash().Hex(), nn.ParentHash()) - _, err = blockchain.InsertChain([]*types.Block{block}, false, true) + _, err = blockchain.InsertChain([]*types.Block{block}, false) 
require.NoError(t, err, "error when adding new block") meta := blockchain.LeaderRotationMeta() diff --git a/hmy/downloader/adapter.go b/hmy/downloader/adapter.go index 70e4ca325..c8758b506 100644 --- a/hmy/downloader/adapter.go +++ b/hmy/downloader/adapter.go @@ -27,6 +27,6 @@ type blockChain interface { engine.ChainReader Engine() engine.Engine - InsertChain(chain types.Blocks, verifyHeaders bool, blockExecution bool) (int, error) + InsertChain(chain types.Blocks, verifyHeaders bool) (int, error) WriteCommitSig(blockNum uint64, lastCommits []byte) error } diff --git a/hmy/downloader/adapter_test.go b/hmy/downloader/adapter_test.go index 3b7664007..4bc023b5c 100644 --- a/hmy/downloader/adapter_test.go +++ b/hmy/downloader/adapter_test.go @@ -60,7 +60,7 @@ func (bc *testBlockChain) currentBlockNumber() uint64 { return bc.curBN } -func (bc *testBlockChain) InsertChain(chain types.Blocks, verifyHeaders bool, blockExecution bool) (int, error) { +func (bc *testBlockChain) InsertChain(chain types.Blocks, verifyHeaders bool) (int, error) { bc.lock.Lock() defer bc.lock.Unlock() @@ -169,11 +169,11 @@ type testInsertHelper struct { } func (ch *testInsertHelper) verifyAndInsertBlock(block *types.Block) error { - _, err := ch.bc.InsertChain(types.Blocks{block}, true, true) + _, err := ch.bc.InsertChain(types.Blocks{block}, true) return err } func (ch *testInsertHelper) verifyAndInsertBlocks(blocks types.Blocks) (int, error) { - return ch.bc.InsertChain(blocks, true, true) + return ch.bc.InsertChain(blocks, true) } const ( diff --git a/hmy/downloader/beaconhelper.go b/hmy/downloader/beaconhelper.go index 2c7f05675..96d06ebf8 100644 --- a/hmy/downloader/beaconhelper.go +++ b/hmy/downloader/beaconhelper.go @@ -123,7 +123,7 @@ func (bh *beaconHelper) insertLastMileBlocks() (inserted int, bn uint64, err err } // TODO: Instruct the beacon helper to verify signatures. 
This may require some forks // in pub-sub message (add commit sigs in node.block.sync messages) - if _, err = bh.bc.InsertChain(types.Blocks{b}, true, true); err != nil { + if _, err = bh.bc.InsertChain(types.Blocks{b}, true); err != nil { bn-- return } diff --git a/hmy/downloader/downloader.go b/hmy/downloader/downloader.go index 378b1e630..9e132fd27 100644 --- a/hmy/downloader/downloader.go +++ b/hmy/downloader/downloader.go @@ -314,7 +314,7 @@ func verifyAndInsertBlock(bc blockChain, block *types.Block, blockExecution bool if err := bc.Engine().VerifyHeader(bc, block.Header(), true); err != nil { return errors.Wrap(err, "[VerifyHeader]") } - if _, err := bc.InsertChain(types.Blocks{block}, false, blockExecution); err != nil { + if _, err := bc.InsertChain(types.Blocks{block}, false); err != nil { return errors.Wrap(err, "[InsertChain]") } return nil diff --git a/hmy/downloader/shortrange.go b/hmy/downloader/shortrange.go index 2a705f99a..81adc8131 100644 --- a/hmy/downloader/shortrange.go +++ b/hmy/downloader/shortrange.go @@ -131,7 +131,7 @@ func (d *Downloader) doShortRangeSyncForEpochSync() (int, error) { // short circuit for no sync is needed return 0, nil } - n, err := d.bc.InsertChain(blocks, true, true) + n, err := d.bc.InsertChain(blocks, true) numBlocksInsertedShortRangeHistogramVec.With(d.promLabels()).Observe(float64(n)) if err != nil { sh.removeStreams([]sttypes.StreamID{streamID}) // Data provided by remote nodes is corrupted diff --git a/node/node_handler_test.go b/node/node_handler_test.go index 23c5498fe..867a9616d 100644 --- a/node/node_handler_test.go +++ b/node/node_handler_test.go @@ -69,7 +69,7 @@ func TestAddNewBlock(t *testing.T) { commitSigs, func() uint64 { return 0 }, common.Address{}, nil, nil, ) - _, err = node.Blockchain().InsertChain([]*types.Block{block}, true, true) + _, err = node.Blockchain().InsertChain([]*types.Block{block}, true) if err != nil { t.Errorf("error when adding new block %v", err) } diff --git a/node/node_newblock_test.go b/node/node_newblock_test.go index b8ca6c9e0..5780b7cda 100644 --- a/node/node_newblock_test.go +++ b/node/node_newblock_test.go @@ -78,7 +78,7 @@ func TestFinalizeNewBlockAsync(t *testing.T) { t.Error("New block is not verified successfully:", err) } - node.Blockchain().InsertChain(types.Blocks{block}, false, true) + node.Blockchain().InsertChain(types.Blocks{block}, false) node.Worker.UpdateCurrent() diff --git a/test/chain/main.go b/test/chain/main.go index d3f518dc2..4b935292f 100644 --- a/test/chain/main.go +++ b/test/chain/main.go @@ -134,7 +134,7 @@ func fundFaucetContract(chain core.BlockChain) { }() block, _ := contractworker. 
FinalizeNewBlock(commitSigs, func() uint64 { return 0 }, common.Address{}, nil, nil) - _, err = chain.InsertChain(types.Blocks{block}, true /* verifyHeaders */, true) + _, err = chain.InsertChain(types.Blocks{block}, true /* verifyHeaders */) if err != nil { fmt.Println(err) } @@ -184,7 +184,7 @@ func callFaucetContractToFundAnAddress(chain core.BlockChain) { block, _ := contractworker.FinalizeNewBlock( commitSigs, func() uint64 { return 0 }, common.Address{}, nil, nil, ) - _, err = chain.InsertChain(types.Blocks{block}, true /* verifyHeaders */, true) + _, err = chain.InsertChain(types.Blocks{block}, true /* verifyHeaders */) if err != nil { fmt.Println(err) } @@ -227,7 +227,7 @@ func main() { gen.SetShardID(0) gen.AddTx(pendingTxs[i].(*types.Transaction)) }) - if _, err := chain.InsertChain(blocks, true /* verifyHeaders */, true); err != nil { + if _, err := chain.InsertChain(blocks, true /* verifyHeaders */); err != nil { log.Fatal(err) } } From 36d2abd0a418cb09e8075b106df9deb161ba8cb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Wed, 4 Oct 2023 09:52:35 +0800 Subject: [PATCH 40/56] remove extra blockExecutions --- hmy/downloader/downloader.go | 6 +++--- hmy/downloader/longrange.go | 2 +- hmy/downloader/shortrange.go | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hmy/downloader/downloader.go b/hmy/downloader/downloader.go index 9e132fd27..01ec242ab 100644 --- a/hmy/downloader/downloader.go +++ b/hmy/downloader/downloader.go @@ -280,16 +280,16 @@ func (e *sigVerifyErr) Error() string { return fmt.Sprintf("[VerifyHeaderSignature] %v", e.err.Error()) } -func verifyAndInsertBlocks(bc blockChain, blockExecution bool, blocks types.Blocks) (int, error) { +func verifyAndInsertBlocks(bc blockChain, blocks types.Blocks) (int, error) { for i, block := range blocks { - if err := verifyAndInsertBlock(bc, block, blockExecution, blocks[i+1:]...); err != nil { + if err := verifyAndInsertBlock(bc, block, blocks[i+1:]...); err != nil { return i, err } } return len(blocks), nil } -func verifyAndInsertBlock(bc blockChain, block *types.Block, blockExecution bool, nextBlocks ...*types.Block) error { +func verifyAndInsertBlock(bc blockChain, block *types.Block, nextBlocks ...*types.Block) error { var ( sigBytes bls.SerializedSignature bitmap []byte diff --git a/hmy/downloader/longrange.go b/hmy/downloader/longrange.go index fc4d4962f..4d4935b8f 100644 --- a/hmy/downloader/longrange.go +++ b/hmy/downloader/longrange.go @@ -210,7 +210,7 @@ func (lsi *lrSyncIter) processBlocks(results []*blockResult, targetBN uint64) { blocks := blockResultsToBlocks(results) for i, block := range blocks { - if err := verifyAndInsertBlock(lsi.bc, block, true); err != nil { + if err := verifyAndInsertBlock(lsi.bc, block); err != nil { lsi.logger.Warn().Err(err).Uint64("target block", targetBN). Uint64("block number", block.NumberU64()). 
Msg("insert blocks failed in long range") diff --git a/hmy/downloader/shortrange.go b/hmy/downloader/shortrange.go index 81adc8131..8276911d4 100644 --- a/hmy/downloader/shortrange.go +++ b/hmy/downloader/shortrange.go @@ -74,7 +74,7 @@ func (d *Downloader) doShortRangeSync() (int, error) { } d.logger.Info().Int("num blocks", len(blocks)).Msg("getBlockByHashes result") - n, err := verifyAndInsertBlocks(d.bc, true, blocks) + n, err := verifyAndInsertBlocks(d.bc, blocks) numBlocksInsertedShortRangeHistogramVec.With(d.promLabels()).Observe(float64(n)) if err != nil { d.logger.Warn().Err(err).Int("blocks inserted", n).Msg("Insert block failed") From 9629d9ca56caad41cb7cceae59b6fc31865d618f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Wed, 4 Oct 2023 10:02:07 +0800 Subject: [PATCH 41/56] remove blockExecution option from staged stream sync --- api/service/stagedstreamsync/adapter.go | 2 +- api/service/stagedstreamsync/sig_verify.go | 8 ++++---- api/service/stagedstreamsync/stage_short_range.go | 2 +- api/service/stagedstreamsync/stage_state.go | 12 ++++++++---- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/api/service/stagedstreamsync/adapter.go b/api/service/stagedstreamsync/adapter.go index 9988ccc51..ca9c6a678 100644 --- a/api/service/stagedstreamsync/adapter.go +++ b/api/service/stagedstreamsync/adapter.go @@ -31,6 +31,6 @@ type blockChain interface { engine.ChainReader Engine() engine.Engine - InsertChain(chain types.Blocks, verifyHeaders bool, blockExecution bool) (int, error) + InsertChain(chain types.Blocks, verifyHeaders bool) (int, error) WriteCommitSig(blockNum uint64, lastCommits []byte) error } diff --git a/api/service/stagedstreamsync/sig_verify.go b/api/service/stagedstreamsync/sig_verify.go index bc204fb1a..bdf5a2107 100644 --- a/api/service/stagedstreamsync/sig_verify.go +++ b/api/service/stagedstreamsync/sig_verify.go @@ -20,9 +20,9 @@ func (e *sigVerifyErr) Error() string { return fmt.Sprintf("[VerifyHeaderSignature] %v", e.err.Error()) } -func verifyAndInsertBlocks(bc blockChain, blocks types.Blocks, blockExecution bool) (int, error) { +func verifyAndInsertBlocks(bc blockChain, blocks types.Blocks) (int, error) { for i, block := range blocks { - if err := verifyAndInsertBlock(bc, block, blockExecution, blocks[i+1:]...); err != nil { + if err := verifyAndInsertBlock(bc, block, blocks[i+1:]...); err != nil { return i, err } } @@ -65,13 +65,13 @@ func verifyBlock(bc blockChain, block *types.Block, nextBlocks ...*types.Block) return nil } -func verifyAndInsertBlock(bc blockChain, block *types.Block, blockExecution bool, nextBlocks ...*types.Block) error { +func verifyAndInsertBlock(bc blockChain, block *types.Block, nextBlocks ...*types.Block) error { //verify block if err := verifyBlock(bc, block, nextBlocks...); err != nil { return err } // insert block - if _, err := bc.InsertChain(types.Blocks{block}, false, blockExecution); err != nil { + if _, err := bc.InsertChain(types.Blocks{block}, false); err != nil { return errors.Wrap(err, "[InsertChain]") } return nil diff --git a/api/service/stagedstreamsync/stage_short_range.go b/api/service/stagedstreamsync/stage_short_range.go index a651490eb..d771cd660 100644 --- a/api/service/stagedstreamsync/stage_short_range.go +++ b/api/service/stagedstreamsync/stage_short_range.go @@ -138,7 +138,7 @@ func (sr *StageShortRange) doShortRangeSync(ctx context.Context, s *StageState) utils.Logger().Info().Int("num blocks", 
len(blocks)).Msg("getBlockByHashes result") - n, err := verifyAndInsertBlocks(sr.configs.bc, blocks, true) + n, err := verifyAndInsertBlocks(sr.configs.bc, blocks) numBlocksInsertedShortRangeHistogramVec.With(s.state.promLabels()).Observe(float64(n)) if err != nil { utils.Logger().Warn().Err(err).Int("blocks inserted", n).Msg("Insert block failed") diff --git a/api/service/stagedstreamsync/stage_state.go b/api/service/stagedstreamsync/stage_state.go index f579019f6..1294edf70 100644 --- a/api/service/stagedstreamsync/stage_state.go +++ b/api/service/stagedstreamsync/stage_state.go @@ -23,7 +23,6 @@ type StageStatesCfg struct { db kv.RwDB blockDBs []kv.RwDB concurrency int - blockExecution bool logger zerolog.Logger logProgress bool } @@ -39,7 +38,6 @@ func NewStageStatesCfg( db kv.RwDB, blockDBs []kv.RwDB, concurrency int, - blockExecution bool, logger zerolog.Logger, logProgress bool) StageStatesCfg { @@ -48,7 +46,6 @@ func NewStageStatesCfg( db: db, blockDBs: blockDBs, concurrency: concurrency, - blockExecution: blockExecution, logger: logger, logProgress: logProgress, } @@ -56,6 +53,13 @@ func NewStageStatesCfg( // Exec progresses States stage in the forward direction func (stg *StageStates) Exec(ctx context.Context, firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { + // only execute this stage in full sync mode + if s.state.config.SyncMode != FullSync { + if s.state.status.pivotBlock != nil && s.state.bc.CurrentBlock().NumberU64() <= s.state.status.pivotBlock.NumberU64() { + return nil + } + } + // for short range sync, skip this step if !s.state.initSync { return nil @@ -160,7 +164,7 @@ func (stg *StageStates) Exec(ctx context.Context, firstCycle bool, invalidBlockR return ErrInvalidBlockNumber } - if err := verifyAndInsertBlock(stg.configs.bc, block, stg.configs.blockExecution); err != nil { + if err := verifyAndInsertBlock(stg.configs.bc, block); err != nil { stg.configs.logger.Warn().Err(err).Uint64("cycle target block", targetHeight). Uint64("block number", block.NumberU64()). 
Msg(WrapStagedSyncMsg("insert blocks failed in long range")) From e4dcda67e48457369eda114d48c0ed855a6222fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Wed, 4 Oct 2023 10:17:22 +0800 Subject: [PATCH 42/56] refactor staged stream sync, fix the state sync functions --- api/service/stagedstreamsync/block_manager.go | 48 +++-- api/service/stagedstreamsync/const.go | 4 + .../stagedstreamsync/default_stages.go | 3 + .../receipt_download_manager.go | 45 +++-- api/service/stagedstreamsync/stage_bodies.go | 9 +- api/service/stagedstreamsync/stage_heads.go | 11 +- .../stagedstreamsync/stage_receipts.go | 155 ++++++++++------ api/service/stagedstreamsync/stage_state.go | 24 +-- .../stagedstreamsync/stage_statesync.go | 82 ++++++--- .../stagedstreamsync/staged_stream_sync.go | 16 +- .../state_download_manager.go | 170 ++++++++++++++---- api/service/stagedstreamsync/syncing.go | 144 ++++++++++++++- api/service/stagedstreamsync/types.go | 7 +- 13 files changed, 533 insertions(+), 185 deletions(-) diff --git a/api/service/stagedstreamsync/block_manager.go b/api/service/stagedstreamsync/block_manager.go index 28c966b4d..273078c59 100644 --- a/api/service/stagedstreamsync/block_manager.go +++ b/api/service/stagedstreamsync/block_manager.go @@ -3,6 +3,7 @@ package stagedstreamsync import ( "sync" + "github.com/ethereum/go-ethereum/common" sttypes "github.com/harmony-one/harmony/p2p/stream/types" "github.com/ledgerwatch/erigon-lib/kv" "github.com/rs/zerolog" @@ -11,6 +12,7 @@ import ( type BlockDownloadDetails struct { loopID int streamID sttypes.StreamID + rootHash common.Hash } // blockDownloadManager is the helper structure for get blocks request management @@ -19,11 +21,11 @@ type blockDownloadManager struct { tx kv.RwTx targetBN uint64 - requesting map[uint64]struct{} // block numbers that have been assigned to workers but not received - processing map[uint64]struct{} // block numbers received requests but not inserted - retries *prioritizedNumbers // requests where error happens - rq *resultQueue // result queue wait to be inserted into blockchain - bdd map[uint64]BlockDownloadDetails // details about how this block was downloaded + requesting map[uint64]struct{} // block numbers that have been assigned to workers but not received + processing map[uint64]struct{} // block numbers received requests but not inserted + retries *prioritizedNumbers // requests where error happens + rq *resultQueue // result queue wait to be inserted into blockchain + bdd map[uint64]*BlockDownloadDetails // details about how this block was downloaded logger zerolog.Logger lock sync.Mutex @@ -38,26 +40,26 @@ func newBlockDownloadManager(tx kv.RwTx, chain blockChain, targetBN uint64, logg processing: make(map[uint64]struct{}), retries: newPrioritizedNumbers(), rq: newResultQueue(), - bdd: make(map[uint64]BlockDownloadDetails), + bdd: make(map[uint64]*BlockDownloadDetails), logger: logger, } } // GetNextBatch get the next block numbers batch -func (gbm *blockDownloadManager) GetNextBatch() []uint64 { +func (gbm *blockDownloadManager) GetNextBatch(curHeight uint64) []uint64 { gbm.lock.Lock() defer gbm.lock.Unlock() cap := BlocksPerRequest - bns := gbm.getBatchFromRetries(cap) + bns := gbm.getBatchFromRetries(cap, curHeight) if len(bns) > 0 { cap -= len(bns) gbm.addBatchToRequesting(bns) } if gbm.availableForMoreTasks() { - addBNs := gbm.getBatchFromUnprocessed(cap) + addBNs := gbm.getBatchFromUnprocessed(cap, curHeight) 
gbm.addBatchToRequesting(addBNs) bns = append(bns, addBNs...) } @@ -88,7 +90,7 @@ func (gbm *blockDownloadManager) HandleRequestResult(bns []uint64, blockBytes [] gbm.retries.push(bn) } else { gbm.processing[bn] = struct{}{} - gbm.bdd[bn] = BlockDownloadDetails{ + gbm.bdd[bn] = &BlockDownloadDetails{ loopID: loopID, streamID: streamID, } @@ -107,7 +109,7 @@ func (gbm *blockDownloadManager) SetDownloadDetails(bns []uint64, loopID int, st defer gbm.lock.Unlock() for _, bn := range bns { - gbm.bdd[bn] = BlockDownloadDetails{ + gbm.bdd[bn] = &BlockDownloadDetails{ loopID: loopID, streamID: streamID, } @@ -123,18 +125,33 @@ func (gbm *blockDownloadManager) GetDownloadDetails(blockNumber uint64) (loopID return gbm.bdd[blockNumber].loopID, gbm.bdd[blockNumber].streamID } +// SetRootHash sets the root hash for a specific block +func (gbm *blockDownloadManager) SetRootHash(blockNumber uint64, root common.Hash) { + gbm.lock.Lock() + defer gbm.lock.Unlock() + + gbm.bdd[blockNumber].rootHash = root +} + +// GetRootHash returns the root hash for a specific block +func (gbm *blockDownloadManager) GetRootHash(blockNumber uint64) common.Hash { + gbm.lock.Lock() + defer gbm.lock.Unlock() + + return gbm.bdd[blockNumber].rootHash +} + // getBatchFromRetries get the block number batch to be requested from retries. -func (gbm *blockDownloadManager) getBatchFromRetries(cap int) []uint64 { +func (gbm *blockDownloadManager) getBatchFromRetries(cap int, fromBlockNumber uint64) []uint64 { var ( requestBNs []uint64 - curHeight = gbm.chain.CurrentBlock().NumberU64() ) for cnt := 0; cnt < cap; cnt++ { bn := gbm.retries.pop() if bn == 0 { break // no more retries } - if bn <= curHeight { + if bn <= fromBlockNumber { continue } requestBNs = append(requestBNs, bn) @@ -143,10 +160,9 @@ func (gbm *blockDownloadManager) getBatchFromRetries(cap int) []uint64 { } // getBatchFromUnprocessed returns a batch of block numbers to be requested from unprocessed. -func (gbm *blockDownloadManager) getBatchFromUnprocessed(cap int) []uint64 { +func (gbm *blockDownloadManager) getBatchFromUnprocessed(cap int, curHeight uint64) []uint64 { var ( requestBNs []uint64 - curHeight = gbm.chain.CurrentBlock().NumberU64() ) bn := curHeight + 1 // TODO: this algorithm can be potentially optimized. diff --git a/api/service/stagedstreamsync/const.go b/api/service/stagedstreamsync/const.go index c87932bf7..e172854ec 100644 --- a/api/service/stagedstreamsync/const.go +++ b/api/service/stagedstreamsync/const.go @@ -38,6 +38,10 @@ const ( // ShortRangeTimeout is the timeout for each short range sync, which allow short range sync // to restart automatically when stuck in `getBlockHashes` ShortRangeTimeout time.Duration = 1 * time.Minute + + // pivot block distance ranges + MinPivotDistanceToHead uint64 = 1028 + MaxPivotDistanceToHead uint64 = 2048 ) // SyncMode represents the synchronization mode of the downloader. 
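The two pivot-distance constants added above bound how far the fast/snap-sync pivot may trail the estimated chain head. A minimal sketch of how they are meant to interact, assuming the stagedstreamsync package context (the helper name and bare-uint64 signature are illustrative; the real logic lives in checkPivot later in this series and also persists the pivot via rawdb):

func choosePivot(estimatedHeight, currentPivot uint64) uint64 {
	// Early chains are still fully executed: no pivot, plain full sync.
	if estimatedHeight < MaxPivotDistanceToHead {
		return 0
	}
	// Pick a fresh pivot, or move an existing one forward once it has
	// drifted more than MaxPivotDistanceToHead blocks behind the estimated
	// head, re-anchoring it MinPivotDistanceToHead blocks below the head.
	if currentPivot == 0 || currentPivot < estimatedHeight-MaxPivotDistanceToHead {
		return estimatedHeight - MinPivotDistanceToHead
	}
	return currentPivot
}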
diff --git a/api/service/stagedstreamsync/default_stages.go b/api/service/stagedstreamsync/default_stages.go index 60e9f4962..f869ee5fe 100644 --- a/api/service/stagedstreamsync/default_stages.go +++ b/api/service/stagedstreamsync/default_stages.go @@ -65,6 +65,7 @@ func initFastSyncStagesOrder() { BlockBodies, Receipts, StateSync, + States, LastMile, Finish, } @@ -72,6 +73,7 @@ func initFastSyncStagesOrder() { StagesRevertOrder = RevertOrder{ Finish, LastMile, + States, StateSync, Receipts, BlockBodies, @@ -83,6 +85,7 @@ func initFastSyncStagesOrder() { StagesCleanUpOrder = CleanUpOrder{ Finish, LastMile, + States, StateSync, Receipts, BlockBodies, diff --git a/api/service/stagedstreamsync/receipt_download_manager.go b/api/service/stagedstreamsync/receipt_download_manager.go index 2bab10ade..55d949082 100644 --- a/api/service/stagedstreamsync/receipt_download_manager.go +++ b/api/service/stagedstreamsync/receipt_download_manager.go @@ -10,10 +10,15 @@ import ( ) type ReceiptDownloadDetails struct { - loopID int streamID sttypes.StreamID } +type Received struct { + streamID sttypes.StreamID + block *types.Block + receipts types.Receipts +} + // receiptDownloadManager is the helper structure for get receipts request management type receiptDownloadManager struct { chain blockChain @@ -25,6 +30,8 @@ type receiptDownloadManager struct { retries *prioritizedNumbers // requests where error happens rdd map[uint64]ReceiptDownloadDetails // details about how this receipt was downloaded + received map[uint64]Received + logger zerolog.Logger lock sync.Mutex } @@ -38,25 +45,27 @@ func newReceiptDownloadManager(tx kv.RwTx, chain blockChain, targetBN uint64, lo processing: make(map[uint64]struct{}), retries: newPrioritizedNumbers(), rdd: make(map[uint64]ReceiptDownloadDetails), - logger: logger, + received: make(map[uint64]Received), + + logger: logger, } } // GetNextBatch get the next receipt numbers batch -func (rdm *receiptDownloadManager) GetNextBatch() []uint64 { +func (rdm *receiptDownloadManager) GetNextBatch(curHeight uint64) []uint64 { rdm.lock.Lock() defer rdm.lock.Unlock() cap := ReceiptsPerRequest - bns := rdm.getBatchFromRetries(cap) + bns := rdm.getBatchFromRetries(cap, curHeight) if len(bns) > 0 { cap -= len(bns) rdm.addBatchToRequesting(bns) } if rdm.availableForMoreTasks() { - addBNs := rdm.getBatchFromUnprocessed(cap) + addBNs := rdm.getBatchFromUnprocessed(cap, curHeight) rdm.addBatchToRequesting(addBNs) bns = append(bns, addBNs...) 
} @@ -65,7 +74,7 @@ func (rdm *receiptDownloadManager) GetNextBatch() []uint64 { } // HandleRequestError handles the error result -func (rdm *receiptDownloadManager) HandleRequestError(bns []uint64, err error, streamID sttypes.StreamID) { +func (rdm *receiptDownloadManager) HandleRequestError(bns []uint64, err error) { rdm.lock.Lock() defer rdm.lock.Unlock() @@ -77,33 +86,35 @@ func (rdm *receiptDownloadManager) HandleRequestError(bns []uint64, err error, s } // HandleRequestResult handles get receipts result -func (rdm *receiptDownloadManager) HandleRequestResult(bns []uint64, receipts []types.Receipts, loopID int, streamID sttypes.StreamID) error { +func (rdm *receiptDownloadManager) HandleRequestResult(bns []uint64, receivedReceipts []types.Receipts, receivedBlocks []*types.Block, streamID sttypes.StreamID) error { rdm.lock.Lock() defer rdm.lock.Unlock() for i, bn := range bns { delete(rdm.requesting, bn) - if indexExists(receipts, i) { + if !indexExists(receivedBlocks, i) || !indexExists(receivedReceipts, i) { rdm.retries.push(bn) } else { rdm.processing[bn] = struct{}{} rdm.rdd[bn] = ReceiptDownloadDetails{ - loopID: loopID, streamID: streamID, } + rdm.received[bn] = Received{ + block: receivedBlocks[i], + receipts: receivedReceipts[i], + } } } return nil } // SetDownloadDetails sets the download details for a batch of blocks -func (rdm *receiptDownloadManager) SetDownloadDetails(bns []uint64, loopID int, streamID sttypes.StreamID) error { +func (rdm *receiptDownloadManager) SetDownloadDetails(bns []uint64, streamID sttypes.StreamID) error { rdm.lock.Lock() defer rdm.lock.Unlock() for _, bn := range bns { rdm.rdd[bn] = ReceiptDownloadDetails{ - loopID: loopID, streamID: streamID, } } @@ -111,25 +122,24 @@ func (rdm *receiptDownloadManager) SetDownloadDetails(bns []uint64, loopID int, } // GetDownloadDetails returns the download details for a certain block number -func (rdm *receiptDownloadManager) GetDownloadDetails(blockNumber uint64) (loopID int, streamID sttypes.StreamID) { +func (rdm *receiptDownloadManager) GetDownloadDetails(blockNumber uint64) (streamID sttypes.StreamID) { rdm.lock.Lock() defer rdm.lock.Unlock() - return rdm.rdd[blockNumber].loopID, rdm.rdd[blockNumber].streamID + return rdm.rdd[blockNumber].streamID } // getBatchFromRetries get the receipt number batch to be requested from retries. -func (rdm *receiptDownloadManager) getBatchFromRetries(cap int) []uint64 { +func (rdm *receiptDownloadManager) getBatchFromRetries(cap int, fromBlockNumber uint64) []uint64 { var ( requestBNs []uint64 - curHeight = rdm.chain.CurrentBlock().NumberU64() ) for cnt := 0; cnt < cap; cnt++ { bn := rdm.retries.pop() if bn == 0 { break // no more retries } - if bn <= curHeight { + if bn <= fromBlockNumber { continue } requestBNs = append(requestBNs, bn) @@ -138,10 +148,9 @@ func (rdm *receiptDownloadManager) getBatchFromRetries(cap int) []uint64 { } // getBatchFromUnprocessed returns a batch of receipt numbers to be requested from unprocessed. -func (rdm *receiptDownloadManager) getBatchFromUnprocessed(cap int) []uint64 { +func (rdm *receiptDownloadManager) getBatchFromUnprocessed(cap int, curHeight uint64) []uint64 { var ( requestBNs []uint64 - curHeight = rdm.chain.CurrentBlock().NumberU64() ) bn := curHeight + 1 // TODO: this algorithm can be potentially optimized. 
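With the change above, the receipt download manager no longer inserts receipts from inside a worker; each worker parks its verified (block, receipts) pair in rdm.received, keyed by block number, and the stage drains the buffer in order afterwards. A compact sketch of that drain step, assuming the surrounding stagedstreamsync package and its imports (the helper name and error text are illustrative; the patch's own version is insertBlocksAndReceipts in stage_receipts.go):

func drainReceipts(bc core.BlockChain, rdm *receiptDownloadManager, from, to uint64) error {
	var (
		blocks   types.Blocks
		receipts []types.Receipts
	)
	// Walk the buffered results in ascending block order so the chain is
	// extended contiguously; a gap means a worker failed and the range must
	// be retried before insertion.
	for bn := from; bn <= to; bn++ {
		got, ok := rdm.received[bn]
		if !ok {
			return fmt.Errorf("receipts for block %d not downloaded yet", bn)
		}
		blocks = append(blocks, got.block)
		receipts = append(receipts, got.receipts)
	}
	_, err := bc.InsertReceiptChain(blocks, receipts)
	return err
}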
diff --git a/api/service/stagedstreamsync/stage_bodies.go b/api/service/stagedstreamsync/stage_bodies.go index 8cce5207c..9fdf4681a 100644 --- a/api/service/stagedstreamsync/stage_bodies.go +++ b/api/service/stagedstreamsync/stage_bodies.go @@ -70,7 +70,7 @@ func (b *StageBodies) Exec(ctx context.Context, firstCycle bool, invalidBlockRev } maxHeight := s.state.status.targetBN - currentHead := b.configs.bc.CurrentBlock().NumberU64() + currentHead := s.state.CurrentBlockNumber() if currentHead >= maxHeight { return nil } @@ -138,7 +138,7 @@ func (b *StageBodies) Exec(ctx context.Context, firstCycle bool, invalidBlockRev // runBlockWorkerLoop creates a work loop for download blocks func (b *StageBodies) runBlockWorkerLoop(ctx context.Context, gbm *blockDownloadManager, wg *sync.WaitGroup, loopID int, s *StageState, startTime time.Time) { - currentBlock := int(b.configs.bc.CurrentBlock().NumberU64()) + currentBlock := int(s.state.CurrentBlockNumber()) defer wg.Done() @@ -148,7 +148,8 @@ func (b *StageBodies) runBlockWorkerLoop(ctx context.Context, gbm *blockDownload return default: } - batch := gbm.GetNextBatch() + curHeight := s.state.CurrentBlockNumber() + batch := gbm.GetNextBatch(curHeight) if len(batch) == 0 { select { case <-ctx.Done(): @@ -434,7 +435,7 @@ func (b *StageBodies) Revert(ctx context.Context, firstCycle bool, u *RevertStat defer tx.Rollback() } // save progress - currentHead := b.configs.bc.CurrentBlock().NumberU64() + currentHead := s.state.CurrentBlockNumber() if err = s.Update(tx, currentHead); err != nil { utils.Logger().Error(). Err(err). diff --git a/api/service/stagedstreamsync/stage_heads.go b/api/service/stagedstreamsync/stage_heads.go index c917884a3..99e0248ba 100644 --- a/api/service/stagedstreamsync/stage_heads.go +++ b/api/service/stagedstreamsync/stage_heads.go @@ -53,7 +53,7 @@ func (heads *StageHeads) Exec(ctx context.Context, firstCycle bool, invalidBlock maxHeight := s.state.status.targetBN maxBlocksPerSyncCycle := uint64(1024) // TODO: should be in config -> s.state.MaxBlocksPerSyncCycle - currentHeight := heads.configs.bc.CurrentBlock().NumberU64() + currentHeight := s.state.CurrentBlockNumber() s.state.currentCycle.TargetHeight = maxHeight targetHeight := uint64(0) if errV := CreateView(ctx, heads.configs.db, tx, func(etx kv.Tx) (err error) { @@ -89,6 +89,15 @@ func (heads *StageHeads) Exec(ctx context.Context, firstCycle bool, invalidBlock targetHeight = currentHeight + maxBlocksPerSyncCycle } + // check pivot: if chain hasn't reached to pivot yet + if s.state.status.pivotBlock != nil && s.state.CurrentBlockNumber() < s.state.status.pivotBlock.NumberU64() { + // set target height on the block before pivot + // pivot block would be downloaded by StateSync stage + if targetHeight >= s.state.status.pivotBlock.NumberU64() { + targetHeight = s.state.status.pivotBlock.NumberU64() - 1 + } + } + s.state.currentCycle.TargetHeight = targetHeight if err := s.Update(tx, targetHeight); err != nil { diff --git a/api/service/stagedstreamsync/stage_receipts.go b/api/service/stagedstreamsync/stage_receipts.go index fb0af99bc..0a2d8ab02 100644 --- a/api/service/stagedstreamsync/stage_receipts.go +++ b/api/service/stagedstreamsync/stage_receipts.go @@ -51,6 +51,11 @@ func NewStageReceiptsCfg(bc core.BlockChain, db kv.RwDB, blockDBs []kv.RwDB, con // Exec progresses receipts stage in the forward direction func (r *StageReceipts) Exec(ctx context.Context, firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { + // only execute 
this stage in fast/snap sync mode + if s.state.status.pivotBlock == nil || s.state.CurrentBlockNumber() >= s.state.status.pivotBlock.NumberU64() { + return nil + } + useInternalTx := tx == nil if invalidBlockRevert { @@ -63,7 +68,7 @@ func (r *StageReceipts) Exec(ctx context.Context, firstCycle bool, invalidBlockR } maxHeight := s.state.status.targetBN - currentHead := r.configs.bc.CurrentBlock().NumberU64() + currentHead := s.state.CurrentBlockNumber() if currentHead >= maxHeight { return nil } @@ -91,21 +96,6 @@ func (r *StageReceipts) Exec(ctx context.Context, firstCycle bool, invalidBlockR startTime := time.Now() // startBlock := currProgress - // prepare db transactions - txs := make([]kv.RwTx, r.configs.concurrency) - for i := 0; i < r.configs.concurrency; i++ { - txs[i], err = r.configs.blockDBs[i].BeginRw(ctx) - if err != nil { - return err - } - } - - defer func() { - for i := 0; i < r.configs.concurrency; i++ { - txs[i].Rollback() - } - }() - if r.configs.logProgress { fmt.Print("\033[s") // save the cursor position } @@ -119,19 +109,53 @@ func (r *StageReceipts) Exec(ctx context.Context, firstCycle bool, invalidBlockR defer tx.Rollback() } - // Fetch blocks from neighbors - s.state.rdm = newReceiptDownloadManager(tx, r.configs.bc, targetHeight, s.state.logger) + for { + // check if there is no any more to download break the loop + curBn := s.state.CurrentBlockNumber() + if curBn == targetHeight { + break + } - // Setup workers to fetch blocks from remote node - var wg sync.WaitGroup + // calculate the block numbers range to download + toBn := curBn + uint64(ReceiptsPerRequest*s.state.config.Concurrency) + if toBn > targetHeight { + toBn = targetHeight + } - for i := 0; i != s.state.config.Concurrency; i++ { - wg.Add(1) - go r.runReceiptWorkerLoop(ctx, s.state.rdm, &wg, i, s, txs, startTime) + // Fetch receipts from connected peers + rdm := newReceiptDownloadManager(tx, r.configs.bc, toBn, s.state.logger) + + // Setup workers to fetch blocks from remote node + var wg sync.WaitGroup + + for i := 0; i < s.state.config.Concurrency; i++ { + wg.Add(1) + go func() { + // prepare db transactions + txs := make([]kv.RwTx, r.configs.concurrency) + for i := 0; i < r.configs.concurrency; i++ { + txs[i], err = r.configs.blockDBs[i].BeginRw(ctx) + if err != nil { + return + } + } + // rollback the transactions after worker loop + defer func() { + for i := 0; i < r.configs.concurrency; i++ { + txs[i].Rollback() + } + }() + + r.runReceiptWorkerLoop(ctx, rdm, &wg, s, txs, startTime) + }() + } + wg.Wait() + // insert all downloaded blocks and receipts to chain + if err := r.insertBlocksAndReceipts(ctx, rdm, toBn, s); err != nil { + utils.Logger().Err(err).Msg(WrapStagedSyncMsg("InsertReceiptChain failed")) + } } - wg.Wait() - if useInternalTx { if err := tx.Commit(); err != nil { return err @@ -141,10 +165,52 @@ func (r *StageReceipts) Exec(ctx context.Context, firstCycle bool, invalidBlockR return nil } +func (r *StageReceipts) insertBlocksAndReceipts(ctx context.Context, rdm *receiptDownloadManager, toBn uint64, s *StageState) error { + if len(rdm.received) == 0 { + return nil + } + var ( + bns []uint64 + blocks []*types.Block + receipts []types.Receipts + streamIDs []sttypes.StreamID + ) + // populate blocks and receipts in separate array + // this way helps to sort blocks and receipts by block number + for bn := s.state.CurrentBlockNumber() + 1; bn <= toBn; bn++ { + if received, ok := rdm.received[bn]; !ok { + return errors.New("some blocks are missing") + } else { + bns = append(bns, bn) + 
blocks = append(blocks, received.block) + receipts = append(receipts, received.receipts) + streamIDs = append(streamIDs, received.streamID) + } + } + // insert sorted blocks and receipts to chain + if inserted, err := r.configs.bc.InsertReceiptChain(blocks, receipts); err != nil { + utils.Logger().Err(err). + Interface("streams", streamIDs). + Interface("block numbers", bns). + Msg(WrapStagedSyncMsg("InsertReceiptChain failed")) + rdm.HandleRequestError(bns, err) + return fmt.Errorf("InsertReceiptChain failed: %s", err.Error()) + } else { + if inserted != len(blocks) { + utils.Logger().Warn(). + Interface("block numbers", bns). + Int("inserted", inserted). + Int("blocks to insert", len(blocks)). + Msg(WrapStagedSyncMsg("InsertReceiptChain couldn't insert all downloaded blocks/receipts")) + } + } + return nil +} + // runReceiptWorkerLoop creates a work loop for download receipts -func (r *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDownloadManager, wg *sync.WaitGroup, loopID int, s *StageState, txs []kv.RwTx, startTime time.Time) { +func (r *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDownloadManager, wg *sync.WaitGroup, s *StageState, txs []kv.RwTx, startTime time.Time) { - currentBlock := int(r.configs.bc.CurrentBlock().NumberU64()) + currentBlock := int(s.state.CurrentBlockNumber()) gbm := s.state.gbm defer wg.Done() @@ -156,7 +222,8 @@ func (r *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDo default: } // get next batch of block numbers - batch := rdm.GetNextBatch() + curHeight := s.state.CurrentBlockNumber() + batch := rdm.GetNextBatch(curHeight) if len(batch) == 0 { select { case <-ctx.Done(): @@ -168,6 +235,7 @@ func (r *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDo // retrieve corresponding blocks from cache db var hashes []common.Hash var blocks []*types.Block + for _, bn := range batch { blkKey := marshalData(bn) loopID, _ := gbm.GetDownloadDetails(bn) @@ -197,7 +265,8 @@ func (r *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDo return } // receiptHash := s.state.currentCycle.ReceiptHashes[bn] - hashes = append(hashes, block.Header().ReceiptHash()) + gbm.SetRootHash(bn, block.Header().Root()) + hashes = append(hashes, block.Header().Hash()) blocks = append(blocks, block) } @@ -213,34 +282,10 @@ func (r *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDo Interface("block numbers", batch). Msg(WrapStagedSyncMsg("downloadRawBlocks failed")) err = errors.Wrap(err, "request error") - rdm.HandleRequestError(batch, err, stid) - } else if receipts == nil || len(receipts) == 0 { - utils.Logger().Warn(). - Str("stream", string(stid)). - Interface("block numbers", batch). - Msg(WrapStagedSyncMsg("downloadRawBlocks failed, received empty reciptBytes")) - err := errors.New("downloadRawBlocks received empty reciptBytes") - rdm.HandleRequestError(batch, err, stid) + rdm.HandleRequestError(batch, err) } else { - // insert block and receipts to chain - if inserted, err := r.configs.bc.InsertReceiptChain(blocks, receipts); err != nil { - utils.Logger().Err(err). - Str("stream", string(stid)). - Interface("block numbers", batch). - Msg(WrapStagedSyncMsg("InsertReceiptChain failed")) - err := errors.New("InsertReceiptChain failed") - rdm.HandleRequestError(batch, err, stid) - } else { - if inserted != len(blocks) { - utils.Logger().Warn(). - Interface("block numbers", batch). - Int("inserted", inserted). - Int("blocks to insert", len(blocks)). 
- Msg(WrapStagedSyncMsg("InsertReceiptChain couldn't insert all downloaded blocks/receipts")) - } - } // handle request result - rdm.HandleRequestResult(batch, receipts, loopID, stid) + rdm.HandleRequestResult(batch, receipts, blocks, stid) // log progress if r.configs.logProgress { //calculating block download speed diff --git a/api/service/stagedstreamsync/stage_state.go b/api/service/stagedstreamsync/stage_state.go index 1294edf70..6c82a69c1 100644 --- a/api/service/stagedstreamsync/stage_state.go +++ b/api/service/stagedstreamsync/stage_state.go @@ -19,12 +19,12 @@ type StageStates struct { configs StageStatesCfg } type StageStatesCfg struct { - bc core.BlockChain - db kv.RwDB - blockDBs []kv.RwDB - concurrency int - logger zerolog.Logger - logProgress bool + bc core.BlockChain + db kv.RwDB + blockDBs []kv.RwDB + concurrency int + logger zerolog.Logger + logProgress bool } func NewStageStates(cfg StageStatesCfg) *StageStates { @@ -42,12 +42,12 @@ func NewStageStatesCfg( logProgress bool) StageStatesCfg { return StageStatesCfg{ - bc: bc, - db: db, - blockDBs: blockDBs, - concurrency: concurrency, - logger: logger, - logProgress: logProgress, + bc: bc, + db: db, + blockDBs: blockDBs, + concurrency: concurrency, + logger: logger, + logProgress: logProgress, } } diff --git a/api/service/stagedstreamsync/stage_statesync.go b/api/service/stagedstreamsync/stage_statesync.go index 75326b6ac..654171df4 100644 --- a/api/service/stagedstreamsync/stage_statesync.go +++ b/api/service/stagedstreamsync/stage_statesync.go @@ -55,17 +55,22 @@ func NewStageStateSyncCfg(bc core.BlockChain, // Exec progresses States stage in the forward direction func (sss *StageStateSync) Exec(ctx context.Context, bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { + // only execute this stage in fast/snap sync mode and once we reach to pivot + if s.state.status.pivotBlock == nil || s.state.CurrentBlockNumber() != s.state.status.pivotBlock.NumberU64()-1 { + return nil + } + // for short range sync, skip this step if !s.state.initSync { return nil } maxHeight := s.state.status.targetBN - currentHead := sss.configs.bc.CurrentBlock().NumberU64() + currentHead := s.state.CurrentBlockNumber() if currentHead >= maxHeight { return nil } - currProgress := sss.configs.bc.CurrentBlock().NumberU64() + currProgress := s.state.CurrentBlockNumber() targetHeight := s.state.currentCycle.TargetHeight if errV := CreateView(ctx, sss.configs.db, tx, func(etx kv.Tx) error { @@ -97,20 +102,38 @@ func (sss *StageStateSync) Exec(ctx context.Context, bool, invalidBlockRevert bo fmt.Print("\033[s") // save the cursor position } - // Fetch blocks from neighbors - root := sss.configs.bc.CurrentBlock().Root() - sdm := newStateDownloadManager(tx, sss.configs.bc, root, sss.configs.concurrency, s.state.logger) - - // Setup workers to fetch blocks from remote node + // Fetch states from neighbors + pivotRootHash := s.state.status.pivotBlock.Root() + sdm := newStateDownloadManager(tx, sss.configs.bc, sss.configs.concurrency, s.state.logger) + sdm.setRootHash(pivotRootHash) var wg sync.WaitGroup - - for i := 0; i != s.state.config.Concurrency; i++ { + for i := 0; i < s.state.config.Concurrency; i++ { wg.Add(1) - go sss.runStateWorkerLoop(ctx, sdm, &wg, i, startTime) + go sss.runStateWorkerLoop(ctx, sdm, &wg, i, startTime, s) } - wg.Wait() + /* + gbm := s.state.gbm + + // Setup workers to fetch states from remote node + var wg sync.WaitGroup + curHeight := s.state.CurrentBlockNumber() + + for bn := curHeight + 1; bn 
<= gbm.targetBN; bn++ { + root := gbm.GetRootHash(bn) + if root == emptyHash { + continue + } + sdm.setRootHash(root) + for i := 0; i < s.state.config.Concurrency; i++ { + wg.Add(1) + go sss.runStateWorkerLoop(ctx, sdm, &wg, i, startTime, s) + } + wg.Wait() + } + */ + if useInternalTx { if err := tx.Commit(); err != nil { return err @@ -121,7 +144,8 @@ func (sss *StageStateSync) Exec(ctx context.Context, bool, invalidBlockRevert bo } // runStateWorkerLoop creates a work loop for download states -func (sss *StageStateSync) runStateWorkerLoop(ctx context.Context, sdm *StateDownloadManager, wg *sync.WaitGroup, loopID int, startTime time.Time) { +func (sss *StageStateSync) runStateWorkerLoop(ctx context.Context, sdm *StateDownloadManager, wg *sync.WaitGroup, loopID int, startTime time.Time, s *StageState) { + defer wg.Done() for { @@ -130,8 +154,8 @@ func (sss *StageStateSync) runStateWorkerLoop(ctx context.Context, sdm *StateDow return default: } - nodes, paths, codes := sdm.GetNextBatch() - if len(nodes)+len(codes) == 0 { + nodes, paths, codes, err := sdm.GetNextBatch() + if len(nodes)+len(codes) == 0 || err != nil { select { case <-ctx.Done(): return @@ -139,10 +163,9 @@ func (sss *StageStateSync) runStateWorkerLoop(ctx context.Context, sdm *StateDow return } } - data, stid, err := sss.downloadStates(ctx, nodes, codes) if err != nil { - if !errors.Is(err, context.Canceled) { + if !errors.Is(err, context.Canceled) && !errors.Is(err, context.DeadlineExceeded) { sss.configs.protocol.StreamFailed(stid, "downloadStates failed") } utils.Logger().Error(). @@ -157,19 +180,20 @@ func (sss *StageStateSync) runStateWorkerLoop(ctx context.Context, sdm *StateDow Msg(WrapStagedSyncMsg("downloadStates failed, received empty data bytes")) err := errors.New("downloadStates received empty data bytes") sdm.HandleRequestError(codes, paths, stid, err) - } - sdm.HandleRequestResult(nodes, paths, data, loopID, stid) - if sss.configs.logProgress { - //calculating block download speed - dt := time.Now().Sub(startTime).Seconds() - speed := float64(0) - if dt > 0 { - speed = float64(len(data)) / dt + } else { + sdm.HandleRequestResult(nodes, paths, data, loopID, stid) + if sss.configs.logProgress { + //calculating block download speed + dt := time.Now().Sub(startTime).Seconds() + speed := float64(0) + if dt > 0 { + speed = float64(len(data)) / dt + } + stateDownloadSpeed := fmt.Sprintf("%.2f", speed) + + fmt.Print("\033[u\033[K") // restore the cursor position and clear the line + fmt.Println("state download speed:", stateDownloadSpeed, "states/s") } - stateDownloadSpeed := fmt.Sprintf("%.2f", speed) - - fmt.Print("\033[u\033[K") // restore the cursor position and clear the line - fmt.Println("state download speed:", stateDownloadSpeed, "states/s") } } } @@ -216,7 +240,7 @@ func (stg *StageStateSync) saveProgress(s *StageState, tx kv.RwTx) (err error) { } // save progress - if err = s.Update(tx, stg.configs.bc.CurrentBlock().NumberU64()); err != nil { + if err = s.Update(tx, s.state.CurrentBlockNumber()); err != nil { utils.Logger().Error(). Err(err). 
Msgf("[STAGED_SYNC] saving progress for block States stage failed") diff --git a/api/service/stagedstreamsync/staged_stream_sync.go b/api/service/stagedstreamsync/staged_stream_sync.go index 7d94126b0..a4e04dff3 100644 --- a/api/service/stagedstreamsync/staged_stream_sync.go +++ b/api/service/stagedstreamsync/staged_stream_sync.go @@ -67,8 +67,7 @@ type StagedStreamSync struct { protocol syncProtocol isBeaconNode bool gbm *blockDownloadManager // initialized when finished get block number - rdm *receiptDownloadManager - lastMileBlocks []*types.Block // last mile blocks to catch up with the consensus + lastMileBlocks []*types.Block // last mile blocks to catch up with the consensus lastMileMux sync.Mutex inserted int config Config @@ -338,6 +337,18 @@ func (s *StagedStreamSync) doGetCurrentNumberRequest(ctx context.Context) (uint6 return bn, stid, nil } +// doGetBlockByNumberRequest returns block by its number and corresponding stream +func (s *StagedStreamSync) doGetBlockByNumberRequest(ctx context.Context, bn uint64) (*types.Block, sttypes.StreamID, error) { + ctx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + + blocks, stid, err := s.protocol.GetBlocksByNumber(ctx, []uint64{bn}, syncproto.WithHighPriority()) + if err != nil || len(blocks) != 1 { + return nil, stid, err + } + return blocks[0], stid, nil +} + // promLabels returns a prometheus labels for current shard id func (s *StagedStreamSync) promLabels() prometheus.Labels { sid := s.bc.ShardID() @@ -483,7 +494,6 @@ func (s *StagedStreamSync) runStage(ctx context.Context, stage *Stage, db kv.RwD if err != nil { return err } - if err = stage.Handler.Exec(ctx, firstCycle, invalidBlockRevert, stageState, s, tx); err != nil { utils.Logger().Error(). Err(err). diff --git a/api/service/stagedstreamsync/state_download_manager.go b/api/service/stagedstreamsync/state_download_manager.go index 80a758388..51eccb8ec 100644 --- a/api/service/stagedstreamsync/state_download_manager.go +++ b/api/service/stagedstreamsync/state_download_manager.go @@ -3,13 +3,16 @@ package stagedstreamsync import ( "fmt" "sync" + "time" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/trie" "github.com/harmony-one/harmony/core" "github.com/harmony-one/harmony/core/rawdb" "github.com/harmony-one/harmony/core/state" + "github.com/harmony-one/harmony/internal/utils" sttypes "github.com/harmony-one/harmony/p2p/stream/types" "github.com/ledgerwatch/erigon-lib/kv" "github.com/rs/zerolog" @@ -22,6 +25,26 @@ type codeTask struct { attempts map[sttypes.StreamID]int } +// trieTask represents a single trie node download task, containing a set of +// peers already attempted retrieval from to detect stalled syncs and abort. 
+type trieTask struct { + hash common.Hash + path [][]byte + attempts map[sttypes.StreamID]int +} + +type task struct { + trieTasks map[string]*trieTask // Set of trie node tasks currently queued for retrieval, indexed by path + codeTasks map[common.Hash]*codeTask // Set of byte code tasks currently queued for retrieval, indexed by hash +} + +func newTask() *task { + return &task{ + trieTasks: make(map[string]*trieTask), + codeTasks: make(map[common.Hash]*codeTask), + } +} + func (t *task) addCodeTask(h common.Hash, ct *codeTask) { t.codeTasks[h] = &codeTask{ attempts: ct.attempts, @@ -29,7 +52,10 @@ func (t *task) addCodeTask(h common.Hash, ct *codeTask) { } func (t *task) getCodeTask(h common.Hash) *codeTask { - return t.codeTasks[h] + if task, ok := t.codeTasks[h]; ok { + return task + } + return nil } func (t *task) addNewCodeTask(h common.Hash) { @@ -39,15 +65,17 @@ func (t *task) addNewCodeTask(h common.Hash) { } func (t *task) deleteCodeTask(hash common.Hash) { - delete(t.codeTasks, hash) + if _, ok := t.codeTasks[hash]; ok { + delete(t.codeTasks, hash) + } } -// trieTask represents a single trie node download task, containing a set of -// peers already attempted retrieval from to detect stalled syncs and abort. -type trieTask struct { - hash common.Hash - path [][]byte - attempts map[sttypes.StreamID]int +func (t *task) deleteCodeTaskAttempts(h common.Hash, stID sttypes.StreamID) { + if task, ok := t.codeTasks[h]; ok { + if _, ok := task.attempts[stID]; ok { + delete(t.codeTasks[h].attempts, stID) + } + } } func (t *task) addTrieTask(path string, tt *trieTask) { @@ -59,7 +87,10 @@ func (t *task) addTrieTask(path string, tt *trieTask) { } func (t *task) getTrieTask(path string) *trieTask { - return t.trieTasks[path] + if task, ok := t.trieTasks[path]; ok { + return task + } + return nil } func (t *task) addNewTrieTask(hash common.Hash, path string) { @@ -71,18 +102,16 @@ func (t *task) addNewTrieTask(hash common.Hash, path string) { } func (t *task) deleteTrieTask(path string) { - delete(t.trieTasks, path) -} - -type task struct { - trieTasks map[string]*trieTask // Set of trie node tasks currently queued for retrieval, indexed by path - codeTasks map[common.Hash]*codeTask // Set of byte code tasks currently queued for retrieval, indexed by hash + if _, ok := t.trieTasks[path]; ok { + delete(t.trieTasks, path) + } } -func newTask() *task { - return &task{ - trieTasks: make(map[string]*trieTask), - codeTasks: make(map[common.Hash]*codeTask), +func (t *task) deleteTrieTaskAttempts(path string, stID sttypes.StreamID) { + if task, ok := t.trieTasks[path]; ok { + if _, ok := task.attempts[stID]; ok { + delete(t.trieTasks[path].attempts, stID) + } } } @@ -99,6 +128,9 @@ type StateDownloadManager struct { logger zerolog.Logger lock sync.Mutex + numUncommitted int + bytesUncommitted int + tasks *task requesting *task processing *task @@ -107,15 +139,12 @@ type StateDownloadManager struct { func newStateDownloadManager(tx kv.RwTx, bc core.BlockChain, - root common.Hash, concurrency int, logger zerolog.Logger) *StateDownloadManager { return &StateDownloadManager{ bc: bc, tx: tx, - root: root, - sched: state.NewStateSync(root, bc.ChainDb(), nil, rawdb.HashScheme), keccak: sha3.NewLegacyKeccak256().(crypto.KeccakState), concurrency: concurrency, logger: logger, @@ -126,9 +155,13 @@ func newStateDownloadManager(tx kv.RwTx, } } +func (s *StateDownloadManager) setRootHash(root common.Hash) { + s.root = root + s.sched = state.NewStateSync(root, s.bc.ChainDb(), nil, rawdb.HashScheme) +} + // 
fillTasks fills the tasks to send to the remote peer. func (s *StateDownloadManager) fillTasks(n int) error { - // Refill available tasks from the scheduler. if fill := n - (len(s.tasks.trieTasks) + len(s.tasks.codeTasks)); fill > 0 { paths, hashes, codes := s.sched.Missing(fill) for i, path := range paths { @@ -143,7 +176,7 @@ func (s *StateDownloadManager) fillTasks(n int) error { // getNextBatch returns objects with a maximum of n state download // tasks to send to the remote peer. -func (s *StateDownloadManager) GetNextBatch() (nodes []common.Hash, paths []string, codes []common.Hash) { +func (s *StateDownloadManager) GetNextBatch() (nodes []common.Hash, paths []string, codes []common.Hash, err error) { s.lock.Lock() defer s.lock.Unlock() @@ -154,13 +187,57 @@ func (s *StateDownloadManager) GetNextBatch() (nodes []common.Hash, paths []stri cap -= nItems if cap > 0 { + // Refill available tasks from the scheduler. + if s.sched.Pending() == 0 { + return + } + + if err = s.commit(false); err != nil { + return + } + + if err = s.fillTasks(cap); err != nil { + return + } newNodes, newPaths, newCodes := s.getBatchFromUnprocessed(cap) nodes = append(nodes, newNodes...) paths = append(paths, newPaths...) codes = append(codes, newCodes...) } + return +} - return nodes, paths, codes +func (s *StateDownloadManager) commit(force bool) error { + if !force && s.bytesUncommitted < ethdb.IdealBatchSize { + return nil + } + start := time.Now() + b := s.bc.ChainDb().NewBatch() + if err := s.sched.Commit(b); err != nil { + return err + } + if err := b.Write(); err != nil { + return fmt.Errorf("DB write error: %v", err) + } + s.updateStats(s.numUncommitted, 0, 0, time.Since(start)) + s.numUncommitted = 0 + s.bytesUncommitted = 0 + return nil +} + +// updateStats bumps the various state sync progress counters and displays a log +// message for the user to see. +func (s *StateDownloadManager) updateStats(written, duplicate, unexpected int, duration time.Duration) { + // TODO: here it updates the stats for total pending, processed, duplicates and unexpected + + // for now, we just jog current stats + if written > 0 || duplicate > 0 || unexpected > 0 { + utils.Logger().Info(). + Int("count", written). + Int("duplicate", duplicate). + Int("unexpected", unexpected). + Msg("Imported new state entries") + } } // getBatchFromUnprocessed returns objects with a maximum of n unprocessed state download @@ -194,11 +271,11 @@ func (s *StateDownloadManager) getBatchFromUnprocessed(n int) (nodes []common.Ha } // getBatchFromRetries get the block number batch to be requested from retries. -func (s *StateDownloadManager) getBatchFromRetries(n int) (nodes []common.Hash, paths []string, codes []common.Hash) { +func (s *StateDownloadManager) getBatchFromRetries(n int) ([]common.Hash, []string, []common.Hash) { // over trie nodes as those can be written to disk and forgotten about. 
- nodes = make([]common.Hash, 0, n) - paths = make([]string, 0, n) - codes = make([]common.Hash, 0, n) + nodes := make([]common.Hash, 0, n) + paths := make([]string, 0, n) + codes := make([]common.Hash, 0, n) for hash, t := range s.retries.codeTasks { // Stop when we've gathered enough requests @@ -229,14 +306,16 @@ func (s *StateDownloadManager) HandleRequestError(codeHashes []common.Hash, trie // add requested code hashes to retries for _, h := range codeHashes { - s.retries.addCodeTask(h, s.requesting.codeTasks[h]) - delete(s.requesting.codeTasks, h) + task := s.requesting.getCodeTask(h) + s.retries.addCodeTask(h, task) + s.requesting.deleteCodeTask(h) } // add requested trie paths to retries for _, path := range triePaths { - s.retries.addTrieTask(path, s.requesting.trieTasks[path]) - delete(s.requesting.trieTasks, path) + task := s.requesting.getTrieTask(path) + s.retries.addTrieTask(path, task) + s.requesting.deleteTrieTask(path) } } @@ -246,14 +325,14 @@ func (s *StateDownloadManager) HandleRequestResult(codeHashes []common.Hash, tri defer s.lock.Unlock() // Collect processing stats and update progress if valid data was received - duplicate, unexpected, successful, numUncommitted, bytesUncommitted := 0, 0, 0, 0, 0 + duplicate, unexpected, successful := 0, 0, 0 for _, blob := range response { hash, err := s.processNodeData(codeHashes, triePaths, blob) switch err { case nil: - numUncommitted++ - bytesUncommitted += len(blob) + s.numUncommitted++ + s.bytesUncommitted += len(blob) successful++ case trie.ErrNotRequested: unexpected++ @@ -266,11 +345,16 @@ func (s *StateDownloadManager) HandleRequestResult(codeHashes []common.Hash, tri for _, path := range triePaths { task := s.requesting.getTrieTask(path) + if task == nil { + // it is already removed from requesting + // either it has been completed and deleted by processNodeData or it does not exist + continue + } // If the node did deliver something, missing items may be due to a protocol // limit or a previous timeout + delayed delivery. Both cases should permit // the node to retry the missing items (to avoid single-peer stalls). if len(response) > 0 { //TODO: if timeout also do same - delete(s.requesting.trieTasks[path].attempts, streamID) + s.requesting.deleteTrieTaskAttempts(path, streamID) } else if task.attempts[streamID] >= MaxTriesToFetchNodeData { // If we've requested the node too many times already, it may be a malicious // sync where nobody has the right data. Abort. @@ -283,11 +367,16 @@ func (s *StateDownloadManager) HandleRequestResult(codeHashes []common.Hash, tri for _, hash := range codeHashes { task := s.requesting.getCodeTask(hash) + if task == nil { + // it is already removed from requesting + // either it has been completed and deleted by processNodeData or it does not exist + continue + } // If the node did deliver something, missing items may be due to a protocol // limit or a previous timeout + delayed delivery. Both cases should permit // the node to retry the missing items (to avoid single-peer stalls). if len(response) > 0 { //TODO: if timeout also do same - delete(s.requesting.codeTasks[hash].attempts, streamID) //TODO: do we need delete attempts??? + s.requesting.deleteCodeTaskAttempts(hash, streamID) //TODO: do we need delete attempts??? } else if task.attempts[streamID] >= MaxTriesToFetchNodeData { // If we've requested the node too many times already, it may be a malicious // sync where nobody has the right data. Abort. 
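The request/retry bookkeeping above reads as a small state machine: GetNextBatch moves tasks from the pending and retry sets into requesting, a successful delivery lets processNodeData retire them, and a failed or empty delivery pushes them back into retries, where they are served first on the next batch. An illustrative driver loop under that model, assuming the stagedstreamsync package (the fetch callback and helper name are assumptions, not code from the patch; the real loop is runStateWorkerLoop in stage_statesync.go):

func syncStateForRoot(ctx context.Context, sdm *StateDownloadManager, root common.Hash,
	fetch func(ctx context.Context, nodes []common.Hash, codes []common.Hash) ([][]byte, sttypes.StreamID, error)) error {
	// Pin the scheduler to the pivot state root before pulling batches.
	sdm.setRootHash(root)
	for {
		nodes, paths, codes, err := sdm.GetNextBatch()
		if err != nil {
			return err
		}
		if len(nodes)+len(codes) == 0 {
			return nil // scheduler reports no pending work left
		}
		data, stid, err := fetch(ctx, nodes, codes)
		if err != nil {
			// Failed request: push the batch back so it is retried,
			// possibly on another stream.
			sdm.HandleRequestError(codes, paths, stid, err)
			continue
		}
		// Successful delivery: feed the blobs to the trie scheduler
		// (loop ID fixed at 0 for this single-worker sketch).
		sdm.HandleRequestResult(nodes, paths, data, 0, stid)
	}
}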
@@ -325,6 +414,11 @@ func (s *StateDownloadManager) processNodeData(codeHashes []common.Hash, triePat } for _, path := range triePaths { task := s.requesting.getTrieTask(path) + if task == nil { + // this shouldn't happen while the path is given from triPaths and triPaths + // are given from requesting queue + continue + } if task.hash == hash { err := s.sched.ProcessNode(trie.NodeSyncResult{ Path: path, diff --git a/api/service/stagedstreamsync/syncing.go b/api/service/stagedstreamsync/syncing.go index b2c1aacdf..d2457a526 100644 --- a/api/service/stagedstreamsync/syncing.go +++ b/api/service/stagedstreamsync/syncing.go @@ -11,6 +11,8 @@ import ( "github.com/harmony-one/harmony/consensus" "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/core/rawdb" + "github.com/harmony-one/harmony/core/types" "github.com/harmony-one/harmony/internal/utils" sttypes "github.com/harmony-one/harmony/p2p/stream/types" "github.com/ledgerwatch/erigon-lib/kv" @@ -81,14 +83,12 @@ func CreateStagedSync(ctx context.Context, return nil, errInitDB } - blockExecution := config.SyncMode == FullSync extractReceiptHashes := config.SyncMode == FastSync || config.SyncMode == SnapSync - stageHeadsCfg := NewStageHeadersCfg(bc, mainDB) stageShortRangeCfg := NewStageShortRangeCfg(bc, mainDB) stageSyncEpochCfg := NewStageEpochCfg(bc, mainDB) stageBodiesCfg := NewStageBodiesCfg(bc, mainDB, dbs, config.Concurrency, protocol, isBeaconNode, extractReceiptHashes, config.LogProgress) - stageStatesCfg := NewStageStatesCfg(bc, mainDB, dbs, config.Concurrency, blockExecution, logger, config.LogProgress) + stageStatesCfg := NewStageStatesCfg(bc, mainDB, dbs, config.Concurrency, logger, config.LogProgress) stageStateSyncCfg := NewStageStateSyncCfg(bc, mainDB, config.Concurrency, protocol, logger, config.LogProgress) stageReceiptsCfg := NewStageReceiptsCfg(bc, mainDB, dbs, config.Concurrency, protocol, isBeaconNode, config.LogProgress) lastMileCfg := NewStageLastMileCfg(ctx, bc, mainDB) @@ -235,6 +235,60 @@ func (s *StagedStreamSync) Debug(source string, msg interface{}) { } } +func (s *StagedStreamSync) checkPivot(ctx context.Context, estimatedHeight uint64) (uint64, error) { + + // do full sync if chain is at early stage + if estimatedHeight < MaxPivotDistanceToHead { + return 0, nil + } + + pivotBlockNumber := uint64(0) + if curPivot := rawdb.ReadLastPivotNumber(s.bc.ChainDb()); curPivot != nil { + // if head is behind pivot, that means it is still on fast/snap sync mode + if head := s.CurrentBlockNumber(); head < *curPivot { + pivotBlockNumber = *curPivot + // pivot could be moved forward if it is far from head + if pivotBlockNumber < estimatedHeight-MaxPivotDistanceToHead { + pivotBlockNumber = estimatedHeight - MinPivotDistanceToHead + if err := rawdb.WriteLastPivotNumber(s.bc.ChainDb(), pivotBlockNumber); err != nil { + s.logger.Error().Err(err). + Uint64("current pivot number", *curPivot). + Uint64("new pivot number", pivotBlockNumber). + Msg(WrapStagedSyncMsg("update pivot number failed")) + return pivotBlockNumber, err + } + } + } + } else { + pivot := estimatedHeight - MinPivotDistanceToHead + if s.config.SyncMode == FastSync && s.CurrentBlockNumber() < pivot { + pivotBlockNumber = pivot + if err := rawdb.WriteLastPivotNumber(s.bc.ChainDb(), pivotBlockNumber); err != nil { + s.logger.Error().Err(err). + Uint64("new pivot number", pivotBlockNumber). 
+ Msg(WrapStagedSyncMsg("update pivot number failed")) + return pivotBlockNumber, err + } + } + } + if pivotBlockNumber > 0 { + if block, err := s.queryAllPeersForBlockByNumber(ctx, pivotBlockNumber); err != nil { + s.logger.Error().Err(err). + Uint64("pivot", pivotBlockNumber). + Msg(WrapStagedSyncMsg("query peers for pivot block failed")) + return pivotBlockNumber, err + } else { + s.status.pivotBlock = block + } + s.logger.Info(). + Uint64("estimatedHeight", estimatedHeight). + Uint64("pivot number", pivotBlockNumber). + Msg(WrapStagedSyncMsg("fast/snap sync mode, pivot is set successfully")) + } + + return pivotBlockNumber, nil +} + // doSync does the long range sync. // One LongRangeSync consists of several iterations. // For each iteration, estimate the current block number, then fetch block & insert to blockchain @@ -245,7 +299,6 @@ func (s *StagedStreamSync) doSync(downloaderContext context.Context, initSync bo var totalInserted int s.initSync = initSync - if err := s.checkPrerequisites(); err != nil { return 0, 0, err } @@ -259,13 +312,20 @@ func (s *StagedStreamSync) doSync(downloaderContext context.Context, initSync bo //TODO: use directly currentCycle var s.status.setTargetBN(estimatedHeight) } - if curBN := s.bc.CurrentBlock().NumberU64(); estimatedHeight <= curBN { + if curBN := s.CurrentBlockNumber(); estimatedHeight <= curBN { s.logger.Info().Uint64("current number", curBN).Uint64("target number", estimatedHeight). Msg(WrapStagedSyncMsg("early return of long range sync (chain is already ahead of target height)")) return estimatedHeight, 0, nil } } + // We are probably in full sync, but we might have rewound to before the + // fast/snap sync pivot, check if we should reenable + if _, err := s.checkPivot(downloaderContext, estimatedHeight); err != nil { + s.logger.Error().Err(err).Msg(WrapStagedSyncMsg("check pivot failed")) + return 0, 0, err + } + s.startSyncing() defer s.finishSyncing() @@ -336,7 +396,7 @@ func (s *StagedStreamSync) doSyncCycle(ctx context.Context) (int, error) { var totalInserted int s.inserted = 0 - startHead := s.bc.CurrentBlock().NumberU64() + startHead := s.CurrentBlockNumber() canRunCycleInOneTransaction := false var tx kv.RwTx @@ -400,6 +460,36 @@ func (s *StagedStreamSync) checkPrerequisites() error { return s.checkHaveEnoughStreams() } +func (s *StagedStreamSync) CurrentBlockNumber() uint64 { + // if current head is ahead of pivot block, return chain head regardless of sync mode + if s.status.pivotBlock != nil && s.bc.CurrentBlock().NumberU64() > s.status.pivotBlock.NumberU64() { + return s.bc.CurrentBlock().NumberU64() + } + + current := uint64(0) + switch s.config.SyncMode { + case FullSync: + current = s.bc.CurrentBlock().NumberU64() + case FastSync: + current = s.bc.CurrentFastBlock().NumberU64() + case SnapSync: + current = s.bc.CurrentHeader().Number().Uint64() + } + return current +} + +func (s *StagedStreamSync) stateSyncStage() bool { + switch s.config.SyncMode { + case FullSync: + return false + case FastSync: + return s.status.pivotBlock != nil && s.bc.CurrentFastBlock().NumberU64() == s.status.pivotBlock.NumberU64()-1 + case SnapSync: + return false + } + return false +} + // estimateCurrentNumber roughly estimates the current block number. 
// The block number does not need to be exact, but just a temporary target of the iteration func (s *StagedStreamSync) estimateCurrentNumber(ctx context.Context) (uint64, error) { @@ -439,3 +529,45 @@ func (s *StagedStreamSync) estimateCurrentNumber(ctx context.Context) (uint64, e bn := computeBlockNumberByMaxVote(cnResults) return bn, nil } + +// queryAllPeersForBlockByNumber queries all connected streams for a block by its number. +func (s *StagedStreamSync) queryAllPeersForBlockByNumber(ctx context.Context, bn uint64) (*types.Block, error) { + var ( + blkResults []*types.Block + lock sync.Mutex + wg sync.WaitGroup + ) + wg.Add(s.config.Concurrency) + for i := 0; i != s.config.Concurrency; i++ { + go func() { + defer wg.Done() + block, stid, err := s.doGetBlockByNumberRequest(ctx, bn) + if err != nil { + s.logger.Err(err).Str("streamID", string(stid)). + Msg(WrapStagedSyncMsg("getBlockByNumber request failed")) + if !errors.Is(err, context.Canceled) { + s.protocol.StreamFailed(stid, "getBlockByNumber request failed") + } + return + } + lock.Lock() + blkResults = append(blkResults, block) + lock.Unlock() + }() + } + wg.Wait() + + if len(blkResults) == 0 { + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + } + return nil, ErrZeroBlockResponse + } + block, err := getBlockByMaxVote(blkResults) + if err != nil { + return nil, err + } + return block, nil +} diff --git a/api/service/stagedstreamsync/types.go b/api/service/stagedstreamsync/types.go index 6d6326452..17a3d345f 100644 --- a/api/service/stagedstreamsync/types.go +++ b/api/service/stagedstreamsync/types.go @@ -14,9 +14,10 @@ var ( ) type status struct { - isSyncing bool - targetBN uint64 - lock sync.Mutex + isSyncing bool + targetBN uint64 + pivotBlock *types.Block + lock sync.Mutex } func newStatus() status { From 6348128c482b2ecaaadeeb3fba386f001498868a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Wed, 4 Oct 2023 20:05:12 +0800 Subject: [PATCH 43/56] improve stage handling for create new instance of staged stream sync --- api/service/stagedstreamsync/staged_stream_sync.go | 6 +++--- api/service/stagedstreamsync/syncing.go | 13 +------------ 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/api/service/stagedstreamsync/staged_stream_sync.go b/api/service/stagedstreamsync/staged_stream_sync.go index a4e04dff3..03340eb15 100644 --- a/api/service/stagedstreamsync/staged_stream_sync.go +++ b/api/service/stagedstreamsync/staged_stream_sync.go @@ -266,7 +266,7 @@ func New( logger zerolog.Logger, ) *StagedStreamSync { - forwardStages := make([]*Stage, len(stagesList)) + forwardStages := make([]*Stage, len(StagesForwardOrder)) for i, stageIndex := range StagesForwardOrder { for _, s := range stagesList { if s.ID == stageIndex { @@ -276,7 +276,7 @@ func New( } } - revertStages := make([]*Stage, len(stagesList)) + revertStages := make([]*Stage, len(StagesRevertOrder)) for i, stageIndex := range StagesRevertOrder { for _, s := range stagesList { if s.ID == stageIndex { @@ -286,7 +286,7 @@ func New( } } - pruneStages := make([]*Stage, len(stagesList)) + pruneStages := make([]*Stage, len(StagesCleanUpOrder)) for i, stageIndex := range StagesCleanUpOrder { for _, s := range stagesList { if s.ID == stageIndex { diff --git a/api/service/stagedstreamsync/syncing.go b/api/service/stagedstreamsync/syncing.go index d2457a526..03043525b 100644 --- a/api/service/stagedstreamsync/syncing.go +++ b/api/service/stagedstreamsync/syncing.go @@ 
-118,22 +118,11 @@ func CreateStagedSync(ctx context.Context, Int("minStreams", config.MinStreams). Msg(WrapStagedSyncMsg("staged sync created successfully")) - var stages []*Stage - // if any of the default stages doesn't exist in forward order, delete it from the list of stages - for _, stg := range defaultStages { - for _, stageID := range StagesForwardOrder { - if stg.ID == stageID { - stages = append(stages, stg) - break - } - } - } - return New( bc, consensus, mainDB, - stages, + defaultStages, isBeaconNode, protocol, isBeaconNode, From c808f2b733f560fa821201e3de6235095662347b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Thu, 5 Oct 2023 15:08:10 +0800 Subject: [PATCH 44/56] fix pivot block issue for write on chain --- api/service/stagedstreamsync/stage_state.go | 2 +- api/service/stagedstreamsync/stage_statesync.go | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/api/service/stagedstreamsync/stage_state.go b/api/service/stagedstreamsync/stage_state.go index 6c82a69c1..80a3faa0e 100644 --- a/api/service/stagedstreamsync/stage_state.go +++ b/api/service/stagedstreamsync/stage_state.go @@ -55,7 +55,7 @@ func NewStageStatesCfg( func (stg *StageStates) Exec(ctx context.Context, firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { // only execute this stage in full sync mode if s.state.config.SyncMode != FullSync { - if s.state.status.pivotBlock != nil && s.state.bc.CurrentBlock().NumberU64() <= s.state.status.pivotBlock.NumberU64() { + if s.state.status.pivotBlock != nil && s.state.bc.CurrentBlock().NumberU64() < s.state.status.pivotBlock.NumberU64() { return nil } } diff --git a/api/service/stagedstreamsync/stage_statesync.go b/api/service/stagedstreamsync/stage_statesync.go index 654171df4..130f7f71f 100644 --- a/api/service/stagedstreamsync/stage_statesync.go +++ b/api/service/stagedstreamsync/stage_statesync.go @@ -113,6 +113,16 @@ func (sss *StageStateSync) Exec(ctx context.Context, bool, invalidBlockRevert bo } wg.Wait() + // insert block + if err := sss.configs.bc.WriteHeadBlock(s.state.status.pivotBlock); err != nil { + sss.configs.logger.Warn().Err(err). + Uint64("pivot block number", s.state.status.pivotBlock.NumberU64()). 
+ Msg(WrapStagedSyncMsg("insert pivot block failed")) + s.state.Debug("StateSync/pivot/insert/error", err) + // TODO: panic("pivot block is failed to insert in chain.") + return err + } + /* gbm := s.state.gbm From bdd7f142c7f0f7f8235691c45e0d433c05a073d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Fri, 6 Oct 2023 14:22:36 +0800 Subject: [PATCH 45/56] improve stream sync current cycle and pivot checks, fix edge case issue to insert pivot block and its receipts --- api/service/stagedstreamsync/stage_heads.go | 6 +-- .../stagedstreamsync/stage_receipts.go | 2 +- api/service/stagedstreamsync/stage_state.go | 6 +-- .../stagedstreamsync/stage_statesync.go | 5 ++- api/service/stagedstreamsync/syncing.go | 44 +++++++++++-------- api/service/stagedstreamsync/types.go | 10 +++-- 6 files changed, 41 insertions(+), 32 deletions(-) diff --git a/api/service/stagedstreamsync/stage_heads.go b/api/service/stagedstreamsync/stage_heads.go index 99e0248ba..46ebed1d2 100644 --- a/api/service/stagedstreamsync/stage_heads.go +++ b/api/service/stagedstreamsync/stage_heads.go @@ -90,11 +90,11 @@ func (heads *StageHeads) Exec(ctx context.Context, firstCycle bool, invalidBlock } // check pivot: if chain hasn't reached to pivot yet - if s.state.status.pivotBlock != nil && s.state.CurrentBlockNumber() < s.state.status.pivotBlock.NumberU64() { + if s.state.status.cycleSyncMode != FullSync && s.state.status.pivotBlock != nil { // set target height on the block before pivot // pivot block would be downloaded by StateSync stage - if targetHeight >= s.state.status.pivotBlock.NumberU64() { - targetHeight = s.state.status.pivotBlock.NumberU64() - 1 + if !s.state.status.statesSynced && targetHeight > s.state.status.pivotBlock.NumberU64() { + targetHeight = s.state.status.pivotBlock.NumberU64() } } diff --git a/api/service/stagedstreamsync/stage_receipts.go b/api/service/stagedstreamsync/stage_receipts.go index 0a2d8ab02..63f09f986 100644 --- a/api/service/stagedstreamsync/stage_receipts.go +++ b/api/service/stagedstreamsync/stage_receipts.go @@ -52,7 +52,7 @@ func NewStageReceiptsCfg(bc core.BlockChain, db kv.RwDB, blockDBs []kv.RwDB, con func (r *StageReceipts) Exec(ctx context.Context, firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { // only execute this stage in fast/snap sync mode - if s.state.status.pivotBlock == nil || s.state.CurrentBlockNumber() >= s.state.status.pivotBlock.NumberU64() { + if s.state.status.cycleSyncMode == FullSync { return nil } diff --git a/api/service/stagedstreamsync/stage_state.go b/api/service/stagedstreamsync/stage_state.go index 80a3faa0e..c477f4309 100644 --- a/api/service/stagedstreamsync/stage_state.go +++ b/api/service/stagedstreamsync/stage_state.go @@ -54,10 +54,8 @@ func NewStageStatesCfg( // Exec progresses States stage in the forward direction func (stg *StageStates) Exec(ctx context.Context, firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { // only execute this stage in full sync mode - if s.state.config.SyncMode != FullSync { - if s.state.status.pivotBlock != nil && s.state.bc.CurrentBlock().NumberU64() < s.state.status.pivotBlock.NumberU64() { - return nil - } + if s.state.status.cycleSyncMode != FullSync { + return nil } // for short range sync, skip this step diff --git a/api/service/stagedstreamsync/stage_statesync.go b/api/service/stagedstreamsync/stage_statesync.go index 
130f7f71f..1a973c13e 100644 --- a/api/service/stagedstreamsync/stage_statesync.go +++ b/api/service/stagedstreamsync/stage_statesync.go @@ -56,7 +56,7 @@ func NewStageStateSyncCfg(bc core.BlockChain, func (sss *StageStateSync) Exec(ctx context.Context, bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { // only execute this stage in fast/snap sync mode and once we reach to pivot - if s.state.status.pivotBlock == nil || s.state.CurrentBlockNumber() != s.state.status.pivotBlock.NumberU64()-1 { + if s.state.status.pivotBlock == nil || s.state.CurrentBlockNumber() != s.state.status.pivotBlock.NumberU64() { return nil } @@ -123,6 +123,9 @@ func (sss *StageStateSync) Exec(ctx context.Context, bool, invalidBlockRevert bo return err } + // states should be fully synced in this stage + s.state.status.statesSynced = true + /* gbm := s.state.gbm diff --git a/api/service/stagedstreamsync/syncing.go b/api/service/stagedstreamsync/syncing.go index 03043525b..88e0a0857 100644 --- a/api/service/stagedstreamsync/syncing.go +++ b/api/service/stagedstreamsync/syncing.go @@ -224,11 +224,16 @@ func (s *StagedStreamSync) Debug(source string, msg interface{}) { } } -func (s *StagedStreamSync) checkPivot(ctx context.Context, estimatedHeight uint64) (uint64, error) { +// checkPivot checks pivot block and returns pivot block and cycle Sync mode +func (s *StagedStreamSync) checkPivot(ctx context.Context, estimatedHeight uint64, initSync bool) (*types.Block, SyncMode, error) { + + if s.config.SyncMode == FullSync { + return nil, FullSync, nil + } // do full sync if chain is at early stage - if estimatedHeight < MaxPivotDistanceToHead { - return 0, nil + if initSync && estimatedHeight < MaxPivotDistanceToHead { + return nil, FullSync, nil } pivotBlockNumber := uint64(0) @@ -240,23 +245,21 @@ func (s *StagedStreamSync) checkPivot(ctx context.Context, estimatedHeight uint6 if pivotBlockNumber < estimatedHeight-MaxPivotDistanceToHead { pivotBlockNumber = estimatedHeight - MinPivotDistanceToHead if err := rawdb.WriteLastPivotNumber(s.bc.ChainDb(), pivotBlockNumber); err != nil { - s.logger.Error().Err(err). + s.logger.Warn().Err(err). Uint64("current pivot number", *curPivot). Uint64("new pivot number", pivotBlockNumber). Msg(WrapStagedSyncMsg("update pivot number failed")) - return pivotBlockNumber, err + pivotBlockNumber = *curPivot } } } } else { - pivot := estimatedHeight - MinPivotDistanceToHead - if s.config.SyncMode == FastSync && s.CurrentBlockNumber() < pivot { - pivotBlockNumber = pivot + if head := s.CurrentBlockNumber(); s.config.SyncMode == FastSync && head <= 1 { + pivotBlockNumber = estimatedHeight - MinPivotDistanceToHead if err := rawdb.WriteLastPivotNumber(s.bc.ChainDb(), pivotBlockNumber); err != nil { - s.logger.Error().Err(err). + s.logger.Warn().Err(err). Uint64("new pivot number", pivotBlockNumber). Msg(WrapStagedSyncMsg("update pivot number failed")) - return pivotBlockNumber, err } } } @@ -265,17 +268,17 @@ func (s *StagedStreamSync) checkPivot(ctx context.Context, estimatedHeight uint6 s.logger.Error().Err(err). Uint64("pivot", pivotBlockNumber). Msg(WrapStagedSyncMsg("query peers for pivot block failed")) - return pivotBlockNumber, err + return block, FastSync, err } else { s.status.pivotBlock = block + s.logger.Info(). + Uint64("estimatedHeight", estimatedHeight). + Uint64("pivot number", pivotBlockNumber). + Msg(WrapStagedSyncMsg("fast/snap sync mode, pivot is set successfully")) + return block, FastSync, nil } - s.logger.Info(). 
- Uint64("estimatedHeight", estimatedHeight). - Uint64("pivot number", pivotBlockNumber). - Msg(WrapStagedSyncMsg("fast/snap sync mode, pivot is set successfully")) } - - return pivotBlockNumber, nil + return nil, FullSync, nil } // doSync does the long range sync. @@ -310,9 +313,12 @@ func (s *StagedStreamSync) doSync(downloaderContext context.Context, initSync bo // We are probably in full sync, but we might have rewound to before the // fast/snap sync pivot, check if we should reenable - if _, err := s.checkPivot(downloaderContext, estimatedHeight); err != nil { + if pivotBlock, cycleSyncMode, err := s.checkPivot(downloaderContext, estimatedHeight, initSync); err != nil { s.logger.Error().Err(err).Msg(WrapStagedSyncMsg("check pivot failed")) return 0, 0, err + } else { + s.status.cycleSyncMode = cycleSyncMode + s.status.pivotBlock = pivotBlock } s.startSyncing() @@ -451,7 +457,7 @@ func (s *StagedStreamSync) checkPrerequisites() error { func (s *StagedStreamSync) CurrentBlockNumber() uint64 { // if current head is ahead of pivot block, return chain head regardless of sync mode - if s.status.pivotBlock != nil && s.bc.CurrentBlock().NumberU64() > s.status.pivotBlock.NumberU64() { + if s.status.pivotBlock != nil && s.bc.CurrentBlock().NumberU64() >= s.status.pivotBlock.NumberU64() { return s.bc.CurrentBlock().NumberU64() } diff --git a/api/service/stagedstreamsync/types.go b/api/service/stagedstreamsync/types.go index 17a3d345f..e46b61429 100644 --- a/api/service/stagedstreamsync/types.go +++ b/api/service/stagedstreamsync/types.go @@ -14,10 +14,12 @@ var ( ) type status struct { - isSyncing bool - targetBN uint64 - pivotBlock *types.Block - lock sync.Mutex + isSyncing bool + targetBN uint64 + pivotBlock *types.Block + cycleSyncMode SyncMode + statesSynced bool + lock sync.Mutex } func newStatus() status { From 135c7da45506312b9a613ea09f8599cc079e114b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Tue, 24 Oct 2023 15:41:39 +0800 Subject: [PATCH 46/56] fix WriteHeadBlock, fix GetDownloadDetails index, improve fetching current block in a few stages, improve pivot calculation --- api/service/stagedstreamsync/block_manager.go | 8 ++- api/service/stagedstreamsync/const.go | 2 +- api/service/stagedstreamsync/stage_heads.go | 3 +- .../stagedstreamsync/stage_receipts.go | 9 +++- api/service/stagedstreamsync/stage_state.go | 9 ++-- .../stagedstreamsync/stage_statesync.go | 52 ++++++++++--------- api/service/stagedstreamsync/syncing.go | 18 ++++--- core/blockchain_impl.go | 25 +++++++++ core/rawdb/accessors_offchain.go | 2 +- 9 files changed, 85 insertions(+), 43 deletions(-) diff --git a/api/service/stagedstreamsync/block_manager.go b/api/service/stagedstreamsync/block_manager.go index 273078c59..f5ba8fdc4 100644 --- a/api/service/stagedstreamsync/block_manager.go +++ b/api/service/stagedstreamsync/block_manager.go @@ -1,6 +1,7 @@ package stagedstreamsync import ( + "fmt" "sync" "github.com/ethereum/go-ethereum/common" @@ -118,11 +119,14 @@ func (gbm *blockDownloadManager) SetDownloadDetails(bns []uint64, loopID int, st } // GetDownloadDetails returns the download details for a block -func (gbm *blockDownloadManager) GetDownloadDetails(blockNumber uint64) (loopID int, streamID sttypes.StreamID) { +func (gbm *blockDownloadManager) GetDownloadDetails(blockNumber uint64) (loopID int, streamID sttypes.StreamID, err error) { gbm.lock.Lock() defer gbm.lock.Unlock() - return gbm.bdd[blockNumber].loopID, 
gbm.bdd[blockNumber].streamID + if dm, exist := gbm.bdd[blockNumber]; exist { + return dm.loopID, dm.streamID, nil + } + return 0, sttypes.StreamID(0), fmt.Errorf("there is no download details for the block number: %d", blockNumber) } // SetRootHash sets the root hash for a specific block diff --git a/api/service/stagedstreamsync/const.go b/api/service/stagedstreamsync/const.go index e172854ec..2789bfb1e 100644 --- a/api/service/stagedstreamsync/const.go +++ b/api/service/stagedstreamsync/const.go @@ -40,7 +40,7 @@ const ( ShortRangeTimeout time.Duration = 1 * time.Minute // pivot block distance ranges - MinPivotDistanceToHead uint64 = 1028 + MinPivotDistanceToHead uint64 = 1024 MaxPivotDistanceToHead uint64 = 2048 ) diff --git a/api/service/stagedstreamsync/stage_heads.go b/api/service/stagedstreamsync/stage_heads.go index 46ebed1d2..bf0721aad 100644 --- a/api/service/stagedstreamsync/stage_heads.go +++ b/api/service/stagedstreamsync/stage_heads.go @@ -91,8 +91,7 @@ func (heads *StageHeads) Exec(ctx context.Context, firstCycle bool, invalidBlock // check pivot: if chain hasn't reached to pivot yet if s.state.status.cycleSyncMode != FullSync && s.state.status.pivotBlock != nil { - // set target height on the block before pivot - // pivot block would be downloaded by StateSync stage + // set target height on the pivot block if !s.state.status.statesSynced && targetHeight > s.state.status.pivotBlock.NumberU64() { targetHeight = s.state.status.pivotBlock.NumberU64() } diff --git a/api/service/stagedstreamsync/stage_receipts.go b/api/service/stagedstreamsync/stage_receipts.go index 63f09f986..4445eb6ba 100644 --- a/api/service/stagedstreamsync/stage_receipts.go +++ b/api/service/stagedstreamsync/stage_receipts.go @@ -238,7 +238,14 @@ func (r *StageReceipts) runReceiptWorkerLoop(ctx context.Context, rdm *receiptDo for _, bn := range batch { blkKey := marshalData(bn) - loopID, _ := gbm.GetDownloadDetails(bn) + loopID, _, errBDD := gbm.GetDownloadDetails(bn) + if errBDD != nil { + utils.Logger().Warn(). + Err(errBDD). + Interface("block numbers", bn). 
+ Msg(WrapStagedSyncMsg("get block download details failed")) + return + } blockBytes, err := txs[loopID].GetOne(BlocksBucket, blkKey) if err != nil { return diff --git a/api/service/stagedstreamsync/stage_state.go b/api/service/stagedstreamsync/stage_state.go index c477f4309..df864d63f 100644 --- a/api/service/stagedstreamsync/stage_state.go +++ b/api/service/stagedstreamsync/stage_state.go @@ -69,11 +69,11 @@ func (stg *StageStates) Exec(ctx context.Context, firstCycle bool, invalidBlockR } maxHeight := s.state.status.targetBN - currentHead := stg.configs.bc.CurrentBlock().NumberU64() + currentHead := s.state.CurrentBlockNumber() if currentHead >= maxHeight { return nil } - currProgress := stg.configs.bc.CurrentBlock().NumberU64() + currProgress := currentHead targetHeight := s.state.currentCycle.TargetHeight if currProgress >= targetHeight { return nil @@ -115,7 +115,10 @@ func (stg *StageStates) Exec(ctx context.Context, firstCycle bool, invalidBlockR for i := currProgress + 1; i <= targetHeight; i++ { blkKey := marshalData(i) - loopID, streamID := gbm.GetDownloadDetails(i) + loopID, streamID, errBDD := gbm.GetDownloadDetails(i) + if errBDD != nil { + return errBDD + } blockBytes, err := txs[loopID].GetOne(BlocksBucket, blkKey) if err != nil { diff --git a/api/service/stagedstreamsync/stage_statesync.go b/api/service/stagedstreamsync/stage_statesync.go index 1a973c13e..081b3e8b9 100644 --- a/api/service/stagedstreamsync/stage_statesync.go +++ b/api/service/stagedstreamsync/stage_statesync.go @@ -55,36 +55,37 @@ func NewStageStateSyncCfg(bc core.BlockChain, // Exec progresses States stage in the forward direction func (sss *StageStateSync) Exec(ctx context.Context, bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { - // only execute this stage in fast/snap sync mode and once we reach to pivot - if s.state.status.pivotBlock == nil || s.state.CurrentBlockNumber() != s.state.status.pivotBlock.NumberU64() { - return nil - } - // for short range sync, skip this step if !s.state.initSync { return nil - } - - maxHeight := s.state.status.targetBN - currentHead := s.state.CurrentBlockNumber() - if currentHead >= maxHeight { - return nil - } - currProgress := s.state.CurrentBlockNumber() - targetHeight := s.state.currentCycle.TargetHeight - - if errV := CreateView(ctx, sss.configs.db, tx, func(etx kv.Tx) error { - if currProgress, err = s.CurrentStageProgress(etx); err != nil { - return err - } + } // only execute this stage in fast/snap sync mode and once we reach to pivot + + if s.state.status.pivotBlock == nil || + s.state.CurrentBlockNumber() != s.state.status.pivotBlock.NumberU64() || + s.state.status.statesSynced { return nil - }); errV != nil { - return errV } - if currProgress >= targetHeight { - return nil - } + // maxHeight := s.state.status.targetBN + // currentHead := s.state.CurrentBlockNumber() + // if currentHead >= maxHeight { + // return nil + // } + // currProgress := s.state.CurrentBlockNumber() + // targetHeight := s.state.currentCycle.TargetHeight + + // if errV := CreateView(ctx, sss.configs.db, tx, func(etx kv.Tx) error { + // if currProgress, err = s.CurrentStageProgress(etx); err != nil { + // return err + // } + // return nil + // }); errV != nil { + // return errV + // } + + // if currProgress >= targetHeight { + // return nil + // } useInternalTx := tx == nil if useInternalTx { var err error @@ -104,8 +105,9 @@ func (sss *StageStateSync) Exec(ctx context.Context, bool, invalidBlockRevert bo // Fetch states from neighbors 
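Note: after this rework, the state-sync stage only does work when the node is in a long-range fast/snap cycle, has reached the pivot block, and has not already finished the state download. The guards condensed into one hypothetical helper (a sketch, not code from the patch; pivot == 0 stands in for a nil pivot block):

func shouldRunStateSync(initSync bool, head, pivot uint64, statesSynced bool) bool {
	// Short-range sync, full sync (no pivot) and already-synced states all skip the stage.
	if !initSync || pivot == 0 || statesSynced {
		return false
	}
	// Run exactly when the chain head has caught up to the pivot block.
	return head == pivot
}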
pivotRootHash := s.state.status.pivotBlock.Root() + currentBlockRootHash := s.state.bc.CurrentFastBlock().Root() sdm := newStateDownloadManager(tx, sss.configs.bc, sss.configs.concurrency, s.state.logger) - sdm.setRootHash(pivotRootHash) + sdm.setRootHash(currentBlockRootHash) var wg sync.WaitGroup for i := 0; i < s.state.config.Concurrency; i++ { wg.Add(1) diff --git a/api/service/stagedstreamsync/syncing.go b/api/service/stagedstreamsync/syncing.go index 88e0a0857..73f050080 100644 --- a/api/service/stagedstreamsync/syncing.go +++ b/api/service/stagedstreamsync/syncing.go @@ -237,20 +237,14 @@ func (s *StagedStreamSync) checkPivot(ctx context.Context, estimatedHeight uint6 } pivotBlockNumber := uint64(0) - if curPivot := rawdb.ReadLastPivotNumber(s.bc.ChainDb()); curPivot != nil { + var curPivot *uint64 + if curPivot = rawdb.ReadLastPivotNumber(s.bc.ChainDb()); curPivot != nil { // if head is behind pivot, that means it is still on fast/snap sync mode if head := s.CurrentBlockNumber(); head < *curPivot { pivotBlockNumber = *curPivot // pivot could be moved forward if it is far from head if pivotBlockNumber < estimatedHeight-MaxPivotDistanceToHead { pivotBlockNumber = estimatedHeight - MinPivotDistanceToHead - if err := rawdb.WriteLastPivotNumber(s.bc.ChainDb(), pivotBlockNumber); err != nil { - s.logger.Warn().Err(err). - Uint64("current pivot number", *curPivot). - Uint64("new pivot number", pivotBlockNumber). - Msg(WrapStagedSyncMsg("update pivot number failed")) - pivotBlockNumber = *curPivot - } } } } else { @@ -270,6 +264,14 @@ func (s *StagedStreamSync) checkPivot(ctx context.Context, estimatedHeight uint6 Msg(WrapStagedSyncMsg("query peers for pivot block failed")) return block, FastSync, err } else { + if curPivot == nil || pivotBlockNumber != *curPivot { + if err := rawdb.WriteLastPivotNumber(s.bc.ChainDb(), pivotBlockNumber); err != nil { + s.logger.Warn().Err(err). + Uint64("new pivot number", pivotBlockNumber). + Msg(WrapStagedSyncMsg("update pivot number failed")) + return block, FastSync, err + } + } s.status.pivotBlock = block s.logger.Info(). Uint64("estimatedHeight", estimatedHeight). 
diff --git a/core/blockchain_impl.go b/core/blockchain_impl.go index 97660544d..15527c3fe 100644 --- a/core/blockchain_impl.go +++ b/core/blockchain_impl.go @@ -852,6 +852,20 @@ func (bc *BlockChainImpl) writeHeadBlock(block *types.Block) error { if err := rawdb.WriteHeadBlockHash(batch, block.Hash()); err != nil { return err } + if err := rawdb.WriteHeadHeaderHash(batch, block.Hash()); err != nil { + return err + } + + isNewEpoch := block.IsLastBlockInEpoch() + if isNewEpoch { + epoch := block.Header().Epoch() + nextEpoch := epoch.Add(epoch, common.Big1) + if err := rawdb.WriteShardStateBytes(batch, nextEpoch, block.Header().ShardState()); err != nil { + utils.Logger().Error().Err(err).Msg("failed to store shard state") + return err + } + } + if err := batch.Write(); err != nil { return err } @@ -1328,6 +1342,17 @@ func (bc *BlockChainImpl) InsertReceiptChain(blockChain types.Blocks, receiptCha return 0, err } + isNewEpoch := block.IsLastBlockInEpoch() + if isNewEpoch { + epoch := block.Header().Epoch() + nextEpoch := epoch.Add(epoch, common.Big1) + err := rawdb.WriteShardStateBytes(batch, nextEpoch, block.Header().ShardState()) + if err != nil { + utils.Logger().Error().Err(err).Msg("failed to store shard state") + return 0, err + } + } + stats.processed++ if batch.ValueSize() >= ethdb.IdealBatchSize { diff --git a/core/rawdb/accessors_offchain.go b/core/rawdb/accessors_offchain.go index 4808c8c23..05a2321a2 100644 --- a/core/rawdb/accessors_offchain.go +++ b/core/rawdb/accessors_offchain.go @@ -22,7 +22,7 @@ func ReadShardState( data, err := db.Get(shardStateKey(epoch)) if err != nil { return nil, errors.Errorf( - MsgNoShardStateFromDB, "epoch: %d", epoch, + MsgNoShardStateFromDB, "epoch: %d", epoch.Uint64(), ) } ss, err2 := shard.DecodeWrapper(data) From 3fcfad4531e63f32c08094787ef955b847fd809d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Fri, 27 Oct 2023 00:26:47 +0800 Subject: [PATCH 47/56] fix rebase conflicts --- api/service/stagedstreamsync/block_manager.go | 2 +- .../stagedstreamsync/stage_statesync.go | 2 +- core/blockchain.go | 2 +- p2p/stream/protocols/sync/stream_test.go | 21 ------------------- 4 files changed, 3 insertions(+), 24 deletions(-) diff --git a/api/service/stagedstreamsync/block_manager.go b/api/service/stagedstreamsync/block_manager.go index f5ba8fdc4..d614d2420 100644 --- a/api/service/stagedstreamsync/block_manager.go +++ b/api/service/stagedstreamsync/block_manager.go @@ -126,7 +126,7 @@ func (gbm *blockDownloadManager) GetDownloadDetails(blockNumber uint64) (loopID if dm, exist := gbm.bdd[blockNumber]; exist { return dm.loopID, dm.streamID, nil } - return 0, sttypes.StreamID(0), fmt.Errorf("there is no download details for the block number: %d", blockNumber) + return 0, sttypes.StreamID(fmt.Sprint(0)), fmt.Errorf("there is no download details for the block number: %d", blockNumber) } // SetRootHash sets the root hash for a specific block diff --git a/api/service/stagedstreamsync/stage_statesync.go b/api/service/stagedstreamsync/stage_statesync.go index 081b3e8b9..086d0fb41 100644 --- a/api/service/stagedstreamsync/stage_statesync.go +++ b/api/service/stagedstreamsync/stage_statesync.go @@ -104,7 +104,7 @@ func (sss *StageStateSync) Exec(ctx context.Context, bool, invalidBlockRevert bo } // Fetch states from neighbors - pivotRootHash := s.state.status.pivotBlock.Root() + // pivotRootHash := s.state.status.pivotBlock.Root() currentBlockRootHash := 
s.state.bc.CurrentFastBlock().Root() sdm := newStateDownloadManager(tx, sss.configs.bc, sss.configs.concurrency, s.state.logger) sdm.setRootHash(currentBlockRootHash) diff --git a/core/blockchain.go b/core/blockchain.go index f6f50e71f..1f7233f42 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -130,7 +130,7 @@ type BlockChain interface { // transaction and receipt data. InsertReceiptChain(blockChain types.Blocks, receiptChain []types.Receipts) (int, error) // LeaderRotationMeta returns the number of continuous blocks by the leader. - LeaderRotationMeta() (publicKeyBytes []byte, epoch, count, shifts uint64, err error) + LeaderRotationMeta() LeaderRotationMeta // BadBlocks returns a list of the last 'bad blocks' that // the client has seen on the network. BadBlocks() []BadBlock diff --git a/p2p/stream/protocols/sync/stream_test.go b/p2p/stream/protocols/sync/stream_test.go index 9511de2ce..3b538c14b 100644 --- a/p2p/stream/protocols/sync/stream_test.go +++ b/p2p/stream/protocols/sync/stream_test.go @@ -296,27 +296,6 @@ func TestSyncStream_HandleGetTrieNodes(t *testing.T) { } } -func TestSyncStream_HandleGetNodeData(t *testing.T) { - st, remoteSt := makeTestSyncStream() - - go st.run() - defer close(st.closeC) - - req := testGetNodeDataRequestMsg - b, _ := protobuf.Marshal(req) - err := remoteSt.WriteBytes(b) - if err != nil { - t.Fatal(err) - } - - time.Sleep(200 * time.Millisecond) - receivedBytes, _ := remoteSt.ReadBytes() - - if err := checkGetNodeDataResult(receivedBytes, testGetNodeData); err != nil { - t.Fatal(err) - } -} - func makeTestSyncStream() (*syncStream, *testRemoteBaseStream) { localRaw, remoteRaw := makePairP2PStreams() remote := newTestRemoteBaseStream(remoteRaw) From 99928257d092f7c0786aea0abdae6b9c131c0040 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Wed, 15 Nov 2023 01:14:17 +0800 Subject: [PATCH 48/56] add state sync --- api/service/stagedstreamsync/range.go | 84 + api/service/stagedstreamsync/satate_sync.go | 2013 +++++++++++++++++++ 2 files changed, 2097 insertions(+) create mode 100644 api/service/stagedstreamsync/range.go create mode 100644 api/service/stagedstreamsync/satate_sync.go diff --git a/api/service/stagedstreamsync/range.go b/api/service/stagedstreamsync/range.go new file mode 100644 index 000000000..de18b02ab --- /dev/null +++ b/api/service/stagedstreamsync/range.go @@ -0,0 +1,84 @@ +// Copyright 2021 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package stagedstreamsync + +import ( + "math/big" + + "github.com/ethereum/go-ethereum/common" + "github.com/holiman/uint256" +) + +// hashSpace is the total size of the 256 bit hash space for accounts. 
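Note: range.go ports go-ethereum's hashRange helper, which walks the 2^256 account hash space in equal-sized chunks. Assuming that helper (defined just below) and an extra fmt import, iterating the whole space in, say, accountConcurrency = 16 chunks would look roughly like this (illustrative usage only):

func printAccountRanges(chunks uint64) {
	r := newHashRange(common.Hash{}, chunks) // start at 0x00..00, split into `chunks` intervals
	for i := 0; ; i++ {
		fmt.Printf("chunk %2d: %s .. %s\n", i, r.Start().Hex(), r.End().Hex())
		if !r.Next() { // Next reports false once the range would overflow 2^256
			break
		}
	}
}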
+var hashSpace = new(big.Int).Exp(common.Big2, common.Big256, nil) + +// hashRange is a utility to handle ranges of hashes, Split up the +// hash-space into sections, and 'walk' over the sections +type hashRange struct { + current *uint256.Int + step *uint256.Int +} + +// newHashRange creates a new hashRange, initiated at the start position, +// and with the step set to fill the desired 'num' chunks +func newHashRange(start common.Hash, num uint64) *hashRange { + left := new(big.Int).Sub(hashSpace, start.Big()) + step := new(big.Int).Div( + new(big.Int).Add(left, new(big.Int).SetUint64(num-1)), + new(big.Int).SetUint64(num), + ) + step256 := new(uint256.Int) + step256.SetFromBig(step) + + return &hashRange{ + current: new(uint256.Int).SetBytes32(start[:]), + step: step256, + } +} + +// Next pushes the hash range to the next interval. +func (r *hashRange) Next() bool { + next, overflow := new(uint256.Int).AddOverflow(r.current, r.step) + if overflow { + return false + } + r.current = next + return true +} + +// Start returns the first hash in the current interval. +func (r *hashRange) Start() common.Hash { + return r.current.Bytes32() +} + +// End returns the last hash in the current interval. +func (r *hashRange) End() common.Hash { + // If the end overflows (non divisible range), return a shorter interval + next, overflow := new(uint256.Int).AddOverflow(r.current, r.step) + if overflow { + return common.HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff") + } + return next.SubUint64(next, 1).Bytes32() +} + +// incHash returns the next hash, in lexicographical order (a.k.a plus one) +func incHash(h common.Hash) common.Hash { + var a uint256.Int + a.SetBytes32(h[:]) + a.AddUint64(&a, 1) + return common.Hash(a.Bytes32()) +} \ No newline at end of file diff --git a/api/service/stagedstreamsync/satate_sync.go b/api/service/stagedstreamsync/satate_sync.go new file mode 100644 index 000000000..e90640a9a --- /dev/null +++ b/api/service/stagedstreamsync/satate_sync.go @@ -0,0 +1,2013 @@ +package stagedstreamsync + +import ( + "bytes" + "encoding/json" + gomath "math" + "math/big" + "math/rand" + "sort" + "sync" + "sync/atomic" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" + "github.com/harmony-one/harmony/common/math" + "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/core/rawdb" + "github.com/harmony-one/harmony/core/state" + "github.com/harmony-one/harmony/internal/utils" + sttypes "github.com/harmony-one/harmony/p2p/stream/types" + "github.com/ledgerwatch/erigon-lib/kv" + "github.com/ledgerwatch/log/v3" + "github.com/pkg/errors" + "github.com/rs/zerolog" + "golang.org/x/crypto/sha3" + // "github.com/ethereum/go-ethereum/eth/protocols/snap/range" +) + +const ( + // minRequestSize is the minimum number of bytes to request from a remote peer. + // This number is used as the low cap for account and storage range requests. + // Bytecode and trienode are limited inherently by item count (1). + minRequestSize = 64 * 1024 + + // maxRequestSize is the maximum number of bytes to request from a remote peer. + // This number is used as the high cap for account and storage range requests. + // Bytecode and trienode are limited more explicitly by the caps below. 
+ maxRequestSize = 512 * 1024 + + // maxCodeRequestCount is the maximum number of bytecode blobs to request in a + // single query. If this number is too low, we're not filling responses fully + // and waste round trip times. If it's too high, we're capping responses and + // waste bandwidth. + // + // Deployed bytecodes are currently capped at 24KB, so the minimum request + // size should be maxRequestSize / 24K. Assuming that most contracts do not + // come close to that, requesting 4x should be a good approximation. + maxCodeRequestCount = maxRequestSize / (24 * 1024) * 4 + + // maxTrieRequestCount is the maximum number of trie node blobs to request in + // a single query. If this number is too low, we're not filling responses fully + // and waste round trip times. If it's too high, we're capping responses and + // waste bandwidth. + maxTrieRequestCount = maxRequestSize / 512 + + // trienodeHealRateMeasurementImpact is the impact a single measurement has on + // the local node's trienode processing capacity. A value closer to 0 reacts + // slower to sudden changes, but it is also more stable against temporary hiccups. + trienodeHealRateMeasurementImpact = 0.005 + + // minTrienodeHealThrottle is the minimum divisor for throttling trie node + // heal requests to avoid overloading the local node and excessively expanding + // the state trie breadth wise. + minTrienodeHealThrottle = 1 + + // maxTrienodeHealThrottle is the maximum divisor for throttling trie node + // heal requests to avoid overloading the local node and exessively expanding + // the state trie bedth wise. + maxTrienodeHealThrottle = maxTrieRequestCount + + // trienodeHealThrottleIncrease is the multiplier for the throttle when the + // rate of arriving data is higher than the rate of processing it. + trienodeHealThrottleIncrease = 1.33 + + // trienodeHealThrottleDecrease is the divisor for the throttle when the + // rate of arriving data is lower than the rate of processing it. + trienodeHealThrottleDecrease = 1.25 +) + +// of only the account path. There's no need to be able to address both an +// account node and a storage node in the same request as it cannot happen +// that a slot is accessed before the account path is fully expanded. +type TrieNodePathSet [][]byte + +var ( + // accountConcurrency is the number of chunks to split the account trie into + // to allow concurrent retrievals. + accountConcurrency = 16 + + // storageConcurrency is the number of chunks to split the a large contract + // storage trie into to allow concurrent retrievals. + storageConcurrency = 16 + + // MaxHash represents the maximum possible hash value. + MaxHash = common.HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff") +) + +// accountTask represents the sync task for a chunk of the account snapshot. 
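Note: plugging in the constants above, a single request is capped at roughly 84 bytecodes or 1,024 trie nodes. A quick standalone check of that arithmetic (values copied from the constants; nothing new is defined here):

package main

import "fmt"

func main() {
	const maxRequestSize = 512 * 1024             // 524,288 bytes
	fmt.Println(maxRequestSize / (24 * 1024) * 4) // maxCodeRequestCount: 84
	fmt.Println(maxRequestSize / 512)             // maxTrieRequestCount: 1024
}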
+type accountTask struct { + id uint64 //unique id for account task + + // These fields get serialized to leveldb on shutdown + Next common.Hash // Next account to sync in this interval + Last common.Hash // Last account to sync in this interval + SubTasks map[common.Hash][]*storageTask // Storage intervals needing fetching for large contracts + + // These fields are internals used during runtime + //req *accountRequest // Pending request to fill this task + //res *accountResponse // Validate response filling this task + pend int // Number of pending subtasks for this round + + needCode []bool // Flags whether the filling accounts need code retrieval + needState []bool // Flags whether the filling accounts need storage retrieval + needHeal []bool // Flags whether the filling accounts's state was chunked and need healing + + codeTasks map[common.Hash]struct{} // Code hashes that need retrieval + stateTasks map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval + + genBatch ethdb.Batch // Batch used by the node generator + genTrie *trie.StackTrie // Node generator from storage slots + + requested bool + done bool // Flag whether the task can be removed + + res *accountResponse +} + +// accountResponse is an already Merkle-verified remote response to an account +// range request. It contains the subtrie for the requested account range and +// the database that's going to be filled with the internal nodes on commit. +type accountResponse struct { + task *accountTask // Task which this request is filling + + hashes []common.Hash // Account hashes in the returned range + accounts []*types.StateAccount // Expanded accounts in the returned range + + cont bool // Whether the account range has a continuation +} + +// storageTask represents the sync task for a chunk of the storage snapshot. +type storageTask struct { + Next common.Hash // Next account to sync in this interval + Last common.Hash // Last account to sync in this interval + + // These fields are internals used during runtime + root common.Hash // Storage root hash for this instance + //req *storageTaskBundleuest // Pending request to fill this task + + genBatch ethdb.Batch // Batch used by the node generator + genTrie *trie.StackTrie // Node generator from storage slots + + requested bool + done bool // Flag whether the task can be removed +} + +// healRequestSort implements the Sort interface, allowing sorting trienode +// heal requests, which is a prerequisite for merging storage-requests. +type healRequestSort struct { + paths []string + hashes []common.Hash + syncPaths []trie.SyncPath +} + +func (t *healRequestSort) Len() int { + return len(t.hashes) +} + +func (t *healRequestSort) Less(i, j int) bool { + a := t.syncPaths[i] + b := t.syncPaths[j] + switch bytes.Compare(a[0], b[0]) { + case -1: + return true + case 1: + return false + } + // identical first part + if len(a) < len(b) { + return true + } + if len(b) < len(a) { + return false + } + if len(a) == 2 { + return bytes.Compare(a[1], b[1]) < 0 + } + return false +} + +func (t *healRequestSort) Swap(i, j int) { + t.paths[i], t.paths[j] = t.paths[j], t.paths[i] + t.hashes[i], t.hashes[j] = t.hashes[j], t.hashes[i] + t.syncPaths[i], t.syncPaths[j] = t.syncPaths[j], t.syncPaths[i] +} + +// Merge merges the pathsets, so that several storage requests concerning the +// same account are merged into one, to reduce bandwidth. +// OBS: This operation is moot if t has not first been sorted. 
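Note: the Merge method that follows collapses several storage requests against the same account into a single path set. A small illustrative input/output, assuming the surrounding package's types and a syncPaths slice already sorted by account path (hypothetical helper, abbreviated byte values):

func exampleMergePathSets() []TrieNodePathSet {
	acctA, acctB := []byte{0x0a}, []byte{0x0b}
	slot1, slot2 := []byte{0x01}, []byte{0x02}

	h := &healRequestSort{
		syncPaths: []trie.SyncPath{
			{acctA},        // account-only request stays as-is
			{acctB, slot1}, // two slots under the same account...
			{acctB, slot2}, // ...are merged into one path set
		},
	}
	// Expected result: [{acctA}, {acctB, slot1, slot2}]
	return h.Merge()
}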
+func (t *healRequestSort) Merge() []TrieNodePathSet { + var result []TrieNodePathSet + for _, path := range t.syncPaths { + pathset := TrieNodePathSet(path) + if len(path) == 1 { + // It's an account reference. + result = append(result, pathset) + } else { + // It's a storage reference. + end := len(result) - 1 + if len(result) == 0 || !bytes.Equal(pathset[0], result[end][0]) { + // The account doesn't match last, create a new entry. + result = append(result, pathset) + } else { + // It's the same account as the previous one, add to the storage + // paths of that request. + result[end] = append(result[end], pathset[1]) + } + } + } + return result +} + +type storageTaskBundle struct { + id uint64 //unique id for storage task bundle + accounts []common.Hash + roots []common.Hash + mainTask *accountTask + subtask *storageTask + origin common.Hash + limit common.Hash +} + +// healTask represents the sync task for healing the snap-synced chunk boundaries. +type healTask struct { + id uint64 + trieTasks map[string]common.Hash // Set of trie node tasks currently queued for retrieval, indexed by node path + codeTasks map[common.Hash]struct{} // Set of byte code tasks currently queued for retrieval, indexed by code hash + paths []string + hashes []common.Hash + pathsets []TrieNodePathSet + task *healTask + root common.Hash + byteCodeReq bool +} + +type tasks struct { + accountTasks map[uint64]*accountTask // Current account task set being synced + storageTasks map[uint64]*storageTaskBundle // Set of trie node tasks currently queued for retrieval, indexed by path + codeTasks map[common.Hash]struct{} // Set of byte code tasks currently queued for retrieval, indexed by hash + healer map[uint64]*healTask + snapped bool // Flag to signal that snap phase is done +} + +func newTasks() *tasks { + return &tasks{ + accountTasks: make(map[uint64]*accountTask, 0), + storageTasks: make(map[uint64]*storageTaskBundle, 0), + codeTasks: make(map[common.Hash]struct{}), + healer: make(map[uint64]*healTask, 0), + snapped: false, + } +} + +func (t *tasks) addAccountTask(accountTaskIndex uint64, ct *accountTask) { + t.accountTasks[accountTaskIndex] = ct +} + +func (t *tasks) getAccountTask(accountTaskIndex uint64) *accountTask { + if _, ok := t.accountTasks[accountTaskIndex]; ok { + return t.accountTasks[accountTaskIndex] + } + return nil +} + +func (t *tasks) deleteAccountTask(accountTaskIndex uint64) { + if _, ok := t.accountTasks[accountTaskIndex]; ok { + delete(t.accountTasks, accountTaskIndex) + } + // t.accountTasks = append(t.accountTasks[:accountTaskIndex], t.accountTasks[accountTaskIndex+1:]...) 
+} + +func (t *tasks) addCodeTask(h common.Hash) { + t.codeTasks[h] = struct{}{} +} + +func (t *tasks) deleteCodeTask(hash common.Hash) { + if _, ok := t.codeTasks[hash]; ok { + delete(t.codeTasks, hash) + } +} + +func (t *tasks) addStorageTaskBundle(storageBundleIndex uint64, storages *storageTaskBundle) { + t.storageTasks[storageBundleIndex] = storages +} + +func (t *tasks) deleteStorageTaskBundle(storageBundleIndex uint64) { + if _, ok := t.storageTasks[storageBundleIndex]; ok { + delete(t.storageTasks, storageBundleIndex) + } +} + +func (t *tasks) addHealerTask(taskID uint64, task *healTask) { + t.healer[taskID] = task +} + +func (t *tasks) deleteHealerTask(taskID uint64) { + if _, ok := t.healer[taskID]; ok { + delete(t.healer, taskID) + } +} + +func (t *tasks) addHealerTrieTask(taskID uint64, path string, h common.Hash) { + if _, ok := t.healer[taskID]; ok { + t.healer[taskID].trieTasks[path] = h + } +} + +func (t *tasks) getHealerTrieTask(taskID uint64, path string) common.Hash { + if _, ok := t.healer[taskID]; ok { + return t.healer[taskID].trieTasks[path] + } + return common.Hash{} +} + +func (t *tasks) addHealerTrieCodeTask(taskID uint64, hash common.Hash, v struct{}) { + if _, ok := t.healer[taskID]; ok { + t.healer[taskID].codeTasks[hash] = v + } +} + +func (t *tasks) getHealerTrieCodeTask(taskID uint64, h common.Hash) struct{} { + if _, ok := t.healer[taskID]; ok { + return t.healer[taskID].codeTasks[h] + } + return struct{}{} +} + +// SyncProgress is a database entry to allow suspending and resuming a snapshot state +// sync. Opposed to full and fast sync, there is no way to restart a suspended +// snap sync without prior knowledge of the suspension point. +type SyncProgress struct { + Tasks map[uint64]*accountTask // The suspended account tasks (contract tasks within) + + // Status report during syncing phase + AccountSynced uint64 // Number of accounts downloaded + AccountBytes common.StorageSize // Number of account trie bytes persisted to disk + BytecodeSynced uint64 // Number of bytecodes downloaded + BytecodeBytes common.StorageSize // Number of bytecode bytes downloaded + StorageSynced uint64 // Number of storage slots downloaded + StorageBytes common.StorageSize // Number of storage trie bytes persisted to disk + + // Status report during healing phase + TrienodeHealSynced uint64 // Number of state trie nodes downloaded + TrienodeHealBytes common.StorageSize // Number of state trie bytes persisted to disk + BytecodeHealSynced uint64 // Number of bytecodes downloaded + BytecodeHealBytes common.StorageSize // Number of bytecodes persisted to disk +} + +// FullStateDownloadManager is the helper structure for get blocks request management +type FullStateDownloadManager struct { + bc core.BlockChain + tx kv.RwTx + + db ethdb.KeyValueStore // Database to store the trie nodes into (and dedup) + scheme string // Node scheme used in node database + + tasks *tasks + requesting *tasks + processing *tasks + retries *tasks + + root common.Hash // Current state trie root being synced + snapped bool // Flag to signal that snap phase is done + // healer *healTask // Current state healing task being executed + + protocol syncProtocol + scheduler *trie.Sync // State trie sync scheduler defining the tasks + keccak crypto.KeccakState // Keccak256 hasher to verify deliveries with + concurrency int + logger zerolog.Logger + lock sync.RWMutex + + numUncommitted int + bytesUncommitted int + + accountSynced uint64 // Number of accounts downloaded + accountBytes common.StorageSize // Number of 
account trie bytes persisted to disk + bytecodeSynced uint64 // Number of bytecodes downloaded + bytecodeBytes common.StorageSize // Number of bytecode bytes downloaded + storageSynced uint64 // Number of storage slots downloaded + storageBytes common.StorageSize // Number of storage trie bytes persisted to disk + + pend sync.WaitGroup // Tracks network request goroutines for graceful shutdown + + stateWriter ethdb.Batch // Shared batch writer used for persisting raw states + accountHealed uint64 // Number of accounts downloaded during the healing stage + accountHealedBytes common.StorageSize // Number of raw account bytes persisted to disk during the healing stage + storageHealed uint64 // Number of storage slots downloaded during the healing stage + storageHealedBytes common.StorageSize // Number of raw storage bytes persisted to disk during the healing stage + + trienodeHealRate float64 // Average heal rate for processing trie node data + trienodeHealPend atomic.Uint64 // Number of trie nodes currently pending for processing + trienodeHealThrottle float64 // Divisor for throttling the amount of trienode heal data requested + trienodeHealThrottled time.Time // Timestamp the last time the throttle was updated + + trienodeHealSynced uint64 // Number of state trie nodes downloaded + trienodeHealBytes common.StorageSize // Number of state trie bytes persisted to disk + trienodeHealDups uint64 // Number of state trie nodes already processed + trienodeHealNops uint64 // Number of state trie nodes not requested + bytecodeHealSynced uint64 // Number of bytecodes downloaded + bytecodeHealBytes common.StorageSize // Number of bytecodes persisted to disk + bytecodeHealDups uint64 // Number of bytecodes already processed + bytecodeHealNops uint64 // Number of bytecodes not requested +} + +func newFullStateDownloadManager(db ethdb.KeyValueStore, + scheme string, + tx kv.RwTx, + bc core.BlockChain, + concurrency int, + logger zerolog.Logger) *FullStateDownloadManager { + + return &FullStateDownloadManager{ + db: db, + scheme: scheme, + bc: bc, + stateWriter: db.NewBatch(), + tx: tx, + keccak: sha3.NewLegacyKeccak256().(crypto.KeccakState), + concurrency: concurrency, + logger: logger, + tasks: newTasks(), + requesting: newTasks(), + processing: newTasks(), + retries: newTasks(), + } +} + +func (s *FullStateDownloadManager) setRootHash(root common.Hash) { + s.root = root + s.scheduler = state.NewStateSync(root, s.db, s.onHealState, s.scheme) + s.loadSyncStatus() + // s.sched = state.NewStateSync(root, s.bc.ChainDb(), nil, rawdb.HashScheme) +} + +func (s *FullStateDownloadManager) taskDone(taskID uint64) { + s.tasks.accountTasks[taskID].done = true +} + +// SlimAccount is a modified version of an Account, where the root is replaced +// with a byte slice. This format can be used to represent full-consensus format +// or slim format which replaces the empty root and code hash as nil byte slice. +type SlimAccount struct { + Nonce uint64 + Balance *big.Int + Root []byte // Nil if root equals to types.EmptyRootHash + CodeHash []byte // Nil if hash equals to types.EmptyCodeHash +} + +// SlimAccountRLP encodes the state account in 'slim RLP' format. 
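Note: the SlimAccountRLP / FullAccount pair defined next drops the empty storage root and code hash when persisting snapshot accounts and restores them on decode. A hedged usage sketch (assumes the package's go-ethereum imports; the helper name and values are arbitrary):

func exampleSlimAccountRoundTrip(s *FullStateDownloadManager) (*types.StateAccount, error) {
	acct := types.StateAccount{
		Nonce:    1,
		Balance:  big.NewInt(1000),
		Root:     types.EmptyRootHash,    // omitted in the slim encoding
		CodeHash: types.EmptyCodeHash[:], // omitted in the slim encoding
	}
	slim := s.SlimAccountRLP(acct) // compact form written to the snapshot
	return FullAccount(slim)       // restores the empty root and code hash
}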
+func (s *FullStateDownloadManager) SlimAccountRLP(account types.StateAccount) []byte { + slim := SlimAccount{ + Nonce: account.Nonce, + Balance: account.Balance, + } + if account.Root != types.EmptyRootHash { + slim.Root = account.Root[:] + } + if !bytes.Equal(account.CodeHash, types.EmptyCodeHash[:]) { + slim.CodeHash = account.CodeHash + } + data, err := rlp.EncodeToBytes(slim) + if err != nil { + panic(err) + } + return data +} + +// FullAccount decodes the data on the 'slim RLP' format and returns +// the consensus format account. +func FullAccount(data []byte) (*types.StateAccount, error) { + var slim SlimAccount + if err := rlp.DecodeBytes(data, &slim); err != nil { + return nil, err + } + var account types.StateAccount + account.Nonce, account.Balance = slim.Nonce, slim.Balance + + // Interpret the storage root and code hash in slim format. + if len(slim.Root) == 0 { + account.Root = types.EmptyRootHash + } else { + account.Root = common.BytesToHash(slim.Root) + } + if len(slim.CodeHash) == 0 { + account.CodeHash = types.EmptyCodeHash[:] + } else { + account.CodeHash = slim.CodeHash + } + return &account, nil +} + +// FullAccountRLP converts data on the 'slim RLP' format into the full RLP-format. +func FullAccountRLP(data []byte) ([]byte, error) { + account, err := FullAccount(data) + if err != nil { + return nil, err + } + return rlp.EncodeToBytes(account) +} + +// onHealState is a callback method to invoke when a flat state(account +// or storage slot) is downloaded during the healing stage. The flat states +// can be persisted blindly and can be fixed later in the generation stage. +// Note it's not concurrent safe, please handle the concurrent issue outside. +func (s *FullStateDownloadManager) onHealState(paths [][]byte, value []byte) error { + if len(paths) == 1 { + var account types.StateAccount + if err := rlp.DecodeBytes(value, &account); err != nil { + return nil // Returning the error here would drop the remote peer + } + blob := s.SlimAccountRLP(account) + rawdb.WriteAccountSnapshot(s.stateWriter, common.BytesToHash(paths[0]), blob) + s.accountHealed += 1 + s.accountHealedBytes += common.StorageSize(1 + common.HashLength + len(blob)) + } + if len(paths) == 2 { + rawdb.WriteStorageSnapshot(s.stateWriter, common.BytesToHash(paths[0]), common.BytesToHash(paths[1]), value) + s.storageHealed += 1 + s.storageHealedBytes += common.StorageSize(1 + 2*common.HashLength + len(value)) + } + if s.stateWriter.ValueSize() > ethdb.IdealBatchSize { + s.stateWriter.Write() // It's fine to ignore the error here + s.stateWriter.Reset() + } + return nil +} + +func (s *FullStateDownloadManager) commitHealer(force bool) { + if !force && s.scheduler.MemSize() < ethdb.IdealBatchSize { + return + } + batch := s.db.NewBatch() + if err := s.scheduler.Commit(batch); err != nil { + utils.Logger().Error().Err(err).Msg("Failed to commit healing data") + } + if err := batch.Write(); err != nil { + log.Crit("Failed to persist healing data", "err", err) + } + utils.Logger().Debug().Str("type", "trienodes").Interface("bytes", common.StorageSize(batch.ValueSize())).Msg("Persisted set of healing data") +} + +// getNextBatch returns objects with a maximum of n state download +// tasks to send to the remote peer. 
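Note: onHealState and commitHealer above both follow the same write-batch pattern: buffer healed state in an ethdb.Batch and only flush once it grows past ethdb.IdealBatchSize. A minimal restatement of that pattern (helper name is illustrative):

func flushIfFull(batch ethdb.Batch) error {
	if batch.ValueSize() < ethdb.IdealBatchSize {
		return nil // keep buffering
	}
	if err := batch.Write(); err != nil {
		return err
	}
	batch.Reset()
	return nil
}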
+func (s *FullStateDownloadManager) GetNextBatch() (accounts []*accountTask, + codes []common.Hash, + storages *storageTaskBundle, + healtask *healTask, + codetask *healTask, + err error) { + + s.lock.Lock() + defer s.lock.Unlock() + + cap := StatesPerRequest + + accounts, codes, storages, healtask, codetask = s.getBatchFromRetries(cap) + nItems := len(accounts) + len(codes) + len(storages.roots) + len(healtask.hashes) + len(codetask.hashes) + cap -= nItems + + if cap == 0 { + return + } + + if len(s.tasks.accountTasks) == 0 && s.scheduler.Pending() == 0 { + utils.Logger().Debug().Msg("Snapshot sync already completed") + return + } + + defer func() { // Persist any progress, independent of failure + for _, task := range s.tasks.accountTasks { + s.forwardAccountTask(task) + } + s.cleanAccountTasks() + s.saveSyncStatus() + }() + + // Flush out the last committed raw states + defer func() { + if s.stateWriter.ValueSize() > 0 { + s.stateWriter.Write() + s.stateWriter.Reset() + } + }() + + // commit any trie- and bytecode-healing data. + defer s.commitHealer(true) + + // Whether sync completed or not, disregard any future packets + defer func() { + utils.Logger().Debug().Interface("root", s.root).Msg("Terminating snapshot sync cycle") + }() + + // Refill available tasks from the scheduler. + if len(s.tasks.accountTasks) == 0 && s.scheduler.Pending() == 0 { + utils.Logger().Debug().Msg("Snapshot sync already completed") + return + } + + // if err = s.fillTasks(cap); err != nil { + // return + // } + + includeHealtasks := true + if healtask != nil || codetask != nil { + includeHealtasks = false + } + newAccounts, newCodes, newStorageTaskBundle, unprocessedHealtask, unprocessedCodetask := s.getBatchFromUnprocessed(cap, includeHealtasks) + accounts = append(accounts, newAccounts...) + codes = append(codes, newCodes...) + storages = newStorageTaskBundle + if includeHealtasks { + healtask = unprocessedHealtask + codetask = unprocessedCodetask + } + + return +} + +// saveSyncStatus marshals the remaining sync tasks into leveldb. +func (s *FullStateDownloadManager) saveSyncStatus() { + // Serialize any partial progress to disk before spinning down + for _, task := range s.tasks.accountTasks { + if err := task.genBatch.Write(); err != nil { + utils.Logger().Debug(). + Err(err). + Msg("Failed to persist account slots") + } + for _, subtasks := range task.SubTasks { + for _, subtask := range subtasks { + if err := subtask.genBatch.Write(); err != nil { + utils.Logger().Debug(). + Err(err). + Msg("Failed to persist storage slots") + } + } + } + } + // Store the actual progress markers + progress := &SyncProgress{ + Tasks: s.tasks.accountTasks, + AccountSynced: s.accountSynced, + AccountBytes: s.accountBytes, + BytecodeSynced: s.bytecodeSynced, + BytecodeBytes: s.bytecodeBytes, + StorageSynced: s.storageSynced, + StorageBytes: s.storageBytes, + TrienodeHealSynced: s.trienodeHealSynced, + TrienodeHealBytes: s.trienodeHealBytes, + BytecodeHealSynced: s.bytecodeHealSynced, + BytecodeHealBytes: s.bytecodeHealBytes, + } + status, err := json.Marshal(progress) + if err != nil { + panic(err) // This can only fail during implementation + } + rawdb.WriteSnapshotSyncStatus(s.db, status) +} + +// loadSyncStatus retrieves a previously aborted sync status from the database, +// or generates a fresh one if none is available. 
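
(How a caller is expected to drive GetNextBatch above is easiest to see as a loop;
a rough sketch with the stream plumbing elided. sdm and the termination handling
are illustrative, not part of this patch.)

    for {
        accounts, codes, storages, healtask, codetask, err := sdm.GetNextBatch()
        if err != nil {
            return err
        }
        if len(accounts) == 0 && len(codes) == 0 && storages == nil &&
            healtask == nil && codetask == nil {
            break // nothing left to request for this root
        }
        // ... fan the batch out over the stream protocol, then feed replies back
        // through HandleAccountRequestResult, HandleBytecodeRequestResult,
        // HandleStorageRequestResult and the heal handlers ...
    }
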
+func (s *FullStateDownloadManager) loadSyncStatus() { + var progress SyncProgress + + if status := rawdb.ReadSnapshotSyncStatus(s.db); status != nil { + if err := json.Unmarshal(status, &progress); err != nil { + utils.Logger().Error(). + Err(err). + Msg("Failed to decode snap sync status") + } else { + for _, task := range progress.Tasks { + utils.Logger().Debug(). + Interface("from", task.Next). + Interface("last", task.Last). + Msg("Scheduled account sync task") + } + s.tasks.accountTasks = progress.Tasks + for _, task := range s.tasks.accountTasks { + // task := task // closure for task.genBatch in the stacktrie writer callback + + task.genBatch = ethdb.HookedBatch{ + Batch: s.db.NewBatch(), + OnPut: func(key []byte, value []byte) { + s.accountBytes += common.StorageSize(len(key) + len(value)) + }, + } + // options := trie.NewStackTrieOptions() + writeFn := func(owner common.Hash, path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(task.genBatch, common.Hash{}, path, hash, blob, s.scheme) + } + task.genTrie = trie.NewStackTrie(writeFn) + for accountHash, subtasks := range task.SubTasks { + for _, subtask := range subtasks { + subtask := subtask // closure for subtask.genBatch in the stacktrie writer callback + + subtask.genBatch = ethdb.HookedBatch{ + Batch: s.db.NewBatch(), + OnPut: func(key []byte, value []byte) { + s.storageBytes += common.StorageSize(len(key) + len(value)) + }, + } + // owner := accountHash // local assignment for stacktrie writer closure + writeFn = func(owner common.Hash, path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(subtask.genBatch, accountHash, path, hash, blob, s.scheme) + } + subtask.genTrie = trie.NewStackTrie(writeFn) + } + } + } + s.lock.Lock() + defer s.lock.Unlock() + + s.snapped = len(s.tasks.accountTasks) == 0 + + s.accountSynced = progress.AccountSynced + s.accountBytes = progress.AccountBytes + s.bytecodeSynced = progress.BytecodeSynced + s.bytecodeBytes = progress.BytecodeBytes + s.storageSynced = progress.StorageSynced + s.storageBytes = progress.StorageBytes + + s.trienodeHealSynced = progress.TrienodeHealSynced + s.trienodeHealBytes = progress.TrienodeHealBytes + s.bytecodeHealSynced = progress.BytecodeHealSynced + s.bytecodeHealBytes = progress.BytecodeHealBytes + return + } + } + // Either we've failed to decode the previous state, or there was none. + // Start a fresh sync by chunking up the account range and scheduling + // them for retrieval. 
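
(The fresh-start path below carves the 2^256 account hash space into
accountConcurrency nearly equal ranges. A self-contained illustration of that
arithmetic, assuming a concurrency of 16; the real code forces the final range
up to MaxHash to absorb rounding.)

    package main

    import (
        "fmt"
        "math/big"

        "github.com/ethereum/go-ethereum/common"
    )

    func main() {
        const accountConcurrency = 16
        step := new(big.Int).Sub(
            new(big.Int).Div(
                new(big.Int).Exp(common.Big2, common.Big256, nil),
                big.NewInt(accountConcurrency),
            ), common.Big1,
        )
        var next common.Hash
        for i := 0; i < accountConcurrency; i++ {
            last := common.BigToHash(new(big.Int).Add(next.Big(), step))
            fmt.Printf("task %2d: %s .. %s\n", i, next.Hex(), last.Hex())
            next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1))
        }
    }
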
+ s.tasks.accountTasks = nil + s.accountSynced, s.accountBytes = 0, 0 + s.bytecodeSynced, s.bytecodeBytes = 0, 0 + s.storageSynced, s.storageBytes = 0, 0 + s.trienodeHealSynced, s.trienodeHealBytes = 0, 0 + s.bytecodeHealSynced, s.bytecodeHealBytes = 0, 0 + + var next common.Hash + step := new(big.Int).Sub( + new(big.Int).Div( + new(big.Int).Exp(common.Big2, common.Big256, nil), + big.NewInt(int64(accountConcurrency)), + ), common.Big1, + ) + for i := 0; i < accountConcurrency; i++ { + last := common.BigToHash(new(big.Int).Add(next.Big(), step)) + if i == accountConcurrency-1 { + // Make sure we don't overflow if the step is not a proper divisor + last = MaxHash + } + batch := ethdb.HookedBatch{ + Batch: s.db.NewBatch(), + OnPut: func(key []byte, value []byte) { + s.accountBytes += common.StorageSize(len(key) + len(value)) + }, + } + // options := trie.NewStackTrieOptions() + writeFn := func(owner common.Hash, path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(batch, common.Hash{}, path, hash, blob, s.scheme) + } + // create a unique id for task + var taskID uint64 + for { + taskID = uint64(rand.Int63()) + if taskID == 0 { + continue + } + if _, ok := s.tasks.accountTasks[taskID]; ok { + continue + } + break + } + s.tasks.addAccountTask(taskID, &accountTask{ + id: taskID, + Next: next, + Last: last, + SubTasks: make(map[common.Hash][]*storageTask), + genBatch: batch, + genTrie: trie.NewStackTrie(writeFn), + }) + utils.Logger().Debug(). + Interface("from", next). + Interface("last", last). + Msg("Created account sync task") + + next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1)) + } +} + +// cleanAccountTasks removes account range retrieval tasks that have already been +// completed. +func (s *FullStateDownloadManager) cleanAccountTasks() { + // If the sync was already done before, don't even bother + if len(s.tasks.accountTasks) == 0 { + return + } + // Sync wasn't finished previously, check for any task that can be finalized + //for i := 0; i < len(s.tasks.accountTasks); i++ { + for taskID, _ := range s.tasks.accountTasks { + if s.tasks.accountTasks[taskID].done { + //s.tasks.accountTasks = append(s.tasks.accountTasks[:i], s.tasks.accountTasks[i+1:]...) + //i-- + s.tasks.deleteAccountTask(taskID) + } + } + // If everything was just finalized just, generate the account trie and start heal + if len(s.tasks.accountTasks) == 0 { + s.lock.Lock() + s.snapped = true + s.lock.Unlock() + + // Push the final sync report + //s.reportSyncProgress(true) + } +} + +// cleanStorageTasks iterates over all the account tasks and storage sub-tasks +// within, cleaning any that have been completed. +func (s *FullStateDownloadManager) cleanStorageTasks() { + for _, task := range s.tasks.accountTasks { + for account, subtasks := range task.SubTasks { + // Remove storage range retrieval tasks that completed + for j := 0; j < len(subtasks); j++ { + if subtasks[j].done { + subtasks = append(subtasks[:j], subtasks[j+1:]...) 
+ j-- + } + } + if len(subtasks) > 0 { + task.SubTasks[account] = subtasks + continue + } + // If all storage chunks are done, mark the account as done too + for j, hash := range task.res.hashes { + if hash == account { + task.needState[j] = false + } + } + delete(task.SubTasks, account) + task.pend-- + + // If this was the last pending task, forward the account task + if task.pend == 0 { + s.forwardAccountTask(task) + } + } + } +} + +// forwardAccountTask takes a filled account task and persists anything available +// into the database, after which it forwards the next account marker so that the +// task's next chunk may be filled. +func (s *FullStateDownloadManager) forwardAccountTask(task *accountTask) { + // Remove any pending delivery + res := task.res + if res == nil { + return // nothing to forward + } + task.res = nil + + // Persist the received account segments. These flat state maybe + // outdated during the sync, but it can be fixed later during the + // snapshot generation. + oldAccountBytes := s.accountBytes + + batch := ethdb.HookedBatch{ + Batch: s.db.NewBatch(), + OnPut: func(key []byte, value []byte) { + s.accountBytes += common.StorageSize(len(key) + len(value)) + }, + } + for i, hash := range res.hashes { + if task.needCode[i] || task.needState[i] { + break + } + slim := s.SlimAccountRLP(*res.accounts[i]) + rawdb.WriteAccountSnapshot(batch, hash, slim) + + // If the task is complete, drop it into the stack trie to generate + // account trie nodes for it + if !task.needHeal[i] { + full, err := FullAccountRLP(slim) // TODO(karalabe): Slim parsing can be omitted + if err != nil { + panic(err) // Really shouldn't ever happen + } + task.genTrie.Update(hash[:], full) + } + } + // Flush anything written just now and update the stats + if err := batch.Write(); err != nil { + utils.Logger().Error().Err(err).Msg("Failed to persist accounts") + } + s.accountSynced += uint64(len(res.accounts)) + + // Task filling persisted, push it the chunk marker forward to the first + // account still missing data. + for i, hash := range res.hashes { + if task.needCode[i] || task.needState[i] { + return + } + task.Next = incHash(hash) + } + // All accounts marked as complete, track if the entire task is done + task.done = !res.cont + + // Stack trie could have generated trie nodes, push them to disk (we need to + // flush after finalizing task.done. It's fine even if we crash and lose this + // write as it will only cause more data to be downloaded during heal. + if task.done { + task.genTrie.Commit() + } + if task.genBatch.ValueSize() > ethdb.IdealBatchSize || task.done { + if err := task.genBatch.Write(); err != nil { + utils.Logger().Error().Err(err).Msg("Failed to persist stack account") + } + task.genBatch.Reset() + } + utils.Logger().Debug(). + Int("accounts", len(res.accounts)). + Float64("bytes", float64(s.accountBytes-oldAccountBytes)). + Msg("Persisted range of accounts") +} + +// updateStats bumps the various state sync progress counters and displays a log +// message for the user to see. +func (s *FullStateDownloadManager) updateStats(written, duplicate, unexpected int, duration time.Duration) { + // TODO: here it updates the stats for total pending, processed, duplicates and unexpected + + // for now, we just jog current stats + if written > 0 || duplicate > 0 || unexpected > 0 { + utils.Logger().Info(). + Int("count", written). + Int("duplicate", duplicate). + Int("unexpected", unexpected). 
+ Msg("Imported new state entries") + } +} + +// getBatchFromUnprocessed returns objects with a maximum of n unprocessed state download +// tasks to send to the remote peer. +func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, includeHealtasks bool) ( + accounts []*accountTask, + codes []common.Hash, + storages *storageTaskBundle, + healtask *healTask, + codetask *healTask) { + + // over trie nodes as those can be written to disk and forgotten about. + codes = make([]common.Hash, 0, n) + accounts = make([]*accountTask, 0, n) + + for i, task := range s.tasks.accountTasks { + // Stop when we've gathered enough requests + if len(accounts) == n { + return + } + // if already requested + if task.requested { + continue + } + if task.id == 0 { + continue + } + s.tasks.accountTasks[i].requested = true + accounts = append(accounts, task) + s.requesting.addAccountTask(task.id, task) + // s.tasks.deleteAccountTask(task) + } + + cap := n - len(accounts) + + for _, task := range s.tasks.accountTasks { + // Skip tasks that are already retrieving (or done with) all codes + if len(task.codeTasks) == 0 { + continue + } + + for hash := range task.codeTasks { + delete(task.codeTasks, hash) + codes = append(codes, hash) + s.requesting.addCodeTask(hash) + s.tasks.deleteCodeTask(hash) + // Stop when we've gathered enough requests + if len(codes) >= cap { + return + } + } + } + + cap = n - len(accounts) - len(codes) + + for accTaskID, task := range s.tasks.accountTasks { + // Skip tasks that are already retrieving (or done with) all small states + if len(task.SubTasks) == 0 && len(task.stateTasks) == 0 { + continue + } + + // TODO: check cap calculations (shouldn't give us big chunk) + if cap > maxRequestSize { + cap = maxRequestSize + } + if cap < minRequestSize { // Don't bother with peers below a bare minimum performance + cap = minRequestSize + } + storageSets := cap / 1024 + + storages = &storageTaskBundle{ + accounts: make([]common.Hash, 0, storageSets), + roots: make([]common.Hash, 0, storageSets), + mainTask: task, + } + + // create a unique id for task bundle + var taskID uint64 + for { + taskID = uint64(rand.Int63()) + if taskID == 0 { + continue + } + if _, ok := s.tasks.storageTasks[taskID]; ok { + continue + } + break + } + storages.id = taskID + + for account, subtasks := range task.SubTasks { + // find the first subtask which is not requested yet + for i, st := range subtasks { + // Skip any subtasks already filling + if st.requested { + continue + } + // Found an incomplete storage chunk, schedule it + storages.accounts = append(storages.accounts, account) + storages.roots = append(storages.roots, st.root) + storages.subtask = st + s.tasks.accountTasks[accTaskID].SubTasks[account][i].requested = true + break // Large contract chunks are downloaded individually + } + if storages.subtask != nil { + break // Large contract chunks are downloaded individually + } + } + if storages.subtask == nil { + // No large contract required retrieval, but small ones available + for account, root := range task.stateTasks { + delete(task.stateTasks, account) + + storages.accounts = append(storages.accounts, account) + storages.roots = append(storages.roots, root) + + if len(storages.accounts) >= storageSets { + break + } + } + } + // If nothing was found, it means this task is actually already fully + // retrieving, but large contracts are hard to detect. Skip to the next. 
+ if len(storages.accounts) == 0 { + continue + } + if storages.subtask != nil { + storages.origin = storages.subtask.Next + storages.limit = storages.subtask.Last + } + s.tasks.addStorageTaskBundle(taskID, storages) + s.requesting.addStorageTaskBundle(taskID, storages) + + cap -= len(storages.accounts) + + if cap <= 0 { + break + } + } + + if len(accounts)+len(codes)+len(storages.accounts) > 0 { + return + } + + if !includeHealtasks { + return + } + + // Sync phase done, run heal phase + cap = n + + // Iterate over pending tasks and try to find a peer to retrieve with + for (len(s.tasks.healer) > 0 && len(s.tasks.healer[0].hashes) > 0) || s.scheduler.Pending() > 0 { + // If there are not enough trie tasks queued to fully assign, fill the + // queue from the state sync scheduler. The trie synced schedules these + // together with bytecodes, so we need to queue them combined. + + // index 0 keeps all tasks, later we split it into multiple batch + if len(s.tasks.healer) == 0 { + s.tasks.healer[0] = &healTask{ + trieTasks: make(map[string]common.Hash, 0), + codeTasks: make(map[common.Hash]struct{}, 0), + } + } + + mPaths, mHashes, mCodes := s.scheduler.Missing(n) + for i, path := range mPaths { + s.tasks.healer[0].trieTasks[path] = mHashes[i] + } + for _, hash := range mCodes { + s.tasks.healer[0].codeTasks[hash] = struct{}{} + } + + // If all the heal tasks are bytecodes or already downloading, bail + if len(s.tasks.healer[0].trieTasks) == 0 { + return + } + // Generate the network query and send it to the peer + if cap > maxTrieRequestCount { + cap = maxTrieRequestCount + } + cap = int(float64(cap) / s.trienodeHealThrottle) + if cap <= 0 { + cap = 1 + } + var ( + hashes = make([]common.Hash, 0, cap) + paths = make([]string, 0, cap) + pathsets = make([]TrieNodePathSet, 0, cap) + ) + for path, hash := range s.tasks.healer[0].trieTasks { + delete(s.tasks.healer[0].trieTasks, path) + + paths = append(paths, path) + hashes = append(hashes, hash) + if len(paths) >= cap { + break + } + } + + // Group requests by account hash + paths, hashes, _, pathsets = sortByAccountPath(paths, hashes) + + // create a unique id for healer task + var taskID uint64 + for { + taskID = uint64(rand.Int63()) + if taskID == 0 { + continue + } + if _, ok := s.tasks.healer[taskID]; ok { + continue + } + break + } + + healtask = &healTask{ + id: taskID, + hashes: hashes, + paths: paths, + pathsets: pathsets, + root: s.root, + task: s.tasks.healer[0], + byteCodeReq: false, + } + + s.tasks.healer[taskID] = healtask + s.requesting.addHealerTask(taskID, healtask) + + cap = n - len(hashes) + } + + // trying to get bytecodes + // Iterate over pending tasks and try to find a peer to retrieve with + for (len(s.tasks.healer) > 0 && len(s.tasks.healer[0].codeTasks) > 0) || s.scheduler.Pending() > 0 { + // If there are not enough trie tasks queued to fully assign, fill the + // queue from the state sync scheduler. The trie synced schedules these + // together with trie nodes, so we need to queue them combined. + + mPaths, mHashes, mCodes := s.scheduler.Missing(cap) + for i, path := range mPaths { + s.tasks.healer[0].trieTasks[path] = mHashes[i] + } + for _, hash := range mCodes { + s.tasks.healer[0].codeTasks[hash] = struct{}{} + } + + // If all the heal tasks are trienodes or already downloading, bail + if len(s.tasks.healer[0].codeTasks) == 0 { + return + } + // Task pending retrieval, try to find an idle peer. If no such peer + // exists, we probably assigned tasks for all (or they are stateless). 
+ // Abort the entire assignment mechanism. + + // Generate the network query and send it to the peer + if cap > maxCodeRequestCount { + cap = maxCodeRequestCount + } + hashes := make([]common.Hash, 0, cap) + for hash := range s.tasks.healer[0].codeTasks { + delete(s.tasks.healer[0].codeTasks, hash) + + hashes = append(hashes, hash) + if len(hashes) >= cap { + break + } + } + + // create a unique id for healer task + var taskID uint64 + for { + taskID = uint64(rand.Int63()) + if taskID == 0 { + continue + } + if _, ok := s.tasks.healer[taskID]; ok { + continue + } + break + } + + codetask = &healTask{ + id: taskID, + hashes: hashes, + task: s.tasks.healer[0], + byteCodeReq: true, + } + + s.tasks.healer[taskID] = codetask + s.requesting.addHealerTask(taskID, healtask) + } + + return +} + +// sortByAccountPath takes hashes and paths, and sorts them. After that, it generates +// the TrieNodePaths and merges paths which belongs to the same account path. +func sortByAccountPath(paths []string, hashes []common.Hash) ([]string, []common.Hash, []trie.SyncPath, []TrieNodePathSet) { + var syncPaths []trie.SyncPath + for _, path := range paths { + syncPaths = append(syncPaths, trie.NewSyncPath([]byte(path))) + } + n := &healRequestSort{paths, hashes, syncPaths} + sort.Sort(n) + pathsets := n.Merge() + return n.paths, n.hashes, n.syncPaths, pathsets +} + +// getBatchFromRetries get the block number batch to be requested from retries. +func (s *FullStateDownloadManager) getBatchFromRetries(n int) ( + accounts []*accountTask, + codes []common.Hash, + storages *storageTaskBundle, + healtask *healTask, + codetask *healTask) { + + // over trie nodes as those can be written to disk and forgotten about. + accounts = make([]*accountTask, 0, n) + codes = make([]common.Hash, 0, n) + + for _, task := range s.retries.accountTasks { + // Stop when we've gathered enough requests + if len(accounts) == n { + return + } + accounts = append(accounts, task) + s.requesting.addAccountTask(task.id, task) + s.retries.deleteAccountTask(task.id) + } + + cap := n - len(accounts) + + for code := range s.retries.codeTasks { + // Stop when we've gathered enough requests + if len(codes) >= cap { + return + } + codes = append(codes, code) + s.requesting.addCodeTask(code) + s.retries.deleteCodeTask(code) + } + + cap = n - len(accounts) - len(codes) + + if s.retries.storageTasks != nil && len(s.retries.storageTasks) > 0 { + storages = &storageTaskBundle{ + id: s.retries.storageTasks[0].id, + accounts: s.retries.storageTasks[0].accounts, + roots: s.retries.storageTasks[0].roots, + mainTask: s.retries.storageTasks[0].mainTask, + subtask: s.retries.storageTasks[0].subtask, + limit: s.retries.storageTasks[0].limit, + origin: s.retries.storageTasks[0].origin, + } + s.requesting.addStorageTaskBundle(storages.id, storages) + s.retries.deleteStorageTaskBundle(storages.id) + } + + if len(accounts)+len(codes)+len(storages.accounts) > 0 { + return + } + + cap = n + + if s.retries.healer != nil && len(s.retries.healer) > 0 { + foundHealTask := false + foundByteCodeTask := false + + for id, task := range s.retries.healer { + if !foundHealTask && !task.byteCodeReq { + healtask = &healTask{ + id: id, + hashes: task.hashes, + paths: task.paths, + pathsets: task.pathsets, + root: task.root, + task: task.task, + byteCodeReq: task.byteCodeReq, + } + s.requesting.addHealerTask(id, task) + s.retries.deleteHealerTask(id) + foundHealTask = true + } + if !foundByteCodeTask && task.byteCodeReq { + codetask = &healTask{ + id: id, + hashes: task.hashes, + 
paths: task.paths, + pathsets: task.pathsets, + root: task.root, + task: task.task, + byteCodeReq: task.byteCodeReq, + } + s.requesting.addHealerTask(id, task) + s.retries.deleteHealerTask(id) + foundByteCodeTask = true + } + if foundHealTask && foundByteCodeTask { + break + } + } + } + + return +} + +// HandleRequestError handles the error result +func (s *FullStateDownloadManager) HandleRequestError(accounts []*accountTask, + codes []common.Hash, + storages *storageTaskBundle, + healtask *healTask, + codetask *healTask, + streamID sttypes.StreamID, err error) { + + s.lock.Lock() + defer s.lock.Unlock() + + for _, task := range accounts { + s.requesting.deleteAccountTask(task.id) + s.retries.addAccountTask(task.id, task) + } + + for _, code := range codes { + s.requesting.deleteCodeTask(code) + s.retries.addCodeTask(code) + } + + if storages != nil { + s.requesting.addStorageTaskBundle(storages.id, storages) + s.retries.deleteStorageTaskBundle(storages.id) + } + + if healtask != nil { + s.retries.addHealerTask(healtask.id, healtask) + s.requesting.deleteHealerTask(healtask.id) + } + + if codetask != nil { + s.retries.addHealerTask(codetask.id, codetask) + s.requesting.deleteHealerTask(codetask.id) + } +} + +// HandleAccountRequestResult handles get account ranges result +func (s *FullStateDownloadManager) HandleAccountRequestResult(task *accountTask, // Task which this request is filling + hashes []common.Hash, // Account hashes in the returned range + accounts []*types.StateAccount, // Expanded accounts in the returned range + cont bool, // Whether the account range has a continuation + loopID int, + streamID sttypes.StreamID) error { + + s.lock.Lock() + defer s.lock.Unlock() + + if err := s.processAccountResponse(task, hashes, accounts, cont); err != nil { + return err + } + + return nil +} + +// processAccountResponse integrates an already validated account range response +// into the account tasks. +func (s *FullStateDownloadManager) processAccountResponse(task *accountTask, // Task which this request is filling + hashes []common.Hash, // Account hashes in the returned range + accounts []*types.StateAccount, // Expanded accounts in the returned range + cont bool, // Whether the account range has a continuation +) error { + + if _, ok := s.tasks.accountTasks[task.id]; ok { + s.tasks.accountTasks[task.id].res = &accountResponse{ + task: task, + hashes: hashes, + accounts: accounts, + cont: cont, + } + } + + // Ensure that the response doesn't overflow into the subsequent task + last := task.Last.Big() + for i, hash := range hashes { + // Mark the range complete if the last is already included. + // Keep iteration to delete the extra states if exists. + cmp := hash.Big().Cmp(last) + if cmp == 0 { + cont = false + continue + } + if cmp > 0 { + // Chunk overflown, cut off excess + hashes = hashes[:i] + accounts = accounts[:i] + cont = false // Mark range completed + break + } + } + // Iterate over all the accounts and assemble which ones need further sub- + // filling before the entire account range can be persisted. 
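
(The classification performed just below starts from two cheap per-account checks,
before also consulting the local database for code and trie data that is already
present. Restated on their own, assuming the types package used here is
go-ethereum's core/types:)

    // needsCode reports whether the account is a contract whose bytecode may
    // still have to be fetched.
    func needsCode(acc *types.StateAccount) bool {
        return !bytes.Equal(acc.CodeHash, types.EmptyCodeHash.Bytes())
    }

    // needsStorage reports whether the account has a non-empty storage trie that
    // may still have to be fetched.
    func needsStorage(acc *types.StateAccount) bool {
        return acc.Root != types.EmptyRootHash
    }
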
+ task.needCode = make([]bool, len(accounts)) + task.needState = make([]bool, len(accounts)) + task.needHeal = make([]bool, len(accounts)) + + task.codeTasks = make(map[common.Hash]struct{}) + task.stateTasks = make(map[common.Hash]common.Hash) + + resumed := make(map[common.Hash]struct{}) + + task.pend = 0 + for i, account := range accounts { + // Check if the account is a contract with an unknown code + if !bytes.Equal(account.CodeHash, types.EmptyCodeHash.Bytes()) { + if !rawdb.HasCodeWithPrefix(s.db, common.BytesToHash(account.CodeHash)) { + task.codeTasks[common.BytesToHash(account.CodeHash)] = struct{}{} + task.needCode[i] = true + task.pend++ + } + } + // Check if the account is a contract with an unknown storage trie + if account.Root != types.EmptyRootHash { + if !rawdb.HasTrieNode(s.db, hashes[i], nil, account.Root, s.scheme) { + // If there was a previous large state retrieval in progress, + // don't restart it from scratch. This happens if a sync cycle + // is interrupted and resumed later. However, *do* update the + // previous root hash. + if subtasks, ok := task.SubTasks[hashes[i]]; ok { + utils.Logger().Debug().Interface("account", hashes[i]).Interface("root", account.Root).Msg("Resuming large storage retrieval") + for _, subtask := range subtasks { + subtask.root = account.Root + } + task.needHeal[i] = true + resumed[hashes[i]] = struct{}{} + } else { + task.stateTasks[hashes[i]] = account.Root + } + task.needState[i] = true + task.pend++ + } + } + } + // Delete any subtasks that have been aborted but not resumed. This may undo + // some progress if a new peer gives us less accounts than an old one, but for + // now we have to live with that. + for hash := range task.SubTasks { + if _, ok := resumed[hash]; !ok { + utils.Logger().Debug().Interface("account", hash).Msg("Aborting suspended storage retrieval") + delete(task.SubTasks, hash) + } + } + // If the account range contained no contracts, or all have been fully filled + // beforehand, short circuit storage filling and forward to the next task + if task.pend == 0 { + s.forwardAccountTask(task) + return nil + } + // Some accounts are incomplete, leave as is for the storage and contract + // task assigners to pick up and fill + return nil +} + +// HandleBytecodeRequestResult handles get bytecode result +func (s *FullStateDownloadManager) HandleBytecodeRequestResult(task *accountTask, // Task which this request is filling + hashes []common.Hash, // Hashes of the bytecode to avoid double hashing + bytecodes [][]byte, // Actual bytecodes to store into the database (nil = missing) + loopID int, + streamID sttypes.StreamID) error { + + s.lock.Lock() + defer s.lock.Unlock() + + if err := s.processBytecodeResponse(task, hashes, bytecodes); err != nil { + return err + } + + return nil +} + +// processBytecodeResponse integrates an already validated bytecode response +// into the account tasks. 
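
(Stepping back to HandleRequestError above: batches move out of the task queues
into "requesting" when handed out, and are handed back for a retry when the stream
fails, so a later GetNextBatch re-issues them. A hedged sketch of the caller-side
pattern; reqErr and streamID are illustrative names.)

    accounts, codes, storages, healtask, codetask, err := sdm.GetNextBatch()
    if err != nil {
        return err
    }
    // ... issue the network requests for this batch ...
    if reqErr != nil {
        // Hand everything back so a later batch can retry it, and penalize the stream.
        sdm.HandleRequestError(accounts, codes, storages, healtask, codetask, streamID, reqErr)
        sdm.protocol.StreamFailed(streamID, "failed to deliver state data")
    }
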
+func (s *FullStateDownloadManager) processBytecodeResponse(task *accountTask, // Task which this request is filling + hashes []common.Hash, // Hashes of the bytecode to avoid double hashing + bytecodes [][]byte, // Actual bytecodes to store into the database (nil = missing) +) error { + batch := s.db.NewBatch() + + var ( + codes uint64 + ) + for i, hash := range hashes { + code := bytecodes[i] + + // If the bytecode was not delivered, reschedule it + if code == nil { + task.codeTasks[hash] = struct{}{} + continue + } + // Code was delivered, mark it not needed any more + for j, account := range task.res.accounts { + if task.needCode[j] && hash == common.BytesToHash(account.CodeHash) { + task.needCode[j] = false + task.pend-- + } + } + // Push the bytecode into a database batch + codes++ + rawdb.WriteCode(batch, hash, code) + } + bytes := common.StorageSize(batch.ValueSize()) + if err := batch.Write(); err != nil { + log.Crit("Failed to persist bytecodes", "err", err) + } + s.bytecodeSynced += codes + s.bytecodeBytes += bytes + + utils.Logger().Debug().Interface("count", codes).Float64("bytes", float64(bytes)).Msg("Persisted set of bytecodes") + + // If this delivery completed the last pending task, forward the account task + // to the next chunk + if task.pend == 0 { + s.forwardAccountTask(task) + return nil + } + // Some accounts are still incomplete, leave as is for the storage and contract + // task assigners to pick up and fill. + + return nil +} + +// estimateRemainingSlots tries to determine roughly how many slots are left in +// a contract storage, based on the number of keys and the last hash. This method +// assumes that the hashes are lexicographically ordered and evenly distributed. +func estimateRemainingSlots(hashes int, last common.Hash) (uint64, error) { + if last == (common.Hash{}) { + return 0, errors.New("last hash empty") + } + space := new(big.Int).Mul(math.MaxBig256, big.NewInt(int64(hashes))) + space.Div(space, last.Big()) + if !space.IsUint64() { + // Gigantic address space probably due to too few or malicious slots + return 0, errors.New("too few slots for estimation") + } + return space.Uint64() - uint64(hashes), nil +} + +// HandleStorageRequestResult handles get storages result +func (s *FullStateDownloadManager) HandleStorageRequestResult(mainTask *accountTask, // Task which this response belongs to + subTask *storageTask, // Task which this response is filling + accounts []common.Hash, // Account hashes requested, may be only partially filled + roots []common.Hash, // Storage roots requested, may be only partially filled + hashes [][]common.Hash, // Storage slot hashes in the returned range + storageSlots [][][]byte, // Storage slot values in the returned range + cont bool, // Whether the last storage range has a continuation + loopID int, + streamID sttypes.StreamID) error { + + s.lock.Lock() + defer s.lock.Unlock() + + if err := s.processStorageResponse(mainTask, subTask, accounts, roots, hashes, storageSlots, cont); err != nil { + return err + } + + return nil +} + +// processStorageResponse integrates an already validated storage response +// into the account tasks. 
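
(A worked example of estimateRemainingSlots above: if a response carried 1,024
slots and its last hash sits a quarter of the way through the hash space, the
extrapolated total is about 4,096 slots, so roughly 3,072 remain. Self-contained,
with illustrative values:)

    package main

    import (
        "fmt"
        "math/big"

        "github.com/ethereum/go-ethereum/common"
        "github.com/ethereum/go-ethereum/common/math"
    )

    func main() {
        hashes := 1024
        // A last hash one quarter of the way through the 2^256 key space.
        last := common.BigToHash(new(big.Int).Exp(big.NewInt(2), big.NewInt(254), nil))

        space := new(big.Int).Mul(math.MaxBig256, big.NewInt(int64(hashes)))
        space.Div(space, last.Big())
        fmt.Println("estimated total slots:    ", space.Uint64())                // ~4096
        fmt.Println("estimated remaining slots:", space.Uint64()-uint64(hashes)) // ~3072
    }
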
+func (s *FullStateDownloadManager) processStorageResponse(mainTask *accountTask, // Task which this response belongs to + subTask *storageTask, // Task which this response is filling + accounts []common.Hash, // Account hashes requested, may be only partially filled + roots []common.Hash, // Storage roots requested, may be only partially filled + hashes [][]common.Hash, // Storage slot hashes in the returned range + storageSlots [][][]byte, // Storage slot values in the returned range + cont bool, // Whether the last storage range has a continuation +) error { + batch := ethdb.HookedBatch{ + Batch: s.db.NewBatch(), + OnPut: func(key []byte, value []byte) { + s.storageBytes += common.StorageSize(len(key) + len(value)) + }, + } + var ( + slots int + oldStorageBytes = s.storageBytes + ) + // Iterate over all the accounts and reconstruct their storage tries from the + // delivered slots + for i, account := range accounts { + // If the account was not delivered, reschedule it + if i >= len(hashes) { + mainTask.stateTasks[account] = roots[i] + continue + } + // State was delivered, if complete mark as not needed any more, otherwise + // mark the account as needing healing + for j, hash := range mainTask.res.hashes { + if account != hash { + continue + } + acc := mainTask.res.accounts[j] + + // If the packet contains multiple contract storage slots, all + // but the last are surely complete. The last contract may be + // chunked, so check it's continuation flag. + if subTask == nil && mainTask.needState[j] && (i < len(hashes)-1 || !cont) { + mainTask.needState[j] = false + mainTask.pend-- + } + // If the last contract was chunked, mark it as needing healing + // to avoid writing it out to disk prematurely. + if subTask == nil && !mainTask.needHeal[j] && i == len(hashes)-1 && cont { + mainTask.needHeal[j] = true + } + // If the last contract was chunked, we need to switch to large + // contract handling mode + if subTask == nil && i == len(hashes)-1 && cont { + // If we haven't yet started a large-contract retrieval, create + // the subtasks for it within the main account task + if tasks, ok := mainTask.SubTasks[account]; !ok { + var ( + keys = hashes[i] + chunks = uint64(storageConcurrency) + lastKey common.Hash + ) + if len(keys) > 0 { + lastKey = keys[len(keys)-1] + } + // If the number of slots remaining is low, decrease the + // number of chunks. Somewhere on the order of 10-15K slots + // fit into a packet of 500KB. A key/slot pair is maximum 64 + // bytes, so pessimistically maxRequestSize/64 = 8K. + // + // Chunk so that at least 2 packets are needed to fill a task. + if estimate, err := estimateRemainingSlots(len(keys), lastKey); err == nil { + if n := estimate / (2 * (maxRequestSize / 64)); n+1 < chunks { + chunks = n + 1 + } + utils.Logger().Debug(). + Int("initiators", len(keys)). + Interface("tail", lastKey). + Uint64("remaining", estimate). + Uint64("chunks", chunks). + Msg("Chunked large contract") + } else { + utils.Logger().Debug(). + Int("initiators", len(keys)). + Interface("tail", lastKey). + Uint64("chunks", chunks). + Msg("Chunked large contract") + } + r := newHashRange(lastKey, chunks) + + // Our first task is the one that was just filled by this response. 
+ batch := ethdb.HookedBatch{ + Batch: s.db.NewBatch(), + OnPut: func(key []byte, value []byte) { + s.storageBytes += common.StorageSize(len(key) + len(value)) + }, + } + ownerAccount := account // local assignment for stacktrie writer closure + // options := trie.NewStackTrieOptions() + writeFn := func(owner common.Hash, path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(batch, ownerAccount, path, hash, blob, s.scheme) + } + tasks = append(tasks, &storageTask{ + Next: common.Hash{}, + Last: r.End(), + root: acc.Root, + genBatch: batch, + genTrie: trie.NewStackTrie(writeFn), + }) + for r.Next() { + batch := ethdb.HookedBatch{ + Batch: s.db.NewBatch(), + OnPut: func(key []byte, value []byte) { + s.storageBytes += common.StorageSize(len(key) + len(value)) + }, + } + // options := trie.NewStackTrieOptions() + writeFn := func(owner common.Hash, path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(batch, ownerAccount, path, hash, blob, s.scheme) + } + tasks = append(tasks, &storageTask{ + Next: r.Start(), + Last: r.End(), + root: acc.Root, + genBatch: batch, + genTrie: trie.NewStackTrie(writeFn), + }) + } + for _, task := range tasks { + utils.Logger().Debug(). + Interface("from", task.Next). + Interface("last", task.Last). + Interface("root", acc.Root). + Interface("account", account). + Msg("Created storage sync task") + } + mainTask.SubTasks[account] = tasks + + // Since we've just created the sub-tasks, this response + // is surely for the first one (zero origin) + subTask = tasks[0] + } + } + // If we're in large contract delivery mode, forward the subtask + if subTask != nil { + // Ensure the response doesn't overflow into the subsequent task + last := subTask.Last.Big() + // Find the first overflowing key. While at it, mark res as complete + // if we find the range to include or pass the 'last' + index := sort.Search(len(hashes[i]), func(k int) bool { + cmp := hashes[i][k].Big().Cmp(last) + if cmp >= 0 { + cont = false + } + return cmp > 0 + }) + if index >= 0 { + // cut off excess + hashes[i] = hashes[i][:index] + storageSlots[i] = storageSlots[i][:index] + } + // Forward the relevant storage chunk (even if created just now) + if cont { + subTask.Next = incHash(hashes[i][len(hashes[i])-1]) + } else { + subTask.done = true + } + } + } + // Iterate over all the complete contracts, reconstruct the trie nodes and + // push them to disk. If the contract is chunked, the trie nodes will be + // reconstructed later. + slots += len(hashes[i]) + + if i < len(hashes)-1 || subTask == nil { + // no need to make local reassignment of account: this closure does not outlive the loop + // options := trie.NewStackTrieOptions() + writeFn := func(owner common.Hash, path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(batch, account, path, hash, blob, s.scheme) + } + tr := trie.NewStackTrie(writeFn) + for j := 0; j < len(hashes[i]); j++ { + tr.Update(hashes[i][j][:], storageSlots[i][j]) + } + tr.Commit() + } + // Persist the received storage segments. These flat state maybe + // outdated during the sync, but it can be fixed later during the + // snapshot generation. 
+ for j := 0; j < len(hashes[i]); j++ { + rawdb.WriteStorageSnapshot(batch, account, hashes[i][j], storageSlots[i][j]) + + // If we're storing large contracts, generate the trie nodes + // on the fly to not trash the gluing points + if i == len(hashes)-1 && subTask != nil { + subTask.genTrie.Update(hashes[i][j][:], storageSlots[i][j]) + } + } + } + // Large contracts could have generated new trie nodes, flush them to disk + if subTask != nil { + if subTask.done { + root, _ := subTask.genTrie.Commit() + if root == subTask.root { + // If the chunk's root is an overflown but full delivery, clear the heal request + for i, account := range mainTask.res.hashes { + if account == accounts[len(accounts)-1] { + mainTask.needHeal[i] = false + } + } + } + } + if subTask.genBatch.ValueSize() > ethdb.IdealBatchSize || subTask.done { + if err := subTask.genBatch.Write(); err != nil { + log.Error("Failed to persist stack slots", "err", err) + } + subTask.genBatch.Reset() + } + } + // Flush anything written just now and update the stats + if err := batch.Write(); err != nil { + log.Crit("Failed to persist storage slots", "err", err) + } + s.storageSynced += uint64(slots) + + utils.Logger().Debug(). + Int("accounts", len(hashes)). + Int("slots", slots). + Interface("bytes", s.storageBytes-oldStorageBytes). + Msg("Persisted set of storage slots") + + // If this delivery completed the last pending task, forward the account task + // to the next chunk + if mainTask.pend == 0 { + s.forwardAccountTask(mainTask) + return nil + } + // Some accounts are still incomplete, leave as is for the storage and contract + // task assigners to pick up and fill. + + return nil +} + +// HandleTrieNodeHealRequestResult handles get trie nodes heal result +func (s *FullStateDownloadManager) HandleTrieNodeHealRequestResult(task *healTask, // Task which this request is filling + paths []string, // Paths of the trie nodes + hashes []common.Hash, // Hashes of the trie nodes to avoid double hashing + nodes [][]byte, // Actual trie nodes to store into the database (nil = missing) + loopID int, + streamID sttypes.StreamID) error { + + s.lock.Lock() + defer s.lock.Unlock() + + if err := s.processTrienodeHealResponse(task, paths, hashes, nodes); err != nil { + return err + } + + return nil +} + +// processTrienodeHealResponse integrates an already validated trienode response +// into the healer tasks. 
+func (s *FullStateDownloadManager) processTrienodeHealResponse(task *healTask, // Task which this request is filling + paths []string, // Paths of the trie nodes + hashes []common.Hash, // Hashes of the trie nodes to avoid double hashing + nodes [][]byte, // Actual trie nodes to store into the database (nil = missing) +) error { + var ( + start = time.Now() + fills int + ) + for i, hash := range hashes { + node := nodes[i] + + // If the trie node was not delivered, reschedule it + if node == nil { + task.trieTasks[paths[i]] = hashes[i] + continue + } + fills++ + + // Push the trie node into the state syncer + s.trienodeHealSynced++ + s.trienodeHealBytes += common.StorageSize(len(node)) + + err := s.scheduler.ProcessNode(trie.NodeSyncResult{Path: paths[i], Data: node}) + switch err { + case nil: + case trie.ErrAlreadyProcessed: + s.trienodeHealDups++ + case trie.ErrNotRequested: + s.trienodeHealNops++ + default: + utils.Logger().Err(err).Interface("hash", hash).Msg("Invalid trienode processed") + } + } + s.commitHealer(false) + + // Calculate the processing rate of one filled trie node + rate := float64(fills) / (float64(time.Since(start)) / float64(time.Second)) + + // Update the currently measured trienode queueing and processing throughput. + // + // The processing rate needs to be updated uniformly independent if we've + // processed 1x100 trie nodes or 100x1 to keep the rate consistent even in + // the face of varying network packets. As such, we cannot just measure the + // time it took to process N trie nodes and update once, we need one update + // per trie node. + // + // Naively, that would be: + // + // for i:=0; i time.Second { + // Periodically adjust the trie node throttler + if float64(pending) > 2*s.trienodeHealRate { + s.trienodeHealThrottle *= trienodeHealThrottleIncrease + } else { + s.trienodeHealThrottle /= trienodeHealThrottleDecrease + } + if s.trienodeHealThrottle > maxTrienodeHealThrottle { + s.trienodeHealThrottle = maxTrienodeHealThrottle + } else if s.trienodeHealThrottle < minTrienodeHealThrottle { + s.trienodeHealThrottle = minTrienodeHealThrottle + } + s.trienodeHealThrottled = time.Now() + + utils.Logger().Debug(). + Float64("rate", s.trienodeHealRate). + Uint64("pending", pending). + Float64("throttle", s.trienodeHealThrottle). + Msg("Updated trie node heal throttler") + } + + return nil +} + +// HandleByteCodeHealRequestResult handles get byte codes heal result +func (s *FullStateDownloadManager) HandleByteCodeHealRequestResult(task *healTask, // Task which this request is filling + hashes []common.Hash, // Hashes of the bytecode to avoid double hashing + codes [][]byte, // Actual bytecodes to store into the database (nil = missing) + loopID int, + streamID sttypes.StreamID) error { + + s.lock.Lock() + defer s.lock.Unlock() + + if err := s.processBytecodeHealResponse(task, hashes, codes); err != nil { + return err + } + + return nil +} + +// processBytecodeHealResponse integrates an already validated bytecode response +// into the healer tasks. 
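
(The throttle update at the end of the trienode heal path above is a clamped
multiplicative controller: when more heal work is pending than the measured
processing rate can absorb, future requests shrink, otherwise they grow again.
A compact restatement; the increase/decrease factors and the bounds are package
constants whose values are not shown in this hunk.)

    func adjustThrottle(throttle, healRate float64, pending uint64,
        increase, decrease, lower, upper float64) float64 {

        if float64(pending) > 2*healRate {
            throttle *= increase // falling behind: ask for less per request
        } else {
            throttle /= decrease // keeping up: allow bigger requests again
        }
        if throttle > upper {
            throttle = upper
        } else if throttle < lower {
            throttle = lower
        }
        return throttle
    }
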
+func (s *FullStateDownloadManager) processBytecodeHealResponse(task *healTask, // Task which this request is filling + hashes []common.Hash, // Hashes of the bytecode to avoid double hashing + codes [][]byte, // Actual bytecodes to store into the database (nil = missing) +) error { + for i, hash := range hashes { + node := codes[i] + + // If the trie node was not delivered, reschedule it + if node == nil { + task.codeTasks[hash] = struct{}{} + continue + } + // Push the trie node into the state syncer + s.bytecodeHealSynced++ + s.bytecodeHealBytes += common.StorageSize(len(node)) + + err := s.scheduler.ProcessCode(trie.CodeSyncResult{Hash: hash, Data: node}) + switch err { + case nil: + case trie.ErrAlreadyProcessed: + s.bytecodeHealDups++ + case trie.ErrNotRequested: + s.bytecodeHealNops++ + default: + log.Error("Invalid bytecode processed", "hash", hash, "err", err) + } + } + s.commitHealer(false) + + return nil +} From c340c704ba6928787ccdd7ff2c4903d7dfad2650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Thu, 16 Nov 2023 20:55:52 +0800 Subject: [PATCH 49/56] fix GetNextBatch to complete sync after there is no more pending states,remove extra comments, cleanup and goimports --- api/service/stagedstreamsync/satate_sync.go | 113 ++++++++------------ 1 file changed, 47 insertions(+), 66 deletions(-) diff --git a/api/service/stagedstreamsync/satate_sync.go b/api/service/stagedstreamsync/satate_sync.go index e90640a9a..1bf685826 100644 --- a/api/service/stagedstreamsync/satate_sync.go +++ b/api/service/stagedstreamsync/satate_sync.go @@ -109,9 +109,6 @@ type accountTask struct { Last common.Hash // Last account to sync in this interval SubTasks map[common.Hash][]*storageTask // Storage intervals needing fetching for large contracts - // These fields are internals used during runtime - //req *accountRequest // Pending request to fill this task - //res *accountResponse // Validate response filling this task pend int // Number of pending subtasks for this round needCode []bool // Flags whether the filling accounts need code retrieval @@ -134,26 +131,19 @@ type accountTask struct { // range request. It contains the subtrie for the requested account range and // the database that's going to be filled with the internal nodes on commit. type accountResponse struct { - task *accountTask // Task which this request is filling - + task *accountTask // Task which this request is filling hashes []common.Hash // Account hashes in the returned range accounts []*types.StateAccount // Expanded accounts in the returned range - - cont bool // Whether the account range has a continuation + cont bool // Whether the account range has a continuation } // storageTask represents the sync task for a chunk of the storage snapshot. 
type storageTask struct { - Next common.Hash // Next account to sync in this interval - Last common.Hash // Last account to sync in this interval - - // These fields are internals used during runtime - root common.Hash // Storage root hash for this instance - //req *storageTaskBundleuest // Pending request to fill this task - - genBatch ethdb.Batch // Batch used by the node generator - genTrie *trie.StackTrie // Node generator from storage slots - + Next common.Hash // Next account to sync in this interval + Last common.Hash // Last account to sync in this interval + root common.Hash // Storage root hash for this instance + genBatch ethdb.Batch // Batch used by the node generator + genTrie *trie.StackTrie // Node generator from storage slots requested bool done bool // Flag whether the task can be removed } @@ -200,7 +190,7 @@ func (t *healRequestSort) Swap(i, j int) { // Merge merges the pathsets, so that several storage requests concerning the // same account are merged into one, to reduce bandwidth. -// OBS: This operation is moot if t has not first been sorted. +// This operation is moot if t has not first been sorted. func (t *healRequestSort) Merge() []TrieNodePathSet { var result []TrieNodePathSet for _, path := range t.syncPaths { @@ -280,7 +270,6 @@ func (t *tasks) deleteAccountTask(accountTaskIndex uint64) { if _, ok := t.accountTasks[accountTaskIndex]; ok { delete(t.accountTasks, accountTaskIndex) } - // t.accountTasks = append(t.accountTasks[:accountTaskIndex], t.accountTasks[accountTaskIndex+1:]...) } func (t *tasks) addCodeTask(h common.Hash) { @@ -375,7 +364,6 @@ type FullStateDownloadManager struct { root common.Hash // Current state trie root being synced snapped bool // Flag to signal that snap phase is done - // healer *healTask // Current state healing task being executed protocol syncProtocol scheduler *trie.Sync // State trie sync scheduler defining the tasks @@ -444,7 +432,6 @@ func (s *FullStateDownloadManager) setRootHash(root common.Hash) { s.root = root s.scheduler = state.NewStateSync(root, s.db, s.onHealState, s.scheme) s.loadSyncStatus() - // s.sched = state.NewStateSync(root, s.bc.ChainDb(), nil, rawdb.HashScheme) } func (s *FullStateDownloadManager) taskDone(taskID uint64) { @@ -554,33 +541,7 @@ func (s *FullStateDownloadManager) commitHealer(force bool) { utils.Logger().Debug().Str("type", "trienodes").Interface("bytes", common.StorageSize(batch.ValueSize())).Msg("Persisted set of healing data") } -// getNextBatch returns objects with a maximum of n state download -// tasks to send to the remote peer. 
-func (s *FullStateDownloadManager) GetNextBatch() (accounts []*accountTask, - codes []common.Hash, - storages *storageTaskBundle, - healtask *healTask, - codetask *healTask, - err error) { - - s.lock.Lock() - defer s.lock.Unlock() - - cap := StatesPerRequest - - accounts, codes, storages, healtask, codetask = s.getBatchFromRetries(cap) - nItems := len(accounts) + len(codes) + len(storages.roots) + len(healtask.hashes) + len(codetask.hashes) - cap -= nItems - - if cap == 0 { - return - } - - if len(s.tasks.accountTasks) == 0 && s.scheduler.Pending() == 0 { - utils.Logger().Debug().Msg("Snapshot sync already completed") - return - } - +func (s *FullStateDownloadManager) SyncCompleted() { defer func() { // Persist any progress, independent of failure for _, task := range s.tasks.accountTasks { s.forwardAccountTask(task) @@ -605,27 +566,50 @@ func (s *FullStateDownloadManager) GetNextBatch() (accounts []*accountTask, utils.Logger().Debug().Interface("root", s.root).Msg("Terminating snapshot sync cycle") }() - // Refill available tasks from the scheduler. - if len(s.tasks.accountTasks) == 0 && s.scheduler.Pending() == 0 { - utils.Logger().Debug().Msg("Snapshot sync already completed") + utils.Logger().Debug().Msg("Snapshot sync already completed") +} + +// getNextBatch returns objects with a maximum of n state download +// tasks to send to the remote peer. +func (s *FullStateDownloadManager) GetNextBatch() (accounts []*accountTask, + codes []common.Hash, + storages *storageTaskBundle, + healtask *healTask, + codetask *healTask, + err error) { + + s.lock.Lock() + defer s.lock.Unlock() + + cap := StatesPerRequest + + accounts, codes, storages, healtask, codetask = s.getBatchFromRetries(cap) + nItems := len(accounts) + len(codes) + len(storages.roots) + len(healtask.hashes) + len(codetask.hashes) + cap -= nItems + + if cap == 0 { return } - // if err = s.fillTasks(cap); err != nil { - // return - // } + if len(s.tasks.accountTasks) == 0 && s.scheduler.Pending() == 0 { + if nItems == 0 { + s.SyncCompleted() + } + return + } - includeHealtasks := true + // Refill available tasks from the scheduler. + withHealTasks := true if healtask != nil || codetask != nil { - includeHealtasks = false + withHealTasks = false } - newAccounts, newCodes, newStorageTaskBundle, unprocessedHealtask, unprocessedCodetask := s.getBatchFromUnprocessed(cap, includeHealtasks) + newAccounts, newCodes, newStorageTaskBundle, newHealTask, newCodeTask := s.getBatchFromUnprocessed(cap, withHealTasks) accounts = append(accounts, newAccounts...) codes = append(codes, newCodes...) storages = newStorageTaskBundle - if includeHealtasks { - healtask = unprocessedHealtask - codetask = unprocessedCodetask + if withHealTasks { + healtask = newHealTask + codetask = newCodeTask } return @@ -690,7 +674,7 @@ func (s *FullStateDownloadManager) loadSyncStatus() { } s.tasks.accountTasks = progress.Tasks for _, task := range s.tasks.accountTasks { - // task := task // closure for task.genBatch in the stacktrie writer callback + task := task // closure for task.genBatch in the stacktrie writer callback task.genBatch = ethdb.HookedBatch{ Batch: s.db.NewBatch(), @@ -810,11 +794,8 @@ func (s *FullStateDownloadManager) cleanAccountTasks() { return } // Sync wasn't finished previously, check for any task that can be finalized - //for i := 0; i < len(s.tasks.accountTasks); i++ { for taskID, _ := range s.tasks.accountTasks { if s.tasks.accountTasks[taskID].done { - //s.tasks.accountTasks = append(s.tasks.accountTasks[:i], s.tasks.accountTasks[i+1:]...) 
- //i-- s.tasks.deleteAccountTask(taskID) } } @@ -953,7 +934,7 @@ func (s *FullStateDownloadManager) updateStats(written, duplicate, unexpected in // getBatchFromUnprocessed returns objects with a maximum of n unprocessed state download // tasks to send to the remote peer. -func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, includeHealtasks bool) ( +func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, withHealTasks bool) ( accounts []*accountTask, codes []common.Hash, storages *storageTaskBundle, @@ -1093,7 +1074,7 @@ func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, includeHealtas return } - if !includeHealtasks { + if !withHealTasks { return } From 337410040958bab24874e335cafcc586230c78df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Thu, 16 Nov 2023 21:01:03 +0800 Subject: [PATCH 50/56] fix state sync file name spell error --- api/service/stagedstreamsync/{satate_sync.go => state_sync.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename api/service/stagedstreamsync/{satate_sync.go => state_sync.go} (100%) diff --git a/api/service/stagedstreamsync/satate_sync.go b/api/service/stagedstreamsync/state_sync.go similarity index 100% rename from api/service/stagedstreamsync/satate_sync.go rename to api/service/stagedstreamsync/state_sync.go From e141f79818a0268db59f202f372fe20966211f39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Thu, 7 Dec 2023 16:19:17 +0800 Subject: [PATCH 51/56] add ProofSet and ProofList to staged stream sync --- api/service/stagedstreamsync/proof.go | 146 ++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 api/service/stagedstreamsync/proof.go diff --git a/api/service/stagedstreamsync/proof.go b/api/service/stagedstreamsync/proof.go new file mode 100644 index 000000000..216d797d4 --- /dev/null +++ b/api/service/stagedstreamsync/proof.go @@ -0,0 +1,146 @@ +package stagedstreamsync + +import ( + "errors" + "sync" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/rlp" +) + +// ProofSet stores a set of trie nodes. It implements trie.Database and can also +// act as a cache for another trie.Database. 
+type ProofSet struct { + nodes map[string][]byte + order []string + + dataSize int + lock sync.RWMutex +} + +// NewProofSet creates an empty node set +func NewProofSet() *ProofSet { + return &ProofSet{ + nodes: make(map[string][]byte), + } +} + +// Put stores a new node in the set +func (db *ProofSet) Put(key []byte, value []byte) error { + db.lock.Lock() + defer db.lock.Unlock() + + if _, ok := db.nodes[string(key)]; ok { + return nil + } + keystr := string(key) + + db.nodes[keystr] = common.CopyBytes(value) + db.order = append(db.order, keystr) + db.dataSize += len(value) + + return nil +} + +// Delete removes a node from the set +func (db *ProofSet) Delete(key []byte) error { + db.lock.Lock() + defer db.lock.Unlock() + + delete(db.nodes, string(key)) + return nil +} + +// Get returns a stored node +func (db *ProofSet) Get(key []byte) ([]byte, error) { + db.lock.RLock() + defer db.lock.RUnlock() + + if entry, ok := db.nodes[string(key)]; ok { + return entry, nil + } + return nil, errors.New("not found") +} + +// Has returns true if the node set contains the given key +func (db *ProofSet) Has(key []byte) (bool, error) { + _, err := db.Get(key) + return err == nil, nil +} + +// KeyCount returns the number of nodes in the set +func (db *ProofSet) KeyCount() int { + db.lock.RLock() + defer db.lock.RUnlock() + + return len(db.nodes) +} + +// DataSize returns the aggregated data size of nodes in the set +func (db *ProofSet) DataSize() int { + db.lock.RLock() + defer db.lock.RUnlock() + + return db.dataSize +} + +// List converts the node set to a ProofList +func (db *ProofSet) List() ProofList { + db.lock.RLock() + defer db.lock.RUnlock() + + var values ProofList + for _, key := range db.order { + values = append(values, db.nodes[key]) + } + return values +} + +// Store writes the contents of the set to the given database +func (db *ProofSet) Store(target ethdb.KeyValueWriter) { + db.lock.RLock() + defer db.lock.RUnlock() + + for key, value := range db.nodes { + target.Put([]byte(key), value) + } +} + +// ProofList stores an ordered list of trie nodes. It implements ethdb.KeyValueWriter. +type ProofList []rlp.RawValue + +// Store writes the contents of the list to the given database +func (n ProofList) Store(db ethdb.KeyValueWriter) { + for _, node := range n { + db.Put(crypto.Keccak256(node), node) + } +} + +// Set converts the node list to a ProofSet +func (n ProofList) Set() *ProofSet { + db := NewProofSet() + n.Store(db) + return db +} + +// Put stores a new node at the end of the list +func (n *ProofList) Put(key []byte, value []byte) error { + *n = append(*n, value) + return nil +} + +// Delete panics as there's no reason to remove a node from the list. 
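
(A short usage sketch for the two proof containers above, assuming this package;
proofNodes is an illustrative [][]byte taken from a range-proof response. The
ProofList preserves arrival order, while the ProofSet built from it indexes every
node by its keccak256 hash and serves simple key/value lookups.)

    var proofs ProofList
    for _, node := range proofNodes {
        proofs.Put(nil, node) // ProofList ignores the key and appends the value
    }

    set := proofs.Set() // re-keyed by crypto.Keccak256(node)
    fmt.Println(set.KeyCount(), set.DataSize())

    has, _ := set.Has(crypto.Keccak256(proofNodes[0]))
    fmt.Println(has) // true
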
+func (n *ProofList) Delete(key []byte) error { + panic("not supported") +} + +// DataSize returns the aggregated data size of nodes in the list +func (n ProofList) DataSize() int { + var size int + for _, node := range n { + size += len(node) + } + return size +} From 390bdb67d835939bc951139d171478e3e88e0705 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Thu, 7 Dec 2023 16:23:03 +0800 Subject: [PATCH 52/56] add client new functions to stream sync adapter, update GetAccountRanges parameters --- api/service/stagedstreamsync/adapter.go | 5 +++++ p2p/stream/protocols/sync/client.go | 27 +++++++++++-------------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/api/service/stagedstreamsync/adapter.go b/api/service/stagedstreamsync/adapter.go index ca9c6a678..56c42b661 100644 --- a/api/service/stagedstreamsync/adapter.go +++ b/api/service/stagedstreamsync/adapter.go @@ -9,6 +9,7 @@ import ( "github.com/harmony-one/harmony/core/types" "github.com/harmony-one/harmony/p2p/stream/common/streammanager" syncproto "github.com/harmony-one/harmony/p2p/stream/protocols/sync" + "github.com/harmony-one/harmony/p2p/stream/protocols/sync/message" sttypes "github.com/harmony-one/harmony/p2p/stream/types" ) @@ -20,6 +21,10 @@ type syncProtocol interface { GetBlocksByHashes(ctx context.Context, hs []common.Hash, opts ...syncproto.Option) ([]*types.Block, sttypes.StreamID, error) GetReceipts(ctx context.Context, hs []common.Hash, opts ...syncproto.Option) (receipts []types.Receipts, stid sttypes.StreamID, err error) GetNodeData(ctx context.Context, hs []common.Hash, opts ...syncproto.Option) (data [][]byte, stid sttypes.StreamID, err error) + GetAccountRange(ctx context.Context, root common.Hash, origin common.Hash, limit common.Hash, bytes uint64, opts ...syncproto.Option) (accounts []*message.AccountData, proof [][]byte, stid sttypes.StreamID, err error) + GetStorageRanges(ctx context.Context, root common.Hash, accounts []common.Hash, origin common.Hash, limit common.Hash, bytes uint64, opts ...syncproto.Option) (slots [][]*message.StorageData, proof [][]byte, stid sttypes.StreamID, err error) + GetByteCodes(ctx context.Context, hs []common.Hash, bytes uint64, opts ...syncproto.Option) (codes [][]byte, stid sttypes.StreamID, err error) + GetTrieNodes(ctx context.Context, root common.Hash, paths []*message.TrieNodePathSet, bytes uint64, opts ...syncproto.Option) (nodes [][]byte, stid sttypes.StreamID, err error) RemoveStream(stID sttypes.StreamID) // If a stream delivers invalid data, remove the stream StreamFailed(stID sttypes.StreamID, reason string) diff --git a/p2p/stream/protocols/sync/client.go b/p2p/stream/protocols/sync/client.go index 9024142ce..45707e119 100644 --- a/p2p/stream/protocols/sync/client.go +++ b/p2p/stream/protocols/sync/client.go @@ -184,7 +184,7 @@ func (p *Protocol) GetNodeData(ctx context.Context, hs []common.Hash, opts ...Op // GetAccountRange do getAccountRange through sync stream protocol. 
// returns the accounts along with proofs as result, target stream id, and error -func (p *Protocol) GetAccountRange(ctx context.Context, root common.Hash, origin common.Hash, limit common.Hash, bytes uint64, opts ...Option) (accounts []*message.AccountData, proof []common.Hash, stid sttypes.StreamID, err error) { +func (p *Protocol) GetAccountRange(ctx context.Context, root common.Hash, origin common.Hash, limit common.Hash, bytes uint64, opts ...Option) (accounts []*message.AccountData, proof [][]byte, stid sttypes.StreamID, err error) { timer := p.doMetricClientRequest("getAccountRange") defer p.doMetricPostClientRequest("getAccountRange", err, timer) @@ -207,7 +207,7 @@ func (p *Protocol) GetAccountRange(ctx context.Context, root common.Hash, origin // GetStorageRanges do getStorageRanges through sync stream protocol. // returns the slots along with proofs as result, target stream id, and error -func (p *Protocol) GetStorageRanges(ctx context.Context, root common.Hash, accounts []common.Hash, origin common.Hash, limit common.Hash, bytes uint64, opts ...Option) (slots []*message.StorageData, proof []common.Hash, stid sttypes.StreamID, err error) { +func (p *Protocol) GetStorageRanges(ctx context.Context, root common.Hash, accounts []common.Hash, origin common.Hash, limit common.Hash, bytes uint64, opts ...Option) (slots [][]*message.StorageData, proof [][]byte, stid sttypes.StreamID, err error) { timer := p.doMetricClientRequest("getStorageRanges") defer p.doMetricPostClientRequest("getStorageRanges", err, timer) @@ -233,11 +233,9 @@ func (p *Protocol) GetStorageRanges(ctx context.Context, root common.Hash, accou if err != nil { return } - slots = make([]*message.StorageData, 0) + slots = make([][]*message.StorageData, 0) for _, storage := range storages { - for _, data := range storage.Data { - slots = append(slots, data) - } + slots = append(slots, storage.Data) } return } @@ -735,8 +733,7 @@ func (req *getAccountRangeRequest) Encode() ([]byte, error) { return protobuf.Marshal(msg) } -// []*message.AccountData, []common.Hash -func (req *getAccountRangeRequest) getAccountRangeFromResponse(resp sttypes.Response) ([]*message.AccountData, []common.Hash, error) { +func (req *getAccountRangeRequest) getAccountRangeFromResponse(resp sttypes.Response) ([]*message.AccountData, [][]byte, error) { sResp, ok := resp.(*syncResponse) if !ok || sResp == nil { return nil, nil, errors.New("not sync response") @@ -744,7 +741,7 @@ func (req *getAccountRangeRequest) getAccountRangeFromResponse(resp sttypes.Resp return req.parseGetAccountRangeResponse(sResp) } -func (req *getAccountRangeRequest) parseGetAccountRangeResponse(resp *syncResponse) ([]*message.AccountData, []common.Hash, error) { +func (req *getAccountRangeRequest) parseGetAccountRangeResponse(resp *syncResponse) ([]*message.AccountData, [][]byte, error) { if errResp := resp.pb.GetErrorResponse(); errResp != nil { return nil, nil, errors.New(errResp.Error) } @@ -752,9 +749,9 @@ func (req *getAccountRangeRequest) parseGetAccountRangeResponse(resp *syncRespon if grResp == nil { return nil, nil, errors.New("response not GetAccountRange") } - proofs := make([]common.Hash, 0) + proofs := make([][]byte, 0) for _, proofBytes := range grResp.Proof { - var proof common.Hash + var proof []byte if err := rlp.DecodeBytes(proofBytes, &proof); err != nil { return nil, nil, errors.Wrap(err, "[GetAccountRangeResponse]") } @@ -817,7 +814,7 @@ func (req *getStorageRangesRequest) Encode() ([]byte, error) { } // []*message.AccountData, []common.Hash -func (req 
*getStorageRangesRequest) getStorageRangesFromResponse(resp sttypes.Response) ([]*message.StoragesData, []common.Hash, error) { +func (req *getStorageRangesRequest) getStorageRangesFromResponse(resp sttypes.Response) ([]*message.StoragesData, [][]byte, error) { sResp, ok := resp.(*syncResponse) if !ok || sResp == nil { return nil, nil, errors.New("not sync response") @@ -825,7 +822,7 @@ func (req *getStorageRangesRequest) getStorageRangesFromResponse(resp sttypes.Re return req.parseGetStorageRangesResponse(sResp) } -func (req *getStorageRangesRequest) parseGetStorageRangesResponse(resp *syncResponse) ([]*message.StoragesData, []common.Hash, error) { +func (req *getStorageRangesRequest) parseGetStorageRangesResponse(resp *syncResponse) ([]*message.StoragesData, [][]byte, error) { if errResp := resp.pb.GetErrorResponse(); errResp != nil { return nil, nil, errors.New(errResp.Error) } @@ -833,9 +830,9 @@ func (req *getStorageRangesRequest) parseGetStorageRangesResponse(resp *syncResp if grResp == nil { return nil, nil, errors.New("response not GetStorageRanges") } - proofs := make([]common.Hash, 0) + proofs := make([][]byte, 0) for _, proofBytes := range grResp.Proof { - var proof common.Hash + var proof []byte if err := rlp.DecodeBytes(proofBytes, &proof); err != nil { return nil, nil, errors.Wrap(err, "[GetStorageRangesResponse]") } From 0901e92bf8cc17085e072dbc90294b46e49dd0f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Thu, 7 Dec 2023 16:32:03 +0800 Subject: [PATCH 53/56] add state sync full, complete full state sync stage --- .../stagedstreamsync/stage_statesync_full.go | 449 ++++++++++++++ .../{state_sync.go => state_sync_full.go} | 583 +++++++++++++++--- api/service/stagedstreamsync/syncing.go | 2 +- p2p/stream/protocols/sync/chain.go | 2 +- 4 files changed, 951 insertions(+), 85 deletions(-) create mode 100644 api/service/stagedstreamsync/stage_statesync_full.go rename api/service/stagedstreamsync/{state_sync.go => state_sync_full.go} (80%) diff --git a/api/service/stagedstreamsync/stage_statesync_full.go b/api/service/stagedstreamsync/stage_statesync_full.go new file mode 100644 index 000000000..3e190bdc9 --- /dev/null +++ b/api/service/stagedstreamsync/stage_statesync_full.go @@ -0,0 +1,449 @@ +package stagedstreamsync + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/internal/utils" + sttypes "github.com/harmony-one/harmony/p2p/stream/types" + "github.com/pkg/errors" + + //sttypes "github.com/harmony-one/harmony/p2p/stream/types" + "github.com/ledgerwatch/erigon-lib/kv" + "github.com/prometheus/client_golang/prometheus" + "github.com/rs/zerolog" +) + +type StageFullStateSync struct { + configs StageFullStateSyncCfg +} + +type StageFullStateSyncCfg struct { + bc core.BlockChain + db kv.RwDB + concurrency int + protocol syncProtocol + logger zerolog.Logger + logProgress bool +} + +func NewStageFullStateSync(cfg StageFullStateSyncCfg) *StageFullStateSync { + return &StageFullStateSync{ + configs: cfg, + } +} + +func NewStageFullStateSyncCfg(bc core.BlockChain, + db kv.RwDB, + concurrency int, + protocol syncProtocol, + logger zerolog.Logger, + logProgress bool) StageFullStateSyncCfg { + + return StageFullStateSyncCfg{ + bc: bc, + db: db, + concurrency: concurrency, + protocol: protocol, + logger: logger, + logProgress: logProgress, + } +} + +// Exec progresses States stage in the forward direction +func (sss 
*StageFullStateSync) Exec(ctx context.Context, bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { + + // for short range sync, skip this step + if !s.state.initSync { + return nil + } // only execute this stage in fast/snap sync mode and once we reach to pivot + + if s.state.status.pivotBlock == nil || + s.state.CurrentBlockNumber() != s.state.status.pivotBlock.NumberU64() || + s.state.status.statesSynced { + return nil + } + + s.state.Debug("STATE SYNC ======================================================>", "started") + // maxHeight := s.state.status.targetBN + // currentHead := s.state.CurrentBlockNumber() + // if currentHead >= maxHeight { + // return nil + // } + // currProgress := s.state.CurrentBlockNumber() + // targetHeight := s.state.currentCycle.TargetHeight + + // if errV := CreateView(ctx, sss.configs.db, tx, func(etx kv.Tx) error { + // if currProgress, err = s.CurrentStageProgress(etx); err != nil { + // return err + // } + // return nil + // }); errV != nil { + // return errV + // } + + // if currProgress >= targetHeight { + // return nil + // } + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = sss.configs.db.BeginRw(ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + // isLastCycle := targetHeight >= maxHeight + startTime := time.Now() + + if sss.configs.logProgress { + fmt.Print("\033[s") // save the cursor position + } + + // Fetch states from neighbors + pivotRootHash := s.state.status.pivotBlock.Root() + currentBlockRootHash := s.state.bc.CurrentFastBlock().Root() + scheme := sss.configs.bc.TrieDB().Scheme() + sdm := newFullStateDownloadManager(sss.configs.bc.ChainDb(), scheme, tx, sss.configs.bc, sss.configs.concurrency, s.state.logger) + sdm.setRootHash(currentBlockRootHash) + s.state.Debug("StateSync/setRootHash", pivotRootHash) + s.state.Debug("StateSync/currentFastBlockRoot", currentBlockRootHash) + s.state.Debug("StateSync/pivotBlockNumber", s.state.status.pivotBlock.NumberU64()) + s.state.Debug("StateSync/currentFastBlockNumber", s.state.bc.CurrentFastBlock().NumberU64()) + var wg sync.WaitGroup + for i := 0; i < s.state.config.Concurrency; i++ { + wg.Add(1) + go sss.runStateWorkerLoop(ctx, sdm, &wg, i, startTime, s) + } + wg.Wait() + + // insert block + if err := sss.configs.bc.WriteHeadBlock(s.state.status.pivotBlock); err != nil { + sss.configs.logger.Warn().Err(err). + Uint64("pivot block number", s.state.status.pivotBlock.NumberU64()). 
+ Msg(WrapStagedSyncMsg("insert pivot block failed")) + s.state.Debug("StateSync/pivot/insert/error", err) + // TODO: panic("pivot block is failed to insert in chain.") + return err + } + + // states should be fully synced in this stage + s.state.status.statesSynced = true + + s.state.Debug("StateSync/pivot/num", s.state.status.pivotBlock.NumberU64()) + s.state.Debug("StateSync/pivot/insert", "done") + + /* + gbm := s.state.gbm + + // Setup workers to fetch states from remote node + var wg sync.WaitGroup + curHeight := s.state.CurrentBlockNumber() + + for bn := curHeight + 1; bn <= gbm.targetBN; bn++ { + root := gbm.GetRootHash(bn) + if root == emptyHash { + continue + } + sdm.setRootHash(root) + for i := 0; i < s.state.config.Concurrency; i++ { + wg.Add(1) + go sss.runStateWorkerLoop(ctx, sdm, &wg, i, startTime, s) + } + wg.Wait() + } + */ + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + + return nil +} + +// runStateWorkerLoop creates a work loop for download states +func (sss *StageFullStateSync) runStateWorkerLoop(ctx context.Context, sdm *FullStateDownloadManager, wg *sync.WaitGroup, loopID int, startTime time.Time, s *StageState) { + + s.state.Debug("runStateWorkerLoop/info", "started") + + defer wg.Done() + + for { + select { + case <-ctx.Done(): + s.state.Debug("runStateWorkerLoop/ctx/done", "Finished") + return + default: + } + accountTasks, codes, storages, healtask, codetask, err := sdm.GetNextBatch() + s.state.Debug("runStateWorkerLoop/batch/len", len(accountTasks)+len(codes)+len(storages.accounts)) + s.state.Debug("runStateWorkerLoop/batch/heals/len", len(healtask.hashes)+len(codetask.hashes)) + s.state.Debug("runStateWorkerLoop/batch/err", err) + if len(accountTasks)+len(codes)+len(storages.accounts)+len(healtask.hashes)+len(codetask.hashes) == 0 || err != nil { + select { + case <-ctx.Done(): + return + case <-time.After(100 * time.Millisecond): + return + } + } + s.state.Debug("runStateWorkerLoop/batch/accounts", accountTasks) + s.state.Debug("runStateWorkerLoop/batch/codes", codes) + + if len(accountTasks) > 0 { + + task := accountTasks[0] + origin := task.Next + limit := task.Last + root := sdm.root + cap := maxRequestSize + retAccounts, proof, stid, err := sss.configs.protocol.GetAccountRange(ctx, root, origin, limit, uint64(cap)) + if err != nil { + return + } + if err := sdm.HandleAccountRequestResult(task, retAccounts, proof, origin[:], limit[:], loopID, stid); err != nil { + return + } + + } else if len(codes)+len(storages.accounts) > 0 { + + if len(codes) > 0 { + stid, err := sss.downloadByteCodes(ctx, sdm, codes, loopID) + if err != nil { + if !errors.Is(err, context.Canceled) && !errors.Is(err, context.DeadlineExceeded) { + sss.configs.protocol.StreamFailed(stid, "downloadByteCodes failed") + } + utils.Logger().Error(). + Err(err). + Str("stream", string(stid)). 
+ Msg(WrapStagedSyncMsg("downloadByteCodes failed")) + err = errors.Wrap(err, "request error") + sdm.HandleRequestError(accountTasks, codes, storages, healtask, codetask, stid, err) + return + } + } + + if len(storages.accounts) > 0 { + root := sdm.root + roots := storages.roots + accounts := storages.accounts + cap := maxRequestSize + origin := storages.origin + limit := storages.limit + mainTask := storages.mainTask + subTask := storages.subtask + + slots, proof, stid, err := sss.configs.protocol.GetStorageRanges(ctx, root, accounts, origin, limit, uint64(cap)) + if err != nil { + return + } + if err := sdm.HandleStorageRequestResult(mainTask, subTask, accounts, roots, origin, limit, slots, proof, loopID, stid); err != nil { + return + } + } + + // data, stid, err := sss.downloadStates(ctx, accounts, codes, storages) + // if err != nil { + // s.state.Debug("runStateWorkerLoop/downloadStates/error", err) + // if !errors.Is(err, context.Canceled) && !errors.Is(err, context.DeadlineExceeded) { + // sss.configs.protocol.StreamFailed(stid, "downloadStates failed") + // } + // utils.Logger().Error(). + // Err(err). + // Str("stream", string(stid)). + // Msg(WrapStagedSyncMsg("downloadStates failed")) + // err = errors.Wrap(err, "request error") + // sdm.HandleRequestError(codes, paths, stid, err) + // } else if data == nil || len(data) == 0 { + // s.state.Debug("runStateWorkerLoop/downloadStates/data", "nil array") + // utils.Logger().Warn(). + // Str("stream", string(stid)). + // Msg(WrapStagedSyncMsg("downloadStates failed, received empty data bytes")) + // err := errors.New("downloadStates received empty data bytes") + // sdm.HandleRequestError(codes, paths, stid, err) + // } else { + // s.state.Debug("runStateWorkerLoop/downloadStates/data/len", len(data)) + // sdm.HandleRequestResult(nodes, paths, data, loopID, stid) + // if sss.configs.logProgress { + // //calculating block download speed + // dt := time.Now().Sub(startTime).Seconds() + // speed := float64(0) + // if dt > 0 { + // speed = float64(len(data)) / dt + // } + // stateDownloadSpeed := fmt.Sprintf("%.2f", speed) + + // fmt.Print("\033[u\033[K") // restore the cursor position and clear the line + // fmt.Println("state download speed:", stateDownloadSpeed, "states/s") + // } + // } + + } else { + // assign trie node Heal Tasks + if len(healtask.hashes) > 0 { + root := sdm.root + task := healtask.task + hashes := healtask.hashes + pathsets := healtask.pathsets + paths := healtask.paths + + nodes, stid, err := sss.configs.protocol.GetTrieNodes(ctx, root, pathsets, maxRequestSize) + if err != nil { + return + } + if err := sdm.HandleTrieNodeHealRequestResult(task, paths, hashes, nodes, loopID, stid); err != nil { + return + } + } + + if len(codetask.hashes) > 0 { + task := codetask.task + hashes := codetask.hashes + codes, stid, err := sss.configs.protocol.GetByteCodes(ctx, hashes, maxRequestSize) + if err != nil { + return + } + if err := sdm.HandleBytecodeRequestResult(task, hashes, codes, loopID, stid); err != nil { + return + } + } + } + } +} + +func (sss *StageFullStateSync) downloadByteCodes(ctx context.Context, sdm *FullStateDownloadManager, codeTasks []*byteCodeTasksBundle, loopID int) (stid sttypes.StreamID, err error) { + for _, codeTask := range codeTasks { + // try to get byte codes from remote peer + // if any of them failed, the stid will be the id of the failed stream + retCodes, stid, err := sss.configs.protocol.GetByteCodes(ctx, codeTask.hashes, maxRequestSize) + if err != nil { + return stid, err + } + if err = 
sdm.HandleBytecodeRequestResult(codeTask.task, codeTask.hashes, retCodes, loopID, stid); err != nil { + return stid, err + } + } + return +} + +func (sss *StageFullStateSync) downloadStorages(ctx context.Context, sdm *FullStateDownloadManager, codeTasks []*byteCodeTasksBundle, loopID int) (stid sttypes.StreamID, err error) { + for _, codeTask := range codeTasks { + // try to get byte codes from remote peer + // if any of them failed, the stid will be the id of failed stream + retCodes, stid, err := sss.configs.protocol.GetByteCodes(ctx, codeTask.hashes, maxRequestSize) + if err != nil { + return stid, err + } + if err = sdm.HandleBytecodeRequestResult(codeTask.task, codeTask.hashes, retCodes, loopID, stid); err != nil { + return stid, err + } + } + return +} + +// func (sss *StageFullStateSync) downloadStates(ctx context.Context, +// root common.Hash, +// origin common.Hash, +// accounts []*accountTask, +// codes []common.Hash, +// storages *storageTaskBundle) ([][]byte, sttypes.StreamID, error) { + +// ctx, cancel := context.WithTimeout(ctx, 10*time.Second) +// defer cancel() + +// // if there is any account task, first we have to complete that +// if len(accounts) > 0 { + +// } +// // hashes := append(codes, nodes...) +// // data, stid, err := sss.configs.protocol.GetNodeData(ctx, hashes) +// // if err != nil { +// // return nil, stid, err +// // } +// // if err := validateGetNodeDataResult(hashes, data); err != nil { +// // return nil, stid, err +// // } +// return data, stid, nil +// } + +func (stg *StageFullStateSync) insertChain(gbm *blockDownloadManager, + protocol syncProtocol, + lbls prometheus.Labels, + targetBN uint64) { + +} + +func (stg *StageFullStateSync) saveProgress(s *StageState, tx kv.RwTx) (err error) { + + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = stg.configs.db.BeginRw(context.Background()) + if err != nil { + return err + } + defer tx.Rollback() + } + + // save progress + if err = s.Update(tx, s.state.CurrentBlockNumber()); err != nil { + utils.Logger().Error(). + Err(err). 
+ Msgf("[STAGED_SYNC] saving progress for block States stage failed") + return ErrSaveStateProgressFail + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (stg *StageFullStateSync) Revert(ctx context.Context, firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = stg.configs.db.BeginRw(ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + if err = u.Done(tx); err != nil { + return err + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (stg *StageFullStateSync) CleanUp(ctx context.Context, firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = stg.configs.db.BeginRw(ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} diff --git a/api/service/stagedstreamsync/state_sync.go b/api/service/stagedstreamsync/state_sync_full.go similarity index 80% rename from api/service/stagedstreamsync/state_sync.go rename to api/service/stagedstreamsync/state_sync_full.go index 1bf685826..daf0f4869 100644 --- a/api/service/stagedstreamsync/state_sync.go +++ b/api/service/stagedstreamsync/state_sync_full.go @@ -3,6 +3,7 @@ package stagedstreamsync import ( "bytes" "encoding/json" + "fmt" gomath "math" "math/big" "math/rand" @@ -17,11 +18,14 @@ import ( "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" + + //"github.com/ethereum/go-ethereum/trie/trienode" "github.com/harmony-one/harmony/common/math" "github.com/harmony-one/harmony/core" "github.com/harmony-one/harmony/core/rawdb" "github.com/harmony-one/harmony/core/state" "github.com/harmony-one/harmony/internal/utils" + "github.com/harmony-one/harmony/p2p/stream/protocols/sync/message" sttypes "github.com/harmony-one/harmony/p2p/stream/types" "github.com/ledgerwatch/erigon-lib/kv" "github.com/ledgerwatch/log/v3" @@ -191,7 +195,7 @@ func (t *healRequestSort) Swap(i, j int) { // Merge merges the pathsets, so that several storage requests concerning the // same account are merged into one, to reduce bandwidth. // This operation is moot if t has not first been sorted. 
-func (t *healRequestSort) Merge() []TrieNodePathSet { +func (t *healRequestSort) Merge() []*message.TrieNodePathSet { var result []TrieNodePathSet for _, path := range t.syncPaths { pathset := TrieNodePathSet(path) @@ -211,7 +215,20 @@ func (t *healRequestSort) Merge() []TrieNodePathSet { } } } - return result + // convert to array of pointers + result_ptr := make([]*message.TrieNodePathSet, 0) + for _, p := range result { + result_ptr = append(result_ptr, &message.TrieNodePathSet{ + Pathset: p, + }) + } + return result_ptr +} + +type byteCodeTasksBundle struct { + id uint64 //unique id for bytecode task bundle + task *accountTask + hashes []common.Hash } type storageTaskBundle struct { @@ -231,16 +248,16 @@ type healTask struct { codeTasks map[common.Hash]struct{} // Set of byte code tasks currently queued for retrieval, indexed by code hash paths []string hashes []common.Hash - pathsets []TrieNodePathSet + pathsets []*message.TrieNodePathSet task *healTask root common.Hash byteCodeReq bool } type tasks struct { - accountTasks map[uint64]*accountTask // Current account task set being synced - storageTasks map[uint64]*storageTaskBundle // Set of trie node tasks currently queued for retrieval, indexed by path - codeTasks map[common.Hash]struct{} // Set of byte code tasks currently queued for retrieval, indexed by hash + accountTasks map[uint64]*accountTask // Current account task set being synced + storageTasks map[uint64]*storageTaskBundle // Set of trie node tasks currently queued for retrieval, indexed by path + codeTasks map[uint64]*byteCodeTasksBundle // Set of byte code tasks currently queued for retrieval, indexed by hash healer map[uint64]*healTask snapped bool // Flag to signal that snap phase is done } @@ -249,7 +266,7 @@ func newTasks() *tasks { return &tasks{ accountTasks: make(map[uint64]*accountTask, 0), storageTasks: make(map[uint64]*storageTaskBundle, 0), - codeTasks: make(map[common.Hash]struct{}), + codeTasks: make(map[uint64]*byteCodeTasksBundle), healer: make(map[uint64]*healTask, 0), snapped: false, } @@ -272,13 +289,13 @@ func (t *tasks) deleteAccountTask(accountTaskIndex uint64) { } } -func (t *tasks) addCodeTask(h common.Hash) { - t.codeTasks[h] = struct{}{} +func (t *tasks) addCodeTask(id uint64, bytecodeTask *byteCodeTasksBundle) { + t.codeTasks[id] = bytecodeTask } -func (t *tasks) deleteCodeTask(hash common.Hash) { - if _, ok := t.codeTasks[hash]; ok { - delete(t.codeTasks, hash) +func (t *tasks) deleteCodeTask(id uint64) { + if _, ok := t.codeTasks[id]; ok { + delete(t.codeTasks, id) } } @@ -500,33 +517,6 @@ func FullAccountRLP(data []byte) ([]byte, error) { return rlp.EncodeToBytes(account) } -// onHealState is a callback method to invoke when a flat state(account -// or storage slot) is downloaded during the healing stage. The flat states -// can be persisted blindly and can be fixed later in the generation stage. -// Note it's not concurrent safe, please handle the concurrent issue outside. 
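// Editor's sketch (illustrative, not part of the patch): bytecode work is now grouped
// into bundles keyed by a random uint64 id rather than tracked per code hash. A hedged
// outline of how a bundle is created and registered, mirroring the id loop in
// getBatchFromUnprocessed below; registerCodeBundle and newBundleID are hypothetical names:
func registerCodeBundle(t *tasks, task *accountTask, hashes []common.Hash) uint64 {
	id := newBundleID(t)
	t.addCodeTask(id, &byteCodeTasksBundle{id: id, task: task, hashes: hashes})
	return id
}

func newBundleID(t *tasks) uint64 {
	for {
		id := uint64(rand.Int63()) // math/rand, as used elsewhere in this file
		if id == 0 {
			continue // skip zero ids, matching the loop in getBatchFromUnprocessed
		}
		if _, ok := t.codeTasks[id]; ok {
			continue // avoid colliding with a bundle already queued
		}
		return id
	}
}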
-func (s *FullStateDownloadManager) onHealState(paths [][]byte, value []byte) error { - if len(paths) == 1 { - var account types.StateAccount - if err := rlp.DecodeBytes(value, &account); err != nil { - return nil // Returning the error here would drop the remote peer - } - blob := s.SlimAccountRLP(account) - rawdb.WriteAccountSnapshot(s.stateWriter, common.BytesToHash(paths[0]), blob) - s.accountHealed += 1 - s.accountHealedBytes += common.StorageSize(1 + common.HashLength + len(blob)) - } - if len(paths) == 2 { - rawdb.WriteStorageSnapshot(s.stateWriter, common.BytesToHash(paths[0]), common.BytesToHash(paths[1]), value) - s.storageHealed += 1 - s.storageHealedBytes += common.StorageSize(1 + 2*common.HashLength + len(value)) - } - if s.stateWriter.ValueSize() > ethdb.IdealBatchSize { - s.stateWriter.Write() // It's fine to ignore the error here - s.stateWriter.Reset() - } - return nil -} - func (s *FullStateDownloadManager) commitHealer(force bool) { if !force && s.scheduler.MemSize() < ethdb.IdealBatchSize { return @@ -572,7 +562,7 @@ func (s *FullStateDownloadManager) SyncCompleted() { // getNextBatch returns objects with a maximum of n state download // tasks to send to the remote peer. func (s *FullStateDownloadManager) GetNextBatch() (accounts []*accountTask, - codes []common.Hash, + codes []*byteCodeTasksBundle, storages *storageTaskBundle, healtask *healTask, codetask *healTask, @@ -936,13 +926,13 @@ func (s *FullStateDownloadManager) updateStats(written, duplicate, unexpected in // tasks to send to the remote peer. func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, withHealTasks bool) ( accounts []*accountTask, - codes []common.Hash, + codes []*byteCodeTasksBundle, storages *storageTaskBundle, healtask *healTask, codetask *healTask) { // over trie nodes as those can be written to disk and forgotten about. 
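// Editor's sketch (illustrative, not part of the patch): the reworked GetNextBatch
// contract in one place. A caller receives account-range tasks, bytecode bundles, a
// storage bundle and heal tasks, and treats an all-empty result as "nothing to do yet".
// drainOnce is a hypothetical name; the real consumer is runStateWorkerLoop in
// stage_statesync_full.go earlier in this series.
func drainOnce(s *FullStateDownloadManager) (busy bool, err error) {
	accounts, codes, storages, healtask, codetask, err := s.GetNextBatch()
	if err != nil {
		return false, err
	}
	busy = len(accounts) > 0 || len(codes) > 0 || len(storages.accounts) > 0 ||
		len(healtask.hashes) > 0 || len(codetask.hashes) > 0
	return busy, nil
}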
- codes = make([]common.Hash, 0, n) + codes = make([]*byteCodeTasksBundle, 0, n) accounts = make([]*accountTask, 0, n) for i, task := range s.tasks.accountTasks { @@ -961,9 +951,12 @@ func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, withHealTasks accounts = append(accounts, task) s.requesting.addAccountTask(task.id, task) // s.tasks.deleteAccountTask(task) + + // one task account is enough for an stream + return } - cap := n - len(accounts) + cap := n // - len(accounts) for _, task := range s.tasks.accountTasks { // Skip tasks that are already retrieving (or done with) all codes @@ -971,19 +964,42 @@ func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, withHealTasks continue } + var hashes []common.Hash for hash := range task.codeTasks { delete(task.codeTasks, hash) - codes = append(codes, hash) - s.requesting.addCodeTask(hash) - s.tasks.deleteCodeTask(hash) - // Stop when we've gathered enough requests - if len(codes) >= cap { - return + hashes = append(hashes, hash) + } + + // create a unique id for task bundle + var taskID uint64 + for { + taskID = uint64(rand.Int63()) + if taskID == 0 { + continue } + if _, ok := s.tasks.codeTasks[taskID]; ok { + continue + } + break + } + + bytecodeTask := &byteCodeTasksBundle{ + id: taskID, + hashes: hashes, + task: task, + } + codes = append(codes, bytecodeTask) + + s.requesting.addCodeTask(taskID, bytecodeTask) + //s.tasks.deleteCodeTask(taskID) + + // Stop when we've gathered enough requests + if len(codes) >= cap { + return } } - cap = n - len(accounts) - len(codes) + cap = n - len(codes) // - len(accounts) for accTaskID, task := range s.tasks.accountTasks { // Skip tasks that are already retrieving (or done with) all small states @@ -1118,7 +1134,7 @@ func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, withHealTasks var ( hashes = make([]common.Hash, 0, cap) paths = make([]string, 0, cap) - pathsets = make([]TrieNodePathSet, 0, cap) + pathsets = make([]*message.TrieNodePathSet, 0, cap) ) for path, hash := range s.tasks.healer[0].trieTasks { delete(s.tasks.healer[0].trieTasks, path) @@ -1228,7 +1244,7 @@ func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, withHealTasks // sortByAccountPath takes hashes and paths, and sorts them. After that, it generates // the TrieNodePaths and merges paths which belongs to the same account path. -func sortByAccountPath(paths []string, hashes []common.Hash) ([]string, []common.Hash, []trie.SyncPath, []TrieNodePathSet) { +func sortByAccountPath(paths []string, hashes []common.Hash) ([]string, []common.Hash, []trie.SyncPath, []*message.TrieNodePathSet) { var syncPaths []trie.SyncPath for _, path := range paths { syncPaths = append(syncPaths, trie.NewSyncPath([]byte(path))) @@ -1242,14 +1258,14 @@ func sortByAccountPath(paths []string, hashes []common.Hash) ([]string, []common // getBatchFromRetries get the block number batch to be requested from retries. func (s *FullStateDownloadManager) getBatchFromRetries(n int) ( accounts []*accountTask, - codes []common.Hash, + codes []*byteCodeTasksBundle, storages *storageTaskBundle, healtask *healTask, codetask *healTask) { // over trie nodes as those can be written to disk and forgotten about. 
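// Editor's sketch (illustrative, not part of the patch): how a bytecode bundle moves
// between the task sets (tasks -> requesting -> retries) when a stream fails, mirroring
// HandleRequestError below; requeueOnError is a hypothetical name:
func requeueOnError(s *FullStateDownloadManager, code *byteCodeTasksBundle) {
	// the failed request is no longer in flight ...
	s.requesting.deleteCodeTask(code.id)
	// ... and is parked until getBatchFromRetries hands it out again, at which point
	// it is moved back into the requesting set.
	s.retries.addCodeTask(code.id, code)
}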
- accounts = make([]*accountTask, 0, n) - codes = make([]common.Hash, 0, n) + accounts = make([]*accountTask, 0) + codes = make([]*byteCodeTasksBundle, 0) for _, task := range s.retries.accountTasks { // Stop when we've gathered enough requests @@ -1263,14 +1279,14 @@ func (s *FullStateDownloadManager) getBatchFromRetries(n int) ( cap := n - len(accounts) - for code := range s.retries.codeTasks { + for _, code := range s.retries.codeTasks { // Stop when we've gathered enough requests if len(codes) >= cap { return } codes = append(codes, code) - s.requesting.addCodeTask(code) - s.retries.deleteCodeTask(code) + s.requesting.addCodeTask(code.id, code) + s.retries.deleteCodeTask(code.id) } cap = n - len(accounts) - len(codes) @@ -1339,7 +1355,7 @@ func (s *FullStateDownloadManager) getBatchFromRetries(n int) ( // HandleRequestError handles the error result func (s *FullStateDownloadManager) HandleRequestError(accounts []*accountTask, - codes []common.Hash, + codes []*byteCodeTasksBundle, storages *storageTaskBundle, healtask *healTask, codetask *healTask, @@ -1354,8 +1370,8 @@ func (s *FullStateDownloadManager) HandleRequestError(accounts []*accountTask, } for _, code := range codes { - s.requesting.deleteCodeTask(code) - s.retries.addCodeTask(code) + s.requesting.deleteCodeTask(code.id) + s.retries.addCodeTask(code.id, code) } if storages != nil { @@ -1374,18 +1390,99 @@ func (s *FullStateDownloadManager) HandleRequestError(accounts []*accountTask, } } +// UnpackAccountRanges retrieves the accounts from the range packet and converts from slim +// wire representation to consensus format. The returned data is RLP encoded +// since it's expected to be serialized to disk without further interpretation. +// +// Note, this method does a round of RLP decoding and re-encoding, so only use it +// once and cache the results if need be. Ideally discard the packet afterwards +// to not double the memory use. +func (s *FullStateDownloadManager) UnpackAccountRanges(retAccounts []*message.AccountData) ([]common.Hash, [][]byte, error) { + var ( + hashes = make([]common.Hash, len(retAccounts)) + accounts = make([][]byte, len(retAccounts)) + ) + for i, acc := range retAccounts { + val, err := FullAccountRLP(acc.Body) + if err != nil { + return nil, nil, fmt.Errorf("invalid account %x: %v", acc.Body, err) + } + hashes[i] = common.BytesToHash(acc.Hash) + accounts[i] = val + } + return hashes, accounts, nil +} + // HandleAccountRequestResult handles get account ranges result -func (s *FullStateDownloadManager) HandleAccountRequestResult(task *accountTask, // Task which this request is filling - hashes []common.Hash, // Account hashes in the returned range - accounts []*types.StateAccount, // Expanded accounts in the returned range - cont bool, // Whether the account range has a continuation +func (s *FullStateDownloadManager) HandleAccountRequestResult(task *accountTask, + retAccounts []*message.AccountData, + proof [][]byte, + origin []byte, + last []byte, loopID int, streamID sttypes.StreamID) error { + hashes, accounts, err := s.UnpackAccountRanges(retAccounts) + if err != nil { + return err + } + + size := common.StorageSize(len(hashes) * common.HashLength) + for _, account := range accounts { + size += common.StorageSize(len(account)) + } + for _, node := range proof { + size += common.StorageSize(len(node)) + } + utils.Logger().Trace(). + Int("hashes", len(hashes)). + Int("accounts", len(accounts)). + Int("proofs", len(proof)). + Interface("bytes", size). 
+ Msg("Delivering range of accounts") + s.lock.Lock() defer s.lock.Unlock() - if err := s.processAccountResponse(task, hashes, accounts, cont); err != nil { + // Response is valid, but check if peer is signalling that it does not have + // the requested data. For account range queries that means the state being + // retrieved was either already pruned remotely, or the peer is not yet + // synced to our head. + if len(hashes) == 0 && len(accounts) == 0 && len(proof) == 0 { + utils.Logger().Debug(). + Interface("root", s.root). + Msg("Peer rejected account range request") + s.lock.Unlock() + return nil + } + root := s.root + s.lock.Unlock() + + // Reconstruct a partial trie from the response and verify it + keys := make([][]byte, len(hashes)) + for i, key := range hashes { + keys[i] = common.CopyBytes(key[:]) + } + nodes := make(ProofList, len(proof)) + for i, node := range proof { + nodes[i] = node + } + cont, err := trie.VerifyRangeProof(root, origin[:], last[:], keys, accounts, nodes.Set()) + if err != nil { + utils.Logger().Warn().Err(err).Msg("Account range failed proof") + // Signal this request as failed, and ready for rescheduling + return err + } + accs := make([]*types.StateAccount, len(accounts)) + for i, account := range accounts { + acc := new(types.StateAccount) + if err := rlp.DecodeBytes(account, acc); err != nil { + panic(err) // We created these blobs, we must be able to decode them + } + accs[i] = acc + } + + if err := s.processAccountResponse(task, hashes, accs, cont); err != nil { return err } @@ -1491,16 +1588,72 @@ func (s *FullStateDownloadManager) processAccountResponse(task *accountTask, // } // HandleBytecodeRequestResult handles get bytecode result -func (s *FullStateDownloadManager) HandleBytecodeRequestResult(task *accountTask, // Task which this request is filling - hashes []common.Hash, // Hashes of the bytecode to avoid double hashing +// it is a callback method to invoke when a batch of contract +// bytes codes are received from a remote peer. +func (s *FullStateDownloadManager) HandleBytecodeRequestResult(task interface{}, // Task which this request is filling + reqHashes []common.Hash, // Hashes of the bytecode to avoid double hashing bytecodes [][]byte, // Actual bytecodes to store into the database (nil = missing) loopID int, streamID sttypes.StreamID) error { + s.lock.RLock() + syncing := !s.snapped + s.lock.RUnlock() + + if syncing { + return s.onByteCodes(task.(*accountTask), bytecodes, reqHashes) + } + return s.onHealByteCodes(task.(*healTask), reqHashes, bytecodes) +} + +// onByteCodes is a callback method to invoke when a batch of contract +// bytes codes are received from a remote peer in the syncing phase. +func (s *FullStateDownloadManager) onByteCodes(task *accountTask, bytecodes [][]byte, reqHashes []common.Hash) error { + var size common.StorageSize + for _, code := range bytecodes { + size += common.StorageSize(len(code)) + } + + utils.Logger().Trace().Int("bytecodes", len(bytecodes)).Interface("bytes", size).Msg("Delivering set of bytecodes") + s.lock.Lock() defer s.lock.Unlock() - if err := s.processBytecodeResponse(task, hashes, bytecodes); err != nil { + // Response is valid, but check if peer is signalling that it does not have + // the requested data. For bytecode range queries that means the peer is not + // yet synced. 
+ if len(bytecodes) == 0 { + utils.Logger().Debug().Msg("Peer rejected bytecode request") + return nil + } + + // Cross reference the requested bytecodes with the response to find gaps + // that the serving node is missing + hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState) + hash := make([]byte, 32) + + codes := make([][]byte, len(reqHashes)) + for i, j := 0, 0; i < len(bytecodes); i++ { + // Find the next hash that we've been served, leaving misses with nils + hasher.Reset() + hasher.Write(bytecodes[i]) + hasher.Read(hash) + + for j < len(reqHashes) && !bytes.Equal(hash, reqHashes[j][:]) { + j++ + } + if j < len(reqHashes) { + codes[j] = bytecodes[i] + j++ + continue + } + // We've either ran out of hashes, or got unrequested data + utils.Logger().Warn().Int("count", len(bytecodes)-i).Msg("Unexpected bytecodes") + // Signal this request as failed, and ready for rescheduling + return errors.New("unexpected bytecode") + } + // Response validated, send it to the scheduler for filling + if err := s.processBytecodeResponse(task, reqHashes, codes); err != nil { return err } @@ -1574,21 +1727,143 @@ func estimateRemainingSlots(hashes int, last common.Hash) (uint64, error) { return space.Uint64() - uint64(hashes), nil } -// HandleStorageRequestResult handles get storages result -func (s *FullStateDownloadManager) HandleStorageRequestResult(mainTask *accountTask, // Task which this response belongs to - subTask *storageTask, // Task which this response is filling - accounts []common.Hash, // Account hashes requested, may be only partially filled - roots []common.Hash, // Storage roots requested, may be only partially filled - hashes [][]common.Hash, // Storage slot hashes in the returned range - storageSlots [][][]byte, // Storage slot values in the returned range - cont bool, // Whether the last storage range has a continuation +// Unpack retrieves the storage slots from the range packet and returns them in +// a split flat format that's more consistent with the internal data structures. +func (s *FullStateDownloadManager) UnpackStorages(slots [][]*message.StorageData) ([][]common.Hash, [][][]byte) { + var ( + hashset = make([][]common.Hash, len(slots)) + slotset = make([][][]byte, len(slots)) + ) + for i, slots := range slots { + hashset[i] = make([]common.Hash, len(slots)) + slotset[i] = make([][]byte, len(slots)) + for j, slot := range slots { + hashset[i][j] = common.BytesToHash(slot.Hash) + slotset[i][j] = slot.Body + } + } + return hashset, slotset +} + +// HandleStorageRequestResult handles get storages result when ranges of storage slots +// are received from a remote peer. 
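// Editor's sketch (illustrative, not part of the patch): the cross-referencing pattern
// used above for bytecodes, and again below for healing trie nodes and bytecodes, pulled
// out in isolation. Each delivered blob is keccak-hashed and matched, in order, against
// the requested hashes, leaving nil gaps for anything the peer skipped; unrequested data
// is rejected. matchDeliveries is a hypothetical helper and assumes the bytes, sha3,
// crypto and errors imports already used in this file.
func matchDeliveries(reqHashes []common.Hash, blobs [][]byte) ([][]byte, error) {
	hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState)
	hash := make([]byte, 32)
	out := make([][]byte, len(reqHashes))
	for i, j := 0, 0; i < len(blobs); i++ {
		hasher.Reset()
		hasher.Write(blobs[i])
		hasher.Read(hash)
		for j < len(reqHashes) && !bytes.Equal(hash, reqHashes[j][:]) {
			j++ // leave nil gaps for hashes the peer did not serve
		}
		if j >= len(reqHashes) {
			return nil, errors.New("unrequested blob in response")
		}
		out[j] = blobs[i]
		j++
	}
	return out, nil
}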
+func (s *FullStateDownloadManager) HandleStorageRequestResult(mainTask *accountTask, + subTask *storageTask, + reqAccounts []common.Hash, + roots []common.Hash, + origin common.Hash, + limit common.Hash, + receivedSlots [][]*message.StorageData, + proof [][]byte, loopID int, streamID sttypes.StreamID) error { s.lock.Lock() defer s.lock.Unlock() - if err := s.processStorageResponse(mainTask, subTask, accounts, roots, hashes, storageSlots, cont); err != nil { + hashes, slots := s.UnpackStorages(receivedSlots) + + // Gather some trace stats to aid in debugging issues + var ( + hashCount int + slotCount int + size common.StorageSize + ) + for _, hashset := range hashes { + size += common.StorageSize(common.HashLength * len(hashset)) + hashCount += len(hashset) + } + for _, slotset := range slots { + for _, slot := range slotset { + size += common.StorageSize(len(slot)) + } + slotCount += len(slotset) + } + for _, node := range proof { + size += common.StorageSize(len(node)) + } + + utils.Logger().Trace(). + Int("accounts", len(hashes)). + Int("hashes", hashCount). + Int("slots", slotCount). + Int("proofs", len(proof)). + Interface("size", size). + Msg("Delivering ranges of storage slots") + + s.lock.Lock() + defer s.lock.Unlock() + + // Reject the response if the hash sets and slot sets don't match, or if the + // peer sent more data than requested. + if len(hashes) != len(slots) { + utils.Logger().Warn(). + Int("hashset", len(hashes)). + Int("slotset", len(slots)). + Msg("Hash and slot set size mismatch") + return errors.New("hash and slot set size mismatch") + } + if len(hashes) > len(reqAccounts) { + utils.Logger().Warn(). + Int("hashset", len(hashes)). + Int("requested", len(reqAccounts)). + Msg("Hash set larger than requested") + return errors.New("hash set larger than requested") + } + // Response is valid, but check if peer is signalling that it does not have + // the requested data. For storage range queries that means the state being + // retrieved was either already pruned remotely, or the peer is not yet + // synced to our head. + if len(hashes) == 0 && len(proof) == 0 { + utils.Logger().Debug().Msg("Peer rejected storage request") + return nil + } + + // Reconstruct the partial tries from the response and verify them + var cont bool + + // If a proof was attached while the response is empty, it indicates that the + // requested range specified with 'origin' is empty. Construct an empty state + // response locally to finalize the range. + if len(hashes) == 0 && len(proof) > 0 { + hashes = append(hashes, []common.Hash{}) + slots = append(slots, [][]byte{}) + } + for i := 0; i < len(hashes); i++ { + // Convert the keys and proofs into an internal format + keys := make([][]byte, len(hashes[i])) + for j, key := range hashes[i] { + keys[j] = common.CopyBytes(key[:]) + } + nodes := make(ProofList, 0, len(proof)) + if i == len(hashes)-1 { + for _, node := range proof { + nodes = append(nodes, node) + } + } + var err error + if len(nodes) == 0 { + // No proof has been attached, the response must cover the entire key + // space and hash to the origin root. 
+ _, err = trie.VerifyRangeProof(roots[i], nil, nil, keys, slots[i], nil) + if err != nil { + utils.Logger().Warn().Err(err).Msg("Storage slots failed proof") + return err + } + } else { + // A proof was attached, the response is only partial, check that the + // returned data is indeed part of the storage trie + proofdb := nodes.Set() + + cont, err = trie.VerifyRangeProof(roots[i], origin[:], limit[:], keys, slots[i], proofdb) + if err != nil { + utils.Logger().Warn().Err(err).Msg("Storage range failed proof") + return err + } + } + } + + if err := s.processStorageResponse(mainTask, subTask, reqAccounts, roots, hashes, slots, cont); err != nil { return err } @@ -1835,18 +2110,72 @@ func (s *FullStateDownloadManager) processStorageResponse(mainTask *accountTask, return nil } -// HandleTrieNodeHealRequestResult handles get trie nodes heal result +// HandleTrieNodeHealRequestResult handles get trie nodes heal result when a batch of trie nodes +// are received from a remote peer. func (s *FullStateDownloadManager) HandleTrieNodeHealRequestResult(task *healTask, // Task which this request is filling - paths []string, // Paths of the trie nodes - hashes []common.Hash, // Hashes of the trie nodes to avoid double hashing - nodes [][]byte, // Actual trie nodes to store into the database (nil = missing) + reqPaths []string, + reqHashes []common.Hash, + trienodes [][]byte, loopID int, streamID sttypes.StreamID) error { s.lock.Lock() defer s.lock.Unlock() - if err := s.processTrienodeHealResponse(task, paths, hashes, nodes); err != nil { + var size common.StorageSize + for _, node := range trienodes { + size += common.StorageSize(len(node)) + } + + utils.Logger().Trace(). + Int("trienodes", len(trienodes)). + Interface("bytes", size). + Msg("Delivering set of healing trienodes") + + // Response is valid, but check if peer is signalling that it does not have + // the requested data. For bytecode range queries that means the peer is not + // yet synced. + if len(trienodes) == 0 { + utils.Logger().Debug().Msg("Peer rejected trienode heal request") + return nil + } + + // Cross reference the requested trienodes with the response to find gaps + // that the serving node is missing + var ( + hasher = sha3.NewLegacyKeccak256().(crypto.KeccakState) + hash = make([]byte, 32) + nodes = make([][]byte, len(reqHashes)) + fills uint64 + ) + for i, j := 0, 0; i < len(trienodes); i++ { + // Find the next hash that we've been served, leaving misses with nils + hasher.Reset() + hasher.Write(trienodes[i]) + hasher.Read(hash) + + for j < len(reqHashes) && !bytes.Equal(hash, reqHashes[j][:]) { + j++ + } + if j < len(reqHashes) { + nodes[j] = trienodes[i] + fills++ + j++ + continue + } + // We've either ran out of hashes, or got unrequested data + utils.Logger().Warn().Int("count", len(trienodes)-i).Msg("Unexpected healing trienodes") + + // Signal this request as failed, and ready for rescheduling + return errors.New("unexpected healing trienode") + } + // Response validated, send it to the scheduler for filling + s.trienodeHealPend.Add(fills) + defer func() { + s.trienodeHealPend.Add(^(fills - 1)) + }() + + if err := s.processTrienodeHealResponse(task, reqPaths, reqHashes, nodes); err != nil { return err } @@ -1959,6 +2288,67 @@ func (s *FullStateDownloadManager) HandleByteCodeHealRequestResult(task *healTas return nil } +// onHealByteCodes is a callback method to invoke when a batch of contract +// bytes codes are received from a remote peer in the healing phase. 
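// Editor's note (illustrative, not part of the patch): trienodeHealPend.Add(^(fills - 1))
// above is the usual sync/atomic idiom for subtracting from an unsigned counter: adding
// ^(x-1), the two's complement of x, wraps around to the same value as subtracting x.
// A tiny self-contained illustration (assumes the "fmt" and "sync/atomic" imports; the
// local counter is only a stand-in for trienodeHealPend):
func subtractUnsigned() {
	var pending atomic.Uint64
	fills := uint64(3)
	pending.Add(fills)          // three heal fills scheduled
	pending.Add(^(fills - 1))   // same effect as subtracting fills
	fmt.Println(pending.Load()) // prints 0
}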
+func (s *FullStateDownloadManager) onHealByteCodes(task *healTask, + reqHashes []common.Hash, + bytecodes [][]byte) error { + + var size common.StorageSize + for _, code := range bytecodes { + size += common.StorageSize(len(code)) + } + + utils.Logger().Trace(). + Int("bytecodes", len(bytecodes)). + Interface("bytes", size). + Msg("Delivering set of healing bytecodes") + + s.lock.Lock() + s.lock.Unlock() + + // Response is valid, but check if peer is signalling that it does not have + // the requested data. For bytecode range queries that means the peer is not + // yet synced. + if len(bytecodes) == 0 { + utils.Logger().Debug().Msg("Peer rejected bytecode heal request") + return nil + } + + // Cross reference the requested bytecodes with the response to find gaps + // that the serving node is missing + hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState) + hash := make([]byte, 32) + + codes := make([][]byte, len(reqHashes)) + for i, j := 0, 0; i < len(bytecodes); i++ { + // Find the next hash that we've been served, leaving misses with nils + hasher.Reset() + hasher.Write(bytecodes[i]) + hasher.Read(hash) + + for j < len(reqHashes) && !bytes.Equal(hash, reqHashes[j][:]) { + j++ + } + if j < len(reqHashes) { + codes[j] = bytecodes[i] + j++ + continue + } + // We've either ran out of hashes, or got unrequested data + utils.Logger().Warn().Int("count", len(bytecodes)-i).Msg("Unexpected healing bytecodes") + + // Signal this request as failed, and ready for rescheduling + return errors.New("unexpected healing bytecode") + } + + if err := s.processBytecodeHealResponse(task, reqHashes, codes); err != nil { + return err + } + + return nil +} + // processBytecodeHealResponse integrates an already validated bytecode response // into the healer tasks. func (s *FullStateDownloadManager) processBytecodeHealResponse(task *healTask, // Task which this request is filling @@ -1992,3 +2382,30 @@ func (s *FullStateDownloadManager) processBytecodeHealResponse(task *healTask, / return nil } + +// onHealState is a callback method to invoke when a flat state(account +// or storage slot) is downloaded during the healing stage. The flat states +// can be persisted blindly and can be fixed later in the generation stage. +// Note it's not concurrent safe, please handle the concurrent issue outside. 
+func (s *FullStateDownloadManager) onHealState(paths [][]byte, value []byte) error { + if len(paths) == 1 { + var account types.StateAccount + if err := rlp.DecodeBytes(value, &account); err != nil { + return nil // Returning the error here would drop the remote peer + } + blob := s.SlimAccountRLP(account) + rawdb.WriteAccountSnapshot(s.stateWriter, common.BytesToHash(paths[0]), blob) + s.accountHealed += 1 + s.accountHealedBytes += common.StorageSize(1 + common.HashLength + len(blob)) + } + if len(paths) == 2 { + rawdb.WriteStorageSnapshot(s.stateWriter, common.BytesToHash(paths[0]), common.BytesToHash(paths[1]), value) + s.storageHealed += 1 + s.storageHealedBytes += common.StorageSize(1 + 2*common.HashLength + len(value)) + } + if s.stateWriter.ValueSize() > ethdb.IdealBatchSize { + s.stateWriter.Write() // It's fine to ignore the error here + s.stateWriter.Reset() + } + return nil +} diff --git a/api/service/stagedstreamsync/syncing.go b/api/service/stagedstreamsync/syncing.go index 73f050080..e6879a523 100644 --- a/api/service/stagedstreamsync/syncing.go +++ b/api/service/stagedstreamsync/syncing.go @@ -367,7 +367,7 @@ func (s *StagedStreamSync) doSync(downloaderContext context.Context, initSync bo } // add consensus last mile blocks - if s.consensus != nil { + if s.consensus != nil && s.isBeaconNode { if hashes, err := s.addConsensusLastMile(s.Blockchain(), s.consensus); err != nil { utils.Logger().Error().Err(err). Msg("[STAGED_STREAM_SYNC] Add consensus last mile failed") diff --git a/p2p/stream/protocols/sync/chain.go b/p2p/stream/protocols/sync/chain.go index aa4dced3f..3c147c91a 100644 --- a/p2p/stream/protocols/sync/chain.go +++ b/p2p/stream/protocols/sync/chain.go @@ -199,7 +199,7 @@ func (ch *chainHelperImpl) getReceipts(hs []common.Hash) ([]types.Receipts, erro return receipts, nil } -// getAccountRangeRequest +// getAccountRange func (ch *chainHelperImpl) getAccountRange(root common.Hash, origin common.Hash, limit common.Hash, bytes uint64) ([]*message.AccountData, [][]byte, error) { if bytes > softResponseLimit { bytes = softResponseLimit From f3ce9f3ac927268465a8afe81c41c6a9c88c5dc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Mon, 11 Dec 2023 22:30:19 +0800 Subject: [PATCH 54/56] return back deleted codes, fix rebase issues, goimports --- api/service/stagedstreamsync/range.go | 2 +- .../stagedstreamsync/stage_statesync.go | 4 +- core/blockchain.go | 2 + core/blockchain_impl.go | 185 ++++++------------ 4 files changed, 67 insertions(+), 126 deletions(-) diff --git a/api/service/stagedstreamsync/range.go b/api/service/stagedstreamsync/range.go index de18b02ab..d05a92ed4 100644 --- a/api/service/stagedstreamsync/range.go +++ b/api/service/stagedstreamsync/range.go @@ -81,4 +81,4 @@ func incHash(h common.Hash) common.Hash { a.SetBytes32(h[:]) a.AddUint64(&a, 1) return common.Hash(a.Bytes32()) -} \ No newline at end of file +} diff --git a/api/service/stagedstreamsync/stage_statesync.go b/api/service/stagedstreamsync/stage_statesync.go index 086d0fb41..4928b71b0 100644 --- a/api/service/stagedstreamsync/stage_statesync.go +++ b/api/service/stagedstreamsync/stage_statesync.go @@ -58,8 +58,8 @@ func (sss *StageStateSync) Exec(ctx context.Context, bool, invalidBlockRevert bo // for short range sync, skip this step if !s.state.initSync { return nil - } // only execute this stage in fast/snap sync mode and once we reach to pivot - + } // only execute this stage in fast/snap sync mode and once 
we reach to pivot + if s.state.status.pivotBlock == nil || s.state.CurrentBlockNumber() != s.state.status.pivotBlock.NumberU64() || s.state.status.statesSynced { diff --git a/core/blockchain.go b/core/blockchain.go index 1f7233f42..f47133bad 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -105,6 +105,8 @@ type BlockChain interface { // Rollback is designed to remove a chain of links from the database that aren't // certain enough to be valid. Rollback(chain []common.Hash) error + // writeHeadBlock writes a new head block + WriteHeadBlock(block *types.Block) error // WriteBlockWithoutState writes only the block and its metadata to the database, // but does not write any state. This is used to construct competing side forks // up to the point where they exceed the canonical total difficulty. diff --git a/core/blockchain_impl.go b/core/blockchain_impl.go index 15527c3fe..c7f01d413 100644 --- a/core/blockchain_impl.go +++ b/core/blockchain_impl.go @@ -34,6 +34,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/mclock" "github.com/ethereum/go-ethereum/common/prque" + "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/event" "github.com/ethereum/go-ethereum/metrics" @@ -69,8 +70,9 @@ import ( ) var ( - headBlockGauge = metrics.NewRegisteredGauge("chain/head/block", nil) - headHeaderGauge = metrics.NewRegisteredGauge("chain/head/header", nil) + headBlockGauge = metrics.NewRegisteredGauge("chain/head/block", nil) + headHeaderGauge = metrics.NewRegisteredGauge("chain/head/header", nil) + headFastBlockGauge = metrics.NewRegisteredGauge("chain/head/receipt", nil) accountReadTimer = metrics.NewRegisteredTimer("chain/account/reads", nil) accountHashTimer = metrics.NewRegisteredTimer("chain/account/hashes", nil) @@ -185,7 +187,8 @@ type BlockChainImpl struct { pendingCrossLinksMutex sync.RWMutex // pending crosslinks lock pendingSlashingCandidatesMU sync.RWMutex // pending slashing candidates - currentBlock atomic.Value // Current head of the block chain + currentBlock atomic.Value // Current head of the block chain + currentFastBlock atomic.Value // Current head of the fast-sync chain (may be above the block chain!) stateCache state.Database // State database to reuse between imports (contains state cache) bodyCache *lru.Cache // Cache for the most recent block bodies @@ -319,6 +322,7 @@ func newBlockChainWithOptions( } var nilBlock *types.Block bc.currentBlock.Store(nilBlock) + bc.currentFastBlock.Store(nilBlock) if err := bc.loadLastState(); err != nil { return nil, err } @@ -612,8 +616,22 @@ func (bc *BlockChainImpl) loadLastState() error { return errors.Wrap(err, "headerChain SetCurrentHeader") } + // Restore the last known head fast block + bc.currentFastBlock.Store(currentBlock) + headFastBlockGauge.Update(int64(currentBlock.NumberU64())) + if head := rawdb.ReadHeadFastBlockHash(bc.db); head != (common.Hash{}) { + if block := bc.GetBlockByHash(head); block != nil { + bc.currentFastBlock.Store(block) + headFastBlockGauge.Update(int64(block.NumberU64())) + } + } + + // Issue a status log for the user + currentFastBlock := bc.CurrentFastBlock() + headerTd := bc.GetTd(currentHeader.Hash(), currentHeader.Number().Uint64()) blockTd := bc.GetTd(currentBlock.Hash(), currentBlock.NumberU64()) + fastTd := bc.GetTd(currentFastBlock.Hash(), currentFastBlock.NumberU64()) utils.Logger().Info(). Str("number", currentHeader.Number().String()). 
@@ -627,6 +645,12 @@ func (bc *BlockChainImpl) loadLastState() error { Str("td", blockTd.String()). Str("age", common.PrettyAge(time.Unix(currentBlock.Time().Int64(), 0)).String()). Msg("Loaded most recent local full block") + utils.Logger().Info(). + Str("number", currentFastBlock.Number().String()). + Str("hash", currentFastBlock.Hash().Hex()). + Str("td", fastTd.String()). + Str("age", common.PrettyAge(time.Unix(currentFastBlock.Time().Int64(), 0)).String()). + Msg("Loaded most recent local fast block") return nil } @@ -663,16 +687,30 @@ func (bc *BlockChainImpl) setHead(head uint64) error { headBlockGauge.Update(int64(bc.genesisBlock.NumberU64())) } } + // Rewind the fast block in a simpleton way to the target head + if currentFastBlock := bc.CurrentFastBlock(); currentFastBlock != nil && currentHeader.Number().Uint64() < currentFastBlock.NumberU64() { + newHeadFastBlock := bc.GetBlock(currentHeader.Hash(), currentHeader.Number().Uint64()) + bc.currentFastBlock.Store(newHeadFastBlock) + headFastBlockGauge.Update(int64(newHeadFastBlock.NumberU64())) + } // If either blocks reached nil, reset to the genesis state if currentBlock := bc.CurrentBlock(); currentBlock == nil { bc.currentBlock.Store(bc.genesisBlock) headBlockGauge.Update(int64(bc.genesisBlock.NumberU64())) } + if currentFastBlock := bc.CurrentFastBlock(); currentFastBlock == nil { + bc.currentFastBlock.Store(bc.genesisBlock) + headFastBlockGauge.Update(int64(bc.genesisBlock.NumberU64())) + } currentBlock := bc.CurrentBlock() + currentFastBlock := bc.CurrentFastBlock() if err := rawdb.WriteHeadBlockHash(bc.db, currentBlock.Hash()); err != nil { return err } + if err := rawdb.WriteHeadFastBlockHash(bc.db, currentFastBlock.Hash()); err != nil { + return err + } return bc.loadLastState() } @@ -738,6 +776,8 @@ func (bc *BlockChainImpl) resetWithGenesisBlock(genesis *types.Block) error { } bc.currentBlock.Store(bc.genesisBlock) headBlockGauge.Update(int64(bc.genesisBlock.NumberU64())) + bc.currentFastBlock.Store(bc.genesisBlock) + headFastBlockGauge.Update(int64(bc.genesisBlock.NumberU64())) return nil } @@ -839,6 +879,10 @@ func (bc *BlockChainImpl) ExportN(w io.Writer, first uint64, last uint64) error return nil } +func (bc *BlockChainImpl) WriteHeadBlock(block *types.Block) error { + return bc.writeHeadBlock(block) +} + // writeHeadBlock writes a new head block func (bc *BlockChainImpl) writeHeadBlock(block *types.Block) error { // If the block is on a side chain or an unknown one, force other heads onto it too @@ -881,6 +925,9 @@ func (bc *BlockChainImpl) writeHeadBlock(block *types.Block) error { if err := rawdb.WriteHeadFastBlockHash(bc.db, block.Hash()); err != nil { return err } + + bc.currentFastBlock.Store(block) + headFastBlockGauge.Update(int64(block.NumberU64())) } return nil } @@ -894,6 +941,9 @@ func (bc *BlockChainImpl) tikvFastForward(block *types.Block, logs []*types.Log) return errors.Wrap(err, "HeaderChain SetCurrentHeader") } + bc.currentFastBlock.Store(block) + headFastBlockGauge.Update(int64(block.NumberU64())) + var events []interface{} events = append(events, ChainEvent{block, block.Hash(), logs}) events = append(events, ChainHeadEvent{block}) @@ -1195,6 +1245,14 @@ func (bc *BlockChainImpl) Rollback(chain []common.Hash) error { } } } + if currentFastBlock := bc.CurrentFastBlock(); currentFastBlock != nil && currentFastBlock.Hash() == hash { + newFastBlock := bc.GetBlock(currentFastBlock.ParentHash(), currentFastBlock.NumberU64()-1) + if newFastBlock != nil { + bc.currentFastBlock.Store(newFastBlock) + 
headFastBlockGauge.Update(int64(newFastBlock.NumberU64())) + rawdb.WriteHeadFastBlockHash(bc.db, newFastBlock.Hash()) + } + } if currentBlock := bc.CurrentBlock(); currentBlock != nil && currentBlock.Hash() == hash { newBlock := bc.GetBlock(currentBlock.ParentHash(), currentBlock.NumberU64()-1) if newBlock != nil { @@ -1792,7 +1850,7 @@ func (bc *BlockChainImpl) insertChain(chain types.Blocks, verifyHeaders bool) (i // Write the block to the chain and get the status. substart = time.Now() - status, err := bc.writeBlockWithState( + status, err := bc.WriteBlockWithState( block, receipts, cxReceipts, stakeMsgs, payout, state, ) if err != nil { @@ -1848,125 +1906,6 @@ func (bc *BlockChainImpl) insertChain(chain types.Blocks, verifyHeaders bool) (i return 0, events, coalescedLogs, nil } -// insertChainWithoutBlockExecution adds a set of blocks to blockchain without adding states -func (bc *BlockChainImpl) insertChainWithoutBlockExecution(chain types.Blocks, verifyHeaders bool) (int, []interface{}, []*types.Log, error) { - // Sanity check that we have something meaningful to import - if len(chain) == 0 { - return 0, nil, nil, nil - } - // Do a sanity check that the provided chain is actually ordered and linked - for i := 1; i < len(chain); i++ { - if chain[i].NumberU64() != chain[i-1].NumberU64()+1 || chain[i].ParentHash() != chain[i-1].Hash() { - // Chain broke ancestry, log a message (programming error) and skip insertion - utils.Logger().Error(). - Str("number", chain[i].Number().String()). - Str("hash", chain[i].Hash().Hex()). - Str("parent", chain[i].ParentHash().Hex()). - Str("prevnumber", chain[i-1].Number().String()). - Str("prevhash", chain[i-1].Hash().Hex()). - Msg("insertChain: non contiguous block insert") - - return 0, nil, nil, fmt.Errorf("non contiguous insert: item %d is #%d [%x…], item %d is #%d [%x…] (parent [%x…])", i-1, chain[i-1].NumberU64(), - chain[i-1].Hash().Bytes()[:4], i, chain[i].NumberU64(), chain[i].Hash().Bytes()[:4], chain[i].ParentHash().Bytes()[:4]) - } - } - - bc.chainmu.Lock() - defer bc.chainmu.Unlock() - - var verifyHeadersResults <-chan error - - // If the block header chain has not been verified, conduct header verification here. - if verifyHeaders { - headers := make([]*block.Header, len(chain)) - seals := make([]bool, len(chain)) - - for i, block := range chain { - headers[i] = block.Header() - seals[i] = true - } - // Note that VerifyHeaders verifies headers in the chain in parallel - abort, results := bc.Engine().VerifyHeaders(bc, headers, seals) - verifyHeadersResults = results - defer close(abort) - } - - // Start a parallel signature recovery (signer will fluke on fork transition, minimal perf loss) - //senderCacher.recoverFromBlocks(types.MakeSigner(bc.chainConfig, chain[0].Number()), chain) - - // Iterate over the blocks and insert when the verifier permits - for i, block := range chain { - // If the chain is terminating, stop processing blocks - if atomic.LoadInt32(&bc.procInterrupt) == 1 { - utils.Logger().Debug().Msg("Premature abort during blocks processing") - break - } - - var err error - if verifyHeaders { - err = <-verifyHeadersResults - } - if err == nil { - err = bc.Validator().ValidateBody(block) - } - switch { - case err == ErrKnownBlock: - // Block and state both already known. However if the current block is below - // this number we did a rollback and we should reimport it nonetheless. 
- if bc.CurrentBlock().NumberU64() >= block.NumberU64() { - continue - } - - case err == consensus_engine.ErrFutureBlock: - // Allow up to MaxFuture second in the future blocks. If this limit is exceeded - // the chain is discarded and processed at a later time if given. - max := big.NewInt(time.Now().Unix() + maxTimeFutureBlocks) - if block.Time().Cmp(max) > 0 { - return i, nil, nil, fmt.Errorf("future block: %v > %v", block.Time(), max) - } - bc.futureBlocks.Add(block.Hash(), block) - continue - - case err == consensus_engine.ErrUnknownAncestor && bc.futureBlocks.Contains(block.ParentHash()): - bc.futureBlocks.Add(block.Hash(), block) - continue - - case err == consensus_engine.ErrPrunedAncestor: - var winner []*types.Block - parent := bc.GetBlock(block.ParentHash(), block.NumberU64()-1) - for parent != nil && !bc.HasState(parent.Root()) { - winner = append(winner, parent) - parent = bc.GetBlock(parent.ParentHash(), parent.NumberU64()-1) - } - for j := 0; j < len(winner)/2; j++ { - winner[j], winner[len(winner)-1-j] = winner[len(winner)-1-j], winner[j] - } - // Prune in case non-empty winner chain - if len(winner) > 0 { - // Import all the pruned blocks to make the state available - bc.chainmu.Unlock() - _, _, _, err := bc.insertChainWithoutBlockExecution(winner, true /* verifyHeaders */) - bc.chainmu.Lock() - if err != nil { - return i, nil, nil, err - } - } - - case err != nil: - bc.reportBlock(block, nil, err) - return i, nil, nil, err - } - - // Create a new statedb using the parent block and report an - // error if it fails. - if err = bc.WriteBlockWithoutState(block); err != nil { - return i, nil, nil, err - } - } - - return 0, nil, nil, nil -} - // insertStats tracks and reports on block insertion. type insertStats struct { queued, processed, ignored int From 191c55b403eaf64d91f3eda049da6a92202c1c11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Tue, 12 Dec 2023 22:08:50 +0800 Subject: [PATCH 55/56] fix full state sync requests cap, add error handling to stage state sync, goimports --- .../stagedstreamsync/stage_statesync_full.go | 206 +++++++++++------- .../stagedstreamsync/state_sync_full.go | 137 ++++++------ p2p/stream/protocols/sync/chain.go | 2 +- 3 files changed, 197 insertions(+), 148 deletions(-) diff --git a/api/service/stagedstreamsync/stage_statesync_full.go b/api/service/stagedstreamsync/stage_statesync_full.go index 3e190bdc9..5a8bcd11d 100644 --- a/api/service/stagedstreamsync/stage_statesync_full.go +++ b/api/service/stagedstreamsync/stage_statesync_full.go @@ -206,84 +206,92 @@ func (sss *StageFullStateSync) runStateWorkerLoop(ctx context.Context, sdm *Full cap := maxRequestSize retAccounts, proof, stid, err := sss.configs.protocol.GetAccountRange(ctx, root, origin, limit, uint64(cap)) if err != nil { + if !errors.Is(err, context.Canceled) && !errors.Is(err, context.DeadlineExceeded) { + sss.configs.protocol.StreamFailed(stid, "GetAccountRange failed") + } + utils.Logger().Error(). + Err(err). + Str("stream", string(stid)). + Msg(WrapStagedSyncMsg("GetAccountRange failed")) + err = errors.Wrap(err, "request error") + sdm.HandleRequestError(accountTasks, codes, storages, healtask, codetask, stid, err) + return + } else if retAccounts == nil || len(retAccounts) == 0 { + s.state.Debug("runStateWorkerLoop/GetAccountRange/data", "nil array") + utils.Logger().Warn(). + Str("stream", string(stid)). 
+ Msg(WrapStagedSyncMsg("GetAccountRange failed, received empty accounts")) + err := errors.New("GetAccountRange received empty slots") + sdm.HandleRequestError(accountTasks, codes, storages, healtask, codetask, stid, err) return } if err := sdm.HandleAccountRequestResult(task, retAccounts, proof, origin[:], limit[:], loopID, stid); err != nil { + utils.Logger().Error(). + Err(err). + Str("stream", string(stid)). + Msg(WrapStagedSyncMsg("GetAccountRange handle result failed")) + err = errors.Wrap(err, "handle result error") + sdm.HandleRequestError(accountTasks, codes, storages, healtask, codetask, stid, err) return } - } else if len(codes)+len(storages.accounts) > 0 { + } else if len(codes) > 0 { - if len(codes) > 0 { - stid, err := sss.downloadByteCodes(ctx, sdm, codes, loopID) - if err != nil { - if !errors.Is(err, context.Canceled) && !errors.Is(err, context.DeadlineExceeded) { - sss.configs.protocol.StreamFailed(stid, "downloadByteCodes failed") - } - utils.Logger().Error(). - Err(err). - Str("stream", string(stid)). - Msg(WrapStagedSyncMsg("downloadByteCodes failed")) - err = errors.Wrap(err, "request error") - sdm.HandleRequestError(accountTasks, codes, storages, healtask, codetask, stid, err) - return + stid, err := sss.downloadByteCodes(ctx, sdm, codes, loopID) + if err != nil { + if !errors.Is(err, context.Canceled) && !errors.Is(err, context.DeadlineExceeded) { + sss.configs.protocol.StreamFailed(stid, "downloadByteCodes failed") } + utils.Logger().Error(). + Err(err). + Str("stream", string(stid)). + Msg(WrapStagedSyncMsg("downloadByteCodes failed")) + err = errors.Wrap(err, "request error") + sdm.HandleRequestError(accountTasks, codes, storages, healtask, codetask, stid, err) + return } - if len(storages.accounts) > 0 { - root := sdm.root - roots := storages.roots - accounts := storages.accounts - cap := maxRequestSize - origin := storages.origin - limit := storages.limit - mainTask := storages.mainTask - subTask := storages.subtask - - slots, proof, stid, err := sss.configs.protocol.GetStorageRanges(ctx, root, accounts, origin, limit, uint64(cap)) - if err != nil { - return - } - if err := sdm.HandleStorageRequestResult(mainTask, subTask, accounts, roots, origin, limit, slots, proof, loopID, stid); err != nil { - return + } else if len(storages.accounts) > 0 { + + root := sdm.root + roots := storages.roots + accounts := storages.accounts + cap := maxRequestSize + origin := storages.origin + limit := storages.limit + mainTask := storages.mainTask + subTask := storages.subtask + + slots, proof, stid, err := sss.configs.protocol.GetStorageRanges(ctx, root, accounts, origin, limit, uint64(cap)) + if err != nil { + if !errors.Is(err, context.Canceled) && !errors.Is(err, context.DeadlineExceeded) { + sss.configs.protocol.StreamFailed(stid, "GetStorageRanges failed") } + utils.Logger().Error(). + Err(err). + Str("stream", string(stid)). + Msg(WrapStagedSyncMsg("GetStorageRanges failed")) + err = errors.Wrap(err, "request error") + sdm.HandleRequestError(accountTasks, codes, storages, healtask, codetask, stid, err) + return + } else if slots == nil || len(slots) == 0 { + s.state.Debug("runStateWorkerLoop/GetStorageRanges/data", "nil array") + utils.Logger().Warn(). + Str("stream", string(stid)). 
+ Msg(WrapStagedSyncMsg("GetStorageRanges failed, received empty slots")) + err := errors.New("GetStorageRanges received empty slots") + sdm.HandleRequestError(accountTasks, codes, storages, healtask, codetask, stid, err) + return + } + if err := sdm.HandleStorageRequestResult(mainTask, subTask, accounts, roots, origin, limit, slots, proof, loopID, stid); err != nil { + utils.Logger().Error(). + Err(err). + Str("stream", string(stid)). + Msg(WrapStagedSyncMsg("GetStorageRanges handle result failed")) + err = errors.Wrap(err, "handle result error") + sdm.HandleRequestError(accountTasks, codes, storages, healtask, codetask, stid, err) + return } - - // data, stid, err := sss.downloadStates(ctx, accounts, codes, storages) - // if err != nil { - // s.state.Debug("runStateWorkerLoop/downloadStates/error", err) - // if !errors.Is(err, context.Canceled) && !errors.Is(err, context.DeadlineExceeded) { - // sss.configs.protocol.StreamFailed(stid, "downloadStates failed") - // } - // utils.Logger().Error(). - // Err(err). - // Str("stream", string(stid)). - // Msg(WrapStagedSyncMsg("downloadStates failed")) - // err = errors.Wrap(err, "request error") - // sdm.HandleRequestError(codes, paths, stid, err) - // } else if data == nil || len(data) == 0 { - // s.state.Debug("runStateWorkerLoop/downloadStates/data", "nil array") - // utils.Logger().Warn(). - // Str("stream", string(stid)). - // Msg(WrapStagedSyncMsg("downloadStates failed, received empty data bytes")) - // err := errors.New("downloadStates received empty data bytes") - // sdm.HandleRequestError(codes, paths, stid, err) - // } else { - // s.state.Debug("runStateWorkerLoop/downloadStates/data/len", len(data)) - // sdm.HandleRequestResult(nodes, paths, data, loopID, stid) - // if sss.configs.logProgress { - // //calculating block download speed - // dt := time.Now().Sub(startTime).Seconds() - // speed := float64(0) - // if dt > 0 { - // speed = float64(len(data)) / dt - // } - // stateDownloadSpeed := fmt.Sprintf("%.2f", speed) - - // fmt.Print("\033[u\033[K") // restore the cursor position and clear the line - // fmt.Println("state download speed:", stateDownloadSpeed, "states/s") - // } - // } } else { // assign trie node Heal Tasks @@ -296,9 +304,32 @@ func (sss *StageFullStateSync) runStateWorkerLoop(ctx context.Context, sdm *Full nodes, stid, err := sss.configs.protocol.GetTrieNodes(ctx, root, pathsets, maxRequestSize) if err != nil { + if !errors.Is(err, context.Canceled) && !errors.Is(err, context.DeadlineExceeded) { + sss.configs.protocol.StreamFailed(stid, "GetTrieNodes failed") + } + utils.Logger().Error(). + Err(err). + Str("stream", string(stid)). + Msg(WrapStagedSyncMsg("GetTrieNodes failed")) + err = errors.Wrap(err, "request error") + sdm.HandleRequestError(accountTasks, codes, storages, healtask, codetask, stid, err) + return + } else if nodes == nil || len(nodes) == 0 { + s.state.Debug("runStateWorkerLoop/GetTrieNodes/data", "nil array") + utils.Logger().Warn(). + Str("stream", string(stid)). + Msg(WrapStagedSyncMsg("GetTrieNodes failed, received empty nodes")) + err := errors.New("GetTrieNodes received empty nodes") + sdm.HandleRequestError(accountTasks, codes, storages, healtask, codetask, stid, err) return } if err := sdm.HandleTrieNodeHealRequestResult(task, paths, hashes, nodes, loopID, stid); err != nil { + utils.Logger().Error(). + Err(err). + Str("stream", string(stid)). 
+ Msg(WrapStagedSyncMsg("GetTrieNodes handle result failed")) + err = errors.Wrap(err, "handle result error") + sdm.HandleRequestError(accountTasks, codes, storages, healtask, codetask, stid, err) return } } @@ -306,11 +337,34 @@ func (sss *StageFullStateSync) runStateWorkerLoop(ctx context.Context, sdm *Full if len(codetask.hashes) > 0 { task := codetask.task hashes := codetask.hashes - codes, stid, err := sss.configs.protocol.GetByteCodes(ctx, hashes, maxRequestSize) + retCodes, stid, err := sss.configs.protocol.GetByteCodes(ctx, hashes, maxRequestSize) if err != nil { + if !errors.Is(err, context.Canceled) && !errors.Is(err, context.DeadlineExceeded) { + sss.configs.protocol.StreamFailed(stid, "GetByteCodes failed") + } + utils.Logger().Error(). + Err(err). + Str("stream", string(stid)). + Msg(WrapStagedSyncMsg("GetByteCodes failed")) + err = errors.Wrap(err, "request error") + sdm.HandleRequestError(accountTasks, codes, storages, healtask, codetask, stid, err) + return + } else if retCodes == nil || len(retCodes) == 0 { + s.state.Debug("runStateWorkerLoop/GetByteCodes/data", "nil array") + utils.Logger().Warn(). + Str("stream", string(stid)). + Msg(WrapStagedSyncMsg("GetByteCodes failed, received empty codes")) + err := errors.New("GetByteCodes received empty codes") + sdm.HandleRequestError(accountTasks, codes, storages, healtask, codetask, stid, err) return } - if err := sdm.HandleBytecodeRequestResult(task, hashes, codes, loopID, stid); err != nil { + if err := sdm.HandleBytecodeRequestResult(task, hashes, retCodes, loopID, stid); err != nil { + utils.Logger().Error(). + Err(err). + Str("stream", string(stid)). + Msg(WrapStagedSyncMsg("GetByteCodes handle result failed")) + err = errors.Wrap(err, "handle result error") + sdm.HandleRequestError(accountTasks, codes, storages, healtask, codetask, stid, err) return } } @@ -326,20 +380,8 @@ func (sss *StageFullStateSync) downloadByteCodes(ctx context.Context, sdm *FullS if err != nil { return stid, err } - if err = sdm.HandleBytecodeRequestResult(codeTask.task, codeTask.hashes, retCodes, loopID, stid); err != nil { - return stid, err - } - } - return -} - -func (sss *StageFullStateSync) downloadStorages(ctx context.Context, sdm *FullStateDownloadManager, codeTasks []*byteCodeTasksBundle, loopID int) (stid sttypes.StreamID, err error) { - for _, codeTask := range codeTasks { - // try to get byte codes from remote peer - // if any of them failed, the stid will be the id of failed stream - retCodes, stid, err := sss.configs.protocol.GetByteCodes(ctx, codeTask.hashes, maxRequestSize) - if err != nil { - return stid, err + if len(retCodes) == 0 { + return stid, errors.New("empty codes array") } if err = sdm.HandleBytecodeRequestResult(codeTask.task, codeTask.hashes, retCodes, loopID, stid); err != nil { return stid, err diff --git a/api/service/stagedstreamsync/state_sync_full.go b/api/service/stagedstreamsync/state_sync_full.go index daf0f4869..c98dcbafd 100644 --- a/api/service/stagedstreamsync/state_sync_full.go +++ b/api/service/stagedstreamsync/state_sync_full.go @@ -571,13 +571,10 @@ func (s *FullStateDownloadManager) GetNextBatch() (accounts []*accountTask, s.lock.Lock() defer s.lock.Unlock() - cap := StatesPerRequest - - accounts, codes, storages, healtask, codetask = s.getBatchFromRetries(cap) + accounts, codes, storages, healtask, codetask = s.getBatchFromRetries() nItems := len(accounts) + len(codes) + len(storages.roots) + len(healtask.hashes) + len(codetask.hashes) - cap -= nItems - if cap == 0 { + if nItems > 0 { return } @@ 
-593,7 +590,7 @@ func (s *FullStateDownloadManager) GetNextBatch() (accounts []*accountTask, if healtask != nil || codetask != nil { withHealTasks = false } - newAccounts, newCodes, newStorageTaskBundle, newHealTask, newCodeTask := s.getBatchFromUnprocessed(cap, withHealTasks) + newAccounts, newCodes, newStorageTaskBundle, newHealTask, newCodeTask := s.getBatchFromUnprocessed(withHealTasks) accounts = append(accounts, newAccounts...) codes = append(codes, newCodes...) storages = newStorageTaskBundle @@ -924,7 +921,7 @@ func (s *FullStateDownloadManager) updateStats(written, duplicate, unexpected in // getBatchFromUnprocessed returns objects with a maximum of n unprocessed state download // tasks to send to the remote peer. -func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, withHealTasks bool) ( +func (s *FullStateDownloadManager) getBatchFromUnprocessed(withHealTasks bool) ( accounts []*accountTask, codes []*byteCodeTasksBundle, storages *storageTaskBundle, @@ -932,31 +929,43 @@ func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, withHealTasks codetask *healTask) { // over trie nodes as those can be written to disk and forgotten about. - codes = make([]*byteCodeTasksBundle, 0, n) - accounts = make([]*accountTask, 0, n) + codes = make([]*byteCodeTasksBundle, 0) + accounts = make([]*accountTask, 0) for i, task := range s.tasks.accountTasks { // Stop when we've gathered enough requests - if len(accounts) == n { - return - } + // if len(accounts) == n { + // return + // } + // if already requested if task.requested { continue } - if task.id == 0 { - continue + + // create a unique id for healer task + var taskID uint64 + for { + taskID = uint64(rand.Int63()) + if taskID == 0 { + continue + } + if _, ok := s.tasks.accountTasks[taskID]; ok { + continue + } + break } + s.tasks.accountTasks[i].requested = true accounts = append(accounts, task) s.requesting.addAccountTask(task.id, task) - // s.tasks.deleteAccountTask(task) + s.tasks.addAccountTask(task.id, task) // one task account is enough for an stream return } - cap := n // - len(accounts) + totalHashes := int(0) for _, task := range s.tasks.accountTasks { // Skip tasks that are already retrieving (or done with) all codes @@ -969,6 +978,7 @@ func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, withHealTasks delete(task.codeTasks, hash) hashes = append(hashes, hash) } + totalHashes += len(hashes) // create a unique id for task bundle var taskID uint64 @@ -991,15 +1001,18 @@ func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, withHealTasks codes = append(codes, bytecodeTask) s.requesting.addCodeTask(taskID, bytecodeTask) - //s.tasks.deleteCodeTask(taskID) + s.tasks.addCodeTask(taskID, bytecodeTask) // Stop when we've gathered enough requests - if len(codes) >= cap { + if totalHashes >= maxCodeRequestCount { return } } - cap = n - len(codes) // - len(accounts) + // if we found some codes, can assign it to node + if totalHashes > 0 { + return + } for accTaskID, task := range s.tasks.accountTasks { // Skip tasks that are already retrieving (or done with) all small states @@ -1008,13 +1021,13 @@ func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, withHealTasks } // TODO: check cap calculations (shouldn't give us big chunk) - if cap > maxRequestSize { - cap = maxRequestSize - } - if cap < minRequestSize { // Don't bother with peers below a bare minimum performance - cap = minRequestSize - } - storageSets := cap / 1024 + // if cap > maxRequestSize { + // cap = maxRequestSize + // } 
+ // if cap < minRequestSize { // Don't bother with peers below a bare minimum performance + // cap = minRequestSize + // } + storageSets := maxRequestSize / 1024 storages = &storageTaskBundle{ accounts: make([]common.Hash, 0, storageSets), @@ -1079,14 +1092,10 @@ func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, withHealTasks s.tasks.addStorageTaskBundle(taskID, storages) s.requesting.addStorageTaskBundle(taskID, storages) - cap -= len(storages.accounts) - - if cap <= 0 { - break - } + return } - if len(accounts)+len(codes)+len(storages.accounts) > 0 { + if len(storages.accounts) > 0 { return } @@ -1095,7 +1104,6 @@ func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, withHealTasks } // Sync phase done, run heal phase - cap = n // Iterate over pending tasks and try to find a peer to retrieve with for (len(s.tasks.healer) > 0 && len(s.tasks.healer[0].hashes) > 0) || s.scheduler.Pending() > 0 { @@ -1111,7 +1119,7 @@ func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, withHealTasks } } - mPaths, mHashes, mCodes := s.scheduler.Missing(n) + mPaths, mHashes, mCodes := s.scheduler.Missing(maxTrieRequestCount) for i, path := range mPaths { s.tasks.healer[0].trieTasks[path] = mHashes[i] } @@ -1124,10 +1132,10 @@ func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, withHealTasks return } // Generate the network query and send it to the peer - if cap > maxTrieRequestCount { - cap = maxTrieRequestCount - } - cap = int(float64(cap) / s.trienodeHealThrottle) + // if cap > maxTrieRequestCount { + // cap = maxTrieRequestCount + // } + cap := int(float64(maxTrieRequestCount) / s.trienodeHealThrottle) if cap <= 0 { cap = 1 } @@ -1175,7 +1183,9 @@ func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, withHealTasks s.tasks.healer[taskID] = healtask s.requesting.addHealerTask(taskID, healtask) - cap = n - len(hashes) + if len(hashes) > 0 { + return + } } // trying to get bytecodes @@ -1185,7 +1195,7 @@ func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, withHealTasks // queue from the state sync scheduler. The trie synced schedules these // together with trie nodes, so we need to queue them combined. - mPaths, mHashes, mCodes := s.scheduler.Missing(cap) + mPaths, mHashes, mCodes := s.scheduler.Missing(maxTrieRequestCount) for i, path := range mPaths { s.tasks.healer[0].trieTasks[path] = mHashes[i] } @@ -1202,9 +1212,10 @@ func (s *FullStateDownloadManager) getBatchFromUnprocessed(n int, withHealTasks // Abort the entire assignment mechanism. // Generate the network query and send it to the peer - if cap > maxCodeRequestCount { - cap = maxCodeRequestCount - } + // if cap > maxCodeRequestCount { + // cap = maxCodeRequestCount + // } + cap := maxCodeRequestCount hashes := make([]common.Hash, 0, cap) for hash := range s.tasks.healer[0].codeTasks { delete(s.tasks.healer[0].codeTasks, hash) @@ -1256,7 +1267,7 @@ func sortByAccountPath(paths []string, hashes []common.Hash) ([]string, []common } // getBatchFromRetries get the block number batch to be requested from retries. 
-func (s *FullStateDownloadManager) getBatchFromRetries(n int) ( +func (s *FullStateDownloadManager) getBatchFromRetries() ( accounts []*accountTask, codes []*byteCodeTasksBundle, storages *storageTaskBundle, @@ -1269,27 +1280,29 @@ func (s *FullStateDownloadManager) getBatchFromRetries(n int) ( for _, task := range s.retries.accountTasks { // Stop when we've gathered enough requests - if len(accounts) == n { - return - } + // if len(accounts) == n { + // return + // } accounts = append(accounts, task) s.requesting.addAccountTask(task.id, task) s.retries.deleteAccountTask(task.id) + return } - cap := n - len(accounts) + if len(accounts) > 0 { + return + } for _, code := range s.retries.codeTasks { - // Stop when we've gathered enough requests - if len(codes) >= cap { - return - } codes = append(codes, code) s.requesting.addCodeTask(code.id, code) s.retries.deleteCodeTask(code.id) + return } - cap = n - len(accounts) - len(codes) + if len(codes) > 0 { + return + } if s.retries.storageTasks != nil && len(s.retries.storageTasks) > 0 { storages = &storageTaskBundle{ @@ -1303,20 +1316,17 @@ func (s *FullStateDownloadManager) getBatchFromRetries(n int) ( } s.requesting.addStorageTaskBundle(storages.id, storages) s.retries.deleteStorageTaskBundle(storages.id) + return } - if len(accounts)+len(codes)+len(storages.accounts) > 0 { + if len(storages.accounts) > 0 { return } - cap = n - if s.retries.healer != nil && len(s.retries.healer) > 0 { - foundHealTask := false - foundByteCodeTask := false for id, task := range s.retries.healer { - if !foundHealTask && !task.byteCodeReq { + if !task.byteCodeReq { healtask = &healTask{ id: id, hashes: task.hashes, @@ -1328,9 +1338,9 @@ func (s *FullStateDownloadManager) getBatchFromRetries(n int) ( } s.requesting.addHealerTask(id, task) s.retries.deleteHealerTask(id) - foundHealTask = true + return } - if !foundByteCodeTask && task.byteCodeReq { + if task.byteCodeReq { codetask = &healTask{ id: id, hashes: task.hashes, @@ -1342,10 +1352,7 @@ func (s *FullStateDownloadManager) getBatchFromRetries(n int) ( } s.requesting.addHealerTask(id, task) s.retries.deleteHealerTask(id) - foundByteCodeTask = true - } - if foundHealTask && foundByteCodeTask { - break + return } } } diff --git a/p2p/stream/protocols/sync/chain.go b/p2p/stream/protocols/sync/chain.go index 3c147c91a..451952bcc 100644 --- a/p2p/stream/protocols/sync/chain.go +++ b/p2p/stream/protocols/sync/chain.go @@ -199,7 +199,7 @@ func (ch *chainHelperImpl) getReceipts(hs []common.Hash) ([]types.Receipts, erro return receipts, nil } -// getAccountRange +// getAccountRange func (ch *chainHelperImpl) getAccountRange(root common.Hash, origin common.Hash, limit common.Hash, bytes uint64) ([]*message.AccountData, [][]byte, error) { if bytes > softResponseLimit { bytes = softResponseLimit From 419aad1fb7d4d7e503dad91940507fce6834bee6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CGheisMohammadi=E2=80=9D?= <36589218+GheisMohammadi@users.noreply.github.com> Date: Tue, 12 Dec 2023 22:15:11 +0800 Subject: [PATCH 56/56] remove state debug logs --- .../stagedstreamsync/stage_statesync.go | 1 - .../stagedstreamsync/stage_statesync_full.go | 22 ------------------- 2 files changed, 23 deletions(-) diff --git a/api/service/stagedstreamsync/stage_statesync.go b/api/service/stagedstreamsync/stage_statesync.go index 4928b71b0..c4e66e10e 100644 --- a/api/service/stagedstreamsync/stage_statesync.go +++ b/api/service/stagedstreamsync/stage_statesync.go @@ -120,7 +120,6 @@ func (sss *StageStateSync) Exec(ctx context.Context, 
bool, invalidBlockRevert bo sss.configs.logger.Warn().Err(err). Uint64("pivot block number", s.state.status.pivotBlock.NumberU64()). Msg(WrapStagedSyncMsg("insert pivot block failed")) - s.state.Debug("StateSync/pivot/insert/error", err) // TODO: panic("pivot block is failed to insert in chain.") return err } diff --git a/api/service/stagedstreamsync/stage_statesync_full.go b/api/service/stagedstreamsync/stage_statesync_full.go index 5a8bcd11d..d304ca1c3 100644 --- a/api/service/stagedstreamsync/stage_statesync_full.go +++ b/api/service/stagedstreamsync/stage_statesync_full.go @@ -67,7 +67,6 @@ func (sss *StageFullStateSync) Exec(ctx context.Context, bool, invalidBlockRever return nil } - s.state.Debug("STATE SYNC ======================================================>", "started") // maxHeight := s.state.status.targetBN // currentHead := s.state.CurrentBlockNumber() // if currentHead >= maxHeight { @@ -106,15 +105,10 @@ func (sss *StageFullStateSync) Exec(ctx context.Context, bool, invalidBlockRever } // Fetch states from neighbors - pivotRootHash := s.state.status.pivotBlock.Root() currentBlockRootHash := s.state.bc.CurrentFastBlock().Root() scheme := sss.configs.bc.TrieDB().Scheme() sdm := newFullStateDownloadManager(sss.configs.bc.ChainDb(), scheme, tx, sss.configs.bc, sss.configs.concurrency, s.state.logger) sdm.setRootHash(currentBlockRootHash) - s.state.Debug("StateSync/setRootHash", pivotRootHash) - s.state.Debug("StateSync/currentFastBlockRoot", currentBlockRootHash) - s.state.Debug("StateSync/pivotBlockNumber", s.state.status.pivotBlock.NumberU64()) - s.state.Debug("StateSync/currentFastBlockNumber", s.state.bc.CurrentFastBlock().NumberU64()) var wg sync.WaitGroup for i := 0; i < s.state.config.Concurrency; i++ { wg.Add(1) @@ -127,7 +121,6 @@ func (sss *StageFullStateSync) Exec(ctx context.Context, bool, invalidBlockRever sss.configs.logger.Warn().Err(err). Uint64("pivot block number", s.state.status.pivotBlock.NumberU64()). 
Msg(WrapStagedSyncMsg("insert pivot block failed")) - s.state.Debug("StateSync/pivot/insert/error", err) // TODO: panic("pivot block is failed to insert in chain.") return err } @@ -135,9 +128,6 @@ func (sss *StageFullStateSync) Exec(ctx context.Context, bool, invalidBlockRever // states should be fully synced in this stage s.state.status.statesSynced = true - s.state.Debug("StateSync/pivot/num", s.state.status.pivotBlock.NumberU64()) - s.state.Debug("StateSync/pivot/insert", "done") - /* gbm := s.state.gbm @@ -171,21 +161,15 @@ func (sss *StageFullStateSync) Exec(ctx context.Context, bool, invalidBlockRever // runStateWorkerLoop creates a work loop for download states func (sss *StageFullStateSync) runStateWorkerLoop(ctx context.Context, sdm *FullStateDownloadManager, wg *sync.WaitGroup, loopID int, startTime time.Time, s *StageState) { - s.state.Debug("runStateWorkerLoop/info", "started") - defer wg.Done() for { select { case <-ctx.Done(): - s.state.Debug("runStateWorkerLoop/ctx/done", "Finished") return default: } accountTasks, codes, storages, healtask, codetask, err := sdm.GetNextBatch() - s.state.Debug("runStateWorkerLoop/batch/len", len(accountTasks)+len(codes)+len(storages.accounts)) - s.state.Debug("runStateWorkerLoop/batch/heals/len", len(healtask.hashes)+len(codetask.hashes)) - s.state.Debug("runStateWorkerLoop/batch/err", err) if len(accountTasks)+len(codes)+len(storages.accounts)+len(healtask.hashes)+len(codetask.hashes) == 0 || err != nil { select { case <-ctx.Done(): @@ -194,8 +178,6 @@ func (sss *StageFullStateSync) runStateWorkerLoop(ctx context.Context, sdm *Full return } } - s.state.Debug("runStateWorkerLoop/batch/accounts", accountTasks) - s.state.Debug("runStateWorkerLoop/batch/codes", codes) if len(accountTasks) > 0 { @@ -217,7 +199,6 @@ func (sss *StageFullStateSync) runStateWorkerLoop(ctx context.Context, sdm *Full sdm.HandleRequestError(accountTasks, codes, storages, healtask, codetask, stid, err) return } else if retAccounts == nil || len(retAccounts) == 0 { - s.state.Debug("runStateWorkerLoop/GetAccountRange/data", "nil array") utils.Logger().Warn(). Str("stream", string(stid)). Msg(WrapStagedSyncMsg("GetAccountRange failed, received empty accounts")) @@ -275,7 +256,6 @@ func (sss *StageFullStateSync) runStateWorkerLoop(ctx context.Context, sdm *Full sdm.HandleRequestError(accountTasks, codes, storages, healtask, codetask, stid, err) return } else if slots == nil || len(slots) == 0 { - s.state.Debug("runStateWorkerLoop/GetStorageRanges/data", "nil array") utils.Logger().Warn(). Str("stream", string(stid)). Msg(WrapStagedSyncMsg("GetStorageRanges failed, received empty slots")) @@ -315,7 +295,6 @@ func (sss *StageFullStateSync) runStateWorkerLoop(ctx context.Context, sdm *Full sdm.HandleRequestError(accountTasks, codes, storages, healtask, codetask, stid, err) return } else if nodes == nil || len(nodes) == 0 { - s.state.Debug("runStateWorkerLoop/GetTrieNodes/data", "nil array") utils.Logger().Warn(). Str("stream", string(stid)). Msg(WrapStagedSyncMsg("GetTrieNodes failed, received empty nodes")) @@ -350,7 +329,6 @@ func (sss *StageFullStateSync) runStateWorkerLoop(ctx context.Context, sdm *Full sdm.HandleRequestError(accountTasks, codes, storages, healtask, codetask, stid, err) return } else if retCodes == nil || len(retCodes) == 0 { - s.state.Debug("runStateWorkerLoop/GetByteCodes/data", "nil array") utils.Logger().Warn(). Str("stream", string(stid)). Msg(WrapStagedSyncMsg("GetByteCodes failed, received empty codes"))
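
Editorial note, not part of the patches above: PATCH 55 repeats the same response-handling pattern after each GetAccountRange, GetStorageRanges, GetByteCodes and GetTrieNodes call. The sketch below factors that pattern into one helper so the intent is easier to see. It is an illustration only, under assumed names: streamID, streamFailer and requestErrorSink are placeholders standing in for sttypes.StreamID, syncProtocol and FullStateDownloadManager, and the real HandleRequestError also hands back the outstanding account/code/storage/heal task bundles so they can be re-queued on another stream.

package syncsketch

import (
	"context"
	"errors"
	"fmt"
	"log"
)

// streamID is a placeholder for sttypes.StreamID.
type streamID string

// streamFailer models the single syncProtocol method used here.
type streamFailer interface {
	StreamFailed(stid streamID, reason string)
}

// requestErrorSink models FullStateDownloadManager.HandleRequestError; the
// real method also receives the task bundles being returned for retry.
type requestErrorSink interface {
	HandleRequestError(stid streamID, err error)
}

// checkSyncResponse returns true when the caller may process the payload.
// On a transport error it marks the stream failed unless the error is only a
// cancelled or expired context, logs, wraps the error and returns the work to
// the download manager; on an empty payload it synthesizes an error and does
// the same, mirroring the branches added in PATCH 55.
func checkSyncResponse(reqErr error, itemCount int, stid streamID, what string,
	proto streamFailer, sink requestErrorSink) bool {

	if reqErr != nil {
		if !errors.Is(reqErr, context.Canceled) && !errors.Is(reqErr, context.DeadlineExceeded) {
			proto.StreamFailed(stid, what+" failed")
		}
		log.Printf("%s failed on stream %s: %v", what, stid, reqErr)
		sink.HandleRequestError(stid, fmt.Errorf("request error: %w", reqErr))
		return false
	}
	if itemCount == 0 {
		log.Printf("%s returned no data on stream %s", what, stid)
		sink.HandleRequestError(stid, errors.New(what+" received empty response"))
		return false
	}
	return true
}

A worker loop using this helper would call it once per request, e.g. checkSyncResponse(err, len(retAccounts), stid, "GetAccountRange", proto, sdm), and only fall through to HandleAccountRequestResult when it returns true; that is the behaviour the expanded if/else chains in stage_statesync_full.go implement inline.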