initial version of offline state pruning

feature/state_pruning_codes
“GheisMohammadi” 2 years ago
parent 86c4a21a6e
commit 02f8f49634
No known key found for this signature in database
GPG Key ID: 15073AED3829FE90
  1. 7
      cmd/harmony/main.go
  2. 168
      cmd/harmony/snapshotdb.go
  3. 131
      core/state/pruner/bloom.go
  4. 516
      core/state/pruner/pruner.go

@ -106,7 +106,9 @@ func init() {
rootCmd.AddCommand(dumpConfigLegacyCmd)
rootCmd.AddCommand(dumpDBCmd)
rootCmd.AddCommand(inspectDBCmd)
rootCmd.AddCommand(snapshotCmd)
snapshotCmd.AddCommand(pruneStateCmd)
snapshotCmd.AddCommand(verifyStateCmd)
if err := registerRootCmdFlags(); err != nil {
os.Exit(2)
}
@ -119,6 +121,9 @@ func init() {
if err := registerInspectionFlags(); err != nil {
os.Exit(2)
}
if err := registerSnapshotCmdFlags(); err != nil {
os.Exit(2)
}
}
func main() {

@ -0,0 +1,168 @@
package main
import (
"fmt"
"os"
"path/filepath"
"github.com/ethereum/go-ethereum/trie"
"github.com/spf13/cobra"
"github.com/ethereum/go-ethereum/common"
"github.com/harmony-one/harmony/core/rawdb"
"github.com/harmony-one/harmony/core/state/pruner"
"github.com/harmony-one/harmony/core/state/snapshot"
"github.com/harmony-one/harmony/internal/cli"
"github.com/harmony-one/harmony/internal/utils"
)
// bloomFilterSizeFlag sets, in megabytes, how much memory the pruning bloom
// filter may use. It is registered on the prune-state command only.
var bloomFilterSizeFlag = cli.IntFlag{
Name: "bloomfilter.size",
Shorthand: "b",
Usage: "Megabytes of memory allocated to bloom-filter for pruning",
DefValue: 2048,
}
// stateRootFlag carries the optional target state root hash. It is registered
// on both prune-state and verify-state; both commands treat a value shorter
// than three characters as "not specified".
var stateRootFlag = cli.StringFlag{
Name: "stateroot",
Shorthand: "r",
Usage: "state root hash",
DefValue: "",
}
// snapshotCmd is the parent command of the snapshot based tools. It performs
// no work on its own; invoking it without a sub-command only prints a hint
// listing the available sub-commands.
var snapshotCmd = &cobra.Command{
	Use:   "snapshot",
	Short: "A set of commands based on the snapshot",
	Long:  "A set of commands based on the snapshot",
	Run: func(cmd *cobra.Command, args []string) {
		// The sub-commands are registered as "prune-state" and "verify-state".
		fmt.Println("Error: must also specify a subcommand (prune-state, verify-state, ...)")
	},
}
// pruneStateCmd prunes stale state from the database at args[0] with the help
// of the state snapshot. args[1] is the clean trie cache directory, which the
// pruner deletes before touching the database.
//
// Both positional arguments are mandatory: the handler reads args[1]
// unconditionally, and an empty cache path would resolve to the current
// directory, which the pruner would then remove.
//
// When --stateroot is not given the target root is left as the zero hash so
// that Pruner.Prune selects the bottom-most snapshot diff layer itself. A
// block hash is not a state root and must not be used as the target.
var pruneStateCmd = &cobra.Command{
	Use:     "prune-state srcdb cachedir",
	Short:   "prune stale harmony state data based on snapshot",
	Long:    "will prune historical state data with the help of state snapshot. All trie nodes that do not belong to the specified version state will be deleted from the database",
	Example: "harmony snapshot prune-state /srcDir/harmony_db_0 /prune_cache",
	Args:    cobra.ExactArgs(2),
	Run: func(cmd *cobra.Command, args []string) {
		srcDBDir, cachedir := args[0], args[1]
		bloomFilterSize := cli.GetIntFlagValue(cmd, bloomFilterSizeFlag)
		stateRoot := cli.GetStringFlagValue(cmd, stateRootFlag)

		chaindb, err := rawdb.NewLevelDBDatabase(srcDBDir, LEVELDB_CACHE_SIZE, LEVELDB_HANDLES, "", false)
		if err != nil {
			fmt.Println("open src db error:", err)
			os.Exit(-1)
		}
		defer chaindb.Close()

		prunerconfig := pruner.Config{
			Datadir:   ResolvePath(""),
			Cachedir:  ResolvePath(cachedir),
			BloomSize: uint64(bloomFilterSize),
		}
		// Named statePruner so the pruner package is not shadowed.
		statePruner, err := pruner.NewPruner(chaindb, prunerconfig)
		if err != nil {
			utils.Logger().Error().Err(err).Msg("Failed to open snapshot tree")
			return
		}

		// Zero value means "let the pruner pick the target layer".
		var targetRoot common.Hash
		if len(stateRoot) >= 3 {
			targetRoot, err = parseRoot(stateRoot)
			if err != nil {
				utils.Logger().Error().Err(err).Msg("Failed to resolve state root")
				return
			}
		}

		if err = statePruner.Prune(targetRoot); err != nil {
			utils.Logger().Error().Err(err).Msg("Failed to prune state")
			return
		}
	},
}
// verifyStateCmd recalculates the state hash from the snapshot stored in the
// database at args[0] and compares it with the target root. The target is the
// value of --stateroot when given, otherwise the state root of the head block.
// After a successful verification it also checks for dangling storage.
//
// The snapshot tree is opened at the head block's state root (not the head
// block hash), mirroring what the pruner does.
var verifyStateCmd = &cobra.Command{
	Use:     "verify-state srcdb",
	Short:   "Recalculate state hash based on snapshot for verification",
	Long:    "Recalculate state hash based on snapshot for verification",
	Example: "harmony snapshot verify-state /srcDir/harmony_db_0",
	Args:    cobra.ExactArgs(1),
	Run: func(cmd *cobra.Command, args []string) {
		srcDBDir := args[0]
		chaindb, err := rawdb.NewLevelDBDatabase(srcDBDir, LEVELDB_CACHE_SIZE, LEVELDB_HANDLES, "", false)
		if err != nil {
			fmt.Println("open src db error:", err)
			os.Exit(-1)
		}
		defer chaindb.Close()

		headBlock := rawdb.ReadHeadBlock(chaindb)
		if headBlock == nil {
			utils.Logger().Error().Msg("Failed to load head block")
			return
		}
		headRoot := headBlock.Root()

		// Default to the head state unless the user asked for a specific root.
		targetRoot := headRoot
		if stateRoot := cli.GetStringFlagValue(cmd, stateRootFlag); len(stateRoot) >= 3 {
			if targetRoot, err = parseRoot(stateRoot); err != nil {
				utils.Logger().Error().Err(err).Msg("Failed to resolve state root")
				return
			}
		}

		snapconfig := snapshot.Config{
			CacheSize:  256,
			Recovery:   false,
			NoBuild:    true,
			AsyncBuild: false,
		}
		snaptree, err := snapshot.New(snapconfig, chaindb, trie.NewDatabase(chaindb), headRoot)
		if err != nil {
			utils.Logger().Error().Err(err).Msg("Failed to open snapshot tree")
			return
		}
		if err := snaptree.Verify(targetRoot); err != nil {
			utils.Logger().Error().Err(err).Interface("root", targetRoot).Msg("Failed to verify state")
			return
		}
		utils.Logger().Info().Interface("root", targetRoot).Msg("Verified the state")

		if err := snapshot.CheckDanglingStorage(chaindb); err != nil {
			utils.Logger().Error().Err(err).Interface("root", targetRoot).Msg("Failed to check dangling storage")
		}
	},
}
// ResolvePath returns filename unchanged when it is absolute. A relative
// filename is joined onto the directory of "." (i.e. the current directory),
// which also cleans it; an empty filename therefore yields ".".
func ResolvePath(filename string) string {
	if !filepath.IsAbs(filename) {
		base := filepath.Dir(".")
		return filepath.Join(base, filename)
	}
	return filename
}
// parseRoot decodes the textual form of a state root via
// common.Hash.UnmarshalText and returns the hash together with any decoding
// error.
func parseRoot(input string) (common.Hash, error) {
	var root common.Hash
	err := root.UnmarshalText([]byte(input))
	return root, err
}
func registerSnapshotCmdFlags() error {
if err := cli.RegisterFlags(pruneStateCmd, []cli.Flag{bloomFilterSizeFlag, stateRootFlag}); err != nil {
return err
}
if err := cli.RegisterFlags(verifyStateCmd, []cli.Flag{stateRootFlag}); err != nil {
return err
}
return nil
}

@ -0,0 +1,131 @@
// Copyright 2021 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package pruner
import (
"encoding/binary"
"errors"
"os"
"github.com/ethereum/go-ethereum/common"
"github.com/harmony-one/harmony/core/rawdb"
"github.com/ethereum/go-ethereum/log"
bloomfilter "github.com/holiman/bloomfilter/v2"
)
// stateBloomHasher adapts a raw byte blob to the hasher interface expected by
// the bloom filter library. Only Size and Sum64 are functional: Sum64 reads
// the first eight bytes of the blob as a big-endian integer, which serves as
// the 64 bit mini hash of a trie hash or contract code hash. Every other
// method panics.
type stateBloomHasher []byte

// Write is not supported and always panics.
func (h stateBloomHasher) Write(p []byte) (n int, err error) {
	panic("not implemented")
}

// Sum is not supported and always panics.
func (h stateBloomHasher) Sum(b []byte) []byte {
	panic("not implemented")
}

// Reset is not supported and always panics.
func (h stateBloomHasher) Reset() {
	panic("not implemented")
}

// BlockSize is not supported and always panics.
func (h stateBloomHasher) BlockSize() int {
	panic("not implemented")
}

// Size reports the size of the mini hash in bytes.
func (h stateBloomHasher) Size() int {
	return 8
}

// Sum64 interprets the leading eight bytes of the blob as a big-endian uint64.
func (h stateBloomHasher) Sum64() uint64 {
	return binary.BigEndian.Uint64(h)
}
// stateBloom is a bloom filter used during the state conversion
// (snapshot->state). The keys of all generated entries are recorded here so
// that, in the pruning stage, the entries belonging to the target state
// version are not deleted.
//
// False positives are allowed here. A "false-positive" entry does not actually
// belong to the target version but is nevertheless kept by the pruning. The
// downside of allowing false positives is that some "dangling" nodes may be
// left on disk. In practice it is very unlikely that a dangling node is a
// state root, so in theory such pruned state should never be visited again.
// Another potential issue concerns fast sync: running another fast sync on top
// of the pruned database is problematic because the dangling nodes can stop
// the expansion during syncing.
//
// After the entire state is generated, the bloom filter should be persisted to
// disk. Its presence indicates that the whole generation procedure finished.
type stateBloom struct {
bloom *bloomfilter.Filter
}
// newStateBloomWithSize allocates a fresh state bloom for state generation.
// size is given in megabytes and converted to a bit count for the underlying
// filter, which is created with 4 hash functions. According to
// https://hur.st/bloomfilter/?n=600000000&p=&m=2048MB&k=4 these parameters were
// picked so that the false-positive rate for mainnet is low enough.
func newStateBloomWithSize(size uint64) (*stateBloom, error) {
	bits := size * 1024 * 1024 * 8
	filter, err := bloomfilter.New(bits, 4)
	if err != nil {
		return nil, err
	}
	allocated := common.StorageSize(float64(filter.M() / 8))
	log.Info("Initialized state bloom", "size", allocated)
	return &stateBloom{bloom: filter}, nil
}
// NewStateBloomFromDisk reads a previously committed state bloom from
// filename. The filter on disk is assumed to be complete.
func NewStateBloomFromDisk(filename string) (*stateBloom, error) {
	filter, _, err := bloomfilter.ReadFile(filename)
	if err == nil {
		return &stateBloom{bloom: filter}, nil
	}
	return nil, err
}
// Commit persists the bloom filter and thereby marks it as complete. The
// content is first written to tempname, synced to disk, and only then renamed
// to filename, so an interrupted write never leaves a file under the final
// name.
func (bloom *stateBloom) Commit(filename, tempname string) error {
	// Dump the filter into the temporary file.
	if _, err := bloom.bloom.WriteFile(tempname); err != nil {
		return err
	}
	// Re-open the file to force its content onto stable storage.
	tmp, err := os.OpenFile(tempname, os.O_RDWR, 0666)
	if err != nil {
		return err
	}
	syncErr := tmp.Sync()
	// The handle is closed before the rename, whether or not the sync worked.
	tmp.Close()
	if syncErr != nil {
		return syncErr
	}
	// Publish the filter under its final name.
	return os.Rename(tempname, filename)
}
// Put implements the KeyValueWriter interface; the value is ignored and only
// the key is recorded in the bloom filter. A key of hash length is added as
// is. Any other key must be a new-scheme contract code key, in which case the
// embedded code hash is added; otherwise an error is returned.
func (bloom *stateBloom) Put(key []byte, value []byte) error {
	entry := key
	if len(key) != common.HashLength {
		isCode, codeKey := rawdb.IsCodeKey(key)
		if !isCode {
			return errors.New("invalid entry")
		}
		entry = codeKey
	}
	bloom.bloom.Add(stateBloomHasher(entry))
	return nil
}
// Delete exists only to satisfy the key-value writer method set. Removing
// entries from the bloom filter is not supported, so it always panics.
func (bloom *stateBloom) Delete(key []byte) error { panic("not supported") }
// Contain wraps the membership test of the underlying bloom filter.
//   - true means the key may have been added (false positives are possible)
//   - false means the key has definitely not been added
//
// The returned error is always nil.
func (bloom *stateBloom) Contain(key []byte) (bool, error) {
	present := bloom.bloom.Contains(stateBloomHasher(key))
	return present, nil
}

@ -0,0 +1,516 @@
// Copyright 2021 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package pruner
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"math"
"os"
"path/filepath"
"strings"
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/harmony-one/harmony/core/rawdb"
"github.com/harmony-one/harmony/core/state/snapshot"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie"
"github.com/harmony-one/harmony/block"
)
const (
// stateBloomFilePrefix is the filename prefix of state bloom filter.
stateBloomFilePrefix = "statebloom"
// stateBloomFileSuffix is the filename suffix of state bloom filter.
stateBloomFileSuffix = "bf.gz"
// stateBloomFileTempSuffix is appended to the bloom filter filename while
// the filter is being written out, so that aborted writes can be detected.
stateBloomFileTempSuffix = ".tmp"
// rangeCompactionThreshold is the minimal number of deleted entries that
// triggers range compaction. It's a quite arbitrary number, chosen only to
// avoid triggering range compaction after a small deletion.
rangeCompactionThreshold = 100000
)
// Config includes all the configurations for pruning.
type Config struct {
Datadir string // Directory searched for, and used to store, the state bloom filter file
Cachedir string // The directory of state clean cache; it is deleted before pruning starts
BloomSize uint64 // The Megabytes of memory allocated to bloom-filter (NewPruner raises values below 256 to 256)
}
// Pruner is an offline tool to prune the stale state with the
// help of the snapshot. The workflow of pruner is very simple:
//
// - iterate the snapshot, reconstruct the relevant state
// - iterate the database, delete all other state entries which
// don't belong to the target state and the genesis state
//
// It can take several hours to finish the whole pruning work (the upstream
// go-ethereum comment quotes around 2 hours for its mainnet). It's recommended
// to run this offline tool periodically in order to release the disk usage and
// improve the disk read performance to some extent.
type Pruner struct {
config Config // pruning configuration, with the bloom size already sanitized
chainHeader *block.Header // header of the head block read when the pruner was created
db ethdb.Database // database that is being pruned
stateBloom *stateBloom // bloom filter recording the entries that must be kept
snaptree *snapshot.Tree // snapshot tree opened at the head block's state root
}
// NewPruner builds a Pruner for db. It loads the head block, opens the
// snapshot tree at the head state root without (re)building it, and allocates
// the state bloom filter. A bloom size below 256 MB is raised to 256 MB. An
// error is returned when the head block is missing, the snapshot cannot be
// opened, or the bloom filter cannot be allocated.
func NewPruner(db ethdb.Database, config Config) (*Pruner, error) {
	head := rawdb.ReadHeadBlock(db)
	if head == nil {
		return nil, errors.New("failed to load head block")
	}
	tree, err := snapshot.New(snapshot.Config{
		CacheSize:  256,
		Recovery:   false,
		NoBuild:    true,
		AsyncBuild: false,
	}, db, trie.NewDatabase(db), head.Root())
	if err != nil {
		// The relevant snapshot(s) might not exist.
		return nil, err
	}
	// Enforce a lower bound on the bloom filter size.
	const minBloomSize = 256
	if config.BloomSize < minBloomSize {
		log.Warn("Sanitizing bloomfilter size", "provided(MB)", config.BloomSize, "updated(MB)", minBloomSize)
		config.BloomSize = minBloomSize
	}
	filter, err := newStateBloomWithSize(config.BloomSize)
	if err != nil {
		return nil, err
	}
	p := &Pruner{
		config:      config,
		chainHeader: head.Header(),
		db:          db,
		stateBloom:  filter,
		snaptree:    tree,
	}
	return p, nil
}
// prune performs the destructive part of the pruning once the state bloom has
// been committed to bloomPath:
//
//  1. delete every trie node / contract code entry that is neither covered by
//     stateBloom nor needed, forcibly including the middle state roots,
//  2. flatten the snapshot tree so that root becomes the single disk layer and
//     persist the snapshot journal,
//  3. remove the bloom filter file, which marks the procedure as finished,
//  4. compact the database if enough entries were deleted.
//
// start is the time the overall pruning began and is only used for the final
// log line. Any database, snapshot or filesystem failure aborts the procedure
// and is returned; in particular the bloom filter file is left in place unless
// every deletion was written successfully, so that RecoverPruning can redo the
// work on the next start.
func prune(snaptree *snapshot.Tree, root common.Hash, maindb ethdb.Database, stateBloom *stateBloom, bloomPath string, middleStateRoots map[common.Hash]struct{}, start time.Time) error {
	pstart := time.Now()
	count, size, err := deleteStaleEntries(maindb, stateBloom, middleStateRoots, pstart)
	if err != nil {
		return err
	}
	log.Info("Pruned state data", "nodes", count, "size", size, "elapsed", common.PrettyDuration(time.Since(pstart)))

	// Pruning is done, now drop the "useless" layers from the snapshot.
	// Firstly, flushing the target layer into the disk. After that all
	// diff layers below the target will all be merged into the disk.
	if err := snaptree.Cap(root, 0); err != nil {
		return err
	}
	// Secondly, flushing the snapshot journal into the disk. All diff
	// layers upon are dropped silently. Eventually the entire snapshot
	// tree is converted into a single disk layer with the pruning target
	// as the root.
	if _, err := snaptree.Journal(root); err != nil {
		return err
	}
	// Delete the state bloom, it marks the entire pruning procedure is
	// finished. If any crashes or manual exit happens before this,
	// `RecoverPruning` will pick it up in the next restarts to redo all
	// the things. A leftover file would make later runs believe that a
	// pruning is still pending, so a failure to remove it is reported.
	if err := os.RemoveAll(bloomPath); err != nil {
		return fmt.Errorf("removing state bloom %q: %w", bloomPath, err)
	}
	// Start compactions, will remove the deleted data from the disk immediately.
	// Note for small pruning, the compaction is skipped.
	if count >= rangeCompactionThreshold {
		if err := compactDatabase(maindb); err != nil {
			return err
		}
	}
	log.Info("State pruning successful", "pruned", size, "elapsed", common.PrettyDuration(time.Since(start)))
	return nil
}

// deleteStaleEntries walks the whole database and deletes all state entries
// (trie nodes, legacy and new-scheme contract code) that are not reported by
// bloom, plus every entry listed in middleStateRoots. It returns the number
// of deleted entries and their accumulated key+value size.
//
// A very small part of stale entries survives because of the false-positive
// rate of the bloom filter. The assumption is that the rate is low enough and
// that the probability of a dangling node being a state root is negligible, so
// the dangling nodes are in theory never visited again.
func deleteStaleEntries(maindb ethdb.Database, bloom *stateBloom, middleStateRoots map[common.Hash]struct{}, pstart time.Time) (int, common.StorageSize, error) {
	var (
		count  int
		size   common.StorageSize
		logged = time.Now()
		batch  = maindb.NewBatch()
		iter   = maindb.NewIterator(nil, nil)
	)
	// iter is re-created inside the loop; the closure releases whichever
	// iterator is current when the function returns, on every path.
	defer func() { iter.Release() }()

	for iter.Next() {
		key := iter.Key()

		// Only state entries are candidates for deletion:
		// - trie node
		// - legacy contract code
		// - new-scheme contract code
		isCode, codeKey := rawdb.IsCodeKey(key)
		if len(key) != common.HashLength && !isCode {
			continue
		}
		checkKey := key
		if isCode {
			checkKey = codeKey
		}
		if _, exist := middleStateRoots[common.BytesToHash(checkKey)]; exist {
			log.Debug("Forcibly delete the middle state roots", "hash", common.BytesToHash(checkKey))
		} else {
			ok, err := bloom.Contain(checkKey)
			if err != nil {
				return 0, 0, err
			}
			if ok {
				continue
			}
		}
		count++
		size += common.StorageSize(len(key) + len(iter.Value()))
		if err := batch.Delete(key); err != nil {
			return 0, 0, err
		}

		if time.Since(logged) > 8*time.Second {
			// Estimate the remaining time from how far the iterator has
			// progressed through the 64 bit key prefix space.
			var eta time.Duration // Realistically will never remain uninited
			if done := binary.BigEndian.Uint64(key[:8]); done > 0 {
				var (
					left  = math.MaxUint64 - done
					speed = done/uint64(time.Since(pstart)/time.Millisecond+1) + 1 // +1ms to avoid division by zero
				)
				eta = time.Duration(left/speed) * time.Millisecond
			}
			log.Info("Pruning state data", "nodes", count, "size", size,
				"elapsed", common.PrettyDuration(time.Since(pstart)), "eta", common.PrettyDuration(eta))
			logged = time.Now()
		}
		// Recreate the iterator after every batch commit in order
		// to allow the underlying compactor to delete the entries.
		if batch.ValueSize() >= ethdb.IdealBatchSize {
			if err := batch.Write(); err != nil {
				return 0, 0, err
			}
			batch.Reset()
			iter.Release()
			iter = maindb.NewIterator(nil, key)
		}
	}
	// A failed iteration must not be mistaken for a completed one.
	if err := iter.Error(); err != nil {
		return 0, 0, err
	}
	if batch.ValueSize() > 0 {
		if err := batch.Write(); err != nil {
			return 0, 0, err
		}
		batch.Reset()
	}
	return count, size, nil
}

// compactDatabase compacts the whole key space of maindb in sixteen ranges
// split on the high nibble of the first key byte; the last range is open
// ended. The first compaction error is logged and returned.
func compactDatabase(maindb ethdb.Database) error {
	cstart := time.Now()
	for b := 0x00; b <= 0xf0; b += 0x10 {
		var (
			start = []byte{byte(b)}
			end   = []byte{byte(b + 0x10)}
		)
		if b == 0xf0 {
			end = nil
		}
		log.Info("Compacting database", "range", fmt.Sprintf("%#x-%#x", start, end), "elapsed", common.PrettyDuration(time.Since(cstart)))
		if err := maindb.Compact(start, end); err != nil {
			log.Error("Database compaction failed", "error", err)
			return err
		}
	}
	log.Info("Database compaction finished", "elapsed", common.PrettyDuration(time.Since(cstart)))
	return nil
}
// Prune deletes all historical state nodes except the nodes belonging to the
// specified state version. If the caller passes the zero hash as root, the
// bottom-most snapshot diff layer is used as the target.
//
// If a committed state bloom filter is found in the configured Datadir, the
// given root is ignored and the interrupted pruning is resumed instead.
// Otherwise the target state and the genesis state are recorded in the bloom
// filter, the filter is committed to disk, and every state entry not covered
// by it is deleted.
func (p *Pruner) Prune(root common.Hash) error {
// If the state bloom filter is already committed previously,
// reuse it for pruning instead of generating a new one. It's
// mandatory because a part of state may already be deleted,
// the recovery procedure is necessary.
_, stateBloomRoot, err := findBloomFilter(p.config.Datadir)
if err != nil {
return err
}
if stateBloomRoot != (common.Hash{}) {
return RecoverPruning(p.config.Datadir, p.db, p.config.Cachedir)
}
// If the target state root is not specified, use the HEAD-127 as the
// target. The reason for picking it is:
// - in most of the normal cases, the related state is available
// - the probability of this layer being reorg is very low
var layers []snapshot.Snapshot
if root == (common.Hash{}) {
// Retrieve all snapshot layers from the current HEAD.
// In theory there are 128 difflayers + 1 disk layer present,
// so 128 diff layers are expected to be returned.
layers = p.snaptree.Snapshots(p.chainHeader.Root(), 128, true)
if len(layers) != 128 {
// Reject if the accumulated diff layers are less than 128. It
// means in most of normal cases, there is no associated state
// with bottom-most diff layer.
return fmt.Errorf("snapshot not old enough yet: need %d more blocks", 128-len(layers))
}
// Use the bottom-most diff layer as the target
root = layers[len(layers)-1].Root()
}
// Ensure the root is really present. The weak assumption
// is the presence of root can indicate the presence of the
// entire trie.
if !rawdb.HasLegacyTrieNode(p.db, root) {
// Comment inherited from go-ethereum: on clique based networks it is
// possible that two consecutive blocks have the same root. In this
// case a snapshot difflayer won't be created, so HEAD-127 may not be
// paired with the head-127 layer. Instead the paired layer is higher
// than the bottom-most diff layer. Try to find the bottom-most
// snapshot layer with state available.
//
// Note HEAD and HEAD-1 is ignored. Usually there is the associated
// state available, but we don't want to use the topmost state
// as the pruning target.
//
// When root was supplied by the caller, layers is empty, the loop below
// does not run, and the "not present" error is returned.
var found bool
for i := len(layers) - 2; i >= 2; i-- {
if rawdb.HasLegacyTrieNode(p.db, layers[i].Root()) {
root = layers[i].Root()
found = true
log.Info("Selecting middle-layer as the pruning target", "root", root, "depth", i)
break
}
}
if !found {
if len(layers) > 0 {
return errors.New("no snapshot paired state")
}
return fmt.Errorf("associated state[%x] is not present", root)
}
} else {
if len(layers) > 0 {
log.Info("Selecting bottom-most difflayer as the pruning target", "root", root, "height", p.chainHeader.Number().Uint64()-127)
} else {
log.Info("Selecting user-specified state as the pruning target", "root", root)
}
}
// Before start the pruning, delete the clean trie cache first.
// It's necessary otherwise in the next restart we will hit the
// deleted state root in the "clean cache" so that the incomplete
// state is picked for usage.
deleteCleanTrieCache(p.config.Cachedir)
// All the state roots of the middle layer should be forcibly pruned,
// otherwise the dangling state will be left. Only the layers listed
// before the target are collected.
middleRoots := make(map[common.Hash]struct{})
for _, layer := range layers {
if layer.Root() == root {
break
}
middleRoots[layer.Root()] = struct{}{}
}
// Traverse the target state, re-construct the whole state trie and
// commit to the given bloom filter.
start := time.Now()
if err := snapshot.GenerateTrie(p.snaptree, root, p.db, p.stateBloom); err != nil {
return err
}
// Traverse the genesis, put all genesis state entries into the
// bloom filter too.
if err := extractGenesis(p.db, p.stateBloom); err != nil {
return err
}
// Persist the filter (written to a temporary name first, then renamed)
// before any deletion happens, so an interrupted run can be resumed.
filterName := bloomFilterName(p.config.Datadir, root)
log.Info("Writing state bloom to disk", "name", filterName)
if err := p.stateBloom.Commit(filterName, filterName+stateBloomFileTempSuffix); err != nil {
return err
}
log.Info("State bloom filter committed", "name", filterName)
return prune(p.snaptree, root, p.db, p.stateBloom, filterName, middleRoots, start)
}
// RecoverPruning resumes a pruning run that was interrupted (crash or manual
// kill) after the state bloom filter had already been committed to datadir.
// Once the filter exists the pruning **has to be resumed**, otherwise a lot of
// dangling nodes may be left on disk. When no bloom filter file is found there
// is nothing to recover and nil is returned.
//
// trieCachePath is the clean trie cache directory, which is deleted before the
// database is touched.
func RecoverPruning(datadir string, db ethdb.Database, trieCachePath string) error {
	bloomPath, bloomRoot, err := findBloomFilter(datadir)
	if err != nil {
		return err
	}
	if bloomPath == "" {
		// No committed filter, hence no pending pruning.
		return nil
	}
	head := rawdb.ReadHeadBlock(db)
	if head == nil {
		return errors.New("failed to load head block")
	}
	// The snapshot tree is opened in recovery mode to cope with this case:
	// - `prune-state` was run several times
	// - none of these runs finished (e.g. they were interrupted manually)
	// - the state bloom filter exists and a part of the state is deleted, so
	//   resuming is mandatory
	// - the state HEAD was already rewound by the incomplete runs
	// Even if the state HEAD does not exactly match the snapshot, the pruning
	// can still be recovered correctly.
	recoveryConfig := snapshot.Config{
		CacheSize:  256,
		Recovery:   true,
		NoBuild:    true,
		AsyncBuild: false,
	}
	tree, err := snapshot.New(recoveryConfig, db, trie.NewDatabase(db), head.Root())
	if err != nil {
		// The relevant snapshot(s) might not exist.
		return err
	}
	filter, err := NewStateBloomFromDisk(bloomPath)
	if err != nil {
		return err
	}
	log.Info("Loaded state bloom filter", "path", bloomPath)

	// The clean trie cache has to go first; otherwise the next restart could
	// hit a deleted state root in the cache and pick up incomplete state.
	deleteCleanTrieCache(trieCachePath)

	// Collect the roots of all layers above the target; they are pruned
	// forcibly so that no dangling state is left behind.
	middleRoots := make(map[common.Hash]struct{})
	targetFound := false
	for _, layer := range tree.Snapshots(head.Root(), 128, true) {
		layerRoot := layer.Root()
		if layerRoot == bloomRoot {
			targetFound = true
			break
		}
		middleRoots[layerRoot] = struct{}{}
	}
	if !targetFound {
		log.Error("Pruning target state is not existent")
		return errors.New("non-existent target state")
	}
	return prune(tree, bloomRoot, db, filter, bloomPath, middleRoots, time.Now())
}
// extractGenesis walks the genesis state and records every entry in the given
// bloom filter: the hashed nodes of the account trie, the hashed nodes of each
// non-empty storage trie, and the code hash of every account that has code.
//
// It returns an error when the genesis hash or block is missing, when a trie
// cannot be opened or iterated, when an account cannot be decoded, or when the
// bloom filter rejects an entry.
func extractGenesis(db ethdb.Database, stateBloom *stateBloom) error {
	genesisHash := rawdb.ReadCanonicalHash(db, 0)
	if genesisHash == (common.Hash{}) {
		return errors.New("missing genesis hash")
	}
	genesis := rawdb.ReadBlock(db, genesisHash, 0)
	if genesis == nil {
		return errors.New("missing genesis block")
	}
	t, err := trie.NewStateTrie(trie.StateTrieID(genesis.Root()), trie.NewDatabase(db))
	if err != nil {
		return err
	}
	accIter := t.NodeIterator(nil)
	for accIter.Next(true) {
		// Embedded nodes don't have hash.
		if hash := accIter.Hash(); hash != (common.Hash{}) {
			if err := stateBloom.Put(hash.Bytes(), nil); err != nil {
				return err
			}
		}
		// Only leaf nodes carry an account; for those, dig into the
		// storage trie and the contract code further.
		if !accIter.Leaf() {
			continue
		}
		var acc types.StateAccount
		if err := rlp.DecodeBytes(accIter.LeafBlob(), &acc); err != nil {
			return err
		}
		if acc.Root != types.EmptyRootHash {
			id := trie.StorageTrieID(genesis.Root(), common.BytesToHash(accIter.LeafKey()), acc.Root)
			storageTrie, err := trie.NewStateTrie(id, trie.NewDatabase(db))
			if err != nil {
				return err
			}
			storageIter := storageTrie.NodeIterator(nil)
			for storageIter.Next(true) {
				if hash := storageIter.Hash(); hash != (common.Hash{}) {
					if err := stateBloom.Put(hash.Bytes(), nil); err != nil {
						return err
					}
				}
			}
			if err := storageIter.Error(); err != nil {
				return err
			}
		}
		if !bytes.Equal(acc.CodeHash, types.EmptyCodeHash.Bytes()) {
			if err := stateBloom.Put(acc.CodeHash, nil); err != nil {
				return err
			}
		}
	}
	return accIter.Error()
}
// bloomFilterName returns the path of the state bloom filter file for the
// given state root inside datadir. The file name has the form
// "<prefix>.<root hex>.<suffix>".
func bloomFilterName(datadir string, hash common.Hash) string {
	parts := []string{stateBloomFilePrefix, hash.Hex(), stateBloomFileSuffix}
	return filepath.Join(datadir, strings.Join(parts, "."))
}
// isBloomFilter reports whether filename (only its base name is inspected)
// looks like a state bloom filter file as produced by bloomFilterName, i.e.
// "<prefix>.<root hex>.<suffix>", and if so returns the state root encoded in
// the name.
//
// Names that are too short to hold a root between the prefix and the suffix
// (for example "statebloom.bf.gz") are rejected instead of being sliced out of
// range. The hex part itself is decoded leniently by common.HexToHash.
func isBloomFilter(filename string) (bool, common.Hash) {
	var (
		name   = filepath.Base(filename)
		prefix = stateBloomFilePrefix + "."
		suffix = "." + stateBloomFileSuffix
	)
	// The length check guarantees prefix and suffix do not overlap and that
	// at least one character is left for the root.
	if len(name) <= len(prefix)+len(suffix) ||
		!strings.HasPrefix(name, prefix) ||
		!strings.HasSuffix(name, suffix) {
		return false, common.Hash{}
	}
	return true, common.HexToHash(name[len(prefix) : len(name)-len(suffix)])
}
// findBloomFilter walks datadir recursively looking for a state bloom filter
// file and returns its path together with the state root encoded in its name.
// If several files match, the one visited last wins; if none matches, an
// empty path and the zero hash are returned. Errors reported for individual
// entries during the walk are ignored.
func findBloomFilter(datadir string) (string, common.Hash, error) {
	var (
		foundPath string
		foundRoot common.Hash
	)
	visit := func(path string, info os.FileInfo, err error) error {
		if info == nil || info.IsDir() {
			return nil
		}
		if ok, root := isBloomFilter(path); ok {
			foundPath, foundRoot = path, root
		}
		return nil
	}
	if err := filepath.Walk(datadir, visit); err != nil {
		return "", common.Hash{}, err
	}
	return foundPath, foundRoot, nil
}
// warningLog is shown when the clean trie cache directory passed to the pruner
// does not exist, to remind the operator to remove the cache manually before
// the node is started again.
const warningLog = `
WARNING!
The clean trie cache is not found. Please delete it by yourself after the
pruning. Remember don't start the Harmony node without deleting the clean trie
cache otherwise the entire database may be damaged!
Check the command description "harmony snapshot prune-state --help" for more details.
`
// deleteCleanTrieCache removes the clean trie cache at path. When the path
// does not exist a prominent warning is logged instead, asking the operator to
// delete the cache manually. A failed removal is logged as an error rather
// than being reported as a success.
func deleteCleanTrieCache(path string) {
	if !common.FileExist(path) {
		log.Warn(warningLog)
		return
	}
	if err := os.RemoveAll(path); err != nil {
		log.Error("Failed to delete trie clean cache", "path", path, "err", err)
		return
	}
	log.Info("Deleted trie clean cache", "path", path)
}
Loading…
Cancel
Save