diff --git a/.gitignore b/.gitignore index 0e26bf59a..bbdb53772 100644 --- a/.gitignore +++ b/.gitignore @@ -94,3 +94,7 @@ explorer_storage_* # pprof profiles profiles/*.pb.gz + +# cache db +cache/ +cache_*_db diff --git a/.hmy/extbls/4f41a37a3a8d0695dd6edcc58142c6b7d98e74da5c90e79b587b3b960b6a4f5e048e6d8b8a000d77a478d44cd640270c.key b/.hmy/extbls/4f41a37a3a8d0695dd6edcc58142c6b7d98e74da5c90e79b587b3b960b6a4f5e048e6d8b8a000d77a478d44cd640270c.key new file mode 100644 index 000000000..77581cba0 --- /dev/null +++ b/.hmy/extbls/4f41a37a3a8d0695dd6edcc58142c6b7d98e74da5c90e79b587b3b960b6a4f5e048e6d8b8a000d77a478d44cd640270c.key @@ -0,0 +1 @@ +a010bc1550956a3a4935cdc6f8633ea770bd0321094cfaccd25d3ea3b5382417cb5c150de0357ee8b6eeeec135ae0ec379701c7f35ae33da9030e93a630dc3a988577d1a1b0a9e60976f79d5cefa68123ac77c636f2ccab149fc8fc7 \ No newline at end of file diff --git a/.hmy/extbls/4f41a37a3a8d0695dd6edcc58142c6b7d98e74da5c90e79b587b3b960b6a4f5e048e6d8b8a000d77a478d44cd640270c.pass b/.hmy/extbls/4f41a37a3a8d0695dd6edcc58142c6b7d98e74da5c90e79b587b3b960b6a4f5e048e6d8b8a000d77a478d44cd640270c.pass new file mode 100644 index 000000000..e69de29bb diff --git a/.hmy/extbls/5a18d4aa3e6aff4835f07588ae66be19684476d38799f63e54c6b5732fad1e86dce7458b1c295404fb54a0d61e50bb97.key b/.hmy/extbls/5a18d4aa3e6aff4835f07588ae66be19684476d38799f63e54c6b5732fad1e86dce7458b1c295404fb54a0d61e50bb97.key new file mode 100644 index 000000000..ae7af7f0e --- /dev/null +++ b/.hmy/extbls/5a18d4aa3e6aff4835f07588ae66be19684476d38799f63e54c6b5732fad1e86dce7458b1c295404fb54a0d61e50bb97.key @@ -0,0 +1 @@ +e176a1a8d9c533ad7efbaa9caca1d839146495683f669be35dddfebe2f39497c73fc2a6d7da116acd3698f582fcfe6b7b9fc18fd286a42bcdf007dc7a618bab958eb5c97f6082104ff723705d36744289b9885a692c01731cb45c698 \ No newline at end of file diff --git a/.hmy/extbls/5a18d4aa3e6aff4835f07588ae66be19684476d38799f63e54c6b5732fad1e86dce7458b1c295404fb54a0d61e50bb97.pass b/.hmy/extbls/5a18d4aa3e6aff4835f07588ae66be19684476d38799f63e54c6b5732fad1e86dce7458b1c295404fb54a0d61e50bb97.pass new file mode 100644 index 000000000..e69de29bb diff --git a/.hmy/extbls/7dcc035a943e29e17959dabe636efad7303d2c6f273ace457ba9dcc2fd19d3f37e70ba1cd8d082cf8ff7be2f861db48c.key b/.hmy/extbls/7dcc035a943e29e17959dabe636efad7303d2c6f273ace457ba9dcc2fd19d3f37e70ba1cd8d082cf8ff7be2f861db48c.key new file mode 100644 index 000000000..8788231f8 --- /dev/null +++ b/.hmy/extbls/7dcc035a943e29e17959dabe636efad7303d2c6f273ace457ba9dcc2fd19d3f37e70ba1cd8d082cf8ff7be2f861db48c.key @@ -0,0 +1 @@ +eb388b59c8a4ed1d6a1713551a3404e775fdf27b5f92d302b01874e2ced1465ff9baa93eba1d3e24620d71e0557231087bbfea0c89cdb829c3868e990abaaa7595a61a13ba40d61262c5395066bccf2681f65b9f53a621a37e1b8123 \ No newline at end of file diff --git a/.hmy/extbls/7dcc035a943e29e17959dabe636efad7303d2c6f273ace457ba9dcc2fd19d3f37e70ba1cd8d082cf8ff7be2f861db48c.pass b/.hmy/extbls/7dcc035a943e29e17959dabe636efad7303d2c6f273ace457ba9dcc2fd19d3f37e70ba1cd8d082cf8ff7be2f861db48c.pass new file mode 100644 index 000000000..e69de29bb diff --git a/.hmy/extbls/81296eedba05047594385e3086e1dab52c9eb9e56f46d86f58447cccc20535d646120171961d74968d27a2ec0f8af285.key b/.hmy/extbls/81296eedba05047594385e3086e1dab52c9eb9e56f46d86f58447cccc20535d646120171961d74968d27a2ec0f8af285.key new file mode 100644 index 000000000..86f064874 --- /dev/null +++ b/.hmy/extbls/81296eedba05047594385e3086e1dab52c9eb9e56f46d86f58447cccc20535d646120171961d74968d27a2ec0f8af285.key @@ -0,0 +1 @@ 
+b64f0601353691a6bbe68658103c044ec0e075ebe2cdb8328a07269c1cf4e5574ac81fb48fb49c90cf1e0fbf0e5ed97a5806a30505104717a68d7ae341c08a7ef98bf5d4c607c236ef80d9dbfc0d3212191e0c0436b4d78890b7da68 \ No newline at end of file diff --git a/.hmy/extbls/81296eedba05047594385e3086e1dab52c9eb9e56f46d86f58447cccc20535d646120171961d74968d27a2ec0f8af285.pass b/.hmy/extbls/81296eedba05047594385e3086e1dab52c9eb9e56f46d86f58447cccc20535d646120171961d74968d27a2ec0f8af285.pass new file mode 100644 index 000000000..e69de29bb diff --git a/.hmy/extbls/89eab762e7364d6cf89f7a6c54da794f74eba2e29147992ac66adcef0f0654ef8a727710ee55ad8b532da0dd87811915.key b/.hmy/extbls/89eab762e7364d6cf89f7a6c54da794f74eba2e29147992ac66adcef0f0654ef8a727710ee55ad8b532da0dd87811915.key new file mode 100644 index 000000000..6a86cab67 --- /dev/null +++ b/.hmy/extbls/89eab762e7364d6cf89f7a6c54da794f74eba2e29147992ac66adcef0f0654ef8a727710ee55ad8b532da0dd87811915.key @@ -0,0 +1 @@ +2ea158c2fd1d4cefcfe4dc4d987099ef93731decee88db87d007d6a5da2b87e4528abe3013814fc7183651ac1e301b2f3caa8f03071d2bd110746be8c1004dd88f22495449ae6d8c5a7cce5783a6964a4663c9319570433a68ef2f31 \ No newline at end of file diff --git a/.hmy/extbls/89eab762e7364d6cf89f7a6c54da794f74eba2e29147992ac66adcef0f0654ef8a727710ee55ad8b532da0dd87811915.pass b/.hmy/extbls/89eab762e7364d6cf89f7a6c54da794f74eba2e29147992ac66adcef0f0654ef8a727710ee55ad8b532da0dd87811915.pass new file mode 100644 index 000000000..e69de29bb diff --git a/.hmy/extbls/b0917378b179a519a5055259c4f8980cce37d58af300b00dd98b07076d3d9a3b16c4a55f84522f553872225a7b1efc0c.key b/.hmy/extbls/b0917378b179a519a5055259c4f8980cce37d58af300b00dd98b07076d3d9a3b16c4a55f84522f553872225a7b1efc0c.key new file mode 100644 index 000000000..1f6ad8e86 --- /dev/null +++ b/.hmy/extbls/b0917378b179a519a5055259c4f8980cce37d58af300b00dd98b07076d3d9a3b16c4a55f84522f553872225a7b1efc0c.key @@ -0,0 +1 @@ +81e67ac67dc10c4d89baa7ae800d6afffbc4105766a2202ee30998669e118ce2842ae64cb8a20282d10974c1bba6ce3cdda6e2bb00124bf4ab154aeebe081a7c44ce5b010b0b069bfa37d35beccffe44209ac376b4b14885f5b3625a \ No newline at end of file diff --git a/.hmy/extbls/b0917378b179a519a5055259c4f8980cce37d58af300b00dd98b07076d3d9a3b16c4a55f84522f553872225a7b1efc0c.pass b/.hmy/extbls/b0917378b179a519a5055259c4f8980cce37d58af300b00dd98b07076d3d9a3b16c4a55f84522f553872225a7b1efc0c.pass new file mode 100644 index 000000000..e69de29bb diff --git a/.hmy/extkeystore/one17ughrllgnzx9sfa46p568k8rdmtz7qj85slc6t.key b/.hmy/extkeystore/one17ughrllgnzx9sfa46p568k8rdmtz7qj85slc6t.key new file mode 100644 index 000000000..90157f52a --- /dev/null +++ b/.hmy/extkeystore/one17ughrllgnzx9sfa46p568k8rdmtz7qj85slc6t.key @@ -0,0 +1 @@ +{"address":"f71171ffe8988c5827b5d069a3d8e36ed62f0247","crypto":{"cipher":"aes-128-ctr","ciphertext":"5c7590efba91015b67234ffa063dc56e0460af0b68fa0265ac46d46a3c28448e","cipherparams":{"iv":"e8927574296604528514d7dabc6895ad"},"kdf":"scrypt","kdfparams":{"dklen":32,"n":262144,"p":1,"r":8,"salt":"d2573eadafda1a530300683d610ab67465fded0e33d0a9dff101481dd4e9c043"},"mac":"0666235e715fc0c52b0cc480891b2c7514e4731d99fe93bee3d2678679f0de19"},"id":"94a50e8b-5973-4989-aaac-420cdb243863","version":3} \ No newline at end of file diff --git a/.hmy/extkeystore/one19aw2wcr5y4lxeuwt0ajgt5aw3a3qkjdgg67ygj.key b/.hmy/extkeystore/one19aw2wcr5y4lxeuwt0ajgt5aw3a3qkjdgg67ygj.key new file mode 100644 index 000000000..2699190f5 --- /dev/null +++ b/.hmy/extkeystore/one19aw2wcr5y4lxeuwt0ajgt5aw3a3qkjdgg67ygj.key @@ -0,0 +1 @@ 
+{"address":"2f5ca76074257e6cf1cb7f6485d3ae8f620b49a8","crypto":{"cipher":"aes-128-ctr","ciphertext":"0f4905db05ba9cf8d99e0f090dc36bc3e15b2fe6e0247b4800c0d092bcd61a6d","cipherparams":{"iv":"fa9e222a88e5954274195fce7d2e3cc2"},"kdf":"scrypt","kdfparams":{"dklen":32,"n":262144,"p":1,"r":8,"salt":"f6b32095d86b01d85764f099f6f82e2a9ae141eaf058025047e55f9ba90d0ac5"},"mac":"05f3de89292e8b16ee63181e762feb0b9fd46856b8370ebbe2c9eb806b3d26b7"},"id":"5342b382-696e-46d3-9894-272a6da3f5b3","version":3} \ No newline at end of file diff --git a/.hmy/extkeystore/one19zzwsxr0uf2fe34y8qkadek2v0eh6h5mg2deg6.key b/.hmy/extkeystore/one19zzwsxr0uf2fe34y8qkadek2v0eh6h5mg2deg6.key new file mode 100644 index 000000000..7a697ae7a --- /dev/null +++ b/.hmy/extkeystore/one19zzwsxr0uf2fe34y8qkadek2v0eh6h5mg2deg6.key @@ -0,0 +1 @@ +{"address":"2884e8186fe2549cc6a4382dd6e6ca63f37d5e9b","crypto":{"cipher":"aes-128-ctr","ciphertext":"7308d5b4904e912d0f312208b9e8a95eee04d9cacf80f2c89dd802bdcd6b9cb5","cipherparams":{"iv":"ff07874ea030a9c3c9a0c8ce4d87f06a"},"kdf":"scrypt","kdfparams":{"dklen":32,"n":262144,"p":1,"r":8,"salt":"972333a39a53e1dab69d50d2cd9a97e19595896e2b79e88c9ffd81b66fe93bea"},"mac":"828fba7c39d73996a5b37d399013b7659c4a9cebc40d6e1837e6253a01b1c4f1"},"id":"76c4c1ba-cd6d-4613-a89e-f9faaa7d130d","version":3} \ No newline at end of file diff --git a/.hmy/extkeystore/one1auqndgthqu5lznsn7tuma8s5333cq0y07cwc6x.key b/.hmy/extkeystore/one1auqndgthqu5lznsn7tuma8s5333cq0y07cwc6x.key new file mode 100644 index 000000000..c64fdf2ef --- /dev/null +++ b/.hmy/extkeystore/one1auqndgthqu5lznsn7tuma8s5333cq0y07cwc6x.key @@ -0,0 +1 @@ +{"address":"ef0136a1770729f14e13f2f9be9e148c63803c8f","crypto":{"cipher":"aes-128-ctr","ciphertext":"367d95f415cf795aa7498025efb7b44ef31a698f3820bb77e3cd447d57dcfc80","cipherparams":{"iv":"206f60b938aad4f2a6763ba2199ce737"},"kdf":"scrypt","kdfparams":{"dklen":32,"n":262144,"p":1,"r":8,"salt":"02c7e01ef989794d695d7ee87a5413fbea79751a8e725f529ac39e6b611da0ab"},"mac":"65e93d97f3a89bc45da4b1bcf8c38ef3b0dbee457c91207bf2c9ff787aa12ebe"},"id":"9b6fb83f-90b2-4ae6-a24a-5b13436db30f","version":3} \ No newline at end of file diff --git a/.hmy/extkeystore/one1eenp9ujcrmyaq22ef6jrpry2k97tjz4xs6ppcf.key b/.hmy/extkeystore/one1eenp9ujcrmyaq22ef6jrpry2k97tjz4xs6ppcf.key new file mode 100644 index 000000000..92a8a23fe --- /dev/null +++ b/.hmy/extkeystore/one1eenp9ujcrmyaq22ef6jrpry2k97tjz4xs6ppcf.key @@ -0,0 +1 @@ +{"address":"ce6612f2581ec9d029594ea4308c8ab17cb90aa6","crypto":{"cipher":"aes-128-ctr","ciphertext":"7c4ba34f62d62734df18c383bafc03c6a8483f99ebbb1103517a7d6060c68c10","cipherparams":{"iv":"a300902fe8cc9bb4bfc686eb77b6bf26"},"kdf":"scrypt","kdfparams":{"dklen":32,"n":262144,"p":1,"r":8,"salt":"693073fcc4cdb230aae22796094a1f8b17c298f78b3cabff3f8c78b81da8a222"},"mac":"624ed38319f3b46405a7d5e831fdb60dbe4d7f66f16ff9cc67bb0919ab3862e9"},"id":"19af9349-6c8d-4dd3-8053-68b03a646d59","version":3} \ No newline at end of file diff --git a/.hmy/extkeystore/one1lctumupg2y009pjmnhnmn4nqjk0zf0dspjanf7.key b/.hmy/extkeystore/one1lctumupg2y009pjmnhnmn4nqjk0zf0dspjanf7.key new file mode 100644 index 000000000..c849d243c --- /dev/null +++ b/.hmy/extkeystore/one1lctumupg2y009pjmnhnmn4nqjk0zf0dspjanf7.key @@ -0,0 +1 @@ 
+{"address":"fe17cdf028511ef2865b9de7b9d660959e24bdb0","crypto":{"cipher":"aes-128-ctr","ciphertext":"5cb69fbe4f2aaa0500d4b197e8411fa4d9fa75ff4b4bf067760c1ff79e57a0b6","cipherparams":{"iv":"5d1e9bda47aae3385d31bf036ba6db67"},"kdf":"scrypt","kdfparams":{"dklen":32,"n":262144,"p":1,"r":8,"salt":"779fba35725c4711bc5772102234f3b718c46d751bf106153c06e61984e8e827"},"mac":"c1de2e2e1e5aebd5a083aec65e18eec21c67d257799a8d6402c3878045678ea8"},"id":"c2e234e2-bc52-4c38-8f82-6a6cfb65f342","version":3} \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 9d4982491..514f8e2fb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,7 +24,7 @@ install: - (cd $GOPATH/src/github.com/harmony-one/bls; make BLS_SWAP_G=1 -j4) - go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.26 - go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@v1.1 - - curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.41.1 +# - curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.41.1 - make go-get - go install golang.org/x/tools/cmd/goimports@latest - go install github.com/harmony-ek/gencodec@latest diff --git a/Makefile b/Makefile index 4c764411e..052d6ad42 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ RPMBUILD=$(HOME)/rpmbuild DEBBUILD=$(HOME)/debbuild SHELL := bash -.PHONY: all help libs exe race trace-pointer debug debug-kill test test-go test-api test-api-attach linux_static deb_init deb_build deb debpub_dev debpub_prod rpm_init rpm_build rpm rpmpub_dev rpmpub_prod clean distclean docker +.PHONY: all help libs exe race trace-pointer debug debug-ext debug-kill test test-go test-api test-api-attach linux_static deb_init deb_build deb debpub_dev debpub_prod rpm_init rpm_build rpm rpmpub_dev rpmpub_prod clean distclean docker all: libs bash ./scripts/go_executable_build.sh -S @@ -23,8 +23,9 @@ help: @echo "exe - build the harmony binary & bootnode" @echo "race - build the harmony binary & bootnode with race condition checks" @echo "trace-pointer - build the harmony binary & bootnode with pointer analysis" - @echo "debug - start a localnet with 2 shards (s0 rpc endpoint = localhost:9599; s1 rpc endpoint = localhost:9598)" + @echo "debug - start a localnet with 2 shards (s0 rpc endpoint = localhost:9700; s1 rpc endpoint = localhost:9800)" @echo "debug-kill - force kill the localnet" + @echo "debug-ext - start a localnet with 2 shards and external (s0 rpc endpoint = localhost:9598; s1 rpc endpoint = localhost:9596)" @echo "clean - remove node files & logs created by localnet" @echo "distclean - remove node files & logs created by localnet, and all libs" @echo "test - run the entire test suite (go test & Node API test)" @@ -60,6 +61,9 @@ debug: debug-kill: bash ./test/kill_node.sh +debug-ext: + bash ./test/debug-external.sh + clean: rm -rf ./tmp_log* rm -rf ./.dht* diff --git a/api/service/blockproposal/service.go b/api/service/blockproposal/service.go index 4d42c558a..1cbb5accf 100644 --- a/api/service/blockproposal/service.go +++ b/api/service/blockproposal/service.go @@ -18,19 +18,22 @@ type Service struct { // New returns a block proposal service. 
func New(readySignal chan consensus.ProposalType, commitSigsChan chan []byte, waitForConsensusReady func(readySignal chan consensus.ProposalType, commitSigsChan chan []byte, stopChan chan struct{}, stoppedChan chan struct{})) *Service { - return &Service{readySignal: readySignal, commitSigsChan: commitSigsChan, waitForConsensusReady: waitForConsensusReady} + return &Service{ + readySignal: readySignal, + commitSigsChan: commitSigsChan, + waitForConsensusReady: waitForConsensusReady, + stopChan: make(chan struct{}), + stoppedChan: make(chan struct{}), + } } // Start starts block proposal service. func (s *Service) Start() error { - s.stopChan = make(chan struct{}) - s.stoppedChan = make(chan struct{}) - - s.run(s.stopChan, s.stoppedChan) + s.run() return nil } -func (s *Service) run(stopChan chan struct{}, stoppedChan chan struct{}) { +func (s *Service) run() { s.waitForConsensusReady(s.readySignal, s.commitSigsChan, s.stopChan, s.stoppedChan) } diff --git a/api/service/consensus/service.go b/api/service/consensus/service.go index d48c660a7..49a1da64f 100644 --- a/api/service/consensus/service.go +++ b/api/service/consensus/service.go @@ -3,31 +3,26 @@ package consensus import ( msg_pb "github.com/harmony-one/harmony/api/proto/message" "github.com/harmony-one/harmony/consensus" - "github.com/harmony-one/harmony/core/types" "github.com/harmony-one/harmony/internal/utils" ) // Service is the consensus service. type Service struct { - blockChannel chan *types.Block // The channel to receive new blocks from Node - consensus *consensus.Consensus - stopChan chan struct{} - stoppedChan chan struct{} - startChan chan struct{} - messageChan chan *msg_pb.Message + consensus *consensus.Consensus + stopChan chan struct{} + messageChan chan *msg_pb.Message } // New returns consensus service. -func New(blockChannel chan *types.Block, consensus *consensus.Consensus, startChan chan struct{}) *Service { - return &Service{blockChannel: blockChannel, consensus: consensus, startChan: startChan} +func New(consensus *consensus.Consensus) *Service { + return &Service{consensus: consensus} } // Start starts consensus service. func (s *Service) Start() error { utils.Logger().Info().Msg("[consensus/service] Starting consensus service.") s.stopChan = make(chan struct{}) - s.stoppedChan = make(chan struct{}) - s.consensus.Start(s.blockChannel, s.stopChan, s.stoppedChan, s.startChan) + s.consensus.Start(s.stopChan) s.consensus.WaitForNewRandomness() return nil } @@ -35,8 +30,7 @@ func (s *Service) Start() error { // Stop stops consensus service. 
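The Stop rewrite below replaces the send-and-wait handshake (s.stopChan <- struct{}{} followed by <-s.stoppedChan) with close(s.stopChan). Closing a channel is non-blocking and is observed by every receiver at once, so Stop no longer blocks if the consensus loop has already returned; the service also now returns nil directly instead of s.consensus.Close().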
func (s *Service) Stop() error { utils.Logger().Info().Msg("Stopping consensus service.") - s.stopChan <- struct{}{} - <-s.stoppedChan + close(s.stopChan) utils.Logger().Info().Msg("Consensus service stopped.") - return s.consensus.Close() + return nil } diff --git a/api/service/explorer/schema.go b/api/service/explorer/schema.go index 09848573d..3104d75dc 100644 --- a/api/service/explorer/schema.go +++ b/api/service/explorer/schema.go @@ -41,7 +41,7 @@ func readCheckpointBitmap(db databaseReader) (*roaring64.Bitmap, error) { } // writeCheckpointBitmap write explorer checkpoint bitmap to storage -func writeCheckpointBitmap(db databaseWriter, rb *roaring64.Bitmap) error { +func writeCheckpointBitmap(db databaseWriter, rb Bitmap) error { bitmapByte, err := rb.MarshalBinary() if err != nil { return err diff --git a/api/service/explorer/storage.go b/api/service/explorer/storage.go index 72d028180..ebb4e2577 100644 --- a/api/service/explorer/storage.go +++ b/api/service/explorer/storage.go @@ -34,11 +34,54 @@ const ( // explorer db is doing migration and unavailable var ErrExplorerNotReady = errors.New("explorer db not ready") +type Bitmap interface { + Clone() *roaring64.Bitmap + MarshalBinary() ([]byte, error) + CheckedAdd(x uint64) bool + Contains(x uint64) bool +} + +type ThreadSafeBitmap struct { + b Bitmap + mu sync.Mutex +} + +func (a *ThreadSafeBitmap) Clone() *roaring64.Bitmap { + a.mu.Lock() + defer a.mu.Unlock() + return a.b.Clone() +} + +func (a *ThreadSafeBitmap) CheckedAdd(x uint64) bool { + a.mu.Lock() + defer a.mu.Unlock() + return a.b.CheckedAdd(x) +} + +func (a *ThreadSafeBitmap) Contains(x uint64) bool { + a.mu.Lock() + defer a.mu.Unlock() + return a.b.Contains(x) +} + +func (a *ThreadSafeBitmap) MarshalBinary() ([]byte, error) { + a.mu.Lock() + defer a.mu.Unlock() + return a.b.MarshalBinary() +} + +func NewThreadSafeBitmap(bitmap Bitmap) Bitmap { + return &ThreadSafeBitmap{ + b: bitmap, + mu: sync.Mutex{}, + } +} + type ( storage struct { db database bc core.BlockChain - rb *roaring64.Bitmap + rb Bitmap //*roaring64.Bitmap // TODO: optimize this with priority queue tm *taskManager @@ -89,11 +132,12 @@ func newStorage(hc *harmonyconfig.HarmonyConfig, bc core.BlockChain, dbPath stri return nil, err } + safeBitmap := NewThreadSafeBitmap(bitmap) return &storage{ db: db, bc: bc, - rb: bitmap, - tm: newTaskManager(bitmap), + rb: safeBitmap, + tm: newTaskManager(safeBitmap), resultC: make(chan blockResult, numWorker), resultT: make(chan *traceResult, numWorker), available: abool.New(), @@ -211,14 +255,14 @@ type taskManager struct { blocksLP []*types.Block // blocks with low priorities lock sync.Mutex - rb *roaring64.Bitmap + rb Bitmap rbChangedCount int C chan struct{} T chan *traceResult } -func newTaskManager(bitmap *roaring64.Bitmap) *taskManager { +func newTaskManager(bitmap Bitmap) *taskManager { return &taskManager{ rb: bitmap, C: make(chan struct{}, numWorker), diff --git a/api/service/legacysync/epoch_syncing.go b/api/service/legacysync/epoch_syncing.go index 8bf7ae145..eefca9a5c 100644 --- a/api/service/legacysync/epoch_syncing.go +++ b/api/service/legacysync/epoch_syncing.go @@ -2,7 +2,6 @@ package legacysync import ( "fmt" - "math" "sync" "time" @@ -14,6 +13,8 @@ import ( "github.com/harmony-one/harmony/p2p" "github.com/harmony-one/harmony/shard" "github.com/pkg/errors" + + libp2p_peer "github.com/libp2p/go-libp2p/core/peer" ) type EpochSync struct { @@ -47,11 +48,12 @@ func (ss *EpochSync) isSynchronized(_ bool) SyncCheckResult { if ss.syncConfig == nil { return 
SyncCheckResult{} // If syncConfig is not instantiated, return not in sync } - otherHeight1 := getMaxPeerHeight(ss.syncConfig) - if otherHeight1 == math.MaxUint64 { + otherHeight1, errMaxHeight := getMaxPeerHeight(ss.syncConfig) + if errMaxHeight != nil { utils.Logger().Error(). Uint64("OtherHeight", otherHeight1). Int("Peers count", ss.syncConfig.PeersCount()). + Err(errMaxHeight). Msg("[EPOCHSYNC] No peers for get height") return SyncCheckResult{} } @@ -91,15 +93,15 @@ func (ss *EpochSync) SyncLoop(bc core.BlockChain, consensus *consensus.Consensus func syncLoop(bc core.BlockChain, syncConfig *SyncConfig) (timeout int) { isBeacon := bc.ShardID() == 0 - maxHeight := getMaxPeerHeight(syncConfig) - for { - if maxHeight == 0 || maxHeight == math.MaxUint64 { - utils.Logger().Info(). - Msgf("[EPOCHSYNC] No peers to sync (isBeacon: %t, ShardID: %d, peersCount: %d)", - isBeacon, bc.ShardID(), syncConfig.PeersCount()) - return 10 - } + maxHeight, errMaxHeight := getMaxPeerHeight(syncConfig) + if errMaxHeight != nil { + utils.Logger().Info(). + Msgf("[EPOCHSYNC] No peers to sync (isBeacon: %t, ShardID: %d, peersCount: %d)", + isBeacon, bc.ShardID(), syncConfig.PeersCount()) + return 10 + } + for { curEpoch := bc.CurrentBlock().Epoch().Uint64() otherEpoch := shard.Schedule.CalcEpochNumber(maxHeight).Uint64() if otherEpoch == curEpoch+1 { @@ -110,7 +112,7 @@ func syncLoop(bc core.BlockChain, syncConfig *SyncConfig) (timeout int) { } if otherEpoch < curEpoch { for _, peerCfg := range syncConfig.GetPeers() { - syncConfig.RemovePeer(peerCfg, fmt.Sprintf("[EPOCHSYNC]: current height is higher that others, remove peers: %s", peerCfg.String())) + syncConfig.RemovePeer(peerCfg, fmt.Sprintf("[EPOCHSYNC]: current height is higher than others, remove peers: %s", peerCfg.String())) } return 2 } @@ -202,8 +204,8 @@ func processWithPayload(payload [][]byte, bc core.BlockChain) error { } // CreateSyncConfig creates SyncConfig for StateSync object. 
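Across the sync hunks above and the helpers.go/syncing.go hunks below, getMaxPeerHeight stops encoding failure as the math.MaxUint64 sentinel and instead returns (uint64, error), so callers branch on the error rather than comparing against a magic value. A minimal sketch of the migrated shape, with hypothetical names:

package main

import (
	"errors"
	"fmt"
)

// maxPeerHeight reports failure with an explicit error rather than a sentinel value.
func maxPeerHeight(heights []uint64) (uint64, error) {
	if len(heights) == 0 {
		return 0, errors.New("no peers to query")
	}
	max := heights[0]
	for _, h := range heights[1:] {
		if h > max {
			max = h
		}
	}
	return max, nil
}

func main() {
	height, err := maxPeerHeight([]uint64{12, 42, 7})
	if err != nil {
		fmt.Println("no sync target:", err)
		return
	}
	fmt.Println("sync target:", height) // sync target: 42
}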
-func (ss *EpochSync) CreateSyncConfig(peers []p2p.Peer, shardID uint32, waitForEachPeerToConnect bool) error { +func (ss *EpochSync) CreateSyncConfig(peers []p2p.Peer, shardID uint32, selfPeerID libp2p_peer.ID, waitForEachPeerToConnect bool) error { var err error - ss.syncConfig, err = createSyncConfig(ss.syncConfig, peers, shardID, waitForEachPeerToConnect) + ss.syncConfig, err = createSyncConfig(ss.syncConfig, peers, shardID, selfPeerID, waitForEachPeerToConnect) return err } diff --git a/api/service/legacysync/helpers.go b/api/service/legacysync/helpers.go index ca66e0ddc..14dac994d 100644 --- a/api/service/legacysync/helpers.go +++ b/api/service/legacysync/helpers.go @@ -9,11 +9,12 @@ import ( "github.com/harmony-one/harmony/api/service/legacysync/downloader" "github.com/harmony-one/harmony/internal/utils" "github.com/harmony-one/harmony/p2p" + libp2p_peer "github.com/libp2p/go-libp2p/core/peer" "github.com/pkg/errors" ) // getMaxPeerHeight gets the maximum blockchain heights from peers -func getMaxPeerHeight(syncConfig *SyncConfig) uint64 { +func getMaxPeerHeight(syncConfig *SyncConfig) (uint64, error) { maxHeight := uint64(math.MaxUint64) var ( wg sync.WaitGroup @@ -48,10 +49,15 @@ func getMaxPeerHeight(syncConfig *SyncConfig) uint64 { return }) wg.Wait() - return maxHeight + + if maxHeight == uint64(math.MaxUint64) { + return 0, fmt.Errorf("get max peer height failed") + } + + return maxHeight, nil } -func createSyncConfig(syncConfig *SyncConfig, peers []p2p.Peer, shardID uint32, waitForEachPeerToConnect bool) (*SyncConfig, error) { +func createSyncConfig(syncConfig *SyncConfig, peers []p2p.Peer, shardID uint32, selfPeerID libp2p_peer.ID, waitForEachPeerToConnect bool) (*SyncConfig, error) { // sanity check to ensure no duplicate peers if err := checkPeersDuplicity(peers); err != nil { return syncConfig, err @@ -61,6 +67,7 @@ func createSyncConfig(syncConfig *SyncConfig, peers []p2p.Peer, shardID uint32, targetSize, peers := limitNumPeers(peers, randSeed) utils.Logger().Debug(). + Str("self peer ID", string(selfPeerID)). Int("peers count", len(peers)). Int("target size", targetSize). Uint32("shardID", shardID). @@ -72,7 +79,7 @@ func createSyncConfig(syncConfig *SyncConfig, peers []p2p.Peer, shardID uint32, if syncConfig != nil { syncConfig.CloseConnections() } - syncConfig = NewSyncConfig(shardID, nil) + syncConfig = NewSyncConfig(shardID, selfPeerID, nil) if !waitForEachPeerToConnect { var wg sync.WaitGroup diff --git a/api/service/legacysync/syncing.go b/api/service/legacysync/syncing.go index 8afaa3a95..3375ccdc5 100644 --- a/api/service/legacysync/syncing.go +++ b/api/service/legacysync/syncing.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/hex" "fmt" + "math" "math/rand" "reflect" "sort" @@ -17,6 +18,7 @@ import ( "github.com/harmony-one/harmony/api/service/legacysync/downloader" pb "github.com/harmony-one/harmony/api/service/legacysync/downloader/proto" "github.com/harmony-one/harmony/consensus" + consensus2 "github.com/harmony-one/harmony/consensus" "github.com/harmony-one/harmony/consensus/engine" "github.com/harmony-one/harmony/core" "github.com/harmony-one/harmony/core/types" @@ -25,6 +27,7 @@ import ( "github.com/harmony-one/harmony/internal/utils" "github.com/harmony-one/harmony/node/worker" "github.com/harmony-one/harmony/p2p" + libp2p_peer "github.com/libp2p/go-libp2p/core/peer" "github.com/pkg/errors" ) @@ -110,14 +113,16 @@ type SyncConfig struct { // SyncPeerConfig itself is guarded by its own mutex. 
mtx sync.RWMutex - peers []*SyncPeerConfig - shardID uint32 + peers []*SyncPeerConfig + shardID uint32 + selfPeerID libp2p_peer.ID } -func NewSyncConfig(shardID uint32, peers []*SyncPeerConfig) *SyncConfig { +func NewSyncConfig(shardID uint32, selfPeerID libp2p_peer.ID, peers []*SyncPeerConfig) *SyncConfig { return &SyncConfig{ - peers: peers, - shardID: shardID, + peers: peers, + shardID: shardID, + selfPeerID: selfPeerID, } } @@ -135,6 +140,9 @@ func (sc *SyncConfig) AddPeer(peer *SyncPeerConfig) { if peer.IsEqual(p2) { return } + if peer.peer.PeerID == sc.selfPeerID { + return + } } sc.peers = append(sc.peers, peer) } @@ -192,7 +200,7 @@ func (sc *SyncConfig) RemovePeer(peer *SyncPeerConfig, reason string) { } // CreateStateSync returns the implementation of StateSyncInterface interface. -func CreateStateSync(bc blockChain, ip string, port string, peerHash [20]byte, isExplorer bool, role nodeconfig.Role) *StateSync { +func CreateStateSync(bc blockChain, ip string, port string, peerHash [20]byte, peerID libp2p_peer.ID, isExplorer bool, role nodeconfig.Role) *StateSync { stateSync := &StateSync{} stateSync.blockChain = bc stateSync.selfip = ip @@ -201,7 +209,7 @@ func CreateStateSync(bc blockChain, ip string, port string, peerHash [20]byte, i stateSync.commonBlocks = make(map[int]*types.Block) stateSync.lastMileBlocks = []*types.Block{} stateSync.isExplorer = isExplorer - stateSync.syncConfig = NewSyncConfig(bc.ShardID(), nil) + stateSync.syncConfig = NewSyncConfig(bc.ShardID(), peerID, nil) stateSync.syncStatus = newSyncStatus(role) return stateSync @@ -366,9 +374,9 @@ func (peerConfig *SyncPeerConfig) GetBlocks(hashes [][]byte) ([][]byte, error) { } // CreateSyncConfig creates SyncConfig for StateSync object. -func (ss *StateSync) CreateSyncConfig(peers []p2p.Peer, shardID uint32, waitForEachPeerToConnect bool) error { +func (ss *StateSync) CreateSyncConfig(peers []p2p.Peer, shardID uint32, selfPeerID libp2p_peer.ID, waitForEachPeerToConnect bool) error { var err error - ss.syncConfig, err = createSyncConfig(ss.syncConfig, peers, shardID, waitForEachPeerToConnect) + ss.syncConfig, err = createSyncConfig(ss.syncConfig, peers, shardID, selfPeerID, waitForEachPeerToConnect) return err } @@ -1054,13 +1062,16 @@ func (ss *StateSync) RegisterNodeInfo() int { // IsSameBlockchainHeight checks whether the node is out of sync from other peers func (ss *StateSync) IsSameBlockchainHeight(bc core.BlockChain) (uint64, bool) { - otherHeight := getMaxPeerHeight(ss.syncConfig) + otherHeight, err := getMaxPeerHeight(ss.syncConfig) + if err != nil { + return 0, false + } currentHeight := bc.CurrentBlock().NumberU64() return otherHeight, currentHeight == otherHeight } // GetMaxPeerHeight .. -func (ss *StateSync) GetMaxPeerHeight() uint64 { +func (ss *StateSync) GetMaxPeerHeight() (uint64, error) { return getMaxPeerHeight(ss.syncConfig) } @@ -1073,8 +1084,17 @@ func (ss *StateSync) SyncLoop(bc core.BlockChain, worker *worker.Worker, isBeaco for { start := time.Now() - otherHeight := getMaxPeerHeight(ss.syncConfig) currentHeight := bc.CurrentBlock().NumberU64() + otherHeight, errMaxHeight := getMaxPeerHeight(ss.syncConfig) + if errMaxHeight != nil { + utils.Logger().Error(). + Bool("isBeacon", isBeacon). + Uint32("ShardID", bc.ShardID()). + Uint64("currentHeight", currentHeight). + Int("peers count", ss.syncConfig.PeersCount()). + Msgf("[SYNC] get max height failed") + break + } if currentHeight >= otherHeight { utils.Logger().Info(). Msgf("[SYNC] Node is now IN SYNC! 
(isBeacon: %t, ShardID: %d, otherHeight: %d, currentHeight: %d)", @@ -1123,20 +1143,19 @@ func (ss *StateSync) SyncLoop(bc core.BlockChain, worker *worker.Worker, isBeaco func (ss *StateSync) addConsensusLastMile(bc core.BlockChain, consensus *consensus.Consensus) error { curNumber := bc.CurrentBlock().NumberU64() - blockIter, err := consensus.GetLastMileBlockIter(curNumber + 1) - if err != nil { - return err - } - for { - block := blockIter.Next() - if block == nil { - break - } - if _, err := bc.InsertChain(types.Blocks{block}, true); err != nil { - return errors.Wrap(err, "failed to InsertChain") + err := consensus.GetLastMileBlockIter(curNumber+1, func(blockIter *consensus2.LastMileBlockIter) error { + for { + block := blockIter.Next() + if block == nil { + break + } + if _, err := bc.InsertChain(types.Blocks{block}, true); err != nil { + return errors.Wrap(err, "failed to InsertChain") + } } - } - return nil + return nil + }) + return err } // GetSyncingPort returns the syncing port. @@ -1212,7 +1231,9 @@ func (status *syncStatus) Get(fallback func() SyncCheckResult) SyncCheckResult { defer status.lock.Unlock() if status.expired() { result := fallback() - status.update(result) + if result.OtherHeight > 0 && result.OtherHeight < uint64(math.MaxUint64) { + status.update(result) + } } return status.lastResult } @@ -1274,8 +1295,15 @@ func (ss *StateSync) isSynchronized(doubleCheck bool) SyncCheckResult { if ss.syncConfig == nil { return SyncCheckResult{} // If syncConfig is not instantiated, return not in sync } - otherHeight1 := getMaxPeerHeight(ss.syncConfig) lastHeight := ss.blockChain.CurrentBlock().NumberU64() + otherHeight1, errMaxHeight1 := getMaxPeerHeight(ss.syncConfig) + if errMaxHeight1 != nil { + return SyncCheckResult{ + IsSynchronized: false, + OtherHeight: 0, + HeightDiff: 0, + } + } wasOutOfSync := lastHeight+inSyncThreshold < otherHeight1 if !doubleCheck { @@ -1296,7 +1324,10 @@ func (ss *StateSync) isSynchronized(doubleCheck bool) SyncCheckResult { // double check the sync status after 1 second to confirm (avoid false alarm) time.Sleep(1 * time.Second) - otherHeight2 := getMaxPeerHeight(ss.syncConfig) + otherHeight2, errMaxHeight2 := getMaxPeerHeight(ss.syncConfig) + if errMaxHeight2 != nil { + otherHeight2 = otherHeight1 + } currentHeight := ss.blockChain.CurrentBlock().NumberU64() isOutOfSync := currentHeight+inSyncThreshold < otherHeight2 diff --git a/api/service/legacysync/syncing_test.go b/api/service/legacysync/syncing_test.go index bcab494a1..bc17aeaec 100644 --- a/api/service/legacysync/syncing_test.go +++ b/api/service/legacysync/syncing_test.go @@ -12,6 +12,7 @@ import ( "time" nodeconfig "github.com/harmony-one/harmony/internal/configs/node" + peer "github.com/libp2p/go-libp2p-core/peer" "github.com/ethereum/go-ethereum/common" "github.com/harmony-one/harmony/api/service/legacysync/downloader" @@ -128,7 +129,8 @@ func (mockBlockchain) ShardID() uint32 { } func TestCreateStateSync(t *testing.T) { - stateSync := CreateStateSync(mockBlockchain{}, "127.0.0.1", "8000", [20]byte{}, false, nodeconfig.Validator) + pID, _ := peer.IDFromBytes([]byte{}) + stateSync := CreateStateSync(mockBlockchain{}, "127.0.0.1", "8000", [20]byte{}, pID, false, nodeconfig.Validator) if stateSync == nil { t.Error("Unable to create stateSync") diff --git a/api/service/manager.go b/api/service/manager.go index 57ca15d60..d879b6a6c 100644 --- a/api/service/manager.go +++ b/api/service/manager.go @@ -23,6 +23,7 @@ const ( Prometheus Synchronize CrosslinkSending + StagedStreamSync ) func (t 
Type) String() string { @@ -45,6 +46,8 @@ func (t Type) String() string { return "Synchronize" case CrosslinkSending: return "CrosslinkSending" + case StagedStreamSync: + return "StagedStreamSync" default: return "Unknown" } diff --git a/api/service/stagedstreamsync/adapter.go b/api/service/stagedstreamsync/adapter.go new file mode 100644 index 000000000..ae7632889 --- /dev/null +++ b/api/service/stagedstreamsync/adapter.go @@ -0,0 +1,34 @@ +package stagedstreamsync + +import ( + "context" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/event" + "github.com/harmony-one/harmony/consensus/engine" + "github.com/harmony-one/harmony/core/types" + "github.com/harmony-one/harmony/p2p/stream/common/streammanager" + syncproto "github.com/harmony-one/harmony/p2p/stream/protocols/sync" + sttypes "github.com/harmony-one/harmony/p2p/stream/types" +) + +type syncProtocol interface { + GetCurrentBlockNumber(ctx context.Context, opts ...syncproto.Option) (uint64, sttypes.StreamID, error) + GetBlocksByNumber(ctx context.Context, bns []uint64, opts ...syncproto.Option) ([]*types.Block, sttypes.StreamID, error) + GetRawBlocksByNumber(ctx context.Context, bns []uint64, opts ...syncproto.Option) ([][]byte, [][]byte, sttypes.StreamID, error) + GetBlockHashes(ctx context.Context, bns []uint64, opts ...syncproto.Option) ([]common.Hash, sttypes.StreamID, error) + GetBlocksByHashes(ctx context.Context, hs []common.Hash, opts ...syncproto.Option) ([]*types.Block, sttypes.StreamID, error) + + RemoveStream(stID sttypes.StreamID) // If a stream delivers invalid data, remove the stream + StreamFailed(stID sttypes.StreamID, reason string) + SubscribeAddStreamEvent(ch chan<- streammanager.EvtStreamAdded) event.Subscription + NumStreams() int +} + +type blockChain interface { + engine.ChainReader + Engine() engine.Engine + + InsertChain(chain types.Blocks, verifyHeaders bool) (int, error) + WriteCommitSig(blockNum uint64, lastCommits []byte) error +} diff --git a/api/service/stagedstreamsync/beacon_helper.go b/api/service/stagedstreamsync/beacon_helper.go new file mode 100644 index 000000000..a996f368b --- /dev/null +++ b/api/service/stagedstreamsync/beacon_helper.go @@ -0,0 +1,161 @@ +package stagedstreamsync + +import ( + "time" + + "github.com/harmony-one/harmony/core/types" + "github.com/harmony-one/harmony/internal/utils" + "github.com/rs/zerolog" +) + +// lastMileCache keeps the last 50 number blocks in memory cache +const lastMileCap = 50 + +type ( + // beaconHelper is the helper for the beacon downloader. The beaconHelper is only started + // when node is running on side chain, listening to beacon client pub-sub message and + // insert the latest blocks to the beacon chain. + beaconHelper struct { + bc blockChain + blockC <-chan *types.Block + // TODO: refactor this hook to consensus module. We'd better put it in + // consensus module under a subscription. + insertHook func() + + lastMileCache *blocksByNumber + insertC chan insertTask + closeC chan struct{} + logger zerolog.Logger + } + + insertTask struct { + doneC chan struct{} + } +) + +func newBeaconHelper(bc blockChain, blockC <-chan *types.Block, insertHook func()) *beaconHelper { + return &beaconHelper{ + bc: bc, + blockC: blockC, + insertHook: insertHook, + lastMileCache: newBlocksByNumber(lastMileCap), + insertC: make(chan insertTask, 1), + closeC: make(chan struct{}), + logger: utils.Logger().With(). + Str("module", "staged stream sync"). + Str("sub-module", "beacon helper"). 
+ Logger(), + } +} + +func (bh *beaconHelper) start() { + go bh.loop() +} + +func (bh *beaconHelper) close() { + close(bh.closeC) +} + +func (bh *beaconHelper) loop() { + t := time.NewTicker(10 * time.Second) + defer t.Stop() + for { + select { + case <-t.C: + bh.insertAsync() + + case b, ok := <-bh.blockC: + if !ok { + return // blockC closed. Node exited + } + if b == nil { + continue + } + bh.lastMileCache.push(b) + bh.insertAsync() + + case it := <-bh.insertC: + inserted, bn, err := bh.insertLastMileBlocks() + if err != nil { + bh.logger.Error().Err(err). + Msg(WrapStagedSyncMsg("insert last mile blocks error")) + close(it.doneC) + continue + } + if inserted > 0 { + numBlocksInsertedBeaconHelperCounter.Add(float64(inserted)) + bh.logger.Info().Int("inserted", inserted). + Uint64("end height", bn). + Uint32("shard", bh.bc.ShardID()). + Msg(WrapStagedSyncMsg("insert last mile blocks")) + } + close(it.doneC) + + case <-bh.closeC: + return + } + } +} + +// insertAsync triggers the insert last mile without blocking +func (bh *beaconHelper) insertAsync() { + select { + case bh.insertC <- insertTask{ + doneC: make(chan struct{}), + }: + default: + } +} + +// insertSync triggers the insert last mile while blocking +func (bh *beaconHelper) insertSync() { + task := insertTask{ + doneC: make(chan struct{}), + } + bh.insertC <- task + <-task.doneC +} + +func (bh *beaconHelper) insertLastMileBlocks() (inserted int, bn uint64, err error) { + bn = bh.bc.CurrentBlock().NumberU64() + 1 + for { + b := bh.getNextBlock(bn) + if b == nil { + bn-- + return + } + // TODO: Instruct the beacon helper to verify signatures. This may require some forks + // in pub-sub message (add commit sigs in node.block.sync messages) + if _, err = bh.bc.InsertChain(types.Blocks{b}, true); err != nil { + bn-- + return + } + bh.logger.Info(). + Uint64("number", b.NumberU64()). 
+ Msg(WrapStagedSyncMsg("Inserted block from beacon pub-sub")) + + if bh.insertHook != nil { + bh.insertHook() + } + inserted++ + bn++ + } +} + +func (bh *beaconHelper) getNextBlock(expBN uint64) *types.Block { + for bh.lastMileCache.len() > 0 { + b := bh.lastMileCache.pop() + if b == nil { + return nil + } + if b.NumberU64() < expBN { + continue + } + if b.NumberU64() > expBN { + bh.lastMileCache.push(b) + return nil + } + return b + } + return nil +} diff --git a/api/service/stagedstreamsync/block_by_hash_manager.go b/api/service/stagedstreamsync/block_by_hash_manager.go new file mode 100644 index 000000000..4179bbaed --- /dev/null +++ b/api/service/stagedstreamsync/block_by_hash_manager.go @@ -0,0 +1,133 @@ +package stagedstreamsync + +import ( + "sync" + + "github.com/ethereum/go-ethereum/common" + "github.com/harmony-one/harmony/core/types" + sttypes "github.com/harmony-one/harmony/p2p/stream/types" + "github.com/pkg/errors" +) + +type getBlocksByHashManager struct { + hashes []common.Hash + pendings map[common.Hash]struct{} + results map[common.Hash]blockResult + whitelist []sttypes.StreamID + + lock sync.Mutex +} + +func newGetBlocksByHashManager(hashes []common.Hash, whitelist []sttypes.StreamID) *getBlocksByHashManager { + return &getBlocksByHashManager{ + hashes: hashes, + pendings: make(map[common.Hash]struct{}), + results: make(map[common.Hash]blockResult), + whitelist: whitelist, + } +} + +func (m *getBlocksByHashManager) getNextHashes() ([]common.Hash, []sttypes.StreamID, error) { + m.lock.Lock() + defer m.lock.Unlock() + + num := m.numBlocksPerRequest() + hashes := make([]common.Hash, 0, num) + if len(m.whitelist) == 0 { + return nil, nil, ErrEmptyWhitelist + } + + for _, hash := range m.hashes { + if len(hashes) == num { + break + } + _, ok1 := m.pendings[hash] + _, ok2 := m.results[hash] + if !ok1 && !ok2 { + hashes = append(hashes, hash) + } + } + sts := make([]sttypes.StreamID, len(m.whitelist)) + copy(sts, m.whitelist) + return hashes, sts, nil +} + +func (m *getBlocksByHashManager) numBlocksPerRequest() int { + val := divideCeil(len(m.hashes), len(m.whitelist)) + if val < BlockByHashesLowerCap { + val = BlockByHashesLowerCap + } + if val > BlockByHashesUpperCap { + val = BlockByHashesUpperCap + } + return val +} + +func (m *getBlocksByHashManager) numRequests() int { + return divideCeil(len(m.hashes), m.numBlocksPerRequest()) +} + +func (m *getBlocksByHashManager) addResult(hashes []common.Hash, blocks []*types.Block, stid sttypes.StreamID) { + m.lock.Lock() + defer m.lock.Unlock() + + for i, hash := range hashes { + block := blocks[i] + delete(m.pendings, hash) + m.results[hash] = blockResult{ + block: block, + stid: stid, + } + } +} + +func (m *getBlocksByHashManager) handleResultError(hashes []common.Hash, stid sttypes.StreamID) { + m.lock.Lock() + defer m.lock.Unlock() + + m.removeStreamID(stid) + + for _, hash := range hashes { + delete(m.pendings, hash) + } +} + +func (m *getBlocksByHashManager) getResults() ([]*types.Block, []sttypes.StreamID, error) { + m.lock.Lock() + defer m.lock.Unlock() + + blocks := make([]*types.Block, 0, len(m.hashes)) + stids := make([]sttypes.StreamID, 0, len(m.hashes)) + for _, hash := range m.hashes { + if m.results[hash].block == nil { + return nil, nil, errors.New("SANITY: nil block found") + } + blocks = append(blocks, m.results[hash].block) + stids = append(stids, m.results[hash].stid) + } + return blocks, stids, nil +} + +func (m *getBlocksByHashManager) isDone() bool { + m.lock.Lock() + defer m.lock.Unlock() + + return 
len(m.results) == len(m.hashes) +} + +func (m *getBlocksByHashManager) removeStreamID(target sttypes.StreamID) { + // O(n^2) complexity. But considering the whitelist size is small, should not + // have performance issue. +loop: + for i, stid := range m.whitelist { + if stid == target { + if i == len(m.whitelist) { + m.whitelist = m.whitelist[:i] + } else { + m.whitelist = append(m.whitelist[:i], m.whitelist[i+1:]...) + } + goto loop + } + } + return +} diff --git a/api/service/stagedstreamsync/block_hash_result.go b/api/service/stagedstreamsync/block_hash_result.go new file mode 100644 index 000000000..0bae60507 --- /dev/null +++ b/api/service/stagedstreamsync/block_hash_result.go @@ -0,0 +1,75 @@ +package stagedstreamsync + +import ( + "sync" + + "github.com/ethereum/go-ethereum/common" + sttypes "github.com/harmony-one/harmony/p2p/stream/types" +) + +type ( + blockHashResults struct { + bns []uint64 + results []map[sttypes.StreamID]common.Hash + + lock sync.Mutex + } +) + +func newBlockHashResults(bns []uint64) *blockHashResults { + results := make([]map[sttypes.StreamID]common.Hash, 0, len(bns)) + for range bns { + results = append(results, make(map[sttypes.StreamID]common.Hash)) + } + return &blockHashResults{ + bns: bns, + results: results, + } +} + +func (res *blockHashResults) addResult(hashes []common.Hash, stid sttypes.StreamID) { + res.lock.Lock() + defer res.lock.Unlock() + + for i, h := range hashes { + if h == emptyHash { + return // nil block hash reached + } + res.results[i][stid] = h + } + return +} + +func (res *blockHashResults) computeLongestHashChain() ([]common.Hash, []sttypes.StreamID) { + var ( + whitelist map[sttypes.StreamID]struct{} + hashChain []common.Hash + ) + for _, result := range res.results { + hash, nextWl := countHashMaxVote(result, whitelist) + if hash == emptyHash { + break + } + hashChain = append(hashChain, hash) + whitelist = nextWl + } + + sts := make([]sttypes.StreamID, 0, len(whitelist)) + for st := range whitelist { + sts = append(sts, st) + } + return hashChain, sts +} + +func (res *blockHashResults) numBlocksWithResults() int { + res.lock.Lock() + defer res.lock.Unlock() + + cnt := 0 + for _, result := range res.results { + if len(result) != 0 { + cnt++ + } + } + return cnt +} diff --git a/api/service/stagedstreamsync/block_manager.go b/api/service/stagedstreamsync/block_manager.go new file mode 100644 index 000000000..df30ab5e3 --- /dev/null +++ b/api/service/stagedstreamsync/block_manager.go @@ -0,0 +1,172 @@ +package stagedstreamsync + +import ( + "sync" + + sttypes "github.com/harmony-one/harmony/p2p/stream/types" + "github.com/ledgerwatch/erigon-lib/kv" + "github.com/rs/zerolog" +) + +type BlockDownloadDetails struct { + loopID int + streamID sttypes.StreamID +} + +// blockDownloadManager is the helper structure for get blocks request management +type blockDownloadManager struct { + chain blockChain + tx kv.RwTx + + targetBN uint64 + requesting map[uint64]struct{} // block numbers that have been assigned to workers but not received + processing map[uint64]struct{} // block numbers received requests but not inserted + retries *prioritizedNumbers // requests where error happens + rq *resultQueue // result queue wait to be inserted into blockchain + bdd map[uint64]BlockDownloadDetails // details about how this block was downloaded + + logger zerolog.Logger + lock sync.Mutex +} + +func newBlockDownloadManager(tx kv.RwTx, chain blockChain, targetBN uint64, logger zerolog.Logger) *blockDownloadManager { + return &blockDownloadManager{ + chain: 
chain, + tx: tx, + targetBN: targetBN, + requesting: make(map[uint64]struct{}), + processing: make(map[uint64]struct{}), + retries: newPrioritizedNumbers(), + rq: newResultQueue(), + bdd: make(map[uint64]BlockDownloadDetails), + logger: logger, + } +} + +// GetNextBatch get the next block numbers batch +func (gbm *blockDownloadManager) GetNextBatch() []uint64 { + gbm.lock.Lock() + defer gbm.lock.Unlock() + + cap := BlocksPerRequest + + bns := gbm.getBatchFromRetries(cap) + if len(bns) > 0 { + cap -= len(bns) + gbm.addBatchToRequesting(bns) + } + + if gbm.availableForMoreTasks() { + addBNs := gbm.getBatchFromUnprocessed(cap) + gbm.addBatchToRequesting(addBNs) + bns = append(bns, addBNs...) + } + + return bns +} + +// HandleRequestError handles the error result +func (gbm *blockDownloadManager) HandleRequestError(bns []uint64, err error, streamID sttypes.StreamID) { + gbm.lock.Lock() + defer gbm.lock.Unlock() + + // add requested block numbers to retries + for _, bn := range bns { + delete(gbm.requesting, bn) + gbm.retries.push(bn) + } +} + +// HandleRequestResult handles get blocks result +func (gbm *blockDownloadManager) HandleRequestResult(bns []uint64, blockBytes [][]byte, sigBytes [][]byte, loopID int, streamID sttypes.StreamID) error { + gbm.lock.Lock() + defer gbm.lock.Unlock() + + for i, bn := range bns { + delete(gbm.requesting, bn) + if len(blockBytes[i]) <= 1 { + gbm.retries.push(bn) + } else { + gbm.processing[bn] = struct{}{} + gbm.bdd[bn] = BlockDownloadDetails{ + loopID: loopID, + streamID: streamID, + } + } + } + return nil +} + +// SetDownloadDetails sets the download details for a batch of blocks +func (gbm *blockDownloadManager) SetDownloadDetails(bns []uint64, loopID int, streamID sttypes.StreamID) error { + gbm.lock.Lock() + defer gbm.lock.Unlock() + + for _, bn := range bns { + gbm.bdd[bn] = BlockDownloadDetails{ + loopID: loopID, + streamID: streamID, + } + } + return nil +} + +// GetDownloadDetails returns the download details for a block +func (gbm *blockDownloadManager) GetDownloadDetails(blockNumber uint64) (loopID int, streamID sttypes.StreamID) { + gbm.lock.Lock() + defer gbm.lock.Unlock() + + return gbm.bdd[blockNumber].loopID, gbm.bdd[blockNumber].streamID +} + +// getBatchFromRetries get the block number batch to be requested from retries. +func (gbm *blockDownloadManager) getBatchFromRetries(cap int) []uint64 { + var ( + requestBNs []uint64 + curHeight = gbm.chain.CurrentBlock().NumberU64() + ) + for cnt := 0; cnt < cap; cnt++ { + bn := gbm.retries.pop() + if bn == 0 { + break // no more retries + } + if bn <= curHeight { + continue + } + requestBNs = append(requestBNs, bn) + } + return requestBNs +} + +// getBatchFromUnprocessed returns a batch of block numbers to be requested from unprocessed. +func (gbm *blockDownloadManager) getBatchFromUnprocessed(cap int) []uint64 { + var ( + requestBNs []uint64 + curHeight = gbm.chain.CurrentBlock().NumberU64() + ) + bn := curHeight + 1 + // TODO: this algorithm can be potentially optimized. 
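+	// Worked example (hypothetical numbers, assuming targetBN >= 15): with
+	// curHeight = 10, cap = 3, requesting = {11} and processing = {12}, the
+	// scan below skips 11 and 12 and returns [13, 14, 15].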
+ for cnt := 0; cnt < cap && bn <= gbm.targetBN; cnt++ { + for bn <= gbm.targetBN { + _, ok1 := gbm.requesting[bn] + _, ok2 := gbm.processing[bn] + if !ok1 && !ok2 { + requestBNs = append(requestBNs, bn) + bn++ + break + } + bn++ + } + } + return requestBNs +} + +func (gbm *blockDownloadManager) availableForMoreTasks() bool { + return gbm.rq.results.Len() < SoftQueueCap +} + +func (gbm *blockDownloadManager) addBatchToRequesting(bns []uint64) { + for _, bn := range bns { + gbm.requesting[bn] = struct{}{} + } +} diff --git a/api/service/stagedstreamsync/const.go b/api/service/stagedstreamsync/const.go new file mode 100644 index 000000000..0e6bc6e2c --- /dev/null +++ b/api/service/stagedstreamsync/const.go @@ -0,0 +1,81 @@ +package stagedstreamsync + +import ( + "time" + + "github.com/harmony-one/harmony/core/types" + nodeconfig "github.com/harmony-one/harmony/internal/configs/node" +) + +const ( + BlocksPerRequest int = 10 // number of blocks for each request + BlocksPerInsertion int = 50 // number of blocks for each insert batch + BlockHashesPerRequest int = 20 // number of get block hashes for short range sync + BlockByHashesUpperCap int = 10 // number of get blocks by hashes upper cap + BlockByHashesLowerCap int = 3 // number of get blocks by hashes lower cap + + LastMileBlocksThreshold int = 10 + + // SoftQueueCap is the soft cap of size in resultQueue. When the queue size is larger than this limit, + // no more request will be assigned to workers to wait for InsertChain to finish. + SoftQueueCap int = 100 + + // DefaultConcurrency is the default settings for concurrency + DefaultConcurrency int = 4 + + // ShortRangeTimeout is the timeout for each short range sync, which allow short range sync + // to restart automatically when stuck in `getBlockHashes` + ShortRangeTimeout time.Duration = 1 * time.Minute +) + +type ( + // Config is the downloader config + Config struct { + // Only run stream sync protocol as a server. + // TODO: remove this when stream sync is fully up. + ServerOnly bool + + // parameters + Network nodeconfig.NetworkType + Concurrency int // Number of concurrent sync requests + MinStreams int // Minimum number of streams to do sync + InitStreams int // Number of streams requirement for initial bootstrap + MaxAdvertiseWaitTime int // maximum time duration between protocol advertisements + // stream manager config + SmSoftLowCap int + SmHardLowCap int + SmHiCap int + SmDiscBatch int + + // config for beacon config + BHConfig *BeaconHelperConfig + + // log the stage progress + LogProgress bool + } + + // BeaconHelperConfig is the extra config used for beaconHelper which uses + // pub-sub block message to do sync. 
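+	// BlockC supplies blocks seen on the beacon pub-sub topic; beaconHelper
+	// keeps them in its last-mile cache until they become insertable.
+	// InsertHook, when non-nil, runs after each block that
+	// insertLastMileBlocks successfully inserts into the chain.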
+ BeaconHelperConfig struct { + BlockC <-chan *types.Block + InsertHook func() + } +) + +func (c *Config) fixValues() { + if c.Concurrency == 0 { + c.Concurrency = DefaultConcurrency + } + if c.Concurrency > c.MinStreams { + c.MinStreams = c.Concurrency + } + if c.MinStreams > c.InitStreams { + c.InitStreams = c.MinStreams + } + if c.MinStreams > c.SmSoftLowCap { + c.SmSoftLowCap = c.MinStreams + } + if c.MinStreams > c.SmHardLowCap { + c.SmHardLowCap = c.MinStreams + } +} diff --git a/api/service/stagedstreamsync/default_stages.go b/api/service/stagedstreamsync/default_stages.go new file mode 100644 index 000000000..6e4808738 --- /dev/null +++ b/api/service/stagedstreamsync/default_stages.go @@ -0,0 +1,87 @@ +package stagedstreamsync + +import ( + "context" +) + +type ForwardOrder []SyncStageID +type RevertOrder []SyncStageID +type CleanUpOrder []SyncStageID + +var DefaultForwardOrder = ForwardOrder{ + Heads, + SyncEpoch, + ShortRange, + BlockBodies, + // Stages below don't use Internet + States, + Finish, +} + +var DefaultRevertOrder = RevertOrder{ + Finish, + States, + BlockBodies, + ShortRange, + SyncEpoch, + Heads, +} + +var DefaultCleanUpOrder = CleanUpOrder{ + Finish, + States, + BlockBodies, + ShortRange, + SyncEpoch, + Heads, +} + +func DefaultStages(ctx context.Context, + headsCfg StageHeadsCfg, + seCfg StageEpochCfg, + srCfg StageShortRangeCfg, + bodiesCfg StageBodiesCfg, + statesCfg StageStatesCfg, + finishCfg StageFinishCfg, +) []*Stage { + + handlerStageHeads := NewStageHeads(headsCfg) + handlerStageShortRange := NewStageShortRange(srCfg) + handlerStageEpochSync := NewStageEpoch(seCfg) + handlerStageBodies := NewStageBodies(bodiesCfg) + handlerStageStates := NewStageStates(statesCfg) + handlerStageFinish := NewStageFinish(finishCfg) + + return []*Stage{ + { + ID: Heads, + Description: "Retrieve Chain Heads", + Handler: handlerStageHeads, + }, + { + ID: SyncEpoch, + Description: "Sync only Last Block of Epoch", + Handler: handlerStageEpochSync, + }, + { + ID: ShortRange, + Description: "Short Range Sync", + Handler: handlerStageShortRange, + }, + { + ID: BlockBodies, + Description: "Retrieve Block Bodies", + Handler: handlerStageBodies, + }, + { + ID: States, + Description: "Update Blockchain State", + Handler: handlerStageStates, + }, + { + ID: Finish, + Description: "Finalize Changes", + Handler: handlerStageFinish, + }, + } +} diff --git a/api/service/stagedstreamsync/downloader.go b/api/service/stagedstreamsync/downloader.go new file mode 100644 index 000000000..a20b4ac79 --- /dev/null +++ b/api/service/stagedstreamsync/downloader.go @@ -0,0 +1,260 @@ +package stagedstreamsync + +import ( + "context" + "fmt" + "time" + + "github.com/ethereum/go-ethereum/event" + "github.com/rs/zerolog" + + "github.com/harmony-one/harmony/core" + nodeconfig "github.com/harmony-one/harmony/internal/configs/node" + "github.com/harmony-one/harmony/internal/utils" + "github.com/harmony-one/harmony/p2p" + "github.com/harmony-one/harmony/p2p/stream/common/streammanager" + "github.com/harmony-one/harmony/p2p/stream/protocols/sync" + "github.com/harmony-one/harmony/shard" +) + +type ( + // Downloader is responsible for sync task of one shard + Downloader struct { + bc blockChain + syncProtocol syncProtocol + bh *beaconHelper + stagedSyncInstance *StagedStreamSync + isBeaconNode bool + + downloadC chan struct{} + closeC chan struct{} + ctx context.Context + cancel context.CancelFunc + + config Config + logger zerolog.Logger + } +) + +// NewDownloader creates a new downloader +func NewDownloader(host 
p2p.Host, bc core.BlockChain, isBeaconNode bool, config Config) *Downloader { + config.fixValues() + + sp := sync.NewProtocol(sync.Config{ + Chain: bc, + Host: host.GetP2PHost(), + Discovery: host.GetDiscovery(), + ShardID: nodeconfig.ShardID(bc.ShardID()), + Network: config.Network, + BeaconNode: isBeaconNode, + MaxAdvertiseWaitTime: config.MaxAdvertiseWaitTime, + SmSoftLowCap: config.SmSoftLowCap, + SmHardLowCap: config.SmHardLowCap, + SmHiCap: config.SmHiCap, + DiscBatch: config.SmDiscBatch, + }) + + host.AddStreamProtocol(sp) + + var bh *beaconHelper + if config.BHConfig != nil && bc.ShardID() == shard.BeaconChainShardID { + bh = newBeaconHelper(bc, config.BHConfig.BlockC, config.BHConfig.InsertHook) + } + + logger := utils.Logger().With(). + Str("module", "staged stream sync"). + Uint32("ShardID", bc.ShardID()).Logger() + + ctx, cancel := context.WithCancel(context.Background()) + + //TODO: use mem db should be in config file + stagedSyncInstance, err := CreateStagedSync(ctx, bc, false, isBeaconNode, sp, config, logger, config.LogProgress) + if err != nil { + cancel() + return nil + } + + return &Downloader{ + bc: bc, + syncProtocol: sp, + bh: bh, + stagedSyncInstance: stagedSyncInstance, + isBeaconNode: isBeaconNode, + + downloadC: make(chan struct{}), + closeC: make(chan struct{}), + ctx: ctx, + cancel: cancel, + + config: config, + logger: logger, + } +} + +// Start starts the downloader +func (d *Downloader) Start() { + go func() { + d.waitForBootFinish() + d.loop() + }() + + if d.bh != nil { + d.bh.start() + } +} + +// Close closes the downloader +func (d *Downloader) Close() { + close(d.closeC) + d.cancel() + + if d.bh != nil { + d.bh.close() + } +} + +// DownloadAsync triggers the download async. +func (d *Downloader) DownloadAsync() { + select { + case d.downloadC <- struct{}{}: + consensusTriggeredDownloadCounterVec.With(d.promLabels()).Inc() + + case <-time.After(100 * time.Millisecond): + } +} + +// NumPeers returns the number of peers connected of a specific shard. 
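DownloadAsync above and the trigger helpers later in this file share one idiom: a buffered channel plus a select that coalesces triggers, so a request arriving while one is already pending is dropped (or abandoned after a short timeout) instead of queueing up. A minimal sketch of the coalescing idiom, with hypothetical names:

package main

import "fmt"

func main() {
	requests := make(chan struct{}, 1) // capacity 1: at most one pending trigger

	trigger := func() {
		select {
		case requests <- struct{}{}: // queued a trigger
		default: // one is already pending; drop (coalesce) this one
		}
	}

	trigger()
	trigger() // coalesced with the first
	fmt.Println("pending triggers:", len(requests)) // pending triggers: 1
}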
+func (d *Downloader) NumPeers() int { + return d.syncProtocol.NumStreams() +} + +// SyncStatus returns the current sync status +func (d *Downloader) SyncStatus() (bool, uint64, uint64) { + syncing, target := d.stagedSyncInstance.status.get() + if !syncing { + target = d.bc.CurrentBlock().NumberU64() + } + return syncing, target, 0 +} + +// SubscribeDownloadStarted subscribes download started +func (d *Downloader) SubscribeDownloadStarted(ch chan struct{}) event.Subscription { + d.stagedSyncInstance.evtDownloadStartedSubscribed = true + return d.stagedSyncInstance.evtDownloadStarted.Subscribe(ch) +} + +// SubscribeDownloadFinished subscribes the download finished +func (d *Downloader) SubscribeDownloadFinished(ch chan struct{}) event.Subscription { + d.stagedSyncInstance.evtDownloadFinishedSubscribed = true + return d.stagedSyncInstance.evtDownloadFinished.Subscribe(ch) +} + +// waitForBootFinish waits for stream manager to finish the initial discovery and have +// enough peers to start downloader +func (d *Downloader) waitForBootFinish() { + evtCh := make(chan streammanager.EvtStreamAdded, 1) + sub := d.syncProtocol.SubscribeAddStreamEvent(evtCh) + defer sub.Unsubscribe() + + checkCh := make(chan struct{}, 1) + trigger := func() { + select { + case checkCh <- struct{}{}: + default: + } + } + trigger() + + t := time.NewTicker(10 * time.Second) + defer t.Stop() + for { + select { + case <-t.C: + trigger() + + case <-evtCh: + trigger() + + case <-checkCh: + if d.syncProtocol.NumStreams() >= d.config.InitStreams { + fmt.Printf("boot completed for shard %d ( %d streams are connected )\n", d.bc.ShardID(), d.syncProtocol.NumStreams()) + return + } + case <-d.closeC: + return + } + } +} + +func (d *Downloader) loop() { + ticker := time.NewTicker(10 * time.Second) + defer ticker.Stop() + // for shard chain and beacon chain node, first we start with initSync=true to + // make sure it goes through the long range sync first. + // for epoch chain we do only need to go through epoch sync process + initSync := d.isBeaconNode || d.bc.ShardID() != shard.BeaconChainShardID + + trigger := func() { + select { + case d.downloadC <- struct{}{}: + case <-time.After(100 * time.Millisecond): + } + } + go trigger() + + for { + select { + case <-ticker.C: + go trigger() + + case <-d.downloadC: + addedBN, err := d.stagedSyncInstance.doSync(d.ctx, initSync) + if err != nil { + //TODO: if there is a bad block which can't be resolved + if d.stagedSyncInstance.invalidBlock.Active { + numTriedStreams := len(d.stagedSyncInstance.invalidBlock.StreamID) + // if many streams couldn't solve it, then that's an unresolvable bad block + if numTriedStreams >= d.config.InitStreams { + if !d.stagedSyncInstance.invalidBlock.IsLogged { + fmt.Println("unresolvable bad block:", d.stagedSyncInstance.invalidBlock.Number) + d.stagedSyncInstance.invalidBlock.IsLogged = true + } + //TODO: if we don't have any new or untried stream in the list, sleep or panic + } + } + + // If any error happens, sleep 5 seconds and retry + d.logger.Error(). + Err(err). + Bool("initSync", initSync). + Msg(WrapStagedSyncMsg("sync loop failed")) + go func() { + time.Sleep(5 * time.Second) + trigger() + }() + time.Sleep(1 * time.Second) + break + } + if initSync { + d.logger.Info().Int("block added", addedBN). + Uint64("current height", d.bc.CurrentBlock().NumberU64()). + Bool("initSync", initSync). + Uint32("shard", d.bc.ShardID()). 
+ Msg(WrapStagedSyncMsg("sync finished")) + } + + if addedBN != 0 { + // If block number has been changed, trigger another sync + go trigger() + } + // try to add last mile from pub-sub (blocking) + if d.bh != nil { + d.bh.insertSync() + } + initSync = false + + case <-d.closeC: + return + } + } +} diff --git a/api/service/stagedstreamsync/downloaders.go b/api/service/stagedstreamsync/downloaders.go new file mode 100644 index 000000000..0e79c7963 --- /dev/null +++ b/api/service/stagedstreamsync/downloaders.go @@ -0,0 +1,96 @@ +package stagedstreamsync + +import ( + "github.com/harmony-one/abool" + "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/p2p" +) + +// Downloaders is the set of downloaders +type Downloaders struct { + ds map[uint32]*Downloader + active *abool.AtomicBool + + config Config +} + +// NewDownloaders creates Downloaders for sync of multiple blockchains +func NewDownloaders(host p2p.Host, bcs []core.BlockChain, config Config) *Downloaders { + ds := make(map[uint32]*Downloader) + isBeaconNode := len(bcs) == 1 + for _, bc := range bcs { + if bc == nil { + continue + } + if _, ok := ds[bc.ShardID()]; ok { + continue + } + ds[bc.ShardID()] = NewDownloader(host, bc, isBeaconNode, config) + } + return &Downloaders{ + ds: ds, + active: abool.New(), + config: config, + } +} + +// Start starts the downloaders +func (ds *Downloaders) Start() { + if ds.config.ServerOnly { + // Run in server only mode. Do not start downloaders. + return + } + ds.active.Set() + for _, d := range ds.ds { + d.Start() + } +} + +// Close closes the downloaders +func (ds *Downloaders) Close() { + if ds.config.ServerOnly { + // Run in server only mode. Downloaders not started. + return + } + ds.active.UnSet() + for _, d := range ds.ds { + d.Close() + } +} + +// DownloadAsync triggers a download +func (ds *Downloaders) DownloadAsync(shardID uint32) { + d, ok := ds.ds[shardID] + if !ok && d != nil { + d.DownloadAsync() + } +} + +// GetShardDownloader returns the downloader with the given shard ID +func (ds *Downloaders) GetShardDownloader(shardID uint32) *Downloader { + return ds.ds[shardID] +} + +// NumPeers returns the connected peers for each shard +func (ds *Downloaders) NumPeers() map[uint32]int { + res := make(map[uint32]int) + + for sid, d := range ds.ds { + res[sid] = d.NumPeers() + } + return res +} + +// SyncStatus returns whether the given shard is doing syncing task and the target block number +func (ds *Downloaders) SyncStatus(shardID uint32) (bool, uint64, uint64) { + d, ok := ds.ds[shardID] + if !ok { + return false, 0, 0 + } + return d.SyncStatus() +} + +// IsActive returns whether the downloader is active +func (ds *Downloaders) IsActive() bool { + return ds.active.IsSet() +} diff --git a/api/service/stagedstreamsync/errors.go b/api/service/stagedstreamsync/errors.go new file mode 100644 index 000000000..d18020dd0 --- /dev/null +++ b/api/service/stagedstreamsync/errors.go @@ -0,0 +1,35 @@ +package stagedstreamsync + +import ( + "fmt" +) + +// Errors ... 
diff --git a/api/service/stagedstreamsync/errors.go b/api/service/stagedstreamsync/errors.go new file mode 100644 index 000000000..d18020dd0 --- /dev/null +++ b/api/service/stagedstreamsync/errors.go @@ -0,0 +1,35 @@ +package stagedstreamsync + +import ( + "fmt" +) + +// Errors ... +var ( + ErrSavingBodiesProgressFail = WrapStagedSyncError("saving progress for block bodies stage failed") + ErrSaveStateProgressFail = WrapStagedSyncError("saving progress for block States stage failed") + ErrInvalidBlockNumber = WrapStagedSyncError("invalid block number") + ErrInvalidBlockBytes = WrapStagedSyncError("invalid block bytes to insert into chain") + ErrStageNotFound = WrapStagedSyncError("stage not found") + ErrUnexpectedNumberOfBlockHashes = WrapStagedSyncError("unexpected number of getBlocksByHashes result") + ErrUnexpectedBlockHashes = WrapStagedSyncError("unexpected get block hashes result delivered") + ErrNilBlock = WrapStagedSyncError("nil block found") + ErrNotEnoughStreams = WrapStagedSyncError("not enough streams") + ErrParseCommitSigAndBitmapFail = WrapStagedSyncError("parse commitSigAndBitmap failed") + ErrVerifyHeaderFail = WrapStagedSyncError("verify header failed") + ErrInsertChainFail = WrapStagedSyncError("insert to chain failed") + ErrZeroBlockResponse = WrapStagedSyncError("zero block number response from remote nodes") + ErrEmptyWhitelist = WrapStagedSyncError("empty whitelist") + ErrWrongGetBlockNumberType = WrapStagedSyncError("wrong type of getBlockNumber interface") + ErrSaveBlocksToDbFailed = WrapStagedSyncError("saving downloaded blocks to db failed") +) + +// WrapStagedSyncError wraps an error message for staged sync and returns an error object +func WrapStagedSyncError(context string) error { + return fmt.Errorf("[STAGED_STREAM_SYNC]: %s", context) +} + +// WrapStagedSyncMsg wraps a message for staged sync and returns a string +func WrapStagedSyncMsg(context string) string { + return fmt.Sprintf("[STAGED_STREAM_SYNC]: %s", context) +} diff --git a/api/service/stagedstreamsync/helpers.go b/api/service/stagedstreamsync/helpers.go new file mode 100644 index 000000000..75e504214 --- /dev/null +++ b/api/service/stagedstreamsync/helpers.go @@ -0,0 +1,127 @@ +package stagedstreamsync + +import ( + "encoding/binary" + "fmt" + "math" + + "github.com/ethereum/go-ethereum/common" + "github.com/harmony-one/harmony/core/types" + sttypes "github.com/harmony-one/harmony/p2p/stream/types" +) + +func marshalData(blockNumber uint64) []byte { + return encodeBigEndian(blockNumber) +} + +func unmarshalData(data []byte) (uint64, error) { + if len(data) == 0 { + return 0, nil + } + if len(data) < 8 { + return 0, fmt.Errorf("value must be at least 8 bytes, got %d", len(data)) + } + return binary.BigEndian.Uint64(data[:8]), nil +} + +func encodeBigEndian(n uint64) []byte { + var v [8]byte + binary.BigEndian.PutUint64(v[:], n) + return v[:] +} + +func divideCeil(x, y int) int { + fVal := float64(x) / float64(y) + return int(math.Ceil(fVal)) +}
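The two helpers above give block numbers a fixed-width big-endian encoding, so DB keys sort in numeric order under lexicographic byte comparison. A quick round trip (values illustrative):

    key := marshalData(42)       // 8-byte big-endian key: [0 0 0 0 0 0 0 42]
    n, err := unmarshalData(key) // n == 42, err == nil
    // fixed-width big-endian means bytes.Compare(marshalData(a), marshalData(b))
    // orders keys exactly as a and b order numerically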
+ +// computeBlockNumberByMaxVote computes the target block number by max vote. +func computeBlockNumberByMaxVote(votes map[sttypes.StreamID]uint64) uint64 { + var ( + nm = make(map[uint64]int) + res uint64 + maxCnt int + ) + for _, bn := range votes { + _, ok := nm[bn] + if !ok { + nm[bn] = 0 + } + nm[bn]++ + cnt := nm[bn] + + if cnt > maxCnt || (cnt == maxCnt && bn > res) { + res = bn + maxCnt = cnt + } + } + return res +} + +func checkGetBlockByHashesResult(blocks []*types.Block, hashes []common.Hash) error { + if len(blocks) != len(hashes) { + return ErrUnexpectedNumberOfBlockHashes + } + for i, block := range blocks { + if block == nil { + return ErrNilBlock + } + if block.Hash() != hashes[i] { + return fmt.Errorf("unexpected block hash: %x / %x", block.Hash(), hashes[i]) + } + } + return nil +} + +// countHashMaxVote returns the most voted block hash among the streams in m (restricted to +// whitelist when it is non-empty), together with the narrowed whitelist of streams that voted for the winner. +func countHashMaxVote(m map[sttypes.StreamID]common.Hash, whitelist map[sttypes.StreamID]struct{}) (common.Hash, map[sttypes.StreamID]struct{}) { + var ( + voteM = make(map[common.Hash]int) + res common.Hash + maxCnt = 0 + ) + + for st, h := range m { + if len(whitelist) != 0 { + if _, ok := whitelist[st]; !ok { + continue + } + } + if _, ok := voteM[h]; !ok { + voteM[h] = 0 + } + voteM[h]++ + if voteM[h] > maxCnt { + maxCnt = voteM[h] + res = h + } + } + + nextWl := make(map[sttypes.StreamID]struct{}) + for st, h := range m { + if h != res { + continue + } + if len(whitelist) != 0 { + if _, ok := whitelist[st]; ok { + nextWl[st] = struct{}{} + } + } else { + nextWl[st] = struct{}{} + } + } + return res, nextWl +} + +// ByteCount renders a byte count in human-readable form, e.g. 1536 -> "1.5KB". +func ByteCount(b uint64) string { + const unit = 1024 + if b < unit { + return fmt.Sprintf("%dB", b) + } + div, exp := uint64(unit), 0 + for n := b / unit; n >= unit; n /= unit { + div *= unit + exp++ + } + return fmt.Sprintf("%.1f%cB", + float64(b)/float64(div), "KMGTPE"[exp]) +}
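A worked example of the max-vote helper above (the stream IDs and block numbers are made up): with two streams reporting 100 and one reporting 99, block 100 wins; a tie in vote count would break toward the higher block number.

    votes := map[sttypes.StreamID]uint64{
        "s1": 100,
        "s2": 100,
        "s3": 99,
    }
    target := computeBlockNumberByMaxVote(votes) // == 100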
diff --git a/api/service/stagedstreamsync/metric.go b/api/service/stagedstreamsync/metric.go new file mode 100644 index 000000000..9437cc09e --- /dev/null +++ b/api/service/stagedstreamsync/metric.go @@ -0,0 +1,98 @@ +package stagedstreamsync + +import ( + "fmt" + + prom "github.com/harmony-one/harmony/api/service/prometheus" + "github.com/prometheus/client_golang/prometheus" +) + +func init() { + prom.PromRegistry().MustRegister( + consensusTriggeredDownloadCounterVec, + longRangeSyncedBlockCounterVec, + longRangeFailInsertedBlockCounterVec, + numShortRangeCounterVec, + numFailedDownloadCounterVec, + numBlocksInsertedShortRangeHistogramVec, + numBlocksInsertedBeaconHelperCounter, + ) +} + +var ( + consensusTriggeredDownloadCounterVec = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "hmy", + Subsystem: "staged_stream_sync", + Name: "consensus_trigger", + Help: "number of times consensus triggered download task", + }, + []string{"ShardID"}, + ) + + longRangeSyncedBlockCounterVec = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "hmy", + Subsystem: "staged_stream_sync", + Name: "num_blocks_synced_long_range", + Help: "number of blocks synced in long range sync", + }, + []string{"ShardID"}, + ) + + longRangeFailInsertedBlockCounterVec = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "hmy", + Subsystem: "staged_stream_sync", + Name: "num_blocks_failed_long_range", + Help: "number of blocks that failed to be inserted into the chain in long range sync", + }, + []string{"ShardID", "error"}, + ) + + numShortRangeCounterVec = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "hmy", + Subsystem: "staged_stream_sync", + Name: "num_short_range", + Help: "number of times short range sync was triggered", + }, + []string{"ShardID"}, + ) + + numFailedDownloadCounterVec = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "hmy", + Subsystem: "staged_stream_sync", + Name: "failed_download", + Help: "number of failed downloads", + }, + []string{"ShardID", "error"}, + ) + + numBlocksInsertedShortRangeHistogramVec = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "hmy", + Subsystem: "staged_stream_sync", + Name: "num_blocks_inserted_short_range", + Help: "number of blocks inserted for each short range sync", + // Buckets: 0.5, 1, 2, 4, 8, +INF (capped at 10) + Buckets: prometheus.ExponentialBuckets(0.5, 2, 5), + }, + []string{"ShardID"}, + ) + + numBlocksInsertedBeaconHelperCounter = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: "hmy", + Subsystem: "staged_stream_sync", + Name: "num_blocks_inserted_beacon_helper", + Help: "number of blocks inserted from beacon helper", + }, + ) +) + +func (d *Downloader) promLabels() prometheus.Labels { + sid := d.bc.ShardID() + return prometheus.Labels{"ShardID": fmt.Sprintf("%d", sid)} +} diff --git a/api/service/stagedstreamsync/service.go b/api/service/stagedstreamsync/service.go new file mode 100644 index 000000000..46b182fb5 --- /dev/null +++ b/api/service/stagedstreamsync/service.go @@ -0,0 +1,30 @@ +package stagedstreamsync + +import ( + "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/p2p" +) + +// StagedStreamSyncService is simply an adapter of downloaders, which supports block synchronization +type StagedStreamSyncService struct { + Downloaders *Downloaders +} + +// NewService creates a new downloader service +func NewService(host p2p.Host, bcs []core.BlockChain, config Config) *StagedStreamSyncService { + return &StagedStreamSyncService{ + Downloaders: NewDownloaders(host, bcs, config), + } +} + +// Start starts the service +func (s *StagedStreamSyncService) Start() error { + s.Downloaders.Start() + return nil +} + +// Stop stops the service +func (s *StagedStreamSyncService) Stop() error { + s.Downloaders.Close() + return nil +} diff --git a/api/service/stagedstreamsync/short_range_helper.go b/api/service/stagedstreamsync/short_range_helper.go new file mode 100644 index 000000000..90415c87c --- /dev/null +++ b/api/service/stagedstreamsync/short_range_helper.go @@ -0,0 +1,218 @@ +package stagedstreamsync + +import ( + "context" + "sync" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/harmony-one/harmony/core/types" + syncProto "github.com/harmony-one/harmony/p2p/stream/protocols/sync" + sttypes "github.com/harmony-one/harmony/p2p/stream/types" + "github.com/pkg/errors" + "github.com/rs/zerolog" +) + +type srHelper struct { + syncProtocol syncProtocol + + ctx context.Context + config Config + logger zerolog.Logger +} + +func (sh *srHelper) getHashChain(bns []uint64) ([]common.Hash, []sttypes.StreamID, error) { + results := newBlockHashResults(bns) + + var wg sync.WaitGroup + wg.Add(sh.config.Concurrency) + + for i := 0; i != sh.config.Concurrency; i++ { + go func(index int) { + defer wg.Done() + + hashes, stid, err := sh.doGetBlockHashesRequest(bns) + if err != nil { + sh.logger.Warn().Err(err).Str("StreamID", string(stid)). + Msg(WrapStagedSyncMsg("doGetBlockHashes return error")) + return + } + results.addResult(hashes, stid) + }(i) + } + wg.Wait() + + select { + case <-sh.ctx.Done(): + sh.logger.Info().Err(sh.ctx.Err()).Int("num blocks", results.numBlocksWithResults()).
+ Msg(WrapStagedSyncMsg("short range sync get hashes timed out")) + return nil, nil, sh.ctx.Err() + default: + } + + hashChain, wl := results.computeLongestHashChain() + sh.logger.Info().Int("hashChain size", len(hashChain)).Int("whitelist", len(wl)). + Msg(WrapStagedSyncMsg("computeLongestHashChain result")) + return hashChain, wl, nil +} + +func (sh *srHelper) getBlocksChain(bns []uint64) ([]*types.Block, sttypes.StreamID, error) { + return sh.doGetBlocksByNumbersRequest(bns) +} + +func (sh *srHelper) getBlocksByHashes(hashes []common.Hash, whitelist []sttypes.StreamID) ([]*types.Block, []sttypes.StreamID, error) { + ctx, cancel := context.WithCancel(sh.ctx) + defer cancel() + m := newGetBlocksByHashManager(hashes, whitelist) + + var ( + wg sync.WaitGroup + gErr error + errLock sync.Mutex + ) + + concurrency := sh.config.Concurrency + if concurrency > m.numRequests() { + concurrency = m.numRequests() + } + + wg.Add(concurrency) + for i := 0; i != concurrency; i++ { + go func(index int) { + defer wg.Done() + defer cancel() // it's ok to cancel context more than once + + for { + if m.isDone() { + return + } + hashes, wl, err := m.getNextHashes() + if err != nil { + errLock.Lock() + gErr = err + errLock.Unlock() + return + } + if len(hashes) == 0 { + select { + case <-time.After(200 * time.Millisecond): + continue + case <-ctx.Done(): + return + } + } + blocks, stid, err := sh.doGetBlocksByHashesRequest(ctx, hashes, wl) + if err != nil { + sh.logger.Warn().Err(err). + Str("StreamID", string(stid)). + Int("hashes", len(hashes)). + Int("index", index). + Msg(WrapStagedSyncMsg("getBlocksByHashes worker failed")) + m.handleResultError(hashes, stid) + } else { + m.addResult(hashes, blocks, stid) + } + } + }(i) + } + wg.Wait() + + if gErr != nil { + return nil, nil, gErr + } + select { + case <-sh.ctx.Done(): + res, _, _ := m.getResults() + sh.logger.Info().Err(sh.ctx.Err()).Int("num blocks", len(res)). + Msg(WrapStagedSyncMsg("short range sync get blocks timed out")) + return nil, nil, sh.ctx.Err() + default: + } + + return m.getResults() +} + +func (sh *srHelper) checkPrerequisites() error { + if sh.syncProtocol.NumStreams() < sh.config.Concurrency { + return ErrNotEnoughStreams + } + return nil +} + +func (sh *srHelper) prepareBlockHashNumbers(curNumber uint64) []uint64 { + + res := make([]uint64, 0, BlockHashesPerRequest) + + for bn := curNumber + 1; bn <= curNumber+uint64(BlockHashesPerRequest); bn++ { + res = append(res, bn) + } + return res +} + +func (sh *srHelper) doGetBlockHashesRequest(bns []uint64) ([]common.Hash, sttypes.StreamID, error) { + ctx, cancel := context.WithTimeout(sh.ctx, 1*time.Second) + defer cancel() + + hashes, stid, err := sh.syncProtocol.GetBlockHashes(ctx, bns) + if err != nil { + sh.logger.Warn().Err(err). + Interface("block numbers", bns). + Str("stream", string(stid)). + Msg(WrapStagedSyncMsg("failed to doGetBlockHashesRequest")) + return nil, stid, err + } + if len(hashes) != len(bns) { + sh.logger.Warn().Err(ErrUnexpectedBlockHashes). + Str("stream", string(stid)). 
+ Msg(WrapStagedSyncMsg("failed to doGetBlockHashesRequest")) + sh.syncProtocol.StreamFailed(stid, "unexpected get block hashes result delivered") + return nil, stid, ErrUnexpectedBlockHashes + } + return hashes, stid, nil +} + +func (sh *srHelper) doGetBlocksByNumbersRequest(bns []uint64) ([]*types.Block, sttypes.StreamID, error) { + ctx, cancel := context.WithTimeout(sh.ctx, 10*time.Second) + defer cancel() + + blocks, stid, err := sh.syncProtocol.GetBlocksByNumber(ctx, bns) + if err != nil { + sh.logger.Warn().Err(err). + Str("stream", string(stid)). + Msg(WrapStagedSyncMsg("failed to doGetBlockHashesRequest")) + return nil, stid, err + } + return blocks, stid, nil +} + +func (sh *srHelper) doGetBlocksByHashesRequest(ctx context.Context, hashes []common.Hash, wl []sttypes.StreamID) ([]*types.Block, sttypes.StreamID, error) { + ctx, cancel := context.WithTimeout(sh.ctx, 10*time.Second) + defer cancel() + + blocks, stid, err := sh.syncProtocol.GetBlocksByHashes(ctx, hashes, + syncProto.WithWhitelist(wl)) + if err != nil { + sh.logger.Warn().Err(err).Str("stream", string(stid)).Msg("failed to getBlockByHashes") + return nil, stid, err + } + if err := checkGetBlockByHashesResult(blocks, hashes); err != nil { + sh.logger.Warn().Err(err).Str("stream", string(stid)).Msg(WrapStagedSyncMsg("failed to getBlockByHashes")) + sh.syncProtocol.StreamFailed(stid, "failed to getBlockByHashes") + return nil, stid, err + } + return blocks, stid, nil +} + +func (sh *srHelper) removeStreams(sts []sttypes.StreamID) { + for _, st := range sts { + sh.syncProtocol.RemoveStream(st) + } +} + +// blameAllStreams only not to blame all whitelisted streams when the it's not the last block signature verification failed. +func (sh *srHelper) blameAllStreams(blocks types.Blocks, errIndex int, err error) bool { + if errors.As(err, &emptySigVerifyErr) && errIndex == len(blocks)-1 { + return false + } + return true +} diff --git a/api/service/stagedstreamsync/sig_verify.go b/api/service/stagedstreamsync/sig_verify.go new file mode 100644 index 000000000..649c6eaec --- /dev/null +++ b/api/service/stagedstreamsync/sig_verify.go @@ -0,0 +1,60 @@ +package stagedstreamsync + +import ( + "fmt" + + "github.com/harmony-one/harmony/core/types" + "github.com/harmony-one/harmony/crypto/bls" + "github.com/harmony-one/harmony/internal/chain" + "github.com/pkg/errors" +) + +var emptySigVerifyErr *sigVerifyErr + +type sigVerifyErr struct { + err error +} + +func (e *sigVerifyErr) Error() string { + return fmt.Sprintf("[VerifyHeaderSignature] %v", e.err.Error()) +} + +func verifyAndInsertBlocks(bc blockChain, blocks types.Blocks) (int, error) { + for i, block := range blocks { + if err := verifyAndInsertBlock(bc, block, blocks[i+1:]...); err != nil { + return i, err + } + } + return len(blocks), nil +} + +func verifyAndInsertBlock(bc blockChain, block *types.Block, nextBlocks ...*types.Block) error { + var ( + sigBytes bls.SerializedSignature + bitmap []byte + err error + ) + if len(nextBlocks) > 0 { + // get commit sig from the next block + next := nextBlocks[0] + sigBytes = next.Header().LastCommitSignature() + bitmap = next.Header().LastCommitBitmap() + } else { + // get commit sig from current block + sigBytes, bitmap, err = chain.ParseCommitSigAndBitmap(block.GetCurrentCommitSig()) + if err != nil { + return errors.Wrap(err, "parse commitSigAndBitmap") + } + } + + if err := bc.Engine().VerifyHeaderSignature(bc, block.Header(), sigBytes, bitmap); err != nil { + return &sigVerifyErr{err} + } + if err := bc.Engine().VerifyHeader(bc, 
diff --git a/api/service/stagedstreamsync/sig_verify.go b/api/service/stagedstreamsync/sig_verify.go new file mode 100644 index 000000000..649c6eaec --- /dev/null +++ b/api/service/stagedstreamsync/sig_verify.go @@ -0,0 +1,60 @@ +package stagedstreamsync + +import ( + "fmt" + + "github.com/harmony-one/harmony/core/types" + "github.com/harmony-one/harmony/crypto/bls" + "github.com/harmony-one/harmony/internal/chain" + "github.com/pkg/errors" +) + +var emptySigVerifyErr *sigVerifyErr + +type sigVerifyErr struct { + err error +} + +func (e *sigVerifyErr) Error() string { + return fmt.Sprintf("[VerifyHeaderSignature] %v", e.err.Error()) +} + +func verifyAndInsertBlocks(bc blockChain, blocks types.Blocks) (int, error) { + for i, block := range blocks { + if err := verifyAndInsertBlock(bc, block, blocks[i+1:]...); err != nil { + return i, err + } + } + return len(blocks), nil +} + +func verifyAndInsertBlock(bc blockChain, block *types.Block, nextBlocks ...*types.Block) error { + var ( + sigBytes bls.SerializedSignature + bitmap []byte + err error + ) + if len(nextBlocks) > 0 { + // get commit sig from the next block + next := nextBlocks[0] + sigBytes = next.Header().LastCommitSignature() + bitmap = next.Header().LastCommitBitmap() + } else { + // get commit sig from current block + sigBytes, bitmap, err = chain.ParseCommitSigAndBitmap(block.GetCurrentCommitSig()) + if err != nil { + return errors.Wrap(err, "parse commitSigAndBitmap") + } + } + + if err := bc.Engine().VerifyHeaderSignature(bc, block.Header(), sigBytes, bitmap); err != nil { + return &sigVerifyErr{err} + } + if err := bc.Engine().VerifyHeader(bc, block.Header(), true); err != nil { + return errors.Wrap(err, "[VerifyHeader]") + } + if _, err := bc.InsertChain(types.Blocks{block}, false); err != nil { + return errors.Wrap(err, "[InsertChain]") + } + return nil +} diff --git a/api/service/stagedstreamsync/stage.go b/api/service/stagedstreamsync/stage.go new file mode 100644 index 000000000..255560a0f --- /dev/null +++ b/api/service/stagedstreamsync/stage.go @@ -0,0 +1,109 @@ +package stagedstreamsync + +import ( + "context" + + "github.com/ethereum/go-ethereum/common" + sttypes "github.com/harmony-one/harmony/p2p/stream/types" + "github.com/ledgerwatch/erigon-lib/kv" +) + +type ExecFunc func(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) error + +type StageHandler interface { + // Exec is the execution function for the stage to move forward. + // * firstCycle - is it the first cycle of syncing. + // * invalidBlockRevert - whether the execution is to solve the invalid block + // * s - is the current state of the stage and contains stage data. + // * reverter - if the stage needs to cause reverting, `reverter` methods can be used. + Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) error + + // Revert is the reverting logic of the stage. + // * firstCycle - is it the first cycle of syncing. + // * u - contains information about the revert itself. + // * s - represents the state of this stage at the beginning of revert. + Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) error + + // CleanUp is the execution function for the stage to prune old data. + // * firstCycle - is it the first cycle of syncing. + // * p - is the current state of the stage and contains stage data. + CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) error + + // SetStageContext updates the context for the stage + SetStageContext(ctx context.Context) +} + +// Stage is a single sync stage in staged sync. +type Stage struct { + // ID of the sync stage. Should not be empty and should be unique. It is recommended to prefix it with reverse domain to avoid clashes (`com.example.my-stage`). + ID SyncStageID + // Handler handles the logic for the stage + Handler StageHandler + // Description is a string that is shown in the logs. + Description string + // DisabledDescription shows in the log with a message if the stage is disabled. Here, you can show which command line flags should be provided to enable the stage. + DisabledDescription string + // Disabled defines if the stage is disabled. It is set when the stage is built by its `StageBuilder`. + Disabled bool +} + +// StageState is the state of the stage. +type StageState struct { + state *StagedStreamSync + ID SyncStageID + BlockNumber uint64 // BlockNumber is the current block number of the stage at the beginning of the state execution. +} + +func (s *StageState) LogPrefix() string { return s.state.LogPrefix() } + +func (s *StageState) CurrentStageProgress(db kv.Getter) (uint64, error) { + return GetStageProgress(db, s.ID, s.state.isBeacon) +} + +func (s *StageState) StageProgress(db kv.Getter, id SyncStageID) (uint64, error) { + return GetStageProgress(db, id, s.state.isBeacon) +} + +// Update updates the stage state (current block number) in the database. Can be called multiple times during stage execution. +func (s *StageState) Update(db kv.Putter, newBlockNum uint64) error { + return SaveStageProgress(db, s.ID, s.state.isBeacon, newBlockNum) +} +func (s *StageState) UpdateCleanUp(db kv.Putter, blockNum uint64) error { + return SaveStageCleanUpProgress(db, s.ID, s.state.isBeacon, blockNum) +} + +// Reverter allows the stage to cause a revert. +type Reverter interface { + // RevertTo begins staged sync revert to the specified block. + RevertTo(revertPoint uint64, invalidBlockNumber uint64, invalidBlockHash common.Hash, invalidBlockStreamID sttypes.StreamID) +} + +// RevertState contains the information about the revert. +type RevertState struct { + ID SyncStageID + RevertPoint uint64 // RevertPoint is the block to revert to. + state *StagedStreamSync +} + +func (u *RevertState) LogPrefix() string { return u.state.LogPrefix() } + +// Done updates the DB state of the stage. +func (u *RevertState) Done(db kv.Putter) error { + return SaveStageProgress(db, u.ID, u.state.isBeacon, u.RevertPoint) +} + +// CleanUpState contains the state of the cleanup process for a specific stage +type CleanUpState struct { + ID SyncStageID + ForwardProgress uint64 // progress of the stage forward move + CleanUpProgress uint64 // progress of the stage prune move; after a sync cycle it becomes equal to ForwardProgress via the Done() method + state *StagedStreamSync +} + +func (s *CleanUpState) LogPrefix() string { return s.state.LogPrefix() + " CleanUp" } +func (s *CleanUpState) Done(db kv.Putter) error { + return SaveStageCleanUpProgress(db, s.ID, s.state.isBeacon, s.ForwardProgress) +} +func (s *CleanUpState) DoneAt(db kv.Putter, blockNum uint64) error { + return SaveStageCleanUpProgress(db, s.ID, s.state.isBeacon, blockNum) +}
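For orientation, a minimal do-nothing implementation of the StageHandler interface above could look like the following (a sketch only; no such stage exists in this change):

    // StageNoop satisfies StageHandler without doing any work.
    type StageNoop struct{ ctx context.Context }

    func (st *StageNoop) Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) error {
        return nil // nothing to move forward
    }

    func (st *StageNoop) Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) error {
        return u.Done(tx) // record the revert point as the new progress
    }

    func (st *StageNoop) CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) error {
        return nil // nothing to prune
    }

    func (st *StageNoop) SetStageContext(ctx context.Context) { st.ctx = ctx }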
diff --git a/api/service/stagedstreamsync/stage_bodies.go b/api/service/stagedstreamsync/stage_bodies.go new file mode 100644 index 000000000..1fbfcbc2e --- /dev/null +++ b/api/service/stagedstreamsync/stage_bodies.go @@ -0,0 +1,420 @@ +package stagedstreamsync + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/core/types" + "github.com/harmony-one/harmony/internal/utils" + sttypes "github.com/harmony-one/harmony/p2p/stream/types" + "github.com/ledgerwatch/erigon-lib/kv" + "github.com/pkg/errors" +) + +type StageBodies struct { + configs StageBodiesCfg +} +type StageBodiesCfg struct { + ctx context.Context + bc core.BlockChain + db kv.RwDB + blockDBs []kv.RwDB + concurrency int + protocol syncProtocol + isBeacon bool + logProgress bool +} + +func NewStageBodies(cfg StageBodiesCfg) *StageBodies { + return &StageBodies{ + configs: cfg, + } +} + +func NewStageBodiesCfg(ctx context.Context, bc core.BlockChain, db kv.RwDB, blockDBs []kv.RwDB, concurrency int, protocol syncProtocol, isBeacon bool, logProgress bool) StageBodiesCfg { + return StageBodiesCfg{ + ctx: ctx, + bc: bc, + db: db, + blockDBs: blockDBs, + concurrency: concurrency, + protocol: protocol, + isBeacon: isBeacon, + logProgress: logProgress, + } +} + +func (b *StageBodies) SetStageContext(ctx context.Context) { + b.configs.ctx = ctx +} + +// Exec progresses Bodies stage in the forward direction +func (b *StageBodies) Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { + + useInternalTx := tx == nil + + if invalidBlockRevert { + return b.redownloadBadBlock(s) + } + + // for short range sync, skip this stage + if !s.state.initSync { + return nil + } + + maxHeight := s.state.status.targetBN + currentHead := 
b.configs.bc.CurrentBlock().NumberU64() + if currentHead >= maxHeight { + return nil + } + currProgress := uint64(0) + targetHeight := s.state.currentCycle.TargetHeight + // isBeacon := s.state.isBeacon + // isLastCycle := targetHeight >= maxHeight + + if errV := CreateView(b.configs.ctx, b.configs.db, tx, func(etx kv.Tx) error { + if currProgress, err = s.CurrentStageProgress(etx); err != nil { + return err + } + return nil + }); errV != nil { + return errV + } + + if currProgress == 0 { + if err := b.cleanAllBlockDBs(); err != nil { + return err + } + currProgress = currentHead + } + + if currProgress >= targetHeight { + return nil + } + + // size := uint64(0) + startTime := time.Now() + // startBlock := currProgress + if b.configs.logProgress { + fmt.Print("\033[s") // save the cursor position + } + + if useInternalTx { + var err error + tx, err = b.configs.db.BeginRw(context.Background()) + if err != nil { + return err + } + defer tx.Rollback() + } + + // Fetch blocks from neighbors + s.state.gbm = newBlockDownloadManager(tx, b.configs.bc, targetHeight, s.state.logger) + + // Setup workers to fetch blocks from remote nodes + var wg sync.WaitGroup + + for i := 0; i != s.state.config.Concurrency; i++ { + wg.Add(1) + go b.runBlockWorkerLoop(s.state.gbm, &wg, i, startTime) + } + + wg.Wait() + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + + return nil +} + +// runBlockWorkerLoop creates a work loop for downloading blocks +func (b *StageBodies) runBlockWorkerLoop(gbm *blockDownloadManager, wg *sync.WaitGroup, loopID int, startTime time.Time) { + + currentBlock := int(b.configs.bc.CurrentBlock().NumberU64()) + + defer wg.Done() + + for { + select { + case <-b.configs.ctx.Done(): + return + default: + } + batch := gbm.GetNextBatch() + if len(batch) == 0 { + select { + case <-b.configs.ctx.Done(): + return + case <-time.After(100 * time.Millisecond): + return + } + } + + blockBytes, sigBytes, stid, err := b.downloadRawBlocks(batch) + if err != nil { + if !errors.Is(err, context.Canceled) { + b.configs.protocol.StreamFailed(stid, "downloadRawBlocks failed") + } + utils.Logger().Error(). + Err(err). + Str("stream", string(stid)). + Interface("block numbers", batch).
+ Msg(WrapStagedSyncMsg("downloadRawBlocks failed")) + err = errors.Wrap(err, "request error") + gbm.HandleRequestError(batch, err, stid) + } else { + if err = b.saveBlocks(gbm.tx, batch, blockBytes, sigBytes, loopID, stid); err != nil { + panic(ErrSaveBlocksToDbFailed) + } + gbm.HandleRequestResult(batch, blockBytes, sigBytes, loopID, stid) + if b.configs.logProgress { + //calculating block download speed + dt := time.Now().Sub(startTime).Seconds() + speed := float64(0) + if dt > 0 { + speed = float64(len(gbm.bdd)) / dt + } + blockSpeed := fmt.Sprintf("%.2f", speed) + + fmt.Print("\033[u\033[K") // restore the cursor position and clear the line + fmt.Println("downloaded blocks:", currentBlock+len(gbm.bdd), "/", int(gbm.targetBN), "(", blockSpeed, "blocks/s", ")") + } + } + } +} + +// redownloadBadBlock tries to redownload the bad block from other streams +func (b *StageBodies) redownloadBadBlock(s *StageState) error { + + batch := make([]uint64, 1) + batch = append(batch, s.state.invalidBlock.Number) + + for { + if b.configs.protocol.NumStreams() == 0 { + return errors.Errorf("re-download bad block from all streams failed") + } + blockBytes, sigBytes, stid, err := b.downloadRawBlocks(batch) + if err != nil { + if !errors.Is(err, context.Canceled) { + b.configs.protocol.StreamFailed(stid, "tried to re-download bad block from this stream, but downloadRawBlocks failed") + } + continue + } + isOneOfTheBadStreams := false + for _, id := range s.state.invalidBlock.StreamID { + if id == stid { + b.configs.protocol.RemoveStream(stid) + isOneOfTheBadStreams = true + break + } + } + if isOneOfTheBadStreams { + continue + } + s.state.gbm.SetDownloadDetails(batch, 0, stid) + if errU := b.configs.blockDBs[0].Update(context.Background(), func(tx kv.RwTx) error { + if err = b.saveBlocks(tx, batch, blockBytes, sigBytes, 0, stid); err != nil { + return errors.Errorf("[STAGED_STREAM_SYNC] saving re-downloaded bad block to db failed.") + } + return nil + }); errU != nil { + continue + } + break + } + return nil +} + +func (b *StageBodies) downloadBlocks(bns []uint64) ([]*types.Block, sttypes.StreamID, error) { + ctx, cancel := context.WithTimeout(b.configs.ctx, 10*time.Second) + defer cancel() + + blocks, stid, err := b.configs.protocol.GetBlocksByNumber(ctx, bns) + if err != nil { + return nil, stid, err + } + if err := validateGetBlocksResult(bns, blocks); err != nil { + return nil, stid, err + } + return blocks, stid, nil +} + +func (b *StageBodies) downloadRawBlocks(bns []uint64) ([][]byte, [][]byte, sttypes.StreamID, error) { + ctx, cancel := context.WithTimeout(b.configs.ctx, 10*time.Second) + defer cancel() + + return b.configs.protocol.GetRawBlocksByNumber(ctx, bns) +} + +func validateGetBlocksResult(requested []uint64, result []*types.Block) error { + if len(result) != len(requested) { + return fmt.Errorf("unexpected number of blocks delivered: %v / %v", len(result), len(requested)) + } + for i, block := range result { + if block != nil && block.NumberU64() != requested[i] { + return fmt.Errorf("block with unexpected number delivered: %v / %v", block.NumberU64(), requested[i]) + } + } + return nil +} + +// saveBlocks saves the blocks into db +func (b *StageBodies) saveBlocks(tx kv.RwTx, bns []uint64, blockBytes [][]byte, sigBytes [][]byte, loopID int, stid sttypes.StreamID) error { + + tx, err := b.configs.blockDBs[loopID].BeginRw(context.Background()) + if err != nil { + return err + } + defer tx.Rollback() + + for i := uint64(0); i < uint64(len(blockBytes)); i++ { + block := blockBytes[i] + sig 
:= sigBytes[i] + if block == nil { + continue + } + + blkKey := marshalData(bns[i]) + + if err := tx.Put(BlocksBucket, blkKey, block); err != nil { + utils.Logger().Error(). + Err(err). + Uint64("block height", bns[i]). + Msg("[STAGED_STREAM_SYNC] adding block to db failed") + return err + } + // sigKey := []byte("s" + string(bns[i])) + if err := tx.Put(BlockSignaturesBucket, blkKey, sig); err != nil { + utils.Logger().Error(). + Err(err). + Uint64("block height", bns[i]). + Msg("[STAGED_STREAM_SYNC] adding block sig to db failed") + return err + } + } + + if err := tx.Commit(); err != nil { + return err + } + + return nil +} + +func (b *StageBodies) saveProgress(s *StageState, progress uint64, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = b.configs.db.BeginRw(context.Background()) + if err != nil { + return err + } + defer tx.Rollback() + } + + // save progress + if err = s.Update(tx, progress); err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_SYNC] saving progress for block bodies stage failed") + return ErrSavingBodiesProgressFail + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (b *StageBodies) cleanBlocksDB(loopID int) (err error) { + + tx, errb := b.configs.blockDBs[loopID].BeginRw(b.configs.ctx) + if errb != nil { + return errb + } + defer tx.Rollback() + + // clean block bodies db + if err = tx.ClearBucket(BlocksBucket); err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_STREAM_SYNC] clear blocks bucket after revert failed") + return err + } + // clean block signatures db + if err = tx.ClearBucket(BlockSignaturesBucket); err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_STREAM_SYNC] clear block signatures bucket after revert failed") + return err + } + + if err = tx.Commit(); err != nil { + return err + } + + return nil +} + +func (b *StageBodies) cleanAllBlockDBs() (err error) { + //clean all blocks DBs + for i := 0; i < b.configs.concurrency; i++ { + if err := b.cleanBlocksDB(i); err != nil { + return err + } + } + return nil +} + +func (b *StageBodies) Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) { + + //clean all blocks DBs + if err := b.cleanAllBlockDBs(); err != nil { + return err + } + + useInternalTx := tx == nil + if useInternalTx { + tx, err = b.configs.db.BeginRw(b.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + // save progress + currentHead := b.configs.bc.CurrentBlock().NumberU64() + if err = s.Update(tx, currentHead); err != nil { + utils.Logger().Error(). + Err(err). 
+ Msgf("[STAGED_SYNC] saving progress for block bodies stage after revert failed") + return err + } + + if err = u.Done(tx); err != nil { + return err + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (b *StageBodies) CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) { + + //clean all blocks DBs + if err := b.cleanAllBlockDBs(); err != nil { + return err + } + + return nil +} diff --git a/api/service/stagedstreamsync/stage_epoch.go b/api/service/stagedstreamsync/stage_epoch.go new file mode 100644 index 000000000..77dc57bfd --- /dev/null +++ b/api/service/stagedstreamsync/stage_epoch.go @@ -0,0 +1,198 @@ +package stagedstreamsync + +import ( + "context" + + "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/internal/utils" + "github.com/harmony-one/harmony/shard" + "github.com/ledgerwatch/erigon-lib/kv" + "github.com/pkg/errors" +) + +type StageEpoch struct { + configs StageEpochCfg +} + +type StageEpochCfg struct { + ctx context.Context + bc core.BlockChain + db kv.RwDB +} + +func NewStageEpoch(cfg StageEpochCfg) *StageEpoch { + return &StageEpoch{ + configs: cfg, + } +} + +func NewStageEpochCfg(ctx context.Context, bc core.BlockChain, db kv.RwDB) StageEpochCfg { + return StageEpochCfg{ + ctx: ctx, + bc: bc, + db: db, + } +} + +func (sr *StageEpoch) SetStageContext(ctx context.Context) { + sr.configs.ctx = ctx +} + +func (sr *StageEpoch) Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) error { + + // no need to update epoch chain if we are redoing the stages because of bad block + if invalidBlockRevert { + return nil + } + // for long range sync, skip this stage + if s.state.initSync { + return nil + } + + if sr.configs.bc.ShardID() != shard.BeaconChainShardID || s.state.isBeaconNode { + return nil + } + + // doShortRangeSyncForEpochSync + n, err := sr.doShortRangeSyncForEpochSync(s) + s.state.inserted = n + if err != nil { + return err + } + + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = sr.configs.db.BeginRw(sr.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + + return nil +} + +func (sr *StageEpoch) doShortRangeSyncForEpochSync(s *StageState) (int, error) { + + numShortRangeCounterVec.With(s.state.promLabels()).Inc() + + srCtx, cancel := context.WithTimeout(s.state.ctx, ShortRangeTimeout) + defer cancel() + + //TODO: merge srHelper with StageEpochConfig + sh := &srHelper{ + syncProtocol: s.state.protocol, + ctx: srCtx, + config: s.state.config, + logger: utils.Logger().With().Str("mode", "epoch chain short range").Logger(), + } + + if err := sh.checkPrerequisites(); err != nil { + return 0, errors.Wrap(err, "prerequisite") + } + curBN := s.state.bc.CurrentBlock().NumberU64() + bns := make([]uint64, 0, BlocksPerRequest) + // in epoch chain, we have only the last block of each epoch, so, the current + // block's epoch number shows the last epoch we have. 
diff --git a/api/service/stagedstreamsync/stage_finish.go b/api/service/stagedstreamsync/stage_finish.go new file mode 100644 index 000000000..9039b5588 --- /dev/null +++ b/api/service/stagedstreamsync/stage_finish.go @@ -0,0 +1,114 @@ +package stagedstreamsync + +import ( + "context" + + "github.com/ledgerwatch/erigon-lib/kv" +) + +type StageFinish struct { + configs StageFinishCfg +} + +type StageFinishCfg struct { + ctx context.Context + db kv.RwDB +} + +func NewStageFinish(cfg StageFinishCfg) *StageFinish { + return &StageFinish{ + configs: cfg, + } +} + +func NewStageFinishCfg(ctx context.Context, db kv.RwDB) StageFinishCfg { + return StageFinishCfg{ + ctx: ctx, + db: db, + } +} + +func (finish *StageFinish) SetStageContext(ctx context.Context) { + finish.configs.ctx = ctx +} + +func (finish *StageFinish) Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) error { + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = finish.configs.db.BeginRw(context.Background()) + if err != nil { + return err + } + defer tx.Rollback() + } + + // TODO: prepare indices (useful for RPC) and finalize + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + + return nil +} + +func (bh *StageFinish) clearBucket(tx kv.RwTx, isBeacon bool) error { + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = bh.configs.db.BeginRw(context.Background()) + if err != nil { + return err + } + defer tx.Rollback() + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (finish *StageFinish) Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = finish.configs.db.BeginRw(finish.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + if err = u.Done(tx); err != nil { + return err + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (finish *StageFinish) CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = finish.configs.db.BeginRw(finish.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +}
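StageFinish above shows, in its purest form, the transaction-ownership idiom every stage in this change repeats: work inside the caller's tx when one is supplied, otherwise open, commit, and roll back your own. Distilled (sketch; `db`, `ctx`, and `doWork` are placeholders):

    useInternalTx := tx == nil
    if useInternalTx {
        if tx, err = db.BeginRw(ctx); err != nil {
            return err
        }
        defer tx.Rollback() // harmless after a successful Commit
    }
    if err := doWork(tx); err != nil {
        return err
    }
    if useInternalTx {
        return tx.Commit() // we opened the tx, so we commit it
    }
    return nil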
diff --git a/api/service/stagedstreamsync/stage_heads.go b/api/service/stagedstreamsync/stage_heads.go new file mode 100644 index 000000000..8e1531a5e --- /dev/null +++ b/api/service/stagedstreamsync/stage_heads.go @@ -0,0 +1,155 @@ +package stagedstreamsync + +import ( + "context" + + "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/internal/utils" + "github.com/ledgerwatch/erigon-lib/kv" +) + +type StageHeads struct { + configs StageHeadsCfg +} + +type StageHeadsCfg struct { + ctx context.Context + bc core.BlockChain + db kv.RwDB +} + +func NewStageHeads(cfg StageHeadsCfg) *StageHeads { + return &StageHeads{ + configs: cfg, + } +} + +func NewStageHeadersCfg(ctx context.Context, bc core.BlockChain, db kv.RwDB) StageHeadsCfg { + return StageHeadsCfg{ + ctx: ctx, + bc: bc, + db: db, + } +} + +func (heads *StageHeads) SetStageContext(ctx context.Context) { + heads.configs.ctx = ctx +} + +func (heads *StageHeads) Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) error { + + // no need to update target if we are redoing the stages because of bad block + if invalidBlockRevert { + return nil + } + + // no need for short range sync + if !s.state.initSync { + return nil + } + + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = heads.configs.db.BeginRw(heads.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + maxHeight := s.state.status.targetBN + maxBlocksPerSyncCycle := uint64(1024) // TODO: should be in config -> s.state.MaxBlocksPerSyncCycle + currentHeight := heads.configs.bc.CurrentBlock().NumberU64() + s.state.currentCycle.TargetHeight = maxHeight + targetHeight := uint64(0) + if errV := CreateView(heads.configs.ctx, heads.configs.db, tx, func(etx kv.Tx) (err error) { + if targetHeight, err = s.CurrentStageProgress(etx); err != nil { + return err + } + return nil + }); errV != nil { + return errV + } + + if currentHeight >= maxHeight { + return nil + } + + // if the current height is ahead of the target height, we need to recalculate the target height + if currentHeight >= targetHeight { + if 
maxHeight <= currentHeight { + return nil + } + utils.Logger().Info(). + Uint64("max blocks per sync cycle", maxBlocksPerSyncCycle). + Uint64("maxPeersHeight", maxHeight). + Msgf(WrapStagedSyncMsg("current height is ahead of target height, target height is readjusted to max peers height")) + targetHeight = maxHeight + } + + if targetHeight > maxHeight { + targetHeight = maxHeight + } + + if maxBlocksPerSyncCycle > 0 && targetHeight-currentHeight > maxBlocksPerSyncCycle { + targetHeight = currentHeight + maxBlocksPerSyncCycle + } + + s.state.currentCycle.TargetHeight = targetHeight + + if err := s.Update(tx, targetHeight); err != nil { + utils.Logger().Error(). + Err(err). + Msgf(WrapStagedSyncMsg("saving progress for headers stage failed")) + return err + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + + return nil +} + +func (heads *StageHeads) Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = heads.configs.db.BeginRw(context.Background()) + if err != nil { + return err + } + defer tx.Rollback() + } + + if err = u.Done(tx); err != nil { + return err + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (heads *StageHeads) CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = heads.configs.db.BeginRw(context.Background()) + if err != nil { + return err + } + defer tx.Rollback() + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} diff --git a/api/service/stagedstreamsync/stage_short_range.go b/api/service/stagedstreamsync/stage_short_range.go new file mode 100644 index 000000000..75f51ee1e --- /dev/null +++ b/api/service/stagedstreamsync/stage_short_range.go @@ -0,0 +1,200 @@ +package stagedstreamsync + +import ( + "context" + + "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/internal/utils" + sttypes "github.com/harmony-one/harmony/p2p/stream/types" + "github.com/harmony-one/harmony/shard" + "github.com/ledgerwatch/erigon-lib/kv" + "github.com/pkg/errors" +) + +type StageShortRange struct { + configs StageShortRangeCfg +} + +type StageShortRangeCfg struct { + ctx context.Context + bc core.BlockChain + db kv.RwDB +} + +func NewStageShortRange(cfg StageShortRangeCfg) *StageShortRange { + return &StageShortRange{ + configs: cfg, + } +} + +func NewStageShortRangeCfg(ctx context.Context, bc core.BlockChain, db kv.RwDB) StageShortRangeCfg { + return StageShortRangeCfg{ + ctx: ctx, + bc: bc, + db: db, + } +} + +func (sr *StageShortRange) SetStageContext(ctx context.Context) { + sr.configs.ctx = ctx +} + +func (sr *StageShortRange) Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) error { + + // no need to do short range if we are redoing the stages because of bad block + if invalidBlockRevert { + return nil + } + + // for long range sync, skip this stage + if s.state.initSync { + return nil + } + + if sr.configs.bc.ShardID() == shard.BeaconChainShardID && !s.state.isBeaconNode { + return nil + } + + // do short range sync + n, err := sr.doShortRangeSync(s) + s.state.inserted = n + if err != nil { + return err + } + + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = sr.configs.db.BeginRw(sr.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + if useInternalTx { + if err := 
tx.Commit(); err != nil { + return err + } + } + + return nil +} + +// doShortRangeSync does the short range sync. +// Compared with long range sync, short range sync is more focused on syncing to the latest block. +// It consists of 3 steps: +// 1. Obtain the block hashes and compute the longest hash chain. +// 2. Get blocks by hashes from the computed hash chain. +// 3. Insert the blocks into the blockchain. +func (sr *StageShortRange) doShortRangeSync(s *StageState) (int, error) { + + numShortRangeCounterVec.With(s.state.promLabels()).Inc() + + srCtx, cancel := context.WithTimeout(s.state.ctx, ShortRangeTimeout) + defer cancel() + + sh := &srHelper{ + syncProtocol: s.state.protocol, + ctx: srCtx, + config: s.state.config, + logger: utils.Logger().With().Str("mode", "short range").Logger(), + } + + if err := sh.checkPrerequisites(); err != nil { + return 0, errors.Wrap(err, "prerequisite") + } + curBN := sr.configs.bc.CurrentBlock().NumberU64() + blkNums := sh.prepareBlockHashNumbers(curBN) + hashChain, whitelist, err := sh.getHashChain(blkNums) + if err != nil { + return 0, errors.Wrap(err, "getHashChain") + } + + if len(hashChain) == 0 { + // short circuit when no sync is needed + return 0, nil + } + + expEndBN := curBN + uint64(len(hashChain)) + utils.Logger().Info().Uint64("current number", curBN). + Uint64("target number", expEndBN). + Interface("hashChain", hashChain). + Msg("short range start syncing") + + s.state.status.setTargetBN(expEndBN) + + s.state.status.startSyncing() + defer func() { + utils.Logger().Info().Msg("short range finished syncing") + s.state.status.finishSyncing() + }() + + blocks, stids, err := sh.getBlocksByHashes(hashChain, whitelist) + if err != nil { + utils.Logger().Warn().Err(err).Msg("getBlocksByHashes failed") + if !errors.Is(err, context.Canceled) { + sh.removeStreams(whitelist) // Remote nodes cannot provide blocks with target hashes + } + return 0, errors.Wrap(err, "getBlocksByHashes") + } + + utils.Logger().Info().Int("num blocks", len(blocks)).Msg("getBlockByHashes result") + + n, err := verifyAndInsertBlocks(sr.configs.bc, blocks) + numBlocksInsertedShortRangeHistogramVec.With(s.state.promLabels()).Observe(float64(n)) + if err != nil { + utils.Logger().Warn().Err(err).Int("blocks inserted", n).Msg("Insert block failed") + if sh.blameAllStreams(blocks, n, err) { + sh.removeStreams(whitelist) // Data provided by remote nodes is corrupted + } else { + // It is the last block that gives a wrong commit sig. Blame the provider of the last block.
+ st2Blame := stids[len(stids)-1] + sh.removeStreams([]sttypes.StreamID{st2Blame}) + } + return n, err + } + utils.Logger().Info().Err(err).Int("blocks inserted", n).Msg("Insert block success") + + return n, nil +} + +func (sr *StageShortRange) Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = sr.configs.db.BeginRw(context.Background()) + if err != nil { + return err + } + defer tx.Rollback() + } + + if err = u.Done(tx); err != nil { + return err + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (sr *StageShortRange) CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = sr.configs.db.BeginRw(context.Background()) + if err != nil { + return err + } + defer tx.Rollback() + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} diff --git a/api/service/stagedstreamsync/stage_state.go b/api/service/stagedstreamsync/stage_state.go new file mode 100644 index 000000000..9eda04247 --- /dev/null +++ b/api/service/stagedstreamsync/stage_state.go @@ -0,0 +1,295 @@ +package stagedstreamsync + +import ( + "context" + "fmt" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/rlp" + "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/core/types" + "github.com/harmony-one/harmony/internal/utils" + "github.com/ledgerwatch/erigon-lib/kv" + "github.com/prometheus/client_golang/prometheus" + "github.com/rs/zerolog" +) + +type StageStates struct { + configs StageStatesCfg +} +type StageStatesCfg struct { + ctx context.Context + bc core.BlockChain + db kv.RwDB + blockDBs []kv.RwDB + concurrency int + logger zerolog.Logger + logProgress bool +} + +func NewStageStates(cfg StageStatesCfg) *StageStates { + return &StageStates{ + configs: cfg, + } +} + +func NewStageStatesCfg(ctx context.Context, + bc core.BlockChain, + db kv.RwDB, + blockDBs []kv.RwDB, + concurrency int, + logger zerolog.Logger, + logProgress bool) StageStatesCfg { + + return StageStatesCfg{ + ctx: ctx, + bc: bc, + db: db, + blockDBs: blockDBs, + concurrency: concurrency, + logger: logger, + logProgress: logProgress, + } +} + +func (stg *StageStates) SetStageContext(ctx context.Context) { + stg.configs.ctx = ctx +} + +// Exec progresses States stage in the forward direction +func (stg *StageStates) Exec(firstCycle bool, invalidBlockRevert bool, s *StageState, reverter Reverter, tx kv.RwTx) (err error) { + + // for short range sync, skip this step + if !s.state.initSync { + return nil + } + + maxHeight := s.state.status.targetBN + currentHead := stg.configs.bc.CurrentBlock().NumberU64() + if currentHead >= maxHeight { + return nil + } + currProgress := stg.configs.bc.CurrentBlock().NumberU64() + targetHeight := s.state.currentCycle.TargetHeight + if currProgress >= targetHeight { + return nil + } + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = stg.configs.db.BeginRw(stg.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + // isLastCycle := targetHeight >= maxHeight + startTime := time.Now() + startBlock := currProgress + pl := s.state.promLabels() + gbm := s.state.gbm + + // prepare db transactions + txs := make([]kv.RwTx, stg.configs.concurrency) + for i := 0; i < stg.configs.concurrency; i++ { + txs[i], err = stg.configs.blockDBs[i].BeginRw(context.Background()) + if 
err != nil { + return err + } + } + + defer func() { + for i := 0; i < stg.configs.concurrency; i++ { + txs[i].Rollback() + } + }() + + if stg.configs.logProgress { + fmt.Print("\033[s") // save the cursor position + } + + for i := currProgress + 1; i <= targetHeight; i++ { + blkKey := marshalData(i) + loopID, streamID := gbm.GetDownloadDetails(i) + + blockBytes, err := txs[loopID].GetOne(BlocksBucket, blkKey) + if err != nil { + return err + } + sigBytes, err := txs[loopID].GetOne(BlockSignaturesBucket, blkKey) + if err != nil { + return err + } + + // if the block size is invalid, we have to break the update-state loop; + // no rollback is needed, because the latest batch hasn't been added to the chain yet + sz := len(blockBytes) + if sz <= 1 { + utils.Logger().Error(). + Uint64("block number", i). + Msg("block size invalid") + invalidBlockHash := common.Hash{} + s.state.protocol.StreamFailed(streamID, "zero bytes block is received from stream") + reverter.RevertTo(stg.configs.bc.CurrentBlock().NumberU64(), i, invalidBlockHash, streamID) + return ErrInvalidBlockBytes + } + + var block *types.Block + if err := rlp.DecodeBytes(blockBytes, &block); err != nil { + utils.Logger().Error(). + Uint64("block number", i). + Msg("failed to decode block bytes") + s.state.protocol.StreamFailed(streamID, "invalid block is received from stream") + invalidBlockHash := common.Hash{} + reverter.RevertTo(stg.configs.bc.CurrentBlock().NumberU64(), i, invalidBlockHash, streamID) + return ErrInvalidBlockBytes + } + if sigBytes != nil { + block.SetCurrentCommitSig(sigBytes) + } + + if block.NumberU64() != i { + s.state.protocol.StreamFailed(streamID, "invalid block with unmatched number is received from stream") + invalidBlockHash := block.Hash() + reverter.RevertTo(stg.configs.bc.CurrentBlock().NumberU64(), i, invalidBlockHash, streamID) + return ErrInvalidBlockNumber + } + + if err := verifyAndInsertBlock(stg.configs.bc, block); err != nil { + stg.configs.logger.Warn().Err(err).Uint64("cycle target block", targetHeight). + Uint64("block number", block.NumberU64()). + Msg(WrapStagedSyncMsg("insert blocks failed in long range")) + s.state.protocol.StreamFailed(streamID, "unverifiable invalid block is received from stream") + invalidBlockHash := block.Hash() + reverter.RevertTo(stg.configs.bc.CurrentBlock().NumberU64(), block.NumberU64(), invalidBlockHash, streamID) + pl["error"] = err.Error() + longRangeFailInsertedBlockCounterVec.With(pl).Inc() + return err + } + + if invalidBlockRevert { + if s.state.invalidBlock.Number == i { + s.state.invalidBlock.resolve() + } + } + + s.state.inserted++ + longRangeSyncedBlockCounterVec.With(pl).Inc() + + utils.Logger().Info(). + Uint64("blockHeight", block.NumberU64()). + Uint64("blockEpoch", block.Epoch().Uint64()). + Str("blockHex", block.Hash().Hex()). + Uint32("ShardID", block.ShardID()). + Msg("[STAGED_STREAM_SYNC] New Block Added to Blockchain") + + // update cur progress + currProgress = stg.configs.bc.CurrentBlock().NumberU64() + + for i, tx := range block.StakingTransactions() { + utils.Logger().Info().
+ Msgf( + "StakingTxn %d: %s, %v", i, tx.StakingType().String(), tx.StakingMessage(), + ) + } + + // log the stage progress in console + if stg.configs.logProgress { + //calculating block speed + dt := time.Now().Sub(startTime).Seconds() + speed := float64(0) + if dt > 0 { + speed = float64(currProgress-startBlock) / dt + } + blockSpeed := fmt.Sprintf("%.2f", speed) + fmt.Print("\033[u\033[K") // restore the cursor position and clear the line + fmt.Println("insert blocks progress:", currProgress, "/", targetHeight, "(", blockSpeed, "blocks/s", ")") + } + + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + + return nil +} + +func (stg *StageStates) insertChain(gbm *blockDownloadManager, + protocol syncProtocol, + lbls prometheus.Labels, + targetBN uint64) { + +} + +func (stg *StageStates) saveProgress(s *StageState, tx kv.RwTx) (err error) { + + useInternalTx := tx == nil + if useInternalTx { + var err error + tx, err = stg.configs.db.BeginRw(context.Background()) + if err != nil { + return err + } + defer tx.Rollback() + } + + // save progress + if err = s.Update(tx, stg.configs.bc.CurrentBlock().NumberU64()); err != nil { + utils.Logger().Error(). + Err(err). + Msgf("[STAGED_SYNC] saving progress for block States stage failed") + return ErrSaveStateProgressFail + } + + if useInternalTx { + if err := tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (stg *StageStates) Revert(firstCycle bool, u *RevertState, s *StageState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = stg.configs.db.BeginRw(stg.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + if err = u.Done(tx); err != nil { + return err + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} + +func (stg *StageStates) CleanUp(firstCycle bool, p *CleanUpState, tx kv.RwTx) (err error) { + useInternalTx := tx == nil + if useInternalTx { + tx, err = stg.configs.db.BeginRw(stg.configs.ctx) + if err != nil { + return err + } + defer tx.Rollback() + } + + if useInternalTx { + if err = tx.Commit(); err != nil { + return err + } + } + return nil +} diff --git a/api/service/stagedstreamsync/staged_stream_sync.go b/api/service/stagedstreamsync/staged_stream_sync.go new file mode 100644 index 000000000..e73edd622 --- /dev/null +++ b/api/service/stagedstreamsync/staged_stream_sync.go @@ -0,0 +1,596 @@ +package stagedstreamsync + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/event" + "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/internal/utils" + syncproto "github.com/harmony-one/harmony/p2p/stream/protocols/sync" + sttypes "github.com/harmony-one/harmony/p2p/stream/types" + "github.com/ledgerwatch/erigon-lib/kv" + "github.com/prometheus/client_golang/prometheus" + "github.com/rs/zerolog" +) + +type InvalidBlock struct { + Active bool + Number uint64 + Hash common.Hash + IsLogged bool + StreamID []sttypes.StreamID +} + +func (ib *InvalidBlock) set(num uint64, hash common.Hash, resetBadStreams bool) { + ib.Active = true + ib.IsLogged = false + ib.Number = num + ib.Hash = hash + if resetBadStreams { + ib.StreamID = make([]sttypes.StreamID, 0) + } +} + +func (ib *InvalidBlock) resolve() { + ib.Active = false + ib.IsLogged = false + ib.Number = 0 + ib.Hash = common.Hash{} + ib.StreamID = ib.StreamID[:0] +} + +func (ib *InvalidBlock) addBadStream(bsID sttypes.StreamID) { + // only 
add unique IDs
+	for _, stID := range ib.StreamID {
+		if stID == bsID {
+			return
+		}
+	}
+	ib.StreamID = append(ib.StreamID, bsID)
+}
+
+type StagedStreamSync struct {
+	ctx          context.Context
+	bc           core.BlockChain
+	isBeacon     bool
+	isExplorer   bool
+	db           kv.RwDB
+	protocol     syncProtocol
+	isBeaconNode bool
+	gbm          *blockDownloadManager // initialized once the target block number has been fetched
+	inserted     int
+	config       Config
+	logger       zerolog.Logger
+	status       *status //TODO: merge this with currentSyncCycle
+	initSync     bool    // if set to true, the node starts long range syncing
+	UseMemDB     bool
+
+	revertPoint     *uint64 // used to run stages
+	prevRevertPoint *uint64 // used to get value from outside of staged sync after cycle (for example to notify RPCDaemon)
+	invalidBlock    InvalidBlock
+	currentStage    uint
+	LogProgress     bool
+	currentCycle    SyncCycle // current cycle
+	stages          []*Stage
+	revertOrder     []*Stage
+	pruningOrder    []*Stage
+	timings         []Timing
+	logPrefixes     []string
+
+	evtDownloadFinished           event.Feed // channel for each download task finished
+	evtDownloadFinishedSubscribed bool
+	evtDownloadStarted            event.Feed // channel for each download task started
+	evtDownloadStartedSubscribed  bool
+}
+
+type Timing struct {
+	isRevert  bool
+	isCleanUp bool
+	stage     SyncStageID
+	took      time.Duration
+}
+
+type SyncCycle struct {
+	Number       uint64
+	TargetHeight uint64
+	lock         sync.RWMutex
+}
+
+func (s *StagedStreamSync) Len() int                    { return len(s.stages) }
+func (s *StagedStreamSync) Context() context.Context    { return s.ctx }
+func (s *StagedStreamSync) Blockchain() core.BlockChain { return s.bc }
+func (s *StagedStreamSync) DB() kv.RwDB                 { return s.db }
+func (s *StagedStreamSync) IsBeacon() bool              { return s.isBeacon }
+func (s *StagedStreamSync) IsExplorer() bool            { return s.isExplorer }
+func (s *StagedStreamSync) LogPrefix() string {
+	if s == nil {
+		return ""
+	}
+	return s.logPrefixes[s.currentStage]
+}
+func (s *StagedStreamSync) PrevRevertPoint() *uint64 { return s.prevRevertPoint }
+
+func (s *StagedStreamSync) NewRevertState(id SyncStageID, revertPoint uint64) *RevertState {
+	return &RevertState{id, revertPoint, s}
+}
+
+func (s *StagedStreamSync) CleanUpStageState(id SyncStageID, forwardProgress uint64, tx kv.Tx, db kv.RwDB) (*CleanUpState, error) {
+	var pruneProgress uint64
+	var err error
+
+	if errV := CreateView(context.Background(), db, tx, func(tx kv.Tx) error {
+		pruneProgress, err = GetStageCleanUpProgress(tx, id, s.isBeacon)
+		if err != nil {
+			return err
+		}
+		return nil
+	}); errV != nil {
+		return nil, errV
+	}
+
+	return &CleanUpState{id, forwardProgress, pruneProgress, s}, nil
+}
+
+func (s *StagedStreamSync) NextStage() {
+	if s == nil {
+		return
+	}
+	s.currentStage++
+}
+
+// IsBefore returns true if stage1 goes before stage2 in staged sync
+func (s *StagedStreamSync) IsBefore(stage1, stage2 SyncStageID) bool {
+	idx1 := -1
+	idx2 := -1
+	for i, stage := range s.stages {
+		if stage.ID == stage1 {
+			idx1 = i
+		}
+
+		if stage.ID == stage2 {
+			idx2 = i
+		}
+	}
+
+	return idx1 < idx2
+}
+
+// IsAfter returns true if stage1 goes after stage2 in staged sync
+func (s *StagedStreamSync) IsAfter(stage1, stage2 SyncStageID) bool {
+	idx1 := -1
+	idx2 := -1
+	for i, stage := range s.stages {
+		if stage.ID == stage1 {
+			idx1 = i
+		}
+
+		if stage.ID == stage2 {
+			idx2 = i
+		}
+	}
+
+	return idx1 > idx2
+}
+
+// RevertTo sets the revert point
+func (s *StagedStreamSync) RevertTo(revertPoint uint64, invalidBlockNumber uint64, invalidBlockHash common.Hash, invalidBlockStreamID sttypes.StreamID) {
+	utils.Logger().Info().
+ Uint64("invalidBlockNumber", invalidBlockNumber). + Interface("invalidBlockHash", invalidBlockHash). + Interface("invalidBlockStreamID", invalidBlockStreamID). + Uint64("revertPoint", revertPoint). + Msgf(WrapStagedSyncMsg("Reverting blocks")) + s.revertPoint = &revertPoint + if invalidBlockNumber > 0 || invalidBlockHash != (common.Hash{}) { + resetBadStreams := !s.invalidBlock.Active + s.invalidBlock.set(invalidBlockNumber, invalidBlockHash, resetBadStreams) + s.invalidBlock.addBadStream(invalidBlockStreamID) + } +} + +func (s *StagedStreamSync) Done() { + s.currentStage = uint(len(s.stages)) + s.revertPoint = nil +} + +// IsDone returns true if last stage have been done +func (s *StagedStreamSync) IsDone() bool { + return s.currentStage >= uint(len(s.stages)) && s.revertPoint == nil +} + +// SetCurrentStage sets the current stage to a given stage id +func (s *StagedStreamSync) SetCurrentStage(id SyncStageID) error { + for i, stage := range s.stages { + if stage.ID == id { + s.currentStage = uint(i) + return nil + } + } + + return ErrStageNotFound +} + +// StageState retrieves the latest stage state from db +func (s *StagedStreamSync) StageState(stage SyncStageID, tx kv.Tx, db kv.RwDB) (*StageState, error) { + var blockNum uint64 + var err error + if errV := CreateView(context.Background(), db, tx, func(rtx kv.Tx) error { + blockNum, err = GetStageProgress(rtx, stage, s.isBeacon) + if err != nil { + return err + } + return nil + }); errV != nil { + return nil, errV + } + + return &StageState{s, stage, blockNum}, nil +} + +// cleanUp cleans up the stage by calling pruneStage +func (s *StagedStreamSync) cleanUp(fromStage int, db kv.RwDB, tx kv.RwTx, firstCycle bool) error { + found := false + for i := 0; i < len(s.pruningOrder); i++ { + if s.pruningOrder[i].ID == s.stages[fromStage].ID { + found = true + } + if !found || s.pruningOrder[i] == nil || s.pruningOrder[i].Disabled { + continue + } + if err := s.pruneStage(firstCycle, s.pruningOrder[i], db, tx); err != nil { + panic(err) + } + } + return nil +} + +// New creates a new StagedStreamSync instance +func New(ctx context.Context, + bc core.BlockChain, + db kv.RwDB, + stagesList []*Stage, + isBeacon bool, + protocol syncProtocol, + isBeaconNode bool, + useMemDB bool, + config Config, + logger zerolog.Logger, +) *StagedStreamSync { + + revertStages := make([]*Stage, len(stagesList)) + for i, stageIndex := range DefaultRevertOrder { + for _, s := range stagesList { + if s.ID == stageIndex { + revertStages[i] = s + break + } + } + } + pruneStages := make([]*Stage, len(stagesList)) + for i, stageIndex := range DefaultCleanUpOrder { + for _, s := range stagesList { + if s.ID == stageIndex { + pruneStages[i] = s + break + } + } + } + + logPrefixes := make([]string, len(stagesList)) + for i := range stagesList { + logPrefixes[i] = fmt.Sprintf("%d/%d %s", i+1, len(stagesList), stagesList[i].ID) + } + + status := newStatus() + + return &StagedStreamSync{ + ctx: ctx, + bc: bc, + isBeacon: isBeacon, + db: db, + protocol: protocol, + isBeaconNode: isBeaconNode, + gbm: nil, + status: &status, + inserted: 0, + config: config, + logger: logger, + stages: stagesList, + currentStage: 0, + revertOrder: revertStages, + pruningOrder: pruneStages, + logPrefixes: logPrefixes, + UseMemDB: useMemDB, + } +} + +// doGetCurrentNumberRequest returns estimated current block number and corresponding stream +func (s *StagedStreamSync) doGetCurrentNumberRequest() (uint64, sttypes.StreamID, error) { + ctx, cancel := context.WithTimeout(s.ctx, 10*time.Second) + defer 
cancel() + + bn, stid, err := s.protocol.GetCurrentBlockNumber(ctx, syncproto.WithHighPriority()) + if err != nil { + return 0, stid, err + } + return bn, stid, nil +} + +// promLabels returns a prometheus labels for current shard id +func (s *StagedStreamSync) promLabels() prometheus.Labels { + sid := s.bc.ShardID() + return prometheus.Labels{"ShardID": fmt.Sprintf("%d", sid)} +} + +// checkHaveEnoughStreams checks whether node is connected to certain number of streams +func (s *StagedStreamSync) checkHaveEnoughStreams() error { + numStreams := s.protocol.NumStreams() + if numStreams < s.config.MinStreams { + return fmt.Errorf("number of streams smaller than minimum: %v < %v", + numStreams, s.config.MinStreams) + } + return nil +} + +// SetNewContext sets a new context for all stages +func (s *StagedStreamSync) SetNewContext(ctx context.Context) error { + for _, s := range s.stages { + s.Handler.SetStageContext(ctx) + } + return nil +} + +// Run runs a full cycle of stages +func (s *StagedStreamSync) Run(db kv.RwDB, tx kv.RwTx, firstCycle bool) error { + s.prevRevertPoint = nil + s.timings = s.timings[:0] + + for !s.IsDone() { + if s.revertPoint != nil { + s.prevRevertPoint = s.revertPoint + s.revertPoint = nil + if !s.invalidBlock.Active { + for j := 0; j < len(s.revertOrder); j++ { + if s.revertOrder[j] == nil || s.revertOrder[j].Disabled { + continue + } + if err := s.revertStage(firstCycle, s.revertOrder[j], db, tx); err != nil { + utils.Logger().Error(). + Err(err). + Interface("stage id", s.revertOrder[j].ID). + Msgf(WrapStagedSyncMsg("revert stage failed")) + return err + } + } + } + if err := s.SetCurrentStage(s.stages[0].ID); err != nil { + return err + } + firstCycle = false + } + + stage := s.stages[s.currentStage] + + if stage.Disabled { + utils.Logger().Trace(). + Msgf(WrapStagedSyncMsg(fmt.Sprintf("%s disabled. %s", stage.ID, stage.DisabledDescription))) + + s.NextStage() + continue + } + + if err := s.runStage(stage, db, tx, firstCycle, s.invalidBlock.Active); err != nil { + utils.Logger().Error(). + Err(err). + Interface("stage id", stage.ID). + Msgf(WrapStagedSyncMsg("stage failed")) + return err + } + s.NextStage() + } + + if err := s.cleanUp(0, db, tx, firstCycle); err != nil { + utils.Logger().Error(). + Err(err). + Msgf(WrapStagedSyncMsg("stages cleanup failed")) + return err + } + if err := s.SetCurrentStage(s.stages[0].ID); err != nil { + return err + } + if err := printLogs(tx, s.timings); err != nil { + return err + } + s.currentStage = 0 + return nil +} + +// CreateView creates a view for a given db +func CreateView(ctx context.Context, db kv.RwDB, tx kv.Tx, f func(tx kv.Tx) error) error { + if tx != nil { + return f(tx) + } + return db.View(context.Background(), func(etx kv.Tx) error { + return f(etx) + }) +} + +// printLogs prints all timing logs +func printLogs(tx kv.RwTx, timings []Timing) error { + var logCtx []interface{} + count := 0 + for i := range timings { + if timings[i].took < 50*time.Millisecond { + continue + } + count++ + if count == 50 { + break + } + if timings[i].isRevert { + logCtx = append(logCtx, "Revert "+string(timings[i].stage), timings[i].took.Truncate(time.Millisecond).String()) + } else if timings[i].isCleanUp { + logCtx = append(logCtx, "CleanUp "+string(timings[i].stage), timings[i].took.Truncate(time.Millisecond).String()) + } else { + logCtx = append(logCtx, string(timings[i].stage), timings[i].took.Truncate(time.Millisecond).String()) + } + } + if len(logCtx) > 0 { + utils.Logger().Info(). 
+ Msgf(WrapStagedSyncMsg(fmt.Sprintf("Timings (slower than 50ms) %v", logCtx...))) + } + + if tx == nil { + return nil + } + + if len(logCtx) > 0 { // also don't print this logs if everything is fast + buckets := Buckets + bucketSizes := make([]interface{}, 0, 2*len(buckets)) + for _, bucket := range buckets { + sz, err1 := tx.BucketSize(bucket) + if err1 != nil { + return err1 + } + bucketSizes = append(bucketSizes, bucket, ByteCount(sz)) + } + utils.Logger().Info(). + Msgf(WrapStagedSyncMsg(fmt.Sprintf("Tables %v", bucketSizes...))) + } + tx.CollectMetrics() + return nil +} + +// runStage executes stage +func (s *StagedStreamSync) runStage(stage *Stage, db kv.RwDB, tx kv.RwTx, firstCycle bool, invalidBlockRevert bool) (err error) { + start := time.Now() + stageState, err := s.StageState(stage.ID, tx, db) + if err != nil { + return err + } + + if err = stage.Handler.Exec(firstCycle, invalidBlockRevert, stageState, s, tx); err != nil { + utils.Logger().Error(). + Err(err). + Interface("stage id", stage.ID). + Msgf(WrapStagedSyncMsg("stage failed")) + return fmt.Errorf("[%s] %w", s.LogPrefix(), err) + } + + took := time.Since(start) + if took > 60*time.Second { + logPrefix := s.LogPrefix() + utils.Logger().Info(). + Msgf(WrapStagedSyncMsg(fmt.Sprintf("%s: DONE in %d", logPrefix, took))) + + } + s.timings = append(s.timings, Timing{stage: stage.ID, took: took}) + return nil +} + +// revertStage reverts stage +func (s *StagedStreamSync) revertStage(firstCycle bool, stage *Stage, db kv.RwDB, tx kv.RwTx) error { + start := time.Now() + stageState, err := s.StageState(stage.ID, tx, db) + if err != nil { + return err + } + + revert := s.NewRevertState(stage.ID, *s.revertPoint) + + if stageState.BlockNumber <= revert.RevertPoint { + return nil + } + + if err = s.SetCurrentStage(stage.ID); err != nil { + return err + } + + err = stage.Handler.Revert(firstCycle, revert, stageState, tx) + if err != nil { + return fmt.Errorf("[%s] %w", s.LogPrefix(), err) + } + + took := time.Since(start) + if took > 60*time.Second { + logPrefix := s.LogPrefix() + utils.Logger().Info(). + Msgf(WrapStagedSyncMsg(fmt.Sprintf("%s: Revert done in %d", logPrefix, took))) + } + s.timings = append(s.timings, Timing{isRevert: true, stage: stage.ID, took: took}) + return nil +} + +// pruneStage cleans up the stage and logs the timing +func (s *StagedStreamSync) pruneStage(firstCycle bool, stage *Stage, db kv.RwDB, tx kv.RwTx) error { + start := time.Now() + + stageState, err := s.StageState(stage.ID, tx, db) + if err != nil { + return err + } + + prune, err := s.CleanUpStageState(stage.ID, stageState.BlockNumber, tx, db) + if err != nil { + return err + } + if err = s.SetCurrentStage(stage.ID); err != nil { + return err + } + + err = stage.Handler.CleanUp(firstCycle, prune, tx) + if err != nil { + return fmt.Errorf("[%s] %w", s.LogPrefix(), err) + } + + took := time.Since(start) + if took > 60*time.Second { + logPrefix := s.LogPrefix() + utils.Logger().Info(). 
+ Msgf(WrapStagedSyncMsg(fmt.Sprintf("%s: CleanUp done in %d", logPrefix, took))) + } + s.timings = append(s.timings, Timing{isCleanUp: true, stage: stage.ID, took: took}) + return nil +} + +// DisableAllStages disables all stages including their reverts +func (s *StagedStreamSync) DisableAllStages() []SyncStageID { + var backupEnabledIds []SyncStageID + for i := range s.stages { + if !s.stages[i].Disabled { + backupEnabledIds = append(backupEnabledIds, s.stages[i].ID) + } + } + for i := range s.stages { + s.stages[i].Disabled = true + } + return backupEnabledIds +} + +// DisableStages disables stages by a set of given stage IDs +func (s *StagedStreamSync) DisableStages(ids ...SyncStageID) { + for i := range s.stages { + for _, id := range ids { + if s.stages[i].ID != id { + continue + } + s.stages[i].Disabled = true + } + } +} + +// EnableStages enables stages by a set of given stage IDs +func (s *StagedStreamSync) EnableStages(ids ...SyncStageID) { + for i := range s.stages { + for _, id := range ids { + if s.stages[i].ID != id { + continue + } + s.stages[i].Disabled = false + } + } +} diff --git a/api/service/stagedstreamsync/stages.go b/api/service/stagedstreamsync/stages.go new file mode 100644 index 000000000..55681d68f --- /dev/null +++ b/api/service/stagedstreamsync/stages.go @@ -0,0 +1,66 @@ +package stagedstreamsync + +import ( + "github.com/ledgerwatch/erigon-lib/kv" +) + +// SyncStageID represents the stages in the Mode.StagedSync mode +type SyncStageID string + +const ( + Heads SyncStageID = "Heads" // Heads are downloaded + ShortRange SyncStageID = "ShortRange" // short range + SyncEpoch SyncStageID = "SyncEpoch" // epoch sync + BlockBodies SyncStageID = "BlockBodies" // Block bodies are downloaded, TxHash and UncleHash are getting verified + States SyncStageID = "States" // will construct most recent state from downloaded blocks + Finish SyncStageID = "Finish" // Nominal stage after all other stages +) + +// GetStageName returns the stage name in string +func GetStageName(stage string, isBeacon bool, prune bool) string { + name := stage + if isBeacon { + name = "beacon_" + name + } + if prune { + name = "prune_" + name + } + return name +} + +// GetStageID returns the stage name in bytes +func GetStageID(stage SyncStageID, isBeacon bool, prune bool) []byte { + return []byte(GetStageName(string(stage), isBeacon, prune)) +} + +// GetStageProgress retrieves saved progress of a given sync stage from the database +func GetStageProgress(db kv.Getter, stage SyncStageID, isBeacon bool) (uint64, error) { + stgID := GetStageID(stage, isBeacon, false) + v, err := db.GetOne(kv.SyncStageProgress, stgID) + if err != nil { + return 0, err + } + return unmarshalData(v) +} + +// SaveStageProgress saves progress of given sync stage +func SaveStageProgress(db kv.Putter, stage SyncStageID, isBeacon bool, progress uint64) error { + stgID := GetStageID(stage, isBeacon, false) + return db.Put(kv.SyncStageProgress, stgID, marshalData(progress)) +} + +// GetStageCleanUpProgress retrieves saved progress of given sync stage from the database +func GetStageCleanUpProgress(db kv.Getter, stage SyncStageID, isBeacon bool) (uint64, error) { + stgID := GetStageID(stage, isBeacon, true) + v, err := db.GetOne(kv.SyncStageProgress, stgID) + if err != nil { + return 0, err + } + return unmarshalData(v) +} + +// SaveStageCleanUpProgress stores the progress of the clean up for a given sync stage to the database +func SaveStageCleanUpProgress(db kv.Putter, stage SyncStageID, isBeacon bool, progress uint64) error { 
+ stgID := GetStageID(stage, isBeacon, true) + return db.Put(kv.SyncStageProgress, stgID, marshalData(progress)) +} diff --git a/api/service/stagedstreamsync/syncing.go b/api/service/stagedstreamsync/syncing.go new file mode 100644 index 000000000..de9b88481 --- /dev/null +++ b/api/service/stagedstreamsync/syncing.go @@ -0,0 +1,323 @@ +package stagedstreamsync + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/harmony-one/harmony/core" + "github.com/harmony-one/harmony/internal/utils" + sttypes "github.com/harmony-one/harmony/p2p/stream/types" + "github.com/harmony-one/harmony/shard" + "github.com/ledgerwatch/erigon-lib/kv" + "github.com/ledgerwatch/erigon-lib/kv/mdbx" + "github.com/ledgerwatch/erigon-lib/kv/memdb" + "github.com/ledgerwatch/log/v3" + "github.com/pkg/errors" + "github.com/rs/zerolog" +) + +const ( + BlocksBucket = "BlockBodies" + BlockSignaturesBucket = "BlockSignatures" + StageProgressBucket = "StageProgress" + + // cache db keys + LastBlockHeight = "LastBlockHeight" + LastBlockHash = "LastBlockHash" +) + +var Buckets = []string{ + BlocksBucket, + BlockSignaturesBucket, + StageProgressBucket, +} + +// CreateStagedSync creates an instance of staged sync +func CreateStagedSync(ctx context.Context, + bc core.BlockChain, + UseMemDB bool, + isBeaconNode bool, + protocol syncProtocol, + config Config, + logger zerolog.Logger, + logProgress bool, +) (*StagedStreamSync, error) { + + isBeacon := bc.ShardID() == shard.BeaconChainShardID + + var mainDB kv.RwDB + dbs := make([]kv.RwDB, config.Concurrency) + if UseMemDB { + mainDB = memdb.New() + for i := 0; i < config.Concurrency; i++ { + dbs[i] = memdb.New() + } + } else { + mainDB = mdbx.NewMDBX(log.New()).Path(GetBlockDbPath(isBeacon, -1)).MustOpen() + for i := 0; i < config.Concurrency; i++ { + dbPath := GetBlockDbPath(isBeacon, i) + dbs[i] = mdbx.NewMDBX(log.New()).Path(dbPath).MustOpen() + } + } + + if errInitDB := initDB(ctx, mainDB, dbs, config.Concurrency); errInitDB != nil { + return nil, errInitDB + } + + stageHeadsCfg := NewStageHeadersCfg(ctx, bc, mainDB) + stageShortRangeCfg := NewStageShortRangeCfg(ctx, bc, mainDB) + stageSyncEpochCfg := NewStageEpochCfg(ctx, bc, mainDB) + stageBodiesCfg := NewStageBodiesCfg(ctx, bc, mainDB, dbs, config.Concurrency, protocol, isBeacon, logProgress) + stageStatesCfg := NewStageStatesCfg(ctx, bc, mainDB, dbs, config.Concurrency, logger, logProgress) + stageFinishCfg := NewStageFinishCfg(ctx, mainDB) + + stages := DefaultStages(ctx, + stageHeadsCfg, + stageSyncEpochCfg, + stageShortRangeCfg, + stageBodiesCfg, + stageStatesCfg, + stageFinishCfg, + ) + + return New(ctx, + bc, + mainDB, + stages, + isBeacon, + protocol, + isBeaconNode, + UseMemDB, + config, + logger, + ), nil +} + +// initDB inits the sync loop main database and create buckets +func initDB(ctx context.Context, mainDB kv.RwDB, dbs []kv.RwDB, concurrency int) error { + + // create buckets for mainDB + tx, errRW := mainDB.BeginRw(ctx) + if errRW != nil { + return errRW + } + defer tx.Rollback() + + for _, name := range Buckets { + if err := tx.CreateBucket(GetStageName(name, false, false)); err != nil { + return err + } + } + if err := tx.Commit(); err != nil { + return err + } + + // create buckets for block cache DBs + for _, db := range dbs { + tx, errRW := db.BeginRw(ctx) + if errRW != nil { + return errRW + } + + if err := tx.CreateBucket(BlocksBucket); err != nil { + return err + } + if err := tx.CreateBucket(BlockSignaturesBucket); err != nil { + return err + } + + if err := tx.Commit(); err != nil { + 
return err + } + } + + return nil +} + +// GetBlockDbPath returns the path of the cache database which stores blocks +func GetBlockDbPath(beacon bool, loopID int) string { + if beacon { + if loopID >= 0 { + return fmt.Sprintf("%s_%d", "cache/beacon_blocks_db", loopID) + } else { + return "cache/beacon_blocks_db_main" + } + } else { + if loopID >= 0 { + return fmt.Sprintf("%s_%d", "cache/blocks_db", loopID) + } else { + return "cache/blocks_db_main" + } + } +} + +// doSync does the long range sync. +// One LongRangeSync consists of several iterations. +// For each iteration, estimate the current block number, then fetch block & insert to blockchain +func (s *StagedStreamSync) doSync(downloaderContext context.Context, initSync bool) (int, error) { + + var totalInserted int + + s.initSync = initSync + + if err := s.checkPrerequisites(); err != nil { + return 0, err + } + + var estimatedHeight uint64 + if initSync { + if h, err := s.estimateCurrentNumber(); err != nil { + return 0, err + } else { + estimatedHeight = h + //TODO: use directly currentCycle var + s.status.setTargetBN(estimatedHeight) + } + if curBN := s.bc.CurrentBlock().NumberU64(); estimatedHeight <= curBN { + s.logger.Info().Uint64("current number", curBN).Uint64("target number", estimatedHeight). + Msg(WrapStagedSyncMsg("early return of long range sync")) + return 0, nil + } + + s.startSyncing() + defer s.finishSyncing() + } + + for { + ctx, cancel := context.WithCancel(downloaderContext) + s.ctx = ctx + s.SetNewContext(ctx) + + n, err := s.doSyncCycle(ctx, initSync) + if err != nil { + pl := s.promLabels() + pl["error"] = err.Error() + numFailedDownloadCounterVec.With(pl).Inc() + + cancel() + return totalInserted + n, err + } + cancel() + + totalInserted += n + + // if it's not long range sync, skip loop + if n < LastMileBlocksThreshold || !initSync { + return totalInserted, nil + } + } + +} + +func (s *StagedStreamSync) doSyncCycle(ctx context.Context, initSync bool) (int, error) { + + // TODO: initSync=true means currentCycleNumber==0, so we can remove initSync + + var totalInserted int + + s.inserted = 0 + startHead := s.bc.CurrentBlock().NumberU64() + canRunCycleInOneTransaction := false + + var tx kv.RwTx + if canRunCycleInOneTransaction { + var err error + if tx, err = s.DB().BeginRw(context.Background()); err != nil { + return totalInserted, err + } + defer tx.Rollback() + } + + startTime := time.Now() + + // Do one cycle of staged sync + initialCycle := s.currentCycle.Number == 0 + if err := s.Run(s.DB(), tx, initialCycle); err != nil { + utils.Logger().Error(). + Err(err). + Bool("isBeacon", s.isBeacon). + Uint32("shard", s.bc.ShardID()). + Uint64("currentHeight", startHead). 
+ Msgf(WrapStagedSyncMsg("sync cycle failed")) + return totalInserted, err + } + + totalInserted += s.inserted + + s.currentCycle.lock.Lock() + s.currentCycle.Number++ + s.currentCycle.lock.Unlock() + + // calculating sync speed (blocks/second) + if s.LogProgress && s.inserted > 0 { + dt := time.Now().Sub(startTime).Seconds() + speed := float64(0) + if dt > 0 { + speed = float64(s.inserted) / dt + } + syncSpeed := fmt.Sprintf("%.2f", speed) + fmt.Println("sync speed:", syncSpeed, "blocks/s") + } + + return totalInserted, nil +} + +func (s *StagedStreamSync) startSyncing() { + s.status.startSyncing() + if s.evtDownloadStartedSubscribed { + s.evtDownloadStarted.Send(struct{}{}) + } +} + +func (s *StagedStreamSync) finishSyncing() { + s.status.finishSyncing() + if s.evtDownloadFinishedSubscribed { + s.evtDownloadFinished.Send(struct{}{}) + } +} + +func (s *StagedStreamSync) checkPrerequisites() error { + return s.checkHaveEnoughStreams() +} + +// estimateCurrentNumber roughly estimates the current block number. +// The block number does not need to be exact, but just a temporary target of the iteration +func (s *StagedStreamSync) estimateCurrentNumber() (uint64, error) { + var ( + cnResults = make(map[sttypes.StreamID]uint64) + lock sync.Mutex + wg sync.WaitGroup + ) + wg.Add(s.config.Concurrency) + for i := 0; i != s.config.Concurrency; i++ { + go func() { + defer wg.Done() + bn, stid, err := s.doGetCurrentNumberRequest() + if err != nil { + s.logger.Err(err).Str("streamID", string(stid)). + Msg(WrapStagedSyncMsg("getCurrentNumber request failed")) + if !errors.Is(err, context.Canceled) { + s.protocol.StreamFailed(stid, "getCurrentNumber request failed") + } + return + } + lock.Lock() + cnResults[stid] = bn + lock.Unlock() + }() + } + wg.Wait() + + if len(cnResults) == 0 { + select { + case <-s.ctx.Done(): + return 0, s.ctx.Err() + default: + } + return 0, ErrZeroBlockResponse + } + bn := computeBlockNumberByMaxVote(cnResults) + return bn, nil +} diff --git a/api/service/stagedstreamsync/types.go b/api/service/stagedstreamsync/types.go new file mode 100644 index 000000000..6d6326452 --- /dev/null +++ b/api/service/stagedstreamsync/types.go @@ -0,0 +1,287 @@ +package stagedstreamsync + +import ( + "container/heap" + "sync" + + "github.com/ethereum/go-ethereum/common" + "github.com/harmony-one/harmony/core/types" + sttypes "github.com/harmony-one/harmony/p2p/stream/types" +) + +var ( + emptyHash common.Hash +) + +type status struct { + isSyncing bool + targetBN uint64 + lock sync.Mutex +} + +func newStatus() status { + return status{} +} + +func (s *status) startSyncing() { + s.lock.Lock() + defer s.lock.Unlock() + + s.isSyncing = true +} + +func (s *status) setTargetBN(val uint64) { + s.lock.Lock() + defer s.lock.Unlock() + + s.targetBN = val +} + +func (s *status) finishSyncing() { + s.lock.Lock() + defer s.lock.Unlock() + + s.isSyncing = false + s.targetBN = 0 +} + +func (s *status) get() (bool, uint64) { + s.lock.Lock() + defer s.lock.Unlock() + + return s.isSyncing, s.targetBN +} + +type getBlocksResult struct { + bns []uint64 + blocks []*types.Block + stid sttypes.StreamID +} + +type resultQueue struct { + results *priorityQueue + lock sync.Mutex +} + +func newResultQueue() *resultQueue { + pq := make(priorityQueue, 0, 200) // 200 - rough estimate + heap.Init(&pq) + return &resultQueue{ + results: &pq, + } +} + +// addBlockResults adds the blocks to the result queue to be processed by insertChainLoop. +// If a nil block is detected in the block list, will not process further blocks. 
+func (rq *resultQueue) addBlockResults(blocks []*types.Block, stid sttypes.StreamID) { + rq.lock.Lock() + defer rq.lock.Unlock() + + for _, block := range blocks { + if block == nil { + continue + } + heap.Push(rq.results, &blockResult{ + block: block, + stid: stid, + }) + } + return +} + +// popBlockResults pop a continuous list of blocks starting at expStartBN with capped size. +// Return the stale block numbers as the second return value +func (rq *resultQueue) popBlockResults(expStartBN uint64, cap int) ([]*blockResult, []uint64) { + rq.lock.Lock() + defer rq.lock.Unlock() + + var ( + res = make([]*blockResult, 0, cap) + stales []uint64 + ) + + for cnt := 0; rq.results.Len() > 0 && cnt < cap; cnt++ { + br := heap.Pop(rq.results).(*blockResult) + // stale block number + if br.block.NumberU64() < expStartBN { + stales = append(stales, br.block.NumberU64()) + continue + } + if br.block.NumberU64() != expStartBN { + heap.Push(rq.results, br) + return res, stales + } + res = append(res, br) + expStartBN++ + } + return res, stales +} + +// removeResultsByStreamID removes the block results of the given stream, returns the block +// number removed from the queue +func (rq *resultQueue) removeResultsByStreamID(stid sttypes.StreamID) []uint64 { + rq.lock.Lock() + defer rq.lock.Unlock() + + var removed []uint64 + +Loop: + for { + for i, res := range *rq.results { + blockRes := res.(*blockResult) + if blockRes.stid == stid { + rq.removeByIndex(i) + removed = append(removed, blockRes.block.NumberU64()) + goto Loop + } + } + break + } + return removed +} + +func (rq *resultQueue) length() int { + return len(*rq.results) +} + +func (rq *resultQueue) removeByIndex(index int) { + heap.Remove(rq.results, index) +} + +// bnPrioritizedItem is the item which uses block number to determine its priority +type bnPrioritizedItem interface { + getBlockNumber() uint64 +} + +type blockResult struct { + block *types.Block + stid sttypes.StreamID +} + +func (br *blockResult) getBlockNumber() uint64 { + return br.block.NumberU64() +} + +func blockResultsToBlocks(results []*blockResult) []*types.Block { + blocks := make([]*types.Block, 0, len(results)) + + for _, result := range results { + blocks = append(blocks, result.block) + } + return blocks +} + +type ( + prioritizedNumber uint64 + + prioritizedNumbers struct { + q *priorityQueue + } +) + +func (b prioritizedNumber) getBlockNumber() uint64 { + return uint64(b) +} + +func newPrioritizedNumbers() *prioritizedNumbers { + pqs := make(priorityQueue, 0) + heap.Init(&pqs) + return &prioritizedNumbers{ + q: &pqs, + } +} + +func (pbs *prioritizedNumbers) push(bn uint64) { + heap.Push(pbs.q, prioritizedNumber(bn)) +} + +func (pbs *prioritizedNumbers) pop() uint64 { + if pbs.q.Len() == 0 { + return 0 + } + item := heap.Pop(pbs.q) + return uint64(item.(prioritizedNumber)) +} + +func (pbs *prioritizedNumbers) length() int { + return len(*pbs.q) +} + +type ( + blockByNumber types.Block + + // blocksByNumber is the priority queue ordered by number + blocksByNumber struct { + q *priorityQueue + cap int + } +) + +func (b *blockByNumber) getBlockNumber() uint64 { + raw := (*types.Block)(b) + return raw.NumberU64() +} + +func newBlocksByNumber(cap int) *blocksByNumber { + pqs := make(priorityQueue, 0) + heap.Init(&pqs) + return &blocksByNumber{ + q: &pqs, + cap: cap, + } +} + +func (bs *blocksByNumber) push(b *types.Block) { + heap.Push(bs.q, (*blockByNumber)(b)) + for bs.q.Len() > bs.cap { + heap.Pop(bs.q) + } +} + +func (bs *blocksByNumber) pop() *types.Block { + if 
bs.q.Len() == 0 {
+		return nil
+	}
+	item := heap.Pop(bs.q)
+	return (*types.Block)(item.(*blockByNumber))
+}
+
+func (bs *blocksByNumber) len() int {
+	return bs.q.Len()
+}
+
+// priorityQueue is a priority queue in which the lowest block number has the highest priority
+type priorityQueue []bnPrioritizedItem
+
+func (q priorityQueue) Len() int {
+	return len(q)
+}
+
+func (q priorityQueue) Less(i, j int) bool {
+	bn1 := q[i].getBlockNumber()
+	bn2 := q[j].getBlockNumber()
+	return bn1 < bn2 // small block number has higher priority
+}
+
+func (q priorityQueue) Swap(i, j int) {
+	q[i], q[j] = q[j], q[i]
+}
+
+func (q *priorityQueue) Push(x interface{}) {
+	item, ok := x.(bnPrioritizedItem)
+	if !ok {
+		panic(ErrWrongGetBlockNumberType)
+	}
+	*q = append(*q, item)
+}
+
+func (q *priorityQueue) Pop() interface{} {
+	prev := *q
+	n := len(prev)
+	if n == 0 {
+		return nil
+	}
+	res := prev[n-1]
+	*q = prev[0 : n-1]
+	return res
+}
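+
+// Example (illustrative): pushed items come back in ascending block-number
+// order regardless of push order:
+//
+//	pq := make(priorityQueue, 0)
+//	heap.Init(&pq)
+//	heap.Push(&pq, prioritizedNumber(9))
+//	heap.Push(&pq, prioritizedNumber(4))
+//	heap.Push(&pq, prioritizedNumber(7))
+//	first := heap.Pop(&pq).(prioritizedNumber) // first == 4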
diff --git a/api/service/stagedstreamsync/types_test.go b/api/service/stagedstreamsync/types_test.go
new file mode 100644
index 000000000..1890608b2
--- /dev/null
+++ b/api/service/stagedstreamsync/types_test.go
@@ -0,0 +1,266 @@
+package stagedstreamsync
+
+import (
+	"container/heap"
+	"fmt"
+	"math/big"
+	"strings"
+	"testing"
+
+	"github.com/harmony-one/harmony/block"
+	headerV3 "github.com/harmony-one/harmony/block/v3"
+	"github.com/harmony-one/harmony/core/types"
+	bls_cosi "github.com/harmony-one/harmony/crypto/bls"
+	sttypes "github.com/harmony-one/harmony/p2p/stream/types"
+)
+
+func TestResultQueue_AddBlockResults(t *testing.T) {
+	tests := []struct {
+		initBNs []uint64
+		addBNs  []uint64
+		expSize int
+	}{
+		{
+			initBNs: []uint64{},
+			addBNs:  []uint64{1, 2, 3, 4},
+			expSize: 4,
+		},
+		{
+			initBNs: []uint64{1, 2, 3, 4},
+			addBNs:  []uint64{5, 6, 7, 8},
+			expSize: 8,
+		},
+	}
+	for i, test := range tests {
+		rq := makeTestResultQueue(test.initBNs)
+		rq.addBlockResults(makeTestBlocks(test.addBNs), "")
+
+		if rq.results.Len() != test.expSize {
+			t.Errorf("Test %v: unexpected size: %v / %v", i, rq.results.Len(), test.expSize)
+		}
+	}
+}
+
+func TestResultQueue_PopBlockResults(t *testing.T) {
+	tests := []struct {
+		initBNs   []uint64
+		cap       int
+		expStart  uint64
+		expSize   int
+		staleSize int
+	}{
+		{
+			initBNs:   []uint64{1, 2, 3, 4, 5},
+			cap:       3,
+			expStart:  1,
+			expSize:   3,
+			staleSize: 0,
+		},
+		{
+			initBNs:   []uint64{1, 2, 3, 4, 5},
+			cap:       10,
+			expStart:  1,
+			expSize:   5,
+			staleSize: 0,
+		},
+		{
+			initBNs:   []uint64{1, 3, 4, 5},
+			cap:       10,
+			expStart:  1,
+			expSize:   1,
+			staleSize: 0,
+		},
+		{
+			initBNs:   []uint64{1, 2, 3, 4, 5},
+			cap:       10,
+			expStart:  0,
+			expSize:   0,
+			staleSize: 0,
+		},
+		{
+			initBNs:   []uint64{1, 1, 1, 1, 2},
+			cap:       10,
+			expStart:  1,
+			expSize:   2,
+			staleSize: 3,
+		},
+		{
+			initBNs:   []uint64{1, 2, 3, 4, 5},
+			cap:       10,
+			expStart:  2,
+			expSize:   4,
+			staleSize: 1,
+		},
+	}
+	for i, test := range tests {
+		rq := makeTestResultQueue(test.initBNs)
+		res, stales := rq.popBlockResults(test.expStart, test.cap)
+		if len(res) != test.expSize {
+			t.Errorf("Test %v: unexpected size %v / %v", i, len(res), test.expSize)
+		}
+		if len(stales) != test.staleSize {
+			t.Errorf("Test %v: unexpected stale size %v / %v", i, len(stales), test.staleSize)
+		}
+	}
+}
+
+func TestResultQueue_RemoveResultsByStreamID(t *testing.T) {
+	tests := []struct {
+		rq         *resultQueue
+		rmStreamID sttypes.StreamID
+		removed    int
+		expSize    int
+	}{
+		{
+			rq:         makeTestResultQueue([]uint64{1, 2, 3, 4}),
+			rmStreamID: "test stream id",
+			removed:    4,
+			expSize:    0,
+		},
+		{
+			rq: func() *resultQueue {
+				rq := makeTestResultQueue([]uint64{2, 3, 4, 5})
+				rq.addBlockResults([]*types.Block{
+					makeTestBlock(1),
+					makeTestBlock(5),
+					makeTestBlock(6),
+				}, "another test stream id")
+				return rq
+			}(),
+			rmStreamID: "test stream id",
+			removed:    4,
+			expSize:    3,
+		},
+		{
+			rq: func() *resultQueue {
+				rq := makeTestResultQueue([]uint64{2, 3, 4, 5})
+				rq.addBlockResults([]*types.Block{
+					makeTestBlock(1),
+					makeTestBlock(5),
+					makeTestBlock(6),
+				}, "another test stream id")
+				return rq
+			}(),
+			rmStreamID: "another test stream id",
+			removed:    3,
+			expSize:    4,
+		},
+	}
+	for i, test := range tests {
+		res := test.rq.removeResultsByStreamID(test.rmStreamID)
+		if len(res) != test.removed {
+			t.Errorf("Test %v: unexpected number removed %v / %v", i, len(res), test.removed)
+		}
+		if gotSize := test.rq.results.Len(); gotSize != test.expSize {
+			t.Errorf("Test %v: unexpected number after removal %v / %v", i, gotSize, test.expSize)
+		}
+	}
+}
+
+func makeTestResultQueue(bns []uint64) *resultQueue {
+	rq := newResultQueue()
+	for _, bn := range bns {
+		heap.Push(rq.results, &blockResult{
+			block: makeTestBlock(bn),
+			stid:  "test stream id",
+		})
+	}
+	return rq
+}
+
+func TestPrioritizedBlocks(t *testing.T) {
+	addBNs := []uint64{4, 7, 6, 9}
+
+	bns := newPrioritizedNumbers()
+	for _, bn := range addBNs {
+		bns.push(bn)
+	}
+	prevBN := uint64(0)
+	for len(*bns.q) > 0 {
+		b := bns.pop()
+		if b < prevBN {
+			t.Errorf("number not incrementing")
+		}
+		prevBN = b
+	}
+	if last := bns.pop(); last != 0 {
+		t.Errorf("last elem is not 0")
+	}
+}
+
+func TestBlocksByNumber(t *testing.T) {
+	addBNs := []uint64{4, 7, 6, 9}
+
+	bns := newBlocksByNumber(10)
+	for _, bn := range addBNs {
+		bns.push(makeTestBlock(bn))
+	}
+	if bns.len() != len(addBNs) {
+		t.Errorf("size unexpected: %v / %v", bns.len(), len(addBNs))
+	}
+	prevBN := uint64(0)
+	for len(*bns.q) > 0 {
+		b := bns.pop()
+		if b.NumberU64() < prevBN {
+			t.Errorf("number not incrementing")
+		}
+		prevBN = b.NumberU64()
+	}
+	if lastBlock := bns.pop(); lastBlock != nil {
+		t.Errorf("last block is not nil")
+	}
+}
+
+func TestPriorityQueue(t *testing.T) {
+	testBNs := []uint64{1, 9, 2, 4, 5, 12}
+	pq := make(priorityQueue, 0, 10)
+	heap.Init(&pq)
+	for _, bn := range testBNs {
+		heap.Push(&pq, &blockResult{
+			block: makeTestBlock(bn),
+			stid:  "",
+		})
+	}
+	cmpBN := uint64(0)
+	for pq.Len() > 0 {
+		bn := heap.Pop(&pq).(*blockResult).block.NumberU64()
+		if bn < cmpBN {
+			t.Errorf("not incrementing")
+		}
+		cmpBN = bn
+	}
+	if pq.Len() != 0 {
+		t.Errorf("after popping, size is not 0")
+	}
+}
+
+func makeTestBlocks(bns []uint64) []*types.Block {
+	blocks := make([]*types.Block, 0, len(bns))
+	for _, bn := range bns {
+		blocks = append(blocks, makeTestBlock(bn))
+	}
+	return blocks
+}
+
+func makeTestBlock(bn uint64) *types.Block {
+	testHeader := &block.Header{Header: headerV3.NewHeader()}
+	testHeader.SetNumber(big.NewInt(int64(bn)))
+	testHeader.SetLastCommitSignature(bls_cosi.SerializedSignature{})
+	testHeader.SetLastCommitBitmap(make([]byte, 10))
+	block := types.NewBlockWithHeader(testHeader)
+	block.SetCurrentCommitSig(make([]byte, 106))
+	return block
+}
+
+func assertError(got, expect error) error {
+	if (got == nil) != (expect == nil) {
+		return fmt.Errorf("unexpected error [%v] / [%v]", got, expect)
+	}
+	if (got == nil) || (expect == nil) {
+		return nil
+	}
+	if !strings.Contains(got.Error(), expect.Error()) {
+		return fmt.Errorf("unexpected error [%v] / [%v]", got, expect)
+	}
+	return nil
+}
diff --git a/api/service/stagedsync/stagedsync.go 
b/api/service/stagedsync/stagedsync.go index 88df1a671..83af6abf9 100644 --- a/api/service/stagedsync/stagedsync.go +++ b/api/service/stagedsync/stagedsync.go @@ -26,6 +26,8 @@ import ( "github.com/harmony-one/harmony/internal/utils" "github.com/harmony-one/harmony/p2p" "github.com/ledgerwatch/erigon-lib/kv" + + libp2p_peer "github.com/libp2p/go-libp2p/core/peer" ) type StagedSync struct { @@ -663,7 +665,7 @@ func (ss *StagedSync) AddNewBlock(peerHash []byte, block *types.Block) { } // CreateSyncConfig creates SyncConfig for StateSync object. -func (ss *StagedSync) CreateSyncConfig(peers []p2p.Peer, shardID uint32, waitForEachPeerToConnect bool) error { +func (ss *StagedSync) CreateSyncConfig(peers []p2p.Peer, shardID uint32, selfPeerID libp2p_peer.ID, waitForEachPeerToConnect bool) error { // sanity check to ensure no duplicate peers if err := checkPeersDuplicity(peers); err != nil { return err @@ -678,6 +680,7 @@ func (ss *StagedSync) CreateSyncConfig(peers []p2p.Peer, shardID uint32, waitFor } utils.Logger().Debug(). + Str("self peer ID", string(selfPeerID)). Int("peers count", len(peers)). Int("target size", targetSize). Msg("[STAGED_SYNC] CreateSyncConfig: len of peers") @@ -685,7 +688,9 @@ func (ss *StagedSync) CreateSyncConfig(peers []p2p.Peer, shardID uint32, waitFor if ss.syncConfig != nil { ss.syncConfig.CloseConnections() } - ss.syncConfig = &SyncConfig{} + ss.syncConfig = &SyncConfig{ + selfPeerID: selfPeerID, + } var connectedPeers int for _, peer := range peers { @@ -694,6 +699,7 @@ func (ss *StagedSync) CreateSyncConfig(peers []p2p.Peer, shardID uint32, waitFor continue } peerConfig := &SyncPeerConfig{ + peer: peer, ip: peer.IP, port: peer.Port, client: client, @@ -1195,27 +1201,24 @@ func (ss *StagedSync) IsSameBlockchainHeight(bc core.BlockChain) (uint64, bool) } // GetMaxPeerHeight returns maximum block height of connected peers -func (ss *StagedSync) GetMaxPeerHeight() uint64 { - mph, _ := ss.getMaxPeerHeight() - return mph +func (ss *StagedSync) GetMaxPeerHeight() (uint64, error) { + return ss.getMaxPeerHeight() } -func (ss *StagedSync) addConsensusLastMile(bc core.BlockChain, consensus *consensus.Consensus) error { +func (ss *StagedSync) addConsensusLastMile(bc core.BlockChain, cs *consensus.Consensus) error { curNumber := bc.CurrentBlock().NumberU64() - blockIter, err := consensus.GetLastMileBlockIter(curNumber + 1) - if err != nil { - return err - } - for { - block := blockIter.Next() - if block == nil { - break - } - if _, err := bc.InsertChain(types.Blocks{block}, true); err != nil { - return errors.Wrap(err, "failed to InsertChain") + return cs.GetLastMileBlockIter(curNumber+1, func(blockIter *consensus.LastMileBlockIter) error { + for { + block := blockIter.Next() + if block == nil { + break + } + if _, err := bc.InsertChain(types.Blocks{block}, true); err != nil { + return errors.Wrap(err, "failed to InsertChain") + } } - } - return nil + return nil + }) } // GetSyncingPort returns the syncing port. 
@@ -1271,8 +1274,15 @@ func (ss *StagedSync) isSynchronized(doubleCheck bool) SyncCheckResult { if ss.syncConfig == nil { return SyncCheckResult{} // If syncConfig is not instantiated, return not in sync } - otherHeight1, _ := ss.getMaxPeerHeight() lastHeight := ss.Blockchain().CurrentBlock().NumberU64() + otherHeight1, errMaxHeight1 := ss.getMaxPeerHeight() + if errMaxHeight1 != nil { + return SyncCheckResult{ + IsSynchronized: false, + OtherHeight: 0, + HeightDiff: 0, + } + } wasOutOfSync := lastHeight+inSyncThreshold < otherHeight1 if !doubleCheck { @@ -1293,7 +1303,10 @@ func (ss *StagedSync) isSynchronized(doubleCheck bool) SyncCheckResult { // double check the sync status after 1 second to confirm (avoid false alarm) time.Sleep(1 * time.Second) - otherHeight2, _ := ss.getMaxPeerHeight() + otherHeight2, errMaxHeight2 := ss.getMaxPeerHeight() + if errMaxHeight2 != nil { + otherHeight2 = otherHeight1 + } currentHeight := ss.Blockchain().CurrentBlock().NumberU64() isOutOfSync := currentHeight+inSyncThreshold < otherHeight2 diff --git a/api/service/stagedsync/sync_config.go b/api/service/stagedsync/sync_config.go index f42737cc1..91b3a4d73 100644 --- a/api/service/stagedsync/sync_config.go +++ b/api/service/stagedsync/sync_config.go @@ -14,6 +14,8 @@ import ( "github.com/harmony-one/harmony/core/types" "github.com/harmony-one/harmony/internal/utils" "github.com/harmony-one/harmony/p2p" + + libp2p_peer "github.com/libp2p/go-libp2p/core/peer" ) // Constants for syncing. @@ -40,6 +42,7 @@ const ( // SyncPeerConfig is peer config to sync. type SyncPeerConfig struct { + peer p2p.Peer ip string port string peerHash []byte @@ -156,6 +159,7 @@ type SyncConfig struct { mtx sync.RWMutex reservedPeers []*SyncPeerConfig peers []*SyncPeerConfig + selfPeerID libp2p_peer.ID } // AddPeer adds the given sync peer. @@ -168,6 +172,9 @@ func (sc *SyncConfig) AddPeer(peer *SyncPeerConfig) { if peer.IsEqual(p2) { return } + if peer.peer.PeerID == sc.selfPeerID { + return + } } sc.peers = append(sc.peers, peer) } diff --git a/api/service/stagedsync/sync_status.go b/api/service/stagedsync/sync_status.go index 556f1058b..3f1bf11e8 100644 --- a/api/service/stagedsync/sync_status.go +++ b/api/service/stagedsync/sync_status.go @@ -1,6 +1,7 @@ package stagedsync import ( + "math" "sync" "time" @@ -75,7 +76,9 @@ func (status *syncStatus) Get(fallback func() SyncCheckResult) SyncCheckResult { defer status.lock.Unlock() if status.expired() { result := fallback() - status.update(result) + if result.OtherHeight > 0 && result.OtherHeight < uint64(math.MaxUint64) { + status.update(result) + } } return status.lastResult } diff --git a/cmd/bootnode/main.go b/cmd/bootnode/main.go index 6e3a8bbbc..92d6674b9 100644 --- a/cmd/bootnode/main.go +++ b/cmd/bootnode/main.go @@ -102,6 +102,7 @@ func main() { verbosity := flag.Int("verbosity", 5, "Logging verbosity: 0=silent, 1=error, 2=warn, 3=info, 4=debug, 5=detail (default: 5)") logConn := flag.Bool("log_conn", false, "log incoming/outgoing connections") maxConnPerIP := flag.Int("max_conn_per_ip", 10, "max connections number for same ip") + forceReachabilityPublic := flag.Bool("force_public", false, "forcing the local node to believe it is reachable externally") flag.Parse() @@ -124,12 +125,14 @@ func main() { // For bootstrap nodes, we shall keep .dht file. 
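+	// Assumption: ForceReachabilityPublic wires through to libp2p's AutoNAT
+	// "force reachability public" option, so a bootnode behind a static NAT
+	// mapping skips reachability probing and advertises itself as public.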
dataStorePath := fmt.Sprintf(".dht-%s-%s", *ip, *port) selfPeer := p2p.Peer{IP: *ip, Port: *port} + host, err := p2p.NewHost(p2p.HostConfig{ - Self: &selfPeer, - BLSKey: privKey, - BootNodes: nil, // Boot nodes have no boot nodes :) Will be connected when other nodes joined - DataStoreFile: &dataStorePath, - MaxConnPerIP: *maxConnPerIP, + Self: &selfPeer, + BLSKey: privKey, + BootNodes: nil, // Boot nodes have no boot nodes :) Will be connected when other nodes joined + DataStoreFile: &dataStorePath, + MaxConnPerIP: *maxConnPerIP, + ForceReachabilityPublic: *forceReachabilityPublic, }) if err != nil { utils.FatalErrMsg(err, "cannot initialize network") diff --git a/cmd/harmony/config.go b/cmd/harmony/config.go index dea5fb2e3..30c72c2e2 100644 --- a/cmd/harmony/config.go +++ b/cmd/harmony/config.go @@ -139,6 +139,8 @@ func getDefaultSyncConfig(nt nodeconfig.NetworkType) harmonyconfig.SyncConfig { return defaultTestNetSyncConfig case nodeconfig.Localnet: return defaultLocalNetSyncConfig + case nodeconfig.Partner: + return defaultPartnerSyncConfig default: return defaultElseSyncConfig } diff --git a/cmd/harmony/config_migrations.go b/cmd/harmony/config_migrations.go index eb98ccb79..b3da9ec2b 100644 --- a/cmd/harmony/config_migrations.go +++ b/cmd/harmony/config_migrations.go @@ -318,6 +318,45 @@ func init() { return confTree } + migrations["2.5.10"] = func(confTree *toml.Tree) *toml.Tree { + if confTree.Get("P2P.ConnManagerLowWatermark") == nil { + confTree.Set("P2P.ConnManagerLowWatermark", defaultConfig.P2P.ConnManagerLowWatermark) + } + if confTree.Get("P2P.ConnManagerHighWatermark") == nil { + confTree.Set("P2P.ConnManagerHighWatermark", defaultConfig.P2P.ConnManagerHighWatermark) + } + if confTree.Get("Sync.MaxAdvertiseWaitTime") == nil { + confTree.Set("Sync.MaxAdvertiseWaitTime", defaultConfig.Sync.MaxAdvertiseWaitTime) + } + confTree.Set("Version", "2.5.11") + return confTree + } + + migrations["2.5.11"] = func(confTree *toml.Tree) *toml.Tree { + if confTree.Get("General.TriesInMemory") == nil { + confTree.Set("General.TriesInMemory", defaultConfig.General.TriesInMemory) + } + confTree.Set("Version", "2.5.12") + return confTree + } + + migrations["2.5.12"] = func(confTree *toml.Tree) *toml.Tree { + if confTree.Get("HTTP.ReadTimeout") == nil { + confTree.Set("HTTP.ReadTimeout", defaultConfig.HTTP.ReadTimeout) + } + if confTree.Get("HTTP.WriteTimeout") == nil { + confTree.Set("HTTP.WriteTimeout", defaultConfig.HTTP.WriteTimeout) + } + if confTree.Get("HTTP.IdleTimeout") == nil { + confTree.Set("HTTP.IdleTimeout", defaultConfig.HTTP.IdleTimeout) + } + if confTree.Get("RPCOpt.EvmCallTimeout") == nil { + confTree.Set("RPCOpt.EvmCallTimeout", defaultConfig.RPCOpt.EvmCallTimeout) + } + confTree.Set("Version", "2.5.13") + return confTree + } + // check that the latest version here is the same as in default.go largestKey := getNextVersion(migrations) if largestKey != tomlConfigVersion { diff --git a/cmd/harmony/default.go b/cmd/harmony/default.go index 7de12af9c..95e05b29c 100644 --- a/cmd/harmony/default.go +++ b/cmd/harmony/default.go @@ -5,7 +5,7 @@ import ( nodeconfig "github.com/harmony-one/harmony/internal/configs/node" ) -const tomlConfigVersion = "2.5.10" +const tomlConfigVersion = "2.5.13" const ( defNetworkType = nodeconfig.Mainnet @@ -22,6 +22,7 @@ var defaultConfig = harmonyconfig.HarmonyConfig{ IsOffline: false, DataDir: "./", TraceEnable: false, + TriesInMemory: 128, }, Network: getDefaultNetworkConfig(defNetworkType), P2P: harmonyconfig.P2pConfig{ @@ -32,6 +33,8 @@ var 
defaultConfig = harmonyconfig.HarmonyConfig{ MaxConnsPerIP: nodeconfig.DefaultMaxConnPerIP, DisablePrivateIPScan: false, MaxPeers: nodeconfig.DefaultMaxPeers, + ConnManagerLowWatermark: nodeconfig.DefaultConnManagerLowWatermark, + ConnManagerHighWatermark: nodeconfig.DefaultConnManagerHighWatermark, WaitForEachPeerToConnect: nodeconfig.DefaultWaitForEachPeerToConnect, }, HTTP: harmonyconfig.HttpConfig{ @@ -41,6 +44,9 @@ var defaultConfig = harmonyconfig.HarmonyConfig{ Port: nodeconfig.DefaultRPCPort, AuthPort: nodeconfig.DefaultAuthRPCPort, RosettaPort: nodeconfig.DefaultRosettaPort, + ReadTimeout: nodeconfig.DefaultHTTPTimeoutRead, + WriteTimeout: nodeconfig.DefaultHTTPTimeoutWrite, + IdleTimeout: nodeconfig.DefaultHTTPTimeoutIdle, }, WS: harmonyconfig.WsConfig{ Enabled: true, @@ -56,6 +62,7 @@ var defaultConfig = harmonyconfig.HarmonyConfig{ RpcFilterFile: "./.hmy/rpc_filter.txt", RateLimterEnabled: true, RequestsPerSecond: nodeconfig.DefaultRPCRateLimit, + EvmCallTimeout: nodeconfig.DefaultEvmCallTimeout, }, BLSKeys: harmonyconfig.BlsConfig{ KeyDir: "./.hmy/blskeys", @@ -159,59 +166,78 @@ var defaultStagedSyncConfig = harmonyconfig.StagedSyncConfig{ var ( defaultMainnetSyncConfig = harmonyconfig.SyncConfig{ - Enabled: false, - Downloader: false, - StagedSync: false, - StagedSyncCfg: defaultStagedSyncConfig, - Concurrency: 6, - MinPeers: 6, - InitStreams: 8, - DiscSoftLowCap: 8, - DiscHardLowCap: 6, - DiscHighCap: 128, - DiscBatch: 8, + Enabled: false, + Downloader: false, + StagedSync: false, + StagedSyncCfg: defaultStagedSyncConfig, + Concurrency: 6, + MinPeers: 6, + InitStreams: 8, + MaxAdvertiseWaitTime: 60, //minutes + DiscSoftLowCap: 8, + DiscHardLowCap: 6, + DiscHighCap: 128, + DiscBatch: 8, } defaultTestNetSyncConfig = harmonyconfig.SyncConfig{ - Enabled: true, - Downloader: false, - StagedSync: false, - StagedSyncCfg: defaultStagedSyncConfig, - Concurrency: 2, - MinPeers: 2, - InitStreams: 2, - DiscSoftLowCap: 2, - DiscHardLowCap: 2, - DiscHighCap: 1024, - DiscBatch: 3, + Enabled: true, + Downloader: false, + StagedSync: false, + StagedSyncCfg: defaultStagedSyncConfig, + Concurrency: 2, + MinPeers: 2, + InitStreams: 2, + MaxAdvertiseWaitTime: 5, //minutes + DiscSoftLowCap: 2, + DiscHardLowCap: 2, + DiscHighCap: 1024, + DiscBatch: 3, } defaultLocalNetSyncConfig = harmonyconfig.SyncConfig{ - Enabled: true, - Downloader: true, - StagedSync: false, - StagedSyncCfg: defaultStagedSyncConfig, - Concurrency: 4, - MinPeers: 5, - InitStreams: 5, - DiscSoftLowCap: 5, - DiscHardLowCap: 5, - DiscHighCap: 1024, - DiscBatch: 8, + Enabled: true, + Downloader: true, + StagedSync: true, + StagedSyncCfg: defaultStagedSyncConfig, + Concurrency: 4, + MinPeers: 4, + InitStreams: 4, + MaxAdvertiseWaitTime: 5, //minutes + DiscSoftLowCap: 4, + DiscHardLowCap: 4, + DiscHighCap: 1024, + DiscBatch: 8, + } + + defaultPartnerSyncConfig = harmonyconfig.SyncConfig{ + Enabled: true, + Downloader: true, + StagedSync: false, + StagedSyncCfg: defaultStagedSyncConfig, + Concurrency: 4, + MinPeers: 2, + InitStreams: 2, + MaxAdvertiseWaitTime: 2, //minutes + DiscSoftLowCap: 2, + DiscHardLowCap: 2, + DiscHighCap: 1024, + DiscBatch: 4, } defaultElseSyncConfig = harmonyconfig.SyncConfig{ - Enabled: true, - Downloader: true, - StagedSync: false, - StagedSyncCfg: defaultStagedSyncConfig, - Concurrency: 4, - MinPeers: 4, - InitStreams: 4, - DiscSoftLowCap: 4, - DiscHardLowCap: 4, - DiscHighCap: 1024, - DiscBatch: 8, + Enabled: true, + Downloader: true, + StagedSync: false, + StagedSyncCfg: defaultStagedSyncConfig, + 
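+		// Fallback preset for any other network type; MaxAdvertiseWaitTime
+		// below is in minutes, as on the presets above.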
Concurrency: 4, + MinPeers: 4, + InitStreams: 4, + MaxAdvertiseWaitTime: 2, //minutes + DiscSoftLowCap: 4, + DiscHardLowCap: 4, + DiscHighCap: 1024, + DiscBatch: 8, } ) diff --git a/cmd/harmony/flags.go b/cmd/harmony/flags.go index 26a250d95..8a2799ce2 100644 --- a/cmd/harmony/flags.go +++ b/cmd/harmony/flags.go @@ -31,6 +31,7 @@ var ( legacyDataDirFlag, taraceFlag, + triesInMemoryFlag, } dnsSyncFlags = []cli.Flag{ @@ -63,6 +64,8 @@ var ( p2pDisablePrivateIPScanFlag, maxConnPerIPFlag, maxPeersFlag, + connManagerLowWatermarkFlag, + connManagerHighWatermarkFlag, } httpFlags = []cli.Flag{ @@ -72,6 +75,9 @@ var ( httpPortFlag, httpAuthPortFlag, httpRosettaPortFlag, + httpReadTimeoutFlag, + httpWriteTimeoutFlag, + httpIdleTimeoutFlag, } wsFlags = []cli.Flag{ @@ -89,6 +95,7 @@ var ( rpcFilterFileFlag, rpcRateLimiterEnabledFlag, rpcRateLimitFlag, + rpcEvmCallTimeoutFlag, } blsFlags = append(newBLSFlags, legacyBLSFlags...) @@ -320,6 +327,11 @@ var ( Usage: "indicates if full transaction tracing should be enabled", DefValue: defaultConfig.General.TraceEnable, } + triesInMemoryFlag = cli.IntFlag{ + Name: "blockchain.tries_in_memory", + Usage: "number of blocks from header stored in disk before exiting", + DefValue: defaultConfig.General.TriesInMemory, + } ) func getRootFlags() []cli.Flag { @@ -397,6 +409,14 @@ func applyGeneralFlags(cmd *cobra.Command, config *harmonyconfig.HarmonyConfig) if cli.IsFlagChanged(cmd, isBackupFlag) { config.General.IsBackup = cli.GetBoolFlagValue(cmd, isBackupFlag) } + + if cli.IsFlagChanged(cmd, triesInMemoryFlag) { + value := cli.GetIntFlagValue(cmd, triesInMemoryFlag) + if value <= 2 { + panic("Must provide number greater than 2 for General.TriesInMemory") + } + config.General.TriesInMemory = value + } } // network flags @@ -579,6 +599,16 @@ var ( Usage: "maximum number of peers allowed, 0 means no limit", DefValue: defaultConfig.P2P.MaxConnsPerIP, } + connManagerLowWatermarkFlag = cli.IntFlag{ + Name: "p2p.connmgr-low", + Usage: "lowest number of connections that'll be maintained in connection manager. Set both high and low watermarks to zero to disable connection manager", + DefValue: defaultConfig.P2P.ConnManagerLowWatermark, + } + connManagerHighWatermarkFlag = cli.IntFlag{ + Name: "p2p.connmgr-high", + Usage: "highest number of connections that'll be maintained in connection manager. 
waitForEachPeerToConnectFlag = cli.BoolFlag{ Name: "p2p.wait-for-connections", Usage: "node waits for each single peer to connect and it doesn't add them to peers list after timeout", @@ -624,6 +654,14 @@ func applyP2PFlags(cmd *cobra.Command, config *harmonyconfig.HarmonyConfig) { config.P2P.WaitForEachPeerToConnect = cli.GetBoolFlagValue(cmd, waitForEachPeerToConnectFlag) } + if cli.IsFlagChanged(cmd, connManagerLowWatermarkFlag) { + config.P2P.ConnManagerLowWatermark = cli.GetIntFlagValue(cmd, connManagerLowWatermarkFlag) + } + + if cli.IsFlagChanged(cmd, connManagerHighWatermarkFlag) { + config.P2P.ConnManagerHighWatermark = cli.GetIntFlagValue(cmd, connManagerHighWatermarkFlag) + } + if cli.IsFlagChanged(cmd, p2pDisablePrivateIPScanFlag) { config.P2P.DisablePrivateIPScan = cli.GetBoolFlagValue(cmd, p2pDisablePrivateIPScanFlag) } @@ -661,6 +699,21 @@ var ( Usage: "rosetta port to listen for HTTP requests", DefValue: defaultConfig.HTTP.RosettaPort, } + httpReadTimeoutFlag = cli.StringFlag{ + Name: "http.timeout.read", + Usage: "maximum duration to read the entire request, including the body", + DefValue: defaultConfig.HTTP.ReadTimeout, + } + httpWriteTimeoutFlag = cli.StringFlag{ + Name: "http.timeout.write", + Usage: "maximum duration before timing out writes of the response", + DefValue: defaultConfig.HTTP.WriteTimeout, + } + httpIdleTimeoutFlag = cli.StringFlag{ + Name: "http.timeout.idle", + Usage: "maximum amount of time to wait for the next request when keep-alives are enabled", + DefValue: defaultConfig.HTTP.IdleTimeout, + } ) func applyHTTPFlags(cmd *cobra.Command, config *harmonyconfig.HarmonyConfig) { @@ -698,6 +751,16 @@ func applyHTTPFlags(cmd *cobra.Command, config *harmonyconfig.HarmonyConfig) { config.HTTP.Enabled = true } + if cli.IsFlagChanged(cmd, httpReadTimeoutFlag) { + config.HTTP.ReadTimeout = cli.GetStringFlagValue(cmd, httpReadTimeoutFlag) + } + if cli.IsFlagChanged(cmd, httpWriteTimeoutFlag) { + config.HTTP.WriteTimeout = cli.GetStringFlagValue(cmd, httpWriteTimeoutFlag) + } + if cli.IsFlagChanged(cmd, httpIdleTimeoutFlag) { + config.HTTP.IdleTimeout = cli.GetStringFlagValue(cmd, httpIdleTimeoutFlag) + } + } // ws flags @@ -787,6 +850,12 @@ var ( Usage: "the number of requests per second for RPCs", DefValue: defaultConfig.RPCOpt.RequestsPerSecond, } + + rpcEvmCallTimeoutFlag = cli.StringFlag{ + Name: "rpc.evm-call-timeout", + Usage: "timeout for EVM execution (eth_call); 0 means infinite timeout", + DefValue: defaultConfig.RPCOpt.EvmCallTimeout, + } ) func applyRPCOptFlags(cmd *cobra.Command, config *harmonyconfig.HarmonyConfig) { @@ -811,7 +880,9 @@ func applyRPCOptFlags(cmd *cobra.Command, config *harmonyconfig.HarmonyConfig) { if cli.IsFlagChanged(cmd, rpcRateLimitFlag) { config.RPCOpt.RequestsPerSecond = cli.GetIntFlagValue(cmd, rpcRateLimitFlag) } - + if cli.IsFlagChanged(cmd, rpcEvmCallTimeoutFlag) { + config.RPCOpt.EvmCallTimeout = cli.GetStringFlagValue(cmd, rpcEvmCallTimeoutFlag) + } } // bls flags @@ -1692,6 +1763,11 @@ var ( Usage: "Initial shard-wise number of peers to start syncing", Hidden: true, } + syncMaxAdvertiseWaitTimeFlag = cli.IntFlag{ + Name: "sync.max-advertise-wait-time", + Usage: "maximum wait time (in minutes) between two advertise operations, in which a p2p peer tells other nodes which protocols it supports", + Hidden: true, + } syncDiscSoftLowFlag = cli.IntFlag{ Name: "sync.disc.soft-low-cap", Usage: "Soft low cap for sync stream management",
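All four of the new string-typed options above (http.timeout.read/write/idle and rpc.evm-call-timeout) hold Go duration strings such as "30s". A hedged sketch of how such strings are typically parsed and applied to a net/http server; parseTimeout is a hypothetical helper for illustration, not a function from this diff:

package main

import (
	"fmt"
	"net/http"
	"time"
)

// parseTimeout converts a config string like "30s" into a time.Duration,
// falling back to def when the string is empty or malformed.
func parseTimeout(s string, def time.Duration) time.Duration {
	if d, err := time.ParseDuration(s); err == nil {
		return d
	}
	return def
}

func main() {
	srv := &http.Server{
		Addr:         ":9500",
		ReadTimeout:  parseTimeout("30s", 30*time.Second),
		WriteTimeout: parseTimeout("30s", 30*time.Second),
		IdleTimeout:  parseTimeout("120s", 120*time.Second),
	}
	fmt.Println("read:", srv.ReadTimeout, "write:", srv.WriteTimeout, "idle:", srv.IdleTimeout)
}

Keeping the config fields as strings rather than durations matches how the values travel through the TOML config and CLI flags; parsing is deferred to the point of use.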
@@ -1740,6 +1816,10 @@ func applySyncFlags(cmd *cobra.Command, config *harmonyconfig.HarmonyConfig) { config.Sync.InitStreams = cli.GetIntFlagValue(cmd, syncInitStreamsFlag) } + if cli.IsFlagChanged(cmd, syncMaxAdvertiseWaitTimeFlag) { + config.Sync.MaxAdvertiseWaitTime = cli.GetIntFlagValue(cmd, syncMaxAdvertiseWaitTimeFlag) + } + if cli.IsFlagChanged(cmd, syncDiscSoftLowFlag) { config.Sync.DiscSoftLowCap = cli.GetIntFlagValue(cmd, syncDiscSoftLowFlag) } diff --git a/cmd/harmony/flags_test.go b/cmd/harmony/flags_test.go index 7bd3e7199..2015188ed 100644 --- a/cmd/harmony/flags_test.go +++ b/cmd/harmony/flags_test.go @@ -35,11 +35,12 @@ func TestHarmonyFlags(t *testing.T) { expConfig: harmonyconfig.HarmonyConfig{ Version: tomlConfigVersion, General: harmonyconfig.GeneralConfig{ - NodeType: "validator", - NoStaking: false, - ShardID: -1, - IsArchival: false, - DataDir: "./", + NodeType: "validator", + NoStaking: false, + ShardID: -1, + IsArchival: false, + DataDir: "./", + TriesInMemory: 128, }, Network: harmonyconfig.NetworkConfig{ NetworkType: "mainnet", @@ -65,6 +66,8 @@ func TestHarmonyFlags(t *testing.T) { MaxConnsPerIP: 5, DisablePrivateIPScan: false, MaxPeers: defaultConfig.P2P.MaxPeers, + ConnManagerLowWatermark: defaultConfig.P2P.ConnManagerLowWatermark, + ConnManagerHighWatermark: defaultConfig.P2P.ConnManagerHighWatermark, WaitForEachPeerToConnect: false, }, HTTP: harmonyconfig.HttpConfig{ @@ -74,6 +77,9 @@ func TestHarmonyFlags(t *testing.T) { AuthPort: 9501, RosettaEnabled: false, RosettaPort: 9700, + ReadTimeout: defaultConfig.HTTP.ReadTimeout, + WriteTimeout: defaultConfig.HTTP.WriteTimeout, + IdleTimeout: defaultConfig.HTTP.IdleTimeout, }, RPCOpt: harmonyconfig.RpcOptConfig{ DebugEnabled: false, @@ -83,6 +89,7 @@ func TestHarmonyFlags(t *testing.T) { RpcFilterFile: "./.hmy/rpc_filter.txt", RateLimterEnabled: true, RequestsPerSecond: 1000, + EvmCallTimeout: defaultConfig.RPCOpt.EvmCallTimeout, }, WS: harmonyconfig.WsConfig{ Enabled: true, @@ -184,63 +191,80 @@ func TestGeneralFlags(t *testing.T) { { args: []string{}, expConfig: harmonyconfig.GeneralConfig{ - NodeType: "validator", - NoStaking: false, - ShardID: -1, - IsArchival: false, - DataDir: "./", + NodeType: "validator", + NoStaking: false, + ShardID: -1, + IsArchival: false, + DataDir: "./", + TriesInMemory: 128, }, }, { args: []string{"--run", "explorer", "--run.legacy", "--run.shard=0", "--run.archive=true", "--datadir=./.hmy"}, expConfig: harmonyconfig.GeneralConfig{ - NodeType: "explorer", - NoStaking: true, - ShardID: 0, - IsArchival: true, - DataDir: "./.hmy", + NodeType: "explorer", + NoStaking: true, + ShardID: 0, + IsArchival: true, + DataDir: "./.hmy", + TriesInMemory: 128, }, }, { args: []string{"--node_type", "explorer", "--staking", "--shard_id", "0", "--is_archival", "--db_dir", "./"}, expConfig: harmonyconfig.GeneralConfig{ - NodeType: "explorer", - NoStaking: false, - ShardID: 0, - IsArchival: true, - DataDir: "./", + NodeType: "explorer", + NoStaking: false, + ShardID: 0, + IsArchival: true, + DataDir: "./", + TriesInMemory: 128, }, }, { args: []string{"--staking=false", "--is_archival=false"}, expConfig: harmonyconfig.GeneralConfig{ - NodeType: "validator", - NoStaking: true, - ShardID: -1, - IsArchival: false, - DataDir: "./", + NodeType: "validator", + NoStaking: true, + ShardID: -1, + IsArchival: false, + DataDir: "./", + TriesInMemory: 128, }, }, { args: []string{"--run", "explorer", "--run.shard", "0"}, expConfig: harmonyconfig.GeneralConfig{ - NodeType: "explorer", -
NoStaking: false, - ShardID: 0, - IsArchival: false, - DataDir: "./", + NodeType: "explorer", + NoStaking: false, + ShardID: 0, + IsArchival: false, + DataDir: "./", + TriesInMemory: 128, }, }, { args: []string{"--run", "explorer", "--run.shard", "0", "--run.archive=false"}, expConfig: harmonyconfig.GeneralConfig{ - NodeType: "explorer", - NoStaking: false, - ShardID: 0, - IsArchival: false, - DataDir: "./", + NodeType: "explorer", + NoStaking: false, + ShardID: 0, + IsArchival: false, + DataDir: "./", + TriesInMemory: 128, + }, + }, + { + args: []string{"--blockchain.tries_in_memory", "64"}, + expConfig: harmonyconfig.GeneralConfig{ + NodeType: "validator", + NoStaking: false, + ShardID: -1, + IsArchival: false, + DataDir: "./", + TriesInMemory: 64, }, }, } @@ -374,6 +398,8 @@ func TestP2PFlags(t *testing.T) { MaxConnsPerIP: 10, DisablePrivateIPScan: false, MaxPeers: defaultConfig.P2P.MaxPeers, + ConnManagerLowWatermark: defaultConfig.P2P.ConnManagerLowWatermark, + ConnManagerHighWatermark: defaultConfig.P2P.ConnManagerHighWatermark, WaitForEachPeerToConnect: false, }, }, @@ -386,6 +412,8 @@ func TestP2PFlags(t *testing.T) { MaxConnsPerIP: 10, DisablePrivateIPScan: false, MaxPeers: defaultConfig.P2P.MaxPeers, + ConnManagerLowWatermark: defaultConfig.P2P.ConnManagerLowWatermark, + ConnManagerHighWatermark: defaultConfig.P2P.ConnManagerHighWatermark, WaitForEachPeerToConnect: false, }, }, @@ -399,6 +427,8 @@ func TestP2PFlags(t *testing.T) { MaxConnsPerIP: 5, DisablePrivateIPScan: false, MaxPeers: defaultConfig.P2P.MaxPeers, + ConnManagerLowWatermark: defaultConfig.P2P.ConnManagerLowWatermark, + ConnManagerHighWatermark: defaultConfig.P2P.ConnManagerHighWatermark, WaitForEachPeerToConnect: false, }, }, @@ -412,6 +442,8 @@ func TestP2PFlags(t *testing.T) { MaxConnsPerIP: nodeconfig.DefaultMaxConnPerIP, DisablePrivateIPScan: true, MaxPeers: defaultConfig.P2P.MaxPeers, + ConnManagerLowWatermark: defaultConfig.P2P.ConnManagerLowWatermark, + ConnManagerHighWatermark: defaultConfig.P2P.ConnManagerHighWatermark, WaitForEachPeerToConnect: false, }, }, @@ -425,6 +457,38 @@ func TestP2PFlags(t *testing.T) { MaxConnsPerIP: nodeconfig.DefaultMaxConnPerIP, DisablePrivateIPScan: defaultConfig.P2P.DisablePrivateIPScan, MaxPeers: 100, + ConnManagerLowWatermark: defaultConfig.P2P.ConnManagerLowWatermark, + ConnManagerHighWatermark: defaultConfig.P2P.ConnManagerHighWatermark, + WaitForEachPeerToConnect: false, + }, + }, + { + args: []string{"--p2p.connmgr-low", "100"}, + expConfig: harmonyconfig.P2pConfig{ + Port: nodeconfig.DefaultP2PPort, + IP: nodeconfig.DefaultPublicListenIP, + KeyFile: "./.hmykey", + DiscConcurrency: nodeconfig.DefaultP2PConcurrency, + MaxConnsPerIP: nodeconfig.DefaultMaxConnPerIP, + DisablePrivateIPScan: defaultConfig.P2P.DisablePrivateIPScan, + MaxPeers: defaultConfig.P2P.MaxPeers, + ConnManagerLowWatermark: 100, + ConnManagerHighWatermark: defaultConfig.P2P.ConnManagerHighWatermark, + WaitForEachPeerToConnect: false, + }, + }, + { + args: []string{"--p2p.connmgr-high", "400"}, + expConfig: harmonyconfig.P2pConfig{ + Port: nodeconfig.DefaultP2PPort, + IP: nodeconfig.DefaultPublicListenIP, + KeyFile: "./.hmykey", + DiscConcurrency: nodeconfig.DefaultP2PConcurrency, + MaxConnsPerIP: nodeconfig.DefaultMaxConnPerIP, + DisablePrivateIPScan: defaultConfig.P2P.DisablePrivateIPScan, + MaxPeers: defaultConfig.P2P.MaxPeers, + ConnManagerLowWatermark: defaultConfig.P2P.ConnManagerLowWatermark, + ConnManagerHighWatermark: 400, WaitForEachPeerToConnect: false, }, }, @@ -471,6 +535,9 @@ func 
TestRPCFlags(t *testing.T) { Port: defaultConfig.HTTP.Port, AuthPort: defaultConfig.HTTP.AuthPort, RosettaPort: defaultConfig.HTTP.RosettaPort, + ReadTimeout: defaultConfig.HTTP.ReadTimeout, + WriteTimeout: defaultConfig.HTTP.WriteTimeout, + IdleTimeout: defaultConfig.HTTP.IdleTimeout, }, }, { @@ -482,6 +549,9 @@ func TestRPCFlags(t *testing.T) { Port: 9001, AuthPort: defaultConfig.HTTP.AuthPort, RosettaPort: defaultConfig.HTTP.RosettaPort, + ReadTimeout: defaultConfig.HTTP.ReadTimeout, + WriteTimeout: defaultConfig.HTTP.WriteTimeout, + IdleTimeout: defaultConfig.HTTP.IdleTimeout, }, }, { @@ -493,6 +563,9 @@ func TestRPCFlags(t *testing.T) { Port: defaultConfig.HTTP.Port, AuthPort: 9001, RosettaPort: defaultConfig.HTTP.RosettaPort, + ReadTimeout: defaultConfig.HTTP.ReadTimeout, + WriteTimeout: defaultConfig.HTTP.WriteTimeout, + IdleTimeout: defaultConfig.HTTP.IdleTimeout, }, }, { @@ -504,6 +577,9 @@ func TestRPCFlags(t *testing.T) { Port: 9001, AuthPort: defaultConfig.HTTP.AuthPort, RosettaPort: 10001, + ReadTimeout: defaultConfig.HTTP.ReadTimeout, + WriteTimeout: defaultConfig.HTTP.WriteTimeout, + IdleTimeout: defaultConfig.HTTP.IdleTimeout, }, }, { @@ -515,6 +591,9 @@ func TestRPCFlags(t *testing.T) { Port: defaultConfig.HTTP.Port, AuthPort: defaultConfig.HTTP.AuthPort, RosettaPort: 10001, + ReadTimeout: defaultConfig.HTTP.ReadTimeout, + WriteTimeout: defaultConfig.HTTP.WriteTimeout, + IdleTimeout: defaultConfig.HTTP.IdleTimeout, }, }, { @@ -526,6 +605,23 @@ func TestRPCFlags(t *testing.T) { Port: 9501, AuthPort: 9502, RosettaPort: 9701, + ReadTimeout: defaultConfig.HTTP.ReadTimeout, + WriteTimeout: defaultConfig.HTTP.WriteTimeout, + IdleTimeout: defaultConfig.HTTP.IdleTimeout, + }, + }, + { + args: []string{"--http.timeout.read", "10s", "--http.timeout.write", "20s", "--http.timeout.idle", "30s"}, + expConfig: harmonyconfig.HttpConfig{ + Enabled: true, + RosettaEnabled: false, + IP: defaultConfig.HTTP.IP, + Port: defaultConfig.HTTP.Port, + AuthPort: defaultConfig.HTTP.AuthPort, + RosettaPort: defaultConfig.HTTP.RosettaPort, + ReadTimeout: "10s", + WriteTimeout: "20s", + IdleTimeout: "30s", }, }, } @@ -639,6 +735,7 @@ func TestRPCOptFlags(t *testing.T) { RpcFilterFile: "./.hmy/rpc_filter.txt", RateLimterEnabled: true, RequestsPerSecond: 1000, + EvmCallTimeout: defaultConfig.RPCOpt.EvmCallTimeout, }, }, @@ -652,6 +749,7 @@ func TestRPCOptFlags(t *testing.T) { RpcFilterFile: "./.hmy/rpc_filter.txt", RateLimterEnabled: true, RequestsPerSecond: 1000, + EvmCallTimeout: defaultConfig.RPCOpt.EvmCallTimeout, }, }, @@ -665,6 +763,7 @@ func TestRPCOptFlags(t *testing.T) { RpcFilterFile: "./.hmy/rpc_filter.txt", RateLimterEnabled: true, RequestsPerSecond: 1000, + EvmCallTimeout: defaultConfig.RPCOpt.EvmCallTimeout, }, }, @@ -678,6 +777,7 @@ func TestRPCOptFlags(t *testing.T) { RpcFilterFile: "./.hmy/rpc_filter.txt", RateLimterEnabled: true, RequestsPerSecond: 1000, + EvmCallTimeout: defaultConfig.RPCOpt.EvmCallTimeout, }, }, @@ -691,6 +791,7 @@ func TestRPCOptFlags(t *testing.T) { RpcFilterFile: "./rmf.toml", RateLimterEnabled: true, RequestsPerSecond: 1000, + EvmCallTimeout: defaultConfig.RPCOpt.EvmCallTimeout, }, }, @@ -704,6 +805,7 @@ func TestRPCOptFlags(t *testing.T) { RpcFilterFile: "./.hmy/rpc_filter.txt", RateLimterEnabled: true, RequestsPerSecond: 1000, + EvmCallTimeout: defaultConfig.RPCOpt.EvmCallTimeout, }, }, @@ -717,6 +819,7 @@ func TestRPCOptFlags(t *testing.T) { RpcFilterFile: "./.hmy/rpc_filter.txt", RateLimterEnabled: true, RequestsPerSecond: 2000, + EvmCallTimeout: 
defaultConfig.RPCOpt.EvmCallTimeout, }, }, @@ -730,6 +833,21 @@ func TestRPCOptFlags(t *testing.T) { RpcFilterFile: "./.hmy/rpc_filter.txt", RateLimterEnabled: false, RequestsPerSecond: 2000, + EvmCallTimeout: defaultConfig.RPCOpt.EvmCallTimeout, + }, + }, + + { + args: []string{"--rpc.evm-call-timeout", "10s"}, + expConfig: harmonyconfig.RpcOptConfig{ + DebugEnabled: false, + EthRPCsEnabled: true, + StakingRPCsEnabled: true, + LegacyRPCsEnabled: true, + RpcFilterFile: "./.hmy/rpc_filter.txt", + RateLimterEnabled: true, + RequestsPerSecond: 1000, + EvmCallTimeout: "10s", }, }, } diff --git a/cmd/harmony/main.go b/cmd/harmony/main.go index da51d9c52..f01cb758e 100644 --- a/cmd/harmony/main.go +++ b/cmd/harmony/main.go @@ -39,6 +39,7 @@ import ( "github.com/harmony-one/harmony/api/service" "github.com/harmony-one/harmony/api/service/pprof" "github.com/harmony-one/harmony/api/service/prometheus" + "github.com/harmony-one/harmony/api/service/stagedstreamsync" "github.com/harmony-one/harmony/api/service/synchronize" "github.com/harmony-one/harmony/common/fdlimit" "github.com/harmony-one/harmony/common/ntp" @@ -333,23 +334,7 @@ func setupNodeAndRun(hc harmonyconfig.HarmonyConfig) { } // Parse RPC config - nodeConfig.RPCServer = nodeconfig.RPCServerConfig{ - HTTPEnabled: hc.HTTP.Enabled, - HTTPIp: hc.HTTP.IP, - HTTPPort: hc.HTTP.Port, - HTTPAuthPort: hc.HTTP.AuthPort, - WSEnabled: hc.WS.Enabled, - WSIp: hc.WS.IP, - WSPort: hc.WS.Port, - WSAuthPort: hc.WS.AuthPort, - DebugEnabled: hc.RPCOpt.DebugEnabled, - EthRPCsEnabled: hc.RPCOpt.EthRPCsEnabled, - StakingRPCsEnabled: hc.RPCOpt.StakingRPCsEnabled, - LegacyRPCsEnabled: hc.RPCOpt.LegacyRPCsEnabled, - RpcFilterFile: hc.RPCOpt.RpcFilterFile, - RateLimiterEnabled: hc.RPCOpt.RateLimterEnabled, - RequestsPerSecond: hc.RPCOpt.RequestsPerSecond, - } + nodeConfig.RPCServer = hc.ToRPCServerConfig() // Parse rosetta config nodeConfig.RosettaServer = nodeconfig.RosettaServerConfig{ @@ -415,7 +400,11 @@ func setupNodeAndRun(hc harmonyconfig.HarmonyConfig) { // Setup services if hc.Sync.Enabled { - setupSyncService(currentNode, myHost, hc) + if hc.Sync.StagedSync { + setupStagedSyncService(currentNode, myHost, hc) + } else { + setupSyncService(currentNode, myHost, hc) + } } if currentNode.NodeConfig.Role() == nodeconfig.Validator { currentNode.RegisterValidatorServices() @@ -624,6 +613,13 @@ func createGlobalConfig(hc harmonyconfig.HarmonyConfig) (*nodeconfig.ConfigType, Port: strconv.Itoa(hc.P2P.Port), ConsensusPubKey: nodeConfig.ConsensusPriKey[0].Pub.Object, } + + // for localnet the node has to be forced to assume it is publicly reachable + forceReachabilityPublic := false + if hc.Network.NetworkType == nodeconfig.Localnet { + forceReachabilityPublic = true + } + myHost, err = p2p.NewHost(p2p.HostConfig{ Self: &selfPeer, BLSKey: nodeConfig.P2PPriKey, @@ -633,7 +629,10 @@ func createGlobalConfig(hc harmonyconfig.HarmonyConfig) (*nodeconfig.ConfigType, MaxConnPerIP: hc.P2P.MaxConnsPerIP, DisablePrivateIPScan: hc.P2P.DisablePrivateIPScan, MaxPeers: hc.P2P.MaxPeers, + ConnManagerLowWatermark: hc.P2P.ConnManagerLowWatermark, + ConnManagerHighWatermark: hc.P2P.ConnManagerHighWatermark, WaitForEachPeerToConnect: hc.P2P.WaitForEachPeerToConnect, + ForceReachabilityPublic: forceReachabilityPublic, }) if err != nil { return nil, errors.Wrap(err, "cannot create P2P network host") @@ -760,7 +759,8 @@ func setupConsensusAndNode(hc harmonyconfig.HarmonyConfig, nodeConfig *nodeconfi currentNode.SyncingPeerProvider = node.NewLocalSyncingPeerProvider( 6000, uint16(selfPort),
epochConfig.NumShards(), uint32(epochConfig.NumNodesPerShard())) } else { - currentNode.SyncingPeerProvider = node.NewDNSSyncingPeerProvider(hc.DNSSync.Zone, strconv.Itoa(hc.DNSSync.Port)) + addrs := myHost.GetP2PHost().Addrs() + currentNode.SyncingPeerProvider = node.NewDNSSyncingPeerProvider(hc.DNSSync.Zone, strconv.Itoa(hc.DNSSync.Port), addrs) } currentNode.NodeConfig.DNSZone = hc.DNSSync.Zone @@ -769,7 +769,7 @@ func setupConsensusAndNode(hc harmonyconfig.HarmonyConfig, nodeConfig *nodeconfi ) nodeconfig.GetDefaultConfig().DBDir = nodeConfig.DBDir - processNodeType(hc, currentNode, currentConsensus) + currentConsensus.SetIsBackup(processNodeType(hc, currentNode)) currentNode.NodeConfig.SetShardGroupID(nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(nodeConfig.ShardID))) currentNode.NodeConfig.SetClientGroupID(nodeconfig.NewClientGroupIDByShardID(shard.BeaconChainShardID)) currentNode.NodeConfig.ConsensusPriKey = nodeConfig.ConsensusPriKey @@ -821,7 +821,7 @@ func setupTiKV(hc harmonyconfig.HarmonyConfig) shardchain.DBFactory { return factory } -func processNodeType(hc harmonyconfig.HarmonyConfig, currentNode *node.Node, currentConsensus *consensus.Consensus) { +func processNodeType(hc harmonyconfig.HarmonyConfig, currentNode *node.Node) (isBackup bool) { switch hc.General.NodeType { case nodeTypeExplorer: nodeconfig.SetDefaultRole(nodeconfig.ExplorerNode) @@ -831,10 +831,9 @@ func processNodeType(hc harmonyconfig.HarmonyConfig, currentNode *node.Node, cur nodeconfig.SetDefaultRole(nodeconfig.Validator) currentNode.NodeConfig.SetRole(nodeconfig.Validator) - if hc.General.IsBackup { - currentConsensus.SetIsBackup(true) - } + return hc.General.IsBackup } + return false } func setupPprofService(node *node.Node, hc harmonyconfig.HarmonyConfig) { @@ -874,8 +873,8 @@ func setupPrometheusService(node *node.Node, hc harmonyconfig.HarmonyConfig, sid func setupSyncService(node *node.Node, host p2p.Host, hc harmonyconfig.HarmonyConfig) { blockchains := []core.BlockChain{node.Blockchain()} - if !node.IsRunningBeaconChain() { - blockchains = append(blockchains, node.Beaconchain()) + if node.Blockchain().ShardID() != shard.BeaconChainShardID { + blockchains = append(blockchains, node.EpochChain()) } dConfig := downloader.Config{ @@ -907,6 +906,46 @@ } } +func setupStagedSyncService(node *node.Node, host p2p.Host, hc harmonyconfig.HarmonyConfig) { + blockchains := []core.BlockChain{node.Blockchain()} + if node.Blockchain().ShardID() != shard.BeaconChainShardID { + blockchains = append(blockchains, node.EpochChain()) + } + + sConfig := stagedstreamsync.Config{ + ServerOnly: !hc.Sync.Downloader, + Network: nodeconfig.NetworkType(hc.Network.NetworkType), + Concurrency: hc.Sync.Concurrency, + MinStreams: hc.Sync.MinPeers, + InitStreams: hc.Sync.InitStreams, + MaxAdvertiseWaitTime: hc.Sync.MaxAdvertiseWaitTime, + SmSoftLowCap: hc.Sync.DiscSoftLowCap, + SmHardLowCap: hc.Sync.DiscHardLowCap, + SmHiCap: hc.Sync.DiscHighCap, + SmDiscBatch: hc.Sync.DiscBatch, + LogProgress: node.NodeConfig.LogProgress, + } + + // If we are running a side chain, we will need to do some extra work for beacon + // sync.
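The BeaconHelperConfig wiring just below hands the staged sync service a channel of beacon blocks plus a hook to run after each insertion. A self-contained sketch of that channel-plus-hook pattern (block, blockC, and insertHook are illustrative stand-ins, not the repo's types):

package main

import "fmt"

type block struct{ num uint64 }

// beaconHelper drains a block channel and invokes a hook after every
// insert, mirroring the BlockC/InsertHook pairing used below.
func beaconHelper(blockC <-chan *block, insertHook func()) {
	for b := range blockC {
		fmt.Println("insert beacon block", b.num) // placeholder for chain insertion
		insertHook()
	}
}

func main() {
	c := make(chan *block, 1)
	c <- &block{num: 42}
	close(c)
	beaconHelper(c, func() { fmt.Println("hook: notify side-shard consumers") })
}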
+ if !node.IsRunningBeaconChain() { + sConfig.BHConfig = &stagedstreamsync.BeaconHelperConfig{ + BlockC: node.BeaconBlockChannel, + InsertHook: node.BeaconSyncHook, + } + } + + //Setup stream sync service + s := stagedstreamsync.NewService(host, blockchains, sConfig) + + node.RegisterService(service.StagedStreamSync, s) + + d := s.Downloaders.GetShardDownloader(node.Blockchain().ShardID()) + if hc.Sync.Downloader && hc.General.NodeType != nodeTypeExplorer { + node.Consensus.SetDownloader(d) // Set downloader when stream client is active + } +} + func setupBlacklist(hc harmonyconfig.HarmonyConfig) (map[ethCommon.Address]struct{}, error) { rosetta_common.InitRosettaFile(hc.TxPool.RosettaFixFile) diff --git a/consensus/checks.go b/consensus/checks.go index ceaf9987b..28da66ad7 100644 --- a/consensus/checks.go +++ b/consensus/checks.go @@ -55,11 +55,11 @@ func (consensus *Consensus) senderKeySanityChecks(msg *msg_pb.Message, senderKey return true } -func (consensus *Consensus) isRightBlockNumAndViewID(recvMsg *FBFTMessage, -) bool { - if recvMsg.ViewID != consensus.GetCurBlockViewID() || recvMsg.BlockNum != consensus.BlockNum() { +func (consensus *Consensus) isRightBlockNumAndViewID(recvMsg *FBFTMessage) bool { + blockNum := consensus.getBlockNum() + if recvMsg.ViewID != consensus.getCurBlockViewID() || recvMsg.BlockNum != blockNum { consensus.getLogger().Debug(). - Uint64("blockNum", consensus.BlockNum()). + Uint64("blockNum", blockNum). Str("recvMsg", recvMsg.String()). Msg("BlockNum/viewID not match") return false @@ -87,7 +87,7 @@ func (consensus *Consensus) onAnnounceSanityChecks(recvMsg *FBFTMessage) bool { Str("recvMsg", recvMsg.String()). Str("LeaderKey", consensus.LeaderPubKey.Bytes.Hex()). Msg("[OnAnnounce] Leader is malicious") - if consensus.IsViewChangingMode() { + if consensus.isViewChangingMode() { consensus.getLogger().Debug().Msg( "[OnAnnounce] Already in ViewChanging mode, conflicing announce, doing noop", ) @@ -147,12 +147,12 @@ func (consensus *Consensus) onViewChangeSanityCheck(recvMsg *FBFTMessage) bool { consensus.getLogger().Debug(). Uint64("MsgBlockNum", recvMsg.BlockNum). - Uint64("MyViewChangingID", consensus.GetViewChangingID()). + Uint64("MyViewChangingID", consensus.getViewChangingID()). Uint64("MsgViewChangingID", recvMsg.ViewID). Interface("SendPubKeys", recvMsg.SenderPubkeys). Msg("[onViewChangeSanityCheck]") - if consensus.BlockNum() > recvMsg.BlockNum { + if consensus.getBlockNum() > recvMsg.BlockNum { consensus.getLogger().Debug(). Msg("[onViewChange] Message BlockNum Is Low") return false @@ -162,14 +162,14 @@ func (consensus *Consensus) onViewChangeSanityCheck(recvMsg *FBFTMessage) bool { Msg("[onViewChangeSanityCheck] MsgBlockNum is different from my BlockNumber") return false } - if consensus.IsViewChangingMode() && - consensus.GetCurBlockViewID() > recvMsg.ViewID { - consensus.getLogger().Debug().Uint64("curBlockViewID", consensus.GetCurBlockViewID()). + if consensus.isViewChangingMode() && + consensus.getCurBlockViewID() > recvMsg.ViewID { + consensus.getLogger().Debug().Uint64("curBlockViewID", consensus.getCurBlockViewID()). Uint64("msgViewID", recvMsg.ViewID). Msg("[onViewChangeSanityCheck] ViewChanging ID Is Low") return false } - if recvMsg.ViewID > consensus.GetViewChangingID() && recvMsg.ViewID-consensus.GetViewChangingID() > MaxViewIDDiff { + if recvMsg.ViewID > consensus.getViewChangingID() && recvMsg.ViewID-consensus.getViewChangingID() > MaxViewIDDiff { consensus.getLogger().Debug(). 
Msg("[onViewChangeSanityCheck] Received viewID that is MaxViewIDDiff (249) further from the current viewID!") return false @@ -194,9 +194,9 @@ func (consensus *Consensus) onViewChangeSanityCheck(recvMsg *FBFTMessage) bool { // TODO: leo: move the sanity check to p2p message validation func (consensus *Consensus) onNewViewSanityCheck(recvMsg *FBFTMessage) bool { - if recvMsg.ViewID < consensus.GetCurBlockViewID() { + if recvMsg.ViewID < consensus.getCurBlockViewID() { consensus.getLogger().Warn(). - Uint64("LastSuccessfulConsensusViewID", consensus.GetCurBlockViewID()). + Uint64("LastSuccessfulConsensusViewID", consensus.getCurBlockViewID()). Uint64("MsgViewChangingID", recvMsg.ViewID). Msg("[onNewView] ViewID should be larger than the viewID of the last successful consensus") return false diff --git a/consensus/consensus.go b/consensus/consensus.go index 89897e372..e0cc591df 100644 --- a/consensus/consensus.go +++ b/consensus/consensus.go @@ -47,7 +47,7 @@ type Consensus struct { // FBFTLog stores the pbft messages and blocks during FBFT process FBFTLog *FBFTLog // phase: different phase of FBFT protocol: pre-prepare, prepare, commit, finish etc - phase *LockedFBFTPhase + phase FBFTPhase // current indicates what state a node is in current State // isBackup declarative the node is in backup mode @@ -61,14 +61,12 @@ type Consensus struct { commitBitmap *bls_cosi.Mask multiSigBitmap *bls_cosi.Mask // Bitmap for parsing multisig bitmap from validators - multiSigMutex sync.RWMutex // Registry for services. registry *registry.Registry // Minimal number of peers in the shard // If the number of validators is less than minPeers, the consensus won't start - MinPeers int - pubKeyLock sync.Mutex + MinPeers int // private/public keys of current node priKey multibls.PrivateKeys // the publickey of leader @@ -85,9 +83,7 @@ type Consensus struct { // IgnoreViewIDCheck determines whether to ignore viewID check IgnoreViewIDCheck *abool.AtomicBool // consensus mutex - mutex sync.Mutex - // mutex for verify new block - verifyBlockMutex sync.Mutex + mutex sync.RWMutex // ViewChange struct vc *viewChange // Signal channel for proposing a new block and start new consensus @@ -114,10 +110,6 @@ type Consensus struct { host p2p.Host // MessageSender takes are of sending consensus message and the corresponding retry logic. msgSender *MessageSender - // Used to convey to the consensus main loop that block syncing has finished. - syncReadyChan chan struct{} - // Used to convey to the consensus main loop that node is out of sync - syncNotReadyChan chan struct{} // If true, this consensus will not propose view change. disableViewChange bool // Have a dedicated reader thread pull from this chan, like in node @@ -136,6 +128,10 @@ type Consensus struct { finalityCounter atomic.Value //int64 dHelper *downloadHelper + + // Both flags only for initialization state. + start bool + isInitialLeader bool } // Blockchain returns the blockchain. @@ -144,7 +140,7 @@ func (consensus *Consensus) Blockchain() core.BlockChain { } // VerifyBlock is a function used to verify the block and keep trace of verified blocks. 
-func (consensus *Consensus) VerifyBlock(block *types.Block) error { +func (consensus *Consensus) verifyBlock(block *types.Block) error { if !consensus.FBFTLog.IsBlockVerified(block.Hash()) { if err := consensus.BlockVerifier(block); err != nil { return errors.Errorf("Block verification failed: %s", err) @@ -157,12 +153,14 @@ func (consensus *Consensus) VerifyBlock(block *types.Block) error { // BlocksSynchronized lets the main loop know that block synchronization finished // thus the blockchain is likely to be up to date. func (consensus *Consensus) BlocksSynchronized() { - consensus.syncReadyChan <- struct{}{} + consensus.mutex.Lock() + defer consensus.mutex.Unlock() + consensus.syncReadyChan() } // BlocksNotSynchronized lets the main loop know that block is not synchronized func (consensus *Consensus) BlocksNotSynchronized() { - consensus.syncNotReadyChan <- struct{}{} + consensus.syncNotReadyChan() } // VdfSeedSize returns the number of VRFs for VDF computation @@ -172,21 +170,39 @@ func (consensus *Consensus) VdfSeedSize() int { // GetPublicKeys returns the public keys func (consensus *Consensus) GetPublicKeys() multibls.PublicKeys { + return consensus.getPublicKeys() +} + +func (consensus *Consensus) getPublicKeys() multibls.PublicKeys { return consensus.priKey.GetPublicKeys() } func (consensus *Consensus) GetLeaderPubKey() *bls_cosi.PublicKeyWrapper { - consensus.pubKeyLock.Lock() - defer consensus.pubKeyLock.Unlock() + consensus.mutex.RLock() + defer consensus.mutex.RUnlock() + return consensus.getLeaderPubKey() +} + +func (consensus *Consensus) getLeaderPubKey() *bls_cosi.PublicKeyWrapper { return consensus.LeaderPubKey } +func (consensus *Consensus) SetLeaderPubKey(pub *bls_cosi.PublicKeyWrapper) { + consensus.mutex.Lock() + defer consensus.mutex.Unlock() + consensus.setLeaderPubKey(pub) +} + +func (consensus *Consensus) setLeaderPubKey(pub *bls_cosi.PublicKeyWrapper) { + consensus.LeaderPubKey = pub +} + func (consensus *Consensus) GetPrivateKeys() multibls.PrivateKeys { return consensus.priKey } // GetLeaderPrivateKey returns leader private key if node is the leader -func (consensus *Consensus) GetLeaderPrivateKey(leaderKey *bls_core.PublicKey) (*bls.PrivateKeyWrapper, error) { +func (consensus *Consensus) getLeaderPrivateKey(leaderKey *bls_core.PublicKey) (*bls.PrivateKeyWrapper, error) { for i, key := range consensus.priKey { if key.Pub.Object.IsEqual(leaderKey) { return &consensus.priKey[i], nil @@ -195,15 +211,15 @@ func (consensus *Consensus) GetLeaderPrivateKey(leaderKey *bls_core.PublicKey) ( return nil, errors.Wrapf(errLeaderPriKeyNotFound, leaderKey.SerializeToHexStr()) } -// GetConsensusLeaderPrivateKey returns consensus leader private key if node is the leader -func (consensus *Consensus) GetConsensusLeaderPrivateKey() (*bls.PrivateKeyWrapper, error) { - return consensus.GetLeaderPrivateKey(consensus.LeaderPubKey.Object) +// getConsensusLeaderPrivateKey returns consensus leader private key if node is the leader +func (consensus *Consensus) getConsensusLeaderPrivateKey() (*bls.PrivateKeyWrapper, error) { + return consensus.getLeaderPrivateKey(consensus.LeaderPubKey.Object) } // SetBlockVerifier sets the block verifier func (consensus *Consensus) SetBlockVerifier(verifier VerifyBlockFunc) { consensus.BlockVerifier = verifier - consensus.vc.SetVerifyBlock(consensus.VerifyBlock) + consensus.vc.SetVerifyBlock(consensus.verifyBlock) } func (consensus *Consensus) IsBackup() bool { @@ -214,13 +230,19 @@ func (consensus *Consensus) BlockNum() uint64 { return 
atomic.LoadUint64(&consensus.blockNum) } +func (consensus *Consensus) getBlockNum() uint64 { + return atomic.LoadUint64(&consensus.blockNum) +} + // New create a new Consensus record func New( host p2p.Host, shard uint32, multiBLSPriKey multibls.PrivateKeys, registry *registry.Registry, Decider quorum.Decider, minPeers int, aggregateSig bool, ) (*Consensus, error) { - consensus := Consensus{} + consensus := Consensus{ + ShardID: shard, + } consensus.Decider = Decider consensus.registry = registry consensus.MinPeers = minPeers @@ -230,7 +252,7 @@ func New( consensus.BlockNumLowChan = make(chan struct{}, 1) // FBFT related consensus.FBFTLog = NewFBFTLog() - consensus.phase = NewLockedFBFTPhase(FBFTAnnounce) + consensus.phase = FBFTAnnounce consensus.current = State{mode: Normal} // FBFT timeout consensus.consensusTimeout = createTimeout() @@ -249,8 +271,6 @@ func New( // displayed on explorer as Height right now consensus.SetCurBlockViewID(0) consensus.ShardID = shard - consensus.syncReadyChan = make(chan struct{}) - consensus.syncNotReadyChan = make(chan struct{}) consensus.SlashChan = make(chan slash.Record) consensus.ReadySignal = make(chan ProposalType) consensus.CommitSigChannel = make(chan []byte) diff --git a/consensus/consensus_fbft.go b/consensus/consensus_fbft.go deleted file mode 100644 index 313abf061..000000000 --- a/consensus/consensus_fbft.go +++ /dev/null @@ -1,30 +0,0 @@ -package consensus - -import "sync" - -type LockedFBFTPhase struct { - mu sync.Mutex - phase FBFTPhase -} - -func NewLockedFBFTPhase(initialPhrase FBFTPhase) *LockedFBFTPhase { - return &LockedFBFTPhase{ - phase: initialPhrase, - } -} - -func (a *LockedFBFTPhase) Set(phrase FBFTPhase) { - a.mu.Lock() - a.phase = phrase - a.mu.Unlock() -} - -func (a *LockedFBFTPhase) Get() FBFTPhase { - a.mu.Lock() - defer a.mu.Unlock() - return a.phase -} - -func (a *LockedFBFTPhase) String() string { - return a.Get().String() -} diff --git a/consensus/consensus_fbft_test.go b/consensus/consensus_fbft_test.go deleted file mode 100644 index a84cc3c83..000000000 --- a/consensus/consensus_fbft_test.go +++ /dev/null @@ -1,15 +0,0 @@ -package consensus - -import ( - "testing", - - "github.com/stretchr/testify/require" -) - -func TestLockedFBFTPhase(t *testing.T) { - s := NewLockedFBFTPhase(FBFTAnnounce) - require.Equal(t, FBFTAnnounce, s.Get()) - - s.Set(FBFTCommit) - require.Equal(t, FBFTCommit, s.Get()) -} diff --git a/consensus/consensus_msg_sender.go b/consensus/consensus_msg_sender.go index 003d89cd9..ffa9d8b1c 100644 --- a/consensus/consensus_msg_sender.go +++ b/consensus/consensus_msg_sender.go @@ -67,7 +67,10 @@ func (sender *MessageSender) SendWithRetry(blockNum uint64, msgType msg_pb.Messa sender.Retry(&msgRetry) }() } - return sender.host.SendMessageToGroups(groups, p2pMsg) + // MessageSender lives inside consensus, but internally calls the consensus public API. + // It would deadlock if run on the current thread. + go sender.host.SendMessageToGroups(groups, p2pMsg) + return nil } // DelayedSendWithRetry is similar to SendWithRetry but without the initial message sending but only retries. @@ -86,7 +89,10 @@ func (sender *MessageSender) DelayedSendWithRetry(blockNum uint64, msgType msg_p // SendWithoutRetry sends message without retry logic. func (sender *MessageSender) SendWithoutRetry(groups []nodeconfig.GroupID, p2pMsg []byte) error { - return sender.host.SendMessageToGroups(groups, p2pMsg) + // MessageSender lives inside consensus, but internally calls the consensus public API. + // It would deadlock if run on the current thread. + go sender.host.SendMessageToGroups(groups, p2pMsg) + return nil }
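The comment in this hunk is the heart of the change: once consensus holds a plain, non-reentrant mutex, a synchronous send that calls back into a locking public method would self-deadlock, so the send is pushed onto a goroutine. A minimal reproduction of the hazard and the fix, using an illustrative svc type rather than the repo's code:

package main

import (
	"fmt"
	"sync"
	"time"
)

type svc struct{ mu sync.Mutex }

// Status is a public, locking entry point.
func (s *svc) Status() string {
	s.mu.Lock()
	defer s.mu.Unlock()
	return "ok"
}

// broadcast is called while mu is already held. Running the closure
// inline would deadlock because Status takes the same lock, so it is
// dispatched on a goroutine that runs after the caller unlocks.
func (s *svc) broadcast() {
	go func() { fmt.Println("send with status:", s.Status()) }()
}

func main() {
	s := &svc{}
	s.mu.Lock()
	s.broadcast() // returns immediately; the goroutine blocks on mu
	s.mu.Unlock()
	time.Sleep(50 * time.Millisecond) // demo only: let the send finish
}

The trade-off of the goroutine dispatch is that send errors can no longer be returned to the caller, which is why both methods now always return nil.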
// Retry will retry the consensus message for times. diff --git a/consensus/consensus_service.go b/consensus/consensus_service.go index 310c9bb9d..0e4bb6814 100644 --- a/consensus/consensus_service.go +++ b/consensus/consensus_service.go @@ -7,6 +7,7 @@ import ( "github.com/harmony-one/harmony/core/types" "github.com/harmony-one/harmony/crypto/bls" + "github.com/harmony-one/harmony/multibls" "github.com/ethereum/go-ethereum/common" protobuf "github.com/golang/protobuf/proto" @@ -73,8 +74,12 @@ func (consensus *Consensus) signAndMarshalConsensusMessage(message *msg_pb.Messa // UpdatePublicKeys updates the PublicKeys for // quorum on current subcommittee, protected by a mutex func (consensus *Consensus) UpdatePublicKeys(pubKeys, allowlist []bls_cosi.PublicKeyWrapper) int64 { - // TODO: use mutex for updating public keys pointer. No need to lock on all these logic. - consensus.pubKeyLock.Lock() + consensus.mutex.Lock() + defer consensus.mutex.Unlock() + return consensus.updatePublicKeys(pubKeys, allowlist) +} + +func (consensus *Consensus) updatePublicKeys(pubKeys, allowlist []bls_cosi.PublicKeyWrapper) int64 { consensus.Decider.UpdateParticipants(pubKeys, allowlist) consensus.getLogger().Info().Msg("My Committee updated") for i := range pubKeys { @@ -93,24 +98,24 @@ func (consensus *Consensus) UpdatePublicKeys(pubKeys, allowlist []bls_cosi.Publi consensus.getLogger().Error(). Msg("[UpdatePublicKeys] Participants is empty") } - consensus.pubKeyLock.Unlock() + for i := range pubKeys { + consensus.getLogger().Info(). + Int("index", i). + Str("BLSPubKey", pubKeys[i].Bytes.Hex()). + Msg("Member") + } // reset states after update public keys // TODO: incorporate bitmaps in the decider, so their state can't be inconsistent. - consensus.UpdateBitmaps() - consensus.ResetState() + consensus.updateBitmaps() + consensus.resetState() // do not reset view change state if it is in view changing mode - if !consensus.IsViewChangingMode() { - consensus.ResetViewChangeState() + if !consensus.isViewChangingMode() { + consensus.resetViewChangeState() } return consensus.Decider.ParticipantsCount() } -// NewFaker returns a faker consensus. -func NewFaker() *Consensus { - return &Consensus{} -} - // Sign on the hash of the message func (consensus *Consensus) signMessage(message []byte, priKey *bls_core.SecretKey) []byte { hash := hash.Keccak256(message) @@ -133,7 +138,7 @@ func (consensus *Consensus) signConsensusMessage(message *msg_pb.Message, } // UpdateBitmaps update the bitmaps for prepare and commit phase -func (consensus *Consensus) UpdateBitmaps() { +func (consensus *Consensus) updateBitmaps() { consensus.getLogger().Debug(). Str("MessageType", consensus.phase.String()).
Msg("[UpdateBitmaps] Updating consensus bitmaps") @@ -143,13 +148,12 @@ func (consensus *Consensus) UpdateBitmaps() { multiSigBitmap, _ := bls_cosi.NewMask(members, nil) consensus.prepareBitmap = prepareBitmap consensus.commitBitmap = commitBitmap - consensus.multiSigMutex.Lock() consensus.multiSigBitmap = multiSigBitmap - consensus.multiSigMutex.Unlock() + } // ResetState resets the state of the consensus -func (consensus *Consensus) ResetState() { +func (consensus *Consensus) resetState() { consensus.switchPhase("ResetState", FBFTAnnounce) consensus.blockHash = [32]byte{} @@ -167,11 +171,24 @@ func (consensus *Consensus) ResetState() { // IsValidatorInCommittee returns whether the given validator BLS address is part of my committee func (consensus *Consensus) IsValidatorInCommittee(pubKey bls.SerializedPublicKey) bool { + consensus.mutex.RLock() + defer consensus.mutex.RUnlock() + return consensus.isValidatorInCommittee(pubKey) +} + +func (consensus *Consensus) isValidatorInCommittee(pubKey bls.SerializedPublicKey) bool { return consensus.Decider.IndexOf(pubKey) != -1 } // SetMode sets the mode of consensus func (consensus *Consensus) SetMode(m Mode) { + consensus.mutex.Lock() + defer consensus.mutex.Unlock() + consensus.setMode(m) +} + +// SetMode sets the mode of consensus +func (consensus *Consensus) setMode(m Mode) { if m == Normal && consensus.isBackup { m = NormalBackup } @@ -193,6 +210,13 @@ func (consensus *Consensus) SetIsBackup(isBackup bool) { // Mode returns the mode of consensus func (consensus *Consensus) Mode() Mode { + consensus.mutex.RLock() + defer consensus.mutex.RUnlock() + return consensus.mode() +} + +// mode returns the mode of consensus +func (consensus *Consensus) mode() Mode { return consensus.current.Mode() } @@ -212,8 +236,8 @@ func (consensus *Consensus) checkViewID(msg *FBFTMessage) error { if consensus.IgnoreViewIDCheck.IsSet() { //in syncing mode, node accepts incoming messages without viewID/leaderKey checking //so only set mode to normal when new node enters consensus and need checking viewID - consensus.SetMode(Normal) - consensus.SetViewIDs(msg.ViewID) + consensus.setMode(Normal) + consensus.setViewIDs(msg.ViewID) if !msg.HasSingleSender() { return errors.New("Leader message can not have multiple sender keys") } @@ -224,9 +248,9 @@ func (consensus *Consensus) checkViewID(msg *FBFTMessage) error { Str("leaderKey", consensus.LeaderPubKey.Bytes.Hex()). 
Msg("[checkViewID] Start consensus timer") return nil - } else if msg.ViewID > consensus.GetCurBlockViewID() { + } else if msg.ViewID > consensus.getCurBlockViewID() { return consensus_engine.ErrViewIDNotMatch - } else if msg.ViewID < consensus.GetCurBlockViewID() { + } else if msg.ViewID < consensus.getCurBlockViewID() { return errors.New("view ID belongs to the past") } return nil @@ -237,17 +261,26 @@ func (consensus *Consensus) SetBlockNum(blockNum uint64) { atomic.StoreUint64(&consensus.blockNum, blockNum) } +// SetBlockNum sets the blockNum in consensus object, called at node bootstrap +func (consensus *Consensus) setBlockNum(blockNum uint64) { + atomic.StoreUint64(&consensus.blockNum, blockNum) +} + // ReadSignatureBitmapPayload read the payload for signature and bitmap; offset is the beginning position of reading -func (consensus *Consensus) ReadSignatureBitmapPayload( - recvPayload []byte, offset int, -) (*bls_core.Sign, *bls_cosi.Mask, error) { +func (consensus *Consensus) ReadSignatureBitmapPayload(recvPayload []byte, offset int) (*bls_core.Sign, *bls_cosi.Mask, error) { + consensus.mutex.RLock() + members := consensus.Decider.Participants() + consensus.mutex.RUnlock() + return consensus.readSignatureBitmapPayload(recvPayload, offset, members) +} + +func (consensus *Consensus) readSignatureBitmapPayload(recvPayload []byte, offset int, members multibls.PublicKeys) (*bls_core.Sign, *bls_cosi.Mask, error) { if offset+bls.BLSSignatureSizeInBytes > len(recvPayload) { return nil, nil, errors.New("payload not have enough length") } sigAndBitmapPayload := recvPayload[offset:] // TODO(audit): keep a Mask in the Decider so it won't be reconstructed on the fly. - members := consensus.Decider.Participants() return chain.ReadSignatureBitmapByPublicKeys( sigAndBitmapPayload, members, ) @@ -264,6 +297,12 @@ func (consensus *Consensus) ReadSignatureBitmapPayload( // (b) node in committed but has any err during processing: Syncing mode // (c) node in committed and everything looks good: Normal mode func (consensus *Consensus) UpdateConsensusInformation() Mode { + consensus.mutex.Lock() + defer consensus.mutex.Unlock() + return consensus.updateConsensusInformation() +} + +func (consensus *Consensus) updateConsensusInformation() Mode { curHeader := consensus.Blockchain().CurrentHeader() curEpoch := curHeader.Epoch() nextEpoch := new(big.Int).Add(curHeader.Epoch(), common.Big1) @@ -368,7 +407,7 @@ func (consensus *Consensus) UpdateConsensusInformation() Mode { consensus.getLogger().Info(). Int("numPubKeys", len(pubKeys)). Msg("[UpdateConsensusInformation] Successfully updated public keys") - consensus.UpdatePublicKeys(pubKeys, shard.Schedule.InstanceForEpoch(nextEpoch).ExternalAllowlist()) + consensus.updatePublicKeys(pubKeys, shard.Schedule.InstanceForEpoch(nextEpoch).ExternalAllowlist()) // Update voters in the committee if _, err := consensus.Decider.SetVoters( @@ -399,15 +438,13 @@ func (consensus *Consensus) UpdateConsensusInformation() Mode { consensus.getLogger().Info(). Str("leaderPubKey", leaderPubKey.Bytes.Hex()). Msg("[UpdateConsensusInformation] Most Recent LeaderPubKey Updated Based on BlockChain") - consensus.pubKeyLock.Lock() consensus.LeaderPubKey = leaderPubKey - consensus.pubKeyLock.Unlock() } } for _, key := range pubKeys { // in committee - myPubKeys := consensus.GetPublicKeys() + myPubKeys := consensus.getPublicKeys() if myPubKeys.Contains(key.Object) { if hasError { consensus.getLogger().Error(). 
@@ -419,7 +456,7 @@ func (consensus *Consensus) UpdateConsensusInformation() Mode { // If the leader changed and I myself become the leader if (oldLeader != nil && consensus.LeaderPubKey != nil && - !consensus.LeaderPubKey.Object.IsEqual(oldLeader.Object)) && consensus.IsLeader() { + !consensus.LeaderPubKey.Object.IsEqual(oldLeader.Object)) && consensus.isLeader() { go func() { consensus.getLogger().Info(). Str("myKey", myPubKeys.SerializeToHexStr()). @@ -440,9 +477,16 @@ func (consensus *Consensus) UpdateConsensusInformation() Mode { // IsLeader check if the node is a leader or not by comparing the public key of // the node with the leader public key func (consensus *Consensus) IsLeader() bool { - consensus.pubKeyLock.Lock() + consensus.mutex.RLock() + defer consensus.mutex.RUnlock() + + return consensus.isLeader() +} + +// isLeader check if the node is a leader or not by comparing the public key of +// the node with the leader public key. This function assume it runs under lock. +func (consensus *Consensus) isLeader() bool { obj := consensus.LeaderPubKey.Object - consensus.pubKeyLock.Unlock() for _, key := range consensus.priKey { if key.Pub.Object.IsEqual(obj) { return true @@ -454,12 +498,25 @@ func (consensus *Consensus) IsLeader() bool { // SetViewIDs set both current view ID and view changing ID to the height // of the blockchain. It is used during client startup to recover the state func (consensus *Consensus) SetViewIDs(height uint64) { - consensus.SetCurBlockViewID(height) - consensus.SetViewChangingID(height) + consensus.mutex.Lock() + defer consensus.mutex.Unlock() + consensus.setViewIDs(height) +} + +// SetViewIDs set both current view ID and view changing ID to the height +// of the blockchain. It is used during client startup to recover the state +func (consensus *Consensus) setViewIDs(height uint64) { + consensus.setCurBlockViewID(height) + consensus.setViewChangingID(height) +} + +// SetCurBlockViewID set the current view ID +func (consensus *Consensus) SetCurBlockViewID(viewID uint64) uint64 { + return consensus.current.SetCurBlockViewID(viewID) } // SetCurBlockViewID set the current view ID -func (consensus *Consensus) SetCurBlockViewID(viewID uint64) { +func (consensus *Consensus) setCurBlockViewID(viewID uint64) { consensus.current.SetCurBlockViewID(viewID) } @@ -468,6 +525,11 @@ func (consensus *Consensus) SetViewChangingID(viewID uint64) { consensus.current.SetViewChangingID(viewID) } +// SetViewChangingID set the current view change ID +func (consensus *Consensus) setViewChangingID(viewID uint64) { + consensus.current.SetViewChangingID(viewID) +} + // StartFinalityCount set the finality counter to current time func (consensus *Consensus) StartFinalityCount() { consensus.finalityCounter.Store(time.Now().UnixNano()) @@ -492,8 +554,7 @@ func (consensus *Consensus) switchPhase(subject string, desired FBFTPhase) { Str("to:", desired.String()). 
Str("switchPhase:", subject) - consensus.phase.Set(desired) - return + consensus.phase = desired } var ( @@ -515,13 +576,13 @@ func (consensus *Consensus) selfCommit(payload []byte) error { return errGetPreparedBlock } - aggSig, mask, err := consensus.ReadSignatureBitmapPayload(payload, 32) + aggSig, mask, err := consensus.readSignatureBitmapPayload(payload, 32, consensus.Decider.Participants()) if err != nil { return errReadBitmapPayload } // Have to keep the block hash so the leader can finish the commit phase of prepared block - consensus.ResetState() + consensus.resetState() copy(consensus.blockHash[:], blockHash[:]) consensus.switchPhase("selfCommit", FBFTCommit) @@ -578,11 +639,18 @@ func (consensus *Consensus) NumSignaturesIncludedInBlock(block *types.Block) uin return count } +// GetLogger returns logger for consensus contexts added. +func (consensus *Consensus) GetLogger() *zerolog.Logger { + consensus.mutex.RLock() + defer consensus.mutex.RUnlock() + return consensus.getLogger() +} + // getLogger returns logger for consensus contexts added func (consensus *Consensus) getLogger() *zerolog.Logger { logger := utils.Logger().With(). - Uint64("myBlock", consensus.BlockNum()). - Uint64("myViewID", consensus.GetCurBlockViewID()). + Uint64("myBlock", consensus.blockNum). + Uint64("myViewID", consensus.getCurBlockViewID()). Str("phase", consensus.phase.String()). Str("mode", consensus.current.Mode().String()). Logger() diff --git a/consensus/consensus_test.go b/consensus/consensus_test.go index c2d3d729b..41fe5b127 100644 --- a/consensus/consensus_test.go +++ b/consensus/consensus_test.go @@ -39,7 +39,7 @@ func TestConsensusInitialization(t *testing.T) { // FBFTLog assert.Equal(t, fbtLog, consensus.FBFTLog) - assert.Equal(t, FBFTAnnounce, consensus.phase.Get()) + assert.Equal(t, FBFTAnnounce, consensus.phase) // State / consensus.current assert.Equal(t, state.mode, consensus.current.mode) @@ -61,11 +61,7 @@ func TestConsensusInitialization(t *testing.T) { assert.Equal(t, uint64(0), consensus.GetViewChangingID()) assert.Equal(t, uint32(shard.BeaconChainShardID), consensus.ShardID) - assert.IsType(t, make(chan struct{}), consensus.syncReadyChan) - assert.NotNil(t, consensus.syncReadyChan) - - assert.IsType(t, make(chan struct{}), consensus.syncNotReadyChan) - assert.NotNil(t, consensus.syncNotReadyChan) + assert.Equal(t, false, consensus.start) assert.IsType(t, make(chan slash.Record), consensus.SlashChan) assert.NotNil(t, consensus.SlashChan) diff --git a/consensus/consensus_v2.go b/consensus/consensus_v2.go index deb0883d9..99bccf755 100644 --- a/consensus/consensus_v2.go +++ b/consensus/consensus_v2.go @@ -4,12 +4,13 @@ import ( "bytes" "context" "encoding/hex" + "math/big" "sync/atomic" "time" + "github.com/ethereum/go-ethereum/common" bls2 "github.com/harmony-one/bls/ffi/go/bls" "github.com/harmony-one/harmony/consensus/signature" - nodeconfig "github.com/harmony-one/harmony/internal/configs/node" "github.com/harmony-one/harmony/internal/utils" @@ -45,16 +46,24 @@ const ( // IsViewChangingMode return true if curernt mode is viewchanging func (consensus *Consensus) IsViewChangingMode() bool { + consensus.mutex.RLock() + defer consensus.mutex.RUnlock() + return consensus.isViewChangingMode() +} + +func (consensus *Consensus) isViewChangingMode() bool { return consensus.current.Mode() == ViewChanging } // HandleMessageUpdate will update the consensus state according to received message func (consensus *Consensus) HandleMessageUpdate(ctx context.Context, msg *msg_pb.Message, senderKey 
*bls.SerializedPublicKey) error { + consensus.mutex.Lock() + defer consensus.mutex.Unlock() // when node is in ViewChanging mode, it still accepts normal messages into FBFTLog // in order to avoid possible trap forever but drop PREPARE and COMMIT // which are message types specifically for a node acting as leader // so we just ignore those messages - if consensus.IsViewChangingMode() && + if consensus.isViewChangingMode() && (msg.Type == msg_pb.MessageType_PREPARE || msg.Type == msg_pb.MessageType_COMMIT) { return nil @@ -86,7 +95,7 @@ func (consensus *Consensus) HandleMessageUpdate(ctx context.Context, msg *msg_pb members := consensus.Decider.Participants() fbftMsg, err = ParseNewViewMessage(msg, members) default: - fbftMsg, err = consensus.ParseFBFTMessage(msg) + fbftMsg, err = consensus.parseFBFTMessage(msg) } if err != nil || fbftMsg == nil { return errors.Wrapf(err, "unable to parse consensus msg with type: %s", msg.Type) @@ -94,8 +103,8 @@ func (consensus *Consensus) HandleMessageUpdate(ctx context.Context, msg *msg_pb canHandleViewChange := true intendedForValidator, intendedForLeader := - !consensus.IsLeader(), - consensus.IsLeader() + !consensus.isLeader(), + consensus.isLeader() // if in backup normal mode, force ignore view change event and leader event. if consensus.current.Mode() == NormalBackup { @@ -135,9 +144,9 @@ func (consensus *Consensus) finalCommit() { consensus.getLogger().Info(). Int64("NumCommits", numCommits). Msg("[finalCommit] Finalizing Consensus") - beforeCatchupNum := consensus.BlockNum() + beforeCatchupNum := consensus.getBlockNum() - leaderPriKey, err := consensus.GetConsensusLeaderPrivateKey() + leaderPriKey, err := consensus.getConsensusLeaderPrivateKey() if err != nil { consensus.getLogger().Error().Err(err).Msg("[finalCommit] leader not found") return @@ -176,7 +185,7 @@ func (consensus *Consensus) finalCommit() { // Note: leader already sent 67% commit in preCommit. The 100% commit won't be sent immediately // to save network traffic. It will only be sent in retry if consensus doesn't move forward. // Or if the leader is changed for next block, the 100% committed sig will be sent to the next leader immediately. - if !consensus.IsLeader() || block.IsLastBlockInEpoch() { + if !consensus.isLeader() || block.IsLastBlockInEpoch() { // send immediately if err := consensus.msgSender.SendWithRetry( block.NumberU64(), @@ -241,7 +250,7 @@ func (consensus *Consensus) finalCommit() { // If still the leader, send commit sig/bitmap to finish the new block proposal, // else, the block proposal will timeout by itself. 
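One convention repeats throughout this refactor and is worth naming: each exported method (IsLeader, Mode, SetViewIDs, and so on) now takes consensus.mutex and delegates to an unexported twin (isLeader, mode, setViewIDs) that assumes the lock is already held, which is what lets locked internal paths such as the hunk below call isLeader without re-locking. A reduced sketch of the pattern on a simplified struct:

package main

import (
	"fmt"
	"sync"
)

type consensus struct {
	mutex  sync.RWMutex
	leader string
	self   string
}

// IsLeader is the exported, locking entry point.
func (c *consensus) IsLeader() bool {
	c.mutex.RLock()
	defer c.mutex.RUnlock()
	return c.isLeader()
}

// isLeader assumes the caller already holds the mutex, so code paths
// that are inside the lock can use it without deadlocking.
func (c *consensus) isLeader() bool { return c.self == c.leader }

func main() {
	c := &consensus{leader: "a", self: "a"}
	fmt.Println(c.IsLeader()) // true
}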
- if consensus.IsLeader() { + if consensus.isLeader() { if block.IsLastBlockInEpoch() { // No pipelining go func() { @@ -289,141 +298,29 @@ func (consensus *Consensus) BlockCommitSigs(blockNum uint64) ([]byte, error) { // Start waits for the next new block and run consensus func (consensus *Consensus) Start( - blockChannel chan *types.Block, stopChan, stoppedChan, startChannel chan struct{}, + stopChan chan struct{}, ) { go func() { - toStart := make(chan struct{}, 1) - isInitialLeader := consensus.IsLeader() - if isInitialLeader { - consensus.getLogger().Info().Time("time", time.Now()).Msg("[ConsensusMainLoop] Waiting for consensus start") - // send a signal to indicate it's ready to run consensus - // this signal is consumed by node object to create a new block and in turn trigger a new consensus on it - go func() { - <-startChannel - toStart <- struct{}{} - consensus.getLogger().Info().Time("time", time.Now()).Msg("[ConsensusMainLoop] Send ReadySignal") - consensus.ReadySignal <- SyncProposal - }() - } consensus.getLogger().Info().Time("time", time.Now()).Msg("[ConsensusMainLoop] Consensus started") - defer close(stoppedChan) - ticker := time.NewTicker(250 * time.Millisecond) - defer ticker.Stop() + go func() { + ticker := time.NewTicker(250 * time.Millisecond) + defer ticker.Stop() + for { + select { + case <-stopChan: + return + case <-ticker.C: + consensus.Tick() + } + } + }() + + consensus.mutex.Lock() consensus.consensusTimeout[timeoutBootstrap].Start() consensus.getLogger().Info().Msg("[ConsensusMainLoop] Start bootstrap timeout (only once)") - // Set up next block due time. consensus.NextBlockDue = time.Now().Add(consensus.BlockPeriod) - start := false - for { - select { - case <-toStart: - start = true - case <-ticker.C: - if !start && isInitialLeader { - continue - } - for k, v := range consensus.consensusTimeout { - // stop timer in listening mode - if consensus.current.Mode() == Listening { - v.Stop() - continue - } - - if consensus.current.Mode() == Syncing { - // never stop bootstrap timer here in syncing mode as it only starts once - // if it is stopped, bootstrap will be stopped and nodes - // can't start view change or join consensus - // the bootstrap timer will be stopped once consensus is reached or view change - // is succeeded - if k != timeoutBootstrap { - consensus.getLogger().Debug(). - Str("k", k.String()). - Str("Mode", consensus.current.Mode().String()). 
- Msg("[ConsensusMainLoop] consensusTimeout stopped!!!") - v.Stop() - continue - } - } - if !v.CheckExpire() { - continue - } - if k != timeoutViewChange { - consensus.getLogger().Warn().Msg("[ConsensusMainLoop] Ops Consensus Timeout!!!") - consensus.startViewChange() - break - } else { - consensus.getLogger().Warn().Msg("[ConsensusMainLoop] Ops View Change Timeout!!!") - consensus.startViewChange() - break - } - } - - // TODO: Refactor this piece of code to consensus/downloader.go after DNS legacy sync is removed - case <-consensus.syncReadyChan: - consensus.getLogger().Info().Msg("[ConsensusMainLoop] syncReadyChan") - consensus.mutex.Lock() - if consensus.BlockNum() < consensus.Blockchain().CurrentHeader().Number().Uint64()+1 { - consensus.SetBlockNum(consensus.Blockchain().CurrentHeader().Number().Uint64() + 1) - consensus.SetViewIDs(consensus.Blockchain().CurrentHeader().ViewID().Uint64() + 1) - mode := consensus.UpdateConsensusInformation() - consensus.current.SetMode(mode) - consensus.getLogger().Info().Msg("[syncReadyChan] Start consensus timer") - consensus.consensusTimeout[timeoutConsensus].Start() - consensus.getLogger().Info().Str("Mode", mode.String()).Msg("Node is IN SYNC") - consensusSyncCounterVec.With(prometheus.Labels{"consensus": "in_sync"}).Inc() - } else if consensus.Mode() == Syncing { - // Corner case where sync is triggered before `onCommitted` and there is a race - // for block insertion between consensus and downloader. - mode := consensus.UpdateConsensusInformation() - consensus.SetMode(mode) - consensus.getLogger().Info().Msg("[syncReadyChan] Start consensus timer") - consensus.consensusTimeout[timeoutConsensus].Start() - consensusSyncCounterVec.With(prometheus.Labels{"consensus": "in_sync"}).Inc() - } - consensus.mutex.Unlock() - - // TODO: Refactor this piece of code to consensus/downloader.go after DNS legacy sync is removed - case <-consensus.syncNotReadyChan: - consensus.getLogger().Info().Msg("[ConsensusMainLoop] syncNotReadyChan") - consensus.SetBlockNum(consensus.Blockchain().CurrentHeader().Number().Uint64() + 1) - consensus.current.SetMode(Syncing) - consensus.getLogger().Info().Msg("[ConsensusMainLoop] Node is OUT OF SYNC") - consensusSyncCounterVec.With(prometheus.Labels{"consensus": "out_of_sync"}).Inc() - - case newBlock := <-blockChannel: - consensus.getLogger().Info(). - Uint64("MsgBlockNum", newBlock.NumberU64()). - Msg("[ConsensusMainLoop] Received Proposed New Block!") - - if newBlock.NumberU64() < consensus.BlockNum() { - consensus.getLogger().Warn().Uint64("newBlockNum", newBlock.NumberU64()). - Msg("[ConsensusMainLoop] received old block, abort") - continue - } - // Sleep to wait for the full block time - consensus.getLogger().Info().Msg("[ConsensusMainLoop] Waiting for Block Time") - <-time.After(time.Until(consensus.NextBlockDue)) - consensus.StartFinalityCount() - - // Update time due for next block - consensus.NextBlockDue = time.Now().Add(consensus.BlockPeriod) - - startTime = time.Now() - consensus.msgSender.Reset(newBlock.NumberU64()) - - consensus.getLogger().Info(). - Int("numTxs", len(newBlock.Transactions())). - Int("numStakingTxs", len(newBlock.StakingTransactions())). - Time("startTime", startTime). - Int64("publicKeys", consensus.Decider.ParticipantsCount()). 
- Msg("[ConsensusMainLoop] STARTING CONSENSUS") - consensus.announce(newBlock) - case <-stopChan: - consensus.getLogger().Info().Msg("[ConsensusMainLoop] stopChan") - return - } - } + consensus.mutex.Unlock() }() if consensus.dHelper != nil { @@ -431,28 +328,129 @@ func (consensus *Consensus) Start( } } -// Close close the consensus. If current is in normal commit phase, wait until the commit -// phase end. -func (consensus *Consensus) Close() error { - if consensus.dHelper != nil { - consensus.dHelper.close() +func (consensus *Consensus) StartChannel() { + consensus.mutex.Lock() + consensus.isInitialLeader = consensus.isLeader() + if consensus.isInitialLeader { + consensus.start = true + consensus.getLogger().Info().Time("time", time.Now()).Msg("[ConsensusMainLoop] Send ReadySignal") + consensus.mutex.Unlock() + consensus.ReadySignal <- SyncProposal + return } - consensus.waitForCommit() - return nil + consensus.mutex.Unlock() +} + +func (consensus *Consensus) syncReadyChan() { + consensus.getLogger().Info().Msg("[ConsensusMainLoop] syncReadyChan") + if consensus.getBlockNum() < consensus.Blockchain().CurrentHeader().Number().Uint64()+1 { + consensus.setBlockNum(consensus.Blockchain().CurrentHeader().Number().Uint64() + 1) + consensus.setViewIDs(consensus.Blockchain().CurrentHeader().ViewID().Uint64() + 1) + mode := consensus.updateConsensusInformation() + consensus.current.SetMode(mode) + consensus.getLogger().Info().Msg("[syncReadyChan] Start consensus timer") + consensus.consensusTimeout[timeoutConsensus].Start() + consensus.getLogger().Info().Str("Mode", mode.String()).Msg("Node is IN SYNC") + consensusSyncCounterVec.With(prometheus.Labels{"consensus": "in_sync"}).Inc() + } else if consensus.mode() == Syncing { + // Corner case where sync is triggered before `onCommitted` and there is a race + // for block insertion between consensus and downloader. 
+ mode := consensus.updateConsensusInformation() + consensus.setMode(mode) + consensus.getLogger().Info().Msg("[syncReadyChan] Start consensus timer") + consensus.consensusTimeout[timeoutConsensus].Start() + consensusSyncCounterVec.With(prometheus.Labels{"consensus": "in_sync"}).Inc() + } +} + +func (consensus *Consensus) syncNotReadyChan() { + consensus.getLogger().Info().Msg("[ConsensusMainLoop] syncNotReadyChan") + consensus.setBlockNum(consensus.Blockchain().CurrentHeader().Number().Uint64() + 1) + consensus.current.SetMode(Syncing) + consensus.getLogger().Info().Msg("[ConsensusMainLoop] Node is OUT OF SYNC") + consensusSyncCounterVec.With(prometheus.Labels{"consensus": "out_of_sync"}).Inc() } -// waitForCommit wait extra 2 seconds for commit phase to finish -func (consensus *Consensus) waitForCommit() { - if consensus.Mode() != Normal || consensus.phase.Get() != FBFTCommit { +func (consensus *Consensus) Tick() { + consensus.mutex.Lock() + defer consensus.mutex.Unlock() + consensus.tick() +} + +func (consensus *Consensus) tick() { + if !consensus.start && consensus.isInitialLeader { return } - // We only need to wait consensus is in normal commit phase - utils.Logger().Warn().Str("phase", consensus.phase.String()).Msg("[shutdown] commit phase has to wait") + for k, v := range consensus.consensusTimeout { + // stop timer in listening mode + if consensus.current.Mode() == Listening { + v.Stop() + continue + } - maxWait := time.Now().Add(2 * consensus.BlockPeriod) - for time.Now().Before(maxWait) && consensus.GetConsensusPhase() == "Commit" { - utils.Logger().Warn().Msg("[shutdown] wait for consensus finished") - time.Sleep(time.Millisecond * 100) + if consensus.current.Mode() == Syncing { + // never stop bootstrap timer here in syncing mode as it only starts once + // if it is stopped, bootstrap will be stopped and nodes + // can't start view change or join consensus + // the bootstrap timer will be stopped once consensus is reached or view change + // is succeeded + if k != timeoutBootstrap { + consensus.getLogger().Debug(). + Str("k", k.String()). + Str("Mode", consensus.current.Mode().String()). + Msg("[ConsensusMainLoop] consensusTimeout stopped!!!") + v.Stop() + continue + } + } + if !v.CheckExpire() { + continue + } + if k != timeoutViewChange { + consensus.getLogger().Warn().Msg("[ConsensusMainLoop] Ops Consensus Timeout!!!") + consensus.startViewChange() + break + } else { + consensus.getLogger().Warn().Msg("[ConsensusMainLoop] Ops View Change Timeout!!!") + consensus.startViewChange() + break + } + } +} + +func (consensus *Consensus) BlockChannel(newBlock *types.Block) { + consensus.GetLogger().Info(). + Uint64("MsgBlockNum", newBlock.NumberU64()). + Msg("[ConsensusMainLoop] Received Proposed New Block!") + + if newBlock.NumberU64() < consensus.BlockNum() { + consensus.getLogger().Warn().Uint64("newBlockNum", newBlock.NumberU64()). + Msg("[ConsensusMainLoop] received old block, abort") + return + } + // Sleep to wait for the full block time + consensus.getLogger().Info().Msg("[ConsensusMainLoop] Waiting for Block Time") + time.AfterFunc(time.Until(consensus.NextBlockDue), func() { + consensus.StartFinalityCount() + consensus.mutex.Lock() + defer consensus.mutex.Unlock() + // Update time due for next block + consensus.NextBlockDue = time.Now().Add(consensus.BlockPeriod) + + startTime = time.Now() + consensus.msgSender.Reset(newBlock.NumberU64()) + + consensus.getLogger().Info(). + Int("numTxs", len(newBlock.Transactions())). 
+ Int("numStakingTxs", len(newBlock.StakingTransactions())). + Time("startTime", startTime). + Int64("publicKeys", consensus.Decider.ParticipantsCount()). + Msg("[ConsensusMainLoop] STARTING CONSENSUS") + consensus.announce(newBlock) + }) + + if consensus.dHelper != nil { + consensus.dHelper.start() } } @@ -467,24 +465,29 @@ type LastMileBlockIter struct { } // GetLastMileBlockIter get the iterator of the last mile blocks starting from number bnStart -func (consensus *Consensus) GetLastMileBlockIter(bnStart uint64) (*LastMileBlockIter, error) { +func (consensus *Consensus) GetLastMileBlockIter(bnStart uint64, cb func(iter *LastMileBlockIter) error) error { consensus.mutex.Lock() defer consensus.mutex.Unlock() + return consensus.getLastMileBlockIter(bnStart, cb) +} + +// GetLastMileBlockIter get the iterator of the last mile blocks starting from number bnStart +func (consensus *Consensus) getLastMileBlockIter(bnStart uint64, cb func(iter *LastMileBlockIter) error) error { if consensus.BlockVerifier == nil { - return nil, errors.New("consensus haven't initialized yet") + return errors.New("consensus haven't initialized yet") } blocks, _, err := consensus.getLastMileBlocksAndMsg(bnStart) if err != nil { - return nil, err + return err } - return &LastMileBlockIter{ + return cb(&LastMileBlockIter{ blockCandidates: blocks, fbftLog: consensus.FBFTLog, verify: consensus.BlockVerifier, curIndex: 0, logger: consensus.getLogger(), - }, nil + }) } // Next iterate to the next last mile block @@ -531,7 +534,7 @@ func (consensus *Consensus) preCommitAndPropose(blk *types.Block) error { return errors.New("block to pre-commit is nil") } - leaderPriKey, err := consensus.GetConsensusLeaderPrivateKey() + leaderPriKey, err := consensus.getConsensusLeaderPrivateKey() if err != nil { consensus.getLogger().Error().Err(err).Msg("[preCommitAndPropose] leader not found") return err @@ -621,7 +624,7 @@ func (consensus *Consensus) tryCatchup() error { if consensus.BlockVerifier == nil { return errors.New("consensus haven't finished initialization") } - initBN := consensus.BlockNum() + initBN := consensus.getBlockNum() defer consensus.postCatchup(initBN) blks, msgs, err := consensus.getLastMileBlocksAndMsg(initBN) @@ -635,7 +638,7 @@ func (consensus *Consensus) tryCatchup() error { } blk.SetCurrentCommitSig(msg.Payload) - if err := consensus.VerifyBlock(blk); err != nil { + if err := consensus.verifyBlock(blk); err != nil { consensus.getLogger().Err(err).Msg("[TryCatchup] failed block verifier") return err } @@ -671,39 +674,102 @@ func (consensus *Consensus) commitBlock(blk *types.Block, committedMsg *FBFTMess } consensus.FinishFinalityCount() - consensus.PostConsensusJob(blk) - consensus.SetupForNewConsensus(blk, committedMsg) + go func() { + consensus.PostConsensusJob(blk) + }() + consensus.setupForNewConsensus(blk, committedMsg) utils.Logger().Info().Uint64("blockNum", blk.NumberU64()). Str("hash", blk.Header().Hash().Hex()). Msg("Added New Block to Blockchain!!!") + return nil } +// rotateLeader rotates the leader to the next leader in the committee. +// This function must be called with enabled leader rotation. 
+func (consensus *Consensus) rotateLeader(epoch *big.Int) { + prev := consensus.getLeaderPubKey() + bc := consensus.Blockchain() + curNumber := bc.CurrentHeader().Number().Uint64() + utils.Logger().Info().Msgf("[Rotating leader] epoch: %v rotation:%v numblocks:%d", epoch.Uint64(), bc.Config().IsLeaderRotation(epoch), bc.Config().LeaderRotationBlocksCount) + leader := consensus.getLeaderPubKey() + for i := 0; i < bc.Config().LeaderRotationBlocksCount; i++ { + header := bc.GetHeaderByNumber(curNumber - uint64(i)) + if header == nil { + return + } + // Previous epoch, we should not change leader. + if header.Epoch().Uint64() != epoch.Uint64() { + return + } + // Check if the same leader. + pub, err := bc.GetLeaderPubKeyFromCoinbase(header) + if err != nil { + utils.Logger().Error().Err(err).Msg("Failed to get leader public key from coinbase") + return + } + if !pub.Object.IsEqual(leader.Object) { + // Another leader. + return + } + } + // Passed all checks, we can change leader. + var ( + wasFound bool + next *bls.PublicKeyWrapper + ) + if consensus.ShardID == shard.BeaconChainShardID { + wasFound, next = consensus.Decider.NthNextHmy(shard.Schedule.InstanceForEpoch(epoch), leader, 1) + } else { + wasFound, next = consensus.Decider.NthNext(leader, 1) + } + if !wasFound { + utils.Logger().Error().Msg("Failed to get next leader") + return + } else { + consensus.setLeaderPubKey(next) + } + if consensus.isLeader() && !consensus.getLeaderPubKey().Object.IsEqual(prev.Object) { + // leader changed + go func() { + consensus.ReadySignal <- SyncProposal + }() + } +} + // SetupForNewConsensus sets the state for new consensus -func (consensus *Consensus) SetupForNewConsensus(blk *types.Block, committedMsg *FBFTMessage) { +func (consensus *Consensus) setupForNewConsensus(blk *types.Block, committedMsg *FBFTMessage) { atomic.StoreUint64(&consensus.blockNum, blk.NumberU64()+1) - consensus.SetCurBlockViewID(committedMsg.ViewID + 1) - consensus.pubKeyLock.Lock() + consensus.setCurBlockViewID(committedMsg.ViewID + 1) consensus.LeaderPubKey = committedMsg.SenderPubkeys[0] - consensus.pubKeyLock.Unlock() + var epoch *big.Int + if blk.IsLastBlockInEpoch() { + epoch = new(big.Int).Add(blk.Epoch(), common.Big1) + } else { + epoch = blk.Epoch() + } + if consensus.Blockchain().Config().IsLeaderRotation(epoch) { + consensus.rotateLeader(epoch) + } + // Update consensus keys at last so the change of leader status doesn't mess up normal flow if blk.IsLastBlockInEpoch() { - consensus.SetMode(consensus.UpdateConsensusInformation()) + consensus.setMode(consensus.updateConsensusInformation()) } consensus.FBFTLog.PruneCacheBeforeBlock(blk.NumberU64()) - consensus.ResetState() + consensus.resetState() } func (consensus *Consensus) postCatchup(initBN uint64) { - if initBN < consensus.BlockNum() { + if initBN < consensus.getBlockNum() { consensus.getLogger().Info(). Uint64("From", initBN). - Uint64("To", consensus.BlockNum()). + Uint64("To", consensus.getBlockNum()). 
Msg("[TryCatchup] Caught up!") consensus.switchPhase("TryCatchup", FBFTAnnounce) } // catch up and skip from view change trap - if initBN < consensus.BlockNum() && consensus.IsViewChangingMode() { + if initBN < consensus.getBlockNum() && consensus.isViewChangingMode() { consensus.current.SetMode(Normal) consensus.consensusTimeout[timeoutViewChange].Stop() } @@ -711,7 +777,7 @@ func (consensus *Consensus) postCatchup(initBN uint64) { // GenerateVrfAndProof generates new VRF/Proof from hash of previous block func (consensus *Consensus) GenerateVrfAndProof(newHeader *block.Header) error { - key, err := consensus.GetConsensusLeaderPrivateKey() + key, err := consensus.getConsensusLeaderPrivateKey() if err != nil { return errors.New("[GenerateVrfAndProof] no leader private key provided") } @@ -764,7 +830,7 @@ func (consensus *Consensus) GenerateVdfAndProof(newBlock *types.Block, vrfBlockN start := time.Now() vdf.Execute() duration := time.Since(start) - consensus.getLogger().Info(). + consensus.GetLogger().Info(). Dur("duration", duration). Msg("[ConsensusMainLoop] VDF computation finished") output := <-outputChannel diff --git a/consensus/construct.go b/consensus/construct.go index bbee71203..eaa6fd83f 100644 --- a/consensus/construct.go +++ b/consensus/construct.go @@ -29,8 +29,8 @@ type NetworkMessage struct { func (consensus *Consensus) populateMessageFields( request *msg_pb.ConsensusRequest, blockHash []byte, ) *msg_pb.ConsensusRequest { - request.ViewId = consensus.GetCurBlockViewID() - request.BlockNum = consensus.BlockNum() + request.ViewId = consensus.getCurBlockViewID() + request.BlockNum = consensus.getBlockNum() request.ShardId = consensus.ShardID // 32 byte block hash request.BlockHash = blockHash @@ -143,7 +143,7 @@ func (consensus *Consensus) construct( return nil, err } - FBFTMsg, err2 := consensus.ParseFBFTMessage(message) + FBFTMsg, err2 := consensus.parseFBFTMessage(message) if err2 != nil { utils.Logger().Error().Err(err). diff --git a/consensus/construct_test.go b/consensus/construct_test.go index 71da42e3f..1add32219 100644 --- a/consensus/construct_test.go +++ b/consensus/construct_test.go @@ -67,8 +67,8 @@ func TestConstructPreparedMessage(test *testing.T) { if err != nil { test.Fatalf("Cannot craeate consensus: %v", err) } - consensus.ResetState() - consensus.UpdateBitmaps() + consensus.resetState() + consensus.updateBitmaps() consensus.blockHash = [32]byte{} message := "test string" diff --git a/consensus/debug.go b/consensus/debug.go index ae1d2d92b..cba13cc01 100644 --- a/consensus/debug.go +++ b/consensus/debug.go @@ -2,6 +2,13 @@ package consensus // GetConsensusPhase returns the current phase of the consensus. func (consensus *Consensus) GetConsensusPhase() string { + consensus.mutex.RLock() + defer consensus.mutex.RUnlock() + return consensus.getConsensusPhase() +} + +// GetConsensusPhase returns the current phase of the consensus. 
+func (consensus *Consensus) getConsensusPhase() string { return consensus.phase.String() } @@ -12,10 +19,24 @@ func (consensus *Consensus) GetConsensusMode() string { // GetCurBlockViewID returns the current view ID of the consensus func (consensus *Consensus) GetCurBlockViewID() uint64 { + consensus.mutex.RLock() + defer consensus.mutex.RUnlock() + return consensus.getCurBlockViewID() +} + +// getCurBlockViewID returns the current view ID of the consensus +func (consensus *Consensus) getCurBlockViewID() uint64 { return consensus.current.GetCurBlockViewID() } // GetViewChangingID returns the current view changing ID of the consensus func (consensus *Consensus) GetViewChangingID() uint64 { + consensus.mutex.RLock() + defer consensus.mutex.RUnlock() + return consensus.current.GetViewChangingID() +} + +// getViewChangingID returns the current view changing ID of the consensus +func (consensus *Consensus) getViewChangingID() uint64 { return consensus.current.GetViewChangingID() } diff --git a/consensus/downloader.go b/consensus/downloader.go index 2734f8bae..8442ed534 100644 --- a/consensus/downloader.go +++ b/consensus/downloader.go @@ -76,7 +76,7 @@ func (dh *downloadHelper) downloadFinishedLoop() { for { select { case <-dh.finishedCh: - err := dh.c.addConsensusLastMile() + err := dh.c.AddConsensusLastMile() if err != nil { dh.c.getLogger().Error().Err(err).Msg("add last mile failed") } @@ -89,22 +89,21 @@ } } -func (consensus *Consensus) addConsensusLastMile() error { +func (consensus *Consensus) AddConsensusLastMile() error { curBN := consensus.Blockchain().CurrentBlock().NumberU64() - blockIter, err := consensus.GetLastMileBlockIter(curBN + 1) - if err != nil { - return err - } - for { - block := blockIter.Next() - if block == nil { - break - } - if _, err := consensus.Blockchain().InsertChain(types.Blocks{block}, true); err != nil { - return errors.Wrap(err, "failed to InsertChain") + err := consensus.GetLastMileBlockIter(curBN+1, func(blockIter *LastMileBlockIter) error { + for { + block := blockIter.Next() + if block == nil { + break + } + if _, err := consensus.Blockchain().InsertChain(types.Blocks{block}, true); err != nil { + return errors.Wrap(err, "failed to InsertChain") + } } - } - return nil + return nil + }) + return err } func (consensus *Consensus) spinUpStateSync() { diff --git a/consensus/fbft_log.go b/consensus/fbft_log.go index 1a5966572..ba3745103 100644 --- a/consensus/fbft_log.go +++ b/consensus/fbft_log.go @@ -3,7 +3,6 @@ package consensus import ( "encoding/binary" "fmt" - "sync" "github.com/ethereum/go-ethereum/common" bls_core "github.com/harmony-one/bls/ffi/go/bls" @@ -102,10 +101,7 @@ func (m *FBFTMessage) id() fbftMsgID { type FBFTLog struct { blocks map[common.Hash]*types.Block // store blocks received in FBFT verifiedBlocks map[common.Hash]struct{} // store block hashes for blocks that has already been verified - blockLock sync.RWMutex - - messages map[fbftMsgID]*FBFTMessage // store messages received in FBFT - msgLock sync.RWMutex + messages map[fbftMsgID]*FBFTMessage // store messages received in FBFT } // NewFBFTLog returns new instance of FBFTLog @@ -120,42 +116,27 @@ // AddBlock add a new block into the log func (log *FBFTLog) AddBlock(block *types.Block) { - log.blockLock.Lock() - defer log.blockLock.Unlock() - log.blocks[block.Hash()] = block } // MarkBlockVerified marks the block as verified func (log *FBFTLog) MarkBlockVerified(block *types.Block) { -
log.blockLock.Lock() - defer log.blockLock.Unlock() - log.verifiedBlocks[block.Hash()] = struct{}{} } // IsBlockVerified checks whether the block is verified func (log *FBFTLog) IsBlockVerified(hash common.Hash) bool { - log.blockLock.RLock() - defer log.blockLock.RUnlock() - _, exist := log.verifiedBlocks[hash] return exist } // GetBlockByHash returns the block matches the given block hash func (log *FBFTLog) GetBlockByHash(hash common.Hash) *types.Block { - log.blockLock.RLock() - defer log.blockLock.RUnlock() - return log.blocks[hash] } // GetBlocksByNumber returns the blocks match the given block number func (log *FBFTLog) GetBlocksByNumber(number uint64) []*types.Block { - log.blockLock.RLock() - defer log.blockLock.RUnlock() - var blocks []*types.Block for _, block := range log.blocks { if block.NumberU64() == number { @@ -167,9 +148,6 @@ func (log *FBFTLog) GetBlocksByNumber(number uint64) []*types.Block { // DeleteBlocksLessThan deletes blocks less than given block number func (log *FBFTLog) DeleteBlocksLessThan(number uint64) { - log.blockLock.Lock() - defer log.blockLock.Unlock() - for h, block := range log.blocks { if block.NumberU64() < number { delete(log.blocks, h) @@ -180,9 +158,6 @@ func (log *FBFTLog) DeleteBlocksLessThan(number uint64) { // DeleteBlockByNumber deletes block of specific number func (log *FBFTLog) DeleteBlockByNumber(number uint64) { - log.blockLock.Lock() - defer log.blockLock.Unlock() - for h, block := range log.blocks { if block.NumberU64() == number { delete(log.blocks, h) @@ -193,9 +168,6 @@ func (log *FBFTLog) DeleteBlockByNumber(number uint64) { // DeleteMessagesLessThan deletes messages less than given block number func (log *FBFTLog) DeleteMessagesLessThan(number uint64) { - log.msgLock.Lock() - defer log.msgLock.Unlock() - for h, msg := range log.messages { if msg.BlockNum < number { delete(log.messages, h) @@ -205,9 +177,6 @@ func (log *FBFTLog) DeleteMessagesLessThan(number uint64) { // AddVerifiedMessage adds a signature verified pbft message into the log func (log *FBFTLog) AddVerifiedMessage(msg *FBFTMessage) { - log.msgLock.Lock() - defer log.msgLock.Unlock() - msg.Verified = true log.messages[msg.id()] = msg @@ -215,9 +184,6 @@ func (log *FBFTLog) AddVerifiedMessage(msg *FBFTMessage) { // AddNotVerifiedMessage adds a not signature verified pbft message into the log func (log *FBFTLog) AddNotVerifiedMessage(msg *FBFTMessage) { - log.msgLock.Lock() - defer log.msgLock.Unlock() - msg.Verified = false log.messages[msg.id()] = msg @@ -225,9 +191,6 @@ func (log *FBFTLog) AddNotVerifiedMessage(msg *FBFTMessage) { // GetNotVerifiedCommittedMessages returns not verified committed pbft messages with matching blockNum, viewID and blockHash func (log *FBFTLog) GetNotVerifiedCommittedMessages(blockNum uint64, viewID uint64, blockHash common.Hash) []*FBFTMessage { - log.msgLock.RLock() - defer log.msgLock.RUnlock() - var found []*FBFTMessage for _, msg := range log.messages { if msg.MessageType == msg_pb.MessageType_COMMITTED && msg.BlockNum == blockNum && msg.ViewID == viewID && msg.BlockHash == blockHash && !msg.Verified { @@ -239,9 +202,6 @@ func (log *FBFTLog) GetNotVerifiedCommittedMessages(blockNum uint64, viewID uint // GetMessagesByTypeSeqViewHash returns pbft messages with matching type, blockNum, viewID and blockHash func (log *FBFTLog) GetMessagesByTypeSeqViewHash(typ msg_pb.MessageType, blockNum uint64, viewID uint64, blockHash common.Hash) []*FBFTMessage { - log.msgLock.RLock() - defer log.msgLock.RUnlock() - var found []*FBFTMessage for _, 
msg := range log.messages { if msg.MessageType == typ && msg.BlockNum == blockNum && msg.ViewID == viewID && msg.BlockHash == blockHash && msg.Verified { @@ -253,9 +213,6 @@ func (log *FBFTLog) GetMessagesByTypeSeqViewHash(typ msg_pb.MessageType, blockNu // GetMessagesByTypeSeq returns pbft messages with matching type, blockNum func (log *FBFTLog) GetMessagesByTypeSeq(typ msg_pb.MessageType, blockNum uint64) []*FBFTMessage { - log.msgLock.RLock() - defer log.msgLock.RUnlock() - var found []*FBFTMessage for _, msg := range log.messages { if msg.MessageType == typ && msg.BlockNum == blockNum && msg.Verified { @@ -267,9 +224,6 @@ func (log *FBFTLog) GetMessagesByTypeSeq(typ msg_pb.MessageType, blockNum uint64 // GetMessagesByTypeSeqHash returns pbft messages with matching type, blockNum func (log *FBFTLog) GetMessagesByTypeSeqHash(typ msg_pb.MessageType, blockNum uint64, blockHash common.Hash) []*FBFTMessage { - log.msgLock.RLock() - defer log.msgLock.RUnlock() - var found []*FBFTMessage for _, msg := range log.messages { if msg.MessageType == typ && msg.BlockNum == blockNum && msg.BlockHash == blockHash && msg.Verified { @@ -305,9 +259,6 @@ func (log *FBFTLog) HasMatchingViewPrepared(blockNum uint64, viewID uint64, bloc // GetMessagesByTypeSeqView returns pbft messages with matching type, blockNum and viewID func (log *FBFTLog) GetMessagesByTypeSeqView(typ msg_pb.MessageType, blockNum uint64, viewID uint64) []*FBFTMessage { - log.msgLock.RLock() - defer log.msgLock.RUnlock() - var found []*FBFTMessage for _, msg := range log.messages { if msg.MessageType != typ || msg.BlockNum != blockNum || msg.ViewID != viewID && msg.Verified { @@ -336,6 +287,12 @@ func (log *FBFTLog) FindMessageByMaxViewID(msgs []*FBFTMessage) *FBFTMessage { // ParseFBFTMessage parses FBFT message into FBFTMessage structure func (consensus *Consensus) ParseFBFTMessage(msg *msg_pb.Message) (*FBFTMessage, error) { + consensus.mutex.Lock() + defer consensus.mutex.Unlock() + return consensus.parseFBFTMessage(msg) +} + +func (consensus *Consensus) parseFBFTMessage(msg *msg_pb.Message) (*FBFTMessage, error) { // TODO Have this do sanity checks on the message please pbftMsg := FBFTMessage{} pbftMsg.MessageType = msg.GetType() @@ -360,9 +317,7 @@ func (consensus *Consensus) ParseFBFTMessage(msg *msg_pb.Message) (*FBFTMessage, copy(pbftMsg.SenderPubkeys[0].Bytes[:], consensusMsg.SenderPubkey[:]) } else { // else, it should be a multi-key message where the bitmap is populated - consensus.multiSigMutex.RLock() pubKeys, err := consensus.multiSigBitmap.GetSignedPubKeysFromBitmap(pbftMsg.SenderPubkeyBitmap) - consensus.multiSigMutex.RUnlock() if err != nil { return nil, err } diff --git a/consensus/leader.go b/consensus/leader.go index 477d8eb29..2f7766e19 100644 --- a/consensus/leader.go +++ b/consensus/leader.go @@ -3,12 +3,11 @@ package consensus import ( "time" + "github.com/harmony-one/harmony/consensus/signature" "github.com/harmony-one/harmony/crypto/bls" "github.com/harmony-one/harmony/internal/common" nodeconfig "github.com/harmony-one/harmony/internal/configs/node" - "github.com/harmony-one/harmony/consensus/signature" - "github.com/ethereum/go-ethereum/rlp" bls_core "github.com/harmony-one/bls/ffi/go/bls" msg_pb "github.com/harmony-one/harmony/api/proto/message" @@ -27,14 +26,10 @@ func (consensus *Consensus) announce(block *types.Block) { return } - //// Lock Write - Start - consensus.mutex.Lock() copy(consensus.blockHash[:], blockHash[:]) consensus.block = encodedBlock // Must set block bytes before consensus.construct() - 
consensus.mutex.Unlock() - //// Lock Write - End - key, err := consensus.GetConsensusLeaderPrivateKey() + key, err := consensus.getConsensusLeaderPrivateKey() if err != nil { consensus.getLogger().Warn().Err(err).Msg("[Announce] Node not a leader") return @@ -79,7 +74,7 @@ func (consensus *Consensus) announce(block *types.Block) { } // Construct broadcast p2p message if err := consensus.msgSender.SendWithRetry( - consensus.BlockNum(), msg_pb.MessageType_ANNOUNCE, []nodeconfig.GroupID{ + consensus.getBlockNum(), msg_pb.MessageType_ANNOUNCE, []nodeconfig.GroupID{ nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID)), }, p2p.ConstructMessage(msgToSend)); err != nil { consensus.getLogger().Warn(). @@ -100,18 +95,13 @@ func (consensus *Consensus) announce(block *types.Block) { func (consensus *Consensus) onPrepare(recvMsg *FBFTMessage) { // TODO(audit): make FBFT lookup using map instead of looping through all items. if !consensus.FBFTLog.HasMatchingViewAnnounce( - consensus.BlockNum(), consensus.GetCurBlockViewID(), recvMsg.BlockHash, + consensus.getBlockNum(), consensus.getCurBlockViewID(), recvMsg.BlockHash, ) { consensus.getLogger().Debug(). Uint64("MsgViewID", recvMsg.ViewID). Uint64("MsgBlockNum", recvMsg.BlockNum). Msg("[OnPrepare] No Matching Announce message") } - - //// Read - Start - consensus.mutex.Lock() - defer consensus.mutex.Unlock() - if !consensus.isRightBlockNumAndViewID(recvMsg) { return } @@ -200,8 +190,6 @@ func (consensus *Consensus) onPrepare(recvMsg *FBFTMessage) { } func (consensus *Consensus) onCommit(recvMsg *FBFTMessage) { - consensus.mutex.Lock() - defer consensus.mutex.Unlock() //// Read - Start if !consensus.isRightBlockNumAndViewID(recvMsg) { return @@ -292,7 +280,7 @@ func (consensus *Consensus) onCommit(recvMsg *FBFTMessage) { //// Write - End //// Read - Start - viewID := consensus.GetCurBlockViewID() + viewID := consensus.getCurBlockViewID() if consensus.Decider.IsAllSigsCollected() { logger.Info().Msg("[OnCommit] 100% Enough commits received") @@ -327,7 +315,7 @@ func (consensus *Consensus) onCommit(recvMsg *FBFTMessage) { consensus.mutex.Lock() defer consensus.mutex.Unlock() - if viewID == consensus.GetCurBlockViewID() { + if viewID == consensus.getCurBlockViewID() { consensus.finalCommit() } }(viewID) diff --git a/consensus/quorum/quorum.go b/consensus/quorum/quorum.go index 867fd9967..aba62fa53 100644 --- a/consensus/quorum/quorum.go +++ b/consensus/quorum/quorum.go @@ -99,7 +99,6 @@ type SignatoryTracker interface { // SignatureReader .. 
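// An illustrative sketch of the deferred-commit pattern visible in onCommit
// above: the view ID is captured while the lock is held, and re-checked after
// the delayed goroutine re-acquires the lock, since consensus may have
// advanced in the meantime. Names and the use of time.AfterFunc here are
// hypothetical simplifications.
package sketch

import (
	"sync"
	"time"
)

type committer struct {
	mu     sync.Mutex
	viewID uint64
}

func (c *committer) scheduleFinalCommit(delay time.Duration, commit func()) {
	c.mu.Lock()
	captured := c.viewID // snapshot under the lock
	c.mu.Unlock()

	time.AfterFunc(delay, func() {
		c.mu.Lock()
		defer c.mu.Unlock()
		if captured == c.viewID { // still in the same view: safe to commit
			commit()
		}
	})
}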
type SignatureReader interface { SignatoryTracker - ReadAllBallots(Phase) []*votepower.Ballot ReadBallot(p Phase, pubkey bls.SerializedPublicKey) *votepower.Ballot TwoThirdsSignersCount() int64 // 96 bytes aggregated signature diff --git a/consensus/threshold.go b/consensus/threshold.go index e8bd875cb..ecb54980a 100644 --- a/consensus/threshold.go +++ b/consensus/threshold.go @@ -15,7 +15,7 @@ import ( func (consensus *Consensus) didReachPrepareQuorum() error { logger := utils.Logger() logger.Info().Msg("[OnPrepare] Received Enough Prepare Signatures") - leaderPriKey, err := consensus.GetConsensusLeaderPrivateKey() + leaderPriKey, err := consensus.getConsensusLeaderPrivateKey() if err != nil { utils.Logger().Warn().Err(err).Msg("[OnPrepare] leader not found") return err diff --git a/consensus/validator.go b/consensus/validator.go index a73ac92eb..f85cb8e3d 100644 --- a/consensus/validator.go +++ b/consensus/validator.go @@ -18,7 +18,7 @@ import ( ) func (consensus *Consensus) onAnnounce(msg *msg_pb.Message) { - recvMsg, err := consensus.ParseFBFTMessage(msg) + recvMsg, err := consensus.parseFBFTMessage(msg) if err != nil { consensus.getLogger().Error(). Err(err). @@ -38,12 +38,10 @@ func (consensus *Consensus) onAnnounce(msg *msg_pb.Message) { Uint64("MsgBlockNum", recvMsg.BlockNum). Msg("[OnAnnounce] Announce message Added") consensus.FBFTLog.AddVerifiedMessage(recvMsg) - consensus.mutex.Lock() - defer consensus.mutex.Unlock() consensus.blockHash = recvMsg.BlockHash // we have already added message and block, skip check viewID // and send prepare message if is in ViewChanging mode - if consensus.IsViewChangingMode() { + if consensus.isViewChangingMode() { consensus.getLogger().Debug(). Msg("[OnAnnounce] Still in ViewChanging Mode, Exiting !!") return @@ -64,7 +62,7 @@ func (consensus *Consensus) onAnnounce(msg *msg_pb.Message) { if len(recvMsg.Block) > 0 { go func() { // Best effort check, no need to error out. - _, err := consensus.validateNewBlock(recvMsg) + _, err := consensus.ValidateNewBlock(recvMsg) if err == nil { consensus.getLogger().Info(). @@ -74,11 +72,12 @@ func (consensus *Consensus) onAnnounce(msg *msg_pb.Message) { } } +func (consensus *Consensus) ValidateNewBlock(recvMsg *FBFTMessage) (*types.Block, error) { + consensus.mutex.Lock() + defer consensus.mutex.Unlock() + return consensus.validateNewBlock(recvMsg) +} func (consensus *Consensus) validateNewBlock(recvMsg *FBFTMessage) (*types.Block, error) { - // Lock to prevent race condition between announce and prepare - consensus.verifyBlockMutex.Lock() - defer consensus.verifyBlockMutex.Unlock() - if consensus.FBFTLog.IsBlockVerified(recvMsg.BlockHash) { var blockObj *types.Block @@ -132,7 +131,7 @@ func (consensus *Consensus) validateNewBlock(recvMsg *FBFTMessage) (*types.Block return nil, errors.New("nil block verifier") } - if err := consensus.VerifyBlock(&blockObj); err != nil { + if err := consensus.verifyBlock(&blockObj); err != nil { consensus.getLogger().Error().Err(err).Msg("[validateNewBlock] Block verification failed") return nil, errors.New("Block verification failed") } @@ -184,9 +183,6 @@ func (consensus *Consensus) sendCommitMessages(blockObj *types.Block) { // if onPrepared accepts the prepared message from the leader, then // it will send a COMMIT message for the leader to receive on the network. func (consensus *Consensus) onPrepared(recvMsg *FBFTMessage) { - consensus.mutex.Lock() - defer consensus.mutex.Unlock() - consensus.getLogger().Info(). Uint64("MsgBlockNum", recvMsg.BlockNum). 
Uint64("MsgViewID", recvMsg.ViewID). @@ -209,7 +205,7 @@ func (consensus *Consensus) onPrepared(recvMsg *FBFTMessage) { // check validity of prepared signature blockHash := recvMsg.BlockHash - aggSig, mask, err := consensus.ReadSignatureBitmapPayload(recvMsg.Payload, 0) + aggSig, mask, err := consensus.readSignatureBitmapPayload(recvMsg.Payload, 0, consensus.Decider.Participants()) if err != nil { consensus.getLogger().Error().Err(err).Msg("ReadSignatureBitmapPayload failed!") return @@ -275,11 +271,13 @@ func (consensus *Consensus) onPrepared(recvMsg *FBFTMessage) { return } curBlockNum := consensus.BlockNum() + consensus.mutex.Lock() + defer consensus.mutex.Unlock() for _, committedMsg := range consensus.FBFTLog.GetNotVerifiedCommittedMessages(blockObj.NumberU64(), blockObj.Header().ViewID().Uint64(), blockObj.Hash()) { if committedMsg != nil { consensus.onCommitted(committedMsg) } - if curBlockNum < consensus.BlockNum() { + if curBlockNum < consensus.getBlockNum() { consensus.getLogger().Info().Msg("[OnPrepared] Successfully caught up with committed message") break } @@ -288,9 +286,6 @@ func (consensus *Consensus) onPrepared(recvMsg *FBFTMessage) { } func (consensus *Consensus) onCommitted(recvMsg *FBFTMessage) { - consensus.mutex.Lock() - defer consensus.mutex.Unlock() - consensus.getLogger().Info(). Uint64("MsgBlockNum", recvMsg.BlockNum). Uint64("MsgViewID", recvMsg.ViewID). @@ -385,7 +380,7 @@ func (consensus *Consensus) onCommitted(recvMsg *FBFTMessage) { return } - if consensus.IsViewChangingMode() { + if consensus.isViewChangingMode() { consensus.getLogger().Info().Msg("[OnCommitted] Still in ViewChanging mode, Exiting!!") return } @@ -406,7 +401,7 @@ func (consensus *Consensus) onCommitted(recvMsg *FBFTMessage) { func (consensus *Consensus) getPriKeysInCommittee() []*bls.PrivateKeyWrapper { priKeys := []*bls.PrivateKeyWrapper{} for i, key := range consensus.priKey { - if !consensus.IsValidatorInCommittee(key.Pub.Bytes) { + if !consensus.isValidatorInCommittee(key.Pub.Bytes) { continue } priKeys = append(priKeys, &consensus.priKey[i]) diff --git a/consensus/view_change.go b/consensus/view_change.go index 5bfd49f83..aafdfd121 100644 --- a/consensus/view_change.go +++ b/consensus/view_change.go @@ -2,7 +2,6 @@ package consensus import ( "math/big" - "sync" "time" "github.com/harmony-one/harmony/internal/chain" @@ -25,26 +24,21 @@ const MaxViewIDDiff = 249 // State contains current mode and current viewID type State struct { - mode Mode - modeMux sync.RWMutex + mode Mode // current view id in normal mode // it changes per successful consensus blockViewID uint64 - cViewMux sync.RWMutex // view changing id is used during view change mode // it is the next view id viewChangingID uint64 - viewMux sync.RWMutex isBackup bool } // Mode return the current node mode func (pm *State) Mode() Mode { - pm.modeMux.RLock() - defer pm.modeMux.RUnlock() return pm.mode } @@ -54,48 +48,35 @@ func (pm *State) SetMode(s Mode) { s = NormalBackup } - pm.modeMux.Lock() - defer pm.modeMux.Unlock() pm.mode = s } // GetCurBlockViewID return the current view id func (pm *State) GetCurBlockViewID() uint64 { - pm.cViewMux.RLock() - defer pm.cViewMux.RUnlock() return pm.blockViewID } // SetCurBlockViewID sets the current view id -func (pm *State) SetCurBlockViewID(viewID uint64) { - pm.cViewMux.Lock() - defer pm.cViewMux.Unlock() +func (pm *State) SetCurBlockViewID(viewID uint64) uint64 { pm.blockViewID = viewID + return pm.blockViewID } // GetViewChangingID return the current view changing id // It is meaningful 
during view change mode func (pm *State) GetViewChangingID() uint64 { - pm.viewMux.RLock() - defer pm.viewMux.RUnlock() return pm.viewChangingID } // SetViewChangingID set the current view changing id // It is meaningful during view change mode func (pm *State) SetViewChangingID(id uint64) { - pm.viewMux.Lock() - defer pm.viewMux.Unlock() pm.viewChangingID = id } // GetViewChangeDuraion return the duration of the current view change // It increase in the power of difference betweeen view changing ID and current view ID func (pm *State) GetViewChangeDuraion() time.Duration { - pm.viewMux.RLock() - pm.cViewMux.RLock() - defer pm.viewMux.RUnlock() - defer pm.cViewMux.RUnlock() diff := int64(pm.viewChangingID - pm.blockViewID) return time.Duration(diff * diff * int64(viewChangeDuration)) } @@ -107,14 +88,14 @@ func (pm *State) SetIsBackup(isBackup bool) { // fallbackNextViewID return the next view ID and duration when there is an exception // to calculate the time-based viewId func (consensus *Consensus) fallbackNextViewID() (uint64, time.Duration) { - diff := int64(consensus.GetViewChangingID() + 1 - consensus.GetCurBlockViewID()) + diff := int64(consensus.getViewChangingID() + 1 - consensus.getCurBlockViewID()) if diff <= 0 { diff = int64(1) } consensus.getLogger().Error(). Int64("diff", diff). Msg("[fallbackNextViewID] use legacy viewID algorithm") - return consensus.GetViewChangingID() + 1, time.Duration(diff * diff * int64(viewChangeDuration)) + return consensus.getViewChangingID() + 1, time.Duration(diff * diff * int64(viewChangeDuration)) } // getNextViewID return the next view ID based on the timestamp @@ -171,8 +152,9 @@ func (consensus *Consensus) getNextViewID() (uint64, time.Duration) { func (consensus *Consensus) getNextLeaderKey(viewID uint64) *bls.PublicKeyWrapper { gap := 1 - if viewID > consensus.GetCurBlockViewID() { - gap = int(viewID - consensus.GetCurBlockViewID()) + cur := consensus.getCurBlockViewID() + if viewID > cur { + gap = int(viewID - cur) } var lastLeaderPubKey *bls.PublicKeyWrapper var err error @@ -214,17 +196,23 @@ func (consensus *Consensus) getNextLeaderKey(viewID uint64) *bls.PublicKeyWrappe Str("leaderPubKey", consensus.LeaderPubKey.Bytes.Hex()). Int("gap", gap). Uint64("newViewID", viewID). - Uint64("myCurBlockViewID", consensus.GetCurBlockViewID()). + Uint64("myCurBlockViewID", consensus.getCurBlockViewID()). 
Msg("[getNextLeaderKey] got leaderPubKey from coinbase") // wasFound, next := consensus.Decider.NthNext(lastLeaderPubKey, gap) // FIXME: rotate leader on harmony nodes only before fully externalization var wasFound bool var next *bls.PublicKeyWrapper - if blockchain != nil && blockchain.Config().IsAllowlistEpoch(epoch) { - wasFound, next = consensus.Decider.NthNextHmyExt( - shard.Schedule.InstanceForEpoch(epoch), - lastLeaderPubKey, - gap) + if blockchain != nil && blockchain.Config().IsLeaderRotation(epoch) { + if consensus.ShardID == shard.BeaconChainShardID { + wasFound, next = consensus.Decider.NthNextHmy( + shard.Schedule.InstanceForEpoch(epoch), + lastLeaderPubKey, + gap) + } else { + wasFound, next = consensus.Decider.NthNext( + lastLeaderPubKey, + gap) + } } else { wasFound, next = consensus.Decider.NthNextHmy( shard.Schedule.InstanceForEpoch(epoch), @@ -252,30 +240,26 @@ func createTimeout() map[TimeoutType]*utils.Timeout { // startViewChange start the view change process func (consensus *Consensus) startViewChange() { - if consensus.disableViewChange || consensus.IsBackup() { + if consensus.disableViewChange || consensus.isBackup { return } - consensus.mutex.Lock() - defer consensus.mutex.Unlock() consensus.consensusTimeout[timeoutConsensus].Stop() consensus.consensusTimeout[timeoutBootstrap].Stop() consensus.current.SetMode(ViewChanging) nextViewID, duration := consensus.getNextViewID() - consensus.SetViewChangingID(nextViewID) + consensus.setViewChangingID(nextViewID) // TODO: set the Leader PubKey to the next leader for view change // this is dangerous as the leader change is not succeeded yet // we use it this way as in many code we validate the messages // aganist the consensus.LeaderPubKey variable. // Ideally, we shall use another variable to keep track of the // leader pubkey in viewchange mode - consensus.pubKeyLock.Lock() consensus.LeaderPubKey = consensus.getNextLeaderKey(nextViewID) - consensus.pubKeyLock.Unlock() consensus.getLogger().Warn(). Uint64("nextViewID", nextViewID). - Uint64("viewChangingID", consensus.GetViewChangingID()). + Uint64("viewChangingID", consensus.getViewChangingID()). Dur("timeoutDuration", duration). Str("NextLeader", consensus.LeaderPubKey.Bytes.Hex()). 
Msg("[startViewChange]") @@ -292,7 +276,7 @@ func (consensus *Consensus) startViewChange() { if err := consensus.vc.InitPayload( consensus.FBFTLog, nextViewID, - consensus.BlockNum(), + consensus.getBlockNum(), consensus.priKey, members); err != nil { consensus.getLogger().Error().Err(err).Msg("[startViewChange] Init Payload Error") @@ -301,12 +285,12 @@ func (consensus *Consensus) startViewChange() { // for view change, send separate view change per public key // do not do multi-sign of view change message for _, key := range consensus.priKey { - if !consensus.IsValidatorInCommittee(key.Pub.Bytes) { + if !consensus.isValidatorInCommittee(key.Pub.Bytes) { continue } msgToSend := consensus.constructViewChangeMessage(&key) if err := consensus.msgSender.SendWithRetry( - consensus.BlockNum(), + consensus.getBlockNum(), msg_pb.MessageType_VIEWCHANGE, []nodeconfig.GroupID{ nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID))}, @@ -320,7 +304,7 @@ func (consensus *Consensus) startViewChange() { // startNewView stops the current view change func (consensus *Consensus) startNewView(viewID uint64, newLeaderPriKey *bls.PrivateKeyWrapper, reset bool) error { - if !consensus.IsViewChangingMode() { + if !consensus.isViewChangingMode() { return errors.New("not in view changing mode anymore") } @@ -332,7 +316,7 @@ func (consensus *Consensus) startNewView(viewID uint64, newLeaderPriKey *bls.Pri } if err := consensus.msgSender.SendWithRetry( - consensus.BlockNum(), + consensus.getBlockNum(), msg_pb.MessageType_NEWVIEW, []nodeconfig.GroupID{ nodeconfig.NewGroupIDByShardID(nodeconfig.ShardID(consensus.ShardID))}, @@ -349,8 +333,8 @@ func (consensus *Consensus) startNewView(viewID uint64, newLeaderPriKey *bls.Pri consensus.current.SetMode(Normal) consensus.consensusTimeout[timeoutViewChange].Stop() - consensus.SetViewIDs(viewID) - consensus.ResetViewChangeState() + consensus.setViewIDs(viewID) + consensus.resetViewChangeState() consensus.consensusTimeout[timeoutConsensus].Start() consensus.getLogger().Info(). @@ -360,18 +344,15 @@ func (consensus *Consensus) startNewView(viewID uint64, newLeaderPriKey *bls.Pri // TODO: consider make ResetState unified and only called in one place like finalizeCommit() if reset { - consensus.ResetState() + consensus.resetState() } - consensus.LeaderPubKey = newLeaderPriKey.Pub + consensus.setLeaderPubKey(newLeaderPriKey.Pub) return nil } // onViewChange is called when the view change message is received. func (consensus *Consensus) onViewChange(recvMsg *FBFTMessage) { - consensus.mutex.Lock() - defer consensus.mutex.Unlock() - consensus.getLogger().Debug(). Uint64("viewID", recvMsg.ViewID). Uint64("blockNum", recvMsg.BlockNum). @@ -380,7 +361,7 @@ func (consensus *Consensus) onViewChange(recvMsg *FBFTMessage) { // if not leader, noop newLeaderKey := recvMsg.LeaderPubkey - newLeaderPriKey, err := consensus.GetLeaderPrivateKey(newLeaderKey.Object) + newLeaderPriKey, err := consensus.getLeaderPrivateKey(newLeaderKey.Object) if err != nil { consensus.getLogger().Debug(). Err(err). 
@@ -433,7 +414,7 @@ func (consensus *Consensus) onViewChange(recvMsg *FBFTMessage) { } // received enough view change messages, change state to normal consensus - if consensus.Decider.IsQuorumAchievedByMask(consensus.vc.GetViewIDBitmap(recvMsg.ViewID)) && consensus.IsViewChangingMode() { + if consensus.Decider.IsQuorumAchievedByMask(consensus.vc.GetViewIDBitmap(recvMsg.ViewID)) && consensus.isViewChangingMode() { // no previous prepared message, go straight to normal mode // and start proposing new block if consensus.vc.IsM1PayloadEmpty() { @@ -468,9 +449,6 @@ // Or the validator will enter announce phase to wait for the new block proposed // from the new leader func (consensus *Consensus) onNewView(recvMsg *FBFTMessage) { - consensus.mutex.Lock() - defer consensus.mutex.Unlock() - consensus.getLogger().Info(). Uint64("viewID", recvMsg.ViewID). Uint64("blockNum", recvMsg.BlockNum). @@ -478,10 +456,10 @@ Msg("[onNewView] Received NewView Message") // change view and leaderKey to keep in sync with network - if consensus.BlockNum() != recvMsg.BlockNum { + if consensus.getBlockNum() != recvMsg.BlockNum { consensus.getLogger().Warn(). Uint64("MsgBlockNum", recvMsg.BlockNum). - Uint64("myBlockNum", consensus.BlockNum()). + Uint64("myBlockNum", consensus.getBlockNum()). Msg("[onNewView] Invalid block number") return } @@ -515,7 +493,7 @@ utils.CountOneBits(m3Mask.Bitmap) > utils.CountOneBits(m2Mask.Bitmap)) { // m1 is not empty, check it's valid blockHash := recvMsg.Payload[:32] - aggSig, mask, err := consensus.ReadSignatureBitmapPayload(recvMsg.Payload, 32) + aggSig, mask, err := consensus.readSignatureBitmapPayload(recvMsg.Payload, 32, consensus.Decider.Participants()) if err != nil { consensus.getLogger().Error().Err(err). Msg("[onNewView] ReadSignatureBitmapPayload Failed") @@ -549,7 +527,7 @@ } } - if !consensus.IsViewChangingMode() { + if !consensus.isViewChangingMode() { consensus.getLogger().Info().Msg("Not in ViewChanging Mode.") return } @@ -557,11 +535,9 @@ consensus.consensusTimeout[timeoutViewChange].Stop() // newView message verified success, override my state - consensus.SetViewIDs(recvMsg.ViewID) - consensus.pubKeyLock.Lock() + consensus.setViewIDs(recvMsg.ViewID) consensus.LeaderPubKey = senderKey - consensus.pubKeyLock.Unlock() - consensus.ResetViewChangeState() + consensus.resetViewChangeState() consensus.msgSender.StopRetry(msg_pb.MessageType_VIEWCHANGE) @@ -570,7 +546,7 @@ consensus.sendCommitMessages(preparedBlock) consensus.switchPhase("onNewView", FBFTCommit) } else { - consensus.ResetState() + consensus.resetState() consensus.getLogger().Info().Msg("onNewView === announce") } consensus.getLogger().Info(). @@ -582,6 +558,13 @@ // ResetViewChangeState resets the view change structure func (consensus *Consensus) ResetViewChangeState() { + consensus.mutex.Lock() + defer consensus.mutex.Unlock() + consensus.resetViewChangeState() +} + +// resetViewChangeState resets the view change structure +func (consensus *Consensus) resetViewChangeState() { consensus.getLogger().Info(). Str("Phase", consensus.phase.String()).
Msg("[ResetViewChangeState] Resetting view change state") diff --git a/consensus/view_change_construct.go b/consensus/view_change_construct.go index b818507eb..52ea8bbd7 100644 --- a/consensus/view_change_construct.go +++ b/consensus/view_change_construct.go @@ -87,7 +87,7 @@ func (vc *viewChange) AddViewIDKeyIfNotExist(viewID uint64, members multibls.Pub } } -// Reset reset the state for viewchange +// Reset resets the state for viewChange. func (vc *viewChange) Reset() { vc.m1Payload = []byte{} vc.bhpSigs = map[uint64]map[string]*bls_core.Sign{} diff --git a/consensus/view_change_msg.go b/consensus/view_change_msg.go index be1974105..c24145030 100644 --- a/consensus/view_change_msg.go +++ b/consensus/view_change_msg.go @@ -23,8 +23,8 @@ func (consensus *Consensus) constructViewChangeMessage(priKey *bls.PrivateKeyWra Type: msg_pb.MessageType_VIEWCHANGE, Request: &msg_pb.Message_Viewchange{ Viewchange: &msg_pb.ViewChangeRequest{ - ViewId: consensus.GetViewChangingID(), - BlockNum: consensus.BlockNum(), + ViewId: consensus.getViewChangingID(), + BlockNum: consensus.getBlockNum(), ShardId: consensus.ShardID, SenderPubkey: priKey.Pub.Bytes[:], LeaderPubkey: consensus.LeaderPubKey.Bytes[:], @@ -33,7 +33,7 @@ func (consensus *Consensus) constructViewChangeMessage(priKey *bls.PrivateKeyWra } preparedMsgs := consensus.FBFTLog.GetMessagesByTypeSeq( - msg_pb.MessageType_PREPARED, consensus.BlockNum(), + msg_pb.MessageType_PREPARED, consensus.getBlockNum(), ) preparedMsg := consensus.FBFTLog.FindMessageByMaxViewID(preparedMsgs) @@ -45,7 +45,7 @@ func (consensus *Consensus) constructViewChangeMessage(priKey *bls.PrivateKeyWra Interface("preparedMsg", preparedMsg). Msg("[constructViewChangeMessage] found prepared msg") if block != nil { - if err := consensus.VerifyBlock(block); err == nil { + if err := consensus.verifyBlock(block); err == nil { tmpEncoded, err := rlp.EncodeToBytes(block) if err != nil { consensus.getLogger().Err(err).Msg("[constructViewChangeMessage] Failed encoding block") @@ -83,7 +83,7 @@ func (consensus *Consensus) constructViewChangeMessage(priKey *bls.PrivateKeyWra } viewIDBytes := make([]byte, 8) - binary.LittleEndian.PutUint64(viewIDBytes, consensus.GetViewChangingID()) + binary.LittleEndian.PutUint64(viewIDBytes, consensus.getViewChangingID()) sign1 := priKey.Pri.SignHash(viewIDBytes) if sign1 != nil { vcMsg.ViewidSig = sign1.Serialize() @@ -107,7 +107,7 @@ func (consensus *Consensus) constructNewViewMessage(viewID uint64, priKey *bls.P Request: &msg_pb.Message_Viewchange{ Viewchange: &msg_pb.ViewChangeRequest{ ViewId: viewID, - BlockNum: consensus.BlockNum(), + BlockNum: consensus.getBlockNum(), ShardId: consensus.ShardID, SenderPubkey: priKey.Pub.Bytes[:], }, diff --git a/consensus/view_change_test.go b/consensus/view_change_test.go index fc80b6ccf..2d149b6b7 100644 --- a/consensus/view_change_test.go +++ b/consensus/view_change_test.go @@ -43,7 +43,7 @@ func TestPhaseSwitching(t *testing.T) { _, _, consensus, _, err := GenerateConsensusForTesting() assert.NoError(t, err) - assert.Equal(t, FBFTAnnounce, consensus.phase.Get()) // It's a new consensus, we should be at the FBFTAnnounce phase. + assert.Equal(t, FBFTAnnounce, consensus.phase) // It's a new consensus, we should be at the FBFTAnnounce phase. 
switches := []phaseSwitch{ {start: FBFTAnnounce, end: FBFTPrepare}, @@ -73,10 +73,10 @@ func TestPhaseSwitching(t *testing.T) { func testPhaseGroupSwitching(t *testing.T, consensus *Consensus, phases []FBFTPhase, startPhase FBFTPhase, desiredPhase FBFTPhase) { for range phases { consensus.switchPhase("test", desiredPhase) - assert.Equal(t, desiredPhase, consensus.phase.Get()) + assert.Equal(t, desiredPhase, consensus.phase) } - assert.Equal(t, desiredPhase, consensus.phase.Get()) + assert.Equal(t, desiredPhase, consensus.phase) return } diff --git a/core/blockchain.go b/core/blockchain.go index 1c7ea43d3..fda483165 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -13,6 +13,7 @@ import ( "github.com/harmony-one/harmony/core/state" "github.com/harmony-one/harmony/core/types" "github.com/harmony-one/harmony/core/vm" + "github.com/harmony-one/harmony/crypto/bls" harmonyconfig "github.com/harmony-one/harmony/internal/configs/harmony" "github.com/harmony-one/harmony/internal/params" "github.com/harmony-one/harmony/internal/tikv/redis_helper" @@ -334,6 +335,8 @@ type BlockChain interface { state *state.DB, ) (status WriteStatus, err error) + GetLeaderPubKeyFromCoinbase(h *block.Header) (*bls.PublicKeyWrapper, error) + // ========== Only For Tikv Start ========== // return true if is tikv writer master diff --git a/core/blockchain_impl.go b/core/blockchain_impl.go index 73f37f862..8b0683bd6 100644 --- a/core/blockchain_impl.go +++ b/core/blockchain_impl.go @@ -39,6 +39,7 @@ import ( "github.com/ethereum/go-ethereum/event" "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/rlp" + bls2 "github.com/harmony-one/bls/ffi/go/bls" "github.com/harmony-one/harmony/block" consensus_engine "github.com/harmony-one/harmony/consensus/engine" "github.com/harmony-one/harmony/consensus/reward" @@ -47,6 +48,7 @@ import ( "github.com/harmony-one/harmony/core/state" "github.com/harmony-one/harmony/core/types" "github.com/harmony-one/harmony/core/vm" + "github.com/harmony-one/harmony/crypto/bls" harmonyconfig "github.com/harmony-one/harmony/internal/configs/harmony" "github.com/harmony-one/harmony/internal/params" "github.com/harmony-one/harmony/internal/tikv" @@ -101,7 +103,6 @@ const ( maxFutureBlocks = 16 maxTimeFutureBlocks = 30 badBlockLimit = 10 - triesInMemory = 128 triesInRedis = 1000 shardCacheLimit = 10 commitsCacheLimit = 10 @@ -125,6 +126,7 @@ type CacheConfig struct { Disabled bool // Whether to disable trie write caching (archive node) TrieNodeLimit int // Memory limit (MB) at which to flush the current in-memory trie to disk TrieTimeLimit time.Duration // Time limit after which to flush the current in-memory trie to disk + TriesInMemory uint64 // Block number from the head stored in disk before exiting } type BlockChainImpl struct { @@ -181,6 +183,7 @@ type BlockChainImpl struct { validatorListByDelegatorCache *lru.Cache // Cache of validator list by delegator pendingCrossLinksCache *lru.Cache // Cache of last pending crosslinks blockAccumulatorCache *lru.Cache // Cache of block accumulators + leaderPubKeyFromCoinbase *lru.Cache // Cache of leader public key from coinbase quit chan struct{} // blockchain quit channel running int32 // running must be called atomically blockchainPruner *blockchainPruner // use to prune beacon chain @@ -220,12 +223,7 @@ func newBlockChainWithOptions( db ethdb.Database, stateCache state.Database, beaconChain BlockChain, cacheConfig *CacheConfig, chainConfig *params.ChainConfig, engine consensus_engine.Engine, vmConfig vm.Config, options 
Options) (*BlockChainImpl, error) { - if cacheConfig == nil { - cacheConfig = &CacheConfig{ - TrieNodeLimit: 256 * 1024 * 1024, - TrieTimeLimit: 2 * time.Minute, - } - } + bodyCache, _ := lru.New(bodyCacheLimit) bodyRLPCache, _ := lru.New(bodyCacheLimit) receiptsCache, _ := lru.New(receiptsCacheLimit) @@ -242,6 +240,7 @@ func newBlockChainWithOptions( validatorListByDelegatorCache, _ := lru.New(validatorListByDelegatorCacheLimit) pendingCrossLinksCache, _ := lru.New(pendingCrossLinksCacheLimit) blockAccumulatorCache, _ := lru.New(blockAccumulatorCacheLimit) + leaderPubKeyFromCoinbase, _ := lru.New(chainConfig.LeaderRotationBlocksCount + 2) bc := &BlockChainImpl{ chainConfig: chainConfig, @@ -265,6 +264,7 @@ func newBlockChainWithOptions( validatorListByDelegatorCache: validatorListByDelegatorCache, pendingCrossLinksCache: pendingCrossLinksCache, blockAccumulatorCache: blockAccumulatorCache, + leaderPubKeyFromCoinbase: leaderPubKeyFromCoinbase, blockchainPruner: newBlockchainPruner(db), engine: engine, vmConfig: vmConfig, @@ -1071,11 +1071,11 @@ func (bc *BlockChainImpl) Stop() { // We're writing three different states to catch different restart scenarios: // - HEAD: So we don't need to reprocess any blocks in the general case // - HEAD-1: So we don't do large reorgs if our HEAD becomes an uncle - // - HEAD-127: So we have a hard limit on the number of blocks reexecuted + // - HEAD-TriesInMemory: So we have a configurable hard limit on the number of blocks reexecuted (default 128) if !bc.cacheConfig.Disabled { triedb := bc.stateCache.TrieDB() - for _, offset := range []uint64{0, 1, triesInMemory - 1} { + for _, offset := range []uint64{0, 1, bc.cacheConfig.TriesInMemory - 1} { if number := bc.CurrentBlock().NumberU64(); number > offset { recent := bc.GetHeaderByNumber(number - offset) if recent != nil { @@ -1402,7 +1402,7 @@ func (bc *BlockChainImpl) WriteBlockWithState( triedb.Reference(root, common.Hash{}) // metadata reference to keep trie alive bc.triegc.Push(root, -int64(block.NumberU64())) - if current := block.NumberU64(); current > triesInMemory { + if current := block.NumberU64(); current > bc.cacheConfig.TriesInMemory { // If we exceeded our memory allowance, flush matured singleton nodes to disk var ( nodes, imgs = triedb.Size() @@ -1412,7 +1412,7 @@ func (bc *BlockChainImpl) WriteBlockWithState( triedb.Cap(limit - ethdb.IdealBatchSize) } // Find the next state trie we need to commit - header := bc.GetHeaderByNumber(current - triesInMemory) + header := bc.GetHeaderByNumber(current - bc.cacheConfig.TriesInMemory) if header != nil { chosen := header.Number().Uint64() @@ -1420,11 +1420,11 @@ func (bc *BlockChainImpl) WriteBlockWithState( if bc.gcproc > bc.cacheConfig.TrieTimeLimit { // If we're exceeding limits but haven't reached a large enough memory gap, // warn the user that the system is becoming unstable. - if chosen < lastWrite+triesInMemory && bc.gcproc >= 2*bc.cacheConfig.TrieTimeLimit { + if chosen < lastWrite+bc.cacheConfig.TriesInMemory && bc.gcproc >= 2*bc.cacheConfig.TrieTimeLimit { utils.Logger().Info(). Dur("time", bc.gcproc). Dur("allowance", bc.cacheConfig.TrieTimeLimit). - Float64("optimum", float64(chosen-lastWrite)/triesInMemory). + Float64("optimum", float64(chosen-lastWrite)/float64(bc.cacheConfig.TriesInMemory)). 
Msg("State in memory for too long, committing") } // Flush an entire trie and restart the counters @@ -3256,6 +3256,60 @@ func (bc *BlockChainImpl) SuperCommitteeForNextEpoch( return nextCommittee, err } +// GetLeaderPubKeyFromCoinbase retrieve corresponding blsPublicKey from Coinbase Address +func (bc *BlockChainImpl) GetLeaderPubKeyFromCoinbase(h *block.Header) (*bls.PublicKeyWrapper, error) { + if cached, ok := bc.leaderPubKeyFromCoinbase.Get(h.Number().Uint64()); ok { + return cached.(*bls.PublicKeyWrapper), nil + } + rs, err := bc.getLeaderPubKeyFromCoinbase(h) + if err != nil { + return nil, err + } + bc.leaderPubKeyFromCoinbase.Add(h.Number().Uint64(), rs) + return rs, nil +} + +// getLeaderPubKeyFromCoinbase retrieve corresponding blsPublicKey from Coinbase Address +func (bc *BlockChainImpl) getLeaderPubKeyFromCoinbase(h *block.Header) (*bls.PublicKeyWrapper, error) { + shardState, err := bc.ReadShardState(h.Epoch()) + if err != nil { + return nil, errors.Wrapf(err, "cannot read shard state %v %s", + h.Epoch(), + h.Coinbase().Hash().Hex(), + ) + } + + committee, err := shardState.FindCommitteeByID(h.ShardID()) + if err != nil { + return nil, err + } + + committerKey := new(bls2.PublicKey) + isStaking := bc.Config().IsStaking(h.Epoch()) + for _, member := range committee.Slots { + if isStaking { + // After staking the coinbase address will be the address of bls public key + if utils.GetAddressFromBLSPubKeyBytes(member.BLSPublicKey[:]) == h.Coinbase() { + if committerKey, err = bls.BytesToBLSPublicKey(member.BLSPublicKey[:]); err != nil { + return nil, err + } + return &bls.PublicKeyWrapper{Object: committerKey, Bytes: member.BLSPublicKey}, nil + } + } else { + if member.EcdsaAddress == h.Coinbase() { + if committerKey, err = bls.BytesToBLSPublicKey(member.BLSPublicKey[:]); err != nil { + return nil, err + } + return &bls.PublicKeyWrapper{Object: committerKey, Bytes: member.BLSPublicKey}, nil + } + } + } + return nil, errors.Errorf( + "cannot find corresponding BLS Public Key coinbase %s", + h.Coinbase().Hex(), + ) +} + func (bc *BlockChainImpl) EnablePruneBeaconChainFeature() { bc.pruneBeaconChainEnable = true } diff --git a/core/blockchain_stub.go b/core/blockchain_stub.go index ccb1c9847..f9e9111ea 100644 --- a/core/blockchain_stub.go +++ b/core/blockchain_stub.go @@ -13,6 +13,7 @@ import ( "github.com/harmony-one/harmony/core/state" "github.com/harmony-one/harmony/core/types" "github.com/harmony-one/harmony/core/vm" + "github.com/harmony-one/harmony/crypto/bls" harmonyconfig "github.com/harmony-one/harmony/internal/configs/harmony" "github.com/harmony-one/harmony/internal/params" "github.com/harmony-one/harmony/internal/tikv/redis_helper" @@ -403,6 +404,10 @@ func (a Stub) CommitOffChainData(batch rawdb.DatabaseWriter, block *types.Block, return 0, errors.Errorf("method CommitOffChainData not implemented for %s", a.Name) } +func (a Stub) GetLeaderPubKeyFromCoinbase(h *block.Header) (*bls.PublicKeyWrapper, error) { + return nil, errors.Errorf("method GetLeaderPubKeyFromCoinbase not implemented for %s", a.Name) +} + func (a Stub) IsTikvWriterMaster() bool { return false } diff --git a/core/state/statedb.go b/core/state/statedb.go index d571d3372..0a7cd3b56 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -542,7 +542,10 @@ func (db *DB) createObject(addr common.Address) (newobj, prev *Object) { db.journal.append(resetObjectChange{prev: prev}) } db.setStateObject(newobj) - return newobj, prev + if prev != nil && !prev.deleted { + return newobj, prev + } + return 
newobj, nil } // CreateAccount explicitly creates a state object. If a state object with the address diff --git a/core/vm/instructions.go b/core/vm/instructions.go index 2c1c3154a..091ba28ff 100644 --- a/core/vm/instructions.go +++ b/core/vm/instructions.go @@ -24,6 +24,7 @@ import ( "github.com/ethereum/go-ethereum/common/math" "github.com/harmony-one/harmony/core/types" "github.com/harmony-one/harmony/internal/params" + "github.com/harmony-one/harmony/shard" "golang.org/x/crypto/sha3" ) @@ -477,6 +478,16 @@ func opReturnDataCopy(pc *uint64, interpreter *EVMInterpreter, contract *Contrac func opExtCodeSize(pc *uint64, interpreter *EVMInterpreter, contract *Contract, memory *Memory, stack *Stack) ([]byte, error) { slot := stack.peek() + address := common.BigToAddress(slot) + fixValidatorCode := interpreter.evm.chainRules.IsValidatorCodeFix && + interpreter.evm.ShardID == shard.BeaconChainShardID && + interpreter.evm.StateDB.IsValidator(address) + if fixValidatorCode { + // https://github.com/ethereum/solidity/blob/develop/Changelog.md#081-2021-01-27 + // per this link,
.code.length calls extcodesize on the address so this fix will work + slot.SetUint64(0) + return nil, nil + } slot.SetUint64(uint64(interpreter.evm.StateDB.GetCodeSize(common.BigToAddress(slot)))) return nil, nil @@ -509,7 +520,17 @@ func opExtCodeCopy(pc *uint64, interpreter *EVMInterpreter, contract *Contract, codeOffset = stack.pop() length = stack.pop() ) - codeCopy := getDataBig(interpreter.evm.StateDB.GetCode(addr), codeOffset, length) + var code []byte + fixValidatorCode := interpreter.evm.chainRules.IsValidatorCodeFix && + interpreter.evm.ShardID == shard.BeaconChainShardID && + interpreter.evm.StateDB.IsValidator(addr) + if fixValidatorCode { + // for EOAs that are not validators, statedb returns nil + code = nil + } else { + code = interpreter.evm.StateDB.GetCode(addr) + } + codeCopy := getDataBig(code, codeOffset, length) memory.Set(memOffset.Uint64(), length.Uint64(), codeCopy) interpreter.intPool.put(memOffset, codeOffset, length) @@ -555,7 +576,14 @@ func opExtCodeHash(pc *uint64, interpreter *EVMInterpreter, contract *Contract, if interpreter.evm.StateDB.Empty(address) { slot.SetUint64(0) } else { - slot.SetBytes(interpreter.evm.StateDB.GetCodeHash(address).Bytes()) + fixValidatorCode := interpreter.evm.chainRules.IsValidatorCodeFix && + interpreter.evm.ShardID == shard.BeaconChainShardID && + interpreter.evm.StateDB.IsValidator(address) + if fixValidatorCode { + slot.SetBytes(emptyCodeHash.Bytes()) + } else { + slot.SetBytes(interpreter.evm.StateDB.GetCodeHash(address).Bytes()) + } } return nil, nil } diff --git a/go.mod b/go.mod index 901fc1f0e..18e16a0c0 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/harmony-one/harmony go 1.19 require ( - github.com/RoaringBitmap/roaring v1.2.1 + github.com/RoaringBitmap/roaring v1.2.3 github.com/VictoriaMetrics/fastcache v1.5.7 github.com/Workiva/go-datastructures v1.0.50 github.com/allegro/bigcache v1.2.1 @@ -71,6 +71,7 @@ require ( github.com/c2h5oh/datasize v0.0.0-20220606134207-859f65c6625b github.com/ledgerwatch/erigon-lib v0.0.0-20221218022306-0f8fdd40c2db github.com/ledgerwatch/log/v3 v3.6.0 + github.com/libp2p/go-libp2p-core v0.20.1 ) require ( diff --git a/go.sum b/go.sum index 0b7ab4b96..2ebb5e94a 100644 --- a/go.sum +++ b/go.sum @@ -68,8 +68,8 @@ github.com/OneOfOne/xxhash v1.2.5 h1:zl/OfRA6nftbBK9qTohYBJ5xvw6C/oNKizR7cZGl3cI github.com/OneOfOne/xxhash v1.2.5/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q= github.com/OpenPeeDeeP/depguard v1.0.1 h1:VlW4R6jmBIv3/u1JNlawEvJMM4J+dPORPaZasQee8Us= github.com/OpenPeeDeeP/depguard v1.0.1/go.mod h1:xsIw86fROiiwelg+jB2uM9PiKihMMmUx/1V+TNhjQvM= -github.com/RoaringBitmap/roaring v1.2.1 h1:58/LJlg/81wfEHd5L9qsHduznOIhyv4qb1yWcSvVq9A= -github.com/RoaringBitmap/roaring v1.2.1/go.mod h1:icnadbWcNyfEHlYdr+tDlOTih1Bf/h+rzPpv4sbomAA= +github.com/RoaringBitmap/roaring v1.2.3 h1:yqreLINqIrX22ErkKI0vY47/ivtJr6n+kMhVOVmhWBY= +github.com/RoaringBitmap/roaring v1.2.3/go.mod h1:plvDsJQpxOC5bw8LRteu/MLWHsHez/3y6cubLI4/1yE= github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg= github.com/VictoriaMetrics/fastcache v1.5.3/go.mod h1:+jv9Ckb+za/P1ZRg/sulP5Ni1v49daAVERr0H3CuscE= github.com/VictoriaMetrics/fastcache v1.5.7 h1:4y6y0G8PRzszQUYIQHHssv/jgPHAb5qQuuDNdCbyAgw= @@ -594,6 +594,8 @@ github.com/libp2p/go-libp2p v0.24.0 h1:DQk/5bBon+yUVIGTeRVBmOYpZzoBHx/VTC0xoLgJG github.com/libp2p/go-libp2p v0.24.0/go.mod h1:28t24CYDlnBs23rIs1OclU89YbhgibrBq2LFbMe+cFw= github.com/libp2p/go-libp2p-asn-util v0.2.0 
h1:rg3+Os8jbnO5DxkC7K/Utdi+DkY3q/d1/1q+8WeNAsw= github.com/libp2p/go-libp2p-asn-util v0.2.0/go.mod h1:WoaWxbHKBymSN41hWSq/lGKJEca7TNm58+gGJi2WsLI= +github.com/libp2p/go-libp2p-core v0.20.1 h1:fQz4BJyIFmSZAiTbKV8qoYhEH5Dtv/cVhZbG3Ib/+Cw= +github.com/libp2p/go-libp2p-core v0.20.1/go.mod h1:6zR8H7CvQWgYLsbG4on6oLNSGcyKaYFSEYyDt51+bIY= github.com/libp2p/go-libp2p-kad-dht v0.19.0 h1:2HuiInHZTm9ZvQajaqdaPLHr0PCKKigWiflakimttE0= github.com/libp2p/go-libp2p-kad-dht v0.19.0/go.mod h1:qPIXdiZsLczhV4/+4EO1jE8ae0YCW4ZOogc4WVIyTEU= github.com/libp2p/go-libp2p-kbucket v0.5.0 h1:g/7tVm8ACHDxH29BGrpsQlnNeu+6OF1A9bno/4/U1oA= diff --git a/hmy/downloader/const.go b/hmy/downloader/const.go index ac292d974..a6cafb918 100644 --- a/hmy/downloader/const.go +++ b/hmy/downloader/const.go @@ -35,7 +35,8 @@ type ( // Only run stream sync protocol as a server. // TODO: remove this when stream sync is fully up. ServerOnly bool - + // use staged sync + Staged bool // parameters Network nodeconfig.NetworkType Concurrency int // Number of concurrent sync requests diff --git a/hmy/downloader/downloader.go b/hmy/downloader/downloader.go index f5ab8580b..01ec242ab 100644 --- a/hmy/downloader/downloader.go +++ b/hmy/downloader/downloader.go @@ -44,15 +44,16 @@ type ( ) // NewDownloader creates a new downloader -func NewDownloader(host p2p.Host, bc core.BlockChain, config Config) *Downloader { +func NewDownloader(host p2p.Host, bc core.BlockChain, isBeaconNode bool, config Config) *Downloader { config.fixValues() sp := sync.NewProtocol(sync.Config{ - Chain: bc, - Host: host.GetP2PHost(), - Discovery: host.GetDiscovery(), - ShardID: nodeconfig.ShardID(bc.ShardID()), - Network: config.Network, + Chain: bc, + Host: host.GetP2PHost(), + Discovery: host.GetDiscovery(), + ShardID: nodeconfig.ShardID(bc.ShardID()), + Network: config.Network, + BeaconNode: isBeaconNode, SmSoftLowCap: config.SmSoftLowCap, SmHardLowCap: config.SmHardLowCap, diff --git a/hmy/downloader/downloaders.go b/hmy/downloader/downloaders.go index a1cbfa5a8..528e47e84 100644 --- a/hmy/downloader/downloaders.go +++ b/hmy/downloader/downloaders.go @@ -17,7 +17,7 @@ type Downloaders struct { // NewDownloaders creates Downloaders for sync of multiple blockchains func NewDownloaders(host p2p.Host, bcs []core.BlockChain, config Config) *Downloaders { ds := make(map[uint32]*Downloader) - + isBeaconNode := len(bcs) == 1 for _, bc := range bcs { if bc == nil { continue @@ -25,7 +25,7 @@ func NewDownloaders(host p2p.Host, bcs []core.BlockChain, config Config) *Downlo if _, ok := ds[bc.ShardID()]; ok { continue } - ds[bc.ShardID()] = NewDownloader(host, bc, config) + ds[bc.ShardID()] = NewDownloader(host, bc, isBeaconNode, config) } return &Downloaders{ ds: ds, diff --git a/hmy/downloader/longrange.go b/hmy/downloader/longrange.go index b4efbf192..4d4935b8f 100644 --- a/hmy/downloader/longrange.go +++ b/hmy/downloader/longrange.go @@ -149,9 +149,8 @@ func (lsi *lrSyncIter) fetchAndInsertBlocks(targetBN uint64) error { worker := &getBlocksWorker{ gbm: gbm, protocol: lsi.p, - ctx: lsi.ctx, } - go worker.workLoop() + go worker.workLoop(lsi.ctx) } // insert the blocks to chain. Return when the target block number is reached. 
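Reviewer note: the longrange.go hunks above stop storing a context on getBlocksWorker and instead thread it through workLoop and doBatch. A minimal, self-contained sketch of that pattern (names and timings here are illustrative, not from this PR):

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// workLoop takes its context as an argument rather than reading it from a
// struct field, so each spawned worker has an explicit, caller-owned lifetime.
func workLoop(ctx context.Context, id int) {
	for {
		select {
		case <-ctx.Done():
			fmt.Printf("worker %d stopped: %v\n", id, ctx.Err())
			return
		case <-time.After(100 * time.Millisecond):
			// fetch and process one batch here; per-request timeouts are
			// derived from ctx, as doBatch does with context.WithTimeout.
		}
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	for i := 0; i < 3; i++ {
		go workLoop(ctx, i)
	}
	time.Sleep(300 * time.Millisecond)
	cancel() // every worker sharing ctx observes Done and exits
	time.Sleep(50 * time.Millisecond)
}
```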
@@ -243,28 +242,26 @@ func (lsi *lrSyncIter) checkHaveEnoughStreams() error { type getBlocksWorker struct { gbm *getBlocksManager protocol syncProtocol - - ctx context.Context } -func (w *getBlocksWorker) workLoop() { +func (w *getBlocksWorker) workLoop(ctx context.Context) { for { select { - case <-w.ctx.Done(): + case <-ctx.Done(): return default: } batch := w.gbm.GetNextBatch() if len(batch) == 0 { select { - case <-w.ctx.Done(): + case <-ctx.Done(): return case <-time.After(100 * time.Millisecond): continue } } - blocks, stid, err := w.doBatch(batch) + blocks, stid, err := w.doBatch(ctx, batch) if err != nil { if !errors.Is(err, context.Canceled) { w.protocol.RemoveStream(stid) @@ -277,8 +274,8 @@ func (w *getBlocksWorker) workLoop() { } } -func (w *getBlocksWorker) doBatch(bns []uint64) ([]*types.Block, sttypes.StreamID, error) { - ctx, cancel := context.WithTimeout(w.ctx, 10*time.Second) +func (w *getBlocksWorker) doBatch(ctx context.Context, bns []uint64) ([]*types.Block, sttypes.StreamID, error) { + ctx, cancel := context.WithTimeout(ctx, 10*time.Second) defer cancel() blocks, stid, err := w.protocol.GetBlocksByNumber(ctx, bns) diff --git a/hmy/downloader/shortrange.go b/hmy/downloader/shortrange.go index f1f6e402c..8276911d4 100644 --- a/hmy/downloader/shortrange.go +++ b/hmy/downloader/shortrange.go @@ -20,7 +20,7 @@ import ( // doShortRangeSync does the short range sync. // Compared with long range sync, short range sync is more focused on syncing to the latest block. // It consist of 3 steps: -// 1. Obtain the block hashes and ompute the longest hash chain.. +// 1. Obtain the block hashes and compute the longest hash chain.. // 2. Get blocks by hashes from computed hash chain. // 3. Insert the blocks to blockchain. func (d *Downloader) doShortRangeSync() (int, error) { diff --git a/internal/chain/engine.go b/internal/chain/engine.go index a340e9c27..6e018dccf 100644 --- a/internal/chain/engine.go +++ b/internal/chain/engine.go @@ -186,7 +186,7 @@ func (e *engineImpl) VerifyVRF( return nil } -// retrieve corresponding blsPublicKey from Coinbase Address +// GetLeaderPubKeyFromCoinbase retrieve corresponding blsPublicKey from Coinbase Address func GetLeaderPubKeyFromCoinbase( blockchain engine.ChainReader, h *block.Header, ) (*bls.PublicKeyWrapper, error) { diff --git a/internal/configs/harmony/harmony.go b/internal/configs/harmony/harmony.go index 90569b96f..d4e8df4b0 100644 --- a/internal/configs/harmony/harmony.go +++ b/internal/configs/harmony/harmony.go @@ -3,6 +3,10 @@ package harmony import ( "reflect" "strings" + "time" + + nodeconfig "github.com/harmony-one/harmony/internal/configs/node" + "github.com/harmony-one/harmony/internal/utils" ) // HarmonyConfig contains all the configs user can set for running harmony binary. Served as the bridge @@ -32,6 +36,62 @@ type HarmonyConfig struct { ShardData ShardDataConfig } +func (hc HarmonyConfig) ToRPCServerConfig() nodeconfig.RPCServerConfig { + readTimeout, err := time.ParseDuration(hc.HTTP.ReadTimeout) + if err != nil { + readTimeout, _ = time.ParseDuration(nodeconfig.DefaultHTTPTimeoutRead) + utils.Logger().Warn(). + Str("provided", hc.HTTP.ReadTimeout). + Dur("updated", readTimeout). + Msg("Sanitizing invalid http read timeout") + } + writeTimeout, err := time.ParseDuration(hc.HTTP.WriteTimeout) + if err != nil { + writeTimeout, _ = time.ParseDuration(nodeconfig.DefaultHTTPTimeoutWrite) + utils.Logger().Warn(). + Str("provided", hc.HTTP.WriteTimeout). + Dur("updated", writeTimeout). 
+ Msg("Sanitizing invalid http write timeout") + } + idleTimeout, err := time.ParseDuration(hc.HTTP.IdleTimeout) + if err != nil { + idleTimeout, _ = time.ParseDuration(nodeconfig.DefaultHTTPTimeoutIdle) + utils.Logger().Warn(). + Str("provided", hc.HTTP.IdleTimeout). + Dur("updated", idleTimeout). + Msg("Sanitizing invalid http idle timeout") + } + evmCallTimeout, err := time.ParseDuration(hc.RPCOpt.EvmCallTimeout) + if err != nil { + evmCallTimeout, _ = time.ParseDuration(nodeconfig.DefaultEvmCallTimeout) + utils.Logger().Warn(). + Str("provided", hc.RPCOpt.EvmCallTimeout). + Dur("updated", evmCallTimeout). + Msg("Sanitizing invalid evm_call timeout") + } + return nodeconfig.RPCServerConfig{ + HTTPEnabled: hc.HTTP.Enabled, + HTTPIp: hc.HTTP.IP, + HTTPPort: hc.HTTP.Port, + HTTPAuthPort: hc.HTTP.AuthPort, + HTTPTimeoutRead: readTimeout, + HTTPTimeoutWrite: writeTimeout, + HTTPTimeoutIdle: idleTimeout, + WSEnabled: hc.WS.Enabled, + WSIp: hc.WS.IP, + WSPort: hc.WS.Port, + WSAuthPort: hc.WS.AuthPort, + DebugEnabled: hc.RPCOpt.DebugEnabled, + EthRPCsEnabled: hc.RPCOpt.EthRPCsEnabled, + StakingRPCsEnabled: hc.RPCOpt.StakingRPCsEnabled, + LegacyRPCsEnabled: hc.RPCOpt.LegacyRPCsEnabled, + RpcFilterFile: hc.RPCOpt.RpcFilterFile, + RateLimiterEnabled: hc.RPCOpt.RateLimterEnabled, + RequestsPerSecond: hc.RPCOpt.RequestsPerSecond, + EvmCallTimeout: evmCallTimeout, + } +} + type DnsSync struct { Port int // replaces: Network.DNSSyncPort Zone string // replaces: Network.DNSZone @@ -46,14 +106,19 @@ type NetworkConfig struct { } type P2pConfig struct { - Port int - IP string - KeyFile string - DHTDataStore *string `toml:",omitempty"` - DiscConcurrency int // Discovery Concurrency value - MaxConnsPerIP int - DisablePrivateIPScan bool - MaxPeers int64 + Port int + IP string + KeyFile string + DHTDataStore *string `toml:",omitempty"` + DiscConcurrency int // Discovery Concurrency value + MaxConnsPerIP int + DisablePrivateIPScan bool + MaxPeers int64 + // In order to disable Connection Manager, it only needs to + // set both the high and low watermarks to zero. In this way, + // using Connection Manager will be an optional feature. + ConnManagerLowWatermark int + ConnManagerHighWatermark int WaitForEachPeerToConnect bool } @@ -69,6 +134,7 @@ type GeneralConfig struct { TraceEnable bool EnablePruneBeaconChain bool RunElasticMode bool + TriesInMemory int } type TiKVConfig struct { @@ -174,6 +240,9 @@ type HttpConfig struct { AuthPort int RosettaEnabled bool RosettaPort int + ReadTimeout string + WriteTimeout string + IdleTimeout string } type WsConfig struct { @@ -191,6 +260,7 @@ type RpcOptConfig struct { RpcFilterFile string // Define filters to enable/disable RPC exposure RateLimterEnabled bool // Enable Rate limiter for RPC RequestsPerSecond int // for RPC rate limiter + EvmCallTimeout string // Timeout for eth_call } type DevnetConfig struct { @@ -222,17 +292,18 @@ type PrometheusConfig struct { type SyncConfig struct { // TODO: Remove this bool after stream sync is fully up. - Enabled bool // enable the stream sync protocol - Downloader bool // start the sync downloader client - StagedSync bool // use staged sync - StagedSyncCfg StagedSyncConfig // staged sync configurations - Concurrency int // concurrency used for stream sync protocol - MinPeers int // minimum streams to start a sync task. - InitStreams int // minimum streams in bootstrap to start sync loop. 
- DiscSoftLowCap int // when number of streams is below this value, spin discover during check - DiscHardLowCap int // when removing stream, num is below this value, spin discovery immediately - DiscHighCap int // upper limit of streams in one sync protocol - DiscBatch int // size of each discovery + Enabled bool // enable the stream sync protocol + Downloader bool // start the sync downloader client + StagedSync bool // use staged sync + StagedSyncCfg StagedSyncConfig // staged sync configurations + Concurrency int // concurrency used for stream sync protocol + MinPeers int // minimum streams to start a sync task. + InitStreams int // minimum streams in bootstrap to start sync loop. + MaxAdvertiseWaitTime int // maximum time duration between advertisements + DiscSoftLowCap int // when number of streams is below this value, spin discover during check + DiscHardLowCap int // when removing stream, num is below this value, spin discovery immediately + DiscHighCap int // upper limit of streams in one sync protocol + DiscBatch int // size of each discovery } type StagedSyncConfig struct { diff --git a/internal/configs/harmony/harmony_test.go b/internal/configs/harmony/harmony_test.go new file mode 100644 index 000000000..fef7cac9d --- /dev/null +++ b/internal/configs/harmony/harmony_test.go @@ -0,0 +1,81 @@ +package harmony + +import ( + "fmt" + "testing" + "time" + + nodeconfig "github.com/harmony-one/harmony/internal/configs/node" + "github.com/stretchr/testify/assert" +) + +func TestToRPCServerConfig(t *testing.T) { + tests := []struct { + input HarmonyConfig + output nodeconfig.RPCServerConfig + }{ + { + input: HarmonyConfig{ + HTTP: HttpConfig{ + Enabled: true, + RosettaEnabled: false, + IP: "127.0.0.1", + Port: nodeconfig.DefaultRPCPort, + AuthPort: nodeconfig.DefaultAuthRPCPort, + RosettaPort: nodeconfig.DefaultRosettaPort, + ReadTimeout: "-1", + WriteTimeout: "-2", + IdleTimeout: "-3", + }, + WS: WsConfig{ + Enabled: true, + IP: "127.0.0.1", + Port: nodeconfig.DefaultWSPort, + AuthPort: nodeconfig.DefaultAuthWSPort, + }, + RPCOpt: RpcOptConfig{ + DebugEnabled: false, + EthRPCsEnabled: true, + StakingRPCsEnabled: true, + LegacyRPCsEnabled: true, + RpcFilterFile: "./.hmy/rpc_filter.txt", + RateLimterEnabled: true, + RequestsPerSecond: nodeconfig.DefaultRPCRateLimit, + EvmCallTimeout: "-4", + }, + }, + output: nodeconfig.RPCServerConfig{ + HTTPEnabled: true, + HTTPIp: "127.0.0.1", + HTTPPort: nodeconfig.DefaultRPCPort, + HTTPAuthPort: nodeconfig.DefaultAuthRPCPort, + HTTPTimeoutRead: 30 * time.Second, + HTTPTimeoutWrite: 30 * time.Second, + HTTPTimeoutIdle: 120 * time.Second, + WSEnabled: true, + WSIp: "127.0.0.1", + WSPort: nodeconfig.DefaultWSPort, + WSAuthPort: nodeconfig.DefaultAuthWSPort, + DebugEnabled: false, + EthRPCsEnabled: true, + StakingRPCsEnabled: true, + LegacyRPCsEnabled: true, + RpcFilterFile: "./.hmy/rpc_filter.txt", + RateLimiterEnabled: true, + RequestsPerSecond: nodeconfig.DefaultRPCRateLimit, + EvmCallTimeout: 5 * time.Second, + }, + }, + } + for i, tt := range tests { + assertObject := assert.New(t) + name := fmt.Sprintf("TestToRPCServerConfig: #%d", i) + t.Run(name, func(t *testing.T) { + assertObject.Equal( + tt.input.ToRPCServerConfig(), + tt.output, + name, + ) + }) + } +} diff --git a/internal/configs/node/config.go b/internal/configs/node/config.go index 9a0e950ec..9f681fca9 100644 --- a/internal/configs/node/config.go +++ b/internal/configs/node/config.go @@ -8,6 +8,7 @@ import ( "math/big" "strings" "sync" + "time" bls_core 
"github.com/harmony-one/bls/ffi/go/bls" "github.com/harmony-one/harmony/crypto/bls" @@ -115,6 +116,10 @@ type RPCServerConfig struct { HTTPPort int HTTPAuthPort int + HTTPTimeoutRead time.Duration + HTTPTimeoutWrite time.Duration + HTTPTimeoutIdle time.Duration + WSEnabled bool WSIp string WSPort int @@ -130,6 +135,8 @@ type RPCServerConfig struct { RateLimiterEnabled bool RequestsPerSecond int + + EvmCallTimeout time.Duration } // RosettaServerConfig is the config for the rosetta server diff --git a/internal/configs/node/network.go b/internal/configs/node/network.go index 332b5cce7..03f147212 100644 --- a/internal/configs/node/network.go +++ b/internal/configs/node/network.go @@ -51,6 +51,12 @@ const ( DefaultAuthRPCPort = 9501 // DefaultRosettaPort is the default rosetta port. The actual port used is 9000+700 DefaultRosettaPort = 9700 + // DefaultHTTP timeouts - read, write, and idle + DefaultHTTPTimeoutRead = "30s" + DefaultHTTPTimeoutWrite = "30s" + DefaultHTTPTimeoutIdle = "120s" + // DefaultEvmCallTimeout is the default timeout for evm call + DefaultEvmCallTimeout = "5s" // DefaultWSPort is the default port for web socket endpoint. The actual port used is DefaultWSPort = 9800 // DefaultAuthWSPort is the default port for web socket auth endpoint. The actual port used is @@ -63,7 +69,13 @@ const ( DefaultMaxConnPerIP = 10 // DefaultMaxPeers is the maximum number of remote peers, with 0 representing no limit DefaultMaxPeers = 0 - // DefaultWaitForEachPeerToConnect sets the sync configs to connect to neighbor peers one by one and waits for each peer to connect + // DefaultConnManagerLowWatermark is the lowest number of connections that'll be maintained in connection manager + DefaultConnManagerLowWatermark = 160 + // DefaultConnManagerHighWatermark is the highest number of connections that'll be maintained in connection manager + // When the peer count exceeds the 'high watermark', as many peers will be pruned (and + // their connections terminated) until 'low watermark' peers remain. + DefaultConnManagerHighWatermark = 192 + // DefaultWaitForEachPeerToConnect sets the sync configs to connect to neighbor peers one by one and waits for each peer to connect. DefaultWaitForEachPeerToConnect = false ) diff --git a/internal/configs/sharding/localnet.go b/internal/configs/sharding/localnet.go index 2d04bb649..a31c66f0d 100644 --- a/internal/configs/sharding/localnet.go +++ b/internal/configs/sharding/localnet.go @@ -23,8 +23,8 @@ const ( localnetV1Epoch = 1 localnetEpochBlock1 = 5 - localnetBlocksPerEpoch = 5 - localnetBlocksPerEpochV2 = 10 + localnetBlocksPerEpoch = 64 + localnetBlocksPerEpochV2 = 64 localnetVdfDifficulty = 5000 // This takes about 10s to finish the vdf ) diff --git a/internal/params/config.go b/internal/params/config.go index d24f4be7b..d90038630 100644 --- a/internal/params/config.go +++ b/internal/params/config.go @@ -70,6 +70,9 @@ var ( CrossShardXferPrecompileEpoch: big.NewInt(1323), // Around Wed 8 Feb 11:30PM UTC AllowlistEpoch: EpochTBD, FeeCollectEpoch: EpochTBD, + LeaderRotationEpoch: EpochTBD, + LeaderRotationBlocksCount: 64, + ValidatorCodeFixEpoch: EpochTBD, } // TestnetChainConfig contains the chain parameters to run a node on the harmony test network. 
@@ -107,7 +110,10 @@ var ( ChainIdFixEpoch: big.NewInt(0), CrossShardXferPrecompileEpoch: big.NewInt(2), AllowlistEpoch: big.NewInt(2), + LeaderRotationEpoch: EpochTBD, + LeaderRotationBlocksCount: 64, FeeCollectEpoch: EpochTBD, + ValidatorCodeFixEpoch: EpochTBD, } // PangaeaChainConfig contains the chain parameters for the Pangaea network. // All features except for CrossLink are enabled at launch. @@ -145,7 +151,10 @@ var ( SlotsLimitedEpoch: EpochTBD, // epoch to enable HIP-16 CrossShardXferPrecompileEpoch: big.NewInt(1), AllowlistEpoch: EpochTBD, + LeaderRotationEpoch: EpochTBD, + LeaderRotationBlocksCount: 64, FeeCollectEpoch: EpochTBD, + ValidatorCodeFixEpoch: EpochTBD, } // PartnerChainConfig contains the chain parameters for the Partner network. @@ -185,6 +194,9 @@ var ( CrossShardXferPrecompileEpoch: big.NewInt(1), AllowlistEpoch: EpochTBD, FeeCollectEpoch: big.NewInt(574), + LeaderRotationEpoch: EpochTBD, + LeaderRotationBlocksCount: 64, + ValidatorCodeFixEpoch: EpochTBD, } // StressnetChainConfig contains the chain parameters for the Stress test network. @@ -224,6 +236,9 @@ var ( CrossShardXferPrecompileEpoch: big.NewInt(1), AllowlistEpoch: EpochTBD, FeeCollectEpoch: EpochTBD, + LeaderRotationEpoch: EpochTBD, + LeaderRotationBlocksCount: 64, + ValidatorCodeFixEpoch: EpochTBD, } // LocalnetChainConfig contains the chain parameters to run for local development. @@ -261,7 +276,10 @@ var ( SlotsLimitedEpoch: EpochTBD, // epoch to enable HIP-16 CrossShardXferPrecompileEpoch: big.NewInt(1), AllowlistEpoch: EpochTBD, + LeaderRotationEpoch: EpochTBD, + LeaderRotationBlocksCount: 5, FeeCollectEpoch: big.NewInt(5), + ValidatorCodeFixEpoch: EpochTBD, } // AllProtocolChanges ... @@ -301,7 +319,10 @@ var ( big.NewInt(0), // SlotsLimitedEpoch big.NewInt(1), // CrossShardXferPrecompileEpoch big.NewInt(0), // AllowlistEpoch + big.NewInt(1), // LeaderRotationEpoch + 64, // LeaderRotationBlocksCount big.NewInt(0), // FeeCollectEpoch + big.NewInt(0), // ValidatorCodeFixEpoch } // TestChainConfig ... @@ -341,7 +362,10 @@ var ( big.NewInt(0), // SlotsLimitedEpoch big.NewInt(1), // CrossShardXferPrecompileEpoch big.NewInt(0), // AllowlistEpoch + big.NewInt(1), // LeaderRotationEpoch + 64, // LeaderRotationBlocksCount big.NewInt(0), // FeeCollectEpoch + big.NewInt(0), // ValidatorCodeFixEpoch } // TestRules ... @@ -481,12 +505,23 @@ type ChainConfig struct { // AllowlistEpoch is the first epoch to support allowlist of HIP18 AllowlistEpoch *big.Int + LeaderRotationEpoch *big.Int `json:"leader-rotation-epoch,omitempty"` + + LeaderRotationBlocksCount int `json:"leader-rotation-blocks-count,omitempty"` + // FeeCollectEpoch is the first epoch that enables txn fees to be collected into the community-managed account. // It should >= StakingEpoch. // Before StakingEpoch, txn fees are paid to miner/leader. // Then before FeeCollectEpoch, txn fees are burned. // After FeeCollectEpoch, txn fees paid to FeeCollector account. FeeCollectEpoch *big.Int + + // ValidatorCodeFixEpoch is the first epoch that fixes the issue of validator code + // being available in Solidity. This is a temporary fix until we have a better + // solution. + // Contracts can check the (presence of) validator code by calling the following: + // extcodesize, extcodecopy and extcodehash. + ValidatorCodeFixEpoch *big.Int `json:"validator-code-fix-epoch,omitempty"` } // String implements the fmt.Stringer interface. 
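Reviewer note: LeaderRotationEpoch and ValidatorCodeFixEpoch follow the existing chain-config convention that a feature turns on once the current epoch reaches its fork epoch, with EpochTBD acting as a far-future sentinel that keeps it disabled until scheduled. A condensed sketch of the gate behind IsLeaderRotation and IsValidatorCodeFix (isForked's exact body is assumed from the surrounding accessors, which all delegate to it):

```go
package main

import (
	"fmt"
	"math/big"
)

// isForked reports whether the feature activating at forkEpoch is live in
// the given epoch. A nil forkEpoch (or a huge sentinel such as EpochTBD)
// effectively keeps the feature off.
func isForked(forkEpoch, epoch *big.Int) bool {
	if forkEpoch == nil || epoch == nil {
		return false
	}
	return forkEpoch.Cmp(epoch) <= 0
}

func main() {
	leaderRotationEpoch := big.NewInt(100)
	fmt.Println(isForked(leaderRotationEpoch, big.NewInt(99)))  // false: not yet active
	fmt.Println(isForked(leaderRotationEpoch, big.NewInt(100))) // true: activation epoch
	fmt.Println(isForked(leaderRotationEpoch, big.NewInt(150))) // true: stays active
}
```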
@@ -521,6 +556,8 @@ func (c *ChainConfig) mustValid() { "must satisfy: StakingPrecompileEpoch >= PreStakingEpoch") require(c.CrossShardXferPrecompileEpoch.Cmp(c.CrossTxEpoch) > 0, "must satisfy: CrossShardXferPrecompileEpoch > CrossTxEpoch") + require(c.ValidatorCodeFixEpoch.Cmp(c.EthCompatibleEpoch) >= 0, + "must satisfy: ValidatorCodeFixEpoch >= EthCompatibleEpoch") } // IsEIP155 returns whether epoch is either equal to the EIP155 fork epoch or greater. @@ -687,11 +724,19 @@ func (c *ChainConfig) IsAllowlistEpoch(epoch *big.Int) bool { return isForked(c.AllowlistEpoch, epoch) } +func (c *ChainConfig) IsLeaderRotation(epoch *big.Int) bool { + return isForked(c.LeaderRotationEpoch, epoch) +} + // IsFeeCollectEpoch determines whether Txn Fees will be collected into the community-managed account. func (c *ChainConfig) IsFeeCollectEpoch(epoch *big.Int) bool { return isForked(c.FeeCollectEpoch, epoch) } +func (c *ChainConfig) IsValidatorCodeFix(epoch *big.Int) bool { + return isForked(c.ValidatorCodeFixEpoch, epoch) +} + // UpdateEthChainIDByShard update the ethChainID based on shard ID. func UpdateEthChainIDByShard(shardID uint32) { once.Do(func() { @@ -749,6 +794,7 @@ type Rules struct { IsStakingPrecompile, IsCrossShardXferPrecompile, // eip-155 chain id fix IsChainIdFix bool + IsValidatorCodeFix bool } // Rules ensures c's ChainID is not nil. @@ -773,5 +819,6 @@ func (c *ChainConfig) Rules(epoch *big.Int) Rules { IsStakingPrecompile: c.IsStakingPrecompile(epoch), IsCrossShardXferPrecompile: c.IsCrossShardXferPrecompile(epoch), IsChainIdFix: c.IsChainIdFix(epoch), + IsValidatorCodeFix: c.IsValidatorCodeFix(epoch), } } diff --git a/internal/shardchain/shardchains.go b/internal/shardchain/shardchains.go index 8f9b18595..1be2c6841 100644 --- a/internal/shardchain/shardchains.go +++ b/internal/shardchain/shardchains.go @@ -3,6 +3,7 @@ package shardchain import ( "math/big" "sync" + "time" "github.com/harmony-one/harmony/core/state" harmonyconfig "github.com/harmony-one/harmony/internal/configs/harmony" @@ -104,6 +105,15 @@ func (sc *CollectionImpl) ShardChain(shardID uint32, options ...core.Options) (c utils.Logger().Info(). Uint32("shardID", shardID). Msg("disable cache, running in archival mode") + } else { + cacheConfig = &core.CacheConfig{ + TrieNodeLimit: 256 * 1024 * 1024, + TrieTimeLimit: 2 * time.Minute, + TriesInMemory: 128, + } + if sc.harmonyconfig != nil { + cacheConfig.TriesInMemory = uint64(sc.harmonyconfig.General.TriesInMemory) + } } chainConfig := *sc.chainConfig diff --git a/internal/utils/singleton.go b/internal/utils/singleton.go index 6409ea71e..10101d767 100644 --- a/internal/utils/singleton.go +++ b/internal/utils/singleton.go @@ -6,6 +6,7 @@ import ( "fmt" "os" "path" + "strconv" "sync" "time" @@ -195,3 +196,19 @@ func updateZeroLogLevel(level int) { childLogger := Logger().Level(zeroLoggerLevel) zeroLogger = &childLogger } + +// GetPort is useful for debugging, returns `--port` flag provided to executable. 
+func GetPort() int { + ok := false + for _, x := range os.Args { + if x == "--port" { + ok = true + continue + } + if ok { + rs, _ := strconv.ParseInt(x, 10, 64) + return int(rs) + } + } + return 0 +} diff --git a/internal/utils/timer.go b/internal/utils/timer.go index 3502d68ec..2e8a77667 100644 --- a/internal/utils/timer.go +++ b/internal/utils/timer.go @@ -1,7 +1,6 @@ package utils import ( - "sync" "time" ) @@ -20,7 +19,6 @@ type Timeout struct { state TimeoutState d time.Duration start time.Time - mu sync.Mutex } // NewTimeout creates a new timeout class @@ -31,24 +29,18 @@ func NewTimeout(d time.Duration) *Timeout { // Start starts the timeout clock func (timeout *Timeout) Start() { - timeout.mu.Lock() timeout.state = Active timeout.start = time.Now() - timeout.mu.Unlock() } // Stop stops the timeout clock func (timeout *Timeout) Stop() { - timeout.mu.Lock() timeout.state = Inactive timeout.start = time.Now() - timeout.mu.Unlock() } // CheckExpire checks whether the timeout is reached/expired func (timeout *Timeout) CheckExpire() bool { - timeout.mu.Lock() - defer timeout.mu.Unlock() if timeout.state == Active && time.Since(timeout.start) > timeout.d { timeout.state = Expired } @@ -60,23 +52,17 @@ func (timeout *Timeout) CheckExpire() bool { // Duration returns the duration period of timeout func (timeout *Timeout) Duration() time.Duration { - timeout.mu.Lock() - defer timeout.mu.Unlock() return timeout.d } // SetDuration set new duration for the timer func (timeout *Timeout) SetDuration(nd time.Duration) { - timeout.mu.Lock() timeout.d = nd - timeout.mu.Unlock() } // IsActive checks whether timeout clock is active; // A timeout is active means it's not stopped caused by stop // and also not expired with time elapses longer than duration from start func (timeout *Timeout) IsActive() bool { - timeout.mu.Lock() - defer timeout.mu.Unlock() return timeout.state == Active } diff --git a/node/node.go b/node/node.go index bc80ef81a..13988e2ad 100644 --- a/node/node.go +++ b/node/node.go @@ -87,7 +87,7 @@ type ISync interface { UpdateBlockAndStatus(block *types.Block, bc core.BlockChain, verifyAllSig bool) error AddLastMileBlock(block *types.Block) GetActivePeerNumber() int - CreateSyncConfig(peers []p2p.Peer, shardID uint32, waitForEachPeerToConnect bool) error + CreateSyncConfig(peers []p2p.Peer, shardID uint32, selfPeerID libp2p_peer.ID, waitForEachPeerToConnect bool) error SyncLoop(bc core.BlockChain, worker *worker.Worker, isBeacon bool, consensus *consensus.Consensus, loopMinTime time.Duration) IsSynchronized() bool IsSameBlockchainHeight(bc core.BlockChain) (uint64, bool) @@ -100,7 +100,6 @@ type ISync interface { // Node represents a protocol-participating node in the network type Node struct { Consensus *consensus.Consensus // Consensus object containing all Consensus related data (e.g. 
committee members, signatures, commits) - BlockChannel chan *types.Block // The channel to send newly proposed blocks ConfirmedBlockChannel chan *types.Block // The channel to send confirmed blocks BeaconBlockChannel chan *types.Block // The channel to send beacon blocks for non-beaconchain nodes pendingCXReceipts map[string]*types.CXReceiptsProof // All the receipts received but not yet processed for Consensus @@ -127,9 +126,7 @@ type Node struct { serviceManager *service.Manager ContractDeployerCurrentNonce uint64 // The nonce of the deployer contract at current block ContractAddresses []common.Address - // Channel to notify consensus service to really start consensus - startConsensus chan struct{} - HarmonyConfig *harmonyconfig.HarmonyConfig + HarmonyConfig *harmonyconfig.HarmonyConfig // node configuration, including group ID, shard ID, etc NodeConfig *nodeconfig.ConfigType // Chain configuration. @@ -1032,6 +1029,7 @@ func New( unixTimeAtNodeStart: time.Now().Unix(), TransactionErrorSink: types.NewTransactionErrorSink(), crosslinks: crosslinks.New(), + syncID: GenerateSyncID(), } // Get the node config that's created in the harmony.go program. @@ -1042,7 +1040,6 @@ func New( } node.HarmonyConfig = harmonyconfig - copy(node.syncID[:], GenerateRandomString(SyncIDLength)) if host != nil { node.host = host node.SelfPeer = host.GetSelfPeer() @@ -1083,7 +1080,6 @@ func New( } } - node.BlockChannel = make(chan *types.Block) node.ConfirmedBlockChannel = make(chan *types.Block) node.BeaconBlockChannel = make(chan *types.Block) txPoolConfig := core.DefaultTxPoolConfig @@ -1132,7 +1128,6 @@ func New( Msg("Genesis block hash") // Setup initial state of syncing. node.peerRegistrationRecord = map[string]*syncConfig{} - node.startConsensus = make(chan struct{}) // Broadcast double-signers reported by consensus if node.Consensus != nil { go func() { diff --git a/node/node_handler.go b/node/node_handler.go index 3db4f8dea..3589d1966 100644 --- a/node/node_handler.go +++ b/node/node_handler.go @@ -3,6 +3,7 @@ package node import ( "bytes" "context" + "fmt" "math/rand" "time" @@ -360,9 +361,8 @@ func VerifyNewBlock(nodeConfig *nodeconfig.ConfigType, blockChain core.BlockChai } // PostConsensusProcessing is called by consensus participants, after consensus is done, to: -// 1. add the new block to blockchain -// 2. [leader] send new block to the client -// 3. [leader] send cross shard tx receipts to destination shard +// 1. [leader] send new block to the client +// 2. [leader] send cross shard tx receipts to destination shard func (node *Node) PostConsensusProcessing(newBlock *types.Block) error { if node.Consensus.IsLeader() { if node.IsRunningBeaconChain() { @@ -433,7 +433,7 @@ func (node *Node) PostConsensusProcessing(newBlock *types.Block) error { return nil } -// BootstrapConsensus is the a goroutine to check number of peers and start the consensus +// BootstrapConsensus is a goroutine to check number of peers and start the consensus func (node *Node) BootstrapConsensus() error { ctx, cancel := context.WithTimeout(context.Background(), time.Minute) defer cancel() @@ -447,6 +447,7 @@ func (node *Node) BootstrapConsensus() error { if numPeersNow >= min { utils.Logger().Info().Msg("[bootstrap] StartConsensus") enoughMinPeers <- struct{}{} + fmt.Println("Bootstrap consensus done.", numPeersNow, " peers are connected") return } utils.Logger().Info(). 
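Reviewer note: several hunks above move channel ownership out of Node — the exported BlockChannel field and the private startConsensus channel are deleted, and (as the next hunks show) callers go through Consensus.BlockChannel(...) and Consensus.StartChannel() instead. A hypothetical minimal shape of that encapsulation, not the PR's actual definitions:

```go
package consensus

// Block stands in for types.Block here; the real type lives in core/types.
type Block struct{ Number uint64 }

// Consensus owns its signalling channels; other components call methods
// instead of writing to channels that Node used to own and share.
type Consensus struct {
	blockCh chan *Block
	startCh chan struct{}
}

func New() *Consensus {
	return &Consensus{
		blockCh: make(chan *Block),
		startCh: make(chan struct{}),
	}
}

// BlockChannel hands a freshly proposed block to the consensus loop.
func (c *Consensus) BlockChannel(b *Block) { c.blockCh <- b }

// StartChannel signals the consensus loop to begin once enough peers connect.
func (c *Consensus) StartChannel() { c.startCh <- struct{}{} }
```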
@@ -462,7 +463,7 @@ func (node *Node) BootstrapConsensus() error { return ctx.Err() case <-enoughMinPeers: go func() { - node.startConsensus <- struct{}{} + node.Consensus.StartChannel() }() return nil } diff --git a/node/node_newblock.go b/node/node_newblock.go index 03fd69d9d..5050e4d6a 100644 --- a/node/node_newblock.go +++ b/node/node_newblock.go @@ -104,7 +104,7 @@ func (node *Node) WaitForConsensusReadyV2(readySignal chan consensus.ProposalTyp // Send the new block to Consensus so it can be confirmed. node.proposedBlock[newBlock.NumberU64()] = newBlock delete(node.proposedBlock, newBlock.NumberU64()-10) - node.BlockChannel <- newBlock + node.Consensus.BlockChannel(newBlock) break } else { retryCount++ diff --git a/node/node_syncing.go b/node/node_syncing.go index d9a6c95b4..05ddd4b6b 100644 --- a/node/node_syncing.go +++ b/node/node_syncing.go @@ -5,10 +5,10 @@ import ( "math/rand" "net" "strconv" - "sync" "time" "github.com/harmony-one/harmony/internal/tikv" + "github.com/multiformats/go-multiaddr" prom "github.com/harmony-one/harmony/api/service/prometheus" "github.com/prometheus/client_golang/prometheus" @@ -22,11 +22,11 @@ import ( "github.com/harmony-one/harmony/api/service/legacysync" legdownloader "github.com/harmony-one/harmony/api/service/legacysync/downloader" downloader_pb "github.com/harmony-one/harmony/api/service/legacysync/downloader/proto" + "github.com/harmony-one/harmony/api/service/stagedstreamsync" "github.com/harmony-one/harmony/api/service/stagedsync" "github.com/harmony-one/harmony/api/service/synchronize" "github.com/harmony-one/harmony/core" "github.com/harmony-one/harmony/core/types" - "github.com/harmony-one/harmony/hmy/downloader" nodeconfig "github.com/harmony-one/harmony/internal/configs/node" "github.com/harmony-one/harmony/internal/utils" "github.com/harmony-one/harmony/node/worker" @@ -64,28 +64,11 @@ func GenerateRandomString(n int) string { return string(b) } -// getNeighborPeers is a helper function to return list of peers -// based on different neightbor map -func getNeighborPeers(neighbor *sync.Map) []p2p.Peer { - tmp := []p2p.Peer{} - neighbor.Range(func(k, v interface{}) bool { - p := v.(p2p.Peer) - t := p.Port - p.Port = legacysync.GetSyncingPort(t) - tmp = append(tmp, p) - return true - }) - return tmp -} - -// DoSyncWithoutConsensus gets sync-ed to blockchain without joining consensus -func (node *Node) DoSyncWithoutConsensus() { - go node.DoSyncing(node.Blockchain(), node.Worker, false) //Don't join consensus -} - -// IsSameHeight tells whether node is at same bc height as a peer -func (node *Node) IsSameHeight() (uint64, bool) { - return node.SyncInstance().IsSameBlockchainHeight(node.Blockchain()) +// GenerateSyncID generates a random string with given length +func GenerateSyncID() [SyncIDLength]byte { + var syncID [SyncIDLength]byte + copy(syncID[:], GenerateRandomString(SyncIDLength)) + return syncID } func (node *Node) createStateSync(bc core.BlockChain) *legacysync.StateSync { @@ -105,7 +88,8 @@ func (node *Node) createStateSync(bc core.BlockChain) *legacysync.StateSync { mutatedPort := strconv.Itoa(mySyncPort + legacysync.SyncingPortDifference) role := node.NodeConfig.Role() return legacysync.CreateStateSync(bc, node.SelfPeer.IP, mutatedPort, - node.GetSyncID(), node.NodeConfig.Role() == nodeconfig.ExplorerNode, role) + node.GetSyncID(), node.host.GetID(), + node.NodeConfig.Role() == nodeconfig.ExplorerNode, role) } func (node *Node) createStagedSync(bc core.BlockChain) *stagedsync.StagedSync { @@ -151,14 +135,16 @@ type 
SyncingPeerProvider interface { // DNSSyncingPeerProvider uses the given DNS zone to resolve syncing peers. type DNSSyncingPeerProvider struct { + selfAddrs []multiaddr.Multiaddr zone, port string lookupHost func(name string) (addrs []string, err error) } // NewDNSSyncingPeerProvider returns a provider that uses given DNS name and // port number to resolve syncing peers. -func NewDNSSyncingPeerProvider(zone, port string) *DNSSyncingPeerProvider { +func NewDNSSyncingPeerProvider(zone, port string, addrs []multiaddr.Multiaddr) *DNSSyncingPeerProvider { return &DNSSyncingPeerProvider{ + selfAddrs: addrs, zone: zone, port: port, lookupHost: net.LookupHost, @@ -174,11 +160,27 @@ func (p *DNSSyncingPeerProvider) SyncingPeers(shardID uint32) (peers []p2p.Peer, "[SYNC] cannot find peers using DNS name %#v", dns) } for _, addr := range addrs { + // no need to have peer itself on the list of connected peers + if p.getSelfAddrIndex(addr, p.port) >= 0 { + continue + } peers = append(peers, p2p.Peer{IP: addr, Port: p.port}) } return peers, nil } +// getSelfAddrIndex returns address index if it is one of self addresses +func (p *DNSSyncingPeerProvider) getSelfAddrIndex(IP string, Port string) int { + peerAddr4 := fmt.Sprintf("/ip4/%s/tcp/%s", IP, Port) + peerAddr6 := fmt.Sprintf("/ip6/%s/tcp/%s", IP, Port) + for addrIndex, addr := range p.selfAddrs { + if addr.String() == peerAddr4 || addr.String() == peerAddr6 { + return addrIndex + } + } + return -1 +} + // LocalSyncingPeerProvider uses localnet deployment convention to synthesize // syncing peers. type LocalSyncingPeerProvider struct { @@ -253,7 +255,8 @@ func (node *Node) doBeaconSyncing() { continue } - if err := node.epochSync.CreateSyncConfig(peers, shard.BeaconChainShardID, node.HarmonyConfig.P2P.WaitForEachPeerToConnect); err != nil { + if err := node.epochSync.CreateSyncConfig(peers, shard.BeaconChainShardID, node.host.GetID(), + node.HarmonyConfig.P2P.WaitForEachPeerToConnect); err != nil { utils.Logger().Warn().Err(err).Msg("[EPOCHSYNC] cannot create beacon sync config") continue } @@ -296,7 +299,7 @@ func (node *Node) doSync(bc core.BlockChain, worker *worker.Worker, willJoinCons Msg("cannot retrieve syncing peers") return } - if err := syncInstance.CreateSyncConfig(peers, shardID, node.HarmonyConfig.P2P.WaitForEachPeerToConnect); err != nil { + if err := syncInstance.CreateSyncConfig(peers, shardID, node.host.GetID(), node.HarmonyConfig.P2P.WaitForEachPeerToConnect); err != nil { utils.Logger().Warn(). Err(err). Interface("peers", peers). 
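Reviewer note: getSelfAddrIndex above filters the node's own listen addresses out of DNS-resolved sync peers by comparing rendered multiaddrs, so a node never dials itself. A self-contained sketch of the same comparison (assuming github.com/multiformats/go-multiaddr):

```go
package main

import (
	"fmt"

	ma "github.com/multiformats/go-multiaddr"
)

// isSelf reports whether ip:port matches one of the node's own listen
// multiaddrs, checking both the ip4 and ip6 renderings.
func isSelf(selfAddrs []ma.Multiaddr, ip, port string) bool {
	v4 := fmt.Sprintf("/ip4/%s/tcp/%s", ip, port)
	v6 := fmt.Sprintf("/ip6/%s/tcp/%s", ip, port)
	for _, addr := range selfAddrs {
		if s := addr.String(); s == v4 || s == v6 {
			return true
		}
	}
	return false
}

func main() {
	self, _ := ma.NewMultiaddr("/ip4/1.2.3.4/tcp/9000")
	fmt.Println(isSelf([]ma.Multiaddr{self}, "1.2.3.4", "9000")) // true: skip this peer
	fmt.Println(isSelf([]ma.Multiaddr{self}, "5.6.7.8", "9000")) // false: dial it
}
```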
@@ -811,7 +814,7 @@ func (node *Node) legacySyncStatus(shardID uint32) (bool, uint64, uint64) { } } -// IsOutOfSync return whether the node is out of sync of the given hsardID +// IsOutOfSync return whether the node is out of sync of the given shardID func (node *Node) IsOutOfSync(shardID uint32) bool { ds := node.getDownloaders() if ds == nil || !ds.IsActive() { @@ -859,14 +862,36 @@ func (node *Node) SyncPeers() map[string]int { return res } -func (node *Node) getDownloaders() *downloader.Downloaders { - syncService := node.serviceManager.GetService(service.Synchronize) - if syncService == nil { - return nil - } - dsService, ok := syncService.(*synchronize.Service) - if !ok { - return nil +type Downloaders interface { + Start() + Close() + DownloadAsync(shardID uint32) + // GetShardDownloader(shardID uint32) *Downloader + NumPeers() map[uint32]int + SyncStatus(shardID uint32) (bool, uint64, uint64) + IsActive() bool +} + +func (node *Node) getDownloaders() Downloaders { + if node.NodeConfig.StagedSync { + syncService := node.serviceManager.GetService(service.StagedStreamSync) + if syncService == nil { + return nil + } + dsService, ok := syncService.(*stagedstreamsync.StagedStreamSyncService) + if !ok { + return nil + } + return dsService.Downloaders + } else { + syncService := node.serviceManager.GetService(service.Synchronize) + if syncService == nil { + return nil + } + dsService, ok := syncService.(*synchronize.Service) + if !ok { + return nil + } + return dsService.Downloaders } - return dsService.Downloaders } diff --git a/node/node_test.go b/node/node_test.go index a4f1af70c..49ba5d164 100644 --- a/node/node_test.go +++ b/node/node_test.go @@ -16,6 +16,7 @@ import ( "github.com/harmony-one/harmony/multibls" "github.com/harmony-one/harmony/p2p" "github.com/harmony-one/harmony/shard" + "github.com/multiformats/go-multiaddr" "github.com/stretchr/testify/assert" ) @@ -69,7 +70,8 @@ func TestNewNode(t *testing.T) { func TestDNSSyncingPeerProvider(t *testing.T) { t.Run("Happy", func(t *testing.T) { - p := NewDNSSyncingPeerProvider("example.com", "1234") + addrs := make([]multiaddr.Multiaddr, 0) + p := NewDNSSyncingPeerProvider("example.com", "1234", addrs) lookupCount := 0 lookupName := "" p.lookupHost = func(name string) (addrs []string, err error) { @@ -92,7 +94,8 @@ func TestDNSSyncingPeerProvider(t *testing.T) { } }) t.Run("LookupError", func(t *testing.T) { - p := NewDNSSyncingPeerProvider("example.com", "1234") + addrs := make([]multiaddr.Multiaddr, 0) + p := NewDNSSyncingPeerProvider("example.com", "1234", addrs) p.lookupHost = func(_ string) ([]string, error) { return nil, errors.New("omg") } diff --git a/node/service_setup.go b/node/service_setup.go index 89acaf8fb..a2518110c 100644 --- a/node/service_setup.go +++ b/node/service_setup.go @@ -14,7 +14,7 @@ func (node *Node) RegisterValidatorServices() { // Register consensus service. node.serviceManager.Register( service.Consensus, - consensus.New(node.BlockChannel, node.Consensus, node.startConsensus), + consensus.New(node.Consensus), ) // Register new block service. 
node.serviceManager.Register( diff --git a/p2p/discovery/discovery.go b/p2p/discovery/discovery.go index fb9591c26..53372edd6 100644 --- a/p2p/discovery/discovery.go +++ b/p2p/discovery/discovery.go @@ -5,6 +5,7 @@ import ( "time" "github.com/harmony-one/harmony/internal/utils" + dht "github.com/libp2p/go-libp2p-kad-dht" libp2p_dht "github.com/libp2p/go-libp2p-kad-dht" "github.com/libp2p/go-libp2p/core/discovery" libp2p_host "github.com/libp2p/go-libp2p/core/host" @@ -37,19 +38,8 @@ type dhtDiscovery struct { } // NewDHTDiscovery creates a new dhtDiscovery that implements Discovery interface. -func NewDHTDiscovery(host libp2p_host.Host, opt DHTConfig) (Discovery, error) { - opts, err := opt.getLibp2pRawOptions() - if err != nil { - return nil, err - } - ctx, cancel := context.WithCancel(context.Background()) - dht, err := libp2p_dht.New(ctx, host, opts...) - if err != nil { - cancel() - return nil, err - } +func NewDHTDiscovery(ctx context.Context, cancel context.CancelFunc, host libp2p_host.Host, dht *dht.IpfsDHT, opt DHTConfig) (Discovery, error) { d := libp2p_dis.NewRoutingDiscovery(dht) - logger := utils.Logger().With().Str("module", "discovery").Logger() return &dhtDiscovery{ dht: dht, diff --git a/p2p/discovery/discovery_test.go b/p2p/discovery/discovery_test.go index 6ea902375..f2c2f5b99 100644 --- a/p2p/discovery/discovery_test.go +++ b/p2p/discovery/discovery_test.go @@ -3,9 +3,11 @@ package discovery // TODO: test this module import ( + "context" "testing" "github.com/libp2p/go-libp2p" + dht "github.com/libp2p/go-libp2p-kad-dht" ) func TestNewDHTDiscovery(t *testing.T) { @@ -13,7 +15,13 @@ func TestNewDHTDiscovery(t *testing.T) { if err != nil { t.Fatal(err) } - _, err = NewDHTDiscovery(host, DHTConfig{}) + ctx, cancel := context.WithCancel(context.Background()) + var idht *dht.IpfsDHT + idht, err = dht.New(ctx, host) + if err != nil { + t.Fatal(err) + } + _, err = NewDHTDiscovery(ctx, cancel, host, idht, DHTConfig{}) if err != nil { t.Fatal(err) } diff --git a/p2p/discovery/option.go b/p2p/discovery/option.go index 0afe6b8a2..fff8eea20 100644 --- a/p2p/discovery/option.go +++ b/p2p/discovery/option.go @@ -5,6 +5,7 @@ import ( p2ptypes "github.com/harmony-one/harmony/p2p/types" badger "github.com/ipfs/go-ds-badger" + dht "github.com/libp2p/go-libp2p-kad-dht" libp2p_dht "github.com/libp2p/go-libp2p-kad-dht" ) @@ -14,10 +15,11 @@ type DHTConfig struct { BootNodes []string DataStoreFile *string // File path to store DHT data. Shall be only used for bootstrap nodes. DiscConcurrency int + DHT *dht.IpfsDHT } -// getLibp2pRawOptions get the raw libp2p options as a slice. -func (opt DHTConfig) getLibp2pRawOptions() ([]libp2p_dht.Option, error) { +// GetLibp2pRawOptions get the raw libp2p options as a slice. 
+func (opt DHTConfig) GetLibp2pRawOptions() ([]libp2p_dht.Option, error) { var opts []libp2p_dht.Option bootOption, err := getBootstrapOption(opt.BootNodes) @@ -40,6 +42,10 @@ func (opt DHTConfig) getLibp2pRawOptions() ([]libp2p_dht.Option, error) { opts = append(opts, libp2p_dht.Concurrency(opt.DiscConcurrency)) } + // TODO: to disable auto refresh to make sure there is no conflicts with protocol discovery functions + // it's not applicable for legacy sync + // opts = append(opts, libp2p_dht.DisableAutoRefresh()) + return opts, nil } diff --git a/p2p/discovery/option_test.go b/p2p/discovery/option_test.go index 747d7ca95..1829e77c8 100644 --- a/p2p/discovery/option_test.go +++ b/p2p/discovery/option_test.go @@ -58,7 +58,7 @@ func TestDHTOption_getLibp2pRawOptions(t *testing.T) { }, } for i, test := range tests { - opts, err := test.opt.getLibp2pRawOptions() + opts, err := test.opt.GetLibp2pRawOptions() if assErr := assertError(err, test.expErr); assErr != nil { t.Errorf("Test %v: %v", i, assErr) } diff --git a/p2p/host.go b/p2p/host.go index a2326c812..9395e1df4 100644 --- a/p2p/host.go +++ b/p2p/host.go @@ -12,13 +12,21 @@ import ( "time" "github.com/libp2p/go-libp2p" + "github.com/libp2p/go-libp2p-core/host" + "github.com/libp2p/go-libp2p-core/routing" + dht "github.com/libp2p/go-libp2p-kad-dht" libp2p_pubsub "github.com/libp2p/go-libp2p-pubsub" + libp2p_config "github.com/libp2p/go-libp2p/config" libp2p_crypto "github.com/libp2p/go-libp2p/core/crypto" libp2p_host "github.com/libp2p/go-libp2p/core/host" libp2p_network "github.com/libp2p/go-libp2p/core/network" libp2p_peer "github.com/libp2p/go-libp2p/core/peer" libp2p_peerstore "github.com/libp2p/go-libp2p/core/peerstore" "github.com/libp2p/go-libp2p/core/protocol" + "github.com/libp2p/go-libp2p/p2p/net/connmgr" + + "github.com/libp2p/go-libp2p/p2p/security/noise" + libp2ptls "github.com/libp2p/go-libp2p/p2p/security/tls" ma "github.com/multiformats/go-multiaddr" "github.com/pkg/errors" "github.com/rs/zerolog" @@ -88,7 +96,10 @@ type HostConfig struct { MaxConnPerIP int DisablePrivateIPScan bool MaxPeers int64 + ConnManagerLowWatermark int + ConnManagerHighWatermark int WaitForEachPeerToConnect bool + ForceReachabilityPublic bool } func init() { @@ -111,34 +122,91 @@ func NewHost(cfg HostConfig) (Host, error) { self = cfg.Self key = cfg.BLSKey ) - listenAddr, err := ma.NewMultiaddr(fmt.Sprintf("/ip4/%s/tcp/%s", self.IP, self.Port)) - if err != nil { - return nil, errors.Wrapf(err, - "cannot create listen multiaddr from port %#v", self.Port) - } + + addr := fmt.Sprintf("/ip4/%s/tcp/%s", self.IP, self.Port) + listenAddr := libp2p.ListenAddrStrings( + addr, // regular tcp connections + addr+"/quic", // a UDP endpoint for the QUIC transport + ) ctx, cancel := context.WithCancel(context.Background()) - p2pHost, err := libp2p.New( - libp2p.ListenAddrs(listenAddr), + + // create connection manager + low := cfg.ConnManagerLowWatermark + high := cfg.ConnManagerHighWatermark + if high < low { + cancel() + utils.Logger().Error(). + Int("low", cfg.ConnManagerLowWatermark). + Int("high", cfg.ConnManagerHighWatermark). 
+ Msg("connection manager watermarks are invalid") + return nil, errors.New("invalid connection manager watermarks") + } + + // prepare host options + var idht *dht.IpfsDHT + var opt discovery.DHTConfig + p2pHostConfig := []libp2p.Option{ + listenAddr, libp2p.Identity(key), + // Support TLS connections + libp2p.Security(libp2ptls.ID, libp2ptls.New), + // Support noise connections + libp2p.Security(noise.ID, noise.New), + // Support any other default transports (TCP) + libp2p.DefaultTransports, + // Prevent the peer from having too many + // connections by attaching a connection manager. + connectionManager(low, high), + // Attempt to open ports using uPNP for NATed hosts. + libp2p.NATPortMap(), + libp2p.Routing(func(h host.Host) (routing.PeerRouting, error) { + opt = discovery.DHTConfig{ + BootNodes: cfg.BootNodes, + DataStoreFile: cfg.DataStoreFile, + DiscConcurrency: cfg.DiscConcurrency, + } + opts, err := opt.GetLibp2pRawOptions() + if err != nil { + return nil, err + } + idht, err = dht.New(ctx, h, opts...) + return idht, err + }), + // To help other peers to figure out if they are behind + // NATs, launch the server-side of AutoNAT too (AutoRelay + // already runs the client) + // This service is highly rate-limited and should not cause any + // performance issues. libp2p.EnableNATService(), - libp2p.ForceReachabilityPublic(), + // Bandwidth Reporter libp2p.BandwidthReporter(newCounter()), - // prevent dialing of public addresses - libp2p.ConnectionGater(NewGater(cfg.DisablePrivateIPScan)), - ) + // Enable relay service, to disable relay we can use libp2p.DisableRelay() + libp2p.EnableRelayService(), + } + + if cfg.ForceReachabilityPublic { + // ForceReachabilityPublic overrides automatic reachability detection in the AutoNAT subsystem, + // forcing the local node to believe it is reachable externally + p2pHostConfig = append(p2pHostConfig, libp2p.ForceReachabilityPublic()) + } + + if cfg.DisablePrivateIPScan { + // Prevent dialing of public addresses + p2pHostConfig = append(p2pHostConfig, libp2p.ConnectionGater(NewGater(cfg.DisablePrivateIPScan))) + } + + // create p2p host + p2pHost, err := libp2p.New(p2pHostConfig...) if err != nil { cancel() return nil, errors.Wrapf(err, "cannot initialize libp2p host") } - disc, err := discovery.NewDHTDiscovery(p2pHost, discovery.DHTConfig{ - BootNodes: cfg.BootNodes, - DataStoreFile: cfg.DataStoreFile, - DiscConcurrency: cfg.DiscConcurrency, - }) + disc, err := discovery.NewDHTDiscovery(ctx, cancel, p2pHost, idht, opt) if err != nil { cancel() + p2pHost.Close() return nil, errors.Wrap(err, "cannot create DHT discovery") } @@ -179,6 +247,7 @@ func NewHost(cfg HostConfig) (Host, error) { pubsub, err := libp2p_pubsub.NewGossipSub(ctx, p2pHost, options...) if err != nil { cancel() + p2pHost.Close() return nil, errors.Wrapf(err, "cannot initialize libp2p pub-sub") } @@ -210,6 +279,33 @@ func NewHost(cfg HostConfig) (Host, error) { return h, nil } +// connectionManager creates a new connection manager and configures libp2p to use the +// given connection manager. +// lo and hi are watermarks governing the number of connections that'll be maintained. +// When the peer count exceeds the 'high watermark', as many peers will be pruned (and +// their connections terminated) until 'low watermark' peers remain. 
+func connectionManager(low int, high int) libp2p_config.Option { + if low > 0 && high > low { + connmgr, err := connmgr.NewConnManager( + low, // Low Watermark + high, // High Watermark + connmgr.WithGracePeriod(time.Minute), + ) + if err != nil { + utils.Logger().Error(). + Err(err). + Int("low", low). + Int("high", high). + Msg("create connection manager failed") + return nil + } + return libp2p.ConnectionManager(connmgr) + } + return func(p2pConfig *libp2p_config.Config) error { + return nil + } +} + // HostV2 is the version 2 p2p host type HostV2 struct { h libp2p_host.Host @@ -277,6 +373,10 @@ func (host *HostV2) AddStreamProtocol(protocols ...sttypes.Protocol) { for _, proto := range protocols { host.streamProtos = append(host.streamProtos, proto) host.h.SetStreamHandlerMatch(protocol.ID(proto.ProtoID()), proto.Match, proto.HandleStream) + // TODO: do we need to add handler match for shard proto id? + // if proto.IsBeaconNode() { + // host.h.SetStreamHandlerMatch(protocol.ID(proto.ShardProtoID()), proto.Match, proto.HandleStream) + // } } } diff --git a/p2p/stream/common/requestmanager/interface_test.go b/p2p/stream/common/requestmanager/interface_test.go index fe163164d..c51303ccb 100644 --- a/p2p/stream/common/requestmanager/interface_test.go +++ b/p2p/stream/common/requestmanager/interface_test.go @@ -114,6 +114,18 @@ func (st *testStream) CloseOnExit() error { return nil } +func (st *testStream) FailedTimes() int { + return 0 +} + +func (st *testStream) AddFailedTimes() { + return +} + +func (st *testStream) ResetFailedTimes() { + return +} + func makeDummyTestStreams(indexes []int) []sttypes.Stream { sts := make([]sttypes.Stream, 0, len(indexes)) diff --git a/p2p/stream/common/streammanager/cooldown.go b/p2p/stream/common/streammanager/cooldown.go index 0f837c01d..ef6d21bc9 100644 --- a/p2p/stream/common/streammanager/cooldown.go +++ b/p2p/stream/common/streammanager/cooldown.go @@ -2,6 +2,7 @@ package streammanager import ( "container/list" + "sync" "time" "github.com/libp2p/go-libp2p/core/peer" @@ -14,6 +15,7 @@ const ( ) type coolDownCache struct { + mu sync.Mutex timeCache *timecache.TimeCache } @@ -26,15 +28,26 @@ func newCoolDownCache() *coolDownCache { // Has check and add the peer ID to the cache func (cache *coolDownCache) Has(id peer.ID) bool { + cache.mu.Lock() + defer cache.mu.Unlock() + has := cache.timeCache.Has(string(id)) + return has +} + +// Add adds the peer ID to the cache +func (cache *coolDownCache) Add(id peer.ID) { + cache.mu.Lock() + defer cache.mu.Unlock() has := cache.timeCache.Has(string(id)) if !has { cache.timeCache.Add(string(id)) } - return has } // Reset the cool down cache func (cache *coolDownCache) Reset() { + cache.mu.Lock() + defer cache.mu.Unlock() cache.timeCache.Q = list.New() cache.timeCache.M = make(map[string]time.Time) } diff --git a/p2p/stream/common/streammanager/interface_test.go b/p2p/stream/common/streammanager/interface_test.go index fc280b47e..5a9bb4436 100644 --- a/p2p/stream/common/streammanager/interface_test.go +++ b/p2p/stream/common/streammanager/interface_test.go @@ -17,7 +17,7 @@ var _ StreamManager = &streamManager{} var ( myPeerID = makePeerID(0) - testProtoID = sttypes.ProtoID("harmony/sync/unitest/0/1.0.0") + testProtoID = sttypes.ProtoID("harmony/sync/unitest/0/1.0.0/1") ) const ( @@ -70,6 +70,18 @@ func (st *testStream) ReadBytes() ([]byte, error) { return nil, nil } +func (st *testStream) FailedTimes() int { + return 0 +} + +func (st *testStream) AddFailedTimes() { + return +} + +func (st *testStream) 
ResetFailedTimes() { + return +} + func (st *testStream) Close() error { if st.closed { return errors.New("already closed") diff --git a/p2p/stream/common/streammanager/streammanager.go b/p2p/stream/common/streammanager/streammanager.go index 4b3d19b92..e57919a76 100644 --- a/p2p/stream/common/streammanager/streammanager.go +++ b/p2p/stream/common/streammanager/streammanager.go @@ -10,6 +10,7 @@ import ( "github.com/harmony-one/abool" "github.com/harmony-one/harmony/internal/utils" sttypes "github.com/harmony-one/harmony/p2p/stream/types" + "github.com/harmony-one/harmony/shard" "github.com/libp2p/go-libp2p/core/network" libp2p_peer "github.com/libp2p/go-libp2p/core/peer" "github.com/libp2p/go-libp2p/core/protocol" @@ -73,6 +74,12 @@ func newStreamManager(pid sttypes.ProtoID, host host, pf peerFinder, handleStrea protoSpec, _ := sttypes.ProtoIDToProtoSpec(pid) + // if it is a beacon node or shard node, print the peer id and proto id + if protoSpec.BeaconNode || protoSpec.ShardID != shard.BeaconChainShardID { + fmt.Println("My peer id: ", host.ID().String()) + fmt.Println("My proto id: ", pid) + } + return &streamManager{ myProtoID: pid, myProtoSpec: protoSpec, @@ -304,16 +311,23 @@ func (sm *streamManager) discoverAndSetupStream(discCtx context.Context) (int, e connecting := 0 for peer := range peers { - if peer.ID == sm.host.ID() || sm.coolDownCache.Has(peer.ID) { + if peer.ID == sm.host.ID() { + continue + } + if sm.coolDownCache.Has(peer.ID) { // If the peer has the same ID and was just connected, skip. continue } + if _, ok := sm.streams.get(sttypes.StreamID(peer.ID)); ok { + continue + } discoveredPeersCounterVec.With(prometheus.Labels{"topic": string(sm.myProtoID)}).Inc() connecting += 1 go func(pid libp2p_peer.ID) { // The ctx here is using the module context instead of discover context err := sm.setupStreamWithPeer(sm.ctx, pid) if err != nil { + sm.coolDownCache.Add(pid) sm.logger.Warn().Err(err).Str("peerID", string(pid)).Msg("failed to setup stream with peer") return } @@ -323,7 +337,7 @@ func (sm *streamManager) discoverAndSetupStream(discCtx context.Context) (int, e } func (sm *streamManager) discover(ctx context.Context) (<-chan libp2p_peer.AddrInfo, error) { - protoID := string(sm.myProtoID) + protoID := sm.targetProtoID() discBatch := sm.config.DiscBatch if sm.config.HiCap-sm.streams.size() < sm.config.DiscBatch { discBatch = sm.config.HiCap - sm.streams.size() @@ -340,6 +354,14 @@ func (sm *streamManager) discover(ctx context.Context) (<-chan libp2p_peer.AddrI return sm.pf.FindPeers(ctx2, protoID, discBatch) } +func (sm *streamManager) targetProtoID() string { + targetSpec := sm.myProtoSpec + if targetSpec.ShardID == shard.BeaconChainShardID { // for beacon chain, only connect to beacon nodes + targetSpec.BeaconNode = true + } + return string(targetSpec.ToProtoID()) +} + func (sm *streamManager) setupStreamWithPeer(ctx context.Context, pid libp2p_peer.ID) error { timer := prometheus.NewTimer(setupStreamDuration.With(prometheus.Labels{"topic": string(sm.myProtoID)})) defer timer.ObserveDuration() @@ -347,7 +369,7 @@ func (sm *streamManager) setupStreamWithPeer(ctx context.Context, pid libp2p_pee nCtx, cancel := context.WithTimeout(ctx, connectTimeout) defer cancel() - st, err := sm.host.NewStream(nCtx, pid, protocol.ID(sm.myProtoID)) + st, err := sm.host.NewStream(nCtx, pid, protocol.ID(sm.targetProtoID())) if err != nil { return err } @@ -392,6 +414,10 @@ func (ss *streamSet) get(id sttypes.StreamID) (sttypes.Stream, bool) { ss.lock.RLock() defer ss.lock.RUnlock() + if id 
diff --git a/p2p/stream/common/streammanager/streammanager_test.go b/p2p/stream/common/streammanager/streammanager_test.go
index 5d82c8585..2b49a5f16 100644
--- a/p2p/stream/common/streammanager/streammanager_test.go
+++ b/p2p/stream/common/streammanager/streammanager_test.go
@@ -209,7 +209,7 @@ func TestStreamSet_numStreamsWithMinProtoID(t *testing.T) {
 		pid1    = testProtoID
 		numPid1 = 5
 
-		pid2    = sttypes.ProtoID("harmony/sync/unitest/0/1.0.1")
+		pid2    = sttypes.ProtoID("harmony/sync/unitest/0/1.0.1/1")
 		numPid2 = 10
 	)
diff --git a/p2p/stream/protocols/sync/client.go b/p2p/stream/protocols/sync/client.go
index 2523ef4f7..7ad479885 100644
--- a/p2p/stream/protocols/sync/client.go
+++ b/p2p/stream/protocols/sync/client.go
@@ -43,6 +43,36 @@ func (p *Protocol) GetBlocksByNumber(ctx context.Context, bns []uint64, opts ...
 	return
 }
 
+// GetRawBlocksByNumber requests blocks by their numbers and returns the raw
+// (RLP-encoded) block bytes together with the commit signature bytes.
+func (p *Protocol) GetRawBlocksByNumber(ctx context.Context, bns []uint64, opts ...Option) (blockBytes [][]byte, sigBytes [][]byte, stid sttypes.StreamID, err error) {
+	timer := p.doMetricClientRequest("getBlocksByNumber")
+	defer p.doMetricPostClientRequest("getBlocksByNumber", err, timer)
+
+	if len(bns) == 0 {
+		err = fmt.Errorf("zero block numbers requested")
+		return
+	}
+	if len(bns) > GetBlocksByNumAmountCap {
+		err = fmt.Errorf("number of blocks exceeds cap of %v", GetBlocksByNumAmountCap)
+		return
+	}
+	req := newGetBlocksByNumberRequest(bns)
+	resp, stid, err := p.rm.DoRequest(ctx, req, opts...)
+	if err != nil {
+		// At this point, the error can be a canceled or timed-out context, or a
+		// waiting queue that is already full.
+		return
+	}
+
+	// Parse and return blocks
+	sResp, ok := resp.(*syncResponse)
+	if !ok || sResp == nil {
+		err = errors.New("not sync response")
+		return
+	}
+	blockBytes, sigBytes, err = req.parseBlockBytesAndSigs(sResp)
+	return
+}
+
 // GetCurrentBlockNumber get the current block number from remote node
 func (p *Protocol) GetCurrentBlockNumber(ctx context.Context, opts ...Option) (bn uint64, stid sttypes.StreamID, err error) {
 	timer := p.doMetricClientRequest("getBlockNumber")
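Note: a usage sketch for the raw variant (hypothetical caller code, not part of the diff; proto is a *Protocol and the 10-second timeout is arbitrary):

    ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
    defer cancel()

    blockBytes, sigBytes, stid, err := proto.GetRawBlocksByNumber(ctx, []uint64{100, 101, 102})
    if err != nil {
    	return err
    }
    // blockBytes[i] is the RLP-encoded block, sigBytes[i] its commit signature,
    // and stid identifies which stream served the request.
    fmt.Println("got", len(blockBytes), "raw blocks from stream", stid)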
diff --git a/p2p/stream/protocols/sync/const.go b/p2p/stream/protocols/sync/const.go
index f536d4a78..1e1fc612f 100644
--- a/p2p/stream/protocols/sync/const.go
+++ b/p2p/stream/protocols/sync/const.go
@@ -17,6 +17,9 @@ const (
 	// See comments for GetBlocksByNumAmountCap.
 	GetBlocksByHashesAmountCap = 10
 
+	// MaxStreamFailures is the maximum allowed failures before a stream gets removed
+	MaxStreamFailures = 3
+
 	// minAdvertiseInterval is the minimum advertise interval
 	minAdvertiseInterval = 1 * time.Minute
diff --git a/p2p/stream/protocols/sync/protocol.go b/p2p/stream/protocols/sync/protocol.go
index facdf601d..80ea0927b 100644
--- a/p2p/stream/protocols/sync/protocol.go
+++ b/p2p/stream/protocols/sync/protocol.go
@@ -15,6 +16,7 @@ import (
 	"github.com/harmony-one/harmony/p2p/stream/common/requestmanager"
 	"github.com/harmony-one/harmony/p2p/stream/common/streammanager"
 	sttypes "github.com/harmony-one/harmony/p2p/stream/types"
+	"github.com/harmony-one/harmony/shard"
 	"github.com/hashicorp/go-version"
 	libp2p_host "github.com/libp2p/go-libp2p/core/host"
 	libp2p_network "github.com/libp2p/go-libp2p/core/network"
@@ -39,12 +41,13 @@ var (
 type (
 	// Protocol is the protocol for sync streaming
 	Protocol struct {
-		chain    engine.ChainReader            // provide SYNC data
-		schedule shardingconfig.Schedule       // provide schedule information
-		rl       ratelimiter.RateLimiter       // limit the incoming request rate
-		sm       streammanager.StreamManager   // stream management
-		rm       requestmanager.RequestManager // deliver the response from stream
-		disc     discovery.Discovery
+		chain      engine.ChainReader            // provide SYNC data
+		beaconNode bool                          // whether this is a beacon node or a shard chain node
+		schedule   shardingconfig.Schedule       // provide schedule information
+		rl         ratelimiter.RateLimiter       // limit the incoming request rate
+		sm         streammanager.StreamManager   // stream management
+		rm         requestmanager.RequestManager // deliver the response from stream
+		disc       discovery.Discovery
 
 		config Config
 		logger zerolog.Logger
@@ -56,12 +59,13 @@ type (
 	// Config is the sync protocol config
 	Config struct {
-		Chain     engine.ChainReader
-		Host      libp2p_host.Host
-		Discovery discovery.Discovery
-		ShardID   nodeconfig.ShardID
-		Network   nodeconfig.NetworkType
-
+		Chain                engine.ChainReader
+		Host                 libp2p_host.Host
+		Discovery            discovery.Discovery
+		ShardID              nodeconfig.ShardID
+		Network              nodeconfig.NetworkType
+		BeaconNode           bool
+		MaxAdvertiseWaitTime int
 		// stream manager config
 		SmSoftLowCap int
 		SmHardLowCap int
@@ -75,12 +79,13 @@ func NewProtocol(config Config) *Protocol {
 	ctx, cancel := context.WithCancel(context.Background())
 
 	sp := &Protocol{
-		chain:  config.Chain,
-		disc:   config.Discovery,
-		config: config,
-		ctx:    ctx,
-		cancel: cancel,
-		closeC: make(chan struct{}),
+		chain:      config.Chain,
+		beaconNode: config.BeaconNode,
+		disc:       config.Discovery,
+		config:     config,
+		ctx:        ctx,
+		cancel:     cancel,
+		closeC:     make(chan struct{}),
 	}
 	smConfig := streammanager.Config{
 		SoftLoCap: config.SmSoftLowCap,
@@ -104,7 +109,10 @@ func (p *Protocol) Start() {
 	p.sm.Start()
 	p.rm.Start()
 	p.rl.Start()
-	go p.advertiseLoop()
+	// Only advertise when this node serves full sync data; an EpochChain node (a
+	// shard-chain node's light view of the beacon shard) does not advertise.
+	if p.beaconNode || p.chain.ShardID() != shard.BeaconChainShardID {
+		go p.advertiseLoop()
+	}
 }
 
 // Close close the protocol
@@ -126,11 +134,21 @@ func (p *Protocol) ProtoID() sttypes.ProtoID {
 	return p.protoIDByVersion(MyVersion)
 }
 
+// ShardProtoID returns the ProtoID of the sync protocol for shard nodes
+func (p *Protocol) ShardProtoID() sttypes.ProtoID {
+	return p.protoIDByVersionForShardNodes(MyVersion)
+}
+
 // Version returns the sync protocol version
 func (p *Protocol) Version() *version.Version {
 	return MyVersion
 }
 
+// IsBeaconNode returns true if it is a beacon chain node
+func (p *Protocol) IsBeaconNode() bool {
+	return p.beaconNode
+}
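Note: for orientation, a hypothetical wiring of the new Config fields (chain, host, and disc are placeholders, and only fields visible in this diff are set; real values come from the node's TOML config, e.g. MaxAdvertiseWaitTime = 30 in the Rosetta configs below):

    sp := sync.NewProtocol(sync.Config{
    	Chain:                chain,
    	Host:                 host,
    	Discovery:            disc,
    	ShardID:              nodeconfig.ShardID(0),
    	Network:              "mainnet",
    	BeaconNode:           true, // this node runs the beacon chain itself
    	MaxAdvertiseWaitTime: 30,   // minutes; caps the advertiseLoop sleep below
    })
    sp.Start()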
+
 // Match checks the compatibility to the target protocol ID.
 func (p *Protocol) Match(targetID string) bool {
 	target, err := sttypes.ProtoIDToProtoSpec(sttypes.ProtoID(targetID))
@@ -162,16 +180,21 @@ func (p *Protocol) HandleStream(raw libp2p_network.Stream) {
 			Msg("failed to add new stream")
 		return
 	}
+	p.logger.Info().
+		Str("peer", raw.Conn().RemotePeer().String()).
+		Str("protoID", string(st.ProtoID())).
+		Msg("new stream connected")
 	st.run()
 }
 
 func (p *Protocol) advertiseLoop() {
 	for {
 		sleep := p.advertise()
+		maxSleepTime := time.Duration(p.config.MaxAdvertiseWaitTime) * time.Minute
+		if sleep > maxSleepTime {
+			sleep = maxSleepTime
+		}
 		select {
-		case <-time.After(sleep):
 		case <-p.closeC:
 			return
+		case <-time.After(sleep):
 		}
 	}
 }
@@ -205,6 +228,11 @@ func (p *Protocol) supportedProtoIDs() []sttypes.ProtoID {
 	pids := make([]sttypes.ProtoID, 0, len(vs))
 	for _, v := range vs {
 		pids = append(pids, p.protoIDByVersion(v))
+		// A beacon node also advertises the shard-node variant of each version:
+		// it accepts connections from shard-chain nodes so they can fetch the
+		// last blocks of each epoch for their EpochChain.
+		if p.beaconNode {
+			pids = append(pids, p.protoIDByVersionForShardNodes(v))
+		}
 	}
 	return pids
 }
@@ -219,18 +247,51 @@ func (p *Protocol) protoIDByVersion(v *version.Version) sttypes.ProtoID {
 		NetworkType: p.config.Network,
 		ShardID:     p.config.ShardID,
 		Version:     v,
+		BeaconNode:  p.beaconNode,
 	}
 	return spec.ToProtoID()
 }
 
+// protoIDByVersionForShardNodes returns the proto ID with the beacon-node flag
+// cleared, i.e. the variant that shard-chain nodes look for.
+func (p *Protocol) protoIDByVersionForShardNodes(v *version.Version) sttypes.ProtoID {
+	spec := sttypes.ProtoSpec{
+		Service:     serviceSpecifier,
+		NetworkType: p.config.Network,
+		ShardID:     p.config.ShardID,
+		Version:     v,
+		BeaconNode:  false,
+	}
+	return spec.ToProtoID()
+}
+
 // RemoveStream removes the stream of the given stream ID
+// TODO: add a reason to the parameters
 func (p *Protocol) RemoveStream(stID sttypes.StreamID) {
-	if stID == "" {
-		return
-	}
 	st, exist := p.sm.GetStreamByID(stID)
 	if exist && st != nil {
+		// TODO: log this incident with its reason
 		st.Close()
+		// The stream manager removes this stream from its list and triggers
+		// discovery if the number of streams is not enough.
+		p.sm.RemoveStream(stID) // TODO: double check whether this part is needed
+	}
+}
+
+// StreamFailed records a failure on the given stream; once the stream has
+// accumulated MaxStreamFailures failures, it is closed and removed.
+func (p *Protocol) StreamFailed(stID sttypes.StreamID, reason string) {
+	st, exist := p.sm.GetStreamByID(stID)
+	if exist && st != nil {
+		st.AddFailedTimes()
+		p.logger.Info().
+			Str("stream ID", string(st.ID())).
+			Int("num failures", st.FailedTimes()).
+			Str("reason", reason).
+			Msg("stream failed")
+		if st.FailedTimes() >= MaxStreamFailures {
+			st.Close()
+			// The stream manager removes this stream from its list and triggers
+			// discovery if the number of streams is not enough.
+			p.sm.RemoveStream(stID) // TODO: double check whether this part is needed
+			p.logger.Warn().
+				Str("stream ID", string(st.ID())).
+				Msg("stream removed")
+		}
 	}
 }
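Note: a sketch of the intended caller pattern for StreamFailed (hypothetical caller; verifyBlocks is a placeholder, and the return shape of GetBlocksByNumber is assumed from its use above):

    blocks, stid, err := proto.GetBlocksByNumber(ctx, bns)
    if err == nil {
    	if verr := verifyBlocks(blocks); verr != nil {
    		// After MaxStreamFailures (3) such reports, the stream is closed and removed.
    		proto.StreamFailed(stid, "invalid blocks in response")
    	}
    }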
diff --git a/p2p/stream/protocols/sync/protocol_test.go b/p2p/stream/protocols/sync/protocol_test.go
index aff6691ec..0e40f6017 100644
--- a/p2p/stream/protocols/sync/protocol_test.go
+++ b/p2p/stream/protocols/sync/protocol_test.go
@@ -15,16 +15,18 @@ func TestProtocol_Match(t *testing.T) {
 		targetID string
 		exp      bool
 	}{
-		{"harmony/sync/unitest/0/1.0.1", true},
+		{"harmony/sync/unitest/0/1.0.1/1", true},
+		{"harmony/sync/unitest/0/1.0.1/0", true},
 		{"h123456", false},
-		{"harmony/sync/unitest/0/0.9.9", false},
-		{"harmony/epoch/unitest/0/1.0.1", false},
-		{"harmony/sync/mainnet/0/1.0.1", false},
-		{"harmony/sync/unitest/1/1.0.1", false},
+		{"harmony/sync/unitest/0/0.9.9/1", false},
+		{"harmony/epoch/unitest/0/1.0.1/1", false},
+		{"harmony/sync/mainnet/0/1.0.1/1", false},
+		{"harmony/sync/unitest/1/1.0.1/1", false},
 	}
 
 	for i, test := range tests {
 		p := &Protocol{
+			beaconNode: true,
 			config: Config{
 				Network: "unitest",
 				ShardID: 0,
diff --git a/p2p/stream/types/interface.go b/p2p/stream/types/interface.go
index 424382cc8..d7b60f78c 100644
--- a/p2p/stream/types/interface.go
+++ b/p2p/stream/types/interface.go
@@ -13,6 +13,8 @@ type Protocol interface {
 	Specifier() string
 	Version() *version.Version
 	ProtoID() ProtoID
+	// ShardProtoID() ProtoID
+	IsBeaconNode() bool
 	Match(string) bool
 	HandleStream(st libp2p_network.Stream)
 }
diff --git a/p2p/stream/types/stream.go b/p2p/stream/types/stream.go
index 3abdf4f52..18b47f615 100644
--- a/p2p/stream/types/stream.go
+++ b/p2p/stream/types/stream.go
@@ -21,6 +21,9 @@ type Stream interface {
 	ReadBytes() ([]byte, error)
 	Close() error
 	CloseOnExit() error
+	FailedTimes() int
+	AddFailedTimes()
+	ResetFailedTimes()
 }
 
 // BaseStream is the wrapper around
@@ -34,14 +37,17 @@ type BaseStream struct {
 	spec     ProtoSpec
 	specErr  error
 	specOnce sync.Once
+
+	failedTimes int
 }
 
 // NewBaseStream creates BaseStream as the wrapper of libp2p Stream
 func NewBaseStream(st libp2p_network.Stream) *BaseStream {
 	reader := bufio.NewReader(st)
 	return &BaseStream{
-		raw:    st,
-		reader: reader,
+		raw:         st,
+		reader:      reader,
+		failedTimes: 0,
 	}
 }
 
@@ -72,6 +78,18 @@ func (st *BaseStream) Close() error {
 	return st.raw.Reset()
 }
 
+// FailedTimes returns the number of failures recorded on this stream
+func (st *BaseStream) FailedTimes() int {
+	return st.failedTimes
+}
+
+// AddFailedTimes increments the failure counter
+func (st *BaseStream) AddFailedTimes() {
+	st.failedTimes++
+}
+
+// ResetFailedTimes clears the failure counter
+func (st *BaseStream) ResetFailedTimes() {
+	st.failedTimes = 0
+}
+
 const (
 	maxMsgBytes = 20 * 1024 * 1024 // 20MB
 	sizeBytes   = 4                // uint32
diff --git a/p2p/stream/types/utils.go b/p2p/stream/types/utils.go
index d096115fb..c27d95d60 100644
--- a/p2p/stream/types/utils.go
+++ b/p2p/stream/types/utils.go
@@ -20,35 +20,41 @@ const (
 	ProtoIDCommonPrefix = "harmony"
 
 	// ProtoIDFormat is the format of stream protocol ID
-	ProtoIDFormat = "%s/%s/%s/%d/%s"
+	ProtoIDFormat = "%s/%s/%s/%d/%s/%d"
 
 	// protoIDNumElem is the number of elements of the ProtoID. See comments in ProtoID
-	protoIDNumElem = 5
+	protoIDNumElem = 6
 )
 
 // ProtoID is the protocol id for streaming, an alias of libp2p stream protocol ID.
 // The stream protocol ID is composed of the following components:
+// ex: harmony/sync/partner/0/1.0.0/1
 // 1. Service - Currently, only sync service is supported.
 // 2. NetworkType - mainnet, testnet, stn, e.t.c.
 // 3. ShardID - shard ID of the current protocol.
 // 4. Version - Stream protocol version for backward compatibility.
+// 5. BeaconNode - whether the stream is from a beacon chain node or a shard chain node
 type ProtoID libp2p_proto.ID
 
 // ProtoSpec is the un-serialized stream proto id specification
-// TODO: move this to service wise module since different protocol might have different
-//
-//	protoID information
+// TODO: move this to a service-wise module since different protocols might have
+// different protoID information
 type ProtoSpec struct {
 	Service     string
 	NetworkType nodeconfig.NetworkType
 	ShardID     nodeconfig.ShardID
 	Version     *version.Version
+	BeaconNode  bool
 }
 
 // ToProtoID convert a ProtoSpec to ProtoID.
 func (spec ProtoSpec) ToProtoID() ProtoID {
+	var versionStr string
+	if spec.Version != nil {
+		versionStr = spec.Version.String()
+	}
 	s := fmt.Sprintf(ProtoIDFormat, ProtoIDCommonPrefix, spec.Service,
-		spec.NetworkType, spec.ShardID, spec.Version.String())
+		spec.NetworkType, spec.ShardID, versionStr, bool2int(spec.BeaconNode))
 	return ProtoID(s)
 }
 
@@ -59,11 +65,12 @@ func ProtoIDToProtoSpec(id ProtoID) (ProtoSpec, error) {
 		return ProtoSpec{}, errors.New("unexpected protocol size")
 	}
 	var (
-		prefix      = comps[0]
-		service     = comps[1]
-		networkType = comps[2]
-		shardIDStr  = comps[3]
-		versionStr  = comps[4]
+		prefix        = comps[0]
+		service       = comps[1]
+		networkType   = comps[2]
+		shardIDStr    = comps[3]
+		versionStr    = comps[4]
+		beaconnodeStr = comps[5]
 	)
 	shardID, err := strconv.Atoi(shardIDStr)
 	if err != nil {
@@ -76,11 +83,16 @@ func ProtoIDToProtoSpec(id ProtoID) (ProtoSpec, error) {
 	if err != nil {
 		return ProtoSpec{}, errors.Wrap(err, "unexpected version string")
 	}
+	isBeaconNode, err := strconv.Atoi(beaconnodeStr)
+	if err != nil {
+		return ProtoSpec{}, errors.Wrap(err, "invalid beacon node flag")
+	}
 	return ProtoSpec{
 		Service:     service,
 		NetworkType: nodeconfig.NetworkType(networkType),
 		ShardID:     nodeconfig.ShardID(uint32(shardID)),
 		Version:     version,
+		BeaconNode:  int2bool(isBeaconNode),
 	}, nil
 }
 
@@ -90,3 +102,14 @@ func GenReqID() uint64 {
 	rand.Read(rnd[:])
 	return binary.BigEndian.Uint64(rnd[:])
 }
+
+func bool2int(b bool) int {
+	if b {
+		return 1
+	}
+	return 0
+}
+
+func int2bool(i int) bool {
+	return i > 0
+}
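Note: because protoIDNumElem is now 6, pre-upgrade five-element IDs fail ProtoIDToProtoSpec with "unexpected protocol size" and thus never Match, so old and new nodes will not pair on the sync protocol. A quick round-trip sketch (assuming this types package imported as sttypes, plus hashicorp's go-version):

    spec := sttypes.ProtoSpec{
    	Service:     "sync",
    	NetworkType: "partner",
    	ShardID:     0,
    	Version:     version.Must(version.NewVersion("1.0.0")),
    	BeaconNode:  true,
    }
    id := spec.ToProtoID() // "harmony/sync/partner/0/1.0.0/1"

    parsed, err := sttypes.ProtoIDToProtoSpec(id)
    // err == nil, parsed.BeaconNode == true
    _ = parsed

    _, err = sttypes.ProtoIDToProtoSpec("harmony/sync/partner/0/1.0.0") // old 5-element form
    // err: "unexpected protocol size"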
diff --git a/rosetta/infra/harmony-mainnet.conf b/rosetta/infra/harmony-mainnet.conf
index 534b80887..8d51609cb 100644
--- a/rosetta/infra/harmony-mainnet.conf
+++ b/rosetta/infra/harmony-mainnet.conf
@@ -1,4 +1,4 @@
-Version = "2.5.9"
+Version = "2.5.13"
 
 [BLSKeys]
   KMSConfigFile = ""
@@ -35,14 +35,18 @@ Version = "2.5.9"
   RunElasticMode = false
   ShardID = 0
   TraceEnable = false
+  TriesInMemory = 128
 
 [HTTP]
   AuthPort = 9501
   Enabled = true
   IP = "0.0.0.0"
+  IdleTimeout = "120s"
   Port = 9500
+  ReadTimeout = "30s"
   RosettaEnabled = true
   RosettaPort = 9700
+  WriteTimeout = "30s"
 
 [Log]
   Console = false
@@ -61,6 +65,8 @@ Version = "2.5.9"
   NetworkType = "mainnet"
 
 [P2P]
+  ConnManagerHighWatermark = 192
+  ConnManagerLowWatermark = 160
   DisablePrivateIPScan = false
   DiscConcurrency = 0
   IP = "0.0.0.0"
@@ -68,6 +74,7 @@ Version = "2.5.9"
   MaxConnsPerIP = 10
   MaxPeers = 0
   Port = 9000
+  WaitForEachPeerToConnect = false
 
 [Pprof]
   Enabled = false
@@ -80,6 +87,7 @@ Version = "2.5.9"
 [RPCOpt]
   DebugEnabled = false
   EthRPCsEnabled = true
+  EvmCallTimeout = "5s"
   LegacyRPCsEnabled = true
   RateLimterEnabled = true
   RequestsPerSecond = 1000
@@ -100,10 +108,23 @@ Version = "2.5.9"
   DiscHighCap = 128
   DiscSoftLowCap = 8
   Downloader = false
-  StagedSync = false
   Enabled = false
   InitStreams = 8
+  MaxAdvertiseWaitTime = 30
   MinPeers = 5
+  StagedSync = false
+
+  [Sync.StagedSyncCfg]
+    DoubleCheckBlockHashes = false
+    InsertChainBatchSize = 0
+    LogProgress = false
+    MaxBackgroundBlocks = 0
+    MaxBlocksPerSyncCycle = 0
+    MaxMemSyncCycleSize = 0
+    TurboMode = false
+    UseMemDB = false
+    VerifyAllSig = false
+    VerifyHeaderBatchSize = 0
 
 [TxPool]
   AccountSlots = 16
diff --git a/rosetta/infra/harmony-pstn.conf b/rosetta/infra/harmony-pstn.conf
index ed4c116c6..1bb865c1a 100644
--- a/rosetta/infra/harmony-pstn.conf
+++ b/rosetta/infra/harmony-pstn.conf
@@ -1,4 +1,4 @@
-Version = "2.5.9"
+Version = "2.5.13"
 
 [BLSKeys]
   KMSConfigFile = ""
@@ -35,14 +35,18 @@ Version = "2.5.9"
   RunElasticMode = false
   ShardID = 0
   TraceEnable = false
+  TriesInMemory = 128
 
 [HTTP]
   AuthPort = 9501
   Enabled = true
   IP = "0.0.0.0"
+  IdleTimeout = "120s"
   Port = 9500
+  ReadTimeout = "30s"
   RosettaEnabled = true
   RosettaPort = 9700
+  WriteTimeout = "30s"
 
 [Log]
   Console = false
@@ -61,6 +65,8 @@ Version = "2.5.9"
   NetworkType = "partner"
 
 [P2P]
+  ConnManagerHighWatermark = 192
+  ConnManagerLowWatermark = 160
   DisablePrivateIPScan = false
   DiscConcurrency = 0
   IP = "0.0.0.0"
@@ -68,6 +74,7 @@ Version = "2.5.9"
   MaxConnsPerIP = 10
   MaxPeers = 0
   Port = 9000
+  WaitForEachPeerToConnect = false
 
 [Pprof]
   Enabled = false
@@ -80,6 +87,7 @@ Version = "2.5.9"
 [RPCOpt]
   DebugEnabled = false
   EthRPCsEnabled = true
+  EvmCallTimeout = "5s"
   LegacyRPCsEnabled = true
   RateLimterEnabled = true
   RequestsPerSecond = 1000
@@ -100,10 +108,23 @@ Version = "2.5.9"
   DiscHighCap = 128
   DiscSoftLowCap = 8
   Downloader = false
-  StagedSync = false
   Enabled = false
   InitStreams = 8
+  MaxAdvertiseWaitTime = 30
   MinPeers = 2
+  StagedSync = false
+
+  [Sync.StagedSyncCfg]
+    DoubleCheckBlockHashes = false
+    InsertChainBatchSize = 0
+    LogProgress = false
+    MaxBackgroundBlocks = 0
+    MaxBlocksPerSyncCycle = 0
+    MaxMemSyncCycleSize = 0
+    TurboMode = false
+    UseMemDB = false
+    VerifyAllSig = false
+    VerifyHeaderBatchSize = 0
 
 [TxPool]
   AccountSlots = 16
diff --git a/rosetta/rosetta.go b/rosetta/rosetta.go
index 860c2d0d6..a05630057 100644
--- a/rosetta/rosetta.go
+++ b/rosetta/rosetta.go
@@ -84,7 +84,11 @@ func getRouter(asserter *asserter.Asserter, hmy *hmy.Harmony, limiterEnable bool
 		server.NewMempoolAPIController(services.NewMempoolAPI(hmy), asserter),
 		server.NewNetworkAPIController(services.NewNetworkAPI(hmy), asserter),
 		server.NewConstructionAPIController(services.NewConstructionAPI(hmy), asserter),
-		server.NewCallAPIController(services.NewCallAPIService(hmy, limiterEnable, rateLimit), asserter),
+		server.NewCallAPIController(
+			services.NewCallAPIService(hmy, limiterEnable, rateLimit,
+				hmy.NodeAPI.GetConfig().NodeConfig.RPCServer.EvmCallTimeout),
+			asserter,
+		),
 		server.NewEventsAPIController(services.NewEventAPI(hmy), asserter),
 		server.NewSearchAPIController(services.NewSearchAPI(hmy), asserter),
 	)
diff --git a/rosetta/services/call_service.go b/rosetta/services/call_service.go
index 46f528d7b..9d26bab28 100644
--- a/rosetta/services/call_service.go
+++ b/rosetta/services/call_service.go
@@ -3,6 +3,7 @@ package services
 import (
 	"context"
 	"encoding/json"
+	"time"
 
 	"github.com/coinbase/rosetta-sdk-go/server"
 	"github.com/coinbase/rosetta-sdk-go/types"
@@ -82,10 +83,15 @@ func (c *CallAPIService) Call(
 }
 
-func NewCallAPIService(hmy *hmy.Harmony, limiterEnable bool, rateLimit int) server.CallAPIServicer {
+func NewCallAPIService(
+	hmy *hmy.Harmony,
+	limiterEnable bool,
+	rateLimit int,
+	evmCallTimeout time.Duration,
+) server.CallAPIServicer {
 	return &CallAPIService{
 		hmy:                 hmy,
-		publicContractAPI:   rpc2.NewPublicContractAPI(hmy, rpc2.V2, limiterEnable, rateLimit),
+		publicContractAPI:   rpc2.NewPublicContractAPI(hmy, rpc2.V2, limiterEnable, rateLimit, evmCallTimeout),
 		publicStakingAPI:    rpc2.NewPublicStakingAPI(hmy, rpc2.V2),
 		publicBlockChainAPI: rpc2.NewPublicBlockchainAPI(hmy, rpc2.V2, limiterEnable, rateLimit),
 	}
diff --git a/rosetta/services/construction.go b/rosetta/services/construction.go
index 3442d4c62..f2bd6e6c9 100644
--- a/rosetta/services/construction.go
+++ b/rosetta/services/construction.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"math/big"
+	"time"
 
 	"github.com/coinbase/rosetta-sdk-go/server"
 	"github.com/coinbase/rosetta-sdk-go/types"
@@ -24,17 +25,19 @@ const (
 
 // ConstructAPI implements the server.ConstructAPIServicer interface.
 type ConstructAPI struct {
-	hmy           *hmy.Harmony
-	signer        hmyTypes.Signer
-	stakingSigner stakingTypes.Signer
+	hmy            *hmy.Harmony
+	signer         hmyTypes.Signer
+	stakingSigner  stakingTypes.Signer
+	evmCallTimeout time.Duration
 }
 
 // NewConstructionAPI creates a new instance of a ConstructAPI.
 func NewConstructionAPI(hmy *hmy.Harmony) server.ConstructionAPIServicer {
 	return &ConstructAPI{
-		hmy:           hmy,
-		signer:        hmyTypes.NewEIP155Signer(new(big.Int).SetUint64(hmy.ChainID)),
-		stakingSigner: stakingTypes.NewEIP155Signer(new(big.Int).SetUint64(hmy.ChainID)),
+		hmy:            hmy,
+		signer:         hmyTypes.NewEIP155Signer(new(big.Int).SetUint64(hmy.ChainID)),
+		stakingSigner:  stakingTypes.NewEIP155Signer(new(big.Int).SetUint64(hmy.ChainID)),
+		evmCallTimeout: hmy.NodeAPI.GetConfig().NodeConfig.RPCServer.EvmCallTimeout,
 	}
 }
diff --git a/rosetta/services/construction_check.go b/rosetta/services/construction_check.go
index 1c3e1db69..c842770ab 100644
--- a/rosetta/services/construction_check.go
+++ b/rosetta/services/construction_check.go
@@ -270,7 +270,7 @@ func (s *ConstructAPI) ConstructionMetadata(
 		callArgs.To = &contractAddress
 	}
 	evmExe, err := rpc.DoEVMCall(
-		ctx, s.hmy, callArgs, latest, rpc.CallTimeout,
+		ctx, s.hmy, callArgs, latest, s.evmCallTimeout,
 	)
 	if err != nil {
 		return nil, common.NewError(common.CatchAllError, map[string]interface{}{
diff --git a/rpc/blockchain.go b/rpc/blockchain.go
index 6f3e2293f..46e495944 100644
--- a/rpc/blockchain.go
+++ b/rpc/blockchain.go
@@ -2,13 +2,12 @@ package rpc
 
 import (
 	"context"
+	"encoding/hex"
 	"fmt"
 	"math/big"
 	"reflect"
 	"time"
 
-	"encoding/hex"
-
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/common/hexutil"
 	"github.com/ethereum/go-ethereum/crypto"
diff --git a/rpc/contract.go b/rpc/contract.go
index 337bea7cd..abcb4f941 100644
--- a/rpc/contract.go
+++ b/rpc/contract.go
@@ -31,11 +31,18 @@ type PublicContractService struct {
 	hmy     *hmy.Harmony
 	version Version
 	// TEMP SOLUTION to rpc node spamming issue
-	limiterCall *rate.Limiter
+	limiterCall    *rate.Limiter
+	evmCallTimeout time.Duration
 }
 
 // NewPublicContractAPI creates a new API for the RPC interface
-func NewPublicContractAPI(hmy *hmy.Harmony, version Version, limiterEnable bool, limit int) rpc.API {
+func NewPublicContractAPI(
+	hmy *hmy.Harmony,
+	version Version,
+	limiterEnable bool,
+	limit int,
+	evmCallTimeout time.Duration,
+) rpc.API {
 	var limiter *rate.Limiter
 	if limiterEnable {
 		limiter = rate.NewLimiter(rate.Limit(limit), limit)
@@ -44,8 +51,13 @@ func NewPublicContractAPI(hmy *hmy.Harmony, version Version, limiterEnable bool,
 	return rpc.API{
 		Namespace: version.Namespace(),
 		Version:   APIVersion,
-		Service:   &PublicContractService{hmy, version, limiter},
-		Public:    true,
+		Service: &PublicContractService{
+			hmy:            hmy,
+			version:        version,
+			limiterCall:    limiter,
+			evmCallTimeout: evmCallTimeout,
+		},
+		Public: true,
 	}
 }
 
@@ -80,7 +92,7 @@ func (s *PublicContractService) Call(
 	}
 
 	// Execute call
-	result, err := DoEVMCall(ctx, s.hmy, args, blockNrOrHash, CallTimeout)
+	result, err := DoEVMCall(ctx, s.hmy, args, blockNrOrHash, s.evmCallTimeout)
 	if err != nil {
 		return nil, err
 	}
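Note: every EVM-call site now takes its deadline from the node-level EvmCallTimeout (the new RPCOpt entry in the configs above) instead of the removed package constant. The pattern, as a hedged standalone sketch (callWithTimeout and run are illustrative placeholders, not part of this diff):

    // callWithTimeout mirrors how Call/ConstructionMetadata scope their EVM work.
    func callWithTimeout(ctx context.Context, timeout time.Duration, run func(context.Context) error) error {
    	ctx, cancel := context.WithTimeout(ctx, timeout)
    	defer cancel()
    	return run(ctx) // e.g. the DoEVMCall invocation shown above
    }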
diff --git a/rpc/rpc.go b/rpc/rpc.go
index 2c94f0a5e..a8f1e121a 100644
--- a/rpc/rpc.go
+++ b/rpc/rpc.go
@@ -4,7 +4,6 @@ import (
 	"fmt"
 	"net"
 	"strings"
-	"time"
 
 	"github.com/harmony-one/harmony/eth/rpc"
 	"github.com/harmony-one/harmony/hmy"
@@ -28,8 +27,6 @@ const (
 	// APIVersion used for DApp's, bumped after RPC refactor (7/2020)
 	APIVersion = "1.1"
-	// CallTimeout is the timeout given to all contract calls
-	CallTimeout = 5 * time.Second
 	// LogTag is the tag found in the log for all RPC logs
 	LogTag = "[RPC]"
 	// HTTPPortOffset ..
@@ -58,7 +55,6 @@ var (
 	wsEndpoint       = ""
 	wsAuthEndpoint   = ""
 	httpVirtualHosts = []string{"*"}
-	httpTimeouts     = rpc.DefaultHTTPTimeouts
 	httpOrigins      = []string{"*"}
 	wsOrigins        = []string{"*"}
 )
@@ -86,13 +82,18 @@ func StartServers(hmy *hmy.Harmony, apis []rpc.API, config nodeconfig.RPCServerC
 		rmf.ExposeAll()
 	}
 	if config.HTTPEnabled {
+		timeouts := rpc.HTTPTimeouts{
+			ReadTimeout:  config.HTTPTimeoutRead,
+			WriteTimeout: config.HTTPTimeoutWrite,
+			IdleTimeout:  config.HTTPTimeoutIdle,
+		}
 		httpEndpoint = fmt.Sprintf("%v:%v", config.HTTPIp, config.HTTPPort)
-		if err := startHTTP(apis, &rmf); err != nil {
+		if err := startHTTP(apis, &rmf, timeouts); err != nil {
 			return err
 		}
 
 		httpAuthEndpoint = fmt.Sprintf("%v:%v", config.HTTPIp, config.HTTPAuthPort)
-		if err := startAuthHTTP(authApis, &rmf); err != nil {
+		if err := startAuthHTTP(authApis, &rmf, timeouts); err != nil {
 			return err
 		}
 	}
@@ -158,8 +159,8 @@ func getAPIs(hmy *hmy.Harmony, config nodeconfig.RPCServerConfig) []rpc.API {
 		NewPublicHarmonyAPI(hmy, V2),
 		NewPublicBlockchainAPI(hmy, V1, config.RateLimiterEnabled, config.RequestsPerSecond),
 		NewPublicBlockchainAPI(hmy, V2, config.RateLimiterEnabled, config.RequestsPerSecond),
-		NewPublicContractAPI(hmy, V1, config.RateLimiterEnabled, config.RequestsPerSecond),
-		NewPublicContractAPI(hmy, V2, config.RateLimiterEnabled, config.RequestsPerSecond),
+		NewPublicContractAPI(hmy, V1, config.RateLimiterEnabled, config.RequestsPerSecond, config.EvmCallTimeout),
+		NewPublicContractAPI(hmy, V2, config.RateLimiterEnabled, config.RequestsPerSecond, config.EvmCallTimeout),
 		NewPublicTransactionAPI(hmy, V1),
 		NewPublicTransactionAPI(hmy, V2),
 		NewPublicPoolAPI(hmy, V1, config.RateLimiterEnabled, config.RequestsPerSecond),
@@ -185,7 +186,7 @@ func getAPIs(hmy *hmy.Harmony, config nodeconfig.RPCServerConfig) []rpc.API {
 		publicAPIs = append(publicAPIs,
 			NewPublicHarmonyAPI(hmy, Eth),
 			NewPublicBlockchainAPI(hmy, Eth, config.RateLimiterEnabled, config.RequestsPerSecond),
-			NewPublicContractAPI(hmy, Eth, config.RateLimiterEnabled, config.RequestsPerSecond),
+			NewPublicContractAPI(hmy, Eth, config.RateLimiterEnabled, config.RequestsPerSecond, config.EvmCallTimeout),
 			NewPublicTransactionAPI(hmy, Eth),
 			NewPublicPoolAPI(hmy, Eth, config.RateLimiterEnabled, config.RequestsPerSecond),
 			eth.NewPublicEthService(hmy, "eth"),
@@ -210,7 +211,7 @@ func getAPIs(hmy *hmy.Harmony, config nodeconfig.RPCServerConfig) []rpc.API {
 	return publicAPIs
 }
 
-func startHTTP(apis []rpc.API, rmf *rpc.RpcMethodFilter) (err error) {
+func startHTTP(apis []rpc.API, rmf *rpc.RpcMethodFilter, httpTimeouts rpc.HTTPTimeouts) (err error) {
 	httpListener, httpHandler, err = rpc.StartHTTPEndpoint(
 		httpEndpoint, apis, HTTPModules, rmf, httpOrigins, httpVirtualHosts, httpTimeouts,
 	)
@@ -227,7 +228,7 @@ func startHTTP(apis []rpc.API, rmf *rpc.RpcMethodFilter) (err error) {
 	return nil
 }
 
-func startAuthHTTP(apis []rpc.API, rmf *rpc.RpcMethodFilter) (err error) {
+func startAuthHTTP(apis []rpc.API, rmf *rpc.RpcMethodFilter, httpTimeouts rpc.HTTPTimeouts) (err error) {
 	httpListener, httpHandler, err = rpc.StartHTTPEndpoint(
 		httpAuthEndpoint, apis, HTTPModules, rmf, httpOrigins, httpVirtualHosts, httpTimeouts,
 	)
diff --git a/scripts/go_executable_build.sh b/scripts/go_executable_build.sh
index 2c1cc99f0..6331aa522 100755
--- a/scripts/go_executable_build.sh
+++ b/scripts/go_executable_build.sh
@@ -21,7 +21,7 @@ VERBOSE=
 if [ "$(uname -s)" == "Darwin" ]; then
     GO_GCFLAGS=""
 else
-    GO_GCFLAGS="all=-c 2"
+    GO_GCFLAGS=""
 fi
 DEBUG=false
 STATIC=true
diff --git a/test/build-localnet-validator.sh b/test/build-localnet-validator.sh
new file mode 100644
index 000000000..bffc7b94a
--- /dev/null
+++ b/test/build-localnet-validator.sh
@@ -0,0 +1,70 @@
+#! /bin/bash
+
+echo "Make sure the validator accounts are imported"
+hmy keys import-ks .hmy/extkeystore/one17ughrllgnzx9sfa46p568k8rdmtz7qj85slc6t.key 2> /dev/null
+hmy keys import-ks .hmy/extkeystore/one1auqndgthqu5lznsn7tuma8s5333cq0y07cwc6x.key 2> /dev/null
+hmy keys import-ks .hmy/extkeystore/one19aw2wcr5y4lxeuwt0ajgt5aw3a3qkjdgg67ygj.key 2> /dev/null
+hmy keys import-ks .hmy/extkeystore/one1eenp9ujcrmyaq22ef6jrpry2k97tjz4xs6ppcf.key 2> /dev/null
+hmy keys import-ks .hmy/extkeystore/one19zzwsxr0uf2fe34y8qkadek2v0eh6h5mg2deg6.key 2> /dev/null
+hmy keys import-ks .hmy/extkeystore/one1lctumupg2y009pjmnhnmn4nqjk0zf0dspjanf7.key 2> /dev/null
+
+echo "Let's fund all the validator accounts"
+hmy --node=http://127.0.0.1:9500 transfer --from one1zksj3evekayy90xt4psrz8h6j2v3hla4qwz4ur --to one17ughrllgnzx9sfa46p568k8rdmtz7qj85slc6t --from-shard 0 --to-shard 0 --amount 110000
+hmy --node=http://127.0.0.1:9500 transfer --from one1zksj3evekayy90xt4psrz8h6j2v3hla4qwz4ur --to one1auqndgthqu5lznsn7tuma8s5333cq0y07cwc6x --from-shard 0 --to-shard 0 --amount 110000
+hmy --node=http://127.0.0.1:9500 transfer --from one1zksj3evekayy90xt4psrz8h6j2v3hla4qwz4ur --to one19aw2wcr5y4lxeuwt0ajgt5aw3a3qkjdgg67ygj --from-shard 0 --to-shard 0 --amount 110000
+hmy --node=http://127.0.0.1:9500 transfer --from one1zksj3evekayy90xt4psrz8h6j2v3hla4qwz4ur --to one1eenp9ujcrmyaq22ef6jrpry2k97tjz4xs6ppcf --from-shard 0 --to-shard 0 --amount 110000
+hmy --node=http://127.0.0.1:9500 transfer --from one1zksj3evekayy90xt4psrz8h6j2v3hla4qwz4ur --to one19zzwsxr0uf2fe34y8qkadek2v0eh6h5mg2deg6 --from-shard 0 --to-shard 0 --amount 110000
+hmy --node=http://127.0.0.1:9500 transfer --from one1zksj3evekayy90xt4psrz8h6j2v3hla4qwz4ur --to one1lctumupg2y009pjmnhnmn4nqjk0zf0dspjanf7 --from-shard 0 --to-shard 0 --amount 110000
+
+# wait for epoch 2
+epoch=$(hmy blockchain latest-headers --node="http://localhost:9500" | jq -r '.["result"]["beacon-chain-header"]["epoch"]')
+while (( epoch < 2 )); do
+	echo "Not yet on epoch 2 .. waiting 30s"
+	epoch=$(hmy blockchain latest-headers --node="http://localhost:9500" | jq -r '.["result"]["beacon-chain-header"]["epoch"]')
+	sleep 30
+done
+
+echo "Now in epoch 2, we'll create the external validators"
+
+hmy --node="http://localhost:9500" staking create-validator \
+	--validator-addr one17ughrllgnzx9sfa46p568k8rdmtz7qj85slc6t --amount 10000 \
+	--bls-pubkeys 4f41a37a3a8d0695dd6edcc58142c6b7d98e74da5c90e79b587b3b960b6a4f5e048e6d8b8a000d77a478d44cd640270c,7dcc035a943e29e17959dabe636efad7303d2c6f273ace457ba9dcc2fd19d3f37e70ba1cd8d082cf8ff7be2f861db48c \
+	--name "s0-localnet-validator1" --identity "validator1" --details "validator1" \
+	--security-contact "localnet" --website "localnet.one" \
+	--max-change-rate 0.1 --max-rate 0.1 --rate 0.1 \
+	--max-total-delegation 100000000 --min-self-delegation 10000 --bls-pubkeys-dir .hmy/extbls/
+
+hmy --node="http://localhost:9500" staking create-validator \
+	--validator-addr one1auqndgthqu5lznsn7tuma8s5333cq0y07cwc6x --amount 10000 \
+	--bls-pubkeys b0917378b179a519a5055259c4f8980cce37d58af300b00dd98b07076d3d9a3b16c4a55f84522f553872225a7b1efc0c \
+	--name "s0-localnet-validator2" --identity "validator2" --details "validator2" \
+	--security-contact "localnet" --website "localnet.one" \
+	--max-change-rate 0.1 --max-rate 0.1 --rate 0.1 \
+	--max-total-delegation 100000000 --min-self-delegation 10000 --bls-pubkeys-dir .hmy/extbls/
+
+hmy --node="http://localhost:9500" staking create-validator \
+	--validator-addr one19aw2wcr5y4lxeuwt0ajgt5aw3a3qkjdgg67ygj --amount 10000 \
+	--bls-pubkeys 5a18d4aa3e6aff4835f07588ae66be19684476d38799f63e54c6b5732fad1e86dce7458b1c295404fb54a0d61e50bb97,81296eedba05047594385e3086e1dab52c9eb9e56f46d86f58447cccc20535d646120171961d74968d27a2ec0f8af285 \
+	--name "s1-localnet-validator3" --identity "validator3" --details "validator3" \
+	--security-contact "localnet" --website "localnet.one" \
+	--max-change-rate 0.1 --max-rate 0.1 --rate 0.1 \
+	--max-total-delegation 100000000 --min-self-delegation 10000 --bls-pubkeys-dir .hmy/extbls/
+
+hmy --node="http://localhost:9500" staking create-validator \
+	--validator-addr one1eenp9ujcrmyaq22ef6jrpry2k97tjz4xs6ppcf --amount 10000 \
+	--bls-pubkeys 89eab762e7364d6cf89f7a6c54da794f74eba2e29147992ac66adcef0f0654ef8a727710ee55ad8b532da0dd87811915 \
+	--name "s1-localnet-validator4" --identity "validator4" --details "validator4" \
+	--security-contact "localnet" --website "localnet.one" \
+	--max-change-rate 0.1 --max-rate 0.1 --rate 0.1 \
+	--max-total-delegation 100000000 --min-self-delegation 10000 --bls-pubkeys-dir .hmy/extbls/
+
+echo "Validators created"
+echo '''check their information
+hmy blockchain validator information one17ughrllgnzx9sfa46p568k8rdmtz7qj85slc6t --node="http://localhost:9500"
+hmy blockchain validator information one1auqndgthqu5lznsn7tuma8s5333cq0y07cwc6x --node="http://localhost:9500"
+hmy blockchain validator information one19aw2wcr5y4lxeuwt0ajgt5aw3a3qkjdgg67ygj --node="http://localhost:9500"
+hmy blockchain validator information one1eenp9ujcrmyaq22ef6jrpry2k97tjz4xs6ppcf --node="http://localhost:9500"
+'''
diff --git a/test/configs/local-resharding-with-external.txt b/test/configs/local-resharding-with-external.txt
new file mode 100644
index 000000000..318a08b9b
--- /dev/null
+++ b/test/configs/local-resharding-with-external.txt
@@ -0,0 +1,37 @@
+# shard 0
+# internal node
+127.0.0.1 9000 validator .hmy/65f55eb3052f9e9f632b2923be594ba77c55543f5c58ee1454b9cfd658d25e06373b0f7d42a19c84768139ea294f6204.key
+127.0.0.1 9002 validator .hmy/02c8ff0b88f313717bc3a627d2f8bb172ba3ad3bb9ba3ecb8eed4b7c878653d3d4faf769876c528b73f343967f74a917.key
+127.0.0.1 9004 validator .hmy/e751ec995defe4931273aaebcb2cd14bf37e629c554a57d3f334c37881a34a6188a93e76113c55ef3481da23b7d7ab09.key
+127.0.0.1 9006 validator .hmy/2d61379e44a772e5757e27ee2b3874254f56073e6bd226eb8b160371cc3c18b8c4977bd3dcb71fd57dc62bf0e143fd08.key
+127.0.0.1 9008 validator .hmy/86dc2fdc2ceec18f6923b99fd86a68405c132e1005cf1df72dca75db0adfaeb53d201d66af37916d61f079f34f21fb96.key
+127.0.0.1 9010 validator .hmy/95117937cd8c09acd2dfae847d74041a67834ea88662a7cbed1e170350bc329e53db151e5a0ef3e712e35287ae954818.key
+# external node
+127.0.0.1 9014 external .hmy/extbls/4f41a37a3a8d0695dd6edcc58142c6b7d98e74da5c90e79b587b3b960b6a4f5e048e6d8b8a000d77a478d44cd640270c.key
+127.0.0.1 9016 external .hmy/extbls/7dcc035a943e29e17959dabe636efad7303d2c6f273ace457ba9dcc2fd19d3f37e70ba1cd8d082cf8ff7be2f861db48c.key
+127.0.0.1 9018 external .hmy/extbls/b0917378b179a519a5055259c4f8980cce37d58af300b00dd98b07076d3d9a3b16c4a55f84522f553872225a7b1efc0c.key
+# fn node
+127.0.0.1 9050 validator .hmy/52ecce5f64db21cbe374c9268188f5d2cdd5bec1a3112276a350349860e35fb81f8cfe447a311e0550d961cf25cb988d.key
+127.0.0.1 9052 validator .hmy/678ec9670899bf6af85b877058bea4fc1301a5a3a376987e826e3ca150b80e3eaadffedad0fedfa111576fa76ded980c.key
+#127.0.0.1 9054 validator .hmy/16513c487a6bb76f37219f3c2927a4f281f9dd3fd6ed2e3a64e500de6545cf391dd973cc228d24f9bd01efe94912e714.key
+# explorer node
+127.0.0.1 9098 explorer null 0
+
+# shard 1
+# internal node
+127.0.0.1 9100 validator .hmy/40379eed79ed82bebfb4310894fd33b6a3f8413a78dc4d43b98d0adc9ef69f3285df05eaab9f2ce5f7227f8cb920e809.key
+127.0.0.1 9102 validator .hmy/ee2474f93cba9241562efc7475ac2721ab0899edf8f7f115a656c0c1f9ef8203add678064878d174bb478fa2e6630502.key
+127.0.0.1 9104 validator .hmy/776f3b8704f4e1092a302a60e84f81e476c212d6f458092b696df420ea19ff84a6179e8e23d090b9297dc041600bc100.key
+127.0.0.1 9106 validator .hmy/c4e4708b6cf2a2ceeb59981677e9821eebafc5cf483fb5364a28fa604cc0ce69beeed40f3f03815c9e196fdaec5f1097.key
+127.0.0.1 9108 validator .hmy/49d15743b36334399f9985feb0753430a2b287b2d68b84495bbb15381854cbf01bca9d1d9f4c9c8f18509b2bfa6bd40f.key
+127.0.0.1 9110 validator .hmy/68ae289d73332872ec8d04ac256ca0f5453c88ad392730c5741b6055bc3ec3d086ab03637713a29f459177aaa8340615.key
+# external node
+127.0.0.1 9114 external .hmy/extbls/5a18d4aa3e6aff4835f07588ae66be19684476d38799f63e54c6b5732fad1e86dce7458b1c295404fb54a0d61e50bb97.key
+127.0.0.1 9116 external .hmy/extbls/81296eedba05047594385e3086e1dab52c9eb9e56f46d86f58447cccc20535d646120171961d74968d27a2ec0f8af285.key
+127.0.0.1 9118 external .hmy/extbls/89eab762e7364d6cf89f7a6c54da794f74eba2e29147992ac66adcef0f0654ef8a727710ee55ad8b532da0dd87811915.key
+# fn node
+127.0.0.1 9150 validator .hmy/a547a9bf6fdde4f4934cde21473748861a3cc0fe8bbb5e57225a29f483b05b72531f002f8187675743d819c955a86100.key
+127.0.0.1 9152 validator .hmy/63f479f249c59f0486fda8caa2ffb247209489dae009dfde6144ff38c370230963d360dffd318cfb26c213320e89a512.key
+#127.0.0.1 9154 validator .hmy/576d3c48294e00d6be4a22b07b66a870ddee03052fe48a5abbd180222e5d5a1f8946a78d55b025de21635fd743bbad90.key
+# explorer node
+127.0.0.1 9096 explorer null 1
diff --git a/test/debug-external.sh b/test/debug-external.sh
new file mode 100755
index 000000000..198b01b38
--- /dev/null
+++ b/test/debug-external.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+./test/kill_node.sh
+rm -rf tmp_log*
+rm *.rlp
+rm -rf .dht*
+scripts/go_executable_build.sh -S || exit 1 # dynamic builds are faster for debug iteration...
+./test/deploy.sh -B -D 600000 ./test/configs/local-resharding-with-external.txt
diff --git a/test/deploy.sh b/test/deploy.sh
index 6bbb12eb9..d310a4ed1 100755
--- a/test/deploy.sh
+++ b/test/deploy.sh
@@ -54,7 +54,7 @@ function setup() {
 
 function launch_bootnode() {
 	echo "launching boot node ..."
-	${DRYRUN} ${ROOT}/bin/bootnode -port 19876 -max_conn_per_ip 100 >"${log_folder}"/bootnode.log 2>&1 | tee -a "${LOG_FILE}" &
+	${DRYRUN} ${ROOT}/bin/bootnode -port 19876 -max_conn_per_ip 100 -force_public true >"${log_folder}"/bootnode.log 2>&1 | tee -a "${LOG_FILE}" &
 	sleep 1
 	BN_MA=$(grep "BN_MA" "${log_folder}"/bootnode.log | awk -F\= ' { print $2 } ')
 	echo "bootnode launched." + " $BN_MA"
@@ -83,8 +83,8 @@ function launch_localnet() {
 		# Read config for i-th node from config file
 		IFS=' ' read -r ip port mode bls_key shard node_config <<<"${line}"
 		args=("${base_args[@]}" --ip "${ip}" --port "${port}" --key "/tmp/${ip}-${port}.key" --db_dir "${ROOT}/db-${ip}-${port}" "--broadcast_invalid_tx=false")
-		if [[ -z "$ip" || -z "$port" ]]; then
-			echo "skip empty node"
+		if [[ -z "$ip" || -z "$port" || "$ip" == "#" ]]; then
+			echo "skip empty or commented-out line"
 			continue
 		fi
 		if [[ $EXPOSEAPIS == "true" ]]; then