Backward sync: use retry switching peer when fetching data from peers (#4656)

Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net>
pull/4664/head
Fabio Di Fabio 2 years ago committed by GitHub
parent d556668d99
commit bb4f3b3593
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 1
      CHANGELOG.md
  2. 4
      ethereum/eth/src/main/java/org/hyperledger/besu/ethereum/eth/manager/task/GetBodiesFromPeerTask.java
  3. 112
      ethereum/eth/src/main/java/org/hyperledger/besu/ethereum/eth/manager/task/RetryingGetBlocksFromPeersTask.java
  4. 43
      ethereum/eth/src/main/java/org/hyperledger/besu/ethereum/eth/manager/task/RetryingGetHeadersEndingAtFromPeerByHashTask.java
  5. 7
      ethereum/eth/src/main/java/org/hyperledger/besu/ethereum/eth/sync/backwardsync/BackwardSyncStep.java
  6. 18
      ethereum/eth/src/main/java/org/hyperledger/besu/ethereum/eth/sync/backwardsync/BackwardsSyncAlgorithm.java
  7. 30
      ethereum/eth/src/main/java/org/hyperledger/besu/ethereum/eth/sync/backwardsync/ForwardSyncStep.java
  8. 2
      ethereum/eth/src/main/java/org/hyperledger/besu/ethereum/eth/sync/backwardsync/SyncStepStep.java
  9. 2
      ethereum/eth/src/test/java/org/hyperledger/besu/ethereum/eth/sync/backwardsync/BackwardSyncAlgSpec.java
  10. 8
      ethereum/eth/src/test/java/org/hyperledger/besu/ethereum/eth/sync/backwardsync/BackwardSyncStepTest.java

@ -12,6 +12,7 @@
- Added new RPC endpoints `debug_setHead` & `debug_replayBlock` [#4580](https://github.com/hyperledger/besu/pull/4580)
- Upgrade OpenTelemetry to version 1.19.0 [#3675](https://github.com/hyperledger/besu/pull/3675)
- Backward sync log UX improvements [#4655](https://github.com/hyperledger/besu/pull/4655)
- Backward sync: use retry switching peer when fetching data from peers [#4656](https://github.com/hyperledger/besu/pull/4656)
### Bug Fixes

@ -65,7 +65,7 @@ public class GetBodiesFromPeerTask extends AbstractPeerRequestTask<List<Block>>
headers.forEach(
(header) -> {
final BodyIdentifier bodyId = new BodyIdentifier(header);
bodyToHeaders.putIfAbsent(bodyId, new ArrayList<>());
bodyToHeaders.putIfAbsent(bodyId, new ArrayList<>(headers.size()));
bodyToHeaders.get(bodyId).add(header);
});
}
@ -112,7 +112,7 @@ public class GetBodiesFromPeerTask extends AbstractPeerRequestTask<List<Block>>
return Optional.empty();
}
final List<Block> blocks = new ArrayList<>();
final List<Block> blocks = new ArrayList<>(headers.size());
for (final BlockBody body : bodies) {
final List<BlockHeader> headers = bodyToHeaders.get(new BodyIdentifier(body));
if (headers == null) {

@ -0,0 +1,112 @@
/*
* Copyright contributors to Hyperledger Besu
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
*/
package org.hyperledger.besu.ethereum.eth.manager.task;
import static org.hyperledger.besu.util.Slf4jLambdaHelper.debugLambda;
import org.hyperledger.besu.ethereum.core.Block;
import org.hyperledger.besu.ethereum.core.BlockHeader;
import org.hyperledger.besu.ethereum.eth.manager.EthContext;
import org.hyperledger.besu.ethereum.eth.manager.EthPeer;
import org.hyperledger.besu.ethereum.eth.manager.exceptions.IncompleteResultsException;
import org.hyperledger.besu.ethereum.eth.manager.task.AbstractPeerTask.PeerTaskResult;
import org.hyperledger.besu.ethereum.mainnet.ProtocolSchedule;
import org.hyperledger.besu.plugin.services.MetricsSystem;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.CompletableFuture;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A retrying task that downloads full block bodies for a given list of headers, switching to a
 * different peer on each retry attempt (retry/switch semantics are inherited from {@code
 * AbstractRetryingSwitchingPeerTask}).
 *
 * <p>Each attempt delegates to a {@link GetBodiesFromPeerTask} pinned to the currently selected
 * peer. An empty response is treated as a retryable failure: the peer is penalized with a
 * useless-response record and an {@link IncompleteResultsException} is thrown so the parent task
 * retries with another peer.
 */
public class RetryingGetBlocksFromPeersTask
    extends AbstractRetryingSwitchingPeerTask<PeerTaskResult<List<Block>>> {

  private static final Logger LOG = LoggerFactory.getLogger(RetryingGetBlocksFromPeersTask.class);

  // Used to build the per-attempt GetBodiesFromPeerTask sub-task.
  private final ProtocolSchedule protocolSchedule;
  // Headers whose bodies are being requested; never mutated by this task.
  private final List<BlockHeader> headers;

  /**
   * Constructs the task.
   *
   * @param ethContext the Ethereum network context used to select peers and schedule sub-tasks
   * @param protocolSchedule the protocol schedule passed through to the body-fetching sub-task
   * @param metricsSystem metrics sink for task instrumentation
   * @param maxRetries maximum number of attempts (each on a possibly different peer)
   * @param headers the block headers to fetch bodies for
   */
  protected RetryingGetBlocksFromPeersTask(
      final EthContext ethContext,
      final ProtocolSchedule protocolSchedule,
      final MetricsSystem metricsSystem,
      final int maxRetries,
      final List<BlockHeader> headers) {
    // Objects::isNull: a null partial result means "nothing useful yet", so keep retrying.
    super(ethContext, metricsSystem, Objects::isNull, maxRetries);
    this.protocolSchedule = protocolSchedule;
    this.headers = headers;
  }

  /**
   * Static factory mirroring the constructor; the conventional entry point for callers.
   *
   * @return a new task that will fetch bodies for {@code headers}
   */
  public static RetryingGetBlocksFromPeersTask forHeaders(
      final ProtocolSchedule protocolSchedule,
      final EthContext ethContext,
      final MetricsSystem metricsSystem,
      final int maxRetries,
      final List<BlockHeader> headers) {
    return new RetryingGetBlocksFromPeersTask(
        ethContext, protocolSchedule, metricsSystem, maxRetries, headers);
  }

  /**
   * Runs one attempt against {@code currentPeer}.
   *
   * <p>On a non-empty response the future completes via {@code result.complete(...)} (the
   * completion mechanism of the parent task). On an empty response the peer is recorded as having
   * given a useless response and an {@link IncompleteResultsException} is thrown, which {@link
   * #isRetryableError(Throwable)} classifies as retryable.
   *
   * @param currentPeer the peer selected by the parent task for this attempt
   * @return a future with the peer's result for this attempt
   */
  @Override
  protected CompletableFuture<PeerTaskResult<List<Block>>> executeTaskOnCurrentPeer(
      final EthPeer currentPeer) {
    final GetBodiesFromPeerTask getBodiesTask =
        GetBodiesFromPeerTask.forHeaders(
            protocolSchedule, getEthContext(), headers, getMetricsSystem());
    getBodiesTask.assignPeer(currentPeer);
    return executeSubTask(getBodiesTask::run)
        .thenApply(
            peerResult -> {
              // Lambda-based logging: arguments are only evaluated if DEBUG is enabled.
              debugLambda(
                  LOG,
                  "Got {} blocks from peer {}, attempt {}",
                  peerResult.getResult()::size,
                  peerResult.getPeer()::toString,
                  this::getRetryCount);
              if (peerResult.getResult().isEmpty()) {
                currentPeer.recordUselessResponse("GetBodiesFromPeerTask");
                throw new IncompleteResultsException(
                    "No blocks returned by peer " + currentPeer.getShortNodeId());
              }
              result.complete(peerResult);
              return peerResult;
            });
  }

  /**
   * Treats an empty-response ({@link IncompleteResultsException}) as retryable, in addition to
   * whatever the parent task already considers retryable.
   */
  @Override
  protected boolean isRetryableError(final Throwable error) {
    return super.isRetryableError(error) || error instanceof IncompleteResultsException;
  }

  /**
   * Logs the failure at an appropriate level before delegating to the parent's error handling:
   * quieter while retries remain, a final summary once they are exhausted.
   */
  @Override
  protected void handleTaskError(final Throwable error) {
    if (getRetryCount() < getMaxRetries()) {
      debugLambda(
          LOG,
          "Failed to get {} blocks from peer {}, attempt {}, retrying later",
          headers::size,
          this::getAssignedPeer,
          this::getRetryCount);
    } else {
      LOG.debug("Failed to get {} blocks after {} retries", headers.size(), getRetryCount());
    }
    super.handleTaskError(error);
  }
}

@ -21,17 +21,21 @@ import org.hyperledger.besu.datatypes.Hash;
import org.hyperledger.besu.ethereum.core.BlockHeader;
import org.hyperledger.besu.ethereum.eth.manager.EthContext;
import org.hyperledger.besu.ethereum.eth.manager.EthPeer;
import org.hyperledger.besu.ethereum.eth.manager.exceptions.IncompleteResultsException;
import org.hyperledger.besu.ethereum.mainnet.ProtocolSchedule;
import org.hyperledger.besu.plugin.services.MetricsSystem;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import com.google.common.annotations.VisibleForTesting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class RetryingGetHeadersEndingAtFromPeerByHashTask
extends AbstractRetryingPeerTask<List<BlockHeader>> {
extends AbstractRetryingSwitchingPeerTask<List<BlockHeader>> {
private static final Logger LOG =
LoggerFactory.getLogger(RetryingGetHeadersEndingAtFromPeerByHashTask.class);
private final Hash referenceHash;
private final ProtocolSchedule protocolSchedule;
@ -43,8 +47,9 @@ public class RetryingGetHeadersEndingAtFromPeerByHashTask
final EthContext ethContext,
final Hash referenceHash,
final int count,
final MetricsSystem metricsSystem) {
super(ethContext, 4, List::isEmpty, metricsSystem);
final MetricsSystem metricsSystem,
final int maxRetries) {
super(ethContext, metricsSystem, List::isEmpty, maxRetries);
this.protocolSchedule = protocolSchedule;
this.count = count;
checkNotNull(referenceHash);
@ -56,23 +61,43 @@ public class RetryingGetHeadersEndingAtFromPeerByHashTask
final EthContext ethContext,
final Hash referenceHash,
final int count,
final MetricsSystem metricsSystem) {
final MetricsSystem metricsSystem,
final int maxRetries) {
return new RetryingGetHeadersEndingAtFromPeerByHashTask(
protocolSchedule, ethContext, referenceHash, count, metricsSystem);
protocolSchedule, ethContext, referenceHash, count, metricsSystem, maxRetries);
}
@Override
protected CompletableFuture<List<BlockHeader>> executePeerTask(
final Optional<EthPeer> assignedPeer) {
protected CompletableFuture<List<BlockHeader>> executeTaskOnCurrentPeer(
final EthPeer currentPeer) {
final AbstractGetHeadersFromPeerTask task =
GetHeadersFromPeerByHashTask.endingAtHash(
protocolSchedule, getEthContext(), referenceHash, count, getMetricsSystem());
assignedPeer.ifPresent(task::assignPeer);
task.assignPeer(currentPeer);
return executeSubTask(task::run)
.thenApply(
peerResult -> {
LOG.debug(
"Get {} block headers by hash {} from peer {} has result {}",
count,
referenceHash,
currentPeer,
peerResult.getResult());
if (peerResult.getResult().isEmpty()) {
currentPeer.recordUselessResponse("GetHeadersFromPeerByHashTask");
throw new IncompleteResultsException(
"No block headers for hash "
+ referenceHash
+ " returned by peer "
+ currentPeer.getShortNodeId());
}
result.complete(peerResult.getResult());
return peerResult.getResult();
});
}
@Override
protected boolean isRetryableError(final Throwable error) {
return super.isRetryableError(error) || error instanceof IncompleteResultsException;
}
}

@ -69,17 +69,14 @@ public class BackwardSyncStep {
context.getEthContext(),
hash,
batchSize,
context.getMetricsSystem());
context.getMetricsSystem(),
context.getEthContext().getEthPeers().peerCount());
return context
.getEthContext()
.getScheduler()
.scheduleSyncWorkerTask(retryingGetHeadersEndingAtFromPeerByHashTask::run)
.thenApply(
blockHeaders -> {
if (blockHeaders.isEmpty()) {
throw new BackwardSyncException(
"Did not receive a headers for hash " + hash.toHexString(), true);
}
debugLambda(
LOG,
"Got headers {} -> {}",

@ -14,7 +14,6 @@
* SPDX-License-Identifier: Apache-2.0
*
*/
package org.hyperledger.besu.ethereum.eth.sync.backwardsync;
import static org.hyperledger.besu.util.Slf4jLambdaHelper.debugLambda;
@ -24,10 +23,12 @@ import org.hyperledger.besu.datatypes.Hash;
import org.hyperledger.besu.ethereum.chain.MutableBlockchain;
import org.hyperledger.besu.ethereum.core.Block;
import org.hyperledger.besu.ethereum.core.BlockHeader;
import org.hyperledger.besu.ethereum.eth.manager.task.WaitForPeersTask;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import com.google.common.annotations.VisibleForTesting;
@ -142,12 +143,17 @@ public class BackwardsSyncAlgorithm {
LOG.debug("Waiting for preconditions...");
final boolean await = latch.await(2, TimeUnit.MINUTES);
if (await) {
LOG.debug("Preconditions meet...");
LOG.debug("Preconditions meet, ensure at least one peer is connected");
waitForPeers(1).get();
}
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new BackwardSyncException("Wait for TTD preconditions interrupted");
throw new BackwardSyncException(
"Wait for TTD preconditions interrupted (" + e.getMessage() + ")");
} catch (ExecutionException e) {
throw new BackwardSyncException(
"Error while waiting for at least one connected peer (" + e.getMessage() + ")", true);
} finally {
context.getSyncState().unsubscribeTTDReached(idTTD);
context.getSyncState().unsubscribeInitialConditionReached(idIS);
@ -205,4 +211,10 @@ public class BackwardsSyncAlgorithm {
blockchain.setFinalized(newFinalized);
}
private CompletableFuture<Void> waitForPeers(final int count) {
final WaitForPeersTask waitForPeersTask =
WaitForPeersTask.create(context.getEthContext(), count, context.getMetricsSystem());
return waitForPeersTask.run();
}
}

@ -19,7 +19,7 @@ import static org.hyperledger.besu.util.Slf4jLambdaHelper.debugLambda;
import org.hyperledger.besu.ethereum.core.Block;
import org.hyperledger.besu.ethereum.core.BlockHeader;
import org.hyperledger.besu.ethereum.eth.manager.task.AbstractPeerTask;
import org.hyperledger.besu.ethereum.eth.manager.task.GetBodiesFromPeerTask;
import org.hyperledger.besu.ethereum.eth.manager.task.RetryingGetBlocksFromPeersTask;
import java.util.Comparator;
import java.util.List;
@ -59,18 +59,31 @@ public class ForwardSyncStep {
() -> blockHeaders.get(0).getNumber(),
() -> blockHeaders.get(blockHeaders.size() - 1).getNumber(),
() -> blockHeaders.get(0).getHash().toHexString());
return requestBodies(blockHeaders).thenApply(this::saveBlocks);
return requestBodies(blockHeaders)
.thenApply(this::saveBlocks)
.exceptionally(
throwable -> {
context.halveBatchSize();
debugLambda(
LOG,
"Getting {} blocks from peers failed with reason {}, reducing batch size to {}",
blockHeaders::size,
throwable::getMessage,
context::getBatchSize);
return null;
});
}
}
@VisibleForTesting
protected CompletableFuture<List<Block>> requestBodies(final List<BlockHeader> blockHeaders) {
final GetBodiesFromPeerTask getBodiesFromPeerTask =
GetBodiesFromPeerTask.forHeaders(
final RetryingGetBlocksFromPeersTask getBodiesFromPeerTask =
RetryingGetBlocksFromPeersTask.forHeaders(
context.getProtocolSchedule(),
context.getEthContext(),
blockHeaders,
context.getMetricsSystem());
context.getMetricsSystem(),
context.getEthContext().getEthPeers().peerCount(),
blockHeaders);
final CompletableFuture<AbstractPeerTask.PeerTaskResult<List<Block>>> run =
getBodiesFromPeerTask.run();
@ -112,7 +125,10 @@ public class ForwardSyncStep {
}
}
context.resetBatchSize();
if (blocks.size() == context.getBatchSize()) {
// reset the batch size only if we got a full batch
context.resetBatchSize();
}
return null;
}
}

@ -55,7 +55,7 @@ public class SyncStepStep {
context.getProtocolSchedule(),
context.getEthContext(),
context.getMetricsSystem(),
context.getEthContext().getEthPeers().getMaxPeers(),
context.getEthContext().getEthPeers().peerCount(),
Optional.of(targetHash),
UNUSED);
return context

@ -149,6 +149,7 @@ public class BackwardSyncAlgSpec {
when(context.getSyncState().subscribeTTDReached(any())).thenReturn(88L);
when(context.getSyncState().subscribeCompletionReached(any())).thenReturn(99L);
when(context.getEthContext().getEthPeers().peerCount()).thenReturn(1);
final CompletableFuture<Void> voidCompletableFuture = algorithm.waitForReady();
@ -175,6 +176,7 @@ public class BackwardSyncAlgSpec {
when(context.getSyncState().subscribeTTDReached(any())).thenReturn(88L);
when(context.getSyncState().subscribeCompletionReached(any())).thenReturn(99L);
when(context.getEthContext().getEthPeers().peerCount()).thenReturn(1);
final CompletableFuture<Void> voidCompletableFuture = algorithm.waitForReady();
Thread.sleep(50);

@ -33,6 +33,7 @@ import org.hyperledger.besu.ethereum.eth.manager.EthContext;
import org.hyperledger.besu.ethereum.eth.manager.EthProtocolManager;
import org.hyperledger.besu.ethereum.eth.manager.EthProtocolManagerTestUtil;
import org.hyperledger.besu.ethereum.eth.manager.RespondingEthPeer;
import org.hyperledger.besu.ethereum.eth.manager.exceptions.MaxRetriesReachedException;
import org.hyperledger.besu.ethereum.mainnet.MainnetBlockHeaderFunctions;
import org.hyperledger.besu.ethereum.mainnet.MainnetProtocolSchedule;
import org.hyperledger.besu.ethereum.mainnet.ProtocolSchedule;
@ -181,7 +182,7 @@ public class BackwardSyncStepTest {
}
@Test
public void shouldThrowWhenResponseIsEmptyWhenRequestingHeader() throws Exception {
public void shouldThrowWhenResponseIsEmptyWhenRequestingHeader() {
BackwardSyncStep step = new BackwardSyncStep(context, createBackwardChain(REMOTE_HEIGHT - 1));
final Block lookingForBlock = getBlockByNumber(REMOTE_HEIGHT - 2);
@ -191,10 +192,7 @@ public class BackwardSyncStepTest {
step.requestHeaders(lookingForBlock.getHeader().getHash());
peer.respondWhileOtherThreadsWork(responder, () -> !future.isDone());
assertThatThrownBy(future::get)
.getCause()
.isInstanceOf(BackwardSyncException.class)
.hasMessageContaining("Did not receive a headers for hash");
assertThatThrownBy(future::get).cause().isInstanceOf(MaxRetriesReachedException.class);
}
@Test

Loading…
Cancel
Save