Skip to content

Commit

Permalink
HBASE-23205 Correctly update the position of WALs currently being rep…
Browse files Browse the repository at this point in the history
…licated
  • Loading branch information
Jeongdae Kim committed Oct 23, 2019
1 parent ce65db3 commit b3e9a4e
Show file tree
Hide file tree
Showing 7 changed files with 548 additions and 289 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.hadoop.hbase.replication.regionserver;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.Service;
Expand Down Expand Up @@ -145,8 +146,6 @@ public enum WorkerState {
FINISHED // The worker is done processing a recovered queue
}

private AtomicLong totalBufferUsed;

/**
* Instantiation method used by region servers
*
Expand Down Expand Up @@ -192,7 +191,6 @@ public void init(final Configuration conf, final FileSystem fs,
defaultBandwidth = this.conf.getLong("replication.source.per.peer.node.bandwidth", 0);
currentBandwidth = getCurrentBandwidth();
this.throttler = new ReplicationThrottler((double) currentBandwidth / 10.0);
this.totalBufferUsed = manager.getTotalBufferUsed();
LOG.info("peerClusterZnode=" + peerClusterZnode + ", ReplicationSource : " + peerId
+ ", currentBandwidth=" + this.currentBandwidth);
}
Expand Down Expand Up @@ -439,14 +437,22 @@ public String getPeerClusterId() {
}

@Override
@VisibleForTesting
public Path getCurrentPath() {
// only for testing
for (ReplicationSourceShipperThread worker : workerThreads.values()) {
if (worker.getCurrentPath() != null) return worker.getCurrentPath();
}
return null;
}

@VisibleForTesting
public long getLastLoggedPosition() {
for (ReplicationSourceShipperThread worker : workerThreads.values()) {
return worker.getLastLoggedPosition();
}
return 0;
}

private boolean isSourceActive() {
return !this.stopper.isStopped() && this.sourceRunning;
}
Expand Down Expand Up @@ -481,7 +487,7 @@ public String getStats() {
for (Map.Entry<String, ReplicationSourceShipperThread> entry : workerThreads.entrySet()) {
String walGroupId = entry.getKey();
ReplicationSourceShipperThread worker = entry.getValue();
long position = worker.getCurrentPosition();
long position = worker.getLastLoggedPosition();
Path currentPath = worker.getCurrentPath();
sb.append("walGroup [").append(walGroupId).append("]: ");
if (currentPath != null) {
Expand Down Expand Up @@ -535,7 +541,7 @@ public Map<String, ReplicationStatus> getWalGroupStatus() {
.withQueueSize(queueSize)
.withWalGroup(walGroupId)
.withCurrentPath(currentPath)
.withCurrentPosition(worker.getCurrentPosition())
.withCurrentPosition(worker.getLastLoggedPosition())
.withFileSize(fileSize)
.withAgeOfLastShippedOp(ageOfLastShippedOp)
.withReplicationDelay(replicationDelay);
Expand Down Expand Up @@ -599,22 +605,19 @@ public void run() {
try {
WALEntryBatch entryBatch = entryReader.take();
shipEdits(entryBatch);
releaseBufferQuota((int) entryBatch.getHeapSize());
if (replicationQueueInfo.isQueueRecovered() && entryBatch.getWalEntries().isEmpty()
&& entryBatch.getLastSeqIds().isEmpty()) {
LOG.debug("Finished recovering queue for group " + walGroupId + " of peer "
+ peerClusterZnode);
manager.releaseBufferQuota(entryBatch.getHeapSizeExcludeBulkLoad());
if (!entryBatch.hasMoreEntries()) {
LOG.debug("Finished recovering queue for group " + walGroupId + " of peer " + peerClusterZnode);
metrics.incrCompletedRecoveryQueue();
setWorkerState(WorkerState.FINISHED);
continue;
}
} catch (InterruptedException e) {
LOG.trace("Interrupted while waiting for next replication entry batch", e);
Thread.currentThread().interrupt();
}
}

if (replicationQueueInfo.isQueueRecovered() && getWorkerState() == WorkerState.FINISHED) {
if (getWorkerState() == WorkerState.FINISHED) {
// use synchronize to make sure one last thread will clean the queue
synchronized (this) {
Threads.sleep(100);// wait a short while for other worker thread to fully exit
Expand Down Expand Up @@ -676,18 +679,6 @@ private void checkBandwidthChangeAndResetThrottler() {
}
}

/**
* get batchEntry size excludes bulk load file sizes.
* Uses ReplicationSourceWALReader's static method.
*/
private int getBatchEntrySizeExcludeBulkLoad(WALEntryBatch entryBatch) {
int totalSize = 0;
for(Entry entry : entryBatch.getWalEntries()) {
totalSize += entryReader.getEntrySizeExcludeBulkLoad(entry);
}
return totalSize;
}

/**
* Do the shipping logic
*/
Expand All @@ -697,16 +688,14 @@ protected void shipEdits(WALEntryBatch entryBatch) {
currentPath = entryBatch.getLastWalPath();
int sleepMultiplier = 0;
if (entries.isEmpty()) {
if (lastLoggedPosition != lastReadPosition) {
updateLogPosition(lastReadPosition);
// if there was nothing to ship and it's not an error
// set "ageOfLastShippedOp" to <now> to indicate that we're current
metrics.setAgeOfLastShippedOp(EnvironmentEdgeManager.currentTime(), walGroupId);
}
updateLogPosition(lastReadPosition);
// if there was nothing to ship and it's not an error
// set "ageOfLastShippedOp" to <now> to indicate that we're current
metrics.setAgeOfLastShippedOp(EnvironmentEdgeManager.currentTime(), walGroupId);
return;
}
int currentSize = (int) entryBatch.getHeapSize();
int sizeExcludeBulkLoad = getBatchEntrySizeExcludeBulkLoad(entryBatch);
int sizeExcludeBulkLoad = (int) entryBatch.getHeapSizeExcludeBulkLoad();
while (isWorkerActive()) {
try {
checkBandwidthChangeAndResetThrottler();
Expand Down Expand Up @@ -787,7 +776,6 @@ protected void shipEdits(WALEntryBatch entryBatch) {
}

private void updateLogPosition(long lastReadPosition) {
manager.setPendingShipment(false);
manager.logPositionAndCleanOldLogs(currentPath, peerClusterZnode, lastReadPosition,
this.replicationQueueInfo.isQueueRecovered(), false);
lastLoggedPosition = lastReadPosition;
Expand Down Expand Up @@ -938,11 +926,11 @@ private Path getReplSyncUpPath(Path path) throws IOException {
}

public Path getCurrentPath() {
return this.entryReader.getCurrentPath();
return currentPath;
}

public long getCurrentPosition() {
return this.lastLoggedPosition;
public long getLastLoggedPosition() {
return lastLoggedPosition;
}

private boolean isWorkerActive() {
Expand Down Expand Up @@ -983,9 +971,5 @@ public void setWorkerState(WorkerState state) {
public WorkerState getWorkerState() {
return state;
}

private void releaseBufferQuota(int size) {
totalBufferUsed.addAndGet(-size);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,9 @@ public class ReplicationSourceManager implements ReplicationListener {
private final Random rand;
private final boolean replicationForBulkLoadDataEnabled;

private long totalBufferQuota;
private AtomicLong totalBufferUsed = new AtomicLong();

private boolean pendingShipment;

/**
* Creates a replication manager and sets the watch on all the other registered region servers
* @param replicationQueues the interface for manipulating replication queues
Expand Down Expand Up @@ -177,6 +176,8 @@ public ReplicationSourceManager(final ReplicationQueues replicationQueues,
replicationForBulkLoadDataEnabled =
conf.getBoolean(HConstants.REPLICATION_BULKLOAD_ENABLE_KEY,
HConstants.REPLICATION_BULKLOAD_ENABLE_DEFAULT);
totalBufferQuota = conf.getLong(HConstants.REPLICATION_SOURCE_TOTAL_BUFFER_KEY,
HConstants.REPLICATION_SOURCE_TOTAL_BUFFER_DFAULT);
}

/**
Expand All @@ -192,18 +193,12 @@ public ReplicationSourceManager(final ReplicationQueues replicationQueues,
*/
public synchronized void logPositionAndCleanOldLogs(Path log, String id, long position,
boolean queueRecovered, boolean holdLogInZK) {
if (!this.pendingShipment) {
String fileName = log.getName();
this.replicationQueues.setLogPosition(id, fileName, position);
if (holdLogInZK) {
return;
}
cleanOldLogs(fileName, id, queueRecovered);
String fileName = log.getName();
this.replicationQueues.setLogPosition(id, fileName, position);
if (holdLogInZK) {
return;
}
}

public synchronized void setPendingShipment(boolean pendingShipment) {
this.pendingShipment = pendingShipment;
cleanOldLogs(fileName, id, queueRecovered);
}

/**
Expand Down Expand Up @@ -466,8 +461,27 @@ void postLogRoll(Path newLog) throws IOException {
}

@VisibleForTesting
public AtomicLong getTotalBufferUsed() {
return totalBufferUsed;
public long getTotalBufferUsed() {
return totalBufferUsed.get();
}

/**
* @param size delta size for grown buffer
* @return true if total buffer size limit reached, after adding size
*/
public boolean acquireBufferQuota(long size) {
return totalBufferUsed.addAndGet(size) >= totalBufferQuota;
}

public void releaseBufferQuota(long size) {
totalBufferUsed.addAndGet(-size);
}

/**
* @return true if total buffer size limit reached
*/
public boolean isBufferQuotaReached() {
return totalBufferUsed.get() >= totalBufferQuota;
}

/**
Expand Down
Loading

0 comments on commit b3e9a4e

Please sign in to comment.