From 07fe47cc64f931149416d035eb5d0582d7b4e9c5 Mon Sep 17 00:00:00 2001 From: huiruan Date: Wed, 2 Nov 2022 12:45:43 +0800 Subject: [PATCH 1/4] Use ReadWriteLock for region scanner readpoint map Signed-off-by: huiruan --- .../hadoop/hbase/regionserver/HRegion.java | 34 ++++++++++++++----- .../hbase/regionserver/RegionScannerImpl.java | 31 ++++++++++++----- 2 files changed, 49 insertions(+), 16 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 5ec6984d1ad1..4d3e293fec89 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -399,6 +399,9 @@ public void setRestoredRegion(boolean restoredRegion) { private final int miniBatchSize; final ConcurrentHashMap scannerReadPoints; + // Lock to manage concurrency between RegionScanner and getSmallestReadPoint + final ReentrantReadWriteLock scannerReadPointsLock = new ReentrantReadWriteLock(); + final boolean useReadWriteLockForReadPoints; /** * The sequence ID that was enLongAddered when this region was opened. @@ -446,18 +449,26 @@ public long getSmallestReadPoint() { long minimumReadPoint; // We need to ensure that while we are calculating the smallestReadPoint // no new RegionScanners can grab a readPoint that we are unaware of. - // We achieve this by synchronizing on the scannerReadPoints object. 
- synchronized (scannerReadPoints) { - minimumReadPoint = mvcc.getReadPoint(); - for (Long readPoint : this.scannerReadPoints.values()) { - if (readPoint < minimumReadPoint) { - minimumReadPoint = readPoint; - } + if (useReadWriteLockForReadPoints) { + scannerReadPointsLock.writeLock().lock(); + try { + minimumReadPoint = calculateSmallestReadPoint(); + } finally { + scannerReadPointsLock.writeLock().unlock(); + } + } else { + // We achieve this by synchronizing on the scannerReadPoints object. + synchronized (scannerReadPoints) { + minimumReadPoint = calculateSmallestReadPoint(); } } return minimumReadPoint; } + private long calculateSmallestReadPoint() { + return scannerReadPoints.values().stream().mapToLong(Long::longValue).min().orElse(0L); + } + /* * Data structure of write state flags used coordinating flushes, compactions and closes. */ @@ -798,6 +809,13 @@ public HRegion(final HRegionFileSystem fs, final WAL wal, final Configuration co } this.rowLockWaitDuration = tmpRowLockDuration; + this.useReadWriteLockForReadPoints = + conf.getBoolean("hbase.readpoints.read.write.lock.enable", false); + if (LOG.isDebugEnabled()) { + LOG.debug("region = {}, useReadWriteLockForReadPoints = {}", getRegionInfo(), + useReadWriteLockForReadPoints); + } + this.isLoadingCfsOnDemandDefault = conf.getBoolean(LOAD_CFS_ON_DEMAND_CONFIG_KEY, true); this.htableDescriptor = htd; Set families = this.htableDescriptor.getColumnFamilyNames(); @@ -8145,7 +8163,7 @@ private void doAttachReplicateRegionReplicaAction(WALKeyImpl walKey, WALEdit wal (3 * ClassSize.CONCURRENT_HASHMAP) + // lockedRows, scannerReadPoints, regionLockHolders WriteState.HEAP_SIZE + // writestate ClassSize.CONCURRENT_SKIPLISTMAP + ClassSize.CONCURRENT_SKIPLISTMAP_ENTRY + // stores - (2 * ClassSize.REENTRANT_LOCK) + // lock, updatesLock + (3 * ClassSize.REENTRANT_LOCK) + // lock, updatesLock, scannerReadPointsLock MultiVersionConcurrencyControl.FIXED_SIZE // mvcc + 2 * ClassSize.TREEMAP // maxSeqIdInStores, 
replicationScopes + 2 * ClassSize.ATOMIC_INTEGER // majorInProgress, minorInProgress diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java index d010d71f6cf9..404666c3e4d7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java @@ -130,19 +130,34 @@ private static boolean hasNonce(HRegion region, long nonce) { long mvccReadPoint = PackagePrivateFieldAccessor.getMvccReadPoint(scan); this.scannerReadPoints = region.scannerReadPoints; this.rsServices = region.getRegionServerServices(); - synchronized (scannerReadPoints) { - if (mvccReadPoint > 0) { - this.readPt = mvccReadPoint; - } else if (hasNonce(region, nonce)) { - this.readPt = rsServices.getNonceManager().getMvccFromOperationContext(nonceGroup, nonce); - } else { - this.readPt = region.getReadPoint(isolationLevel); + if (region.useReadWriteLockForReadPoints) { + region.scannerReadPointsLock.readLock().lock(); + try { + this.readPt = calculateReadPoint(isolationLevel, mvccReadPoint, nonceGroup, nonce); + scannerReadPoints.put(this, this.readPt); + } finally { + region.scannerReadPointsLock.readLock().unlock(); + } + } else { + synchronized (scannerReadPoints) { + this.readPt = calculateReadPoint(isolationLevel, mvccReadPoint, nonceGroup, nonce); + scannerReadPoints.put(this, this.readPt); } - scannerReadPoints.put(this, this.readPt); } initializeScanners(scan, additionalScanners); } + private long calculateReadPoint(IsolationLevel isolationLevel, long mvccReadPoint, + long nonceGroup, long nonce) { + if (mvccReadPoint > 0) { + return mvccReadPoint; + } + if (hasNonce(region, nonce)) { + return rsServices.getNonceManager().getMvccFromOperationContext(nonceGroup, nonce); + } + return region.getReadPoint(isolationLevel); + } + private void 
initializeScanners(Scan scan, List additionalScanners) throws IOException { // Here we separate all scanners into two lists - scanner that provide data required From 7180d9108a80b03f0eb37483c4c84e4a497097e0 Mon Sep 17 00:00:00 2001 From: huiruan Date: Wed, 2 Nov 2022 20:44:49 +0800 Subject: [PATCH 2/4] rename scannerReadPointsLock to smallestReadPointCalcLock Signed-off-by: huiruan --- .../java/org/apache/hadoop/hbase/regionserver/HRegion.java | 6 +++--- .../apache/hadoop/hbase/regionserver/RegionScannerImpl.java | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 4d3e293fec89..783cd0755711 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -400,7 +400,7 @@ public void setRestoredRegion(boolean restoredRegion) { final ConcurrentHashMap scannerReadPoints; // Lock to manage concurrency between RegionScanner and getSmallestReadPoint - final ReentrantReadWriteLock scannerReadPointsLock = new ReentrantReadWriteLock(); + final ReentrantReadWriteLock smallestReadPointCalcLock = new ReentrantReadWriteLock(); final boolean useReadWriteLockForReadPoints; /** @@ -450,11 +450,11 @@ public long getSmallestReadPoint() { // We need to ensure that while we are calculating the smallestReadPoint // no new RegionScanners can grab a readPoint that we are unaware of. if (useReadWriteLockForReadPoints) { - scannerReadPointsLock.writeLock().lock(); + smallestReadPointCalcLock.writeLock().lock(); try { minimumReadPoint = calculateSmallestReadPoint(); } finally { - scannerReadPointsLock.writeLock().unlock(); + smallestReadPointCalcLock.writeLock().unlock(); } } else { // We achieve this by synchronizing on the scannerReadPoints object. 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java index 404666c3e4d7..636ea0e14279 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java @@ -131,12 +131,12 @@ private static boolean hasNonce(HRegion region, long nonce) { this.scannerReadPoints = region.scannerReadPoints; this.rsServices = region.getRegionServerServices(); if (region.useReadWriteLockForReadPoints) { - region.scannerReadPointsLock.readLock().lock(); + region.smallestReadPointCalcLock.readLock().lock(); try { this.readPt = calculateReadPoint(isolationLevel, mvccReadPoint, nonceGroup, nonce); scannerReadPoints.put(this, this.readPt); } finally { - region.scannerReadPointsLock.readLock().unlock(); + region.smallestReadPointCalcLock.readLock().unlock(); } } else { synchronized (scannerReadPoints) { From 80fba109be62e71051a400ddab7182c75184558d Mon Sep 17 00:00:00 2001 From: huiruan Date: Wed, 2 Nov 2022 22:39:44 +0800 Subject: [PATCH 3/4] introduce the ReadPointCalculationLock Signed-off-by: huiruan --- .../hadoop/hbase/regionserver/HRegion.java | 39 +++------ .../ReadPointCalculationLock.java | 79 +++++++++++++++++++ .../hbase/regionserver/RegionScannerImpl.java | 34 +++----- 3 files changed, 102 insertions(+), 50 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReadPointCalculationLock.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 783cd0755711..566c1e0a32c9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -399,9 +399,7 @@ 
public void setRestoredRegion(boolean restoredRegion) { private final int miniBatchSize; final ConcurrentHashMap scannerReadPoints; - // Lock to manage concurrency between RegionScanner and getSmallestReadPoint - final ReentrantReadWriteLock smallestReadPointCalcLock = new ReentrantReadWriteLock(); - final boolean useReadWriteLockForReadPoints; + final ReadPointCalculationLock smallestReadPointCalcLock; /** * The sequence ID that was enLongAddered when this region was opened. @@ -446,27 +444,18 @@ public void setRestoredRegion(boolean restoredRegion) { * this readPoint, are included in every read operation. */ public long getSmallestReadPoint() { - long minimumReadPoint; // We need to ensure that while we are calculating the smallestReadPoint // no new RegionScanners can grab a readPoint that we are unaware of. - if (useReadWriteLockForReadPoints) { - smallestReadPointCalcLock.writeLock().lock(); - try { - minimumReadPoint = calculateSmallestReadPoint(); - } finally { - smallestReadPointCalcLock.writeLock().unlock(); - } - } else { - // We achieve this by synchronizing on the scannerReadPoints object. 
- synchronized (scannerReadPoints) { - minimumReadPoint = calculateSmallestReadPoint(); + smallestReadPointCalcLock.lock(ReadPointCalculationLock.LockType.CALCULATION_LOCK); + try { + long minimumReadPoint = mvcc.getReadPoint(); + for (Long readPoint : this.scannerReadPoints.values()) { + minimumReadPoint = Math.min(minimumReadPoint, readPoint); } + return minimumReadPoint; + } finally { + smallestReadPointCalcLock.unlock(ReadPointCalculationLock.LockType.CALCULATION_LOCK); } - return minimumReadPoint; - } - - private long calculateSmallestReadPoint() { - return scannerReadPoints.values().stream().mapToLong(Long::longValue).min().orElse(0L); } /* @@ -809,12 +798,7 @@ public HRegion(final HRegionFileSystem fs, final WAL wal, final Configuration co } this.rowLockWaitDuration = tmpRowLockDuration; - this.useReadWriteLockForReadPoints = - conf.getBoolean("hbase.readpoints.read.write.lock.enable", false); - if (LOG.isDebugEnabled()) { - LOG.debug("region = {}, useReadWriteLockForReadPoints = {}", getRegionInfo(), - useReadWriteLockForReadPoints); - } + this.smallestReadPointCalcLock = new ReadPointCalculationLock(conf); this.isLoadingCfsOnDemandDefault = conf.getBoolean(LOAD_CFS_ON_DEMAND_CONFIG_KEY, true); this.htableDescriptor = htd; @@ -8156,6 +8140,7 @@ private void doAttachReplicateRegionReplicaAction(WALKeyImpl walKey, WALEdit wal // 1 x RegionSplitPolicy - splitPolicy // 1 x MetricsRegion - metricsRegion // 1 x MetricsRegionWrapperImpl - metricsRegionWrapper + // 1 x ReadPointCalculationLock - smallestReadPointCalcLock public static final long DEEP_OVERHEAD = FIXED_OVERHEAD + ClassSize.OBJECT + // closeLock (2 * ClassSize.ATOMIC_BOOLEAN) + // closed, closing (3 * ClassSize.ATOMIC_LONG) + // numPutsWithoutWAL, dataInMemoryWithoutWAL, @@ -8163,7 +8148,7 @@ private void doAttachReplicateRegionReplicaAction(WALKeyImpl walKey, WALEdit wal (3 * ClassSize.CONCURRENT_HASHMAP) + // lockedRows, scannerReadPoints, regionLockHolders WriteState.HEAP_SIZE + // writestate 
ClassSize.CONCURRENT_SKIPLISTMAP + ClassSize.CONCURRENT_SKIPLISTMAP_ENTRY + // stores - (3 * ClassSize.REENTRANT_LOCK) + // lock, updatesLock, scannerReadPointsLock + (2 * ClassSize.REENTRANT_LOCK) + // lock, updatesLock MultiVersionConcurrencyControl.FIXED_SIZE // mvcc + 2 * ClassSize.TREEMAP // maxSeqIdInStores, replicationScopes + 2 * ClassSize.ATOMIC_INTEGER // majorInProgress, minorInProgress diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReadPointCalculationLock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReadPointCalculationLock.java new file mode 100644 index 000000000000..2e22810697aa --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReadPointCalculationLock.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver; + +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import org.apache.hadoop.conf.Configuration; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * Lock to manage concurrency between RegionScanner and getSmallestReadPoint. + */ +@InterfaceAudience.Private +public class ReadPointCalculationLock { + + public enum LockType { + CALCULATION_LOCK, + RECORDING_LOCK + } + + private final boolean useReadWriteLockForReadPoints; + private Lock lock; + private ReadWriteLock readWriteLock; + + ReadPointCalculationLock(Configuration conf) { + this.useReadWriteLockForReadPoints = + conf.getBoolean("hbase.readpoints.read.write.lock.enable", false); + if (useReadWriteLockForReadPoints) { + readWriteLock = new ReentrantReadWriteLock(); + } else { + lock = new ReentrantLock(); + } + } + + void lock(LockType lockType) { + if (useReadWriteLockForReadPoints) { + assert lock == null; + if (lockType == LockType.CALCULATION_LOCK) { + readWriteLock.writeLock().lock(); + } else { + readWriteLock.readLock().lock(); + } + } else { + assert readWriteLock == null; + lock.lock(); + } + } + + void unlock(LockType lockType) { + if (useReadWriteLockForReadPoints) { + assert lock == null; + if (lockType == LockType.CALCULATION_LOCK) { + readWriteLock.writeLock().unlock(); + } else { + readWriteLock.readLock().unlock(); + } + } else { + assert readWriteLock == null; + lock.unlock(); + } + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java index 636ea0e14279..11d4c20f581b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java +++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java @@ -130,34 +130,22 @@ private static boolean hasNonce(HRegion region, long nonce) { long mvccReadPoint = PackagePrivateFieldAccessor.getMvccReadPoint(scan); this.scannerReadPoints = region.scannerReadPoints; this.rsServices = region.getRegionServerServices(); - if (region.useReadWriteLockForReadPoints) { - region.smallestReadPointCalcLock.readLock().lock(); - try { - this.readPt = calculateReadPoint(isolationLevel, mvccReadPoint, nonceGroup, nonce); - scannerReadPoints.put(this, this.readPt); - } finally { - region.smallestReadPointCalcLock.readLock().unlock(); - } - } else { - synchronized (scannerReadPoints) { - this.readPt = calculateReadPoint(isolationLevel, mvccReadPoint, nonceGroup, nonce); - scannerReadPoints.put(this, this.readPt); + region.smallestReadPointCalcLock.lock(ReadPointCalculationLock.LockType.RECORDING_LOCK); + try { + if (mvccReadPoint > 0) { + this.readPt = mvccReadPoint; + } else if (hasNonce(region, nonce)) { + this.readPt = rsServices.getNonceManager().getMvccFromOperationContext(nonceGroup, nonce); + } else { + this.readPt = region.getReadPoint(isolationLevel); } + scannerReadPoints.put(this, this.readPt); + } finally { + region.smallestReadPointCalcLock.unlock(ReadPointCalculationLock.LockType.RECORDING_LOCK); } initializeScanners(scan, additionalScanners); } - private long calculateReadPoint(IsolationLevel isolationLevel, long mvccReadPoint, - long nonceGroup, long nonce) { - if (mvccReadPoint > 0) { - return mvccReadPoint; - } - if (hasNonce(region, nonce)) { - return rsServices.getNonceManager().getMvccFromOperationContext(nonceGroup, nonce); - } - return region.getReadPoint(isolationLevel); - } - private void initializeScanners(Scan scan, List additionalScanners) throws IOException { // Here we separate all scanners into two lists - scanner that provide data required From d65fdb3366610a645ef5a1eebddd8dcff95f0da9 Mon Sep 17 00:00:00 2001 From: 
huiruan Date: Thu, 3 Nov 2022 13:50:08 +0800 Subject: [PATCH 4/4] add some comments --- .../regionserver/ReadPointCalculationLock.java | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReadPointCalculationLock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReadPointCalculationLock.java index 2e22810697aa..380b82de93a6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReadPointCalculationLock.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReadPointCalculationLock.java @@ -25,7 +25,18 @@ import org.apache.yetus.audience.InterfaceAudience; /** - * Lock to manage concurrency between RegionScanner and getSmallestReadPoint. + * Lock to manage concurrency between {@link RegionScanner} and + * {@link HRegion#getSmallestReadPoint()}. We need to ensure that while we are calculating the + * smallest read point, no new scanners can modify the scannerReadPoints Map. We used to achieve + * this by synchronizing on the scannerReadPoints object. But this may block the read thread and + * reduce the read performance. Since the scannerReadPoints object is a + * {@link java.util.concurrent.ConcurrentHashMap}, which is thread-safe, a + * {@link RegionScanner} can record its read point concurrently; all it needs to do is + * acquire a shared lock. When we calculate the smallest read point, we need to acquire an + * exclusive lock. This can improve read performance in most scenarios, except when we have a lot + * of delta operations, like {@link org.apache.hadoop.hbase.client.Append} or + * {@link org.apache.hadoop.hbase.client.Increment}. So we introduce a flag to enable/disable this + * feature.
*/ @InterfaceAudience.Private public class ReadPointCalculationLock { @@ -41,7 +52,7 @@ public enum LockType { ReadPointCalculationLock(Configuration conf) { this.useReadWriteLockForReadPoints = - conf.getBoolean("hbase.readpoints.read.write.lock.enable", false); + conf.getBoolean("hbase.region.readpoints.read.write.lock.enable", false); if (useReadWriteLockForReadPoints) { readWriteLock = new ReentrantReadWriteLock(); } else {