From 7d9182906f60e4496ed3e421bf0314a611158019 Mon Sep 17 00:00:00 2001 From: Monani Mihir Date: Mon, 12 Feb 2024 09:14:06 -0800 Subject: [PATCH] HBASE-28204 : Region Canary can take lot more time If any region (except the first region) starts with delete markers (#5675) Signed-off-by: David Manning Signed-off-by: Viraj Jasani --- .../apache/hadoop/hbase/tool/CanaryTool.java | 37 +++++++++++++++---- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java index 95c92536567b..9252c6687734 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java @@ -510,19 +510,44 @@ public Void call() { private Void readColumnFamily(Table table, ColumnFamilyDescriptor column) { byte[] startKey = null; - Get get = null; Scan scan = null; ResultScanner rs = null; StopWatch stopWatch = new StopWatch(); startKey = region.getStartKey(); // Can't do a get on empty start row so do a Scan of first element if any instead. if (startKey.length > 0) { - get = new Get(startKey); + Get get = new Get(startKey); get.setCacheBlocks(false); get.setFilter(new FirstKeyOnlyFilter()); get.addFamily(column.getName()); + // Converting get object to scan to enable RAW SCAN. + // This will work for all the regions of the HBase tables except first region of the table. + scan = new Scan(get); + scan.setRaw(rawScanEnabled); } else { scan = new Scan(); + // In case of first region of the HBase Table, we do not have start-key for the region. + // For Region Canary, we only need to scan a single row/cell in the region to make sure that + // region is accessible. 
+ // + // When an HBase table has more than one empty region at the start of the row-key space, + // Canary will create multiple scan objects to find the first available row in the table, + // scanning the regions in sequence until it finds one. + // + // This could result in millions of scans, depending on the table size and the number of + // consecutive empty regions. In a test environment, for a table with no data and 1100 + // empty regions, a single canary run created between half a million and one million scans + // to complete successfully. + // + // Since the first region of the table doesn't have a start key, we set the region's end key + // as the stop row with inclusive=false to limit the scan to a single region. + // + // TODO: In the future, we can streamline Canary behaviour for all regions by doing a scan + // with startRow inclusive and stopRow exclusive, instead of handling the first region of + // the table differently from the rest of its regions. That would simplify the + // implementation. For now this change has been kept minimal to avoid any unnecessary + // perf impact. + scan.withStopRow(region.getEndKey(), false); LOG.debug("rawScan {} for {}", rawScanEnabled, region.getTable()); scan.setRaw(rawScanEnabled); scan.setCaching(1); @@ -536,12 +561,8 @@ private Void readColumnFamily(Table table, ColumnFamilyDescriptor column) { column.getNameAsString(), Bytes.toStringBinary(startKey)); try { stopWatch.start(); - if (startKey.length > 0) { - table.get(get); - } else { - rs = table.getScanner(scan); - rs.next(); - } + rs = table.getScanner(scan); + rs.next(); stopWatch.stop(); this.readWriteLatency.add(stopWatch.getTime()); sink.publishReadTiming(serverName, region, column, stopWatch.getTime());