From 75a66f15d5f6edfd0237a62f579e6a652005d0cb Mon Sep 17 00:00:00 2001 From: stack Date: Tue, 24 Mar 2020 14:36:09 -0700 Subject: [PATCH] HBASE-24043 [Flakey Tests] TestAsyncRegionAdminApi, TestRegionMergeTransactionOnCluster fixes and debug hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java Edit of log about archiving that shows in middle of a table create; try to make it less disorientating. hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi.java Loosen assert. Compaction may have produced a single file only. Allow for this. hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java Make this test less furious given it is inline w/ a bunch of unit tests. hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide3.java Add debug hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestQuotaObserverChoreRegionReports.java Add wait on quota table to show up before moving forward; otherwise, attempt at quota setting fails. hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java Debug hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java Remove asserts that expected regions to still have a presence in fs after merge when a catalogjanitor may have cleaned up parent dirs. hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionReplicas.java Catch exception on way out and log it rather than let it fail test. hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestSnapshotScannerHDFSAclController.java Wait on acl table before proceeding. --- .../master/procedure/DeleteTableProcedure.java | 5 +++-- .../hbase/client/TestAsyncRegionAdminApi.java | 7 +++++-- .../client/TestAsyncTableGetMultiThreaded.java | 2 +- .../hadoop/hbase/client/TestFromClientSide3.java | 2 +- .../TestQuotaObserverChoreRegionReports.java | 14 ++++++++++++-- .../hadoop/hbase/regionserver/TestHRegion.java | 2 ++ .../TestRegionMergeTransactionOnCluster.java | 4 ++-- .../hbase/regionserver/TestRegionReplicas.java | 7 ++++++- .../TestSnapshotScannerHDFSAclController.java | 2 ++ 9 files changed, 34 insertions(+), 11 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java index c7923164f3f6..6c862af7b7f2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java @@ -23,7 +23,6 @@ import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; - import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -319,7 +318,9 @@ protected static void deleteFromFs(final MasterProcedureEnv env, .collect(Collectors.toList()); HFileArchiver.archiveRegions(env.getMasterConfiguration(), fs, mfs.getRootDir(), tempTableDir, regionDirList); - LOG.debug("Table '{}' archived!", tableName); + if (!regionDirList.isEmpty()) { + LOG.debug("Archived {} regions", tableName); + } } // Archive mob data diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi.java index 33778a7f0d5a..38f19c436dc7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi.java @@ -429,8 +429,11 @@ private void compactionTest(final TableName tableName, final int flushes, int countAfterSingleFamily = countStoreFilesInFamily(regions, family); assertTrue(countAfter < countBefore); if (!singleFamily) { - if (expectedState == CompactionState.MAJOR) assertTrue(families.length == countAfter); - else assertTrue(families.length < countAfter); + if (expectedState == CompactionState.MAJOR) { + assertEquals(families.length, countAfter); + } else { + assertTrue(families.length <= countAfter); + } } else { int singleFamDiff = countBeforeSingleFamily - countAfterSingleFamily; // assert only change was to single column family diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java index 8a993b871fc8..7c896957fa05 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java @@ -134,7 +134,7 @@ private void run(AtomicBoolean stop) throws InterruptedException, ExecutionExcep @Test public void test() throws Exception { LOG.info("====== Test started ======"); - int numThreads = 20; + int numThreads = 10; AtomicBoolean stop = new AtomicBoolean(false); ExecutorService executor = Executors.newFixedThreadPool(numThreads, Threads.newDaemonThreadFactory("TestAsyncGet-")); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide3.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide3.java index 6ac6086bef11..91f751855fbd 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide3.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide3.java @@ -250,7 +250,7 @@ public void testScanAfterDeletingSpecifiedRowV2() throws IOException, Interrupte table.put(put); Result r = table.get(new Get(row)); - assertEquals(3, r.size()); + assertEquals(r.toString(), 3, r.size()); assertEquals("testValue", Bytes.toString(CellUtil.cloneValue(r.rawCells()[0]))); assertEquals("qual0", Bytes.toString(CellUtil.cloneValue(r.rawCells()[1]))); assertEquals("qual1", Bytes.toString(CellUtil.cloneValue(r.rawCells()[2]))); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestQuotaObserverChoreRegionReports.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestQuotaObserverChoreRegionReports.java index 7391fa1c456e..2e344dfd7678 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestQuotaObserverChoreRegionReports.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestQuotaObserverChoreRegionReports.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -19,16 +19,17 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; - import java.io.IOException; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.Waiter; import org.apache.hadoop.hbase.client.Admin; @@ -42,6 +43,7 @@ import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.client.TableDescriptorBuilder; import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.util.Bytes; import org.junit.After; @@ -144,6 +146,13 @@ public void testMissingReportsRemovesQuota() throws Exception { // Expire the reports after 5 seconds conf.setInt(QuotaObserverChore.REGION_REPORT_RETENTION_DURATION_KEY, 5000); TEST_UTIL.startMiniCluster(1); + // Wait till quota table onlined. + TEST_UTIL.waitFor(10000, new Waiter.Predicate() { + @Override public boolean evaluate() throws Exception { + return MetaTableAccessor.tableExists(TEST_UTIL.getConnection(), + QuotaTableUtil.QUOTA_TABLE_NAME); + } + }); final String FAM1 = "f1"; @@ -158,6 +167,7 @@ public void testMissingReportsRemovesQuota() throws Exception { final SpaceViolationPolicy violationPolicy = SpaceViolationPolicy.NO_INSERTS; QuotaSettings settings = QuotaSettingsFactory.limitTableSpace(tn, sizeLimit, violationPolicy); final Admin admin = TEST_UTIL.getAdmin(); + LOG.info("SET QUOTA"); admin.setQuota(settings); final Connection conn = TEST_UTIL.getConnection(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java index 69da92d7bd97..ae146b7c97b8 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java @@ -2111,6 +2111,8 @@ public void testCheckAndDelete_ThatDeleteWasWritten() throws IOException { put.addColumn(fam1, qf3, val1); region.put(put); + LOG.info("get={}", region.get(new Get(row1).addColumn(fam1, qf1)).toString()); + // Multi-column delete Delete delete = new Delete(row1); delete.addColumn(fam1, qf1); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java index 8dc2ada82e38..018a72b530c8 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java @@ -282,8 +282,8 @@ public void testCleanMergeReference() throws Exception { ProcedureTestingUtility.waitNoProcedureRunning( TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor()); } - assertFalse(regionAdir.toString(), fs.exists(regionAdir)); - assertFalse(regionBdir.toString(), fs.exists(regionBdir)); + // We used to check for existence of region in fs but sometimes the region dir was + // cleaned up by the time we got here making the test sometimes flakey. assertTrue(cleaned > 0); mergedRegionResult = MetaTableAccessor.getRegionResult( diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionReplicas.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionReplicas.java index 9f1bd1f7fa4f..b4914aa413e3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionReplicas.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionReplicas.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TestMetaTableAccessor; import org.apache.hadoop.hbase.client.Consistency; @@ -427,7 +428,11 @@ public void run() { } } finally { HTU.deleteNumericRows(table, HConstants.CATALOG_FAMILY, startKey, endKey); - closeRegion(HTU, getRS(), hriSecondary); + try { + closeRegion(HTU, getRS(), hriSecondary); + } catch (NotServingRegionException e) { + LOG.info("Closing wrong region {}", hriSecondary, e); + } } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestSnapshotScannerHDFSAclController.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestSnapshotScannerHDFSAclController.java index 7424bc446760..990e1394f6be 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestSnapshotScannerHDFSAclController.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestSnapshotScannerHDFSAclController.java @@ -624,6 +624,7 @@ public void testRestoreSnapshot() throws Exception { String snapshot = namespace + "s1"; String snapshot2 = namespace + "s2"; String snapshot3 = namespace + "s3"; + TEST_UTIL.waitTableAvailable(PermissionStorage.ACL_TABLE_NAME); try (Table t = TestHDFSAclHelper.createTable(TEST_UTIL, table)) { TestHDFSAclHelper.put(t); @@ -633,6 +634,7 @@ public void testRestoreSnapshot() throws Exception { // delete admin.disableTable(table); admin.deleteTable(table); + LOG.info("Before scan of shapshot!"); TestHDFSAclHelper.canUserScanSnapshot(TEST_UTIL, grantUser, snapshot, -1); // restore snapshot and restore acl