From ed5fbe8b37ec707395517936ef56d07874c6507d Mon Sep 17 00:00:00 2001 From: Liangjun He Date: Tue, 16 Jul 2024 21:50:54 +0800 Subject: [PATCH] HBASE-28702 TestBackupMerge fails 100% of times on flaky dashboard --- .../apache/hadoop/hbase/backup/BackupDriver.java | 3 +++ .../apache/hadoop/hbase/backup/BackupInfo.java | 13 +++++++++++++ .../apache/hadoop/hbase/backup/BackupRequest.java | 15 +++++++++++++++ .../hbase/backup/BackupRestoreConstants.java | 8 +++++++- .../hadoop/hbase/backup/impl/BackupAdminImpl.java | 2 +- .../hadoop/hbase/backup/impl/BackupCommands.java | 8 +++++++- .../hadoop/hbase/backup/impl/BackupManager.java | 4 +++- .../hbase/backup/impl/FullTableBackupClient.java | 3 +++ .../hbase/backup/impl/TableBackupClient.java | 3 ++- .../hadoop/hbase/backup/TestBackupBase.java | 9 +++++++-- .../hadoop/hbase/backup/TestBackupMerge.java | 6 +++--- .../hadoop/hbase/snapshot/ExportSnapshot.java | 6 ++++-- 12 files changed, 68 insertions(+), 12 deletions(-) diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupDriver.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupDriver.java index 547a39c8d623..d55a280b4aa4 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupDriver.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupDriver.java @@ -22,6 +22,8 @@ import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_BANDWIDTH_DESC; import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_DEBUG; import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_DEBUG_DESC; +import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_IGNORECHECKSUM; +import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_IGNORECHECKSUM_DESC; import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_KEEP; import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_KEEP_DESC; import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_LIST; @@ -151,6 +153,7 @@ protected void addOptions() { addOptWithArg(OPTION_BANDWIDTH, OPTION_BANDWIDTH_DESC); addOptWithArg(OPTION_LIST, OPTION_BACKUP_LIST_DESC); addOptWithArg(OPTION_WORKERS, OPTION_WORKERS_DESC); + addOptNoArg(OPTION_IGNORECHECKSUM, OPTION_IGNORECHECKSUM_DESC); addOptWithArg(OPTION_RECORD_NUMBER, OPTION_RECORD_NUMBER_DESC); addOptWithArg(OPTION_SET, OPTION_SET_DESC); addOptWithArg(OPTION_PATH, OPTION_PATH_DESC); diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java index fdad0d549830..1fad5b6cfdb1 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java @@ -164,6 +164,11 @@ public enum BackupPhase { */ private long bandwidth = -1; + /** + * Do not verify checksum between source snapshot and exported snapshot + */ + private boolean noChecksumVerify; + public BackupInfo() { backupTableInfoMap = new HashMap<>(); } @@ -197,6 +202,14 @@ public void setBandwidth(long bandwidth) { this.bandwidth = bandwidth; } + public void setNoChecksumVerify(boolean noChecksumVerify) { + this.noChecksumVerify = noChecksumVerify; + } + + public boolean getNoChecksumVerify() { + return noChecksumVerify; + } + public void setBackupTableInfoMap(Map backupTableInfoMap) { this.backupTableInfoMap = backupTableInfoMap; } diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupRequest.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupRequest.java index c9c7a5b61810..aa2d5b44259f 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupRequest.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupRequest.java @@ -65,6 +65,11 @@ public Builder withBandwidthPerTasks(int bandwidth) { return this; } + public Builder withNoChecksumVerify(boolean noChecksumVerify) { + request.setNoChecksumVerify(noChecksumVerify); + return this; + } + public Builder withYarnPoolName(String name) { request.setYarnPoolName(name); return this; @@ -81,6 +86,7 @@ public BackupRequest build() { private String targetRootDir; private int totalTasks = -1; private long bandwidth = -1L; + private boolean noChecksumVerify = false; private String backupSetName; private String yarnPoolName; @@ -132,6 +138,15 @@ public long getBandwidth() { return this.bandwidth; } + private BackupRequest setNoChecksumVerify(boolean noChecksumVerify) { + this.noChecksumVerify = noChecksumVerify; + return this; + } + + public boolean getNoChecksumVerify() { + return noChecksumVerify; + } + public String getBackupSetName() { return backupSetName; } diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java index 56c454519d81..30a5674eb021 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java @@ -62,7 +62,7 @@ public interface BackupRestoreConstants { String OPTION_TABLE = "t"; String OPTION_TABLE_DESC = - "Table name. If specified, only backup images," + " which contain this table will be listed."; + "Table name. If specified, only backup images, which contain this table will be listed."; String OPTION_LIST = "l"; String OPTION_TABLE_LIST_DESC = "Table name list, comma-separated."; @@ -74,6 +74,12 @@ public interface BackupRestoreConstants { String OPTION_WORKERS = "w"; String OPTION_WORKERS_DESC = "Number of parallel MapReduce tasks to execute"; + String OPTION_IGNORECHECKSUM = "i"; + String OPTION_IGNORECHECKSUM_DESC = + "Ignore checksum verify between source snapshot and exported snapshot." + + " Especially when the source and target file system types are different," + + " we should use -i option to skip checksum-checks."; + String OPTION_RECORD_NUMBER = "n"; String OPTION_RECORD_NUMBER_DESC = "Number of records of backup history. Default: 10"; diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java index f500581e9d85..c36b398e5e86 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java @@ -581,7 +581,7 @@ public String backupTables(BackupRequest request) throws IOException { request = builder.withBackupType(request.getBackupType()).withTableList(tableList) .withTargetRootDir(request.getTargetRootDir()).withBackupSetName(request.getBackupSetName()) .withTotalTasks(request.getTotalTasks()).withBandwidthPerTasks((int) request.getBandwidth()) - .build(); + .withNoChecksumVerify(request.getNoChecksumVerify()).build(); TableBackupClient client; try { diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java index ce9c5bbe8fae..3bb3ed33f34d 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java @@ -22,6 +22,8 @@ import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_BANDWIDTH_DESC; import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_DEBUG; import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_DEBUG_DESC; +import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_IGNORECHECKSUM; +import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_IGNORECHECKSUM_DESC; import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_KEEP; import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_KEEP_DESC; import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_LIST; @@ -329,6 +331,8 @@ public void execute() throws IOException { ? Integer.parseInt(cmdline.getOptionValue(OPTION_WORKERS)) : -1; + boolean ignoreChecksum = cmdline.hasOption(OPTION_IGNORECHECKSUM); + if (cmdline.hasOption(OPTION_YARN_QUEUE_NAME)) { String queueName = cmdline.getOptionValue(OPTION_YARN_QUEUE_NAME); // Set system property value for MR job @@ -341,7 +345,8 @@ public void execute() throws IOException { .withTableList( tables != null ? Lists.newArrayList(BackupUtils.parseTableNames(tables)) : null) .withTargetRootDir(targetBackupDir).withTotalTasks(workers) - .withBandwidthPerTasks(bandwidth).withBackupSetName(setName).build(); + .withBandwidthPerTasks(bandwidth).withNoChecksumVerify(ignoreChecksum) + .withBackupSetName(setName).build(); String backupId = admin.backupTables(request); System.out.println("Backup session " + backupId + " finished. Status: SUCCESS"); } catch (IOException e) { @@ -394,6 +399,7 @@ protected void printUsage() { options.addOption(OPTION_TABLE, true, OPTION_TABLE_LIST_DESC); options.addOption(OPTION_YARN_QUEUE_NAME, true, OPTION_YARN_QUEUE_NAME_DESC); options.addOption(OPTION_DEBUG, false, OPTION_DEBUG_DESC); + options.addOption(OPTION_IGNORECHECKSUM, false, OPTION_IGNORECHECKSUM_DESC); HelpFormatter helpFormatter = new HelpFormatter(); helpFormatter.setLeftPadding(2); diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java index f0c93db4b4c2..41dc300abfaf 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java @@ -193,7 +193,8 @@ public void close() { * @throws BackupException exception */ public BackupInfo createBackupInfo(String backupId, BackupType type, List tableList, - String targetRootDir, int workers, long bandwidth) throws BackupException { + String targetRootDir, int workers, long bandwidth, boolean noChecksumVerify) + throws BackupException { if (targetRootDir == null) { throw new BackupException("Wrong backup request parameter: target backup root directory"); } @@ -230,6 +231,7 @@ public BackupInfo createBackupInfo(String backupId, BackupType type, List(backupInfo.getTables()); } diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java index ed17ef8a1173..86aa0f8bd923 100644 --- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java +++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java @@ -395,9 +395,14 @@ Table insertIntoTable(Connection conn, TableName table, byte[] family, int id, i protected BackupRequest createBackupRequest(BackupType type, List tables, String path) { + return createBackupRequest(type, tables, path, false); + } + + protected BackupRequest createBackupRequest(BackupType type, List tables, String path, + boolean noChecksumVerify) { BackupRequest.Builder builder = new BackupRequest.Builder(); - BackupRequest request = - builder.withBackupType(type).withTableList(tables).withTargetRootDir(path).build(); + BackupRequest request = builder.withBackupType(type).withTableList(tables) + .withTargetRootDir(path).withNoChecksumVerify(noChecksumVerify).build(); return request; } diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupMerge.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupMerge.java index 5a6d21dad84f..38204f68e31a 100644 --- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupMerge.java +++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupMerge.java @@ -138,15 +138,15 @@ public void testIncBackupMergeRestoreSeparateFs() throws Exception { BackupAdminImpl client = new BackupAdminImpl(conn); List tables = Lists.newArrayList(table1, table2); - BackupRequest request = createBackupRequest(BackupType.FULL, tables, BACKUP_ROOT_DIR); + BackupRequest request = createBackupRequest(BackupType.FULL, tables, BACKUP_ROOT_DIR, true); String backupIdFull = client.backupTables(request); assertTrue(checkSucceeded(backupIdFull)); - request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR); + request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR, true); String backupIdIncMultiple = client.backupTables(request); assertTrue(checkSucceeded(backupIdIncMultiple)); - request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR); + request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR, true); String backupIdIncMultiple2 = client.backupTables(request); assertTrue(checkSucceeded(backupIdIncMultiple2)); diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java index 4e0c54b718bb..a8a8a44ff466 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java @@ -588,8 +588,10 @@ private void verifyCopyResult(final FileStatus inputStat, final FileStatus outpu errMessage .append(" You can choose file-level checksum validation via " + "-Ddfs.checksum.combine.mode=COMPOSITE_CRC when block-sizes" - + " or filesystems are different.") - .append(" Or you can skip checksum-checks altogether with --no-checksum-verify.\n") + + " or filesystems are different.\n") + .append(" Or you can skip checksum-checks altogether with -no-checksum-verify,") + .append( + " for the table backup scenario, you should use -i option to skip checksum-checks.\n") .append(" (NOTE: By skipping checksums, one runs the risk of " + "masking data-corruption during file-transfer.)\n"); }