Skip to content

Commit

Permalink
HBASE-28702 TestBackupMerge fails 100% of times on flaky dashboard (#…
Browse files Browse the repository at this point in the history
…6078)

Signed-off-by: Duo Zhang <[email protected]>
Signed-off-by: Bryan Beaudreault <[email protected]>
  • Loading branch information
2005hithlj committed Jul 20, 2024
1 parent 6bed93e commit 10e7105
Show file tree
Hide file tree
Showing 12 changed files with 68 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_BANDWIDTH_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_DEBUG;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_DEBUG_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_IGNORECHECKSUM;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_IGNORECHECKSUM_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_KEEP;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_KEEP_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_LIST;
Expand Down Expand Up @@ -151,6 +153,7 @@ protected void addOptions() {
addOptWithArg(OPTION_BANDWIDTH, OPTION_BANDWIDTH_DESC);
addOptWithArg(OPTION_LIST, OPTION_BACKUP_LIST_DESC);
addOptWithArg(OPTION_WORKERS, OPTION_WORKERS_DESC);
addOptNoArg(OPTION_IGNORECHECKSUM, OPTION_IGNORECHECKSUM_DESC);
addOptWithArg(OPTION_RECORD_NUMBER, OPTION_RECORD_NUMBER_DESC);
addOptWithArg(OPTION_SET, OPTION_SET_DESC);
addOptWithArg(OPTION_PATH, OPTION_PATH_DESC);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,11 @@ public enum BackupPhase {
*/
private long bandwidth = -1;

/**
* Do not verify checksum between source snapshot and exported snapshot
*/
private boolean noChecksumVerify;

public BackupInfo() {
backupTableInfoMap = new HashMap<>();
}
Expand Down Expand Up @@ -197,6 +202,14 @@ public void setBandwidth(long bandwidth) {
this.bandwidth = bandwidth;
}

public void setNoChecksumVerify(boolean noChecksumVerify) {
this.noChecksumVerify = noChecksumVerify;
}

public boolean getNoChecksumVerify() {
return noChecksumVerify;
}

public void setBackupTableInfoMap(Map<TableName, BackupTableInfo> backupTableInfoMap) {
this.backupTableInfoMap = backupTableInfoMap;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ public Builder withBandwidthPerTasks(int bandwidth) {
return this;
}

public Builder withNoChecksumVerify(boolean noChecksumVerify) {
request.setNoChecksumVerify(noChecksumVerify);
return this;
}

public Builder withYarnPoolName(String name) {
request.setYarnPoolName(name);
return this;
Expand All @@ -81,6 +86,7 @@ public BackupRequest build() {
private String targetRootDir;
private int totalTasks = -1;
private long bandwidth = -1L;
private boolean noChecksumVerify = false;
private String backupSetName;
private String yarnPoolName;

Expand Down Expand Up @@ -132,6 +138,15 @@ public long getBandwidth() {
return this.bandwidth;
}

private BackupRequest setNoChecksumVerify(boolean noChecksumVerify) {
this.noChecksumVerify = noChecksumVerify;
return this;
}

public boolean getNoChecksumVerify() {
return noChecksumVerify;
}

public String getBackupSetName() {
return backupSetName;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ public interface BackupRestoreConstants {

String OPTION_TABLE = "t";
String OPTION_TABLE_DESC =
"Table name. If specified, only backup images," + " which contain this table will be listed.";
"Table name. If specified, only backup images, which contain this table will be listed.";

String OPTION_LIST = "l";
String OPTION_TABLE_LIST_DESC = "Table name list, comma-separated.";
Expand All @@ -74,6 +74,12 @@ public interface BackupRestoreConstants {
String OPTION_WORKERS = "w";
String OPTION_WORKERS_DESC = "Number of parallel MapReduce tasks to execute";

String OPTION_IGNORECHECKSUM = "i";
String OPTION_IGNORECHECKSUM_DESC =
"Ignore checksum verify between source snapshot and exported snapshot."
+ " Especially when the source and target file system types are different,"
+ " we should use -i option to skip checksum-checks.";

String OPTION_RECORD_NUMBER = "n";
String OPTION_RECORD_NUMBER_DESC = "Number of records of backup history. Default: 10";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -581,7 +581,7 @@ public String backupTables(BackupRequest request) throws IOException {
request = builder.withBackupType(request.getBackupType()).withTableList(tableList)
.withTargetRootDir(request.getTargetRootDir()).withBackupSetName(request.getBackupSetName())
.withTotalTasks(request.getTotalTasks()).withBandwidthPerTasks((int) request.getBandwidth())
.build();
.withNoChecksumVerify(request.getNoChecksumVerify()).build();

TableBackupClient client;
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_BANDWIDTH_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_DEBUG;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_DEBUG_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_IGNORECHECKSUM;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_IGNORECHECKSUM_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_KEEP;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_KEEP_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_LIST;
Expand Down Expand Up @@ -329,6 +331,8 @@ public void execute() throws IOException {
? Integer.parseInt(cmdline.getOptionValue(OPTION_WORKERS))
: -1;

boolean ignoreChecksum = cmdline.hasOption(OPTION_IGNORECHECKSUM);

if (cmdline.hasOption(OPTION_YARN_QUEUE_NAME)) {
String queueName = cmdline.getOptionValue(OPTION_YARN_QUEUE_NAME);
// Set system property value for MR job
Expand All @@ -341,7 +345,8 @@ public void execute() throws IOException {
.withTableList(
tables != null ? Lists.newArrayList(BackupUtils.parseTableNames(tables)) : null)
.withTargetRootDir(targetBackupDir).withTotalTasks(workers)
.withBandwidthPerTasks(bandwidth).withBackupSetName(setName).build();
.withBandwidthPerTasks(bandwidth).withNoChecksumVerify(ignoreChecksum)
.withBackupSetName(setName).build();
String backupId = admin.backupTables(request);
System.out.println("Backup session " + backupId + " finished. Status: SUCCESS");
} catch (IOException e) {
Expand Down Expand Up @@ -394,6 +399,7 @@ protected void printUsage() {
options.addOption(OPTION_TABLE, true, OPTION_TABLE_LIST_DESC);
options.addOption(OPTION_YARN_QUEUE_NAME, true, OPTION_YARN_QUEUE_NAME_DESC);
options.addOption(OPTION_DEBUG, false, OPTION_DEBUG_DESC);
options.addOption(OPTION_IGNORECHECKSUM, false, OPTION_IGNORECHECKSUM_DESC);

HelpFormatter helpFormatter = new HelpFormatter();
helpFormatter.setLeftPadding(2);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,8 @@ public void close() {
* @throws BackupException exception
*/
public BackupInfo createBackupInfo(String backupId, BackupType type, List<TableName> tableList,
String targetRootDir, int workers, long bandwidth) throws BackupException {
String targetRootDir, int workers, long bandwidth, boolean noChecksumVerify)
throws BackupException {
if (targetRootDir == null) {
throw new BackupException("Wrong backup request parameter: target backup root directory");
}
Expand Down Expand Up @@ -230,6 +231,7 @@ public BackupInfo createBackupInfo(String backupId, BackupType type, List<TableN
targetRootDir);
backupInfo.setBandwidth(bandwidth);
backupInfo.setWorkers(workers);
backupInfo.setNoChecksumVerify(noChecksumVerify);
return backupInfo;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ protected void snapshotCopy(BackupInfo backupInfo) throws Exception {
argsList.add("-mappers");
argsList.add(String.valueOf(backupInfo.getWorkers()));
}
if (backupInfo.getNoChecksumVerify()) {
argsList.add("-no-checksum-verify");
}

String[] args = argsList.toArray(new String[0]);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ public void init(final Connection conn, final String backupId, BackupRequest req
this.conf = conn.getConfiguration();
this.fs = CommonFSUtils.getCurrentFileSystem(conf);
backupInfo = backupManager.createBackupInfo(backupId, request.getBackupType(), tableList,
request.getTargetRootDir(), request.getTotalTasks(), request.getBandwidth());
request.getTargetRootDir(), request.getTotalTasks(), request.getBandwidth(),
request.getNoChecksumVerify());
if (tableList == null || tableList.isEmpty()) {
this.tableList = new ArrayList<>(backupInfo.getTables());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -395,9 +395,14 @@ Table insertIntoTable(Connection conn, TableName table, byte[] family, int id, i

protected BackupRequest createBackupRequest(BackupType type, List<TableName> tables,
String path) {
return createBackupRequest(type, tables, path, false);
}

protected BackupRequest createBackupRequest(BackupType type, List<TableName> tables, String path,
boolean noChecksumVerify) {
BackupRequest.Builder builder = new BackupRequest.Builder();
BackupRequest request =
builder.withBackupType(type).withTableList(tables).withTargetRootDir(path).build();
BackupRequest request = builder.withBackupType(type).withTableList(tables)
.withTargetRootDir(path).withNoChecksumVerify(noChecksumVerify).build();
return request;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,15 +138,15 @@ public void testIncBackupMergeRestoreSeparateFs() throws Exception {
BackupAdminImpl client = new BackupAdminImpl(conn);
List<TableName> tables = Lists.newArrayList(table1, table2);

BackupRequest request = createBackupRequest(BackupType.FULL, tables, BACKUP_ROOT_DIR);
BackupRequest request = createBackupRequest(BackupType.FULL, tables, BACKUP_ROOT_DIR, true);
String backupIdFull = client.backupTables(request);
assertTrue(checkSucceeded(backupIdFull));

request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR);
request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR, true);
String backupIdIncMultiple = client.backupTables(request);
assertTrue(checkSucceeded(backupIdIncMultiple));

request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR);
request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR, true);
String backupIdIncMultiple2 = client.backupTables(request);
assertTrue(checkSucceeded(backupIdIncMultiple2));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -589,8 +589,10 @@ private void verifyCopyResult(final FileStatus inputStat, final FileStatus outpu
errMessage
.append(" You can choose file-level checksum validation via "
+ "-Ddfs.checksum.combine.mode=COMPOSITE_CRC when block-sizes"
+ " or filesystems are different.")
.append(" Or you can skip checksum-checks altogether with --no-checksum-verify.\n")
+ " or filesystems are different.\n")
.append(" Or you can skip checksum-checks altogether with -no-checksum-verify,")
.append(
" for the table backup scenario, you should use -i option to skip checksum-checks.\n")
.append(" (NOTE: By skipping checksums, one runs the risk of "
+ "masking data-corruption during file-transfer.)\n");
}
Expand Down

0 comments on commit 10e7105

Please sign in to comment.