Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HBASE-28702 TestBackupMerge fails 100% of times on flaky dashboard #6078

Merged
merged 1 commit into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_BANDWIDTH_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_DEBUG;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_DEBUG_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_IGNORECHECKSUM;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_IGNORECHECKSUM_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_KEEP;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_KEEP_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_LIST;
Expand Down Expand Up @@ -151,6 +153,7 @@ protected void addOptions() {
addOptWithArg(OPTION_BANDWIDTH, OPTION_BANDWIDTH_DESC);
addOptWithArg(OPTION_LIST, OPTION_BACKUP_LIST_DESC);
addOptWithArg(OPTION_WORKERS, OPTION_WORKERS_DESC);
addOptNoArg(OPTION_IGNORECHECKSUM, OPTION_IGNORECHECKSUM_DESC);
addOptWithArg(OPTION_RECORD_NUMBER, OPTION_RECORD_NUMBER_DESC);
addOptWithArg(OPTION_SET, OPTION_SET_DESC);
addOptWithArg(OPTION_PATH, OPTION_PATH_DESC);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,11 @@ public enum BackupPhase {
*/
private long bandwidth = -1;

/**
* Do not verify checksum between source snapshot and exported snapshot
*/
private boolean noChecksumVerify;

public BackupInfo() {
backupTableInfoMap = new HashMap<>();
}
Expand Down Expand Up @@ -197,6 +202,14 @@ public void setBandwidth(long bandwidth) {
this.bandwidth = bandwidth;
}

public void setNoChecksumVerify(boolean noChecksumVerify) {
this.noChecksumVerify = noChecksumVerify;
}

public boolean getNoChecksumVerify() {
return noChecksumVerify;
}

public void setBackupTableInfoMap(Map<TableName, BackupTableInfo> backupTableInfoMap) {
this.backupTableInfoMap = backupTableInfoMap;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ public Builder withBandwidthPerTasks(int bandwidth) {
return this;
}

public Builder withNoChecksumVerify(boolean noChecksumVerify) {
request.setNoChecksumVerify(noChecksumVerify);
return this;
}

public Builder withYarnPoolName(String name) {
request.setYarnPoolName(name);
return this;
Expand All @@ -81,6 +86,7 @@ public BackupRequest build() {
private String targetRootDir;
private int totalTasks = -1;
private long bandwidth = -1L;
private boolean noChecksumVerify = false;
private String backupSetName;
private String yarnPoolName;

Expand Down Expand Up @@ -132,6 +138,15 @@ public long getBandwidth() {
return this.bandwidth;
}

private BackupRequest setNoChecksumVerify(boolean noChecksumVerify) {
this.noChecksumVerify = noChecksumVerify;
return this;
}

public boolean getNoChecksumVerify() {
return noChecksumVerify;
}

public String getBackupSetName() {
return backupSetName;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ public interface BackupRestoreConstants {

String OPTION_TABLE = "t";
String OPTION_TABLE_DESC =
"Table name. If specified, only backup images," + " which contain this table will be listed.";
"Table name. If specified, only backup images, which contain this table will be listed.";

String OPTION_LIST = "l";
String OPTION_TABLE_LIST_DESC = "Table name list, comma-separated.";
Expand All @@ -74,6 +74,12 @@ public interface BackupRestoreConstants {
String OPTION_WORKERS = "w";
String OPTION_WORKERS_DESC = "Number of parallel MapReduce tasks to execute";

String OPTION_IGNORECHECKSUM = "i";
String OPTION_IGNORECHECKSUM_DESC =
"Ignore checksum verify between source snapshot and exported snapshot."
+ " Especially when the source and target file system types are different,"
+ " we should use -i option to skip checksum-checks.";

String OPTION_RECORD_NUMBER = "n";
String OPTION_RECORD_NUMBER_DESC = "Number of records of backup history. Default: 10";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -581,7 +581,7 @@ public String backupTables(BackupRequest request) throws IOException {
request = builder.withBackupType(request.getBackupType()).withTableList(tableList)
.withTargetRootDir(request.getTargetRootDir()).withBackupSetName(request.getBackupSetName())
.withTotalTasks(request.getTotalTasks()).withBandwidthPerTasks((int) request.getBandwidth())
.build();
.withNoChecksumVerify(request.getNoChecksumVerify()).build();

TableBackupClient client;
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_BANDWIDTH_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_DEBUG;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_DEBUG_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_IGNORECHECKSUM;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_IGNORECHECKSUM_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_KEEP;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_KEEP_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_LIST;
Expand Down Expand Up @@ -329,6 +331,8 @@ public void execute() throws IOException {
? Integer.parseInt(cmdline.getOptionValue(OPTION_WORKERS))
: -1;

boolean ignoreChecksum = cmdline.hasOption(OPTION_IGNORECHECKSUM);

if (cmdline.hasOption(OPTION_YARN_QUEUE_NAME)) {
String queueName = cmdline.getOptionValue(OPTION_YARN_QUEUE_NAME);
// Set system property value for MR job
Expand All @@ -341,7 +345,8 @@ public void execute() throws IOException {
.withTableList(
tables != null ? Lists.newArrayList(BackupUtils.parseTableNames(tables)) : null)
.withTargetRootDir(targetBackupDir).withTotalTasks(workers)
.withBandwidthPerTasks(bandwidth).withBackupSetName(setName).build();
.withBandwidthPerTasks(bandwidth).withNoChecksumVerify(ignoreChecksum)
.withBackupSetName(setName).build();
String backupId = admin.backupTables(request);
System.out.println("Backup session " + backupId + " finished. Status: SUCCESS");
} catch (IOException e) {
Expand Down Expand Up @@ -394,6 +399,7 @@ protected void printUsage() {
options.addOption(OPTION_TABLE, true, OPTION_TABLE_LIST_DESC);
options.addOption(OPTION_YARN_QUEUE_NAME, true, OPTION_YARN_QUEUE_NAME_DESC);
options.addOption(OPTION_DEBUG, false, OPTION_DEBUG_DESC);
options.addOption(OPTION_IGNORECHECKSUM, false, OPTION_IGNORECHECKSUM_DESC);

HelpFormatter helpFormatter = new HelpFormatter();
helpFormatter.setLeftPadding(2);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,8 @@ public void close() {
* @throws BackupException exception
*/
public BackupInfo createBackupInfo(String backupId, BackupType type, List<TableName> tableList,
String targetRootDir, int workers, long bandwidth) throws BackupException {
String targetRootDir, int workers, long bandwidth, boolean noChecksumVerify)
throws BackupException {
if (targetRootDir == null) {
throw new BackupException("Wrong backup request parameter: target backup root directory");
}
Expand Down Expand Up @@ -230,6 +231,7 @@ public BackupInfo createBackupInfo(String backupId, BackupType type, List<TableN
targetRootDir);
backupInfo.setBandwidth(bandwidth);
backupInfo.setWorkers(workers);
backupInfo.setNoChecksumVerify(noChecksumVerify);
return backupInfo;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ protected void snapshotCopy(BackupInfo backupInfo) throws Exception {
argsList.add("-mappers");
argsList.add(String.valueOf(backupInfo.getWorkers()));
}
if (backupInfo.getNoChecksumVerify()) {
argsList.add("-no-checksum-verify");
}

String[] args = argsList.toArray(new String[0]);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ public void init(final Connection conn, final String backupId, BackupRequest req
this.conf = conn.getConfiguration();
this.fs = CommonFSUtils.getCurrentFileSystem(conf);
backupInfo = backupManager.createBackupInfo(backupId, request.getBackupType(), tableList,
request.getTargetRootDir(), request.getTotalTasks(), request.getBandwidth());
request.getTargetRootDir(), request.getTotalTasks(), request.getBandwidth(),
request.getNoChecksumVerify());
if (tableList == null || tableList.isEmpty()) {
this.tableList = new ArrayList<>(backupInfo.getTables());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -395,9 +395,14 @@ Table insertIntoTable(Connection conn, TableName table, byte[] family, int id, i

protected BackupRequest createBackupRequest(BackupType type, List<TableName> tables,
String path) {
return createBackupRequest(type, tables, path, false);
}

protected BackupRequest createBackupRequest(BackupType type, List<TableName> tables, String path,
boolean noChecksumVerify) {
BackupRequest.Builder builder = new BackupRequest.Builder();
BackupRequest request =
builder.withBackupType(type).withTableList(tables).withTargetRootDir(path).build();
BackupRequest request = builder.withBackupType(type).withTableList(tables)
.withTargetRootDir(path).withNoChecksumVerify(noChecksumVerify).build();
return request;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,15 +138,15 @@ public void testIncBackupMergeRestoreSeparateFs() throws Exception {
BackupAdminImpl client = new BackupAdminImpl(conn);
List<TableName> tables = Lists.newArrayList(table1, table2);

BackupRequest request = createBackupRequest(BackupType.FULL, tables, BACKUP_ROOT_DIR);
BackupRequest request = createBackupRequest(BackupType.FULL, tables, BACKUP_ROOT_DIR, true);
String backupIdFull = client.backupTables(request);
assertTrue(checkSucceeded(backupIdFull));

request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR);
request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR, true);
String backupIdIncMultiple = client.backupTables(request);
assertTrue(checkSucceeded(backupIdIncMultiple));

request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR);
request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR, true);
String backupIdIncMultiple2 = client.backupTables(request);
assertTrue(checkSucceeded(backupIdIncMultiple2));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -588,8 +588,10 @@ private void verifyCopyResult(final FileStatus inputStat, final FileStatus outpu
errMessage
.append(" You can choose file-level checksum validation via "
+ "-Ddfs.checksum.combine.mode=COMPOSITE_CRC when block-sizes"
+ " or filesystems are different.")
.append(" Or you can skip checksum-checks altogether with --no-checksum-verify.\n")
+ " or filesystems are different.\n")
.append(" Or you can skip checksum-checks altogether with -no-checksum-verify,")
.append(
" for the table backup scenario, you should use -i option to skip checksum-checks.\n")
.append(" (NOTE: By skipping checksums, one runs the risk of "
+ "masking data-corruption during file-transfer.)\n");
}
Expand Down