Skip to content

Commit

Permalink
HBASE-28656 Optimize the verifyCopyResult logic in ExportSnapshot (#5996
Browse files Browse the repository at this point in the history
)

Signed-off-by: Duo Zhang <[email protected]>
Signed-off-by: Wei-Chiu Chuang <[email protected]>
  • Loading branch information
2005hithlj authored Jun 21, 2024
1 parent f8aa3e2 commit dd694e4
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,15 @@ public enum Counter {
BYTES_COPIED
}

/**
* Indicates the checksum comparison result.
*/
public enum ChecksumComparison {
TRUE, // checksum comparison is compatible and true.
FALSE, // checksum comparison is compatible and false.
INCOMPATIBLE, // checksum comparison is not compatible.
}

private static class ExportMapper
extends Mapper<BytesWritable, NullWritable, NullWritable, NullWritable> {
private static final Logger LOG = LoggerFactory.getLogger(ExportMapper.class);
Expand Down Expand Up @@ -533,6 +542,9 @@ private FileChecksum getFileChecksum(final FileSystem fs, final Path path) {
}
}

/**
* Utility to compare the file length and checksums for the paths specified.
*/
private void verifyCopyResult(final FileStatus inputStat, final FileStatus outputStat)
throws IOException {
long inputLen = inputStat.getLen();
Expand All @@ -547,20 +559,64 @@ private void verifyCopyResult(final FileStatus inputStat, final FileStatus outpu

// If length==0, we will skip checksum
if (inputLen != 0 && verifyChecksum) {
FileChecksum inChecksum = getFileChecksum(inputFs, inputPath);
if (inChecksum == null) {
LOG.warn("Input file " + inputPath + " checksums are not available");
}
FileChecksum outChecksum = getFileChecksum(outputFs, outputPath);
if (outChecksum == null) {
LOG.warn("Output file " + outputPath + " checksums are not available");
}
if (inChecksum != null && outChecksum != null && !inChecksum.equals(outChecksum)) {
throw new IOException("Checksum mismatch between " + inputPath + " and " + outputPath);
FileChecksum inChecksum = getFileChecksum(inputFs, inputStat.getPath());
FileChecksum outChecksum = getFileChecksum(outputFs, outputStat.getPath());

ChecksumComparison checksumComparison = verifyChecksum(inChecksum, outChecksum);
if (!checksumComparison.equals(ChecksumComparison.TRUE)) {
StringBuilder errMessage = new StringBuilder("Checksum mismatch between ")
.append(inputPath).append(" and ").append(outputPath).append(".");

boolean addSkipHint = false;
String inputScheme = inputFs.getScheme();
String outputScheme = outputFs.getScheme();
if (!inputScheme.equals(outputScheme)) {
errMessage.append(" Input and output filesystems are of different types.\n")
.append("Their checksum algorithms may be incompatible.");
addSkipHint = true;
} else if (inputStat.getBlockSize() != outputStat.getBlockSize()) {
errMessage.append(" Input and output differ in block-size.");
addSkipHint = true;
} else if (
inChecksum != null && outChecksum != null
&& !inChecksum.getAlgorithmName().equals(outChecksum.getAlgorithmName())
) {
errMessage.append(" Input and output checksum algorithms are of different types.");
addSkipHint = true;
}
if (addSkipHint) {
errMessage
.append(" You can choose file-level checksum validation via "
+ "-Ddfs.checksum.combine.mode=COMPOSITE_CRC when block-sizes"
+ " or filesystems are different.")
.append(" Or you can skip checksum-checks altogether with --no-checksum-verify.\n")
.append(" (NOTE: By skipping checksums, one runs the risk of "
+ "masking data-corruption during file-transfer.)\n");
}
throw new IOException(errMessage.toString());
}
}
}

/**
* Utility to compare checksums
*/
private ChecksumComparison verifyChecksum(final FileChecksum inChecksum,
final FileChecksum outChecksum) {
// If the input or output checksum is null, or the algorithms of input and output are not
// equal, that means there is no comparison
// and return not compatible. else if matched, return compatible with the matched result.
if (
inChecksum == null || outChecksum == null
|| !inChecksum.getAlgorithmName().equals(outChecksum.getAlgorithmName())
) {
return ChecksumComparison.INCOMPATIBLE;
} else if (inChecksum.equals(outChecksum)) {
return ChecksumComparison.TRUE;
}
return ChecksumComparison.FALSE;
}

/**
* Check if the two files are equal by looking at the file length, and at the checksum (if user
* has specified the verifyChecksum flag).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,23 @@ public void testConsecutiveExports() throws Exception {
removeExportDir(copyDir);
}

@Test
public void testExportWithChecksum() throws Exception {
// Test different schemes: input scheme is hdfs:// and output scheme is file://
// The checksum verification will fail
Path copyLocalDir = getLocalDestinationDir(TEST_UTIL);
testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, snapshotName, snapshotName,
tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyLocalDir, false, false,
getBypassRegionPredicate(), false, true);

// Test same schemes: input scheme is hdfs:// and output scheme is hdfs://
// The checksum verification will success
Path copyHdfsDir = getHdfsDestinationDir();
testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, snapshotName, snapshotName,
tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyHdfsDir, false, false,
getBypassRegionPredicate(), true, true);
}

@Test
public void testExportWithTargetName() throws Exception {
final String targetName = "testExportWithTargetName";
Expand Down Expand Up @@ -281,7 +298,7 @@ protected void testExportFileSystemState(final TableName tableName, final String
throws Exception {
testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, snapshotName, targetName,
filesExpected, TEST_UTIL.getDefaultRootDirPath(), copyDir, overwrite, resetTtl,
getBypassRegionPredicate(), true);
getBypassRegionPredicate(), true, false);
}

/**
Expand All @@ -290,8 +307,8 @@ protected void testExportFileSystemState(final TableName tableName, final String
protected static void testExportFileSystemState(final Configuration conf,
final TableName tableName, final String snapshotName, final String targetName,
final int filesExpected, final Path srcDir, Path rawTgtDir, final boolean overwrite,
final boolean resetTtl, final RegionPredicate bypassregionPredicate, boolean success)
throws Exception {
final boolean resetTtl, final RegionPredicate bypassregionPredicate, final boolean success,
final boolean checksumVerify) throws Exception {
FileSystem tgtFs = rawTgtDir.getFileSystem(conf);
FileSystem srcFs = srcDir.getFileSystem(conf);
Path tgtDir = rawTgtDir.makeQualified(tgtFs.getUri(), tgtFs.getWorkingDirectory());
Expand All @@ -312,6 +329,9 @@ protected static void testExportFileSystemState(final Configuration conf,
if (resetTtl) {
opts.add("--reset-ttl");
}
if (!checksumVerify) {
opts.add("--no-checksum-verify");
}

// Export Snapshot
int res = run(conf, new ExportSnapshot(), opts.toArray(new String[opts.size()]));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ public void testExportRetry() throws Exception {
conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 2);
conf.setInt("mapreduce.map.maxattempts", 3);
TestExportSnapshot.testExportFileSystemState(conf, tableName, snapshotName, snapshotName,
tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyDir, true, false, null, true);
tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyDir, true, false, null, true, false);
}

/**
Expand All @@ -167,6 +167,6 @@ public void testExportFailure() throws Exception {
conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 4);
conf.setInt("mapreduce.map.maxattempts", 3);
TestExportSnapshot.testExportFileSystemState(conf, tableName, snapshotName, snapshotName,
tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyDir, true, false, null, false);
tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyDir, true, false, null, false, false);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ static void testSnapshotWithRefsExportFileSystemState(FileSystem fs,
TableName tableName = builder.getTableDescriptor().getTableName();
TestExportSnapshot.testExportFileSystemState(testUtil.getConfiguration(), tableName,
snapshotName, snapshotName, snapshotFilesCount, testDir,
getDestinationDir(fs, testUtil, testDir), false, false, null, true);
getDestinationDir(fs, testUtil, testDir), false, false, null, true, false);
}

static Path getDestinationDir(FileSystem fs, HBaseCommonTestingUtil hctu, Path testDir)
Expand Down

0 comments on commit dd694e4

Please sign in to comment.