From 97a3723786ee6b7ef860205198050ffaa879e237 Mon Sep 17 00:00:00 2001 From: Liangjun He Date: Thu, 6 Jun 2024 11:08:11 +0800 Subject: [PATCH] HBASE-28625 ExportSnapshot should verify checksums for the source file and the target file (#5950) --- .../hadoop/hbase/snapshot/ExportSnapshot.java | 79 +++++++++++++------ 1 file changed, 53 insertions(+), 26 deletions(-) diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java index 7fb9c14aa50c..cf08970a2f5d 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java @@ -306,24 +306,38 @@ private void copyFile(final Context context, final SnapshotFileInfo inputInfo, in = new ThrottledInputStream(new BufferedInputStream(in), bandwidthMB * 1024 * 1024L); } + Path inputPath = inputStat.getPath(); try { context.getCounter(Counter.BYTES_EXPECTED).increment(inputStat.getLen()); // Ensure that the output folder is there and copy the file createOutputPath(outputPath.getParent()); FSDataOutputStream out = outputFs.create(outputPath, true); - try { - copyData(context, inputStat.getPath(), in, outputPath, out, inputStat.getLen()); - } finally { - out.close(); - } + + long stime = EnvironmentEdgeManager.currentTime(); + long totalBytesWritten = + copyData(context, inputPath, in, outputPath, out, inputStat.getLen()); + + // Verify the file length and checksum + verifyCopyResult(inputStat, outputFs.getFileStatus(outputPath)); + + long etime = EnvironmentEdgeManager.currentTime(); + LOG.info("copy completed for input=" + inputPath + " output=" + outputPath); + LOG + .info("size=" + totalBytesWritten + " (" + StringUtils.humanReadableInt(totalBytesWritten) + + ")" + " time=" + StringUtils.formatTimeDiff(etime, stime) + String + .format(" %.3fM/sec", (totalBytesWritten / ((etime - stime) / 1000.0)) / 1048576.0)); + context.getCounter(Counter.FILES_COPIED).increment(1); // Try to Preserve attributes if (!preserveAttributes(outputPath, inputStat)) { LOG.warn("You may have to run manually chown on: " + outputPath); } + } catch (IOException e) { + LOG.error("Error copying " + inputPath + " to " + outputPath, e); + context.getCounter(Counter.COPY_FAILED).increment(1); + throw e; } finally { - in.close(); injectTestFailure(context, inputInfo); } } @@ -400,7 +414,7 @@ private boolean stringIsNotEmpty(final String str) { return str != null && str.length() > 0; } - private void copyData(final Context context, final Path inputPath, final InputStream in, + private long copyData(final Context context, final Path inputPath, final InputStream in, final Path outputPath, final FSDataOutputStream out, final long inputFileSize) throws IOException { final String statusMessage = @@ -412,7 +426,6 @@ private void copyData(final Context context, final Path inputPath, final InputSt int reportBytes = 0; int bytesRead; - long stime = EnvironmentEdgeManager.currentTime(); while ((bytesRead = in.read(buffer)) > 0) { out.write(buffer, 0, bytesRead); totalBytesWritten += bytesRead; @@ -427,7 +440,6 @@ private void copyData(final Context context, final Path inputPath, final InputSt reportBytes = 0; } } - long etime = EnvironmentEdgeManager.currentTime(); context.getCounter(Counter.BYTES_COPIED).increment(reportBytes); context @@ -435,23 +447,10 @@ private void copyData(final Context context, final Path inputPath, final InputSt (totalBytesWritten / (float) inputFileSize) * 100.0f) + " from " + inputPath + " to " + outputPath); - // Verify that the written size match - if (totalBytesWritten != inputFileSize) { - String msg = "number of bytes copied not matching copied=" + totalBytesWritten - + " expected=" + inputFileSize + " for file=" + inputPath; - throw new IOException(msg); - } - - LOG.info("copy completed for input=" + inputPath + " output=" + outputPath); - LOG - .info("size=" + totalBytesWritten + " (" + StringUtils.humanReadableInt(totalBytesWritten) - + ")" + " time=" + StringUtils.formatTimeDiff(etime, stime) + String - .format(" %.3fM/sec", (totalBytesWritten / ((etime - stime) / 1000.0)) / 1048576.0)); - context.getCounter(Counter.FILES_COPIED).increment(1); - } catch (IOException e) { - LOG.error("Error copying " + inputPath + " to " + outputPath, e); - context.getCounter(Counter.COPY_FAILED).increment(1); - throw e; + return totalBytesWritten; + } finally { + out.close(); + in.close(); } } @@ -531,6 +530,34 @@ private FileChecksum getFileChecksum(final FileSystem fs, final Path path) { } } + private void verifyCopyResult(final FileStatus inputStat, final FileStatus outputStat) + throws IOException { + long inputLen = inputStat.getLen(); + long outputLen = outputStat.getLen(); + Path inputPath = inputStat.getPath(); + Path outputPath = outputStat.getPath(); + + if (inputLen != outputLen) { + throw new IOException("Mismatch in length of input:" + inputPath + " (" + inputLen + + ") and output:" + outputPath + " (" + outputLen + ")"); + } + + // If length==0, we will skip checksum + if (inputLen != 0 && verifyChecksum) { + FileChecksum inChecksum = getFileChecksum(inputFs, inputPath); + if (inChecksum == null) { + LOG.warn("Input file " + inputPath + " checksums are not available"); + } + FileChecksum outChecksum = getFileChecksum(outputFs, outputPath); + if (outChecksum == null) { + LOG.warn("Output file " + outputPath + " checksums are not available"); + } + if (inChecksum != null && outChecksum != null && !inChecksum.equals(outChecksum)) { + throw new IOException("Checksum mismatch between " + inputPath + " and " + outputPath); + } + } + } + /** * Check if the two files are equal by looking at the file length, and at the checksum (if user * has specified the verifyChecksum flag).