From d28e3fd3ec72874683af0114ca4bdef89721788a Mon Sep 17 00:00:00 2001 From: paxadax Date: Tue, 1 Oct 2024 17:06:03 +0100 Subject: [PATCH 1/7] feat: Use file SHA instead of last modification time --- .../src/jvm/org/apache/storm/blobstore/BlobStore.java | 2 +- .../jvm/org/apache/storm/blobstore/BlobStoreFile.java | 4 ++++ .../org/apache/storm/blobstore/LocalFsBlobStore.java | 2 +- .../apache/storm/blobstore/LocalFsBlobStoreFile.java | 10 ++++++++++ 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/storm-client/src/jvm/org/apache/storm/blobstore/BlobStore.java b/storm-client/src/jvm/org/apache/storm/blobstore/BlobStore.java index 8323884ae57..367141ae509 100644 --- a/storm-client/src/jvm/org/apache/storm/blobstore/BlobStore.java +++ b/storm-client/src/jvm/org/apache/storm/blobstore/BlobStore.java @@ -444,7 +444,7 @@ public BlobStoreFileInputStream(BlobStoreFile part) throws IOException { @Override public long getVersion() throws IOException { - return part.getModTime(); + return part.getVersion(); } @Override diff --git a/storm-client/src/jvm/org/apache/storm/blobstore/BlobStoreFile.java b/storm-client/src/jvm/org/apache/storm/blobstore/BlobStoreFile.java index 2f47978b336..bd901a0c234 100644 --- a/storm-client/src/jvm/org/apache/storm/blobstore/BlobStoreFile.java +++ b/storm-client/src/jvm/org/apache/storm/blobstore/BlobStoreFile.java @@ -42,6 +42,10 @@ public abstract class BlobStoreFile { public abstract long getModTime() throws IOException; + public long getVersion() throws IOException { + return getModTime(); + } + public abstract InputStream getInputStream() throws IOException; public abstract OutputStream getOutputStream() throws IOException; diff --git a/storm-server/src/main/java/org/apache/storm/blobstore/LocalFsBlobStore.java b/storm-server/src/main/java/org/apache/storm/blobstore/LocalFsBlobStore.java index a8f519d6453..f708946fbe2 100644 --- a/storm-server/src/main/java/org/apache/storm/blobstore/LocalFsBlobStore.java +++ b/storm-server/src/main/java/org/apache/storm/blobstore/LocalFsBlobStore.java @@ -291,7 +291,7 @@ public ReadableBlobMeta getBlobMeta(String key, Subject who) throws Authorizatio rbm.set_settable(meta); try { LocalFsBlobStoreFile pf = fbs.read(DATA_PREFIX + key); - rbm.set_version(pf.getModTime()); + rbm.set_version(pf.getVersion()); } catch (IOException e) { throw new RuntimeException(e); } diff --git a/storm-server/src/main/java/org/apache/storm/blobstore/LocalFsBlobStoreFile.java b/storm-server/src/main/java/org/apache/storm/blobstore/LocalFsBlobStoreFile.java index 2262e908176..aa271287623 100644 --- a/storm-server/src/main/java/org/apache/storm/blobstore/LocalFsBlobStoreFile.java +++ b/storm-server/src/main/java/org/apache/storm/blobstore/LocalFsBlobStoreFile.java @@ -20,8 +20,10 @@ import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.StandardCopyOption; +import java.util.Arrays; import java.util.regex.Matcher; import org.apache.storm.generated.SettableBlobMeta; +import org.apache.storm.shade.org.apache.commons.codec.digest.DigestUtils; public class LocalFsBlobStoreFile extends BlobStoreFile { @@ -72,6 +74,14 @@ public String getKey() { return key; } + @Override + public long getVersion() throws IOException { + try (FileInputStream fis = new FileInputStream(path)) { + byte[] bytes = DigestUtils.sha1(fis); + return Arrays.hashCode(bytes); + } + } + @Override public long getModTime() throws IOException { return path.lastModified(); From 788a88e81de0cedf20b6cff47ab0ba56d522c8aa Mon Sep 17 00:00:00 2001 From: paxadax Date: Tue, 1 Oct 2024 17:07:48 +0100 Subject: [PATCH 2/7] tests: Add unit tests for SHA version --- .../blobstore/LocalFsBlobStoreFileTest.java | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 storm-server/src/test/java/org/apache/storm/blobstore/LocalFsBlobStoreFileTest.java diff --git a/storm-server/src/test/java/org/apache/storm/blobstore/LocalFsBlobStoreFileTest.java b/storm-server/src/test/java/org/apache/storm/blobstore/LocalFsBlobStoreFileTest.java new file mode 100644 index 00000000000..e63e635bcfe --- /dev/null +++ b/storm-server/src/test/java/org/apache/storm/blobstore/LocalFsBlobStoreFileTest.java @@ -0,0 +1,60 @@ +package org.apache.storm.blobstore; + +import org.apache.storm.shade.org.apache.commons.codec.digest.DigestUtils; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.file.Files; +import java.util.Arrays; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +class LocalFsBlobStoreFileTest { + + private File tempFile; + private LocalFsBlobStoreFile blobStoreFile; + + @BeforeEach + public void setUp() throws IOException { + tempFile = Files.createTempFile(null, ".tmp").toFile(); + try (FileOutputStream fs = new FileOutputStream(tempFile)) { + fs.write("Content for SHA hash".getBytes()); + } + blobStoreFile = new LocalFsBlobStoreFile(tempFile.getParentFile(), tempFile.getName()); + } + + @Test + void testGetVersion() throws IOException { + long expectedVersion = Arrays.hashCode(DigestUtils.sha1("Content for SHA hash")); + long actualVersion = blobStoreFile.getVersion(); + assertEquals(expectedVersion, actualVersion, "The version should match the expected hash code."); + } + + @Test + void testGetVersion_Mismatch() throws IOException { + long expectedVersion = Arrays.hashCode(DigestUtils.sha1("Different content")); + long actualVersion = blobStoreFile.getVersion(); + assertNotEquals(expectedVersion, actualVersion, "The version shouldn't match the hash code of different content."); + } + + @Test + void testGetVersion_FileNotFound() { + boolean deleted = tempFile.delete(); + if (!deleted) { + throw new IllegalStateException("Failed to delete the temporary file."); + } + assertThrows(IOException.class, () -> blobStoreFile.getVersion(), "Should throw IOException if file is not found."); + } + + @Test + void testGetModTime() throws IOException { + long expectedModTime = tempFile.lastModified(); + long actualModTime = blobStoreFile.getModTime(); + assertEquals(expectedModTime, actualModTime, "The modification time should match the expected value."); + } +} From a79fd522d2a59a89e99ffdabba8a8e1451966296 Mon Sep 17 00:00:00 2001 From: paxadax Date: Tue, 1 Oct 2024 18:16:05 +0100 Subject: [PATCH 3/7] fix: add missing comment for rat-plugin --- .../storm/blobstore/LocalFsBlobStoreFileTest.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/storm-server/src/test/java/org/apache/storm/blobstore/LocalFsBlobStoreFileTest.java b/storm-server/src/test/java/org/apache/storm/blobstore/LocalFsBlobStoreFileTest.java index e63e635bcfe..20c71386e97 100644 --- a/storm-server/src/test/java/org/apache/storm/blobstore/LocalFsBlobStoreFileTest.java +++ b/storm-server/src/test/java/org/apache/storm/blobstore/LocalFsBlobStoreFileTest.java @@ -1,3 +1,15 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. The ASF licenses this file to you under the Apache License, Version + * 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + */ + package org.apache.storm.blobstore; import org.apache.storm.shade.org.apache.commons.codec.digest.DigestUtils; From 331e6e149908deb1c7f7757c94291adcccf2b593 Mon Sep 17 00:00:00 2001 From: paxadax Date: Wed, 2 Oct 2024 14:19:33 +0100 Subject: [PATCH 4/7] fix: Use sha256 instead of sha1 --- .../org/apache/storm/blobstore/LocalFsBlobStoreFile.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/storm-server/src/main/java/org/apache/storm/blobstore/LocalFsBlobStoreFile.java b/storm-server/src/main/java/org/apache/storm/blobstore/LocalFsBlobStoreFile.java index aa271287623..b1764a12bd4 100644 --- a/storm-server/src/main/java/org/apache/storm/blobstore/LocalFsBlobStoreFile.java +++ b/storm-server/src/main/java/org/apache/storm/blobstore/LocalFsBlobStoreFile.java @@ -22,8 +22,10 @@ import java.nio.file.StandardCopyOption; import java.util.Arrays; import java.util.regex.Matcher; + +import org.apache.commons.codec.digest.DigestUtils; import org.apache.storm.generated.SettableBlobMeta; -import org.apache.storm.shade.org.apache.commons.codec.digest.DigestUtils; + public class LocalFsBlobStoreFile extends BlobStoreFile { @@ -77,7 +79,7 @@ public String getKey() { @Override public long getVersion() throws IOException { try (FileInputStream fis = new FileInputStream(path)) { - byte[] bytes = DigestUtils.sha1(fis); + byte[] bytes = DigestUtils.sha256(fis); return Arrays.hashCode(bytes); } } From 83bb99aff009d99cd5d8cb92389b50b51fde937e Mon Sep 17 00:00:00 2001 From: paxadax Date: Wed, 2 Oct 2024 15:05:22 +0100 Subject: [PATCH 5/7] fix: fix tests --- .../apache/storm/blobstore/LocalFsBlobStoreFileTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/storm-server/src/test/java/org/apache/storm/blobstore/LocalFsBlobStoreFileTest.java b/storm-server/src/test/java/org/apache/storm/blobstore/LocalFsBlobStoreFileTest.java index 20c71386e97..0690128958a 100644 --- a/storm-server/src/test/java/org/apache/storm/blobstore/LocalFsBlobStoreFileTest.java +++ b/storm-server/src/test/java/org/apache/storm/blobstore/LocalFsBlobStoreFileTest.java @@ -12,7 +12,7 @@ package org.apache.storm.blobstore; -import org.apache.storm.shade.org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.codec.digest.DigestUtils; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -42,14 +42,14 @@ public void setUp() throws IOException { @Test void testGetVersion() throws IOException { - long expectedVersion = Arrays.hashCode(DigestUtils.sha1("Content for SHA hash")); + long expectedVersion = Arrays.hashCode(DigestUtils.sha256("Content for SHA hash")); long actualVersion = blobStoreFile.getVersion(); assertEquals(expectedVersion, actualVersion, "The version should match the expected hash code."); } @Test void testGetVersion_Mismatch() throws IOException { - long expectedVersion = Arrays.hashCode(DigestUtils.sha1("Different content")); + long expectedVersion = Arrays.hashCode(DigestUtils.sha256("Different content")); long actualVersion = blobStoreFile.getVersion(); assertNotEquals(expectedVersion, actualVersion, "The version shouldn't match the hash code of different content."); } From c135adc0f3adcb430fff5e42b7a5a2771f0c0969 Mon Sep 17 00:00:00 2001 From: paxadax Date: Thu, 3 Oct 2024 12:03:06 +0100 Subject: [PATCH 6/7] feat: Use Checksum instead of hash for faster computation --- .../storm/blobstore/LocalFsBlobStoreFile.java | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/storm-server/src/main/java/org/apache/storm/blobstore/LocalFsBlobStoreFile.java b/storm-server/src/main/java/org/apache/storm/blobstore/LocalFsBlobStoreFile.java index b1764a12bd4..f1236584155 100644 --- a/storm-server/src/main/java/org/apache/storm/blobstore/LocalFsBlobStoreFile.java +++ b/storm-server/src/main/java/org/apache/storm/blobstore/LocalFsBlobStoreFile.java @@ -2,9 +2,9 @@ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. The ASF licenses this file to you under the Apache License, Version * 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - * + *

* http://www.apache.org/licenses/LICENSE-2.0 - * + *

* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions * and limitations under the License. @@ -12,6 +12,9 @@ package org.apache.storm.blobstore; +import org.apache.commons.io.FileUtils; +import org.apache.storm.generated.SettableBlobMeta; + import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; @@ -20,11 +23,9 @@ import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.StandardCopyOption; -import java.util.Arrays; import java.util.regex.Matcher; - -import org.apache.commons.codec.digest.DigestUtils; -import org.apache.storm.generated.SettableBlobMeta; +import java.util.zip.CRC32C; +import java.util.zip.Checksum; public class LocalFsBlobStoreFile extends BlobStoreFile { @@ -33,6 +34,7 @@ public class LocalFsBlobStoreFile extends BlobStoreFile { private final boolean isTmp; private final File path; private final boolean mustBeNew; + private final Checksum checksumAlgorithm; private SettableBlobMeta meta; public LocalFsBlobStoreFile(File base, String name) { @@ -48,12 +50,14 @@ public LocalFsBlobStoreFile(File base, String name) { key = base.getName(); path = new File(base, name); mustBeNew = false; + checksumAlgorithm = new CRC32C(); } public LocalFsBlobStoreFile(File base, boolean isTmp, boolean mustBeNew) { key = base.getName(); this.isTmp = isTmp; this.mustBeNew = mustBeNew; + checksumAlgorithm = new CRC32C(); if (this.isTmp) { path = new File(base, System.currentTimeMillis() + TMP_EXT); } else { @@ -78,10 +82,7 @@ public String getKey() { @Override public long getVersion() throws IOException { - try (FileInputStream fis = new FileInputStream(path)) { - byte[] bytes = DigestUtils.sha256(fis); - return Arrays.hashCode(bytes); - } + return FileUtils.checksum(path, checksumAlgorithm).getValue(); } @Override From 381367bfceb5476f90e33911250e813bda7b9c15 Mon Sep 17 00:00:00 2001 From: paxadax Date: Thu, 3 Oct 2024 12:14:37 +0100 Subject: [PATCH 7/7] tests: Add tests for checksum --- .../storm/blobstore/LocalFsBlobStoreFile.java | 6 ++-- .../blobstore/LocalFsBlobStoreFileTest.java | 29 ++++++++----------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/storm-server/src/main/java/org/apache/storm/blobstore/LocalFsBlobStoreFile.java b/storm-server/src/main/java/org/apache/storm/blobstore/LocalFsBlobStoreFile.java index f1236584155..128377f9811 100644 --- a/storm-server/src/main/java/org/apache/storm/blobstore/LocalFsBlobStoreFile.java +++ b/storm-server/src/main/java/org/apache/storm/blobstore/LocalFsBlobStoreFile.java @@ -12,9 +12,6 @@ package org.apache.storm.blobstore; -import org.apache.commons.io.FileUtils; -import org.apache.storm.generated.SettableBlobMeta; - import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; @@ -27,6 +24,9 @@ import java.util.zip.CRC32C; import java.util.zip.Checksum; +import org.apache.commons.io.FileUtils; +import org.apache.storm.generated.SettableBlobMeta; + public class LocalFsBlobStoreFile extends BlobStoreFile { diff --git a/storm-server/src/test/java/org/apache/storm/blobstore/LocalFsBlobStoreFileTest.java b/storm-server/src/test/java/org/apache/storm/blobstore/LocalFsBlobStoreFileTest.java index 0690128958a..2faae2a5185 100644 --- a/storm-server/src/test/java/org/apache/storm/blobstore/LocalFsBlobStoreFileTest.java +++ b/storm-server/src/test/java/org/apache/storm/blobstore/LocalFsBlobStoreFileTest.java @@ -12,7 +12,7 @@ package org.apache.storm.blobstore; -import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.io.FileUtils; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -20,47 +20,42 @@ import java.io.FileOutputStream; import java.io.IOException; import java.nio.file.Files; -import java.util.Arrays; +import java.util.zip.CRC32C; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; class LocalFsBlobStoreFileTest { private File tempFile; private LocalFsBlobStoreFile blobStoreFile; + private CRC32C checksumAlgorithm; @BeforeEach public void setUp() throws IOException { tempFile = Files.createTempFile(null, ".tmp").toFile(); try (FileOutputStream fs = new FileOutputStream(tempFile)) { - fs.write("Content for SHA hash".getBytes()); + fs.write("Content for checksum".getBytes()); } blobStoreFile = new LocalFsBlobStoreFile(tempFile.getParentFile(), tempFile.getName()); + checksumAlgorithm= new CRC32C(); } @Test void testGetVersion() throws IOException { - long expectedVersion = Arrays.hashCode(DigestUtils.sha256("Content for SHA hash")); + long expectedVersion = FileUtils.checksum(tempFile, checksumAlgorithm).getValue(); long actualVersion = blobStoreFile.getVersion(); - assertEquals(expectedVersion, actualVersion, "The version should match the expected hash code."); + assertEquals(expectedVersion, actualVersion, "The version should match the expected checksum value."); } @Test void testGetVersion_Mismatch() throws IOException { - long expectedVersion = Arrays.hashCode(DigestUtils.sha256("Different content")); - long actualVersion = blobStoreFile.getVersion(); - assertNotEquals(expectedVersion, actualVersion, "The version shouldn't match the hash code of different content."); - } - - @Test - void testGetVersion_FileNotFound() { - boolean deleted = tempFile.delete(); - if (!deleted) { - throw new IllegalStateException("Failed to delete the temporary file."); + long expectedVersion = FileUtils.checksum(tempFile, checksumAlgorithm).getValue(); + try (FileOutputStream fs = new FileOutputStream(tempFile)) { + fs.write("Different content".getBytes()); } - assertThrows(IOException.class, () -> blobStoreFile.getVersion(), "Should throw IOException if file is not found."); + long actualVersion = blobStoreFile.getVersion(); + assertNotEquals(expectedVersion, actualVersion, "The version shouldn't match the checksum value of different content."); } @Test