From 42f81183243eabe0cc60b41e22bd08c1d71120a1 Mon Sep 17 00:00:00 2001
From: Louis Bergelson
Date: Mon, 14 Oct 2024 16:56:41 -0400
Subject: [PATCH] Adding dependency constraints to remove vulnerable components as reported in https://github.com/broadinstitute/gatk/pull/8950

---
 build.gradle                                   | 51 +++++++++++++------
 .../engine/spark/RangePartitionCoalescer.java  |  9 ++--
 .../PSBuildReferenceTaxonomyUtils.java         |  4 +-
 ...cturalVariationDiscoveryPipelineSpark.java  |  4 +-
 .../spark/sv/utils/ComplexityPartitioner.java  |  2 +
 .../hellbender/utils/io/IOUtils.java           |  4 +-
 .../utils/logging/OneShotLogger.java           |  3 +-
 .../RangePartitionCoalescerUnitTest.java       | 14 ++---
 .../FuncotatorReferenceTestUtils.java          |  4 +-
 9 files changed, 61 insertions(+), 34 deletions(-)

diff --git a/build.gradle b/build.gradle
index f65326d878d..d93f8e4caef 100644
--- a/build.gradle
+++ b/build.gradle
@@ -63,7 +63,7 @@ final htsjdkVersion = System.getProperty('htsjdk.version','4.1.3')
 final picardVersion = System.getProperty('picard.version','3.3.0')
 final barclayVersion = System.getProperty('barclay.version','5.0.0')
 final sparkVersion = System.getProperty('spark.version', '3.5.0')
-final hadoopVersion = System.getProperty('hadoop.version', '3.3.6')
+final hadoopVersion = System.getProperty('hadoop.version', '3.4.0')
 final disqVersion = System.getProperty('disq.version','0.3.8')
 final genomicsdbVersion = System.getProperty('genomicsdb.version','1.5.4')
 final bigQueryVersion = System.getProperty('bigQuery.version', '2.35.0')
@@ -175,7 +175,7 @@ configurations.configureEach {
         // force testng dependency so we don't pick up a different version via GenomicsDB
         force 'org.testng:testng:' + testNGVersion
         force 'org.broadinstitute:barclay:' + barclayVersion
-        force 'com.twitter:chill_2.12:0.10.0'
+        force 'com.twitter:chill_2.13:0.10.0'
         force 'org.apache.commons:commons-math3:3.5'

         // make sure we don't pick up an incorrect version of the GATK variant of the google-nio library
@@ -187,6 +187,7 @@
     configurations*.exclude group: 'org.slf4j', module: 'slf4j-jdk14' //exclude this to prevent slf4j complaining about to many slf4j bindings
     configurations*.exclude group: 'com.google.guava', module: 'guava-jdk5'
     configurations*.exclude group: 'junit', module: 'junit'
+    configurations*.exclude group: 'org.bouncycastle', module: 'bcprov-jdk15on'
 }

 tasks.withType(JavaCompile).configureEach {
@@ -221,13 +222,13 @@ configurations {
         // exclude Hadoop and Spark dependencies, since they are provided when running with Spark
         // (ref: http://unethicalblogger.com/2015/07/15/gradle-goodness-excluding-depends-from-shadow.html)
         exclude group: 'org.apache.hadoop'
-        exclude module: 'spark-core_2.12'
+        exclude module: 'spark-core_2.13'
         exclude group: 'org.slf4j'
         exclude module: 'jul-to-slf4j'
         exclude module: 'javax.servlet'
         exclude module: 'servlet-api'
         exclude group: 'com.esotericsoftware.kryo'
-        exclude module: 'spark-mllib_2.12.15'
+        exclude module: 'spark-mllib_2.13.15'
         exclude group: 'org.scala-lang'
         exclude module: 'kryo'
     }
@@ -276,14 +277,6 @@ dependencies {
     implementation 'org.apache.commons:commons-collections4:4.4'
     implementation 'org.apache.commons:commons-vfs2:2.9.0'
     implementation 'org.apache.commons:commons-configuration2:2.10.1'
-    constraints {
-        implementation('org.apache.commons:commons-text') {
-            version {
-                strictly '1.10.0'
-            }
-            because 'previous versions have a nasty vulnerability: https://nvd.nist.gov/vuln/detail/CVE-2022-42889'
-        }
-    }
     implementation 'org.apache.httpcomponents:httpclient:4.5.13'
     implementation 'commons-beanutils:commons-beanutils:1.9.4'
@@ -300,8 +293,7 @@ dependencies {
         exclude group: 'org.apache.commons'
     }

-    // TODO: migrate to mllib_2.12.15?
-    implementation ('org.apache.spark:spark-mllib_2.12:' + sparkVersion) {
+    implementation ('org.apache.spark:spark-mllib_2.13:' + sparkVersion) {
         // JUL is used by Google Dataflow as the backend logger, so exclude jul-to-slf4j to avoid a loop
         exclude module: 'jul-to-slf4j'
         exclude module: 'javax.servlet'
@@ -345,7 +337,36 @@ dependencies {
     // natural sort
     implementation('net.grey-panther:natural-comparator:1.1')
-    implementation('com.fasterxml.jackson.module:jackson-module-scala_2.12:2.9.8')
+    implementation('com.fasterxml.jackson.module:jackson-module-scala_2.13:2.9.8')
+
+    // Transitive dependencies with vulnerabilities are upgraded to newer versions in
+    // this block
+    constraints {
+        implementation('dnsjava:dnsjava:3.6.0') {
+            because '3.4.0 is vulnerable'
+        }
+        implementation('org.apache.commons:commons-compress:1.26.0') {
+            because '1.24.0 is vulnerable'
+        }
+        implementation('org.apache.ivy:ivy:2.5.2') {
+            because '2.5.1 is vulnerable'
+        }
+        implementation 'org.apache.commons:commons-text:1.10.0'
+        implementation 'ch.qos.logback:logback-classic:1.4.14'
+        implementation 'ch.qos.logback:logback-core:1.4.14'
+        implementation 'org.apache.avro:avro:1.12.0'
+        implementation 'io.airlift:aircompressor:0.27'
+        implementation 'org.scala-lang:scala-library:2.13.14'
+        implementation 'com.nimbusds:nimbus-jose-jwt:9.41.2'
+        implementation 'org.codehaus.janino:janino:3.1.12'
+        implementation 'org.apache.zookeeper:zookeeper:3.9.2'
+    }
+
+    // this is a replacement for the transitive dependency bcprov-jdk15on:1.70.0 which
+    // is excluded for security purposes
+    implementation 'org.bouncycastle:bcprov-jdk18on:1.78.1'
+    // use netty bom to enforce same netty version
+    implementation platform('io.netty:netty-bom:4.1.114.Final')

     testUtilsImplementation sourceSets.main.output
     testUtilsImplementation 'org.testng:testng:' + testNGVersion
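Note on the build changes above: Gradle constraints only raise a version when the module is actually requested transitively, so they pin the vulnerable components without adding direct dependencies; running `./gradlew dependencyInsight --dependency dnsjava` (for example) shows which version wins after this patch. The BouncyCastle change is different: bcprov-jdk15on is excluded outright and bcprov-jdk18on is added as a direct replacement. That swap relies on both artifacts registering the same "BC" JCA provider name. A minimal smoke test of that assumption (the class name BcProviderCheck is hypothetical, not part of this patch):

    import java.security.Security;

    import org.bouncycastle.jce.provider.BouncyCastleProvider;

    public class BcProviderCheck {
        public static void main(String[] args) {
            // bcprov-jdk18on registers under the same provider name ("BC") as the
            // excluded bcprov-jdk15on, so lookups by name keep resolving.
            Security.addProvider(new BouncyCastleProvider());
            System.out.println(Security.getProvider(BouncyCastleProvider.PROVIDER_NAME));
        }
    }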
diff --git a/src/main/java/org/broadinstitute/hellbender/engine/spark/RangePartitionCoalescer.java b/src/main/java/org/broadinstitute/hellbender/engine/spark/RangePartitionCoalescer.java
index fc1105c7d14..a3691154367 100644
--- a/src/main/java/org/broadinstitute/hellbender/engine/spark/RangePartitionCoalescer.java
+++ b/src/main/java/org/broadinstitute/hellbender/engine/spark/RangePartitionCoalescer.java
@@ -4,9 +4,9 @@
 import org.apache.spark.rdd.PartitionCoalescer;
 import org.apache.spark.rdd.PartitionGroup;
 import org.apache.spark.rdd.RDD;
-import scala.collection.JavaConversions;
 import scala.collection.Seq;
-
+import scala.jdk.javaapi.CollectionConverters;
+import java.io.Serial;
 import java.io.Serializable;
 import java.util.Arrays;
 import java.util.List;
@@ -14,8 +14,9 @@
 /**
  * A {@link PartitionCoalescer} that allows a range of partitions to be coalesced into groups.
 */
-class RangePartitionCoalescer implements PartitionCoalescer, Serializable, scala.Serializable {
+class RangePartitionCoalescer implements PartitionCoalescer, Serializable {
+    @Serial
     private static final long serialVersionUID = 1L;

     private List<Integer> maxEndPartitionIndexes;
@@ -45,7 +46,7 @@ public PartitionGroup[] coalesce(int maxPartitions, RDD<?> parent) {
             PartitionGroup group = new PartitionGroup(preferredLocation);
             List<Partition> partitionsInGroup = partitions.subList(i, maxEndPartitionIndexes.get(i) + 1);
-            group.partitions().append(JavaConversions.asScalaBuffer(partitionsInGroup));
+            group.partitions().addAll(CollectionConverters.asScala(partitionsInGroup).toList());
             groups[i] = group;
         }
         return groups;
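Note on the coalescer change above: scala.collection.JavaConversions (implicit conversions) was removed in Scala 2.13, and scala.jdk.javaapi.CollectionConverters is the explicit replacement designed for Java callers. A minimal sketch of the same conversion, with a hypothetical class name and invented sample data:

    import java.util.Arrays;
    import java.util.List;

    import scala.jdk.javaapi.CollectionConverters;

    public class CollectionConvertersSketch {
        public static void main(String[] args) {
            List<String> javaList = Arrays.asList("part-0", "part-1");

            // asScala wraps the Java list as a mutable Scala Buffer; toList()
            // then materializes an immutable scala.collection.immutable.List,
            // which satisfies the IterableOnce parameter of the addAll call
            // used in coalesce() above.
            scala.collection.immutable.List<String> scalaList =
                    CollectionConverters.asScala(javaList).toList();

            System.out.println(scalaList.length()); // prints 2
        }
    }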
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PSBuildReferenceTaxonomyUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PSBuildReferenceTaxonomyUtils.java
index eb4a7687080..43e57b6fd78 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PSBuildReferenceTaxonomyUtils.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PSBuildReferenceTaxonomyUtils.java
@@ -313,13 +313,13 @@ public static BufferedReader getBufferedReaderTarGz(final String tarPath, final
         try {
             InputStream result = null;
             final TarArchiveInputStream tarStream = new TarArchiveInputStream(new GZIPInputStream(new FileInputStream(tarPath)));
-            TarArchiveEntry entry = tarStream.getNextTarEntry();
+            TarArchiveEntry entry = tarStream.getNextEntry();
             while (entry != null) {
                 if (entry.getName().equals(fileName)) {
                     result = tarStream;
                     break;
                 }
-                entry = tarStream.getNextTarEntry();
+                entry = tarStream.getNextEntry();
             }
             if (result == null) {
                 throw new UserException.BadInput("Could not find file " + fileName + " in tarball " + tarPath);
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java
index 1ac964daeac..716e256d620 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java
@@ -41,9 +41,10 @@
 import org.broadinstitute.hellbender.utils.io.IOUtils;
 import org.broadinstitute.hellbender.utils.read.GATKRead;
 import org.broadinstitute.hellbender.utils.read.SAMRecordToGATKReadAdapter;
-import scala.Serializable;

 import java.io.IOException;
+import java.io.Serial;
+import java.io.Serializable;
 import java.nio.file.Paths;
 import java.util.List;
 import java.util.Set;
@@ -364,6 +365,7 @@ private static List processEvidenceTargetLinks(List
 i = new TarArchiveInputStream(new GzipCompressorInputStream(new FileInputStream(fastaTarGz)))) {
             ArchiveEntry entry = null;
             while ((entry = i.getNextEntry()) != null) {
                 if (!i.canReadEntryData(entry)) {
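Note on the commons-compress call sites above (including the truncated Funcotator test-utility hunk, which uses the same pattern): commons-compress 1.26 makes ArchiveInputStream generic, so TarArchiveInputStream.getNextEntry() returns TarArchiveEntry directly and the older getNextTarEntry() is deprecated in its favor. A self-contained sketch of the new-style scan loop; the class name TarScanSketch is hypothetical and the archive path comes from the command line:

    import java.io.FileInputStream;
    import java.io.IOException;
    import java.util.zip.GZIPInputStream;

    import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
    import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;

    public class TarScanSketch {
        public static void main(String[] args) throws IOException {
            try (TarArchiveInputStream tar = new TarArchiveInputStream(
                    new GZIPInputStream(new FileInputStream(args[0])))) {
                // getNextEntry() is typed in commons-compress 1.26+, so no cast
                // from ArchiveEntry to TarArchiveEntry is needed.
                for (TarArchiveEntry entry = tar.getNextEntry(); entry != null; entry = tar.getNextEntry()) {
                    System.out.println(entry.getName() + "\t" + entry.getSize());
                }
            }
        }
    }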