From 359ef4def5f4c4621e3d059f391ad56b87b94f82 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Fri, 11 Aug 2023 18:33:45 -0400 Subject: [PATCH 01/17] Initial commit --- .../filesystems/blob/BlobPathBuilder.scala | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala index 9e7b230286c..c0c5861c73d 100644 --- a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala +++ b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala @@ -7,6 +7,7 @@ import cromwell.filesystems.blob.BlobPathBuilder._ import java.net.{MalformedURLException, URI} import java.nio.file.{Files, LinkOption} +import scala.jdk.CollectionConverters._ import scala.language.postfixOps import scala.util.{Failure, Success, Try} @@ -78,6 +79,17 @@ object BlobPath { // format the library expects // 2) If the path looks like :, strip off the : to leave the absolute path inside the container. private val brokenPathRegex = "https:/([a-z0-9]+).blob.core.windows.net/([-a-zA-Z0-9]+)/(.*)".r + + // Blob files larger than 5 GB upload in parallel parts [0][1] and do not get a native `CONTENT-MD5` property. + // Instead, some uploaders such as TES [2] may optionally calculate the MD5 themselves and store it under this key in metadata. + // N.B. most if not virtually all large files in the wild will NOT have this key populated because they were not created by TES. [3] + // + // [0] https://learn.microsoft.com/en-us/azure/storage/blobs/scalability-targets + // [1] https://learn.microsoft.com/en-us/rest/api/storageservices/version-2019-12-12 + // [2] https://github.com/microsoft/ga4gh-tes/pull/236 + // [3] As of 2023-08 there are zero search engine results for `md5_hashlist_root_hash` and the only sure-thing client is TES + private val largeBlobFileMetadataKey = "md5_hashlist_root_hash" + def cleanedNioPathString(nioString: String): String = { val pathStr = nioString match { case brokenPathRegex(_, containerName, pathInContainer) => @@ -116,16 +128,24 @@ case class BlobPath private[blob](pathString: String, endpoint: EndpointURL, con def blobFileAttributes: Try[AzureBlobFileAttributes] = Try(Files.readAttributes(nioPath, classOf[AzureBlobFileAttributes])) + def blobFileMetadata: Option[Map[String, String]] = blobFileAttributes.map(_.metadata().asScala.toMap).toOption + def md5HexString: Try[Option[String]] = { - blobFileAttributes.map(h => - Option(h.blobHttpHeaders().getContentMd5) match { - case None => None - case Some(arr) if arr.isEmpty => None - // Convert the bytes to a hex-encoded string. Note that this value - // is rendered in base64 in the Azure web portal. - case Some(bytes) => Option(bytes.map("%02x".format(_)).mkString) + // Convert the bytes to a hex-encoded string. Note that the value + // is rendered in base64 in the Azure web portal. + def hexString(bytes: Array[Byte]): String = bytes.map("%02x".format(_)).mkString + def md5FromMetadata: Option[String] = blobFileMetadata.flatMap(_.get(BlobPath.largeBlobFileMetadataKey)) + + blobFileAttributes.map { attr: AzureBlobFileAttributes => + (Option(attr.blobHttpHeaders().getContentMd5), md5FromMetadata) match { + case (None, None) => None + case (None, Some(metadataMd5)) => Option(hexString(metadataMd5.getBytes)) + case (Some(headerMd5Bytes), None) if headerMd5Bytes.isEmpty => None + // (Some, Some) could happen if an uploader redundantly populates an md5 for a small file. + // Doesn't seem like an erroneous condition so just choose the native one. + case (Some(headerMd5Bytes), _) => Option(hexString(headerMd5Bytes)) } - ) + } } /** From 0b9aca06dd4ec37f985668f2a6cf71b81901dfce Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Fri, 11 Aug 2023 18:55:23 -0400 Subject: [PATCH 02/17] Consistently use `Try` for IO --- .../scala/cromwell/filesystems/blob/BlobPathBuilder.scala | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala index c0c5861c73d..e185dc018e3 100644 --- a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala +++ b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala @@ -128,13 +128,17 @@ case class BlobPath private[blob](pathString: String, endpoint: EndpointURL, con def blobFileAttributes: Try[AzureBlobFileAttributes] = Try(Files.readAttributes(nioPath, classOf[AzureBlobFileAttributes])) - def blobFileMetadata: Option[Map[String, String]] = blobFileAttributes.map(_.metadata().asScala.toMap).toOption + def blobFileMetadata: Try[Option[Map[String, String]]] = blobFileAttributes.map { attrs => + // `metadata()` has a documented `null` case + Option(attrs.metadata()).map(_.asScala.toMap) + } def md5HexString: Try[Option[String]] = { + def md5FromMetadata: Option[String] = blobFileMetadata.map(_.get(BlobPath.largeBlobFileMetadataKey)).toOption + // Convert the bytes to a hex-encoded string. Note that the value // is rendered in base64 in the Azure web portal. def hexString(bytes: Array[Byte]): String = bytes.map("%02x".format(_)).mkString - def md5FromMetadata: Option[String] = blobFileMetadata.flatMap(_.get(BlobPath.largeBlobFileMetadataKey)) blobFileAttributes.map { attr: AzureBlobFileAttributes => (Option(attr.blobHttpHeaders().getContentMd5), md5FromMetadata) match { From cad86dc0fb6831ce7e2ddcf511d6c732f8726a23 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Fri, 11 Aug 2023 18:57:46 -0400 Subject: [PATCH 03/17] Whitespace disabled from working on WDL parser... --- .../main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala index e185dc018e3..e7a54ad6c8d 100644 --- a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala +++ b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala @@ -139,7 +139,7 @@ case class BlobPath private[blob](pathString: String, endpoint: EndpointURL, con // Convert the bytes to a hex-encoded string. Note that the value // is rendered in base64 in the Azure web portal. def hexString(bytes: Array[Byte]): String = bytes.map("%02x".format(_)).mkString - + blobFileAttributes.map { attr: AzureBlobFileAttributes => (Option(attr.blobHttpHeaders().getContentMd5), md5FromMetadata) match { case (None, None) => None From ade9145ce177c90a5e265b492c0f730b3bf89a5a Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Fri, 11 Aug 2023 20:21:24 -0400 Subject: [PATCH 04/17] Fix sneaky `None.get` disguised as `Map.get()` --- .../scala/cromwell/filesystems/blob/BlobPathBuilder.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala index e7a54ad6c8d..788a353a327 100644 --- a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala +++ b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala @@ -134,7 +134,11 @@ case class BlobPath private[blob](pathString: String, endpoint: EndpointURL, con } def md5HexString: Try[Option[String]] = { - def md5FromMetadata: Option[String] = blobFileMetadata.map(_.get(BlobPath.largeBlobFileMetadataKey)).toOption + def md5FromMetadata: Option[String] = (blobFileMetadata map { maybeMetadataMap: Option[Map[String, String]] => + maybeMetadataMap flatMap { metadataMap: Map[String, String] => + metadataMap.get(BlobPath.largeBlobFileMetadataKey) + } + }).toOption.flatten // Convert the bytes to a hex-encoded string. Note that the value // is rendered in base64 in the Azure web portal. From 39024c2d4d016b7ec4826df00cda4c68aeb58562 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Mon, 14 Aug 2023 16:47:11 -0400 Subject: [PATCH 05/17] Add test, fix thing test found --- .../filesystems/blob/BlobPathBuilder.scala | 2 +- .../blob/BlobPathBuilderSpec.scala | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala index 788a353a327..653ebd773e9 100644 --- a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala +++ b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala @@ -147,7 +147,7 @@ case class BlobPath private[blob](pathString: String, endpoint: EndpointURL, con blobFileAttributes.map { attr: AzureBlobFileAttributes => (Option(attr.blobHttpHeaders().getContentMd5), md5FromMetadata) match { case (None, None) => None - case (None, Some(metadataMd5)) => Option(hexString(metadataMd5.getBytes)) + case (None, Some(metadataMd5)) => Option(metadataMd5) case (Some(headerMd5Bytes), None) if headerMd5Bytes.isEmpty => None // (Some, Some) could happen if an uploader redundantly populates an md5 for a small file. // Doesn't seem like an erroneous condition so just choose the native one. diff --git a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala index 4012e241eb3..1093473206e 100644 --- a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala +++ b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala @@ -100,6 +100,30 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { new BlobPathBuilder(store, endpoint)(fsm) } + it should "read md5 from small files <5g" in { + val builder = makeBlobPathBuilder(endpoint, store) + val evalPath = "/test/inputFile.txt" + val testString = endpoint.value + "/" + store + evalPath + val blobPath1: BlobPath = builder build testString getOrElse fail() + blobPath1.md5HexString.toOption.get should equal(Some("967f0f086992f1a8b48f0a533f80290b")) + } + + it should "read md5 from large files >5g" in { + val builder = makeBlobPathBuilder(endpoint, store) + val evalPath = "/test/Rocky-9.2-aarch64-dvd.iso" + val testString = endpoint.value + "/" + store + evalPath + val blobPath1: BlobPath = builder build testString getOrElse fail() + blobPath1.md5HexString.toOption.get should equal(Some("13cb09331d2d12c0f476f81c672a4319")) + } + + it should "choose the native md5 over the metadata md5 for files that have both" in { + val builder = makeBlobPathBuilder(endpoint, store) + val evalPath = "/test/redundant_md5_test.txt" + val testString = endpoint.value + "/" + store + evalPath + val blobPath1: BlobPath = builder build testString getOrElse fail() + blobPath1.md5HexString.toOption.get should equal(Some("9e5ceec07c8730b593a3a4b4ae324475")) + } + ignore should "resolve an absolute path string correctly to a path" in { val builder = makeBlobPathBuilder(endpoint, store) val rootString = s"${endpoint.value}/${store.value}/cromwell-execution" From c4fbfb14b44739657aea1e009af0981d7f557c0c Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Tue, 15 Aug 2023 10:21:59 -0400 Subject: [PATCH 06/17] Try populating Azure creds for unit tests --- .github/workflows/cromwell_unit_tests.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/cromwell_unit_tests.yml b/.github/workflows/cromwell_unit_tests.yml index 797f38efd96..88951871d8f 100644 --- a/.github/workflows/cromwell_unit_tests.yml +++ b/.github/workflows/cromwell_unit_tests.yml @@ -28,6 +28,10 @@ jobs: #Invoke SBT to run all unit tests for Cromwell. - name: Run tests + env: + AZURE_CLIENT_ID: ${{ secrets.VAULT_AZURE_CENTAUR_CLIENT_ID }} + AZURE_CLIENT_SECRET: ${{ secrets.VAULT_AZURE_CENTAUR_CLIENT_SECRET }} + AZURE_TENANT_ID: ${{ secrets.VAULT_AZURE_CENTAUR_TENANT_ID }} run: | set -e sbt "test" From ac4bd029807cbc3865573729936a8f095797cf18 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Tue, 15 Aug 2023 11:36:45 -0400 Subject: [PATCH 07/17] See what happens with other tests --- .../filesystems/blob/BlobPathBuilderSpec.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala index 1093473206e..3791c6abe92 100644 --- a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala +++ b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala @@ -124,7 +124,7 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { blobPath1.md5HexString.toOption.get should equal(Some("9e5ceec07c8730b593a3a4b4ae324475")) } - ignore should "resolve an absolute path string correctly to a path" in { + it should "resolve an absolute path string correctly to a path" in { val builder = makeBlobPathBuilder(endpoint, store) val rootString = s"${endpoint.value}/${store.value}/cromwell-execution" val blobRoot: BlobPath = builder build rootString getOrElse fail() @@ -133,7 +133,7 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { otherFile.toAbsolutePath.pathAsString should equal ("https://coaexternalstorage.blob.core.windows.net/inputs/cromwell-execution/test/inputFile.txt") } - ignore should "build a blob path from a test string and read a file" in { + it should "build a blob path from a test string and read a file" in { val builder = makeBlobPathBuilder(endpoint, store) val endpointHost = BlobPathBuilder.parseURI(endpoint.value).map(_.getHost).getOrElse(fail("Could not parse URI")) val evalPath = "/test/inputFile.txt" @@ -149,7 +149,7 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { fileText should include ("This is my test file!!!! Did it work?") } - ignore should "build duplicate blob paths in the same filesystem" in { + it should "build duplicate blob paths in the same filesystem" in { val builder = makeBlobPathBuilder(endpoint, store) val evalPath = "/test/inputFile.txt" val testString = endpoint.value + "/" + store + evalPath @@ -162,7 +162,7 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { fileText should include ("This is my test file!!!! Did it work?") } - ignore should "resolve a path without duplicating container name" in { + it should "resolve a path without duplicating container name" in { val builder = makeBlobPathBuilder(endpoint, store) val rootString = s"${endpoint.value}/${store.value}/cromwell-execution" val blobRoot: BlobPath = builder build rootString getOrElse fail() @@ -171,7 +171,7 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { otherFile.toAbsolutePath.pathAsString should equal ("https://coaexternalstorage.blob.core.windows.net/inputs/cromwell-execution/test/inputFile.txt") } - ignore should "correctly remove a prefix from the blob path" in { + it should "correctly remove a prefix from the blob path" in { val builder = makeBlobPathBuilder(endpoint, store) val rootString = s"${endpoint.value}/${store.value}/cromwell-execution/" val execDirString = s"${endpoint.value}/${store.value}/cromwell-execution/abc123/myworkflow/task1/def4356/execution/" @@ -184,7 +184,7 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { blobFile.pathStringWithoutPrefix(blobFile) should equal ("") } - ignore should "not change a path if it doesn't start with a prefix" in { + it should "not change a path if it doesn't start with a prefix" in { val builder = makeBlobPathBuilder(endpoint, store) val otherRootString = s"${endpoint.value}/${store.value}/foobar/" val fileString = s"${endpoint.value}/${store.value}/cromwell-execution/abc123/myworkflow/task1/def4356/execution/stdout" From cf96a7cbfa3df9077e2297596e143e78aa727147 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Tue, 15 Aug 2023 15:39:00 -0400 Subject: [PATCH 08/17] You work locally, will you work in CI? --- .../filesystems/blob/BlobPathBuilder.scala | 30 +++--- .../blob/BlobPathBuilderSpec.scala | 96 ++++++++++--------- 2 files changed, 70 insertions(+), 56 deletions(-) diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala index 653ebd773e9..2d4d0bff8c5 100644 --- a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala +++ b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala @@ -81,14 +81,17 @@ object BlobPath { private val brokenPathRegex = "https:/([a-z0-9]+).blob.core.windows.net/([-a-zA-Z0-9]+)/(.*)".r // Blob files larger than 5 GB upload in parallel parts [0][1] and do not get a native `CONTENT-MD5` property. - // Instead, some uploaders such as TES [2] may optionally calculate the MD5 themselves and store it under this key in metadata. - // N.B. most if not virtually all large files in the wild will NOT have this key populated because they were not created by TES. [3] + // Instead, some uploaders such as TES [2] calculate the md5 themselves and store it under this key in metadata. + // They do this for all files they touch, regardless of size, and the root/metadata property is authoritative over native. + // + // N.B. most if not virtually all large files in the wild will NOT have this key populated because they were not created + // by TES or its associated upload utility [4]. // // [0] https://learn.microsoft.com/en-us/azure/storage/blobs/scalability-targets // [1] https://learn.microsoft.com/en-us/rest/api/storageservices/version-2019-12-12 - // [2] https://github.com/microsoft/ga4gh-tes/pull/236 - // [3] As of 2023-08 there are zero search engine results for `md5_hashlist_root_hash` and the only sure-thing client is TES - private val largeBlobFileMetadataKey = "md5_hashlist_root_hash" + // [2] https://github.com/microsoft/ga4gh-tes/blob/03feb746bb961b72fa91266a56db845e3b31be27/src/Tes.Runner/Transfer/BlobBlockApiHttpUtils.cs#L25 + // [4] https://github.com/microsoft/ga4gh-tes/blob/main/src/Tes.RunnerCLI/scripts/roothash.sh + private val largeBlobFileMetadataKey = "md5_4mib_hashlist_root_hash" def cleanedNioPathString(nioString: String): String = { val pathStr = nioString match { @@ -146,12 +149,17 @@ case class BlobPath private[blob](pathString: String, endpoint: EndpointURL, con blobFileAttributes.map { attr: AzureBlobFileAttributes => (Option(attr.blobHttpHeaders().getContentMd5), md5FromMetadata) match { - case (None, None) => None - case (None, Some(metadataMd5)) => Option(metadataMd5) - case (Some(headerMd5Bytes), None) if headerMd5Bytes.isEmpty => None - // (Some, Some) could happen if an uploader redundantly populates an md5 for a small file. - // Doesn't seem like an erroneous condition so just choose the native one. - case (Some(headerMd5Bytes), _) => Option(hexString(headerMd5Bytes)) + case (None, None) => + None + // (Some, Some) will happen for all <5 GB files uploaded by TES. Per Microsoft 2023-08-15 the + // root/metadata algorithm emits different values than the native algorithm and we should + // always choose metadata for consistency with larger files that only have that one. + case (_, Some(metadataMd5)) => + Option(metadataMd5) + case (Some(headerMd5Bytes), None) if headerMd5Bytes.isEmpty => + None + case (Some(headerMd5Bytes), None) => + Option(hexString(headerMd5Bytes)) } } } diff --git a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala index 3791c6abe92..cfa1e10e457 100644 --- a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala +++ b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala @@ -89,70 +89,76 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { ) } - //// The below tests are IGNORED because they depend on Azure auth information being present in the environment //// + // The following tests are NOT ignored because they use the `centaurtesting` account injected into CI + // You may have to private val subscriptionId: SubscriptionId = SubscriptionId(UUID.fromString("62b22893-6bc1-46d9-8a90-806bb3cce3c9")) - private val endpoint: EndpointURL = BlobPathBuilderSpec.buildEndpoint("coaexternalstorage") - private val store: BlobContainerName = BlobContainerName("inputs") - - def makeBlobPathBuilder(blobEndpoint: EndpointURL, container: BlobContainerName): BlobPathBuilder = { - val blobTokenGenerator = NativeBlobSasTokenGenerator(container, blobEndpoint, Some(subscriptionId)) - val fsm = new BlobFileSystemManager(container, blobEndpoint, 10, blobTokenGenerator) - new BlobPathBuilder(store, endpoint)(fsm) - } + private val centaurEndpoint: EndpointURL = BlobPathBuilderSpec.buildEndpoint("centaurtesting") + private val centaurContainer: BlobContainerName = BlobContainerName("test-blob") it should "read md5 from small files <5g" in { - val builder = makeBlobPathBuilder(endpoint, store) - val evalPath = "/test/inputFile.txt" - val testString = endpoint.value + "/" + store + evalPath - val blobPath1: BlobPath = builder build testString getOrElse fail() - blobPath1.md5HexString.toOption.get should equal(Some("967f0f086992f1a8b48f0a533f80290b")) + val builder = makeBlobPathBuilder(centaurEndpoint, centaurContainer) + val evalPath = "/testRead.txt" + val testString = centaurEndpoint.value + "/" + centaurContainer + evalPath + val blobPath1: BlobPath = (builder build testString).get + blobPath1.md5HexString.toOption.get should equal(Some("31ae06882d06a20e01ba1ac961ce576c")) } it should "read md5 from large files >5g" in { - val builder = makeBlobPathBuilder(endpoint, store) - val evalPath = "/test/Rocky-9.2-aarch64-dvd.iso" - val testString = endpoint.value + "/" + store + evalPath + val builder = makeBlobPathBuilder(centaurEndpoint, centaurContainer) + val evalPath = "/Rocky-9.2-aarch64-dvd.iso" + val testString = centaurEndpoint.value + "/" + centaurContainer + evalPath val blobPath1: BlobPath = builder build testString getOrElse fail() blobPath1.md5HexString.toOption.get should equal(Some("13cb09331d2d12c0f476f81c672a4319")) } - it should "choose the native md5 over the metadata md5 for files that have both" in { - val builder = makeBlobPathBuilder(endpoint, store) - val evalPath = "/test/redundant_md5_test.txt" - val testString = endpoint.value + "/" + store + evalPath + it should "choose the root/metadata md5 over the native md5 for files that have both" in { + val builder = makeBlobPathBuilder(centaurEndpoint, centaurContainer) + val evalPath = "/redundant_md5_test.txt" + val testString = centaurEndpoint.value + "/" + centaurContainer + evalPath val blobPath1: BlobPath = builder build testString getOrElse fail() - blobPath1.md5HexString.toOption.get should equal(Some("9e5ceec07c8730b593a3a4b4ae324475")) + blobPath1.md5HexString.toOption.get should equal(Some("021c7cc715ec82292bb9b925f9ca44d3")) } - it should "resolve an absolute path string correctly to a path" in { - val builder = makeBlobPathBuilder(endpoint, store) - val rootString = s"${endpoint.value}/${store.value}/cromwell-execution" + //// The below tests are IGNORED because they depend on Azure auth information being present in the environment //// + private val endpoint: EndpointURL = BlobPathBuilderSpec.buildEndpoint("coaexternalstorage") + private val container: BlobContainerName = BlobContainerName("inputs") + + def makeBlobPathBuilder(blobEndpoint: EndpointURL, + container: BlobContainerName): BlobPathBuilder = { + val blobTokenGenerator = NativeBlobSasTokenGenerator(container, blobEndpoint, Some(subscriptionId)) + val fsm = new BlobFileSystemManager(container, blobEndpoint, 10, blobTokenGenerator) + new BlobPathBuilder(container, blobEndpoint)(fsm) + } + + ignore should "resolve an absolute path string correctly to a path" in { + val builder = makeBlobPathBuilder(endpoint, container) + val rootString = s"${endpoint.value}/${container.value}/cromwell-execution" val blobRoot: BlobPath = builder build rootString getOrElse fail() blobRoot.toAbsolutePath.pathAsString should equal ("https://coaexternalstorage.blob.core.windows.net/inputs/cromwell-execution") val otherFile = blobRoot.resolve("https://coaexternalstorage.blob.core.windows.net/inputs/cromwell-execution/test/inputFile.txt") otherFile.toAbsolutePath.pathAsString should equal ("https://coaexternalstorage.blob.core.windows.net/inputs/cromwell-execution/test/inputFile.txt") } - it should "build a blob path from a test string and read a file" in { - val builder = makeBlobPathBuilder(endpoint, store) + ignore should "build a blob path from a test string and read a file" in { + val builder = makeBlobPathBuilder(endpoint, container) val endpointHost = BlobPathBuilder.parseURI(endpoint.value).map(_.getHost).getOrElse(fail("Could not parse URI")) val evalPath = "/test/inputFile.txt" - val testString = endpoint.value + "/" + store + evalPath + val testString = endpoint.value + "/" + container + evalPath val blobPath: BlobPath = builder build testString getOrElse fail() - blobPath.container should equal(store) + blobPath.container should equal(container) blobPath.endpoint should equal(endpoint) blobPath.pathAsString should equal(testString) - blobPath.pathWithoutScheme should equal(endpointHost + "/" + store + evalPath) + blobPath.pathWithoutScheme should equal(endpointHost + "/" + container + evalPath) val is = blobPath.newInputStream() val fileText = (is.readAllBytes.map(_.toChar)).mkString fileText should include ("This is my test file!!!! Did it work?") } - it should "build duplicate blob paths in the same filesystem" in { - val builder = makeBlobPathBuilder(endpoint, store) + ignore should "build duplicate blob paths in the same filesystem" in { + val builder = makeBlobPathBuilder(endpoint, container) val evalPath = "/test/inputFile.txt" - val testString = endpoint.value + "/" + store + evalPath + val testString = endpoint.value + "/" + container + evalPath val blobPath1: BlobPath = builder build testString getOrElse fail() blobPath1.nioPath.getFileSystem.close() val blobPath2: BlobPath = builder build testString getOrElse fail() @@ -162,20 +168,20 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { fileText should include ("This is my test file!!!! Did it work?") } - it should "resolve a path without duplicating container name" in { - val builder = makeBlobPathBuilder(endpoint, store) - val rootString = s"${endpoint.value}/${store.value}/cromwell-execution" + ignore should "resolve a path without duplicating container name" in { + val builder = makeBlobPathBuilder(endpoint, container) + val rootString = s"${endpoint.value}/${container.value}/cromwell-execution" val blobRoot: BlobPath = builder build rootString getOrElse fail() blobRoot.toAbsolutePath.pathAsString should equal ("https://coaexternalstorage.blob.core.windows.net/inputs/cromwell-execution") val otherFile = blobRoot.resolve("test/inputFile.txt") otherFile.toAbsolutePath.pathAsString should equal ("https://coaexternalstorage.blob.core.windows.net/inputs/cromwell-execution/test/inputFile.txt") } - it should "correctly remove a prefix from the blob path" in { - val builder = makeBlobPathBuilder(endpoint, store) - val rootString = s"${endpoint.value}/${store.value}/cromwell-execution/" - val execDirString = s"${endpoint.value}/${store.value}/cromwell-execution/abc123/myworkflow/task1/def4356/execution/" - val fileString = s"${endpoint.value}/${store.value}/cromwell-execution/abc123/myworkflow/task1/def4356/execution/stdout" + ignore should "correctly remove a prefix from the blob path" in { + val builder = makeBlobPathBuilder(endpoint, container) + val rootString = s"${endpoint.value}/${container.value}/cromwell-execution/" + val execDirString = s"${endpoint.value}/${container.value}/cromwell-execution/abc123/myworkflow/task1/def4356/execution/" + val fileString = s"${endpoint.value}/${container.value}/cromwell-execution/abc123/myworkflow/task1/def4356/execution/stdout" val blobRoot: BlobPath = builder build rootString getOrElse fail() val execDir: BlobPath = builder build execDirString getOrElse fail() val blobFile: BlobPath = builder build fileString getOrElse fail() @@ -184,10 +190,10 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { blobFile.pathStringWithoutPrefix(blobFile) should equal ("") } - it should "not change a path if it doesn't start with a prefix" in { - val builder = makeBlobPathBuilder(endpoint, store) - val otherRootString = s"${endpoint.value}/${store.value}/foobar/" - val fileString = s"${endpoint.value}/${store.value}/cromwell-execution/abc123/myworkflow/task1/def4356/execution/stdout" + ignore should "not change a path if it doesn't start with a prefix" in { + val builder = makeBlobPathBuilder(endpoint, container) + val otherRootString = s"${endpoint.value}/${container.value}/foobar/" + val fileString = s"${endpoint.value}/${container.value}/cromwell-execution/abc123/myworkflow/task1/def4356/execution/stdout" val otherBlobRoot: BlobPath = builder build otherRootString getOrElse fail() val blobFile: BlobPath = builder build fileString getOrElse fail() blobFile.pathStringWithoutPrefix(otherBlobRoot) should equal ("/cromwell-execution/abc123/myworkflow/task1/def4356/execution/stdout") From d3484285c289ddf0f4bfe4a288f1236eb8362c35 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Tue, 15 Aug 2023 15:39:44 -0400 Subject: [PATCH 09/17] Don't hide the exception when tests fail --- .../scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala index cfa1e10e457..74bbaf1eb1d 100644 --- a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala +++ b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala @@ -107,7 +107,7 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { val builder = makeBlobPathBuilder(centaurEndpoint, centaurContainer) val evalPath = "/Rocky-9.2-aarch64-dvd.iso" val testString = centaurEndpoint.value + "/" + centaurContainer + evalPath - val blobPath1: BlobPath = builder build testString getOrElse fail() + val blobPath1: BlobPath = (builder build testString).get blobPath1.md5HexString.toOption.get should equal(Some("13cb09331d2d12c0f476f81c672a4319")) } @@ -115,7 +115,7 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { val builder = makeBlobPathBuilder(centaurEndpoint, centaurContainer) val evalPath = "/redundant_md5_test.txt" val testString = centaurEndpoint.value + "/" + centaurContainer + evalPath - val blobPath1: BlobPath = builder build testString getOrElse fail() + val blobPath1: BlobPath = (builder build testString).get blobPath1.md5HexString.toOption.get should equal(Some("021c7cc715ec82292bb9b925f9ca44d3")) } From c5d991ec3e580a443d6fdfc7ed4be9c83b811743 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Wed, 16 Aug 2023 16:02:30 -0400 Subject: [PATCH 10/17] Can we, uh, see the error maybe? --- .../scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala index 74bbaf1eb1d..7bb5aa3d37b 100644 --- a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala +++ b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala @@ -100,6 +100,7 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { val evalPath = "/testRead.txt" val testString = centaurEndpoint.value + "/" + centaurContainer + evalPath val blobPath1: BlobPath = (builder build testString).get + blobPath1.md5HexString.get.get should equal(Some("31ae06882d06a20e01ba1ac961ce576c")) blobPath1.md5HexString.toOption.get should equal(Some("31ae06882d06a20e01ba1ac961ce576c")) } From b62c9de1cd3fdc9ae8410154878cfc721faa51c7 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Thu, 17 Aug 2023 11:10:31 -0400 Subject: [PATCH 11/17] Do you pass in CI now? --- .../cromwell/filesystems/blob/BlobFileSystemManager.scala | 4 ++-- .../cromwell/filesystems/blob/BlobPathBuilderSpec.scala | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala index ed22d1b55f5..0bf4a7ec48a 100644 --- a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala +++ b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala @@ -1,7 +1,7 @@ package cromwell.filesystems.blob import com.azure.core.credential.AzureSasCredential -import com.azure.storage.blob.nio.AzureFileSystem +import com.azure.storage.blob.nio.{AzureFileSystem, AzureFileSystemProvider} import com.azure.storage.blob.sas.{BlobContainerSasPermission, BlobServiceSasSignatureValues} import com.typesafe.config.Config import com.typesafe.scalalogging.LazyLogging @@ -19,7 +19,7 @@ import scala.util.{Failure, Success, Try} // actually connecting to Blob storage. case class FileSystemAPI() { def getFileSystem(uri: URI): Try[FileSystem] = Try(FileSystems.getFileSystem(uri)) - def newFileSystem(uri: URI, config: Map[String, Object]): FileSystem = FileSystems.newFileSystem(uri, config.asJava) + def newFileSystem(uri: URI, config: Map[String, Object]): FileSystem = new AzureFileSystemProvider().newFileSystem(uri, config.asJava) def closeFileSystem(uri: URI): Option[Unit] = getFileSystem(uri).toOption.map(_.close) } /** diff --git a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala index 7bb5aa3d37b..04c039826dc 100644 --- a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala +++ b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala @@ -100,8 +100,7 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { val evalPath = "/testRead.txt" val testString = centaurEndpoint.value + "/" + centaurContainer + evalPath val blobPath1: BlobPath = (builder build testString).get - blobPath1.md5HexString.get.get should equal(Some("31ae06882d06a20e01ba1ac961ce576c")) - blobPath1.md5HexString.toOption.get should equal(Some("31ae06882d06a20e01ba1ac961ce576c")) + blobPath1.md5HexString.get should equal(Option("31ae06882d06a20e01ba1ac961ce576c")) } it should "read md5 from large files >5g" in { @@ -109,7 +108,7 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { val evalPath = "/Rocky-9.2-aarch64-dvd.iso" val testString = centaurEndpoint.value + "/" + centaurContainer + evalPath val blobPath1: BlobPath = (builder build testString).get - blobPath1.md5HexString.toOption.get should equal(Some("13cb09331d2d12c0f476f81c672a4319")) + blobPath1.md5HexString.get should equal(Option("13cb09331d2d12c0f476f81c672a4319")) } it should "choose the root/metadata md5 over the native md5 for files that have both" in { @@ -117,7 +116,7 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { val evalPath = "/redundant_md5_test.txt" val testString = centaurEndpoint.value + "/" + centaurContainer + evalPath val blobPath1: BlobPath = (builder build testString).get - blobPath1.md5HexString.toOption.get should equal(Some("021c7cc715ec82292bb9b925f9ca44d3")) + blobPath1.md5HexString.get should equal(Option("021c7cc715ec82292bb9b925f9ca44d3")) } //// The below tests are IGNORED because they depend on Azure auth information being present in the environment //// From c4e3ece77633e62076e655a18a3a143fe3737182 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Thu, 17 Aug 2023 11:37:14 -0400 Subject: [PATCH 12/17] Do you pass in CI now? --- .../filesystems/blob/BlobFileSystemManager.scala | 9 ++++++--- .../cromwell/filesystems/blob/BlobPathBuilder.scala | 4 +++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala index 0bf4a7ec48a..efb073175f1 100644 --- a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala +++ b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala @@ -9,7 +9,7 @@ import common.validation.Validation._ import cromwell.cloudsupport.azure.{AzureCredentials, AzureUtils} import java.net.URI -import java.nio.file.{FileSystem, FileSystemNotFoundException, FileSystems} +import java.nio.file.{FileSystem, FileSystemNotFoundException} import java.time.temporal.ChronoUnit import java.time.{Duration, Instant, OffsetDateTime} import scala.jdk.CollectionConverters._ @@ -18,8 +18,11 @@ import scala.util.{Failure, Success, Try} // We encapsulate this functionality here so that we can easily mock it out, to allow for testing without // actually connecting to Blob storage. case class FileSystemAPI() { - def getFileSystem(uri: URI): Try[FileSystem] = Try(FileSystems.getFileSystem(uri)) - def newFileSystem(uri: URI, config: Map[String, Object]): FileSystem = new AzureFileSystemProvider().newFileSystem(uri, config.asJava) + + private lazy val provider: AzureFileSystemProvider = new AzureFileSystemProvider() + + def getFileSystem(uri: URI): Try[FileSystem] = Try(provider.getFileSystem(uri)) + def newFileSystem(uri: URI, config: Map[String, Object]): FileSystem = provider.newFileSystem(uri, config.asJava) def closeFileSystem(uri: URI): Option[Unit] = getFileSystem(uri).toOption.map(_.close) } /** diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala index 2d4d0bff8c5..afc447b8bde 100644 --- a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala +++ b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala @@ -141,7 +141,7 @@ case class BlobPath private[blob](pathString: String, endpoint: EndpointURL, con maybeMetadataMap flatMap { metadataMap: Map[String, String] => metadataMap.get(BlobPath.largeBlobFileMetadataKey) } - }).toOption.flatten + }).get // Convert the bytes to a hex-encoded string. Note that the value // is rendered in base64 in the Azure web portal. @@ -150,6 +150,7 @@ case class BlobPath private[blob](pathString: String, endpoint: EndpointURL, con blobFileAttributes.map { attr: AzureBlobFileAttributes => (Option(attr.blobHttpHeaders().getContentMd5), md5FromMetadata) match { case (None, None) => + println("None, None") None // (Some, Some) will happen for all <5 GB files uploaded by TES. Per Microsoft 2023-08-15 the // root/metadata algorithm emits different values than the native algorithm and we should @@ -157,6 +158,7 @@ case class BlobPath private[blob](pathString: String, endpoint: EndpointURL, con case (_, Some(metadataMd5)) => Option(metadataMd5) case (Some(headerMd5Bytes), None) if headerMd5Bytes.isEmpty => + println("Empty None") None case (Some(headerMd5Bytes), None) => Option(hexString(headerMd5Bytes)) From b4872d919ff2b79384c12adc6a9196cff5be9dbd Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Thu, 17 Aug 2023 12:34:25 -0400 Subject: [PATCH 13/17] Cleanup --- .../filesystems/blob/BlobPathBuilder.scala | 16 +++++----------- .../filesystems/blob/BlobPathBuilderSpec.scala | 1 - 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala index afc447b8bde..aa6445fa779 100644 --- a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala +++ b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala @@ -141,7 +141,7 @@ case class BlobPath private[blob](pathString: String, endpoint: EndpointURL, con maybeMetadataMap flatMap { metadataMap: Map[String, String] => metadataMap.get(BlobPath.largeBlobFileMetadataKey) } - }).get + }).toOption.flatten // Convert the bytes to a hex-encoded string. Note that the value // is rendered in base64 in the Azure web portal. @@ -149,19 +149,13 @@ case class BlobPath private[blob](pathString: String, endpoint: EndpointURL, con blobFileAttributes.map { attr: AzureBlobFileAttributes => (Option(attr.blobHttpHeaders().getContentMd5), md5FromMetadata) match { - case (None, None) => - println("None, None") - None + case (None, None) => None // (Some, Some) will happen for all <5 GB files uploaded by TES. Per Microsoft 2023-08-15 the // root/metadata algorithm emits different values than the native algorithm and we should // always choose metadata for consistency with larger files that only have that one. - case (_, Some(metadataMd5)) => - Option(metadataMd5) - case (Some(headerMd5Bytes), None) if headerMd5Bytes.isEmpty => - println("Empty None") - None - case (Some(headerMd5Bytes), None) => - Option(hexString(headerMd5Bytes)) + case (_, Some(metadataMd5)) => Option(metadataMd5) + case (Some(headerMd5Bytes), None) if headerMd5Bytes.isEmpty => None + case (Some(headerMd5Bytes), None) => Option(hexString(headerMd5Bytes)) } } } diff --git a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala index 04c039826dc..6479300f64e 100644 --- a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala +++ b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala @@ -90,7 +90,6 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { } // The following tests are NOT ignored because they use the `centaurtesting` account injected into CI - // You may have to private val subscriptionId: SubscriptionId = SubscriptionId(UUID.fromString("62b22893-6bc1-46d9-8a90-806bb3cce3c9")) private val centaurEndpoint: EndpointURL = BlobPathBuilderSpec.buildEndpoint("centaurtesting") private val centaurContainer: BlobContainerName = BlobContainerName("test-blob") From c36e3e5191da10067f89edde6e6dc79044550e0f Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Thu, 17 Aug 2023 13:22:12 -0400 Subject: [PATCH 14/17] Add PR suggested test --- .../cromwell/filesystems/blob/BlobPathBuilderSpec.scala | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala index 6479300f64e..5172cdcbd63 100644 --- a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala +++ b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala @@ -118,6 +118,14 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { blobPath1.md5HexString.get should equal(Option("021c7cc715ec82292bb9b925f9ca44d3")) } + it should "gracefully return `None` when neither hash is found" in { + val builder = makeBlobPathBuilder(centaurEndpoint, centaurContainer) + val evalPath = "/no_md5_test.txt" + val testString = centaurEndpoint.value + "/" + centaurContainer + evalPath + val blobPath1: BlobPath = (builder build testString).get + blobPath1.md5HexString.get should equal(None) + } + //// The below tests are IGNORED because they depend on Azure auth information being present in the environment //// private val endpoint: EndpointURL = BlobPathBuilderSpec.buildEndpoint("coaexternalstorage") private val container: BlobContainerName = BlobContainerName("inputs") From 2fe419c0f327342c653ff46ebb7cee180ee2f9f5 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Thu, 17 Aug 2023 14:41:31 -0400 Subject: [PATCH 15/17] Unignore other tests --- .../blob/BlobPathBuilderSpec.scala | 60 +++++++++---------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala index 7bb5aa3d37b..245fd2a012f 100644 --- a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala +++ b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala @@ -89,58 +89,54 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { ) } - // The following tests are NOT ignored because they use the `centaurtesting` account injected into CI - // You may have to + // The following tests use the `centaurtesting` account injected into CI. They depend on access to the + // container specified below. You may need to log in to az cli locally to get them to pass. private val subscriptionId: SubscriptionId = SubscriptionId(UUID.fromString("62b22893-6bc1-46d9-8a90-806bb3cce3c9")) - private val centaurEndpoint: EndpointURL = BlobPathBuilderSpec.buildEndpoint("centaurtesting") - private val centaurContainer: BlobContainerName = BlobContainerName("test-blob") + private val endpoint: EndpointURL = BlobPathBuilderSpec.buildEndpoint("centaurtesting") + private val container: BlobContainerName = BlobContainerName("test-blob") + + def makeBlobPathBuilder(blobEndpoint: EndpointURL, + container: BlobContainerName): BlobPathBuilder = { + val blobTokenGenerator = NativeBlobSasTokenGenerator(container, blobEndpoint, Some(subscriptionId)) + val fsm = new BlobFileSystemManager(container, blobEndpoint, 10, blobTokenGenerator) + new BlobPathBuilder(container, blobEndpoint)(fsm) + } it should "read md5 from small files <5g" in { - val builder = makeBlobPathBuilder(centaurEndpoint, centaurContainer) + val builder = makeBlobPathBuilder(endpoint, container) val evalPath = "/testRead.txt" - val testString = centaurEndpoint.value + "/" + centaurContainer + evalPath + val testString = endpoint.value + "/" + container + evalPath val blobPath1: BlobPath = (builder build testString).get blobPath1.md5HexString.get.get should equal(Some("31ae06882d06a20e01ba1ac961ce576c")) blobPath1.md5HexString.toOption.get should equal(Some("31ae06882d06a20e01ba1ac961ce576c")) } it should "read md5 from large files >5g" in { - val builder = makeBlobPathBuilder(centaurEndpoint, centaurContainer) + val builder = makeBlobPathBuilder(endpoint, container) val evalPath = "/Rocky-9.2-aarch64-dvd.iso" - val testString = centaurEndpoint.value + "/" + centaurContainer + evalPath + val testString = endpoint.value + "/" + container + evalPath val blobPath1: BlobPath = (builder build testString).get blobPath1.md5HexString.toOption.get should equal(Some("13cb09331d2d12c0f476f81c672a4319")) } it should "choose the root/metadata md5 over the native md5 for files that have both" in { - val builder = makeBlobPathBuilder(centaurEndpoint, centaurContainer) + val builder = makeBlobPathBuilder(endpoint, container) val evalPath = "/redundant_md5_test.txt" - val testString = centaurEndpoint.value + "/" + centaurContainer + evalPath + val testString = endpoint.value + "/" + container + evalPath val blobPath1: BlobPath = (builder build testString).get blobPath1.md5HexString.toOption.get should equal(Some("021c7cc715ec82292bb9b925f9ca44d3")) } - //// The below tests are IGNORED because they depend on Azure auth information being present in the environment //// - private val endpoint: EndpointURL = BlobPathBuilderSpec.buildEndpoint("coaexternalstorage") - private val container: BlobContainerName = BlobContainerName("inputs") - - def makeBlobPathBuilder(blobEndpoint: EndpointURL, - container: BlobContainerName): BlobPathBuilder = { - val blobTokenGenerator = NativeBlobSasTokenGenerator(container, blobEndpoint, Some(subscriptionId)) - val fsm = new BlobFileSystemManager(container, blobEndpoint, 10, blobTokenGenerator) - new BlobPathBuilder(container, blobEndpoint)(fsm) - } - - ignore should "resolve an absolute path string correctly to a path" in { + it should "resolve an absolute path string correctly to a path" in { val builder = makeBlobPathBuilder(endpoint, container) val rootString = s"${endpoint.value}/${container.value}/cromwell-execution" val blobRoot: BlobPath = builder build rootString getOrElse fail() - blobRoot.toAbsolutePath.pathAsString should equal ("https://coaexternalstorage.blob.core.windows.net/inputs/cromwell-execution") - val otherFile = blobRoot.resolve("https://coaexternalstorage.blob.core.windows.net/inputs/cromwell-execution/test/inputFile.txt") - otherFile.toAbsolutePath.pathAsString should equal ("https://coaexternalstorage.blob.core.windows.net/inputs/cromwell-execution/test/inputFile.txt") + blobRoot.toAbsolutePath.pathAsString should equal ("https://centaurtesting.blob.core.windows.net/test-blob/cromwell-execution") + val otherFile = blobRoot.resolve("https://centaurtesting.blob.core.windows.net/test-blob/cromwell-execution/test/inputFile.txt") + otherFile.toAbsolutePath.pathAsString should equal ("https://centaurtesting.blob.core.windows.net/test-blob/cromwell-execution/test/inputFile.txt") } - ignore should "build a blob path from a test string and read a file" in { + it should "build a blob path from a test string and read a file" in { val builder = makeBlobPathBuilder(endpoint, container) val endpointHost = BlobPathBuilder.parseURI(endpoint.value).map(_.getHost).getOrElse(fail("Could not parse URI")) val evalPath = "/test/inputFile.txt" @@ -156,7 +152,7 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { fileText should include ("This is my test file!!!! Did it work?") } - ignore should "build duplicate blob paths in the same filesystem" in { + it should "build duplicate blob paths in the same filesystem" in { val builder = makeBlobPathBuilder(endpoint, container) val evalPath = "/test/inputFile.txt" val testString = endpoint.value + "/" + container + evalPath @@ -169,16 +165,16 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { fileText should include ("This is my test file!!!! Did it work?") } - ignore should "resolve a path without duplicating container name" in { + it should "resolve a path without duplicating container name" in { val builder = makeBlobPathBuilder(endpoint, container) val rootString = s"${endpoint.value}/${container.value}/cromwell-execution" val blobRoot: BlobPath = builder build rootString getOrElse fail() - blobRoot.toAbsolutePath.pathAsString should equal ("https://coaexternalstorage.blob.core.windows.net/inputs/cromwell-execution") + blobRoot.toAbsolutePath.pathAsString should equal ("https://centaurtesting.blob.core.windows.net/test-blob/cromwell-execution") val otherFile = blobRoot.resolve("test/inputFile.txt") - otherFile.toAbsolutePath.pathAsString should equal ("https://coaexternalstorage.blob.core.windows.net/inputs/cromwell-execution/test/inputFile.txt") + otherFile.toAbsolutePath.pathAsString should equal ("https://centaurtesting.blob.core.windows.net/test-blob/cromwell-execution/test/inputFile.txt") } - ignore should "correctly remove a prefix from the blob path" in { + it should "correctly remove a prefix from the blob path" in { val builder = makeBlobPathBuilder(endpoint, container) val rootString = s"${endpoint.value}/${container.value}/cromwell-execution/" val execDirString = s"${endpoint.value}/${container.value}/cromwell-execution/abc123/myworkflow/task1/def4356/execution/" @@ -191,7 +187,7 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { blobFile.pathStringWithoutPrefix(blobFile) should equal ("") } - ignore should "not change a path if it doesn't start with a prefix" in { + it should "not change a path if it doesn't start with a prefix" in { val builder = makeBlobPathBuilder(endpoint, container) val otherRootString = s"${endpoint.value}/${container.value}/foobar/" val fileString = s"${endpoint.value}/${container.value}/cromwell-execution/abc123/myworkflow/task1/def4356/execution/stdout" From 23217a2993426dca540813a2c36242574dbadb22 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Thu, 17 Aug 2023 14:43:10 -0400 Subject: [PATCH 16/17] Enhance `FileSystemAPI` to allow injection --- .../cromwell/filesystems/blob/BlobFileSystemManager.scala | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala index efb073175f1..ac8f01d2cc7 100644 --- a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala +++ b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala @@ -9,6 +9,7 @@ import common.validation.Validation._ import cromwell.cloudsupport.azure.{AzureCredentials, AzureUtils} import java.net.URI +import java.nio.file.spi.FileSystemProvider import java.nio.file.{FileSystem, FileSystemNotFoundException} import java.time.temporal.ChronoUnit import java.time.{Duration, Instant, OffsetDateTime} @@ -17,10 +18,7 @@ import scala.util.{Failure, Success, Try} // We encapsulate this functionality here so that we can easily mock it out, to allow for testing without // actually connecting to Blob storage. -case class FileSystemAPI() { - - private lazy val provider: AzureFileSystemProvider = new AzureFileSystemProvider() - +case class FileSystemAPI(private val provider: FileSystemProvider = new AzureFileSystemProvider()) { def getFileSystem(uri: URI): Try[FileSystem] = Try(provider.getFileSystem(uri)) def newFileSystem(uri: URI, config: Map[String, Object]): FileSystem = provider.newFileSystem(uri, config.asJava) def closeFileSystem(uri: URI): Option[Unit] = getFileSystem(uri).toOption.map(_.close) From d17274fcddb710d9502c58ac4cee89b5f1389057 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Thu, 17 Aug 2023 14:54:08 -0400 Subject: [PATCH 17/17] Re-add test I accidentally deleted --- .../cromwell/filesystems/blob/BlobPathBuilderSpec.scala | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala index 4c4f3a66698..eef6db8e942 100644 --- a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala +++ b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala @@ -126,6 +126,14 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { blobPath1.md5HexString.toOption.get should equal(Some("021c7cc715ec82292bb9b925f9ca44d3")) } + it should "gracefully return `None` when neither hash is found" in { + val builder = makeBlobPathBuilder(endpoint, container) + val evalPath = "/no_md5_test.txt" + val testString = endpoint.value + "/" + container + evalPath + val blobPath1: BlobPath = (builder build testString).get + blobPath1.md5HexString.get should equal(None) + } + it should "resolve an absolute path string correctly to a path" in { val builder = makeBlobPathBuilder(endpoint, container) val rootString = s"${endpoint.value}/${container.value}/cromwell-execution"