Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WX-1530 Strip pesky URL bits when creating local paths for HTTP inputs #7404

Merged
merged 5 commits into from
Apr 16, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ case class HttpPath(nioPath: NioPath) extends Path {

override def pathWithoutScheme: String = pathAsString.replaceFirst("http[s]?://", "")

def pathWithoutSchemeOrQueryOrFragment: String = pathWithoutScheme.split("[?#]").head

def fetchSize(implicit executionContext: ExecutionContext, actorSystem: ActorSystem): Future[Long] =
Http().singleRequest(HttpRequest(uri = pathAsString, method = HttpMethods.HEAD)).map { response =>
response.discardEntityBytes()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import cromwell.core.retry.Retry._
import cromwell.core.retry.SimpleExponentialBackoff
import cromwell.filesystems.blob.{BlobPath, WSMBlobSasTokenGenerator}
import cromwell.filesystems.drs.{DrsPath, DrsResolver}
import cromwell.filesystems.http.HttpPath
import net.ceedubs.ficus.Ficus._
import wom.values.WomFile

Expand Down Expand Up @@ -176,6 +177,46 @@ object TesAsyncBackendJobExecutionActor {
)
)
}

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pulled this logic out so I could test it "easily"

def mapInputPath(path: Path, tesJobPaths: TesJobPaths, commandDirectory: Path): String =
path match {
case drsPath: DrsPath =>
val filepath = DrsResolver.getContainerRelativePath(drsPath).unsafeRunSync()
tesJobPaths.containerExec(commandDirectory, filepath)
case httpPath: HttpPath =>
// Strip the query params and anything after a # from http paths when turning them into local paths
tesJobPaths.callInputsDockerRoot
.resolve(httpPath.pathWithoutSchemeOrQueryOrFragment.stripPrefix("/"))
.pathAsString
Comment on lines +186 to +190
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the part that changed.

case path: Path if path.startsWith(tesJobPaths.workflowPaths.DockerRoot) =>
path.pathAsString
case path: Path if path.equals(tesJobPaths.callExecutionRoot) =>
commandDirectory.pathAsString
case path: Path if path.startsWith(tesJobPaths.callExecutionRoot) =>
tesJobPaths.containerExec(commandDirectory, path.name)
case path: Path if path.startsWith(tesJobPaths.callRoot) =>
tesJobPaths.callDockerRoot.resolve(path.name).pathAsString
case path: BlobPath if path.startsWith(tesJobPaths.workflowPaths.workflowRoot) =>
// Blob paths can get really long, which causes problems for some tools. If this input file
// lives in the workflow execution directory, strip off that prefix from the path we're
// generating inside `inputs/` to keep the total path length under control.
// In Terra on Azure, this saves us 200+ characters.
tesJobPaths.callInputsDockerRoot
.resolve(
path.pathStringWithoutPrefix(tesJobPaths.workflowPaths.workflowRoot)
)
.pathAsString
case path: BlobPath if path.startsWith(tesJobPaths.workflowPaths.executionRoot) =>
// See comment above... if this file is in the execution root, strip that off.
// In Terra on Azure, this saves us 160+ characters.
tesJobPaths.callInputsDockerRoot
.resolve(
path.pathStringWithoutPrefix(tesJobPaths.workflowPaths.executionRoot)
)
.pathAsString
case _ =>
tesJobPaths.callInputsDockerRoot.resolve(path.pathWithoutScheme.stripPrefix("/")).pathAsString
}
}

class TesAsyncBackendJobExecutionActor(override val standardParams: StandardAsyncExecutionActorParams)
Expand Down Expand Up @@ -262,37 +303,8 @@ class TesAsyncBackendJobExecutionActor(override val standardParams: StandardAsyn
override def mapCommandLineJobInputWomFile(womFile: WomFile): WomFile =
womFile.mapFile(value =>
getPath(value) match {
case Success(drsPath: DrsPath) =>
val filepath = DrsResolver.getContainerRelativePath(drsPath).unsafeRunSync()
tesJobPaths.containerExec(commandDirectory, filepath)
case Success(path: Path) if path.startsWith(tesJobPaths.workflowPaths.DockerRoot) =>
path.pathAsString
case Success(path: Path) if path.equals(tesJobPaths.callExecutionRoot) =>
commandDirectory.pathAsString
case Success(path: Path) if path.startsWith(tesJobPaths.callExecutionRoot) =>
tesJobPaths.containerExec(commandDirectory, path.name)
case Success(path: Path) if path.startsWith(tesJobPaths.callRoot) =>
tesJobPaths.callDockerRoot.resolve(path.name).pathAsString
case Success(path: BlobPath) if path.startsWith(tesJobPaths.workflowPaths.workflowRoot) =>
// Blob paths can get really long, which causes problems for some tools. If this input file
// lives in the workflow execution directory, strip off that prefix from the path we're
// generating inside `inputs/` to keep the total path length under control.
// In Terra on Azure, this saves us 200+ characters.
tesJobPaths.callInputsDockerRoot
.resolve(
path.pathStringWithoutPrefix(tesJobPaths.workflowPaths.workflowRoot)
)
.pathAsString
case Success(path: BlobPath) if path.startsWith(tesJobPaths.workflowPaths.executionRoot) =>
// See comment above... if this file is in the execution root, strip that off.
// In Terra on Azure, this saves us 160+ characters.
tesJobPaths.callInputsDockerRoot
.resolve(
path.pathStringWithoutPrefix(tesJobPaths.workflowPaths.executionRoot)
)
.pathAsString
case Success(path: Path) =>
tesJobPaths.callInputsDockerRoot.resolve(path.pathWithoutScheme.stripPrefix("/")).pathAsString
TesAsyncBackendJobExecutionActor.mapInputPath(path, tesJobPaths, commandDirectory)
case _ =>
value
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,27 @@
package cromwell.backend.impl.tes

import common.mock.MockSugar
import cromwell.backend.BackendJobDescriptorKey
import cromwell.backend.BackendSpec.buildWdlWorkflowDescriptor
import cromwell.core.logging.JobLogger
import cromwell.core.path.NioPath
import cromwell.core.path.{DefaultPathBuilder, NioPath}
import cromwell.filesystems.blob.{BlobFileSystemManager, BlobPath, WSMBlobSasTokenGenerator}
import cromwell.filesystems.http.HttpPathBuilder
import org.mockito.ArgumentMatchers.any
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers
import org.scalatest.prop.TableDrivenPropertyChecks
import wom.graph.CommandCallNode

import java.time.Duration
import java.time.temporal.ChronoUnit
import scala.util.{Failure, Try}

class TesAsyncBackendJobExecutionActorSpec extends AnyFlatSpec with Matchers with MockSugar {
class TesAsyncBackendJobExecutionActorSpec
extends AnyFlatSpec
with Matchers
with MockSugar
with TableDrivenPropertyChecks {
behavior of "TesAsyncBackendJobExecutionActor"

val fullyQualifiedName = "this.name.is.more.than.qualified"
Expand Down Expand Up @@ -175,4 +184,53 @@ class TesAsyncBackendJobExecutionActorSpec extends AnyFlatSpec with Matchers wit
generatedBashScript should include(echoCommandSubstring)
generatedBashScript should include(exportCommandSubstring)
}

private val httpPathTestCases = Table(
("test name", "http path", "local path in input dir"),
(
"strip simple kv query params",
"http://example.com/my_sample.bam?k1=v1&k2=v2",
"example.com/my_sample.bam"
),
(
"handle http paths without query params",
"http://example.com/my_sample.bam",
"example.com/my_sample.bam"
),
(
"handle http paths without params but with a ?",
"http://example.com/my_sample.bam?",
"example.com/my_sample.bam"
),
(
"handle a blob file with SAS token attached",
"https://lzbc096764ae93ffff9f406e.blob.core.windows.net/sc-a7f7a9e0-2dcf-465c-997b-a276090a52da/workspace-services/cbas/terra-app-2f577477-763b-4e27-8e28-b03d91b6f3be/cromwell-workflow-logs/workflow.c621a5df-37f1-422d-b91a-1a65f6112a6a.log?sv=2023-11-03&spr=https&st=2024-04-09T23%3A35%3A37Z&se=2024-04-10T07%3A50%3A37Z&sr=c&sp=racwdlt&sig=REDACTEDS&rscd=100067995116984528334",
"lzbc096764ae93ffff9f406e.blob.core.windows.net/sc-a7f7a9e0-2dcf-465c-997b-a276090a52da/workspace-services/cbas/terra-app-2f577477-763b-4e27-8e28-b03d91b6f3be/cromwell-workflow-logs/workflow.c621a5df-37f1-422d-b91a-1a65f6112a6a.log"
),
(
"handle an http path with fragment",
"http://example.com/my_sample.bam#my_favorite_part",
"example.com/my_sample.bam"
),
(
"handle an http path with fragment and query params",
"http://example.com/my_sample.bam?k=yourface#my_favorite_part",
"example.com/my_sample.bam"
)
)

forAll(httpPathTestCases) { (testName, httpPath, localPathInInputDir) =>
it should testName in {
val wd = buildWdlWorkflowDescriptor(TestWorkflows.HelloWorld)
val call: CommandCallNode = wd.callable.taskCallNodes.head
val jobKey = BackendJobDescriptorKey(call, None, 1)
val jobPaths = TesJobPaths(jobKey, wd, TesTestConfig.backendConfig)
val commandDirectory = DefaultPathBuilder.build("/my/command/dir").get
val httpBuilder = new HttpPathBuilder()

val httpPathWithParams = httpBuilder.build(httpPath)
val actual = TesAsyncBackendJobExecutionActor.mapInputPath(httpPathWithParams.get, jobPaths, commandDirectory)
actual shouldBe s"${jobPaths.callInputsDockerRoot}/$localPathInInputDir"
}
}
}
Loading