Skip to content

Commit

Permalink
Encode special characters in ADLS Gen2 URI (#3937)
Browse files Browse the repository at this point in the history
* Encode special characters in ADLS Gen2 URI
  • Loading branch information
t-rufang authored Jan 16, 2020
1 parent 15d031c commit 3c1a482
Show file tree
Hide file tree
Showing 12 changed files with 187 additions and 138 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ import com.intellij.ui.components.JBScrollPane
import com.intellij.ui.components.fields.ExpandableTextField
import com.intellij.ui.table.JBTable
import com.intellij.uiDesigner.core.GridConstraints.*
import com.intellij.util.execution.ParametersListUtil
import com.microsoft.azure.cosmosspark.common.JXHyperLinkWithUri
import com.microsoft.azure.hdinsight.common.AbfsUri
import com.microsoft.azure.hdinsight.common.ClusterManagerEx
Expand Down Expand Up @@ -298,10 +297,10 @@ open class SparkSubmissionContentPanel(private val myProject: Project, val type:
}

private val refJarsPrompt: JLabel = JLabel("Referenced Jars(spark.jars)").apply {
toolTipText = "Files to be placed on the java classpath; The path needs to be an Azure Blob Storage Path (path started with wasb://); Multiple paths should be split by semicolon (;)"
toolTipText = "Files to be placed on the java classpath. Multiple paths should be split by space."
}

private val referencedJarsTextField: TextFieldWithBrowseButton = TextFieldWithBrowseButton(ExpandableTextField(ParametersListUtil.COLON_LINE_PARSER, ParametersListUtil.COLON_LINE_JOINER).apply {
private val referencedJarsTextField: TextFieldWithBrowseButton = TextFieldWithBrowseButton(ExpandableTextField().apply {
toolTipText = "Artifact from remote storage account."
}).apply {
textField.name = "referencedJarsTextFieldText"
Expand All @@ -322,17 +321,17 @@ open class SparkSubmissionContentPanel(private val myProject: Project, val type:
// Warning: We have overridden toString method in class AdlsGen2VirtualFile
// If we implement virtual file for Gen1, blob or other storage later, remember to implement toString method
// for those virtual file class later.
text = chooseFiles.joinToString(";") { vf -> vf.toString() }
text = chooseFiles.joinToString(" ") { vf -> vf.toString() }
}
}
}
}

private val refFilesPrompt: JLabel = JLabel("Referenced Files(spark.files)").apply {
toolTipText = "Files to be placed in executor working directory. The path needs to be an Azure Blob Storage Path (path started with wasb://); Multiple paths should be split by semicolon (;) "
toolTipText = "Files to be placed in executor working directory. Multiple paths should be split by space."
}

private val referencedFilesTextField: TextFieldWithBrowseButton = TextFieldWithBrowseButton(ExpandableTextField(ParametersListUtil.COLON_LINE_PARSER, ParametersListUtil.COLON_LINE_JOINER).apply {
private val referencedFilesTextField: TextFieldWithBrowseButton = TextFieldWithBrowseButton(ExpandableTextField().apply {
toolTipText = refFilesPrompt.toolTipText
}).apply {
textField.name = "referencedFilesTextFieldText"
Expand All @@ -352,7 +351,7 @@ open class SparkSubmissionContentPanel(private val myProject: Project, val type:
// Warning: We have overridden toString method in class AdlsGen2VirtualFile
// If we implement virtual file for Gen1, blob or other storage later, remember to implement toString method
// for those virtual file class later.
text = chooseFiles.joinToString(";") { vf -> vf.toString() }
text = chooseFiles.joinToString(" ") { vf -> vf.toString() }
}
}
}
Expand Down Expand Up @@ -613,8 +612,8 @@ open class SparkSubmissionContentPanel(private val myProject: Project, val type:

localArtifactTextField.text = data.localArtifactPath
commandLineTextField.text = data.commandLineArgs.joinToString(" ")
referencedJarsTextField.text = data.referenceJars.joinToString(";")
referencedFilesTextField.text = data.referenceFiles.joinToString(";")
referencedJarsTextField.text = data.referenceJars.joinToString(" ")
referencedFilesTextField.text = data.referenceFiles.joinToString(" ")

// update job configuration table
if (jobConfigurationTable.model != data.tableModel) {
Expand Down Expand Up @@ -647,13 +646,13 @@ open class SparkSubmissionContentPanel(private val myProject: Project, val type:
val selectedClusterName = viewModel.clusterSelection.selectedCluster?.name
val selectedClusterId = viewModel.clusterSelection.toSelectClusterByIdBehavior.value as? String

val referencedFileList = referencedFilesTextField.text.split(";").dropLastWhile { it.isEmpty() }
val referencedFileList = referencedFilesTextField.text.split(" ").dropLastWhile { it.isEmpty() }
.asSequence()
.map { it.trim() }
.filter { s -> !s.isEmpty() }
.toList()

val uploadedFilePathList = referencedJarsTextField.text.split(";").dropLastWhile { it.isEmpty() }
val uploadedFilePathList = referencedJarsTextField.text.split(" ").dropLastWhile { it.isEmpty() }
.asSequence()
.map { it.trim() }
.filter { s -> !s.isEmpty() }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import com.intellij.openapi.vfs.VirtualFile;
import com.intellij.openapi.vfs.VirtualFileListener;
import com.microsoft.azure.hdinsight.common.AbfsUri;
import com.microsoft.azure.hdinsight.common.AzureStorageUri;
import com.microsoft.azure.hdinsight.common.UriUtil;
import com.microsoft.azure.hdinsight.sdk.common.HttpObservable;
import com.microsoft.azure.hdinsight.sdk.common.errorresponse.ForbiddenHttpErrorStatus;
Expand Down Expand Up @@ -67,12 +68,12 @@ public VirtualFile[] listFiles(AdlsGen2VirtualFile vf) {
// sample rootUrl: https://accountName.dfs.core.windows.net/fileSystem
URL rootUrl = this.rootPathUri.getUrl();
// sample directoryParam: sub/path/to
URI directoryParam = vf.getAbfsUri().getDirectoryParam();
childrenList = this.op.list(rootUrl.toString(), directoryParam.toString())
String directoryParam = vf.getAbfsUri().getDirectoryParam();
childrenList = this.op.list(rootUrl.toString(), directoryParam)
// sample remoteFile.getName(): sub/path/to/SparkSubmission
.map(remoteFile -> new AdlsGen2VirtualFile(
AbfsUri.parse(UriUtil.normalizeWithSlashEnding(URI.create(rootUrl.toString()))
.resolve(remoteFile.getName()).toString()),
(AbfsUri) AbfsUri.parse(rootUrl.toString())
.resolveAsRoot(AzureStorageUri.encodeAndNormalizePath(remoteFile.getName())),
remoteFile.isDirectory(),
this))
.doOnNext(file -> file.setParent(vf))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import com.microsoft.azuretools.azurecommons.helpers.Nullable

open class AdlsGen2VirtualFile(val abfsUri: AbfsUri, private val myIsDirectory: Boolean, private val myFileSystem: VirtualFileSystem) : AzureStorageVirtualFile() {
private var parent: VirtualFile? = null
override fun getPath(): String = abfsUri.url.path
override fun getPath(): String = abfsUri.path
override fun getName(): String {
return path.substring(path.lastIndexOf("/") + 1)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,16 @@ import cucumber.api.java.en.Given
import cucumber.api.java.en.Then
import java.util.*
import java.util.stream.Collectors
import kotlin.test.assertEquals

class AbfsUriScenario {
/**
 * One row of the "properties of abfs URI should be" check table.
 *
 * [url] is the input handed to `AbfsUri.parse`; the remaining fields hold the values the step
 * compares against the parsed URI's account name, file system, raw path, path and directory
 * parameter. When parsing [url] throws, the step expects every one of those columns to be the
 * literal `<invalid>`.
 */
data class AbfsUriAndProperties(
    val url: String,
    val accountName: String,
    val fileSystem: String,
    val rawPath: String,
    val path: String,
    val directoryParam: String
)

private var restfulGen2Paths: List<String> = emptyList()
private var abfsUris: List<String> = emptyList()

Expand Down Expand Up @@ -75,20 +83,23 @@ class AbfsUriScenario {
}
}

@Then("^the Gen two directory param should be$")
fun gen2SubPathShouldBe(expectedSubPath: DataTable) {
val actualSubPath = this.abfsUris.stream()
.map { path ->
try {
AbfsUri.parse(path).directoryParam.toString()
} catch (ex: Throwable) {
"invalid Gen2 URI"
}
@Then("^properties of abfs URI should be$")
fun abfsUriPropertiesShouldBe(checkTable: List<AbfsUriAndProperties>) {
checkTable.forEach {
try {
val abfsUri = AbfsUri.parse(it.url)
assertEquals(abfsUri.accountName, it.accountName)
assertEquals(abfsUri.fileSystem, it.fileSystem)
assertEquals(abfsUri.rawPath, it.rawPath)
assertEquals(abfsUri.getPath(), it.path)
assertEquals(abfsUri.directoryParam, it.directoryParam)
} catch (ex: Throwable) {
assertEquals("<invalid>", it.accountName, "Get error when parsing accountName from AbfsUri ${it.url}. ${ex.message}")
assertEquals("<invalid>", it.fileSystem, "Get error when parsing fileSystem from AbfsUri ${it.url}. ${ex.message}")
assertEquals("<invalid>", it.rawPath, "Get error when parsing rawPath from AbfsUri ${it.url}. ${ex.message}")
assertEquals("<invalid>", it.path, "Get error when parsing path from AbfsUri ${it.url}. ${ex.message}")
assertEquals("<invalid>", it.directoryParam, "Get error when parsing directoryParam from AbfsUri ${it.url}. ${ex.message}")
}
.collect(Collectors.toList())

(0 until actualSubPath.size).forEach {
assert(actualSubPath[it] == expectedSubPath.asList(String::class.java)[it])
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class AdlUriScenario {
try {
val adlUri = AdlUri.parse(it.uri)

assertEquals(it.path, adlUri.path.toString(), "Check ADL Gen1 URI ${it.uri} path parameter")
assertEquals(it.path, adlUri.getPath(), "Check ADL Gen1 URI ${it.uri} path parameter")
assertEquals(it.storageName, adlUri.storageName.toString(), "Check ADL Gen1 URI ${it.uri} storage parameter")
} catch (ex: UnknownFormatConversionException) {
assertEquals(it.path, "<invalid>", "Get ${ex.message} when parsing ${it.uri} to GEN 1 AdlURI")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class WasbUriScenario {
// Check-table row types for the Wasb URI scenarios.
// NOTE(review): the step definitions that consume these three row types are outside this view;
// the field descriptions below are inferred from the names only - confirm against the steps.

/** Presumably a pair of URL/URI strings plus whether they are expected to compare as equal. */
data class UrlUriEqualEntry(
    val src: String,
    val dest: String,
    val isEqualed: Boolean
)

/** Presumably a base URI, a path to resolve against its root, and the expected resulting URI. */
data class UriPathResolveAsRootEntry(
    val uri: String,
    val path: String,
    val result: String
)

/** Presumably two URIs and the expected outcome of relativizing one against the other. */
data class UriRelativizeCheckEntry(
    val src: String,
    val dest: String,
    val result: String
)
/**
 * One row of the "check the encoded path as below" table: [rawPath] is the input given to
 * `AzureStorageUri.encodeAndNormalizePath` and [encodedPath] is the expected output, or the
 * literal `<invalid>` when encoding is expected to fail.
 */
// NOTE(review): the class name starts with a lowercase letter, unlike the sibling entry classes;
// renaming it would also require updating the checkEncodedPath step signature.
data class rawPathEncodedPathEntry(
    val rawPath: String,
    val encodedPath: String
)

@Then("convert Wasb URL restful path to URI should be")
fun checkWasb2HttpConversion(checkTable: List<UrlUriEntry>) {
Expand Down Expand Up @@ -66,7 +67,7 @@ class WasbUriScenario {
try {
val wasbUri = WasbUri.parse(it.uri)

assertEquals(it.path, wasbUri.path.toString(), "Check Wasb URI ${it.uri} path parameter")
assertEquals(it.path, wasbUri.getPath(), "Check Wasb URI ${it.uri} path parameter")
assertEquals(it.account, wasbUri.storageAccount.toString(), "Check Wasb URI ${it.uri} account parameter")
assertEquals(it.container, wasbUri.container.toString(), "Check Wasb URI ${it.uri} container parameter")
assertEquals(it.endpointSuffix, wasbUri.endpointSuffix.toString(), "Check Wasb URI ${it.uri} endpointSuffix parameter")
Expand Down Expand Up @@ -116,4 +117,17 @@ class WasbUriScenario {
)
}
}

/**
 * Verifies that `AzureStorageUri.encodeAndNormalizePath` turns every raw path of the check table
 * into its expected encoded form.
 *
 * A row whose expected value is the literal `<invalid>` asserts that encoding the raw path fails
 * with an exception.
 *
 * @param checkTable rows pairing a raw path with the expected encoded path (or `<invalid>`)
 */
@Then("^check the encoded path as below$")
fun checkEncodedPath(checkTable: List<rawPathEncodedPathEntry>) {
    checkTable.forEach { entry ->
        val rawPath = entry.rawPath
        val expectedEncodedPath = entry.encodedPath

        // Guard only the encoding call. If the comparison itself sat inside the try, its
        // AssertionError would be caught below: a row expecting "<invalid>" would then pass even
        // though encoding succeeded, and real mismatches would be reported as encoding errors.
        val actualEncodedPath = try {
            AzureStorageUri.encodeAndNormalizePath(rawPath)
        } catch (ex: Exception) {
            assertEquals("<invalid>", expectedEncodedPath, "Get error when encode path $rawPath. ${ex.message}")
            return@forEach
        }

        // kotlin.test.assertEquals takes (expected, actual, message).
        assertEquals(expectedEncodedPath, actualEncodedPath, "Check encode path $rawPath")
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,30 +34,25 @@ Feature: ADLS Gen2 URI operation
| https://accountName.dfs.core.windows.net/fs0/subPath0/ |
| https://accountName.dfs.core.windows.net/fs0/subPath0/subPath1 |

Scenario: Get Gen2 directory param from ABFS URI
Given ABFS URI is
| abfs://accountName.dfs.core.windows.net |
| abfs://fs0@accountName.dfs.core.windows.net |
| abfs://fs0@accountName.dfs.core.windows.net/ |
| abfs://fs0@accountName.dfs.core.windows.net/subPath0 |
| abfs://fs0@accountName.dfs.core.windows.net/subPath0/ |
| abfs://fs0@accountName.dfs.core.windows.net/subPath0/subPath1 |
| https://accountName.dfs.core.windows.net |
| https://accountName.dfs.core.windows.net/fs0 |
| https://accountName.dfs.core.windows.net/fs0/ |
| https://accountName.dfs.core.windows.net/fs0/subPath0 |
| https://accountName.dfs.core.windows.net/fs0/subPath0/ |
| https://accountName.dfs.core.windows.net/fs0/subPath0/subPath1 |
Then the Gen two directory param should be
| invalid Gen2 URI |
| / |
| / |
| subPath0 |
| subPath0/ |
| subPath0/subPath1 |
| invalid Gen2 URI |
| / |
| / |
| subPath0 |
| subPath0/ |
| subPath0/subPath1 |
Scenario: Get properties from ABFS URI
Then properties of abfs URI should be
| url | accountName | fileSystem | rawPath | path | directoryParam |
| abfs://accountName.dfs.core.windows.net | <invalid> | <invalid> | <invalid> | <invalid> | <invalid> |
| abfs://fs0@accountName.dfs.core.windows.net | accountName | fs0 | | | / |
| abfs://fs0@accountName.dfs.core.windows.net/ | accountName | fs0 | / | / | / |
| abfs://fs0@accountName.dfs.core.windows.net/subPath0 | accountName | fs0 | /subPath0 | /subPath0 | subPath0 |
| abfs://fs0@accountName.dfs.core.windows.net/subPath0/ | accountName | fs0 | /subPath0/ | /subPath0/ | subPath0/ |
| abfs://fs0@accountName.dfs.core.windows.net/subPath0/subPath1 | accountName | fs0 | /subPath0/subPath1 | /subPath0/subPath1 | subPath0/subPath1 |
| https://accountName.dfs.core.windows.net | <invalid> | <invalid> | <invalid> | <invalid> | <invalid> |
| https://accountName.dfs.core.windows.net/fs0 | accountName | fs0 | | | / |
| https://accountName.dfs.core.windows.net/fs0/ | accountName | fs0 | / | / | / |
| https://accountName.dfs.core.windows.net/fs0/subPath0 | accountName | fs0 | /subPath0 | /subPath0 | subPath0 |
| https://accountName.dfs.core.windows.net/fs0/subPath0/ | accountName | fs0 | /subPath0/ | /subPath0/ | subPath0/ |
| https://accountName.dfs.core.windows.net/fs0/subPath0/subPath1 | accountName | fs0 | /subPath0/subPath1 | /subPath0/subPath1 | subPath0/subPath1 |
| abfs://fs0@accountName.dfs.core.windows.net/new%20%23%25folder | accountName | fs0 | /new%20%23%25folder | /new #%folder | new #%folder |
| abfs://fs0@accountName.dfs.core.windows.net/.~_@:!$'()*+,;= | accountName | fs0 | /.~_@:!$'()*+,;= | /.~_@:!$'()*+,;= | .~_@:!$'()*+,;= |
| abfs://fs0@accountName.dfs.core.windows.net/aaa%3Fbbb | accountName | fs0 | /aaa%3Fbbb | /aaa?bbb | aaa?bbb |
| abfs://fs0@accountName.dfs.core.windows.net/aaa?bbb | <invalid> | <invalid> | <invalid> | <invalid> | <invalid> |
| abfs://fs0@accountName.dfs.core.windows.net/new folder | <invalid> | <invalid> | <invalid> | <invalid> | <invalid> |
| abfs://fs0@accountName.dfs.core.windows.net/new#folder | <invalid> | <invalid> | <invalid> | <invalid> | <invalid> |
| abfs://fs0@accountName.dfs.core.windows.net/new%folder | <invalid> | <invalid> | <invalid> | <invalid> | <invalid> |
Loading

0 comments on commit 3c1a482

Please sign in to comment.