Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: A base scrubber and a "Shared Access Signature" Scrubber #1939

Merged
merged 9 commits into from
Apr 27, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

package com.microsoft.azure.synapse.ml.io.http

import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging
import org.apache.commons.io.IOUtils
import org.apache.http.client.config.RequestConfig
import org.apache.http.client.methods.{CloseableHttpResponse, HttpPost, HttpRequestBase}
Expand Down Expand Up @@ -146,10 +147,10 @@ object HandlingUtils extends SparkLogging {
case r: HttpPost => Try(IOUtils.toString(r.getEntity.getContent, "UTF-8")).getOrElse("")
case r => r.getURI
}
logInfo(s"sending $message")
SynapseMLLogging.logMessage(s"sending $message")
val start = System.currentTimeMillis()
val resp = sendWithRetries(client, req, retryTimes.toArray)
logInfo(s"finished sending (${System.currentTimeMillis() - start}ms) $message")
SynapseMLLogging.logMessage(s"finished sending (${System.currentTimeMillis() - start}ms) $message")
val respData = convertAndClose(resp)
req.releaseConnection()
respData
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
package com.microsoft.azure.synapse.ml.logging

import com.microsoft.azure.synapse.ml.build.BuildInfo
import com.microsoft.azure.synapse.ml.logging.common.SASScrubber
import org.apache.spark.internal.Logging
import spray.json.{DefaultJsonProtocol, RootJsonFormat, NullOptions}

import scala.collection.JavaConverters._
import scala.collection.mutable

Expand Down Expand Up @@ -40,6 +42,10 @@ object SynapseMLLogging extends Logging {
logInfo(s"metrics/ ${mapToPrint.toJson.compactPrint}")
}

/** Logs `message` via `logInfo` after running it through `SASScrubber.scrub`.
  *
  * @param message raw log text; it is scrubbed before being handed to the logger
  */
def logMessage(message: String): Unit =
  logInfo(SASScrubber.scrub(message))

}

trait SynapseMLLogging extends Logging {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.azure.synapse.ml.logging.common

import scala.util.matching.Regex

/*
Scrubbers that filter out information that must not be logged,
such as SAS tokens.

SASScrubber: This one is specifically for scrubbing the Shared Access Signature.
*/

/** Contract for components that filter sensitive information out of text before it is logged. */
trait Scrubber {
  /** Returns a scrubbed copy of `content`.
    *
    * @param content text that may contain information that must not be logged
    * @return the text after scrubbing
    */
  def scrub(content: String): String
}

/** Scrubber that masks the `sig=` value of a Shared Access Signature.
  *
  * A signature is recognised (case-insensitively) as `sig=` followed by 43 to 63 characters
  * drawn from letters, digits and `%`, terminated by `%3d`. Every such occurrence is replaced
  * by `sig=####`; text that does not match is returned unchanged.
  */
object SASScrubber extends Scrubber {
  // Compiled once when the object is initialised instead of on every call: scrub runs for
  // each logged message, and the pattern never changes.
  private val SignaturePattern: Regex = new Regex("(?i)sig=[a-z0-9%]{43,63}%3d")

  // Replacement text; it contains no `$` or `\`, so it needs no quoting for replaceAllIn.
  private val Mask: String = "sig=####"

  /** Replaces every matching signature in `message` with the mask.
    *
    * @param message text to scrub
    * @return `message` with each matched signature replaced by `sig=####`
    */
  def scrub(message: String): String =
    SignaturePattern.replaceAllIn(message, Mask)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.azure.synapse.ml.logging

import com.microsoft.azure.synapse.ml.core.test.base.TestBase
import com.microsoft.azure.synapse.ml.logging.common.SASScrubber
/** Tests for `SASScrubber.scrub`: a well-formed signature is masked, and a malformed one
  * leaves the message untouched.
  */
class LoggingScrubberTests extends TestBase {

  // Both tests use the same request payload and differ only in the `sig=` value, so the
  // text before and after the signature is shared.
  private val messagePrefix: String =
    "sending {\"alignPolicy\":{},\"endTime\":\"2023-04-17T13:00:00Z\",\"slidingWindow\":300," +
      "\"source\":\"https://usw2itesgprodsa01g8kn8.blob.core.windows.net/raw/intermediate%2F" +
      "FitMultivariateAnomaly_b79685045fba.zip?sv=2020-10-02&se=2023-04-19T22%3A45%3A17Z&sr=b&sp=r&"

  private val messageSuffix: String = "\",\"startTime\":\"2020-07-01T00:00:00Z\"}"

  // Expected text when the signature has been masked.
  private val scrubbedMessage: String = messagePrefix + "sig=####" + messageSuffix

  test("SASScrubber Valid Input Test.") {
    val message = messagePrefix + "sig=k2N3nSvLtiDH5xfYAwklSfyiuJ42aG8T8hLCNWdtNXk%3D" + messageSuffix
    val result = SASScrubber.scrub(message)
    assert(result == scrubbedMessage)
  }

  test("SASScrubber Invalid Input Test.") {
    // '@' and '#' are outside the signature character set, so nothing should match.
    val message = messagePrefix + "sig=k2N3nSvLt@DH5xfYAwkl###iuJ42aG8T8hLCNWdtNXk%3D" + messageSuffix
    val result = SASScrubber.scrub(message)
    // Stronger than `result != scrubbedMessage`: a non-matching message must come back
    // unchanged, not merely differ from the masked form.
    assert(result == message)
  }
}