Skip to content

Commit

Permalink
[Test Proxy] Centrally sanitize sensitive patterns for all tests (Azu…
Browse files Browse the repository at this point in the history
…re#35196)

* Add sanitizer enum and batch sanitizing method

* Add shared, fake credentials

* Add central set of sanitizers

* Remove KV custom matcher

* Add extra batch of sanitizers
  • Loading branch information
mccoyp authored and pull[bot] committed Dec 3, 2024
1 parent 5d72c31 commit 8147686
Show file tree
Hide file tree
Showing 5 changed files with 235 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,6 @@ async def test_example_secret_crud_operations(self, client, **kwargs):
@AsyncSecretsClientPreparer()
@recorded_by_proxy_async
async def test_example_secret_list_operations(self, client, **kwargs):
if not is_live():
set_custom_default_matcher(excluded_headers="Authorization")
secret_client = client
async with secret_client:
for i in range(7):
Expand Down
4 changes: 4 additions & 0 deletions tools/azure-sdk-tools/devtools_testutils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from .proxy_testcase import recorded_by_proxy
from .sanitizers import (
add_api_version_transform,
add_batch_sanitizers,
add_body_key_sanitizer,
add_body_regex_sanitizer,
add_body_string_sanitizer,
Expand All @@ -40,6 +41,7 @@
add_uri_string_sanitizer,
add_uri_subscription_id_sanitizer,
PemCertificate,
Sanitizer,
set_bodiless_matcher,
set_custom_default_matcher,
set_default_function_settings,
Expand All @@ -56,6 +58,7 @@

__all__ = [
"add_api_version_transform",
"add_batch_sanitizers",
"add_body_key_sanitizer",
"add_body_regex_sanitizer",
"add_body_string_sanitizer",
Expand All @@ -79,6 +82,7 @@
"FakeResource",
"ReservedResourceNameError",
"ResourceGroupPreparer",
"Sanitizer",
"StorageAccountPreparer",
"BlobAccountPreparer",
"CachedStorageAccountPreparer",
Expand Down
6 changes: 6 additions & 0 deletions tools/azure-sdk-tools/devtools_testutils/fake_credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,17 @@


# General-use fake credentials
# None of the values below are real secrets; they are fixed placeholders intended for use in tests and recordings.
# FAKE_ACCESS_TOKEN is shaped like a JWT: dot-separated base64url segments with an empty final segment.
FAKE_ACCESS_TOKEN = "eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJlbWFpbCI6IkJvYkBjb250b3NvLmNvbSIsImdpdmVuX25hbWUiOiJCb2I" \
"iLCJpc3MiOiJodHRwOi8vRGVmYXVsdC5Jc3N1ZXIuY29tIiwiYXVkIjoiaHR0cDovL0RlZmF1bHQuQXVkaWVuY2UuY29tIiwiaWF0IjoiMTYwNz" \
"k3ODY4MyIsIm5iZiI6IjE2MDc5Nzg2ODMiLCJleHAiOiIxNjA3OTc4OTgzIn0."
# All-zero value in GUID format, usable wherever an ID-shaped placeholder is needed.
FAKE_ID = "00000000-0000-0000-0000-000000000000"
FAKE_LOGIN_PASSWORD = "F4ke_L0gin_P4ss"

# Service-specific fake credentials
# Each constant is named after the service area it is a placeholder for.
BATCH_TEST_PASSWORD = "kt#_gahr!@aGERDXA"
MGMT_HDINSIGHT_FAKE_KEY = "qFmud5LfxcCxWUvWcGMhKDp0v0KuBRLsO/AIddX734W7lzdInsVMsB5ILVoOrF+0fCfk/IYYy5SJ9Q+2v4aihQ=="
# Shared access signature string with dummy `sig` and `skn` values.
SERVICEBUS_FAKE_SAS = "SharedAccessSignature sr=https%3A%2F%2Ffoo.servicebus.windows.net&sig=dummyValue%3D&se=168726" \
"7490&skn=dummyKey"
STORAGE_ACCOUNT_FAKE_KEY = "NzhL3hKZbJBuJ2484dPTR+xF30kYaWSSCbs2BzLgVVI1woqeST/1IgqaLm6QAOTxtGvxctSNbIR/1hW8yH+bJg=="


Expand Down
166 changes: 159 additions & 7 deletions tools/azure-sdk-tools/devtools_testutils/proxy_startup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,13 @@
from ci_tools.variables import in_ci

from .config import PROXY_URL
from .fake_credentials import FAKE_ACCESS_TOKEN, FAKE_ID, SERVICEBUS_FAKE_SAS
from .helpers import get_http_client, is_live_and_not_recording
from .sanitizers import add_oauth_response_sanitizer, add_remove_header_sanitizer, set_custom_default_matcher
from .sanitizers import (
add_batch_sanitizers,
Sanitizer,
set_custom_default_matcher,
)


load_dotenv(find_dotenv())
Expand Down Expand Up @@ -280,6 +285,158 @@ def prepare_local_tool(repo_root: str) -> str:
)


def set_common_sanitizers() -> None:
    """Set the default matcher and register the sanitizers shared by all recordings in the SDK.

    The default matcher is configured first so that a fixed set of headers is ignored during matching. Every
    sanitizer definition is then collected into one dictionary, keyed by ``Sanitizer`` member, and handed to
    ``add_batch_sanitizers`` in a single call.
    """
    placeholder = "Sanitized"

    # These headers are stripped from recordings and excluded from matching. Authorization, for example, can
    # contain sensitive info and can cause matching failures during challenge auth.
    ignored_headers = "Authorization, x-ms-client-request-id, x-ms-request-id"
    set_custom_default_matcher(excluded_headers=ignored_headers)

    # JSON paths whose values are replaced by a fake access token
    access_token_paths = ("$..access_token", "$..AccessToken")

    # JSON paths whose values are replaced by the generic placeholder
    placeholder_paths = (
        "$..targetModelLocation",
        "$..targetResourceId",
        "$..urlSource",
        "$..azureBlobSource.containerUrl",
        "$..source",
        "$..resourceLocation",
        "Location",
        "$..to",
        "$..from",
        "$..sasUri",
        "$..containerUri",
        "$..inputDataUri",
        "$..outputDataUri",
        "$..id",
        "$..token",
        "$..appId",
        "$..userId",
        "$..storageAccount",
        "$..resourceGroup",
        "$..guardian",
        "$..scan",
        "$..catalog",
        "$..lastModifiedBy",
        "$..managedResourceGroupName",
        "$..friendlyName",
        "$..createdBy",
        "$..credential",
        "$..aliasPrimaryConnectionString",
        "$..aliasSecondaryConnectionString",
        "$..connectionString",
        "$..primaryConnectionString",
        "$..secondaryConnectionString",
        "$..sshPassword",
        "$..primaryKey",
        "$..secondaryKey",
        "$..runAsPassword",
        "$..adminPassword",
        "$..adminPassword.value",
        "$..administratorLoginPassword",
        "$..accessSAS",
        "$..WEBSITE_AUTH_ENCRYPTION_KEY",
        "$..storageContainerWriteSas",
        "$..storageContainerUri",
        "$..storageContainerReadListSas",
        "$..storageAccountPrimaryKey",
        "$..uploadUrl",
        "$..secondaryReadonlyMasterKey",
        "$..primaryMasterKey",
        "$..primaryReadonlyMasterKey",
        "$..secondaryMasterKey",
        "$..scriptUrlSasToken",
        "$..privateKey",
        "$..password",
        "$..logLink",
        "$..keyVaultClientSecret",
        "$..httpHeader",
        "$..functionKey",
        "$..fencingClientPassword",
        "$..encryptedCredential",
        "$..clientSecret",
        "$..certificatePassword",
        "$..authHeader",
        "$..atlasKafkaSecondaryEndpoint",
        "$..atlasKafkaPrimaryEndpoint",
        "$..appkey",
        "$..acrToken",
        "$..accountKey",
        "$..accountName",
        "$..decryptionKey",
        "$..applicationId",
        "$..apiKey",
        "$..userName",
        "$.properties.DOCKER_REGISTRY_SERVER_PASSWORD",
        "$.value[*].key",
        "$.key",
    )

    # JSON paths whose values are replaced by a fake ID
    fake_id_paths = ("$..clientId", "$..principalId", "$..tenantId")

    # Body key sanitizers for sensitive fields in JSON requests/responses (registration order is preserved)
    body_key_sanitizers = [{"json_path": path, "value": FAKE_ACCESS_TOKEN} for path in access_token_paths]
    body_key_sanitizers.extend({"json_path": path, "value": placeholder} for path in placeholder_paths)
    body_key_sanitizers.extend({"json_path": path, "value": FAKE_ID} for path in fake_id_paths)

    # Body regex sanitizers for sensitive patterns in request/response bodies
    body_regex_sanitizers = [
        {"regex": "(client_id=)[^&]+", "value": "$1sanitized"},
        {"regex": "(client_secret=)[^&]+", "value": "$1sanitized"},
        {"regex": "(client_assertion=)[^&]+", "value": "$1sanitized"},
        {"regex": "(?:(sv|sig|se|srt|ss|sp)=)(?<secret>(([^&\\s]*)))", "value": placeholder},
        {"regex": "refresh_token=(?<group>.*?)(?=&|$)", "group_for_replace": "group", "value": placeholder},
        {"regex": "access_token=(?<group>.*?)(?=&|$)", "group_for_replace": "group", "value": placeholder},
        {"regex": "token=(?<token>[^\\u0026]+)($|\\u0026)", "group_for_replace": "token", "value": placeholder},
        {"regex": "-----BEGIN PRIVATE KEY-----\\n(.+\\n)*-----END PRIVATE KEY-----\\n", "value": placeholder},
        {"regex": "(?<=<UserDelegationKey>).*?(?:<SignedTid>)(.*)(?:</SignedTid>)", "value": placeholder},
        {"regex": "(?<=<UserDelegationKey>).*?(?:<SignedOid>)(.*)(?:</SignedOid>)", "value": placeholder},
        {"regex": "(?<=<UserDelegationKey>).*?(?:<Value>)(.*)(?:</Value>)", "value": placeholder},
        {"regex": "(?:Password=)(.*?)(?:;)", "value": placeholder},
        {"regex": "(?:User ID=)(.*?)(?:;)", "value": placeholder},
        {"regex": "(?:<PrimaryKey>)(.*)(?:</PrimaryKey>)", "value": placeholder},
        {"regex": "(?:<SecondaryKey>)(.*)(?:</SecondaryKey>)", "value": placeholder},
    ]

    # General regex sanitizers for sensitive patterns throughout interactions
    general_regex_sanitizers = [
        {"regex": "SharedAccessKey=(?<key>[^;\\\"]+)", "group_for_replace": "key", "value": placeholder},
        {"regex": "AccountKey=(?<key>[^;\\\"]+)", "group_for_replace": "key", "value": placeholder},
        {"regex": "accesskey=(?<key>[^;\\\"]+)", "group_for_replace": "key", "value": placeholder},
        {"regex": "Accesskey=(?<key>[^;\\\"]+)", "group_for_replace": "key", "value": placeholder},
        {"regex": "Secret=(?<key>[^;\\\"]+)", "group_for_replace": "key", "value": placeholder},
    ]

    # Header regex sanitizers for sensitive patterns in request/response headers
    placeholder_header_keys = (
        "subscription-key",
        "x-ms-encryption-key",
        "x-ms-rename-source",
        "x-ms-file-rename-source",
        "x-ms-copy-source",
        "x-ms-copy-source-authorization",
        "x-ms-file-rename-source-authorization",
        "x-ms-encryption-key-sha256",
        "api-key",
        "aeg-sas-token",
        "aeg-sas-key",
        "aeg-channel-name",
    )
    header_regex_sanitizers = [{"key": header, "value": placeholder} for header in placeholder_header_keys]
    header_regex_sanitizers.append({"key": "SupplementaryAuthorization", "value": SERVICEBUS_FAKE_SAS})

    # URI regex sanitizers for sensitive patterns in request/response URLs
    uri_regex_sanitizers = [
        {"regex": "sig=(?<sig>[^&]+)", "group_for_replace": "sig", "value": placeholder},
    ]

    # Send all the above sanitizers to the test proxy in a single, batch request. Key order matches the order in
    # which the definitions should be registered.
    add_batch_sanitizers(
        sanitizers={
            # Remove headers from recordings if we don't need them
            Sanitizer.REMOVE_HEADER: [{"headers": ignored_headers}],
            # Remove OAuth interactions, which can contain client secrets and aren't necessary for playback testing
            Sanitizer.OAUTH_RESPONSE: [None],
            Sanitizer.BODY_KEY: body_key_sanitizers,
            Sanitizer.BODY_REGEX: body_regex_sanitizers,
            Sanitizer.GENERAL_REGEX: general_regex_sanitizers,
            Sanitizer.HEADER_REGEX: header_regex_sanitizers,
            Sanitizer.URI_REGEX: uri_regex_sanitizers,
        }
    )


def start_test_proxy(request) -> None:
"""Starts the test proxy and returns when the proxy server is ready to receive requests.
Expand Down Expand Up @@ -334,12 +491,7 @@ def start_test_proxy(request) -> None:

# Wait for the proxy server to become available
check_proxy_availability()
# Remove headers from recordings if we don't need them, and ignore them if present
# Authorization, for example, can contain sensitive info and can cause matching failures during challenge auth
headers_to_ignore = "Authorization, x-ms-client-request-id, x-ms-request-id"
add_remove_header_sanitizer(headers=headers_to_ignore)
set_custom_default_matcher(excluded_headers=headers_to_ignore)
add_oauth_response_sanitizer()
set_common_sanitizers()


def stop_test_proxy() -> None:
Expand Down
76 changes: 66 additions & 10 deletions tools/azure-sdk-tools/devtools_testutils/sanitizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,30 @@
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
from enum import Enum
import json
from typing import TYPE_CHECKING
from typing import Dict, List, Optional

from .config import PROXY_URL
from .helpers import get_http_client, get_recording_id, is_live, is_live_and_not_recording

if TYPE_CHECKING:
from typing import Optional

class Sanitizer(str, Enum):
    """Sanitizers that can be applied to recordings.

    Members mix in ``str``, so each member compares equal to its string value. ``add_batch_sanitizers`` places the
    dictionary key it is given (documented as a name from this enum) in the "Name" field of each sanitizer
    definition it sends.
    """

    # Sanitizers scoped to request/response bodies
    BODY_KEY = "BodyKeySanitizer"
    BODY_REGEX = "BodyRegexSanitizer"
    BODY_STRING = "BodyStringSanitizer"
    CONTINUATION = "ContinuationSanitizer"
    # "General" sanitizers: not scoped to body, header, or URI by name
    GENERAL_REGEX = "GeneralRegexSanitizer"
    GENERAL_STRING = "GeneralStringSanitizer"
    # Sanitizers scoped to headers
    HEADER_REGEX = "HeaderRegexSanitizer"
    HEADER_STRING = "HeaderStringSanitizer"
    OAUTH_RESPONSE = "OAuthResponseSanitizer"
    REMOVE_HEADER = "RemoveHeaderSanitizer"
    # Sanitizers scoped to URIs
    URI_REGEX = "UriRegexSanitizer"
    URI_STRING = "UriStringSanitizer"
    URI_SUBSCRIPTION_ID = "UriSubscriptionIdSanitizer"


# This file contains methods for adjusting many aspects of test proxy behavior:
Expand Down Expand Up @@ -416,6 +432,46 @@ def add_uri_subscription_id_sanitizer(**kwargs) -> None:
_send_sanitizer_request("UriSubscriptionIdSanitizer", request_args, {"x-recording-id": x_recording_id})


def add_batch_sanitizers(sanitizers: Dict[str, List[Optional[Dict[str, str]]]], headers: Optional[Dict] = None) -> None:
    """Register many sanitizers with the test proxy using a single POST request.

    No request is sent if live tests are being run with recording turned off via the AZURE_SKIP_LIVE_RECORDING
    environment variable.

    :param sanitizers: Sanitizers to register, grouped by name. Keys should be sanitizer names (from the Sanitizer
        enum); each value is a list with one entry per sanitizer instance to create. An entry is either a dictionary
        of constructor parameters (key-value pairs aligning with the keyword-only arguments of the corresponding
        sanitizer method) or a falsy value such as None for a sanitizer that is registered without a body.
    :type sanitizers: dict[str, list[Optional[dict]]]
    :param headers: Optional extra headers for the request. Entries whose value is None are left out; the remaining
        entries are merged over the default "Content-Type: application/json" header.
    :type headers: Optional[dict]
    """
    if is_live_and_not_recording():
        return

    def _build_definition(name, constructor_args):
        # A definition always carries the sanitizer name; a body is attached only when arguments were supplied
        definition = {"Name": name}
        if constructor_args:
            definition["Body"] = _get_request_args(**constructor_args)
        return definition

    # Flatten {name: [instance, ...]} into the list of definitions that forms the request body
    payload = [
        _build_definition(name, constructor_args)
        for name, instances in sanitizers.items()
        for constructor_args in instances
    ]

    request_headers = {"Content-Type": "application/json"}
    if headers is not None:
        request_headers.update({name: value for name, value in headers.items() if value is not None})

    endpoint = "{}/Admin/AddSanitizers".format(PROXY_URL)
    encoded_body = json.dumps(payload).encode("utf-8")
    get_http_client().request(method="POST", url=endpoint, headers=request_headers, body=encoded_body)


# ----------TRANSFORMS----------
#
# A transform extends functionality of the test proxy by applying to responses just before they are returned during
Expand Down Expand Up @@ -538,7 +594,7 @@ def __init__(self, data: str, key: str) -> None:
# ----------HELPERS----------


def _get_recording_option_args(**kwargs) -> dict:
def _get_recording_option_args(**kwargs) -> Dict:
"""Returns a dictionary of recording option request arguments, formatted for test proxy consumption."""

certificates = kwargs.pop("certificates", None)
Expand All @@ -564,7 +620,7 @@ def _get_recording_option_args(**kwargs) -> dict:
return request_args


def _get_request_args(**kwargs) -> dict:
def _get_request_args(**kwargs) -> Dict:
"""Returns a dictionary of request arguments, formatted for test proxy consumption."""

request_args = {}
Expand Down Expand Up @@ -605,7 +661,7 @@ def _get_request_args(**kwargs) -> dict:
return request_args


def _send_matcher_request(matcher: str, headers: dict, parameters: "Optional[dict]" = None) -> None:
def _send_matcher_request(matcher: str, headers: Dict, parameters: Optional[Dict] = None) -> None:
"""Sends a POST request to the test proxy endpoint to register the specified matcher.
If live tests are being run, no request will be sent.
Expand Down Expand Up @@ -633,7 +689,7 @@ def _send_matcher_request(matcher: str, headers: dict, parameters: "Optional[dic
)


def _send_recording_options_request(parameters: dict, headers: "Optional[dict]" = None) -> None:
def _send_recording_options_request(parameters: Dict, headers: Optional[Dict] = None) -> None:
"""Sends a POST request to the test proxy endpoint to set the specified recording options.
If live tests are being run with recording turned off via the AZURE_SKIP_LIVE_RECORDING environment variable, no
Expand Down Expand Up @@ -661,7 +717,7 @@ def _send_recording_options_request(parameters: dict, headers: "Optional[dict]"
)


def _send_reset_request(headers: dict) -> None:
def _send_reset_request(headers: Dict) -> None:
"""Sends a POST request to the test proxy endpoint to reset setting customizations.
If live tests are being run with recording turned off via the AZURE_SKIP_LIVE_RECORDING environment variable, no
Expand All @@ -682,7 +738,7 @@ def _send_reset_request(headers: dict) -> None:
http_client.request(method="POST", url=f"{PROXY_URL}/Admin/Reset", headers=headers_to_send)


def _send_sanitizer_request(sanitizer: str, parameters: dict, headers: "Optional[dict]" = None) -> None:
def _send_sanitizer_request(sanitizer: str, parameters: Dict, headers: Optional[Dict] = None) -> None:
"""Sends a POST request to the test proxy endpoint to register the specified sanitizer.
If live tests are being run with recording turned off via the AZURE_SKIP_LIVE_RECORDING environment variable, no
Expand All @@ -709,7 +765,7 @@ def _send_sanitizer_request(sanitizer: str, parameters: dict, headers: "Optional
)


def _send_transform_request(transform: str, parameters: dict, headers: "Optional[dict]" = None) -> None:
def _send_transform_request(transform: str, parameters: Dict, headers: Optional[Dict] = None) -> None:
"""Sends a POST request to the test proxy endpoint to register the specified transform.
If live tests are being run, no request will be sent.
Expand Down

0 comments on commit 8147686

Please sign in to comment.