Skip to content

Commit

Permalink
Merge pull request redpanda-data#20749 from redpanda-data/clee/PESDLC…
Browse files Browse the repository at this point in the history
…-1432

[rptest] Azure CDT bringup
  • Loading branch information
clee authored Aug 2, 2024
2 parents 06aeccb + 170faf6 commit f2cdd7d
Show file tree
Hide file tree
Showing 11 changed files with 586 additions and 173 deletions.
61 changes: 43 additions & 18 deletions tests/rptest/clients/kubectl.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,24 @@


def is_redpanda_pod(pod_obj: dict[str, Any], cluster_id: str) -> bool:
    """Return True if the pod API object looks like a Redpanda broker pod.

    Two naming conventions are recognised:

    * Azure, which is also the 'new' way going forward (circa 2024/7):
      the pod's ``generateName`` is ``redpanda-broker-`` and its
      ``app.kubernetes.io/component`` label is ``redpanda-statefulset``.
    * Other providers like AWS / GCP ("the old way"): the pod name starts
      with ``rp-<cluster_id>``.

    Args:
        pod_obj: pod object as a dict; must contain ``metadata``.
        cluster_id: cluster id used to build the legacy name prefix.

    Raises:
        KeyError: if ``metadata`` is missing, or if the pod does not match
            the new convention and ``metadata`` has no ``name``.
    """
    metadata = pod_obj['metadata']

    # New convention. False positives are quite unlikely with this criterion;
    # the scenario would be another StatefulSet that reuses the SAME
    # generateName (bad!).
    # ``labels`` may be absent or null; treat both as "no labels" instead of
    # relying on an exception to fall through.
    labels = metadata.get('labels') or {}
    if (metadata.get('generateName') == 'redpanda-broker-'
            and labels.get('app.kubernetes.io/component')
            == 'redpanda-statefulset'):
        return True

    # Legacy convention used by AWS / GCP
    return metadata['name'].startswith(f'rp-{cluster_id}')


Expand All @@ -37,7 +54,7 @@ def __init__(
remote_uri=None,
namespace='redpanda',
cluster_id='',
cluster_privider='aws',
cluster_provider='aws',
cluster_region='us-west-2',
tp_proxy=None,
tp_token=None,
Expand All @@ -47,7 +64,7 @@ def __init__(
self._namespace = namespace
self._cluster_id = cluster_id

self._provider = cluster_privider.lower()
self._provider = cluster_provider.lower()
if self._provider not in SUPPORTED_PROVIDERS:
raise RuntimeError("KubectlTool does not yet support "
f"'{self._provider}' cloud provider")
Expand Down Expand Up @@ -119,7 +136,13 @@ def _scp_cmd(self, src, dest):
def _install(self):
'''Installs kubectl on a remote target host'''
if not self._kubectl_installed and self._remote_uri is not None:
self._ssh_cmd(['./breakglass-tools.sh'])
breakglass_cmd = ['./breakglass-tools.sh']
if self._provider == 'azure':
# for azure, we manually override the path here to ensure
# that azure-cli installed as a snap gets found (workaround)
p = ['env', 'PATH=/usr/local/bin:/usr/bin:/bin:/snap/bin']
breakglass_cmd = p + breakglass_cmd
self._ssh_cmd(breakglass_cmd)

# Determine the appropriate command for the cloud provider
self._redpanda.logger.info(
Expand All @@ -134,17 +157,7 @@ def _install(self):
'gcloud', 'container', 'clusters', 'get-credentials',
f'redpanda-{self._cluster_id}', '--region', self._region
],
'azure': [
'bash', '-c',
("'if ! command -v az &> /dev/null; then "
"echo \"Azure CLI not found. Installing Azure CLI.\" >&2; "
"curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash; "
"echo \"Azure CLI installed.\" >&2; "
"fi && "
'az login --identity --allow-no-subscriptions && '
f"az aks get-credentials --resource-group rg-rpcloud-{self._cluster_id} --name aks-rpcloud-{self._cluster_id}'"
)
]
'azure': ['kubectl', 'get', 'nodes']
}[self._provider]

# Log the full command to be executed
Expand Down Expand Up @@ -240,6 +253,16 @@ def _prepare_output(sout: str, serr: str) -> str:
# return that instead.
return s_out if len(s_out) > 0 else s_err

@property
def _redpanda_operator_v2(self) -> bool:
    """True when the configured provider is 'azure'.

    NOTE(review): the name suggests this tracks the operator generation
    deployed on the cluster; the only signal used here is the provider
    string, so confirm before reusing it for other providers.
    """
    return self._provider == 'azure'

def _redpanda_broker_pod_name(self) -> str:
if self._redpanda_operator_v2:
return 'redpanda-broker-0'
else:
return f'rp-{self._cluster_id}-0'

def _ssh_cmd(
self,
cmd: list[str],
Expand Down Expand Up @@ -284,19 +307,21 @@ def exec(self, remote_cmd, pod_name=None) -> str:

self._install()
if pod_name is None:
pod_name = f'rp-{self._cluster_id}-0'
pod_name = self._redpanda_broker_pod_name()
cmd = [
'kubectl', 'exec', pod_name, f'-n={self._namespace}',
'-c=redpanda', '--', 'bash', '-c'
] + ['"' + remote_cmd + '"']
return self._ssh_cmd(cmd) # type: ignore

def exists(self, remote_path):
def exists(self, remote_path, pod_name=None):
self._install()
if pod_name is None:
pod_name = self._redpanda_broker_pod_name()
try:
self._ssh_cmd([
'kubectl', 'exec', '-n', self._namespace, '-c', 'redpanda',
f'rp-{self._cluster_id}-0', '--', 'stat', remote_path
pod_name, '--', 'stat', remote_path
])
return True
except subprocess.CalledProcessError:
Expand Down
77 changes: 52 additions & 25 deletions tests/rptest/redpanda_cloud_tests/config_profile_verify_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,13 @@ def test_config_profile_verify(self):
self.logger.debug("Here we go")

# assert isinstance(self.redpanda, RedpandaServiceCloud)
if self._configProfile['cloud_provider'] == 'gcp':
self._check_gcp_nodes()
else:
self._check_aws_nodes()
match self._configProfile['cloud_provider']:
case 'aws':
self._check_aws_nodes()
case 'gcp':
self._check_gcp_nodes()
case 'azure':
self._check_azure_nodes()

self._check_rp_config()

Expand All @@ -62,7 +65,8 @@ def _check_rp_config(self):
self.logger.debug(
"asserting we got the config for the right cluster: expected rp-{}, actual: {}"
.format(self._clusterId, clusterConfig["cluster_id"]))
assert "rp-{}".format(self._clusterId) == clusterConfig['cluster_id']
assert clusterConfig['cluster_id'] in (self._clusterId,
f'rp-{self._clusterId}')

for k, v in self._configProfile["cluster_config"].items():
self.logger.debug(
Expand All @@ -71,6 +75,49 @@ def _check_rp_config(self):
if clusterConfig[k] != v and "{}".format(clusterConfig[k]) != v:
assert False

def _check_aws_nodes(self):
cmd = self.redpanda.kubectl._ssh_prefix() + [
'aws', 'ec2', 'describe-instances',
'--filters="Name=tag:Name, Values=redpanda-{}-rp"'.format(
self._clusterId),
'--query="Reservations[0].Instances[*].InstanceType"'
]
res = subprocess.check_output(cmd)
resd = json.loads(res)

self.logger.debug(
"asserting nodes_count: expected: {}, actual: {}".format(
self._configProfile['nodes_count'], len(resd)))
assert len(resd) == self._configProfile['nodes_count']

self.logger.debug(
"asserting machineType: expected: {}, actual: {}".format(
self._configProfile['machine_type'], resd[0]))
assert resd[0] == self._configProfile['machine_type']

def _check_azure_nodes(self):
# currently, we have to override the PATH for azure because
# az-cli is installed via snap and /snap/bin only appears in
# $PATH on *interactive* shells
cmd = self.redpanda.kubectl._ssh_prefix() + [
'env', 'PATH=/usr/local/bin:/usr/bin:/bin:/snap/bin',
'az', 'aks', 'nodepool', 'list',
'--cluster-name', f'aks-rpcloud-{self._clusterId}',
'--resource-group', f'rg-rpcloud-{self._clusterId}',
'--query', "'[?starts_with(name,`redpanda`)].vmSize'",
'--output', 'json'
] # yapf: disable

res = subprocess.check_output(cmd)
resd = json.loads(res)

nc = self._configProfile['nodes_count']
assert len(
resd) == nc, f"expected nodes_count: {nc}, actual: {len(resd)}"

mt = self._configProfile['machine_type']
assert resd[1] == mt, f"expected machineType: {mt}, actual: {resd[1]}"

def _check_gcp_nodes(self):
cmd = self.redpanda.kubectl._ssh_prefix() + [
'gcloud', 'compute', 'instances', 'list', '--filter',
Expand Down Expand Up @@ -101,23 +148,3 @@ def _check_gcp_nodes(self):
.format(n["name"], self._configProfile["storage_size_bytes"],
total))
assert total == self._configProfile['storage_size_bytes']

def _check_aws_nodes(self):
cmd = self.redpanda.kubectl._ssh_prefix() + [
'aws', 'ec2', 'describe-instances',
'--filters="Name=tag:Name, Values=redpanda-{}-rp"'.format(
self._clusterId),
'--query="Reservations[0].Instances[*].InstanceType"'
]
res = subprocess.check_output(cmd)
resd = json.loads(res)

self.logger.debug(
"asserting nodes_count: expected: {}, actual: {}".format(
self._configProfile['nodes_count'], len(resd)))
assert len(resd) == self._configProfile['nodes_count']

self.logger.debug(
"asserting machineType: expected: {}, actual: {}".format(
self._configProfile['machine_type'], resd[0]))
assert resd[0] == self._configProfile['machine_type']
2 changes: 2 additions & 0 deletions tests/rptest/redpanda_cloud_tests/high_throughput_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@
'im4gn.large': 8 * GiB,
'im4gn.xlarge': 16 * GiB,
'im4gn.8xlarge': 128 * GiB,
'Standard_L8s_v3': 64 * GiB,
'Standard_L8as_v3': 64 * GiB,
'n2d-standard-2': 8 * GiB,
'n2d-standard-4': 16 * GiB,
'n2d-standard-16': 64 * GiB,
Expand Down
14 changes: 12 additions & 2 deletions tests/rptest/services/cloud_cluster_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ def __init__(self, context, logger, infra_id, infra_secret, provider,
self.gcp_project_id = self._get_gcp_project_id(infra_id)
self.logger.info(f"Using GCP project '{self.gcp_project_id}'")
self.env.update({"GOOGLE_APPLICATION_CREDENTIALS": infra_id})
elif self.provider == 'azure':
self.subscription_id = context.globals['azure_subscription_id']
self.logger.debug(
f"Using Azure subscription ID: {self.subscription_id}")

def _get_gcp_project_id(self, keyfilepath):
project_id = None
Expand Down Expand Up @@ -131,8 +135,14 @@ def rpk_cloud_apply(self, cluster_id):
self.logger.debug("Deploying cluster agent")
cmd = self._get_rpk_cloud_cmd()
cmd += ["byoc", self.provider, "apply", f"--redpanda-id={cluster_id}"]
if self.provider == 'gcp':
cmd += ["--project-id=" + self.gcp_project_id]
match self.provider:
case 'gcp':
cmd += [f"--project-id={self.gcp_project_id}"]
case 'azure':
cmd += [
f"--subscription-id={self.subscription_id}",
"--identity=cli", "--credential-source=cli"
]
out = self._exec(cmd, timeout=1800)
# TODO: Handle errors
return out
Expand Down
10 changes: 10 additions & 0 deletions tests/rptest/services/machinetype.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ class MachineTypeName(str, Enum):
IM4GN_XLARGE = 'im4gn.xlarge'
IM4GN_8XLARGE = 'im4gn.8xlarge'

# Azure X86
STANDARD_L8S_V3 = 'Standard_L8s_v3'
STANDARD_L8AS_V3 = 'Standard_L8as_v3'

# GCP X86
N2_STANDARD_2 = 'n2-standard-2'
N2_STANDARD_4 = 'n2-standard-4'
Expand Down Expand Up @@ -67,6 +71,12 @@ class MachineTypeConfig:
MachineTypeName.IM4GN_8XLARGE:
MachineTypeConfig(num_shards=31, memory=128 * GiB),

# Azure X86
MachineTypeName.STANDARD_L8S_V3:
MachineTypeConfig(num_shards=7, memory=64 * GiB),
MachineTypeName.STANDARD_L8AS_V3:
MachineTypeConfig(num_shards=7, memory=64 * GiB),

# GCP X86
MachineTypeName.N2_STANDARD_2:
MachineTypeConfig(num_shards=1, memory=8 * GiB),
Expand Down
6 changes: 5 additions & 1 deletion tests/rptest/services/provider_clients/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
from rptest.services.provider_clients.ec2_client import EC2Client
from rptest.services.provider_clients.gcp_client import GCPClient
from rptest.services.provider_clients.azure_client import AzureClient


def make_provider_client(provider, logger, region, key, secret, tenant=None):
    """Build the provider client matching ``provider``.

    The provider name is compared case-insensitively against 'AWS', 'GCP'
    and 'AZURE'. ``tenant`` is only passed to the Azure client. Returns
    None when the provider is not one of the three.
    """
    kind = provider.upper()
    if kind == 'AWS':
        return EC2Client(region, key, secret, logger)
    if kind == 'GCP':
        # In scope of GCP, key contains path to keyfile
        return GCPClient(region, key, logger)
    if kind == 'AZURE':
        return AzureClient(region, key, secret, tenant, logger)
    # No client for anything else
    return None
Loading

0 comments on commit f2cdd7d

Please sign in to comment.