Skip to content

Commit

Permalink
[PLAT-4899]Basic validation of certificates
Browse files Browse the repository at this point in the history
Summary:
Ticket - https://yugabyte.atlassian.net/browse/PLAT-4899
Design doc - https://docs.google.com/document/d/18MVJUDT_QJm0HPc2dP7aitrj_of7O0CZ33e4PyKFlwM/edit
Adding a new subtask for validating the customCA config (customCertHostPath) for an onprem universe. The subtask will be run on create universe, create RR, edit universe, add nodes, certs rotate and toggle tls. We had some existing checks that were part of preflight checks but we never used to run them, moved those checks to a new subtask in order to run them for certs rotate and toggle TLS as well.

Adding all certificate related validations during add certificate behind a runtimeConfig `yb.tlscertificate.enableValidation`.
Adding all certificate related validation for universes behing `yb.tls.skip_cert_validation` (existing config)

Corrected the certificates in unit tests (the certs didn't have CA: True)

Test Plan:
Tested following flows
  - Created a valid CA certificate config for self-signed, customCA and cert-manger
  - Created an invalid CA certificate config for self-signed, customCA and cert-manger -> YBA blocks it
  - Created onprem universe with valid CA config
  - Created onprem universe with invalid CAconfiguration
  - Ran AddReadReplica, ExpandUniverse, FullMove, CertificateRotation, AddNode tasks
  - Onprem universe creation using an invalid certificateConfig with RuntimeConfig set to False
  - Ran ToggleTls tests
  - Verified that we are using ssh for verifying certs if nodes are not provisioned and using NodeAgent when nodes are provisioned.

Reviewers: #yba-api-review!, svarshney, asharma, yshchetinin, sanketh

Reviewed By: svarshney

Subscribers: yugaware

Differential Revision: https://phorge.dev.yugabyte.com/D36271
  • Loading branch information
Arpit-yb committed Sep 5, 2024
1 parent 7d8fc76 commit 4c6cf5a
Show file tree
Hide file tree
Showing 27 changed files with 838 additions and 332 deletions.
1 change: 1 addition & 0 deletions managed/RUNTIME-FLAGS.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
| "Enforced User Tags List" | "yb.universe.user_tags.enforced_tags" | "CUSTOMER" | "A list of enforced user tag and accepted value pairs during universe creation. Pass '*' to accept all values for a tag. Ex: [\"yb_task:dev\",\"yb_task:test\",\"yb_owner:*\",\"yb_dept:eng\",\"yb_dept:qa\", \"yb_dept:product\", \"yb_dept:sales\"]" | "Key Value SetMultimap" |
| "Enable IMDSv2" | "yb.aws.enable_imdsv2_support" | "CUSTOMER" | "Enable IMDSv2 support for AWS providers" | "Boolean" |
| "Backup Garbage Collector Number of Retries" | "yb.backupGC.number_of_retries" | "CUSTOMER" | "Number of retries during backup deletion" | "Integer" |
| "Enable Certificate Config Validation" | "yb.tls.enable_config_validation" | "CUSTOMER" | "Certificate configuration validation during the addition of new certificates." | "Boolean" |
| "Allow Unsupported Instances" | "yb.internal.allow_unsupported_instances" | "PROVIDER" | "Enabling removes supported instance type filtering on AWS providers." | "Boolean" |
| "Default AWS Instance Type" | "yb.aws.default_instance_type" | "PROVIDER" | "Default AWS Instance Type" | "String" |
| "Default GCP Instance Type" | "yb.gcp.default_instance_type" | "PROVIDER" | "Default GCP Instance Type" | "String" |
Expand Down
80 changes: 65 additions & 15 deletions managed/devops/opscli/ybops/cloud/common/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,17 +406,33 @@ def __verify_certs_hostname(self, node_crt_path, connect_options):
"'{}' does not match with any entry in CN or SAN of the node cert: {}, "
"cert_cn: {}, cert_san: {}".format(host, node_crt_path, cert_cn, cert_san))

def verify_certs(self, root_crt_path, node_crt_path, connect_options, verify_hostname=False):
def verify_certs(self, root_crt_path, server_crt_path, server_key_path, connect_options,
verify_hostname=False, perform_extended_validation=False):
remote_shell = RemoteShell(connect_options)
# Verify that both cert are present in FS and have read rights

# Verify that the root cert, server certs and keys are not empty strings or None
if not root_crt_path or not server_crt_path or not server_key_path:
raise YBOpsRuntimeError(
"Path to root cert: {}, server cert: {} or key: {} is empty."
.format(root_crt_path, server_crt_path, server_key_path))

# Verify that both cert and the key are present in FS and have read rights
root_file_verify = remote_shell.run_command_raw("test -r {}".format(root_crt_path))
if root_file_verify.exited == 1:
raise YBOpsRuntimeError(
"Root cert: {} is absent or is not readable.".format(root_crt_path))
node_file_verify = remote_shell.run_command_raw("test -r {}".format(node_crt_path))
if node_file_verify.exited == 1:

cert_file_verify = remote_shell.run_command_raw("test -r {}".format(server_crt_path))
if cert_file_verify.exited == 1:
raise YBOpsRuntimeError(
"Server cert: {} is absent or is not readable.".format(server_crt_path))

key_file_verify = remote_shell.run_command_raw("test -r {}".format(server_key_path))
if key_file_verify.exited == 1:
raise YBOpsRuntimeError(
"Node cert: {} is absent or is not readable.".format(node_crt_path))
"Key file: {} is absent or is not readable.".format(server_key_path))

# Verify OpenSSL is installed
try:
remote_shell.run_command('which openssl')
except YBOpsRuntimeError:
Expand All @@ -427,29 +443,63 @@ def verify_certs(self, root_crt_path, node_crt_path, connect_options, verify_hos
cert_verify = remote_shell.run_command_raw(
("openssl crl2pkcs7 -nocrl -certfile {} -certfile {} "
"| openssl pkcs7 -print_certs -text -noout")
.format(root_crt_path, node_crt_path))
.format(root_crt_path, server_crt_path))
if cert_verify.exited == 1:
raise YBOpsRuntimeError(
"Provided certs ({}, {}) are not valid."
.format(root_crt_path, node_crt_path))
.format(root_crt_path, server_crt_path))

# Verify if the node cert is not expired
# Verify if the server cert is not expired
validity_verify = remote_shell.run_command_raw(
"openssl x509 -noout -checkend 86400 -in {}".format(node_crt_path))
"openssl x509 -noout -checkend 86400 -in {}".format(server_crt_path))
if validity_verify.exited == 1:
raise YBOpsRuntimeError(
"Node cert: {} is expired or will expire in one day.".format(node_crt_path))
"Node cert: {} is expired or will expire in one day.".format(server_crt_path))

# Verify if node cert has valid signature
# Verify if node cert has valid signature and the certificate chain is valid
signature_verify = remote_shell.run_command_raw(
"openssl verify -CAfile {} {} | egrep error".format(root_crt_path, node_crt_path))
"openssl verify -CAfile {} {} | egrep error".format(root_crt_path, server_crt_path))
if signature_verify.exited != 1:
raise YBOpsRuntimeError(
"Node cert: {} is not signed by the provided root cert: {}.".format(node_crt_path,
"Node cert: {} is not signed by the provided root cert: {}.".format(server_crt_path,
root_crt_path))
if perform_extended_validation:
# Verify both certificates are RSA
for crt_path, cert_name in [(root_crt_path, "Root"), (server_crt_path, "Server")]:
output = remote_shell.run_command_raw(f"openssl x509 -in {crt_path} -noout -text")
if output.exited != 0:
raise YBOpsRuntimeError(
f"Failed to read {cert_name.lower()} certificate - {crt_path}.")
if "RSA Public-Key" not in output.stdout:
raise YBOpsRuntimeError(
f"{cert_name} certificate - {crt_path} is not of RSA algorithm.")

# Verify server key is RSA and at least 2048 bits
key_info = remote_shell.run_command_raw(
f"openssl rsa -in {server_key_path} -noout -text")
if key_info.exited != 0:
raise YBOpsRuntimeError(f"Key file: {server_key_path} is not using RSA algorithm.")

# Extract and verify the key size
key_size = int(key_info.stdout.split("Private-Key: (")[1].split()[0].strip())
logging.info(f"Key size: {key_size}")
if key_size < 2048:
raise YBOpsRuntimeError(
f"RSA key size in server cert at {server_key_path} is less than 2048 bits.")

# Verify key signs the certificate
cert_md5 = remote_shell.run_command_raw(
"openssl x509 -noout -modulus -in {} | openssl md5".format(server_crt_path)).stdout
key_md5 = remote_shell.run_command_raw(
"openssl rsa -noout -modulus -in {} | openssl md5".format(
server_key_path)
).stdout
if cert_md5 != key_md5:
raise YBOpsRuntimeError(
"Server certificate and server key do not match.")

if verify_hostname:
self.__verify_certs_hostname(node_crt_path, connect_options)
self.__verify_certs_hostname(server_crt_path, connect_options)

def copy_server_certs(
self,
Expand Down Expand Up @@ -510,7 +560,7 @@ def copy_server_certs(
logging.info(
"Skipping host name validation for certs for node {}".format(node_ip))
verify_hostname = False
self.verify_certs(root_cert_path, server_cert_path,
self.verify_certs(root_cert_path, server_cert_path, server_key_path,
connect_options, verify_hostname)
if copy_root:
remote_shell.run_command("cp '{}' '{}'".format(root_cert_path,
Expand Down
4 changes: 3 additions & 1 deletion managed/devops/opscli/ybops/cloud/onprem/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
from ybops.cloud.onprem.method import OnPremCreateInstancesMethod, OnPremDestroyInstancesMethod, \
OnPremProvisionInstancesMethod, OnPremValidateMethod, \
OnPremFillInstanceProvisionTemplateMethod, OnPremListInstancesMethod, \
OnPremPrecheckInstanceMethod, OnPremAccessAddKeyMethod, OnPremInstallNodeAgentMethod
OnPremPrecheckInstanceMethod, OnPremAccessAddKeyMethod, OnPremInstallNodeAgentMethod, \
OnPremVerifyCertificatesMethod


class OnPremInstanceCommand(InstanceCommand):
Expand Down Expand Up @@ -47,6 +48,7 @@ def add_methods(self):
self.add_method(WaitForConnection(self))
self.add_method(OnPremInstallNodeAgentMethod(self))
self.add_method(ManageOtelCollector(self))
self.add_method(OnPremVerifyCertificatesMethod(self))


class OnPremAccessCommand(AccessCommand):
Expand Down
151 changes: 93 additions & 58 deletions managed/devops/opscli/ybops/cloud/onprem/method.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,31 +271,6 @@ def add_extra_args(self):
self.parser.add_argument("--skip_ntp_check", action="store_true",
help='Skip check for time synchronization.')

def verify_certificates(self, cert_type, root_cert_path, cert_path, key_path, connect_options,
skip_cert_validation, results):
result_var = True
remote_shell = RemoteShell(connect_options)
if not self.test_file_readable(remote_shell, root_cert_path):
results["File {} is present and readable".format(root_cert_path)] = False
result_var = False
if not self.test_file_readable(remote_shell, cert_path):
results["File {} is present and readable".format(cert_path)] = False
result_var = False
if not self.test_file_readable(remote_shell, key_path):
results["File {} is present and readable".format(key_path)] = False
result_var = False
if result_var and skip_cert_validation != 'ALL':
try:
self.cloud.verify_certs(root_cert_path, cert_path, connect_options,
skip_cert_validation != 'HOSTNAME')
except YBOpsRuntimeError as e:
result_var = False
results["Check {} certificate".format(cert_type)] = str(e)

if result_var:
results["Check {} certificate".format(cert_type)] = True
return result_var

def test_file_readable(self, remote_shell, path):
node_file_verify = remote_shell.run_command_raw("test -r {}".format(path))
return node_file_verify.exited == 0
Expand Down Expand Up @@ -327,39 +302,6 @@ def callback(self, args):
remote_tmp_dir=args.remote_tmp_dir)
results["SSH Connection"] = scp_result == 0

connect_options = {}
connect_options.update(self.extra_vars)
connect_options.update({
"ssh_user": "yugabyte",
"node_agent_user": "yugabyte"
})

if args.root_cert_path is not None:
self.verify_certificates("Server", args.root_cert_path,
args.server_cert_path,
args.server_key_path,
connect_options,
args.skip_cert_validation,
results)

if args.root_cert_path_client_to_server is not None:
self.verify_certificates("Server clientRootCA", args.root_cert_path_client_to_server,
args.server_cert_path_client_to_server,
args.server_key_path_client_to_server,
connect_options,
args.skip_cert_validation,
results)

if args.client_cert_path is not None:
root_cert_path = args.root_cert_path_client_to_server \
if args.root_cert_path_client_to_server is not None else args.root_cert_path
self.verify_certificates("Client", root_cert_path,
args.client_cert_path,
args.client_key_path,
connect_options,
'HOSTNAME', # not checking hostname for that serts
results)

sudo_pass_file = '{}/.yb_sudo_pass.sh'.format(args.remote_tmp_dir)
self.extra_vars['sudo_pass_file'] = sudo_pass_file
ansible_status = self.cloud.setup_ansible(args).run("send_sudo_pass.yml",
Expand Down Expand Up @@ -651,3 +593,96 @@ def install_node_agent(self, args):
remote_shell.run_command(service_cmd)
finally:
remote_shell.close()


class OnPremVerifyCertificatesMethod(AbstractInstancesMethod):
def __init__(self, base_command):
super(OnPremVerifyCertificatesMethod, self).__init__(base_command, "verify_certs")

def add_extra_args(self):
super(OnPremVerifyCertificatesMethod, self).add_extra_args()
# NodeToNode certificates
self.parser.add_argument("--root_cert_path", type=str, required=False, default=None)
self.parser.add_argument("--yba_root_cert_checksum", type=str, required=False, default=None)
self.parser.add_argument("--node_server_cert_path", type=str, required=False, default=None)
self.parser.add_argument("--node_server_key_path", type=str, required=False, default=None)
# ClientToNode certificates
# client_rootCA and yba_client_root_cert_checksum are not required if
# they are same as rootCA
self.parser.add_argument("--client_root_cert_path", type=str, required=False,
default=None)
self.parser.add_argument("--yba_client_root_cert_checksum", type=str, required=False,
default=None)
self.parser.add_argument("--client_server_cert_path", type=str, required=False,
default=None)
self.parser.add_argument("--client_server_key_path", type=str, required=False, default=None)

self.parser.add_argument("--skip_hostname_check", action="store_true")

def verify_custom_certificate_checksum(self, root_cert_path, yba_cert_checksum,
connect_options):
remote_shell = RemoteShell(connect_options)
root_ca_checksum_response = remote_shell.run_command_raw("md5sum {}".format(root_cert_path))
if root_ca_checksum_response.exited != 0:
raise YBOpsRuntimeError("Failed to get checksum for root CA certificate")
root_ca_checksum = root_ca_checksum_response.stdout.split()[0]
if root_ca_checksum != yba_cert_checksum:
raise YBOpsRuntimeError(
"Root Certificate on the node doesn't match the certificate given to YBA.")

def verify_certificates(self, cert_type, root_cert_path, yba_cert_checksum, cert_path,
key_path, connect_options, verify_hostname, results):
"""
This validation is only used for onprem + customCertHostPath certs.
For more generic validation, use the verify_certs method in cloud.py
and in CertificateHelper.java class.
"""
result_var = True
try:
# Do all the basic checks here
self.cloud.verify_certs(root_cert_path, cert_path, key_path, connect_options,
verify_hostname, perform_extended_validation=True)
# Do the CA checksum check here since its unique to onprem + customCertHostPath certs
self.verify_custom_certificate_checksum(root_cert_path, yba_cert_checksum,
connect_options)
except YBOpsRuntimeError as e:
result_var = False
results["{} certificate".format(cert_type)] = str(e)
if result_var:
results["{} certificate".format(cert_type)] = True

def callback(self, args):
host_info = self.cloud.get_host_info(args)
if not host_info:
raise YBOpsRuntimeError("Instance: {} does not exist, cannot verify certificates"
.format(args.search_pattern))
self.update_ansible_vars_with_args(args)
self.extra_vars.update(self.get_server_host_port(host_info, args.custom_ssh_port))
results = {}
# Verify NodeToNode certificates
if args.root_cert_path is not None:
self.verify_certificates("RootCA", args.root_cert_path,
args.yba_root_cert_checksum,
args.node_server_cert_path,
args.node_server_key_path,
self.extra_vars,
not args.skip_hostname_check,
results)

# Verify ClientToNode certificates
if args.client_root_cert_path is not None:
self.verify_certificates("ClientRootCA", args.client_root_cert_path,
args.yba_client_root_cert_checksum,
args.client_server_cert_path,
args.client_server_key_path,
self.extra_vars,
not args.skip_hostname_check,
results)

for key, value in results.items():
logging.debug("Certificate verification result: {}: {}".format(key, value))
if value is not True:
logging.error(json.dumps(results, indent=2))
raise YBOpsRuntimeError(
"Certificate verification for {} on node - {} failed with error: {}".format(
key, args.search_pattern, value))
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,12 @@
import com.yugabyte.yw.models.Universe;
import com.yugabyte.yw.models.helpers.NodeDetails;
import com.yugabyte.yw.models.helpers.NodeDetails.NodeState;
import java.util.Collection;
import java.util.Collections;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import javax.annotation.Nullable;
import javax.inject.Inject;
import lombok.extern.slf4j.Slf4j;

Expand Down Expand Up @@ -116,6 +119,15 @@ private void configureNodeDetails(Universe universe) {
EncryptionInTransitUtil.isClientRootCARequired(taskParams())
? taskParams().getClientRootCA()
: null);

createCheckCertificateConfigTask(
Collections.singleton(cluster),
Collections.singleton(currentNodeClone),
taskParams().rootCA,
EncryptionInTransitUtil.isClientRootCARequired(taskParams())
? taskParams().getClientRootCA()
: null,
userIntent.enableClientToNodeEncrypt);
}
}

Expand Down Expand Up @@ -328,4 +340,14 @@ public void run() {
}
log.info("Finished {} task.", getName());
}

public void createCheckCertificateConfigTask(
Collection<Cluster> clusters,
Set<NodeDetails> nodes,
@Nullable UUID rootCA,
@Nullable UUID clientRootCA,
boolean enableClientToNodeEncrypt) {
createCheckCertificateConfigTask(
clusters, nodes, rootCA, clientRootCA, enableClientToNodeEncrypt, null);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ public void run() {
// Create preflight node check tasks for on-prem nodes.
createPreflightNodeCheckTasks(universe, Collections.singletonList(cluster));

// Add check for certificateConfig
createCheckCertificateConfigTask(universe, Collections.singletonList(cluster));

// Create the nodes.
// State checking is enabled because the subtasks are not idempotent.
createCreateNodeTasks(
Expand Down
Loading

0 comments on commit 4c6cf5a

Please sign in to comment.