From 3996f556393bf9bdcff35a53ebc52aa450dc7452 Mon Sep 17 00:00:00 2001 From: Shubham Varshney Date: Tue, 16 Jul 2024 10:40:00 +0530 Subject: [PATCH] [YNP][PLAT-14664] make node register to provider idempotent Summary: [YNP] make node register to provider idempotent. Also improves the preflight check for the modules Test Plan: Verified that node-agent registration is idempotent. Verified that preflight checks are successful. Reviewers: anijhawan, nbhatia Reviewed By: anijhawan Subscribers: yugaware Differential Revision: https://phorge.dev.yugabyte.com/D36542 --- .../ynp/commands/provision_command.py | 2 +- .../configure_os/templates/precheck.j2 | 11 ++- .../provision/network/templates/precheck.j2 | 6 +- .../provision/node_agent/node_agent.py | 4 +- .../provision/node_agent/templates/run.j2 | 98 ++++++++++++------- .../provision/systemd/templates/precheck.j2 | 2 +- .../provision/yugabyte/templates/run.j2 | 5 +- 7 files changed, 83 insertions(+), 45 deletions(-) diff --git a/managed/node-agent/resources/ynp/commands/provision_command.py b/managed/node-agent/resources/ynp/commands/provision_command.py index e8021a809545..fe9cada25212 100644 --- a/managed/node-agent/resources/ynp/commands/provision_command.py +++ b/managed/node-agent/resources/ynp/commands/provision_command.py @@ -89,7 +89,7 @@ def _generate_template(self): precheck_combined_script = self._build_script(all_templates, "precheck") run_combined_script = self._build_script(all_templates, "run") - return precheck_combined_script, run_combined_script + return run_combined_script, precheck_combined_script def _check_package(self, package_manager, package_name): """Check if a package is installed.""" diff --git a/managed/node-agent/resources/ynp/modules/provision/configure_os/templates/precheck.j2 b/managed/node-agent/resources/ynp/modules/provision/configure_os/templates/precheck.j2 index fe8cb966cfc8..df778ff4edd6 100644 --- a/managed/node-agent/resources/ynp/modules/provision/configure_os/templates/precheck.j2 +++ b/managed/node-agent/resources/ynp/modules/provision/configure_os/templates/precheck.j2 @@ -30,7 +30,7 @@ check_ulimit() { verify_systemd_conf() { local file=$1 local setting=$2 - if grep -q "$setting" $file; then + if grep -q "$setting" "$file"; then echo "[PASS] $setting is set in $file" else echo "[FAIL] $setting is not set in $file" @@ -44,7 +44,7 @@ verify_sysctl() { local expected_value=$2 local current_value - current_value=$(sysctl -n $param) + current_value=$(sysctl -n "$param") if [ "$current_value" -eq "$expected_value" ]; then echo "[PASS] $param is set to $current_value (expected: $expected_value)" @@ -55,9 +55,10 @@ verify_sysctl() { } # Verify ulimit settings -{% for key, value in limits.items() %} -check_ulimit "{{ key.replace('_', ' ') }}" "{{ value }}" || exit 1 -{%- endfor -%} +for key in "${!limits[@]}"; do + local key_name="$key" + check_ulimit "$key_name" "${limits[$key]}" || exit 1 +done # Verify DefaultLimitNOFILE in systemd configuration files verify_systemd_conf /etc/systemd/system.conf "DefaultLimitNOFILE={{ fd_limit }}" || exit 1 diff --git a/managed/node-agent/resources/ynp/modules/provision/network/templates/precheck.j2 b/managed/node-agent/resources/ynp/modules/provision/network/templates/precheck.j2 index 4a7e1bf9c79f..aba037e41b1f 100644 --- a/managed/node-agent/resources/ynp/modules/provision/network/templates/precheck.j2 +++ b/managed/node-agent/resources/ynp/modules/provision/network/templates/precheck.j2 @@ -39,6 +39,8 @@ for port in "${ports[@]}"; do # Check if the port is open check_port "$vm_ip" "$port" - # Stop the server - kill $server_pid + # Stop the server if the PID exists + if ps -p $server_pid > /dev/null; then + kill $server_pid + fi done diff --git a/managed/node-agent/resources/ynp/modules/provision/node_agent/node_agent.py b/managed/node-agent/resources/ynp/modules/provision/node_agent/node_agent.py index 83bdc8d48199..0ef27cd2b52c 100644 --- a/managed/node-agent/resources/ynp/modules/provision/node_agent/node_agent.py +++ b/managed/node-agent/resources/ynp/modules/provision/node_agent/node_agent.py @@ -89,7 +89,7 @@ def _generate_provider_update_payload(self, context, provider): }) provider['regions'] = regions return provider - + def _generate_instance_type_payload(self, context): time_stamp = int(time.time()) instance_data = { @@ -130,7 +130,7 @@ def _generate_add_node_payload(self, context): } return node_add_payload - + def _get_provider(self, context): provider_url = self._get_provider_url(context) yba_url = context.get('url') diff --git a/managed/node-agent/resources/ynp/modules/provision/node_agent/templates/run.j2 b/managed/node-agent/resources/ynp/modules/provision/node_agent/templates/run.j2 index 935c4ba6b117..25be95228821 100644 --- a/managed/node-agent/resources/ynp/modules/provision/node_agent/templates/run.j2 +++ b/managed/node-agent/resources/ynp/modules/provision/node_agent/templates/run.j2 @@ -38,6 +38,13 @@ _add_node_to_provider() { echo "${yba_url}/api/v1/customers/${customer_uuid}/zones/${zone_uuid}/nodes" } +_get_nodes_in_provider() { + local yba_url=$1 + local customer_uuid=$2 + local provider_uuid=$3 + echo "${yba_url}/api/v1/customers/${customer_uuid}/providers/${provider_uuid}/nodes/list" +} + _get_headers() { local token=$1 echo "Accept: application/json" @@ -184,54 +191,79 @@ su - {{ yb_user }} -c "\"$installer_dir/node-agent-installer.sh\" -c install -u "$installer_dir/node-agent-installer.sh" -c install_service --user yugabyte if test -f "{{ tmp_directory }}/add_node_to_provider.json"; then - get_provider_endpoint=$(_get_provider_url $yba_url $customer_uuid $provider_id) - # Perform GET request to fetch provider data by UUID - response=$(curl -s -w "%{http_code}" -o response.txt -X GET "${header_options[@]}" $tls_verify_option "$get_provider_endpoint") + get_nodes_in_provider=$(_get_nodes_in_provider $yba_url $customer_uuid $provider_id) + # Perform GET request to fetch all the nodes associated with provider + response=$(curl -s -w "%{http_code}" -o response.txt -X GET "${header_options[@]}" $tls_verify_option "$get_nodes_in_provider") http_status="${response:(-3)}" response_body=$( temp_response.txt - - # Use a while loop with a file to avoid subshell - while read -r zone; do - zone_code=$(echo "$zone" | grep -oP '"code":\s*"\K[^"]+') - uuid=$(echo "$zone" | grep -oP '"uuid":\s*"\K[^"]+') - - if [ "$zone_code" == "{{ provider_region_zone_name }}" ]; then - zone_uuid="$uuid" - echo "Match found: Zone Code = $zone_code, UUID = $zone_uuid" + # Extract IPs using sed and grep + ips=$(echo "$response_body" | sed -n 's/.*"ip":"\([^"]*\)".*/\1/p') + for ip in $ips; do + if [[ "$ip" == "{{ node_ip }}" ]]; then + matched=true break fi - done < <(grep -oP '"zones":\s*\[[^]]*\]' temp_response.txt | grep -oP '{[^}]*}') - - # Check if zone_uuid was found - if [ -z "$zone_uuid" ]; then - echo "Zone with code {{ provider_region_zone_name }} not found" - exit 1 - fi + done + else + echo "Error: GET request failed with HTTP status $http_status" + exit 1 + fi - add_node_data=$(cat "{{ tmp_directory }}/add_node_to_provider.json") - add_node_url=$(_add_node_to_provider $yba_url $customer_uuid $zone_uuid) - response=$(curl -s -w "%{http_code}" -o response.txt -X POST "${header_options[@]}" -d "$add_node_data" $tls_verify_option "$add_node_url") + if [[ "$matched" == false ]]; then + get_provider_endpoint=$(_get_provider_url $yba_url $customer_uuid $provider_id) + # Perform GET request to fetch provider data by UUID + response=$(curl -s -w "%{http_code}" -o response.txt -X GET "${header_options[@]}" $tls_verify_option "$get_provider_endpoint") http_status="${response:(-3)}" + response_body=$( temp_response.txt + + # Use a while loop with a file to avoid subshell + while read -r zone; do + zone_code=$(echo "$zone" | grep -oP '"code":\s*"\K[^"]+') + uuid=$(echo "$zone" | grep -oP '"uuid":\s*"\K[^"]+') + + if [ "$zone_code" == "{{ provider_region_zone_name }}" ]; then + zone_uuid="$uuid" + echo "Match found: Zone Code = $zone_code, UUID = $zone_uuid" + break + fi + done < <(grep -oP '"zones":\s*\[[^]]*\]' temp_response.txt | grep -oP '{[^}]*}') + + # Check if zone_uuid was found + if [ -z "$zone_uuid" ]; then + echo "Zone with code {{ provider_region_zone_name }} not found" + exit 1 + fi + + add_node_data=$(cat "{{ tmp_directory }}/add_node_to_provider.json") + add_node_url=$(_add_node_to_provider $yba_url $customer_uuid $zone_uuid) + response=$(curl -s -w "%{http_code}" -o response.txt -X POST "${header_options[@]}" -d "$add_node_data" $tls_verify_option "$add_node_url") + http_status="${response:(-3)}" + + if [ $http_status -ge 200 ] && [ $http_status -lt 300 ]; then + echo "Node added successfully" + else + echo "Error: POST request failed with HTTP status $http_status" + exit 1 + fi else echo "Error: POST request failed with HTTP status $http_status" exit 1 fi - else - echo "Error: POST request failed with HTTP status $http_status" - exit 1 fi fi diff --git a/managed/node-agent/resources/ynp/modules/provision/systemd/templates/precheck.j2 b/managed/node-agent/resources/ynp/modules/provision/systemd/templates/precheck.j2 index f1b254712a5b..5ab67bce1eb6 100644 --- a/managed/node-agent/resources/ynp/modules/provision/systemd/templates/precheck.j2 +++ b/managed/node-agent/resources/ynp/modules/provision/systemd/templates/precheck.j2 @@ -1,6 +1,6 @@ systemd_dir="/etc/systemd/system" -{% if not use_system_level_systemd %} +{% if use_system_level_systemd == 'False' %} systemd_dir="{{yb_home_dir}}/.config/systemd/user" {% endif %} diff --git a/managed/node-agent/resources/ynp/modules/provision/yugabyte/templates/run.j2 b/managed/node-agent/resources/ynp/modules/provision/yugabyte/templates/run.j2 index a5e00a01739c..1751ae8ba135 100644 --- a/managed/node-agent/resources/ynp/modules/provision/yugabyte/templates/run.j2 +++ b/managed/node-agent/resources/ynp/modules/provision/yugabyte/templates/run.j2 @@ -37,7 +37,7 @@ if [[ "$platform_id" == "platform:el8" ]]; then SELINUX_STATUS=$(sestatus | grep 'SELinux status' | awk '{print $3}') if [[ "$SELINUX_STATUS" == "enabled" ]]; then # Configuring the correct SELinux context - current_context=$(ls -Zd "{{ yb_home_dir }}" | awk '{print $1}' | cut -d: -f3') + current_context=$(ls -Zd "{{ yb_home_dir }}" | awk '{print $1}' | cut -d: -f3) if [[ "$current_context" != "ssh_home_t" ]]; then chcon -R -t ssh_home_t "{{ yb_home_dir }}" echo "SELinux context for {{ yb_home_dir }} changed to ssh_home_t" @@ -78,3 +78,6 @@ if [ -z {{ public_key_filepath }} ]; then exit 1 fi fi + +# Ensure the permissions for yb_home_dir are 711 +chmod -R 711 "{{ yb_home_dir }}"