Skip to content

Commit

Permalink
Add az vm repair reset-nic command (#5117)
Browse files Browse the repository at this point in the history
  • Loading branch information
swbae31 authored Jul 28, 2022
1 parent 1dab6ae commit e85a4e9
Show file tree
Hide file tree
Showing 11 changed files with 238 additions and 26 deletions.
4 changes: 4 additions & 0 deletions src/vm-repair/HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
Release History
===============

0.4.4
++++++
Add az vm repair reset-nic command

0.4.3
++++++
Add a new distro option for creating the recovery VM, and add detection of gen2 Linux machines so that a gen2 recovery VM is created for them
Expand Down
15 changes: 15 additions & 0 deletions src/vm-repair/azext_vm_repair/_help.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,18 @@
text: >
az vm repair list-scripts --preview "https://github.com/haagha/repair-script-library/blob/master/map.json"
"""

helps['vm repair reset-nic'] = """
type: command
short-summary: Reset the network interface stack on the VM guest OS. https://docs.microsoft.com/en-us/troubleshoot/azure/virtual-machines/reset-network-interface
examples:
- name: Reset the VM guest NIC. Specify VM resource group and name.
text: >
az vm repair reset-nic -g MyResourceGroup -n MyVM --verbose
- name: Reset the VM guest NIC and auto-start the VM if it is not in running state.
text: >
az vm repair reset-nic -g MyResourceGroup -n MyVM --yes --verbose
- name: Reset the VM guest NIC. Specify VM resource id.
text: >
az vm repair reset-nic --ids /subscriptions/MySubscriptionId/resourceGroups/MyResourceGroup/providers/Microsoft.Compute/virtualMachines/MyVM --verbose
"""
3 changes: 3 additions & 0 deletions src/vm-repair/azext_vm_repair/_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,6 @@ def load_arguments(self, _):

with self.argument_context('vm repair list-scripts') as c:
c.argument('preview', help="URL of forked repair script library's map.json https://github.com/{user}/repair-script-library/blob/master/map.json")

with self.argument_context('vm repair reset-nic') as c:
c.argument('yes', help='Do not prompt for confirmation to start VM if it is not running.')
5 changes: 5 additions & 0 deletions src/vm-repair/azext_vm_repair/_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,11 @@ def validate_run(cmd, namespace):
raise CLIError('Repair resource id is not valid.')


def validate_reset_nic(cmd, namespace):
    """
    Validator for the `vm repair reset-nic` command.

    Runs the extension version check first, then hands the target VM's
    resource group and name (taken from the parsed namespace) to
    `_validate_and_get_vm`.
    """
    check_extension_version(EXTENSION_NAME)
    # Read the namespace only after the version check, same order as before.
    target_group, target_vm = namespace.resource_group_name, namespace.vm_name
    _validate_and_get_vm(cmd, target_group, target_vm)


def _prompt_encrypted_vm(namespace):
from knack.prompting import prompt_y_n, NoTTYException
try:
Expand Down
4 changes: 2 additions & 2 deletions src/vm-repair/azext_vm_repair/command_helper_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
VM_REPAIR_RUN_COMMAND = 'vm repair run'


class command_helper(object):
class command_helper:
"""
The command helper stores command state data and helper functions for vm-repair commands.
It will also execute needed functions at the start and end of commands such as sending telemetry data
Expand Down Expand Up @@ -117,7 +117,7 @@ def init_return_dict(self):
return self.return_dict


class script_data(object):
class script_data:
""" Stores repair script data. """
def __init__(self):
# Unique run-id
Expand Down
3 changes: 2 additions & 1 deletion src/vm-repair/azext_vm_repair/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# --------------------------------------------------------------------------------------------

# pylint: disable=line-too-long
from ._validators import validate_create, validate_restore, validate_run
from ._validators import validate_create, validate_restore, validate_run, validate_reset_nic


# pylint: disable=too-many-locals, too-many-statements
Expand All @@ -15,3 +15,4 @@ def load_command_table(self, _):
g.custom_command('restore', 'restore', validator=validate_restore)
g.custom_command('run', 'run', validator=validate_run)
g.custom_command('list-scripts', 'list_scripts')
g.custom_command('reset-nic', 'reset_nic', is_preview=True, validator=validate_reset_nic)
119 changes: 115 additions & 4 deletions src/vm-repair/azext_vm_repair/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,16 @@
# --------------------------------------------------------------------------------------------

# pylint: disable=line-too-long, too-many-locals, too-many-statements, broad-except, too-many-branches
import json
import timeit
import traceback
import requests

from knack.log import get_logger

from azure.cli.command_modules.vm.custom import get_vm, _is_linux_os
from azure.cli.command_modules.storage.storage_url_helpers import StorageResourceIdentifier
from msrestazure.tools import parse_resource_id
from .exceptions import SkuDoesNotSupportHyperV

from .command_helper_class import command_helper
from .repair_utils import (
Expand All @@ -32,15 +33,15 @@
_fetch_disk_info,
_unlock_singlepass_encrypted_disk,
_invoke_run_command,
_check_hyperV_gen,
_get_cloud_init_script,
_select_distro_linux,
_check_linux_hyperV_gen,
_select_distro_linux_gen2,
_set_repair_map_url,
_is_gen2
_is_gen2,
_check_n_start_vm
)
from .exceptions import AzCommandError, SkuNotAvailableError, UnmanagedDiskCopyError, WindowsOsNotAvailableError, RunScriptNotFoundForIdError, SkuDoesNotSupportHyperV, ScriptReturnsError
from .exceptions import AzCommandError, SkuNotAvailableError, UnmanagedDiskCopyError, WindowsOsNotAvailableError, RunScriptNotFoundForIdError, SkuDoesNotSupportHyperV, ScriptReturnsError, SupportingResourceNotFoundError, CommandCanceledByUserError
logger = get_logger(__name__)


Expand Down Expand Up @@ -529,3 +530,113 @@ def list_scripts(cmd, preview=None):
return_dict['map'] = run_map

return return_dict


def _resource_group_from_id(resource_id, default):
    """
    Return the resource group segment of an Azure resource id.

    The id is expected to look like
    /subscriptions/<sub>/resourceGroups/<rg>/providers/...; when no
    'resourceGroups' segment is present, `default` is returned instead.
    """
    segments = resource_id.strip().split('/')
    for index, segment in enumerate(segments[:-1]):
        if segment.lower() == 'resourcegroups' and segments[index + 1]:
            return segments[index + 1]
    return default


def reset_nic(cmd, vm_name, resource_group_name, yes=False):
    """
    Reset the guest NIC of a VM by temporarily moving its primary private IP
    to another free address and then restoring the original configuration.

    :param cmd: CLI command context, passed to get_vm and the command helper.
    :param vm_name: Name of the VM whose NIC is reset.
    :param resource_group_name: Resource group of the VM.
    :param yes: When True, start a non-running VM without prompting.
    :return: The dict produced by command_helper.init_return_dict(). Errors are
             recorded on the command helper rather than propagated.
    """

    # Init command helper object
    command = command_helper(logger, cmd, 'vm repair reset-nic')
    DYNAMIC_CONFIG = 'Dynamic'
    # Bound before the try block so the except handlers can always reference it.
    VM_OFF_MESSAGE = 'VM is not running. The VM must be in running to reset its NIC.\n'

    try:
        # 0) Check if VM is deallocated or off. If it is, ask to start the VM.
        vm_instance_view = get_vm(cmd, resource_group_name, vm_name, 'instanceView')
        VM_started = _check_n_start_vm(vm_name, resource_group_name, not yes, VM_OFF_MESSAGE, vm_instance_view)
        if not VM_started:
            raise CommandCanceledByUserError("Could not get consent to run VM before resetting the NIC.")

        # 1) Fetch vm network info
        logger.info('Fetching necessary VM network information to reset the NIC...\n')
        # Fetch primary nic id. The primary field is null or true for primary nics.
        get_primary_nic_id_command = 'az vm nic list -g {g} --vm-name {n} --query "[[?primary].id || [?primary==null].id][0][0]" -o tsv' \
                                     .format(g=resource_group_name, n=vm_name)
        primary_nic_id = _call_az_command(get_primary_nic_id_command)
        if not primary_nic_id:
            # Raise no primary nic exception
            raise SupportingResourceNotFoundError('The primary NIC for the VM was not found on Azure.')
        primary_nic_name = primary_nic_id.split('/')[-1]
        # The NIC may live in a different resource group than the VM, so take
        # the group from the NIC id instead of assuming the VM's group.
        nic_resource_group = _resource_group_from_id(primary_nic_id, resource_group_name)

        # Get ip config info to get: vnet name, current private ip, ipconfig name, subnet id
        get_primary_ip_config = 'az network nic ip-config list -g {g} --nic-name {nic_name} --query [[?primary]][0][0]' \
                                .format(g=nic_resource_group, nic_name=primary_nic_name)
        ip_config_string = _call_az_command(get_primary_ip_config)
        if not ip_config_string:
            # Raise primary ip_config not found
            raise SupportingResourceNotFoundError('The primary IP configuration for the VM NIC was not found on Azure.')
        ip_config_object = json.loads(ip_config_string)

        subnet_id = ip_config_object['subnet']['id']
        vnet_name = subnet_id.split('/')[-3]
        # The vnet can also be in its own resource group; read it from the subnet id.
        vnet_resource_group = _resource_group_from_id(subnet_id, resource_group_name)
        ipconfig_name = ip_config_object['name']
        orig_ip_address = ip_config_object['privateIpAddress']
        # Dynamic | Static
        orig_ip_allocation_method = ip_config_object['privateIpAllocationMethod']

        # 2) Get an available ip address to swap to
        # Change to az network vnet subnet list-available-ips when it is available
        # NOTE(review): this queries the whole vnet, not the NIC's subnet, so the
        # first address returned may belong to another subnet - confirm.
        get_available_ip_command = 'az network vnet list-available-ips -g {g} -n {vnet} --query [0] -o tsv' \
                                   .format(g=vnet_resource_group, vnet=vnet_name)
        swap_ip_address = _call_az_command(get_available_ip_command)
        if not swap_ip_address:
            # Raise available IP not found
            raise SupportingResourceNotFoundError('Available IP address was not found within the VM subnet.')

        # 3) Update private IP address to another in subnet. This will invoke and wait for a VM restart.
        logger.info('Updating VM IP configuration. This might take a few minutes...\n')
        # Update IP address
        update_ip_command = 'az network nic ip-config update -g {g} --nic-name {nic} -n {config} --private-ip-address {ip} ' \
                            .format(g=nic_resource_group, nic=primary_nic_name, config=ipconfig_name, ip=swap_ip_address)
        _call_az_command(update_ip_command)

        # 4) Change things back. This will also invoke and wait for a VM restart.
        logger.info('NIC reset is complete. Now reverting back to your original configuration...\n')
        # If user had dynamic config, change back to dynamic
        revert_ip_command = None
        if orig_ip_allocation_method == DYNAMIC_CONFIG:
            # Revert Static to Dynamic
            revert_ip_command = 'az network nic ip-config update -g {g} --nic-name {nic} -n {config} --set privateIpAllocationMethod={method}' \
                                .format(g=nic_resource_group, nic=primary_nic_name, config=ipconfig_name, method=DYNAMIC_CONFIG)
        else:
            # Revert to original static ip
            revert_ip_command = 'az network nic ip-config update -g {g} --nic-name {nic} -n {config} --private-ip-address {ip} ' \
                                .format(g=nic_resource_group, nic=primary_nic_name, config=ipconfig_name, ip=orig_ip_address)

        _call_az_command(revert_ip_command)
        logger.info('VM guest NIC reset is complete and all configurations are reverted.')
    # Some error happened. Record it on the command helper and stop.
    # NOTE(review): nothing here restores the original IP configuration if the
    # failure happens between steps 3 and 4 - confirm whether that is intended.
    except KeyboardInterrupt:
        command.set_status_error()
        command.error_stack_trace = traceback.format_exc()
        command.error_message = "Command interrupted by user input."
        command.message = "Command interrupted by user input."
    except AzCommandError as azCommandError:
        command.set_status_error()
        command.error_stack_trace = traceback.format_exc()
        command.error_message = str(azCommandError)
        command.message = "Reset NIC failed."
    except SupportingResourceNotFoundError as resourceError:
        command.set_status_error()
        command.error_stack_trace = traceback.format_exc()
        command.error_message = str(resourceError)
        command.message = "Reset NIC could not be initiated."
    except CommandCanceledByUserError as canceledError:
        command.set_status_error()
        command.error_stack_trace = traceback.format_exc()
        command.error_message = str(canceledError)
        command.message = VM_OFF_MESSAGE
    except Exception as exception:
        command.set_status_error()
        command.error_stack_trace = traceback.format_exc()
        command.error_message = str(exception)
        command.message = 'An unexpected error occurred. Try running again with the --debug flag to debug.'
    else:
        command.set_status_success()
        command.message = 'VM guest NIC reset complete. The VM is in running state.'
    finally:
        if command.error_stack_trace:
            logger.debug(command.error_stack_trace)
        # Generate return object and log errors if needed
        return_dict = command.init_return_dict()

    return return_dict
8 changes: 8 additions & 0 deletions src/vm-repair/azext_vm_repair/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,11 @@ class ScriptReturnsError(Exception):

class SuseNotAvailableError(Exception):
"""Raised when SUSE image not available"""


class SupportingResourceNotFoundError(Exception):
    """
    Raised when a supporting resource needed for the command is not found.

    For example, reset_nic raises it when the primary NIC, its primary IP
    configuration, or an available IP address cannot be found.
    """


class CommandCanceledByUserError(Exception):
    """Raised when the command is canceled by user input."""
60 changes: 51 additions & 9 deletions src/vm-repair/azext_vm_repair/repair_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for license information.
# --------------------------------------------------------------------------------------------

# pylint: disable=line-too-long, deprecated-method, global-statement
# from logging import Logger # , log
import subprocess
import shlex
Expand All @@ -16,9 +16,7 @@
from knack.prompting import prompt_y_n, NoTTYException

from .encryption_types import Encryption

from .exceptions import (AzCommandError, WindowsOsNotAvailableError, RunScriptNotFoundForIdError, SkuDoesNotSupportHyperV, SuseNotAvailableError)
# pylint: disable=line-too-long, deprecated-method

REPAIR_MAP_URL = 'https://raw.githubusercontent.com/Azure/repair-script-library/master/map.json'

Expand Down Expand Up @@ -202,6 +200,53 @@ def _clean_up_resources(resource_group_name, confirm):
logger.error("Clean up failed.")


def _check_n_start_vm(vm_name, resource_group_name, confirm, vm_off_message, vm_instance_view):
    """
    Make sure the VM is running, starting it (optionally after a prompt) if not.

    Returns: True when the VM was already running or was started here.
             False when the user declined the start prompt, or when the prompt
             could not be shown because there is no interactive terminal.
    Raises:  AzCommandError if the 'az vm start' call fails.
             Any other exception hit while reading the power state is logged
             and re-raised.
    """
    running_code = 'PowerState/running'
    try:
        logger.info('Checking VM power state...\n')
        # Collect every status code reported on the instance view.
        reported_codes = [status.code for status in vm_instance_view.instance_view.statuses]
        if running_code in reported_codes:
            logger.info('VM is running\n')
            return True

        # VM Stopped or Deallocated. Tell the caller's user and ask to run it.
        logger.warning(vm_off_message)
        if confirm and not prompt_y_n('Continue to auto-start VM?'):
            logger.warning('Skipping VM start')
            return False

        logger.info('Starting the VM. This might take a few minutes...\n')
        _call_az_command('az vm start --resource-group {rg} --name {n}'.format(rg=resource_group_name, n=vm_name))
        logger.info('VM started\n')
    except NoTTYException:
        # Only the confirmation prompt can raise this.
        logger.warning('Cannot confirm VM auto-start in non-interactive mode.')
        logger.warning('Skipping auto-start')
        return False
    except AzCommandError as start_error:
        logger.error("Failed to start VM.")
        raise start_error
    except Exception as unexpected_error:
        logger.error("Failed to check VM power status.")
        raise unexpected_error

    # Reached only when the start command completed without raising.
    return True


def _fetch_compatible_sku(source_vm, hyperv):

location = source_vm.location
Expand Down Expand Up @@ -318,13 +363,10 @@ def _check_linux_hyperV_gen(source_vm):
fetch_hypervgen_command = 'az vm get-instance-view --ids {id} --query "[instanceView.hyperVGeneration]" -o json'.format(id=source_vm.id)
hyperVGen_list = loads(_call_az_command(fetch_hypervgen_command))
hyperVGen = hyperVGen_list[0]
if hyperVGen == 'V2':
return hyperVGen
else:
if hyperVGen != 'V2':
hyperVGen = 'V1'
return hyperVGen
else:
return hyperVGen

return hyperVGen


def _secret_tag_check(resource_group_name, copy_disk_name, secreturl):
Expand Down
Loading

0 comments on commit e85a4e9

Please sign in to comment.