diff --git a/cluster-autoscaler/cloudprovider/azure/README.md b/cluster-autoscaler/cloudprovider/azure/README.md
index 9fd9a51a134e..540b0588ddff 100644
--- a/cluster-autoscaler/cloudprovider/azure/README.md
+++ b/cluster-autoscaler/cloudprovider/azure/README.md
@@ -151,7 +151,8 @@ Make a copy of [cluster-autoscaler-standard-master.yaml](examples/cluster-autosc
 
 In the `cluster-autoscaler` spec, find the `image:` field and replace `{{ ca_version }}` with a specific cluster autoscaler release.
 
-Below that, in the `command:` section, update the `--nodes=` arguments to reference your node limits and node pool name. For example, if node pool "k8s-nodepool-1" should scale from 1 to 10 nodes:
+Below that, in the `command:` section, update the `--nodes=` arguments to reference your node limits and node pool name. Tip: the node pool name is NOT the availability set name; the node pool behind the availability set
+`agentpool1-availabilitySet-xxxxxxxx`, for instance, is named `agentpool1`. For example, if node pool "k8s-nodepool-1" should scale from 1 to 10 nodes:
 
 ```yaml
         - --nodes=1:10:k8s-nodepool-1
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go b/cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go
index 463f90663db3..642248ae261a 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go
@@ -36,6 +36,16 @@ import (
 	schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
 )
 
+var (
+	vmInstancesRefreshPeriod = 5 * time.Minute // age after which the cached VM list is re-listed
+	// virtualMachinesStatusCache is the package-level VM list served by getVirtualMachinesFromCache.
+	virtualMachinesStatusCache struct {
+		lastRefresh     time.Time                // set after each successful refresh
+		mutex           sync.Mutex               // held while the other two fields are read or written
+		virtualMachines []compute.VirtualMachine // result of the last successful GetVirtualMachines call
+	}
+)
+
 // AgentPool implements NodeGroup interface for agent pools deployed by aks-engine.
 type AgentPool struct {
 	azureRef
@@ -117,9 +127,32 @@ func (as *AgentPool) MaxSize() int {
 	return as.maxSize
 }
 
+// getVirtualMachinesFromCache returns the package-level cached VM list, re-listing through
+// as.GetVirtualMachines once the cache is older than vmInstancesRefreshPeriod.
+// NOTE(review): one cache serves every AgentPool; confirm GetVirtualMachines is not pool-specific.
+func (as *AgentPool) getVirtualMachinesFromCache() ([]compute.VirtualMachine, error) {
+	virtualMachinesStatusCache.mutex.Lock()
+	defer virtualMachinesStatusCache.mutex.Unlock()
+	if time.Since(virtualMachinesStatusCache.lastRefresh) < vmInstancesRefreshPeriod {
+		return virtualMachinesStatusCache.virtualMachines, nil
+	}
+	vms, err := as.GetVirtualMachines()
+	if err != nil {
+		// On throttling serve stale data, but only if a refresh ever succeeded: an unfilled cache is not "no VMs".
+		if isAzureRequestsThrottled(err) && !virtualMachinesStatusCache.lastRefresh.IsZero() {
+			klog.Warningf("getVirtualMachinesFromCache: throttling with message %v, would return the cached vms", err)
+			return virtualMachinesStatusCache.virtualMachines, nil
+		}
+		return nil, err
+	}
+	virtualMachinesStatusCache.virtualMachines = vms
+	virtualMachinesStatusCache.lastRefresh = time.Now()
+	return vms, nil
+}
+
 // GetVMIndexes gets indexes of all virtual machines belonging to the agent pool.
 func (as *AgentPool) GetVMIndexes() ([]int, map[int]string, error) {
-	instances, err := as.GetVirtualMachines()
+	instances, err := as.getVirtualMachinesFromCache()
 	if err != nil {
 		return nil, nil, err
 	}
@@ -266,7 +299,7 @@ func (as *AgentPool) DecreaseTargetSize(delta int) error {
 	as.mutex.Lock()
 	defer as.mutex.Unlock()
 
-	nodes, err := as.GetVirtualMachines()
+	nodes, err := as.getVirtualMachinesFromCache()
 	if err != nil {
 		return err
 	}
@@ -391,7 +424,7 @@ func (as *AgentPool) TemplateNodeInfo() (*schedulernodeinfo.NodeInfo, error) {
 
 // Nodes returns a list of all nodes that belong to this node group.
 func (as *AgentPool) Nodes() ([]cloudprovider.Instance, error) {
-	instances, err := as.GetVirtualMachines()
+	instances, err := as.getVirtualMachinesFromCache()
 	if err != nil {
 		return nil, err
 	}