Commit 3413247

Merge pull request #2515 from losipiuk/lo/ca-update-vendor-master

Revendor CA against tip of k8s.io/kubernetes

k8s-ci-robot authored Nov 22, 2019
2 parents a56fe5d + 8161ca6
Showing 2,112 changed files with 139,811 additions and 51,853 deletions.
cluster-autoscaler/cloudprovider/gce/gce_metrics.go (9 changes: 5 additions & 4 deletions)

@@ -17,7 +17,8 @@ limitations under the License.
 package gce
 
 import (
-    "github.com/prometheus/client_golang/prometheus"
+    k8smetrics "k8s.io/component-base/metrics"
+    "k8s.io/component-base/metrics/legacyregistry"
 )
 
 const (
@@ -26,8 +27,8 @@ const (
 
 var (
     /**** Metrics related to GCE API usage ****/
-    requestCounter = prometheus.NewCounterVec(
-        prometheus.CounterOpts{
+    requestCounter = k8smetrics.NewCounterVec(
+        &k8smetrics.CounterOpts{
            Namespace: caNamespace,
            Name:      "gce_request_count",
            Help:      "Counter of GCE API requests for each verb and API resource.",
@@ -37,7 +38,7 @@ var (
 
 // RegisterMetrics registers all GCE metrics.
 func RegisterMetrics() {
-    prometheus.MustRegister(requestCounter)
+    legacyregistry.MustRegister(requestCounter)
 }
 
 // registerRequest registers request to GCE API.
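Every metrics change in this commit follows the same two-step migration: constructors move from github.com/prometheus/client_golang/prometheus to their k8s.io/component-base/metrics equivalents, which take a pointer to the Opts struct, and registration moves from the global Prometheus registry to legacyregistry. A minimal, self-contained sketch of the new style (the package, metric, and label names below are illustrative, not taken from this diff):

package gcexample

import (
    k8smetrics "k8s.io/component-base/metrics"
    "k8s.io/component-base/metrics/legacyregistry"
)

// exampleRequestCount is a hypothetical counter in the new style. Note the
// pointer to CounterOpts: the old prometheus.NewCounterVec took its options
// struct by value.
var exampleRequestCount = k8smetrics.NewCounterVec(
    &k8smetrics.CounterOpts{
        Namespace: "example",
        Name:      "request_count",
        Help:      "Counter of example requests, by verb.",
    }, []string{"verb"},
)

func init() {
    // legacyregistry places the metric in the global registry served by
    // legacyregistry.Handler(), mirroring the old prometheus.MustRegister.
    legacyregistry.MustRegister(exampleRequestCount)
}

// recordRequest increments the counter for one request verb.
func recordRequest(verb string) {
    exampleRequestCount.WithLabelValues(verb).Inc()
}

The observation side (WithLabelValues, Inc, Observe) is unchanged, which is why the diffs in this commit touch only constructors and registration calls.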
cluster-autoscaler/cloudprovider/gce/templates_test.go (2 changes: 1 addition & 1 deletion)

@@ -614,7 +614,7 @@ func makeResourceList2(cpu int64, memory int64, gpu int64, pods int64) (apiv1.Re
 
 func assertEqualResourceLists(t *testing.T, name string, expected, actual apiv1.ResourceList) {
    t.Helper()
-   assert.True(t, quota.V1Equals(expected, actual),
+   assert.True(t, quota.Equals(expected, actual),
        "%q unequal:\nExpected: %v\nActual: %v", name, stringifyResourceList(expected), stringifyResourceList(actual))
 }
 
cluster-autoscaler/core/filter_out_schedulable.go (2 changes: 1 addition & 1 deletion)

@@ -108,7 +108,7 @@ func filterOutSchedulableByPacking(unschedulableCandidates []*apiv1.Pod, nodes [
    nodeNameToNodeInfo := schedulerutil.CreateNodeNameToInfoMap(nonExpendableScheduled, nodes)
 
    sort.Slice(unschedulableCandidates, func(i, j int) bool {
-       return util.GetPodPriority(unschedulableCandidates[i]) > util.GetPodPriority(unschedulableCandidates[j])
+       return util.MoreImportantPod(unschedulableCandidates[i], unschedulableCandidates[j])
    })
 
    for _, pod := range unschedulableCandidates {
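Note that this is not a pure rename: util.GetPodPriority compared priority alone, leaving the order of equal-priority pods unspecified, while util.MoreImportantPod in the vendored scheduler code also breaks priority ties (by pod start time, to the best of our reading; the vendored source is authoritative). A self-contained sketch of that comparison, using a stand-in struct rather than the real *apiv1.Pod:

package main

import (
    "fmt"
    "sort"
    "time"
)

// pod is a stand-in for *apiv1.Pod carrying only what the comparison needs.
type pod struct {
    name      string
    priority  int32
    startTime time.Time
}

// moreImportantPod mirrors the vendored scheduler helper: higher priority
// wins, and equal priorities fall back to the earlier start time (assumed
// tie-break; check the vendored k8s.io/kubernetes source for the exact rule).
func moreImportantPod(p1, p2 pod) bool {
    if p1.priority != p2.priority {
        return p1.priority > p2.priority
    }
    return p1.startTime.Before(p2.startTime)
}

func main() {
    now := time.Now()
    pods := []pod{
        {name: "low", priority: 0, startTime: now},
        {name: "high-new", priority: 100, startTime: now},
        {name: "high-old", priority: 100, startTime: now.Add(-time.Hour)},
    }
    sort.Slice(pods, func(i, j int) bool { return moreImportantPod(pods[i], pods[j]) })
    for _, p := range pods {
        fmt.Println(p.name) // high-old, high-new, low
    }
}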
cluster-autoscaler/core/utils/taint_key_set.go (19 changes: 10 additions & 9 deletions)

@@ -17,6 +17,7 @@ limitations under the License.
 package utils
 
 import (
+   v1 "k8s.io/api/core/v1"
    schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
 )
 
@@ -26,14 +27,14 @@ type TaintKeySet map[string]bool
 var (
    // NodeConditionTaints lists taint keys used as node conditions
    NodeConditionTaints = TaintKeySet{
-       schedulerapi.TaintNodeNotReady:           true,
-       schedulerapi.TaintNodeUnreachable:        true,
-       schedulerapi.TaintNodeUnschedulable:      true,
-       schedulerapi.TaintNodeMemoryPressure:     true,
-       schedulerapi.TaintNodeDiskPressure:       true,
-       schedulerapi.TaintNodeNetworkUnavailable: true,
-       schedulerapi.TaintNodePIDPressure:        true,
-       schedulerapi.TaintExternalCloudProvider:  true,
-       schedulerapi.TaintNodeShutdown:           true,
+       v1.TaintNodeNotReady:                    true,
+       v1.TaintNodeUnreachable:                 true,
+       v1.TaintNodeUnschedulable:               true,
+       v1.TaintNodeMemoryPressure:              true,
+       v1.TaintNodeDiskPressure:                true,
+       v1.TaintNodeNetworkUnavailable:          true,
+       v1.TaintNodePIDPressure:                 true,
+       schedulerapi.TaintExternalCloudProvider: true,
+       schedulerapi.TaintNodeShutdown:          true,
    }
 )
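The taint key strings themselves do not change here; seven of the constants now live in k8s.io/api/core/v1, so only their import paths move, while the last two remain scheduler-internal. For illustration, a TaintKeySet gives constant-time membership checks by taint key; a hypothetical helper (not part of this diff, assumed to sit alongside the set above) might use it like this:

package utils

import (
    v1 "k8s.io/api/core/v1"
)

// filterOutNodeConditionTaints is a hypothetical helper, not part of this
// commit, shown only to illustrate how a TaintKeySet is used: O(1) lookups
// by taint key.
func filterOutNodeConditionTaints(taints []v1.Taint) []v1.Taint {
    var filtered []v1.Taint
    for _, taint := range taints {
        if NodeConditionTaints[taint.Key] {
            continue // skip taints that only mirror node conditions
        }
        filtered = append(filtered, taint)
    }
    return filtered
}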
cluster-autoscaler/go.mod (314 changes: 200 additions & 114 deletions)

Large diffs are not rendered by default.

cluster-autoscaler/go.mod-extra (1 change: 0 additions & 1 deletion)

@@ -3,7 +3,6 @@ go 1.12
 
 require (
    github.com/rancher/go-rancher v0.1.0
-   github.com/google/go-querystring v1.0.0
    github.com/aws/aws-sdk-go v1.23.18
 )
 
cluster-autoscaler/go.sum (375 changes: 230 additions & 145 deletions)

Large diffs are not rendered by default.

cluster-autoscaler/main.go (4 changes: 2 additions & 2 deletions)

@@ -29,7 +29,6 @@ import (
    "syscall"
    "time"
 
-   "github.com/prometheus/client_golang/prometheus"
    "github.com/spf13/pflag"
 
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -53,6 +52,7 @@ import (
    "k8s.io/client-go/tools/leaderelection/resourcelock"
    kube_flag "k8s.io/component-base/cli/flag"
    componentbaseconfig "k8s.io/component-base/config"
+   "k8s.io/component-base/metrics/legacyregistry"
    "k8s.io/klog"
    "k8s.io/kubernetes/pkg/client/leaderelectionconfig"
 )
@@ -368,7 +368,7 @@ func main() {
    klog.V(1).Infof("Cluster Autoscaler %s", version.ClusterAutoscalerVersion)
 
    go func() {
-       http.Handle("/metrics", prometheus.Handler())
+       http.Handle("/metrics", legacyregistry.Handler())
        http.Handle("/health-check", healthCheck)
        err := http.ListenAndServe(*address, nil)
        klog.Fatalf("Failed to start metrics: %v", err)
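With client_golang gone from main.go, the /metrics endpoint is served from the component-base registry instead. A minimal standalone sketch of that wiring (the port is illustrative; the real binary reads it from the *address flag shown above):

package main

import (
    "net/http"

    "k8s.io/component-base/metrics/legacyregistry"
    "k8s.io/klog"
)

func main() {
    // legacyregistry.Handler() exposes everything registered through
    // legacyregistry.MustRegister, replacing the deprecated
    // prometheus.Handler() used before this commit.
    http.Handle("/metrics", legacyregistry.Handler())
    klog.Fatal(http.ListenAndServe(":8085", nil))
}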
cluster-autoscaler/metrics/metrics.go (119 changes: 60 additions & 59 deletions)

@@ -21,9 +21,10 @@ import (
 
    "k8s.io/autoscaler/cluster-autoscaler/utils/errors"
    "k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
-   _ "k8s.io/kubernetes/pkg/client/metrics/prometheus" // for client-go metrics registration
+   _ "k8s.io/component-base/metrics/prometheus/restclient" // for client-go metrics registration
 
-   "github.com/prometheus/client_golang/prometheus"
+   k8smetrics "k8s.io/component-base/metrics"
+   "k8s.io/component-base/metrics/legacyregistry"
    "k8s.io/klog"
 )
 
@@ -92,58 +93,58 @@ const (
 
 var (
    /**** Metrics related to cluster state ****/
-   clusterSafeToAutoscale = prometheus.NewGauge(
-       prometheus.GaugeOpts{
+   clusterSafeToAutoscale = k8smetrics.NewGauge(
+       &k8smetrics.GaugeOpts{
            Namespace: caNamespace,
            Name:      "cluster_safe_to_autoscale",
            Help:      "Whether or not cluster is healthy enough for autoscaling. 1 if it is, 0 otherwise.",
        },
    )
 
-   nodesCount = prometheus.NewGaugeVec(
-       prometheus.GaugeOpts{
+   nodesCount = k8smetrics.NewGaugeVec(
+       &k8smetrics.GaugeOpts{
            Namespace: caNamespace,
            Name:      "nodes_count",
            Help:      "Number of nodes in cluster.",
        }, []string{"state"},
    )
 
-   nodeGroupsCount = prometheus.NewGaugeVec(
-       prometheus.GaugeOpts{
+   nodeGroupsCount = k8smetrics.NewGaugeVec(
+       &k8smetrics.GaugeOpts{
            Namespace: caNamespace,
            Name:      "node_groups_count",
            Help:      "Number of node groups managed by CA.",
        }, []string{"node_group_type"},
    )
 
-   unschedulablePodsCount = prometheus.NewGauge(
-       prometheus.GaugeOpts{
+   unschedulablePodsCount = k8smetrics.NewGauge(
+       &k8smetrics.GaugeOpts{
            Namespace: caNamespace,
            Name:      "unschedulable_pods_count",
            Help:      "Number of unschedulable pods in the cluster.",
        },
    )
 
    /**** Metrics related to autoscaler execution ****/
-   lastActivity = prometheus.NewGaugeVec(
-       prometheus.GaugeOpts{
+   lastActivity = k8smetrics.NewGaugeVec(
+       &k8smetrics.GaugeOpts{
            Namespace: caNamespace,
            Name:      "last_activity",
            Help:      "Last time certain part of CA logic executed.",
        }, []string{"activity"},
    )
 
-   functionDuration = prometheus.NewHistogramVec(
-       prometheus.HistogramOpts{
+   functionDuration = k8smetrics.NewHistogramVec(
+       &k8smetrics.HistogramOpts{
            Namespace: caNamespace,
            Name:      "function_duration_seconds",
            Help:      "Time taken by various parts of CA main loop.",
            Buckets:   []float64{0.01, 0.05, 0.1, 0.5, 1.0, 2.5, 5.0, 7.5, 10.0, 12.5, 15.0, 17.5, 20.0, 22.5, 25.0, 27.5, 30.0, 50.0, 75.0, 100.0, 1000.0},
        }, []string{"function"},
    )
 
-   functionDurationSummary = prometheus.NewSummaryVec(
-       prometheus.SummaryOpts{
+   functionDurationSummary = k8smetrics.NewSummaryVec(
+       &k8smetrics.SummaryOpts{
            Namespace: caNamespace,
            Name:      "function_duration_quantile_seconds",
            Help:      "Quantiles of time taken by various parts of CA main loop.",
@@ -152,97 +153,97 @@
    )
 
    /**** Metrics related to autoscaler operations ****/
-   errorsCount = prometheus.NewCounterVec(
-       prometheus.CounterOpts{
+   errorsCount = k8smetrics.NewCounterVec(
+       &k8smetrics.CounterOpts{
            Namespace: caNamespace,
            Name:      "errors_total",
            Help:      "The number of CA loops failed due to an error.",
        }, []string{"type"},
    )
 
-   scaleUpCount = prometheus.NewCounter(
-       prometheus.CounterOpts{
+   scaleUpCount = k8smetrics.NewCounter(
+       &k8smetrics.CounterOpts{
            Namespace: caNamespace,
            Name:      "scaled_up_nodes_total",
            Help:      "Number of nodes added by CA.",
        },
    )
 
-   gpuScaleUpCount = prometheus.NewCounterVec(
-       prometheus.CounterOpts{
+   gpuScaleUpCount = k8smetrics.NewCounterVec(
+       &k8smetrics.CounterOpts{
            Namespace: caNamespace,
            Name:      "scaled_up_gpu_nodes_total",
            Help:      "Number of GPU nodes added by CA, by GPU name.",
        }, []string{"gpu_name"},
    )
 
-   failedScaleUpCount = prometheus.NewCounterVec(
-       prometheus.CounterOpts{
+   failedScaleUpCount = k8smetrics.NewCounterVec(
+       &k8smetrics.CounterOpts{
            Namespace: caNamespace,
            Name:      "failed_scale_ups_total",
            Help:      "Number of times scale-up operation has failed.",
        }, []string{"reason"},
    )
 
-   scaleDownCount = prometheus.NewCounterVec(
-       prometheus.CounterOpts{
+   scaleDownCount = k8smetrics.NewCounterVec(
+       &k8smetrics.CounterOpts{
            Namespace: caNamespace,
            Name:      "scaled_down_nodes_total",
            Help:      "Number of nodes removed by CA.",
        }, []string{"reason"},
    )
 
-   gpuScaleDownCount = prometheus.NewCounterVec(
-       prometheus.CounterOpts{
+   gpuScaleDownCount = k8smetrics.NewCounterVec(
+       &k8smetrics.CounterOpts{
            Namespace: caNamespace,
            Name:      "scaled_down_gpu_nodes_total",
            Help:      "Number of GPU nodes removed by CA, by reason and GPU name.",
        }, []string{"reason", "gpu_name"},
    )
 
-   evictionsCount = prometheus.NewCounter(
-       prometheus.CounterOpts{
+   evictionsCount = k8smetrics.NewCounter(
+       &k8smetrics.CounterOpts{
            Namespace: caNamespace,
            Name:      "evicted_pods_total",
            Help:      "Number of pods evicted by CA",
        },
    )
 
-   unneededNodesCount = prometheus.NewGauge(
-       prometheus.GaugeOpts{
+   unneededNodesCount = k8smetrics.NewGauge(
+       &k8smetrics.GaugeOpts{
            Namespace: caNamespace,
            Name:      "unneeded_nodes_count",
            Help:      "Number of nodes currently considered unneeded by CA.",
        },
    )
 
-   scaleDownInCooldown = prometheus.NewGauge(
-       prometheus.GaugeOpts{
+   scaleDownInCooldown = k8smetrics.NewGauge(
+       &k8smetrics.GaugeOpts{
            Namespace: caNamespace,
            Name:      "scale_down_in_cooldown",
            Help:      "Whether or not the scale down is in cooldown. 1 if its, 0 otherwise.",
        },
    )
 
    /**** Metrics related to NodeAutoprovisioning ****/
-   napEnabled = prometheus.NewGauge(
-       prometheus.GaugeOpts{
+   napEnabled = k8smetrics.NewGauge(
+       &k8smetrics.GaugeOpts{
            Namespace: caNamespace,
            Name:      "nap_enabled",
            Help:      "Whether or not Node Autoprovisioning is enabled. 1 if it is, 0 otherwise.",
        },
    )
 
-   nodeGroupCreationCount = prometheus.NewCounter(
-       prometheus.CounterOpts{
+   nodeGroupCreationCount = k8smetrics.NewCounter(
+       &k8smetrics.CounterOpts{
            Namespace: caNamespace,
            Name:      "created_node_groups_total",
            Help:      "Number of node groups created by Node Autoprovisioning.",
        },
    )
 
-   nodeGroupDeletionCount = prometheus.NewCounter(
-       prometheus.CounterOpts{
+   nodeGroupDeletionCount = k8smetrics.NewCounter(
+       &k8smetrics.CounterOpts{
            Namespace: caNamespace,
            Name:      "deleted_node_groups_total",
            Help:      "Number of node groups deleted by Node Autoprovisioning.",
@@ -252,25 +253,25 @@ var (
 
 // RegisterAll registers all metrics.
 func RegisterAll() {
-   prometheus.MustRegister(clusterSafeToAutoscale)
-   prometheus.MustRegister(nodesCount)
-   prometheus.MustRegister(nodeGroupsCount)
-   prometheus.MustRegister(unschedulablePodsCount)
-   prometheus.MustRegister(lastActivity)
-   prometheus.MustRegister(functionDuration)
-   prometheus.MustRegister(functionDurationSummary)
-   prometheus.MustRegister(errorsCount)
-   prometheus.MustRegister(scaleUpCount)
-   prometheus.MustRegister(gpuScaleUpCount)
-   prometheus.MustRegister(failedScaleUpCount)
-   prometheus.MustRegister(scaleDownCount)
-   prometheus.MustRegister(gpuScaleDownCount)
-   prometheus.MustRegister(evictionsCount)
-   prometheus.MustRegister(unneededNodesCount)
-   prometheus.MustRegister(scaleDownInCooldown)
-   prometheus.MustRegister(napEnabled)
-   prometheus.MustRegister(nodeGroupCreationCount)
-   prometheus.MustRegister(nodeGroupDeletionCount)
+   legacyregistry.MustRegister(clusterSafeToAutoscale)
+   legacyregistry.MustRegister(nodesCount)
+   legacyregistry.MustRegister(nodeGroupsCount)
+   legacyregistry.MustRegister(unschedulablePodsCount)
+   legacyregistry.MustRegister(lastActivity)
+   legacyregistry.MustRegister(functionDuration)
+   legacyregistry.MustRegister(functionDurationSummary)
+   legacyregistry.MustRegister(errorsCount)
+   legacyregistry.MustRegister(scaleUpCount)
+   legacyregistry.MustRegister(gpuScaleUpCount)
+   legacyregistry.MustRegister(failedScaleUpCount)
+   legacyregistry.MustRegister(scaleDownCount)
+   legacyregistry.MustRegister(gpuScaleDownCount)
+   legacyregistry.MustRegister(evictionsCount)
+   legacyregistry.MustRegister(unneededNodesCount)
+   legacyregistry.MustRegister(scaleDownInCooldown)
+   legacyregistry.MustRegister(napEnabled)
+   legacyregistry.MustRegister(nodeGroupCreationCount)
+   legacyregistry.MustRegister(nodeGroupDeletionCount)
 }
 
 // UpdateDurationFromStart records the duration of the step identified by the
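The component-base vectors keep client_golang's observation API (WithLabelValues, Observe, Inc, Set), which is why this file only rewrites constructors and registrations while helpers such as UpdateDurationFromStart are untouched. That helper's body is not shown in this diff; a sketch of how such a helper could feed the two duration collectors defined above (assuming it sits in this package, which already imports time):

// updateDuration is a sketch, not the committed implementation of
// UpdateDurationFromStart, whose body this diff does not show.
func updateDuration(functionLabel string, start time.Time) {
    seconds := time.Since(start).Seconds()
    // One measurement feeds both the histogram and the summary, since
    // both vectors take the same "function" label.
    functionDuration.WithLabelValues(functionLabel).Observe(seconds)
    functionDurationSummary.WithLabelValues(functionLabel).Observe(seconds)
}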
cluster-autoscaler/simulator/cluster.go (2 changes: 1 addition & 1 deletion)

@@ -233,7 +233,7 @@ func findPlaceFor(removedNode string, pods []*apiv1.Pod, nodes []*apiv1.Node, no
 
    loggingQuota := glogx.PodsLoggingQuota()
 
-   tryNodeForPod := func(nodename string, pod *apiv1.Pod, predicateMeta predicates.PredicateMetadata) bool {
+   tryNodeForPod := func(nodename string, pod *apiv1.Pod, predicateMeta predicates.Metadata) bool {
        nodeInfo, found := newNodeInfos[nodename]
        if found {
            if nodeInfo.Node() == nil {
(Diffs for the remaining changed files, mostly vendored dependencies, are not rendered here.)