Skip to content

Commit

Permalink
add leader election health check
Browse files Browse the repository at this point in the history
  • Loading branch information
verult committed Dec 5, 2020
1 parent 5a0c657 commit a6c7600
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 9 deletions.
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ Note that the external-resizer does not scale with more replicas. Only one exter

* `--workers <num>`: Number of simultaneously running `ControllerExpandVolume` operations. Default value is `10`.

* `--metrics-address`: The TCP network address where the prometheus metrics endpoint will run (example: `:8080` which corresponds to port 8080 on local host). The default is empty string, which means metrics endpoint is disabled.
* `--http-endpoint`: The TCP network address where the HTTP server for diagnostics, including metrics and leader election health check, will listen (example: `:8080` which corresponds to port 8080 on local host). The default is empty string, which means the server is disabled.

* `--metrics-path`: The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.

Expand All @@ -69,10 +69,20 @@ Note that the external-resizer does not scale with more replicas. Only one exter

* `--master <url>`: Master URL to build a client config from. When omitted, default token provided by Kubernetes will be used. This option is useful only when the external-resizer does not run as a Kubernetes pod, e.g. for debugging. Either this or `--kubeconfig` needs to be set if the external-resizer is being run out of cluster.

* `--metrics-address`: (deprecated, use `--http-endpoint` instead) The TCP network address where the prometheus metrics endpoint will run (example: `:8080` which corresponds to port 8080 on local host). The default is empty string, which means metrics endpoint is disabled. Only one of `--metrics-address` and `--http-endpoint` can be set.

* `--version`: Prints current external-resizer version and quits.

* All glog / klog arguments are supported, such as `-v <log level>` or `-alsologtostderr`.

### HTTP endpoint

The external-resizer optionally exposes an HTTP endpoint at the address:port specified by the `--http-endpoint` argument. When set, these two paths are exposed:

* Metrics path, as set by `--metrics-path` argument (default is `/metrics`).
* Leader election health check at `/healthz/leader-election`. When leader election is used, it is recommended to run a liveness probe against this endpoint so that an external-resizer leader that fails to connect to the API server to renew its leadership is killed. See https://github.com/kubernetes-csi/csi-lib-utils/issues/66 for details.


## Community, discussion, contribution, and support

Learn how to engage with the Kubernetes community on the [community page](http://kubernetes.io/community/).
Expand Down
32 changes: 30 additions & 2 deletions cmd/csi-resizer/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"flag"
"fmt"
"net/http"
"os"
"time"

Expand Down Expand Up @@ -56,7 +57,8 @@ var (
enableLeaderElection = flag.Bool("leader-election", false, "Enable leader election.")
leaderElectionNamespace = flag.String("leader-election-namespace", "", "Namespace where the leader election resource lives. Defaults to the pod namespace if not set.")

metricsAddress = flag.String("metrics-address", "", "The TCP network address where the prometheus metrics endpoint will listen (example: `:8080`). The default is empty string, which means metrics endpoint is disabled.")
metricsAddress = flag.String("metrics-address", "", "(deprecated) The TCP network address where the prometheus metrics endpoint will listen (example: `:8080`). The default is empty string, which means metrics endpoint is disabled. Only one of `--metrics-address` and `--http-endpoint` can be set.")
httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the HTTP server for diagnostics, including metrics and leader election health check, will listen (example: `:8080`). The default is empty string, which means the server is disabled. Only one of `--metrics-address` and `--http-endpoint` can be set.")
metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.")

kubeAPIQPS = flag.Float64("kube-api-qps", 5, "QPS to use while communicating with the kubernetes apiserver. Defaults to 5.0.")
Expand All @@ -78,6 +80,15 @@ func main() {
}
klog.Infof("Version : %s", version)

if *metricsAddress != "" && *httpEndpoint != "" {
klog.Error("only one of `--metrics-address` and `--http-endpoint` can be set.")
os.Exit(1)
}
addr := *metricsAddress
if addr == "" {
addr = *httpEndpoint
}

var config *rest.Config
var err error
if *master != "" || *kubeConfig != "" {
Expand All @@ -99,17 +110,31 @@ func main() {

informerFactory := informers.NewSharedInformerFactory(kubeClient, *resyncPeriod)

mux := http.NewServeMux()

csiResizer, err := resizer.NewResizer(
*csiAddress,
*timeout,
kubeClient,
informerFactory,
*metricsAddress,
mux,
addr,
*metricsPath)
if err != nil {
klog.Fatal(err.Error())
}

// Start HTTP server for metrics + leader election healthz
if addr != "" {
go func() {
klog.Infof("ServeMux listening at %q", addr)
err := http.ListenAndServe(addr, mux)
if err != nil {
klog.Fatalf("Failed to start HTTP server at specified address (%q) and metrics path (%q): %s", addr, *metricsPath, err)
}
}()
}

resizerName := csiResizer.Name()
rc := controller.NewResizeController(resizerName, csiResizer, kubeClient, *resyncPeriod, informerFactory,
workqueue.NewItemExponentialFailureRateLimiter(*retryIntervalStart, *retryIntervalMax),
Expand All @@ -129,6 +154,9 @@ func main() {
klog.Fatal(err.Error())
}
le := leaderelection.NewLeaderElection(leKubeClient, lockName, run)
if *httpEndpoint != "" {
le.PrepareHealthCheck(mux, leaderelection.DefaultHealthCheckTimeout)
}

if *leaderElectionNamespace != "" {
le.WithNamespace(*leaderElectionNamespace)
Expand Down
15 changes: 14 additions & 1 deletion deploy/kubernetes/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,28 @@ spec:
serviceAccount: csi-resizer
containers:
- name: csi-resizer
image: quay.io/k8scsi/csi-resizer:canary
image: gcr.io/k8s-staging-sig-storage/csi-resizer:canary
args:
- "--v=5"
- "--csi-address=$(ADDRESS)"
- "--leader-election"
- "--http-endpoint=:8080"
env:
- name: ADDRESS
value: /var/lib/csi/sockets/pluginproxy/mock.socket
imagePullPolicy: "IfNotPresent"
ports:
- containerPort: 8080
name: http-endpoint
protocol: TCP
livenessProbe:
failureThreshold: 1
httpGet:
path: /healthz/leader-election
port: http-endpoint
initialDelaySeconds: 10
timeoutSeconds: 10
periodSeconds: 20
volumeMounts:
- name: socket-dir
mountPath: /var/lib/csi/sockets/pluginproxy/
Expand Down
9 changes: 7 additions & 2 deletions pkg/resizer/csi_resizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ func NewResizer(
timeout time.Duration,
k8sClient kubernetes.Interface,
informerFactory informers.SharedInformerFactory,
metricsServer metrics.Server,
metricsAddress, metricsPath string) (Resizer, error) {
metricsManager := metrics.NewCSIMetricsManager("" /* driverName */)
csiClient, err := csi.New(address, timeout, metricsManager)
Expand All @@ -58,6 +59,7 @@ func NewResizer(
k8sClient,
informerFactory,
metricsManager,
metricsServer,
metricsAddress,
metricsPath)
}
Expand All @@ -68,15 +70,18 @@ func NewResizerFromClient(
k8sClient kubernetes.Interface,
informerFactory informers.SharedInformerFactory,
metricsManager metrics.CSIMetricsManager,
metricsServer metrics.Server,
metricsAddress, metricsPath string) (Resizer, error) {
driverName, err := getDriverName(csiClient, timeout)
if err != nil {
return nil, fmt.Errorf("get driver name failed: %v", err)
}

klog.V(2).Infof("CSI driver name: %q", driverName)
metricsManager.SetDriverName(driverName)
metricsManager.StartMetricsEndpoint(metricsAddress, metricsPath)
if metricsAddress != "" {
metricsManager.RegisterToServer(metricsServer, metricsPath)
metricsManager.SetDriverName(driverName)
}

supportControllerService, err := supportsPluginControllerService(csiClient, timeout)
if err != nil {
Expand Down
6 changes: 3 additions & 3 deletions pkg/resizer/csi_resizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func TestNewResizer(t *testing.T) {
metricsAddress := ""
metricsPath := ""
k8sClient, informerFactory := fakeK8s()
resizer, err := NewResizerFromClient(client, 0, k8sClient, informerFactory, metricsManager, metricsAddress, metricsPath)
resizer, err := NewResizerFromClient(client, 0, k8sClient, informerFactory, metricsManager, nil /* metricsServer */, metricsAddress, metricsPath)
if err != c.Error {
t.Errorf("Case %d: Unexpected error: wanted %v, got %v", i, c.Error, err)
}
Expand Down Expand Up @@ -160,7 +160,7 @@ func TestResizeMigratedPV(t *testing.T) {
metricsManager := metrics.NewCSIMetricsManager("" /* driverName */)
metricsAddress := ""
metricsPath := ""
resizer, err := NewResizerFromClient(client, 0, k8sClient, informerFactory, metricsManager, metricsAddress, metricsPath)
resizer, err := NewResizerFromClient(client, 0, k8sClient, informerFactory, metricsManager, nil /* metricsServer */, metricsAddress, metricsPath)
if err != nil {
t.Fatalf("Failed to create resizer: %v", err)
}
Expand Down Expand Up @@ -367,7 +367,7 @@ func TestCanSupport(t *testing.T) {
metricsManager := metrics.NewCSIMetricsManager("" /* driverName */)
metricsAddress := ""
metricsPath := ""
resizer, err := NewResizerFromClient(client, 0, k8sClient, informerFactory, metricsManager, metricsAddress, metricsPath)
resizer, err := NewResizerFromClient(client, 0, k8sClient, informerFactory, metricsManager, nil /* metricsServer */, metricsAddress, metricsPath)
if err != nil {
t.Fatalf("Failed to create resizer: %v", err)
}
Expand Down

0 comments on commit a6c7600

Please sign in to comment.