Skip to content

Commit

Permalink
Add NodeResourceTopology garbage collector
Browse files Browse the repository at this point in the history
The NodeResourceTopology (aka NRT) custom resource is used to enable NUMA-aware scheduling in Kubernetes.
As of now, node-feature-discovery daemons are used to advertise those
resources, but there is no service responsible for removing obsolete
objects (those without a corresponding Kubernetes node).

This patch adds a new daemon called nfd-topology-gc, which removes obsolete
NRTs.

Signed-off-by: PiotrProkop <[email protected]>
  • Loading branch information
PiotrProkop committed Jan 11, 2023
1 parent 0159ab0 commit 59afae5
Show file tree
Hide file tree
Showing 18 changed files with 818 additions and 0 deletions.
88 changes: 88 additions & 0 deletions cmd/nfd-topology-gc/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"flag"
"fmt"
"os"
"time"

"k8s.io/klog/v2"

nfdtopologygarbagecollector "sigs.k8s.io/node-feature-discovery/pkg/nfd-topology-gc"
"sigs.k8s.io/node-feature-discovery/pkg/version"
)

const (
	// ProgramName is the canonical name of this program. It is used as the
	// name of the command line flag set and is printed together with the
	// version when -version is given.
	ProgramName = "nfd-topology-gc"
)

// main is the entry point of nfd-topology-gc. It parses the command line,
// handles -version, warns when no build version is set, and then creates and
// runs the topology garbage collector. Any error from creating or running the
// collector terminates the process through klog.Exit.
func main() {
	flagSet := flag.NewFlagSet(ProgramName, flag.ExitOnError)
	showVersion := flagSet.Bool("version", false, "Print version and exit.")

	gcArgs := parseArgs(flagSet, os.Args[1:]...)

	// -version short-circuits everything else.
	if *showVersion {
		fmt.Println(ProgramName, version.Get())
		os.Exit(0)
	}

	// A missing version is only worth a warning; the program keeps running.
	if version.Undefined() {
		klog.Warningf("version not set! Set -ldflags \"-X sigs.k8s.io/node-feature-discovery/pkg/version.version=`git describe --tags --dirty --always`\" during build or run.")
	}

	// Build the garbage collector from the parsed arguments.
	collector, err := nfdtopologygarbagecollector.New(gcArgs)
	if err != nil {
		klog.Exit(err)
	}

	if runErr := collector.Run(); runErr != nil {
		klog.Exit(runErr)
	}
}

// parseArgs registers the program's flags on flags, parses osArgs with it and
// returns the resulting Args. Positional (non-flag) arguments are not
// accepted: the first one is reported on the flag set's output, usage is
// printed, and the process exits with status 2.
func parseArgs(flags *flag.FlagSet, osArgs ...string) *nfdtopologygarbagecollector.Args {
	args := initFlags(flags)

	// The flag sets created in this command use flag.ExitOnError, under which
	// Parse terminates the process itself on a bad flag, so the returned
	// error is intentionally dropped.
	_ = flags.Parse(osArgs)

	if leftover := flags.Args(); len(leftover) > 0 {
		fmt.Fprintf(flags.Output(), "unknown command line argument: %s\n", leftover[0])
		flags.Usage()
		os.Exit(2)
	}

	return args
}

// initFlags registers the -gc-interval and -kubeconfig flags, plus the klog
// flags, on flagset. It returns the Args value that those flags populate once
// the flag set is parsed.
func initFlags(flagset *flag.FlagSet) *nfdtopologygarbagecollector.Args {
	var args nfdtopologygarbagecollector.Args

	flagset.DurationVar(
		&args.GCPeriod,
		"gc-interval",
		time.Hour,
		"Interval between which Garbage Collector will try to cleanup any missed but already obsolete NodeResourceTopology. [Default: 1h]",
	)
	flagset.StringVar(
		&args.Kubeconfig,
		"kubeconfig",
		"",
		"Kubeconfig to use",
	)

	// Expose the klog flags on the same flag set.
	klog.InitFlags(flagset)

	return &args
}
41 changes: 41 additions & 0 deletions cmd/nfd-topology-gc/main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"flag"
"testing"
"time"

. "github.com/smartystreets/goconvey/convey"
)

// TestArgsParse checks that parseArgs maps the -gc-interval command line flag
// onto Args.GCPeriod.
func TestArgsParse(t *testing.T) {
	Convey("When parsing command line arguments", t, func() {
		fs := flag.NewFlagSet(ProgramName, flag.ExitOnError)

		Convey("When valid -gc-interval is specified", func() {
			parsed := parseArgs(fs, "-gc-interval=30s")
			want := 30 * time.Second

			Convey("args.GCPeriod is set to appropriate values", func() {
				So(parsed.GCPeriod, ShouldEqual, want)
			})
		})
	})
}
9 changes: 9 additions & 0 deletions deployment/base/rbac-topology-gc/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Kustomize base that bundles the RBAC objects of nfd-topology-gc
# (ClusterRole, ClusterRoleBinding and ServiceAccount).
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: node-feature-discovery

resources:
  - topology-gc-clusterrole.yaml
  - topology-gc-clusterrolebinding.yaml
  - topology-gc-serviceaccount.yaml
25 changes: 25 additions & 0 deletions deployment/base/rbac-topology-gc/topology-gc-clusterrole.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Cluster-wide permissions granted to nfd-topology-gc.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: nfd-topology-gc
rules:
  # List and watch Node objects.
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["list", "watch"]
  # NOTE(review): confirm that the garbage collector needs nodes/proxy.
  - apiGroups: [""]
    resources: ["nodes/proxy"]
    verbs: ["get"]
  # List and delete NodeResourceTopology objects.
  - apiGroups: ["topology.node.k8s.io"]
    resources: ["noderesourcetopologies"]
    verbs: ["delete", "list"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Binds the nfd-topology-gc ClusterRole to the nfd-topology-gc ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: nfd-topology-gc
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: nfd-topology-gc
subjects:
  # NOTE(review): presumably rewritten by the kustomization's namespace
  # setting at build time; confirm.
  - kind: ServiceAccount
    name: nfd-topology-gc
    namespace: default
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# ServiceAccount named nfd-topology-gc.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: nfd-topology-gc
7 changes: 7 additions & 0 deletions deployment/base/topology-gc/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Kustomize base for the nfd-topology-gc workload manifest.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: node-feature-discovery

resources:
  - topology-gc.yaml
23 changes: 23 additions & 0 deletions deployment/base/topology-gc/topology-gc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Deployment running the nfd-topology-gc binary from the NFD image.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: nfd
  name: nfd-topology-gc
spec:
  selector:
    matchLabels:
      app: nfd-topology-gc
  template:
    metadata:
      labels:
        app: nfd-topology-gc
    spec:
      dnsPolicy: ClusterFirstWithHostNet
      # serviceAccountName replaces the deprecated PodSpec alias
      # "serviceAccount".
      serviceAccountName: nfd-topology-gc
      containers:
        - name: nfd-topology-gc
          image: gcr.io/k8s-staging-nfd/node-feature-discovery:master
          imagePullPolicy: Always
          command:
            - "nfd-topology-gc"
11 changes: 11 additions & 0 deletions deployment/helm/node-feature-discovery/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,14 @@ Create the name of the service account which topologyUpdater will use
{{ default "default" .Values.topologyUpdater.serviceAccount.name }}
{{- end -}}
{{- end -}}

{{/*
Create the name of the service account which topologyGC will use.
When topologyGC.serviceAccount.create is set, the name defaults to
"<fullname>-topology-gc"; otherwise it defaults to "default". In both cases a
non-empty topologyGC.serviceAccount.name takes precedence.
*/}}
{{- define "node-feature-discovery.topologyGC.serviceAccountName" -}}
{{- if .Values.topologyGC.serviceAccount.create -}}
{{ default (printf "%s-topology-gc" (include "node-feature-discovery.fullname" .)) .Values.topologyGC.serviceAccount.name }}
{{- else -}}
{{ default "default" .Values.topologyGC.serviceAccount.name }}
{{- end -}}
{{- end -}}
31 changes: 31 additions & 0 deletions deployment/helm/node-feature-discovery/templates/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,34 @@ rules:
- get
- update
{{- end }}

---
{{- if and .Values.topologyGC.enable .Values.topologyGC.rbac.create .Values.topologyUpdater.enable }}
# ClusterRole for the topology garbage collector. Rendered only when
# topologyGC and topologyUpdater are enabled and topologyGC.rbac.create is set.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-topology-gc
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
rules:
  # List and watch Node objects.
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["list", "watch"]
  # NOTE(review): confirm that the garbage collector needs nodes/proxy.
  - apiGroups: [""]
    resources: ["nodes/proxy"]
    verbs: ["get"]
  # List and delete NodeResourceTopology objects.
  - apiGroups: ["topology.node.k8s.io"]
    resources: ["noderesourcetopologies"]
    verbs: ["delete", "list"]
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,21 @@ subjects:
name: {{ include "node-feature-discovery.topologyUpdater.serviceAccountName" . }}
namespace: {{ include "node-feature-discovery.namespace" . }}
{{- end }}

---
{{- if and .Values.topologyGC.enable .Values.topologyGC.rbac.create .Values.topologyUpdater.enable }}
# Binds the topology-gc ClusterRole to the topology-gc ServiceAccount.
# Rendered under the same condition as the ClusterRole itself.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-topology-gc
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ include "node-feature-discovery.fullname" . }}-topology-gc
subjects:
  # The subject name falls back to "nfd-topology-gc" when no name is configured.
  - kind: ServiceAccount
    name: {{ .Values.topologyGC.serviceAccount.name | default "nfd-topology-gc" }}
    namespace: {{ include "node-feature-discovery.namespace" . }}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,21 @@ metadata:
{{- end }}
{{- end }}

---
{{- if and .Values.topologyGC.enable .Values.topologyGC.serviceAccount.create .Values.topologyUpdater.enable }}
# ServiceAccount for the topology garbage collector. Rendered only when
# topologyGC and topologyUpdater are enabled and
# topologyGC.serviceAccount.create is set.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ .Values.topologyGC.serviceAccount.name | default "nfd-topology-gc" }}
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
  # Annotations come from the topologyGC values, not from topologyUpdater.
  {{- with .Values.topologyGC.serviceAccount.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
{{- end }}

---
{{- if .Values.worker.serviceAccount.create }}
apiVersion: v1
Expand Down
64 changes: 64 additions & 0 deletions deployment/helm/node-feature-discovery/templates/topology-gc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
{{- if and .Values.topologyGC.enable .Values.topologyUpdater.enable -}}
# Deployment of the topology garbage collector. Rendered only when both
# topologyGC and topologyUpdater are enabled.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-topology-gc
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
    role: topology-gc
spec:
  replicas: {{ .Values.topologyGC.replicaCount | default 1 }}
  selector:
    matchLabels:
      {{- include "node-feature-discovery.selectorLabels" . | nindent 6 }}
      role: topology-gc
  template:
    metadata:
      labels:
        {{- include "node-feature-discovery.selectorLabels" . | nindent 8 }}
        role: topology-gc
      annotations:
        {{- toYaml .Values.topologyGC.annotations | nindent 8 }}
    spec:
      # Uses the same expression as the topology-gc ServiceAccount and
      # ClusterRoleBinding templates so the pod runs under the account they
      # create and bind.
      serviceAccountName: {{ .Values.topologyGC.serviceAccount.name | default "nfd-topology-gc" }}
      dnsPolicy: ClusterFirstWithHostNet
    {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
    {{- end }}
      securityContext:
        {{- toYaml .Values.topologyGC.podSecurityContext | nindent 8 }}
      containers:
      - name: topology-gc
        image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
        imagePullPolicy: "{{ .Values.image.pullPolicy }}"
        env:
        # Name of the node the pod is scheduled on, via the downward API.
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        command:
          - "nfd-topology-gc"
        args:
          # -gc-interval is passed only when topologyGC.interval is non-empty.
          {{- if .Values.topologyGC.interval | empty | not }}
          - "-gc-interval={{ .Values.topologyGC.interval }}"
          {{- end }}
        resources:
      {{- toYaml .Values.topologyGC.resources | nindent 12 }}
        securityContext:
          {{- toYaml .Values.topologyGC.securityContext | nindent 12 }}

    {{- with .Values.topologyGC.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
    {{- end }}
    {{- with .Values.topologyGC.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
    {{- end }}
    {{- with .Values.topologyGC.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
    {{- end }}
{{- end }}
Loading

0 comments on commit 59afae5

Please sign in to comment.