From e6c87aaecd84d8c0b0b98966949ccdd9c08075d4 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 12 Jan 2021 12:18:38 +0800 Subject: [PATCH] Implement agent simulator as an independent bin (#1493) This patch implements the agent simulator as an independent binary. It runs alongside kubemark, which simulates the kubelet, and mainly watches NetworkPolicies, AddressGroups and AppliedToGroups from the Antrea controller and prints the events of these resources to the log. With the agent simulator, we do not need to launch a large cluster for scale testing. The agent simulator uses labels and node affinity to prevent antrea-agent/antrea-controller from running on the simulated nodes, and we can add taints to keep other pods off the simulated nodes. To use the agent simulator, please refer to docs/antrea-agent-simulator.md. --- .github/workflows/build.yml | 17 ++ Makefile | 16 ++ .../images/Dockerfile.simulator.build.ubuntu | 22 ++ .../patches/simulator/agentNodeAffinity.yml | 16 ++ .../simulator/antrea-agent-simulator.yml | 153 ++++++++++++++ .../simulator/controllerNodeAffinity.yml | 16 ++ cmd/antrea-agent-simulator/main.go | 63 ++++++ cmd/antrea-agent-simulator/simulator.go | 193 ++++++++++++++++++ docs/antrea-agent-simulator.md | 53 +++++ docs/maintainers/build-kubemark.md | 13 ++ hack/.notableofcontents | 2 + hack/generate-manifest.sh | 18 ++ 12 files changed, 582 insertions(+) create mode 100644 build/images/Dockerfile.simulator.build.ubuntu create mode 100644 build/yamls/patches/simulator/agentNodeAffinity.yml create mode 100644 build/yamls/patches/simulator/antrea-agent-simulator.yml create mode 100644 build/yamls/patches/simulator/controllerNodeAffinity.yml create mode 100644 cmd/antrea-agent-simulator/main.go create mode 100644 cmd/antrea-agent-simulator/simulator.go create mode 100644 docs/antrea-agent-simulator.md create mode 100644 docs/maintainers/build-kubemark.md diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 634993eea8b..d78bb016cff 100644 --- 
a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -42,6 +42,23 @@ jobs: echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin docker push antrea/antrea-ubuntu:latest + build-scale: + needs: check-changes + if: ${{ needs.check-changes.outputs.has_changes == 'yes' || github.event_name == 'push' }} + runs-on: [ubuntu-18.04] + steps: + - uses: actions/checkout@v2 + - name: Build Antrea Agent Simulator Docker image + run: make build-scale-simulator + - name: Push Antrea Agent Simulator Docker image to registry + if: ${{ github.repository == 'vmware-tanzu/antrea' && github.event_name == 'push' && github.ref == 'refs/heads/master' }} + env: + DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} + DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} + run: | + echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin + docker push antrea/antrea-ubuntu-simulator:latest + build-windows: needs: check-changes if: ${{ needs.check-changes.outputs.has_changes == 'yes' || github.event_name == 'push' }} diff --git a/Makefile b/Makefile index d4ce769628d..a72a52fca19 100644 --- a/Makefile +++ b/Makefile @@ -30,6 +30,11 @@ antrea-agent: @mkdir -p $(BINDIR) GOOS=linux $(GO) build -o $(BINDIR) $(GOFLAGS) -ldflags '$(LDFLAGS)' github.com/vmware-tanzu/antrea/cmd/antrea-agent +.PHONY: antrea-agent-simulator +antrea-agent-simulator: + @mkdir -p $(BINDIR) + GOOS=linux $(GO) build -o $(BINDIR) $(GOFLAGS) -ldflags '$(LDFLAGS)' github.com/vmware-tanzu/antrea/cmd/antrea-agent-simulator + .PHONY: antrea-agent-instr-binary antrea-agent-instr-binary: @mkdir -p $(BINDIR) @@ -295,6 +300,12 @@ else endif docker tag antrea/antrea-ubuntu-coverage:$(DOCKER_IMG_VERSION) antrea/antrea-ubuntu-coverage +.PHONY: build-scale-simulator +build-scale-simulator: + @echo "===> Building simulator bin and antrea-ubuntu-simulator image" + docker build -t antrea/antrea-ubuntu-simulator:$(DOCKER_IMG_VERSION) -f build/images/Dockerfile.simulator.build.ubuntu . 
+ docker tag antrea/antrea-ubuntu-simulator:$(DOCKER_IMG_VERSION) antrea/antrea-ubuntu-simulator + .PHONY: manifest manifest: @echo "===> Generating dev manifest for Antrea <===" @@ -307,6 +318,11 @@ manifest: $(CURDIR)/hack/generate-manifest-windows.sh --mode dev > build/yamls/antrea-windows.yml $(CURDIR)/hack/generate-manifest-flow-aggregator.sh --mode dev > build/yamls/flow-aggregator.yml +.PHONY: manifest-scale +manifest-scale: + @echo "===> Generating simulator manifest for Antrea <===" + $(CURDIR)/hack/generate-manifest.sh --mode dev --simulator > build/yamls/antrea-scale.yml + .PHONY: manifest-coverage manifest-coverage: $(CURDIR)/hack/generate-manifest.sh --mode dev --coverage > build/yamls/antrea-coverage.yml diff --git a/build/images/Dockerfile.simulator.build.ubuntu b/build/images/Dockerfile.simulator.build.ubuntu new file mode 100644 index 00000000000..e175774a12b --- /dev/null +++ b/build/images/Dockerfile.simulator.build.ubuntu @@ -0,0 +1,22 @@ +FROM golang:1.15 as antrea-build + +WORKDIR /antrea + +COPY go.mod /antrea/go.mod + +RUN go mod download + +COPY . /antrea + +RUN make antrea-agent-simulator + + +FROM ubuntu:20.04 + +LABEL maintainer="Antrea " +LABEL description="The Docker image to deploy the Antrea simulator. 
" + +USER root + +COPY --from=antrea-build /antrea/bin/* /usr/local/bin/ + diff --git a/build/yamls/patches/simulator/agentNodeAffinity.yml b/build/yamls/patches/simulator/agentNodeAffinity.yml new file mode 100644 index 00000000000..1a922cecd3f --- /dev/null +++ b/build/yamls/patches/simulator/agentNodeAffinity.yml @@ -0,0 +1,16 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: antrea-agent +spec: + template: + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: antrea/instance + operator: NotIn + values: + - simulator diff --git a/build/yamls/patches/simulator/antrea-agent-simulator.yml b/build/yamls/patches/simulator/antrea-agent-simulator.yml new file mode 100644 index 00000000000..d66b83f904c --- /dev/null +++ b/build/yamls/patches/simulator/antrea-agent-simulator.yml @@ -0,0 +1,153 @@ +--- +apiVersion: v1 +data: + content.type: test-cluster +kind: ConfigMap +metadata: + name: node-configmap + namespace: kube-system +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: antrea-agent-simulator + namespace: kube-system +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + app: antrea + component: antrea-agent-simulator + serviceName: antrea-agent-simulator + template: + metadata: + labels: + app: antrea + component: antrea-agent-simulator + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: antrea/instance + operator: NotIn + values: + - simulator + serviceAccountName: antrea-agent + initContainers: + - name: init-inotify-limit + image: projects.registry.vmware.com/library/busybox:latest + command: ['sysctl', '-w', 'fs.inotify.max_user_instances=200'] + securityContext: + privileged: true + volumes: + - name: kubeconfig-volume + secret: + secretName: kubeconfig + - name: logs-volume + hostPath: + path: /var/log + containers: + - name: simulator + 
image: projects.registry.vmware.com/antrea/antrea-ubuntu-simulator:latest + imagePullPolicy: IfNotPresent + command: ['/usr/local/bin/antrea-agent-simulator', '-v', '5'] + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + volumeMounts: + - name: kubeconfig-volume + mountPath: /kubeconfig + readOnly: true + - name: logs-volume + mountPath: /var/log + - name: hollow-kubelet + image: projects.registry.vmware.com/antrea/kubemark:v1.18.4 + ports: + - containerPort: 4194 + - containerPort: 10250 + - containerPort: 10255 + env: + - name: CONTENT_TYPE + valueFrom: + configMapKeyRef: + name: node-configmap + key: content.type + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + command: [ + "/kubemark", + "--morph=kubelet", + "--name=$(NODE_NAME)", + "--kubeconfig=/kubeconfig/admin.conf", + "$(CONTENT_TYPE)", + "--v=2", + "--log-file=/var/log/kubelet-$(NODE_NAME).log", + "--node-labels=antrea/instance=simulator", + ] + volumeMounts: + - name: kubeconfig-volume + mountPath: /kubeconfig + readOnly: true + - name: logs-volume + mountPath: /var/log + resources: + requests: + cpu: 20m + memory: 50M + securityContext: + privileged: true + - name: hollow-proxy + image: projects.registry.vmware.com/antrea/kubemark:v1.18.4 + env: + - name: CONTENT_TYPE + valueFrom: + configMapKeyRef: + name: node-configmap + key: content.type + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + command: [ + "/kubemark", + "--morph=proxy", + "--name=$(NODE_NAME)", + "--use-real-proxier=false", + "--kubeconfig=/kubeconfig/admin.conf", + "$(CONTENT_TYPE)", + "--alsologtostderr", + "--v=2", + "--log-file=/var/log/kubeproxy-$(NODE_NAME).log" # own file: hollow-kubelet already writes kubelet-$(NODE_NAME).log on the same /var/log mount + ] + volumeMounts: + - name: kubeconfig-volume + mountPath: /kubeconfig + readOnly: true + - name: logs-volume + mountPath: /var/log + resources: + 
requests: + cpu: 20m + memory: 50M + tolerations: + - effect: NoExecute + key: node.kubernetes.io/unreachable + operator: Exists + - effect: NoExecute + key: node.kubernetes.io/not-ready + operator: Exists diff --git a/build/yamls/patches/simulator/controllerNodeAffinity.yml b/build/yamls/patches/simulator/controllerNodeAffinity.yml new file mode 100644 index 00000000000..e88e7e23d7d --- /dev/null +++ b/build/yamls/patches/simulator/controllerNodeAffinity.yml @@ -0,0 +1,16 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: antrea-controller +spec: + template: + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: antrea/instance + operator: NotIn + values: + - simulator diff --git a/cmd/antrea-agent-simulator/main.go b/cmd/antrea-agent-simulator/main.go new file mode 100644 index 00000000000..86d03ccd903 --- /dev/null +++ b/cmd/antrea-agent-simulator/main.go @@ -0,0 +1,63 @@ +// Copyright 2021 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The simulator binary is responsible to run simulated nodes for antrea agent. +// It watches NetworkPolicies, AddressGroups and AppliedToGroups from antrea +// controller and prints the events of these resources to log. 
+package main + +import ( + "flag" + "os" + + "github.com/spf13/cobra" + "k8s.io/component-base/logs" + "k8s.io/klog" + + "github.com/vmware-tanzu/antrea/pkg/log" + "github.com/vmware-tanzu/antrea/pkg/version" +) + +func main() { + logs.InitLogs() + defer logs.FlushLogs() + + command := newSimulatorCommand() + if err := command.Execute(); err != nil { + logs.FlushLogs() + os.Exit(1) + } +} + +func newSimulatorCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "antrea-agent-simulator", + Long: "The Antrea agent simulator.", + Run: func(cmd *cobra.Command, args []string) { + log.InitLogFileLimits(cmd.Flags()) + + if err := run(); err != nil { + klog.Fatalf("Error running agent: %v", err) + } + }, + Version: version.GetFullVersionWithRuntimeInfo(), + } + + flags := cmd.Flags() + log.AddFlags(flags) + + // Install log flags + flags.AddGoFlagSet(flag.CommandLine) + return cmd +} diff --git a/cmd/antrea-agent-simulator/simulator.go b/cmd/antrea-agent-simulator/simulator.go new file mode 100644 index 00000000000..f74a86fb1db --- /dev/null +++ b/cmd/antrea-agent-simulator/simulator.go @@ -0,0 +1,193 @@ +// Copyright 2021 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The simulator binary is responsible for running simulated antrea agent. +// It watches NetworkPolicies, AddressGroups and AppliedToGroups from antrea controller +// and prints the events of these resources to log. 
+package main + +import ( + "context" + "fmt" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/apimachinery/pkg/watch" + componentbaseconfig "k8s.io/component-base/config" + "k8s.io/klog" + + "github.com/vmware-tanzu/antrea/pkg/agent" + "github.com/vmware-tanzu/antrea/pkg/k8s" + "github.com/vmware-tanzu/antrea/pkg/signals" + "github.com/vmware-tanzu/antrea/pkg/util/env" + "github.com/vmware-tanzu/antrea/pkg/version" +) + +func run() error { + klog.Infof("Starting Antrea agent simulator (version %s)", version.GetFullVersion()) + k8sClient, _, _, err := k8s.CreateClients(componentbaseconfig.ClientConnectionConfiguration{}) + if err != nil { + return fmt.Errorf("error creating K8s clients: %v", err) + } + + nodeName, err := env.GetNodeName() + if err != nil { + return fmt.Errorf("failed to get hostname: %v", err) + } + + // Create Antrea Clientset for the given config. + antreaClientProvider := agent.NewAntreaClientProvider(componentbaseconfig.ClientConnectionConfiguration{}, k8sClient) + + if err = antreaClientProvider.RunOnce(); err != nil { + return err + } + + // Create the stop chan with signals + stopCh := signals.RegisterSignalHandlers() + + go antreaClientProvider.Run(stopCh) + + // Add loop to check whether client is ready + attempts := 0 + if err := wait.PollImmediateUntil(200*time.Millisecond, func() (bool, error) { + if attempts%10 == 0 { + klog.Info("Waiting for Antrea client to be ready") + } + if _, err := antreaClientProvider.GetAntreaClient(); err != nil { + attempts++ + return false, nil + } + return true, nil + }, stopCh); err != nil { + klog.Info("Stopped waiting for Antrea client") + return err + } + + klog.Info("Antrea client is ready") + + options := metav1.ListOptions{ + FieldSelector: fields.OneTermEqualSelector("nodeName", nodeName).String(), + } + klog.Infof("Nodename: %s", nodeName) + + // Wrapper watcher to call watch + 
networkPolicyControllerWatcher := &watchWrapper{ + func() (watch.Interface, error) { + antreaClient, err := antreaClientProvider.GetAntreaClient() + if err != nil { + return nil, fmt.Errorf("failed to get antrea client: %s", err.Error()) + } + return antreaClient.ControlplaneV1beta1().NetworkPolicies("").Watch(context.TODO(), options) + }, + "networkPolicy", + } + addressGroupWatcher := &watchWrapper{ + func() (watch.Interface, error) { + antreaClient, err := antreaClientProvider.GetAntreaClient() + if err != nil { + return nil, fmt.Errorf("failed to get antrea client: %s", err.Error()) + } + return antreaClient.ControlplaneV1beta1().AddressGroups().Watch(context.TODO(), options) + }, + "addressGroup", + } + appliedGroupWatcher := &watchWrapper{ + func() (watch.Interface, error) { + antreaClient, err := antreaClientProvider.GetAntreaClient() + if err != nil { + return nil, fmt.Errorf("failed to get antrea client: %s", err.Error()) + } + return antreaClient.ControlplaneV1beta1().AppliedToGroups().Watch(context.TODO(), options) + }, + "appliedGroup", + } + + // watch NetworkPolicies, AddressGroups, AppliedToGroups + go wait.NonSlidingUntil(networkPolicyControllerWatcher.watch, 5*time.Second, stopCh) + go wait.NonSlidingUntil(addressGroupWatcher.watch, 5*time.Second, stopCh) + go wait.NonSlidingUntil(appliedGroupWatcher.watch, 5*time.Second, stopCh) + + <-stopCh + klog.Info("Stopping Antrea agent simulator") + return nil +} + +type watchWrapper struct { + watchFunc func() (watch.Interface, error) + name string +} + +func (w *watchWrapper) watch() { + klog.Infof("Starting watch for %s", w.name) + + // Call the watch func which is initialized in watchWrapper + watcher, err := w.watchFunc() + if err != nil { + klog.Warningf("Failed to start watch for %s: %v", w.name, err) + return + } + eventCount := 0 + + // Stop the watcher upon exit + defer func() { + klog.Infof("Stopped watch for %s, total items received %d", w.name, eventCount) + watcher.Stop() + }() + initCount := 
0 + + // Watch the init events from chan, and log the events +loop: + for { + select { + case event, ok := <-watcher.ResultChan(): + if !ok { + klog.Warningf("Result channel for %s was closed", w.name) + return + } + switch event.Type { + case watch.Added: + klog.V(2).Infof("Added %s (%#v)", w.name, event.Object) + initCount++ + case watch.Bookmark: + break loop + } + } + } + klog.Infof("Received %d init events for %s", initCount, w.name) + eventCount += initCount + + // Watch the events from chan, and log the events + for { + select { + case event, ok := <-watcher.ResultChan(): + if !ok { + return + } + switch event.Type { + case watch.Added: + klog.V(2).Infof("Added %s (%#v)", w.name, event.Object) + case watch.Modified: + klog.V(2).Infof("Updated %s (%#v)", w.name, event.Object) + case watch.Deleted: + klog.V(2).Infof("Removed %s (%#v)", w.name, event.Object) + default: + klog.Errorf("Unknown event: %v", event) + return + } + eventCount++ + } + } +} diff --git a/docs/antrea-agent-simulator.md b/docs/antrea-agent-simulator.md new file mode 100644 index 00000000000..23f6d21c916 --- /dev/null +++ b/docs/antrea-agent-simulator.md @@ -0,0 +1,53 @@ +# Run Antrea agent simulator + +This document describes how to run the Antrea agent simulator. The simulator is +useful for Antrea scalability testing, without having to create a very large +cluster. + +## Build the images + + ```bash +make build-scale-simulator + ``` + +## Create the yaml file + +This demo uses 1 simulator, this command will create a yaml file +build/yamls/antrea-scale.yml + + ```bash +make manifest-scale + ``` + +The above yaml will create one simulated Node/Pod, to change the number of +instances, you can modify `spec.replicas` of the StatefulSet +`antrea-agent-simulator` in the yaml, or scale it via +`kubectl scale statefulset/antrea-agent-simulator -n kube-system --replicas=` +after deploying it. 
+ +## Taint the simulator node + +To prevent Pods from being scheduled on the simulated Node(s), you can use the +following taint (the simulated Nodes must already be registered for the label selector to match them). + + ```bash +kubectl taint -l 'antrea/instance=simulator' node mocknode=true:NoExecute + ``` + +## Create secret for kubemark + + ```bash +kubectl create secret generic kubeconfig --type=Opaque --namespace=kube-system --from-file=admin.conf=<path to kubeconfig file> + ``` + +## Apply the yaml file + + ```bash +kubectl apply -f build/yamls/antrea-scale.yml + ``` + +Check the simulated Node(s): + + ```bash +kubectl get nodes -l 'antrea/instance=simulator' + ``` diff --git a/docs/maintainers/build-kubemark.md b/docs/maintainers/build-kubemark.md new file mode 100644 index 00000000000..45ba8f7dd38 --- /dev/null +++ b/docs/maintainers/build-kubemark.md @@ -0,0 +1,13 @@ +# Build the kubemark image + +This document describes how to build the kubemark image used in +[Antrea scale testing](../antrea-agent-simulator.md). + + ```bash +cd $KUBERNETES_PATH +git checkout v1.18.4 +make WHAT=cmd/kubemark KUBE_BUILD_PLATFORMS=linux/amd64 +cp ./_output/local/go/bin/linux_amd64/kubemark cluster/images/kubemark +cd cluster/images/kubemark +docker build -t antrea/kubemark:v1.18.4 . 
+ ``` diff --git a/hack/.notableofcontents b/hack/.notableofcontents index bf5ffd45d15..766b9ff228b 100644 --- a/hack/.notableofcontents +++ b/hack/.notableofcontents @@ -1,5 +1,6 @@ docs/aks-installation.md docs/api.md +docs/antrea-agent-simulator.md docs/assets/README.md docs/assets/logo/README.md docs/contributors/code-generation.md @@ -19,6 +20,7 @@ docs/getting-started.md docs/gke-installation.md docs/ipsec-tunnel.md docs/kind.md +docs/maintainers/build-kubemark.md docs/maintainers/getting-started-gif.md docs/maintainers/release.md docs/octant-plugin-installation.md diff --git a/hack/generate-manifest.sh b/hack/generate-manifest.sh index 9e7df558cb7..2b6adb31b4d 100755 --- a/hack/generate-manifest.sh +++ b/hack/generate-manifest.sh @@ -38,6 +38,7 @@ Generate a YAML manifest for Antrea using Kustomize and print it to stdout. --on-delete Generate a manifest with antrea-agent's update strategy set to OnDelete. This option will work only for Kind clusters (when using '--kind'). --coverage Generates a manifest which supports measuring code coverage of Antrea binaries. + --simulator Generates a manifest with antrea-agent simulator included --help, -h Print this message and exit In 'release' mode, environment variables IMG_NAME and IMG_TAG must be set. @@ -70,6 +71,7 @@ VERBOSE_LOG=false ON_DELETE=false COVERAGE=false K8S_115=false +SIMULATOR=false while [[ $# -gt 0 ]] do @@ -132,6 +134,10 @@ case $key in COVERAGE=true shift ;; + --simulator) + SIMULATOR=true + shift + ;; -h|--help) print_usage exit 0 @@ -321,6 +327,18 @@ if [[ $CLOUD == "EKS" ]]; then cd .. fi +if $SIMULATOR; then + mkdir simulator && cd simulator + cp ../../patches/simulator/*.yml . + touch kustomization.yml + $KUSTOMIZE edit add base $BASE + $KUSTOMIZE edit add patch --path agentNodeAffinity.yml + $KUSTOMIZE edit add patch --path controllerNodeAffinity.yml + $KUSTOMIZE edit add resource antrea-agent-simulator.yml + BASE=../simulator + cd .. 
+fi + if $KIND; then mkdir kind && cd kind cp ../../patches/kind/*.yml .