diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 887b1971e..73271645a 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -136,3 +136,12 @@ jobs: run: | sudo npm install -g markdownlint-cli@0.31.1 make markdownlint + - name: Checking whether autogenerated Helm chart documentation is up-to-date + working-directory: build/charts/ + run: | + make helm-docs + DIFF=$(git diff .) + if [ -n "$DIFF" ]; then + echo "The Helm chart documentation is out-of-date; please run 'make helm-docs' in 'build/charts/' and commit the changes" + exit 1 + fi diff --git a/Makefile b/Makefile index 0a13b4ef7..550773dbf 100644 --- a/Makefile +++ b/Makefile @@ -157,4 +157,3 @@ clickhouse-monitor: clickhouse-monitor-plugin: @mkdir -p $(BINDIR) GOOS=linux $(GO) build -o $(BINDIR) $(GOFLAGS) -ldflags '$(LDFLAGS)' antrea.io/theia/plugins/clickhouse-monitor - \ No newline at end of file diff --git a/build/charts/Makefile b/build/charts/Makefile new file mode 100644 index 000000000..608a7eafa --- /dev/null +++ b/build/charts/Makefile @@ -0,0 +1,6 @@ +USERID := $(shell id -u) +GRPID := $(shell id -g) + +.PHONY: helm-docs +helm-docs: + docker run --rm --volume "$(CURDIR):/helm-docs" --user=$(USERID):$(GRPID) jnorwood/helm-docs:v1.7.0 diff --git a/build/charts/theia/.helmignore b/build/charts/theia/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/build/charts/theia/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/build/charts/theia/Chart.yaml b/build/charts/theia/Chart.yaml new file mode 100644 index 000000000..1ca32cea0 --- /dev/null +++ b/build/charts/theia/Chart.yaml @@ -0,0 +1,19 @@ +apiVersion: v2 +name: theia +type: application +displayName: Theia +home: https://antrea.io/ +version: 0.1.0-dev +appVersion: 0.1.0-dev +kubeVersion: ">= 1.16.0-0" +icon: https://raw.githubusercontent.com/antrea-io/antrea/main/docs/assets/logo/antrea_logo.svg +description: Antrea Network Flow Visibility +keywords: + - Kubernetes + - CNCF + - Networking + - CNI + - Security + - Flow visibility +sources: + - https://github.com/antrea-io/theia diff --git a/build/charts/theia/README.md b/build/charts/theia/README.md new file mode 100644 index 000000000..b74dea0b0 --- /dev/null +++ b/build/charts/theia/README.md @@ -0,0 +1,46 @@ +# theia + +![Version: 0.1.0-dev](https://img.shields.io/badge/Version-0.1.0--dev-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.1.0-dev](https://img.shields.io/badge/AppVersion-0.1.0--dev-informational?style=flat-square) + +Antrea Network Flow Visibility + +**Homepage:** <https://antrea.io/> + +## Source Code + +* <https://github.com/antrea-io/theia> + +## Requirements + +Kubernetes: `>= 1.16.0-0` + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| clickhouse.image | object | `{"pullPolicy":"IfNotPresent","repository":"projects.registry.vmware.com/antrea/theia-clickhouse-server","tag":"21.11"}` | Container image to use for the ClickHouse. | +| clickhouse.monitor.deletePercentage | float | `0.5` | The percentage of records in ClickHouse that will be deleted when the storage grows above threshold. Vary from 0 to 1. 
| +| clickhouse.monitor.enable | bool | `true` | Determine whether to run a monitor to periodically check the ClickHouse memory usage and clean data. | +| clickhouse.monitor.image | object | `{"pullPolicy":"IfNotPresent","repository":"projects.registry.vmware.com/antrea/theia-clickhouse-monitor","tag":"latest"}` | Container image to use for the ClickHouse Monitor. | +| clickhouse.monitor.threshold | float | `0.5` | The storage percentage at which the monitor starts to delete old records. Vary from 0 to 1. | +| clickhouse.persistentVolume.affinity | object | `{"required":{"nodeSelectorTerms":[{"matchExpressions":[{"key":"antrea.io/clickhouse-data-node","operator":"Exists"}]}]}}` | Affinity for the Local Persistent Volume. Required when Persistent Volumes is enable and the provisioner is "Local". | +| clickhouse.persistentVolume.enable | bool | `false` | Enable deploying the ClickHouse with Persistent Volumes. | +| clickhouse.persistentVolume.localPath | string | `"/data"` | The local path. Required when Persistent Volumes is enable and the provisioner is "Local". | +| clickhouse.persistentVolume.nfsHost | string | `""` | The NFS server hostname or IP address. Required when Persistent Volumes is enable the provisioner is "NFS". | +| clickhouse.persistentVolume.nfsPath | string | `""` | The path exported on the NFS server. Required when Persistent Volumes is enable the provisioner is "NFS". | +| clickhouse.persistentVolume.provisioner | string | `"Local"` | Persistent Volume Provisioner. Required if Persistent Volumes is enable. It must be one of "StorageClass", "Local", "NFS". | +| clickhouse.persistentVolume.storageClass | string | `""` | The StorageClass used to dynamically provision the Persistent Volume. Required when Persistent Volumes is enable the provisioner is "StorageClass". | +| clickhouse.port.http | int | `8123` | HTTP port number for the ClickHouse service. | +| clickhouse.port.tcp | int | `9000` | TCP port number for the ClickHouse service. 
| +| clickhouse.secret.password | string | `"clickhouse_operator_password"` | ClickHouse password. It will be stored in a secret. | +| clickhouse.secret.username | string | `"clickhouse_operator"` | ClickHouse username. It will be stored in a secret. | +| clickhouse.storageSize | string | `"8Gi"` | ClickHouse storage size. Can be a plain integer or as a fixed-point number using one of these quantity suffixes: E, P, T, G, M, K. Or the power-of-two equivalents: Ei, Pi, Ti, Gi, Mi, Ki. | +| clickhouse.ttl | int | `3600` | Time to live in seconds for data in the ClickHouse. | +| grafana.image | object | `{"pullPolicy":"IfNotPresent","repository":"projects.registry.vmware.com/antrea/theia-grafana","tag":"8.3.3"}` | Container image to use for the Grafana. | +| grafana.secret.password | string | `"admin"` | Grafana password. It will be stored in a secret. | +| grafana.secret.username | string | `"admin"` | Grafana username. It will be stored in a secret. | +| grafana.service.tcpPort | int | `3000` | TCP port number for the Grafana service. | +| grafana.service.type | string | `"NodePort"` | The type of service exposes Grafana. It must be one of NodePort or LoadBalancer. 
| + +---------------------------------------------- +Autogenerated from chart metadata using [helm-docs v1.7.0](https://github.com/norwoodj/helm-docs/releases/v1.7.0) diff --git a/build/yamls/clickhouse-operator-install-bundle.yml b/build/charts/theia/crds/clickhouse-operator-install-bundle.yaml similarity index 100% rename from build/yamls/clickhouse-operator-install-bundle.yml rename to build/charts/theia/crds/clickhouse-operator-install-bundle.yaml diff --git a/build/yamls/base/provisioning/dashboards/dashboard_provider.yml b/build/charts/theia/provisioning/dashboards/dashboard_provider.yaml similarity index 100% rename from build/yamls/base/provisioning/dashboards/dashboard_provider.yml rename to build/charts/theia/provisioning/dashboards/dashboard_provider.yaml diff --git a/build/yamls/base/provisioning/dashboards/flow_records_dashboard.json b/build/charts/theia/provisioning/dashboards/flow_records_dashboard.json similarity index 100% rename from build/yamls/base/provisioning/dashboards/flow_records_dashboard.json rename to build/charts/theia/provisioning/dashboards/flow_records_dashboard.json diff --git a/build/yamls/base/provisioning/dashboards/networkpolicy_allow_dashboard.json b/build/charts/theia/provisioning/dashboards/networkpolicy_allow_dashboard.json similarity index 100% rename from build/yamls/base/provisioning/dashboards/networkpolicy_allow_dashboard.json rename to build/charts/theia/provisioning/dashboards/networkpolicy_allow_dashboard.json diff --git a/build/yamls/base/provisioning/dashboards/node_to_node_dashboard.json b/build/charts/theia/provisioning/dashboards/node_to_node_dashboard.json similarity index 100% rename from build/yamls/base/provisioning/dashboards/node_to_node_dashboard.json rename to build/charts/theia/provisioning/dashboards/node_to_node_dashboard.json diff --git a/build/yamls/base/provisioning/dashboards/pod_to_external_dashboard.json b/build/charts/theia/provisioning/dashboards/pod_to_external_dashboard.json similarity index 
100% rename from build/yamls/base/provisioning/dashboards/pod_to_external_dashboard.json rename to build/charts/theia/provisioning/dashboards/pod_to_external_dashboard.json diff --git a/build/yamls/base/provisioning/dashboards/pod_to_pod_dashboard.json b/build/charts/theia/provisioning/dashboards/pod_to_pod_dashboard.json similarity index 100% rename from build/yamls/base/provisioning/dashboards/pod_to_pod_dashboard.json rename to build/charts/theia/provisioning/dashboards/pod_to_pod_dashboard.json diff --git a/build/yamls/base/provisioning/dashboards/pod_to_service_dashboard.json b/build/charts/theia/provisioning/dashboards/pod_to_service_dashboard.json similarity index 100% rename from build/yamls/base/provisioning/dashboards/pod_to_service_dashboard.json rename to build/charts/theia/provisioning/dashboards/pod_to_service_dashboard.json diff --git a/build/yamls/base/provisioning/datasources/create_table.sh b/build/charts/theia/provisioning/datasources/create_table.sh similarity index 90% rename from build/yamls/base/provisioning/datasources/create_table.sh rename to build/charts/theia/provisioning/datasources/create_table.sh index 9f1355794..91709045c 100644 --- a/build/yamls/base/provisioning/datasources/create_table.sh +++ b/build/charts/theia/provisioning/datasources/create_table.sh @@ -69,10 +69,10 @@ clickhouse client -n -h 127.0.0.1 <<-EOSQL trusted UInt8 DEFAULT 0 ) engine=MergeTree ORDER BY (timeInserted, flowEndSeconds) - TTL timeInserted + INTERVAL 1 HOUR - SETTINGS merge_with_ttl_timeout = 3600; + TTL timeInserted + INTERVAL {{ .Values.clickhouse.ttl }} SECOND + SETTINGS merge_with_ttl_timeout = {{ .Values.clickhouse.ttl }}; - CREATE MATERIALIZED VIEW flows_pod_view + CREATE MATERIALIZED VIEW IF NOT EXISTS flows_pod_view ENGINE = SummingMergeTree ORDER BY ( timeInserted, @@ -86,8 +86,8 @@ clickhouse client -n -h 127.0.0.1 <<-EOSQL flowType, sourcePodNamespace, destinationPodNamespace) - TTL timeInserted + INTERVAL 1 HOUR - SETTINGS 
merge_with_ttl_timeout = 3600 + TTL timeInserted + INTERVAL {{ .Values.clickhouse.ttl }} SECOND + SETTINGS merge_with_ttl_timeout = {{ .Values.clickhouse.ttl }} POPULATE AS SELECT timeInserted, @@ -121,7 +121,7 @@ clickhouse client -n -h 127.0.0.1 <<-EOSQL sourcePodNamespace, destinationPodNamespace; - CREATE MATERIALIZED VIEW flows_node_view + CREATE MATERIALIZED VIEW IF NOT EXISTS flows_node_view ENGINE = SummingMergeTree ORDER BY ( timeInserted, @@ -132,8 +132,8 @@ clickhouse client -n -h 127.0.0.1 <<-EOSQL destinationNodeName, sourcePodNamespace, destinationPodNamespace) - TTL timeInserted + INTERVAL 1 HOUR - SETTINGS merge_with_ttl_timeout = 3600 + TTL timeInserted + INTERVAL {{ .Values.clickhouse.ttl }} SECOND + SETTINGS merge_with_ttl_timeout = {{ .Values.clickhouse.ttl }} POPULATE AS SELECT timeInserted, @@ -163,7 +163,7 @@ clickhouse client -n -h 127.0.0.1 <<-EOSQL sourcePodNamespace, destinationPodNamespace; - CREATE MATERIALIZED VIEW flows_policy_view + CREATE MATERIALIZED VIEW IF NOT EXISTS flows_policy_view ENGINE = SummingMergeTree ORDER BY ( timeInserted, @@ -176,8 +176,8 @@ clickhouse client -n -h 127.0.0.1 <<-EOSQL ingressNetworkPolicyRuleAction, sourcePodNamespace, destinationPodNamespace) - TTL timeInserted + INTERVAL 1 HOUR - SETTINGS merge_with_ttl_timeout = 3600 + TTL timeInserted + INTERVAL {{ .Values.clickhouse.ttl }} SECOND + SETTINGS merge_with_ttl_timeout = {{ .Values.clickhouse.ttl }} POPULATE AS SELECT timeInserted, diff --git a/build/yamls/base/provisioning/datasources/datasource_provider.yml b/build/charts/theia/provisioning/datasources/datasource_provider.yaml similarity index 52% rename from build/yamls/base/provisioning/datasources/datasource_provider.yml rename to build/charts/theia/provisioning/datasources/datasource_provider.yaml index 31858cba9..a2b058792 100644 --- a/build/yamls/base/provisioning/datasources/datasource_provider.yml +++ b/build/charts/theia/provisioning/datasources/datasource_provider.yaml @@ -3,11 +3,11 @@ 
datasources: - name: ClickHouse type: grafana-clickhouse-datasource access: proxy - url: http://clickhouse-clickhouse.flow-visibility.svc:8123 + url: http://clickhouse-clickhouse.{{ .Release.Namespace }}.svc:{{ .Values.clickhouse.port.http }} editable: true jsonData: - server: clickhouse-clickhouse.flow-visibility.svc - port: 9000 + server: clickhouse-clickhouse.{{ .Release.Namespace }}.svc + port: {{ .Values.clickhouse.port.tcp }} username: $CLICKHOUSE_USERNAME secureJsonData: password: $CLICKHOUSE_PASSWORD diff --git a/build/charts/theia/templates/NOTES.txt b/build/charts/theia/templates/NOTES.txt new file mode 100644 index 000000000..84d31048b --- /dev/null +++ b/build/charts/theia/templates/NOTES.txt @@ -0,0 +1 @@ +The Theia has been successfully installed. diff --git a/build/charts/theia/templates/_helpers.tpl b/build/charts/theia/templates/_helpers.tpl new file mode 100644 index 000000000..e69de29bb diff --git a/build/charts/theia/templates/clickhouse/clickhouseinstallation.yaml b/build/charts/theia/templates/clickhouse/clickhouseinstallation.yaml new file mode 100644 index 000000000..507957557 --- /dev/null +++ b/build/charts/theia/templates/clickhouse/clickhouseinstallation.yaml @@ -0,0 +1,100 @@ +apiVersion: "clickhouse.altinity.com/v1" +kind: "ClickHouseInstallation" +metadata: + name: clickhouse + labels: + app: clickhouse + namespace: {{ .Release.Namespace }} +spec: + configuration: + users: + {{ .Values.clickhouse.secret.username }}/k8s_secret_password: {{ .Release.Namespace }}/clickhouse-secret/password + {{ .Values.clickhouse.secret.username }}/networks/ip: "::/0" + clusters: + - name: "clickhouse" + layout: + shardsCount: 1 + replicasCount: 1 + defaults: + templates: + podTemplate: pod-template + serviceTemplate: service-template + {{- if .Values.clickhouse.persistentVolume.enable }} + dataVolumeClaimTemplate: clickhouse-storage-template + {{- end }} + templates: + serviceTemplates: + - name: service-template + spec: + ports: + - name: http + port: 
{{ .Values.clickhouse.port.http }} + - name: tcp + port: {{ .Values.clickhouse.port.tcp }} + podTemplates: + - name: pod-template + spec: + containers: + - name: clickhouse + image: {{ .Values.clickhouse.image.repository }}:{{ .Values.clickhouse.image.tag }} + imagePullPolicy: {{ .Values.clickhouse.image.pullPolicy }} + volumeMounts: + - name: clickhouse-configmap-volume + mountPath: /docker-entrypoint-initdb.d + {{- if not .Values.clickhouse.persistentVolume.enable }} + - name: clickhouse-storage-volume + mountPath: /var/lib/clickhouse + {{- end }} + {{- if .Values.clickhouse.monitor.enable}} + - name: clickhouse-monitor + image: {{ .Values.clickhouse.monitor.image.repository }}:{{ .Values.clickhouse.monitor.image.tag }} + imagePullPolicy: {{ .Values.clickhouse.monitor.image.pullPolicy }} + env: + - name: CLICKHOUSE_USERNAME + valueFrom: + secretKeyRef: + name: clickhouse-secret + key: username + - name: CLICKHOUSE_PASSWORD + valueFrom: + secretKeyRef: + name: clickhouse-secret + key: password + - name: DB_URL + value: "tcp://localhost:9000" + - name: TABLE_NAME + value: "default.flows" + - name: MV_NAMES + value: "default.flows_pod_view default.flows_node_view default.flows_policy_view" + - name: STORAGE_SIZE + value: {{ .Values.clickhouse.storageSize | quote }} + - name: THRESHOLD + value: {{ .Values.clickhouse.monitor.threshold | quote }} + - name: DELETE_PERCENTAGE + value: {{ .Values.clickhouse.monitor.deletePercentage | quote }} + {{- end}} + volumes: + - name: clickhouse-configmap-volume + configMap: + name: clickhouse-mounted-configmap + {{- if not .Values.clickhouse.persistentVolume.enable }} + - name: clickhouse-storage-volume + emptyDir: + medium: Memory + sizeLimit: {{ .Values.clickhouse.storageSize }} + {{- end }} + {{- if .Values.clickhouse.persistentVolume.enable }} + volumeClaimTemplates: + - name: clickhouse-storage-template + spec: + {{- if eq .Values.clickhouse.persistentVolume.provisioner "StorageClass"}} + storageClassName: {{ 
.Values.clickhouse.persistentVolume.storageClass}} + {{- else }} + storageClassName: clickhouse-storage + {{- end }} + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.clickhouse.storageSize }} + {{- end }} diff --git a/build/charts/theia/templates/clickhouse/configmap.yaml b/build/charts/theia/templates/clickhouse/configmap.yaml new file mode 100644 index 000000000..573bfd630 --- /dev/null +++ b/build/charts/theia/templates/clickhouse/configmap.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: clickhouse-mounted-configmap + namespace: {{ .Release.Namespace }} +data: + create_table.sh: |- +{{ tpl (.Files.Get "provisioning/datasources/create_table.sh") . | indent 4}} diff --git a/build/charts/theia/templates/clickhouse/local-persistentvolume.yaml b/build/charts/theia/templates/clickhouse/local-persistentvolume.yaml new file mode 100644 index 000000000..370e4898e --- /dev/null +++ b/build/charts/theia/templates/clickhouse/local-persistentvolume.yaml @@ -0,0 +1,19 @@ +{{- if and (.Values.clickhouse.persistentVolume.enable) (eq (.Values.clickhouse.persistentVolume.provisioner) "Local") }} +apiVersion: v1 +kind: PersistentVolume +metadata: + name: clickhouse-pv +spec: + storageClassName: clickhouse-storage + capacity: + storage: {{ .Values.clickhouse.storageSize }} + accessModes: + - ReadWriteOnce + volumeMode: Filesystem + local: + path: {{ .Values.clickhouse.persistentVolume.localPath }} + {{- with .Values.clickhouse.persistentVolume.affinity }} + nodeAffinity: + {{- toYaml . 
| trim | nindent 4 }} + {{- end }} +{{- end }} diff --git a/build/charts/theia/templates/clickhouse/nfs-persistentvolume.yaml b/build/charts/theia/templates/clickhouse/nfs-persistentvolume.yaml new file mode 100644 index 000000000..27234cc36 --- /dev/null +++ b/build/charts/theia/templates/clickhouse/nfs-persistentvolume.yaml @@ -0,0 +1,16 @@ +{{- if and (.Values.clickhouse.persistentVolume.enable) (eq (.Values.clickhouse.persistentVolume.provisioner) "NFS") }} +apiVersion: v1 +kind: PersistentVolume +metadata: + name: clickhouse-pv +spec: + storageClassName: clickhouse-storage + capacity: + storage: {{ .Values.clickhouse.storageSize }} + accessModes: + - ReadWriteOnce + volumeMode: Filesystem + nfs: + path: {{ .Values.clickhouse.persistentVolume.nfsPath }} + server: {{ .Values.clickhouse.persistentVolume.nfsHost }} +{{- end }} diff --git a/build/charts/theia/templates/clickhouse/secret.yaml b/build/charts/theia/templates/clickhouse/secret.yaml new file mode 100644 index 000000000..8a38773b1 --- /dev/null +++ b/build/charts/theia/templates/clickhouse/secret.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Secret +metadata: + name: clickhouse-secret + namespace: {{ .Release.Namespace }} +type: Opaque +stringData: + username: {{ .Values.clickhouse.secret.username }} + password: {{ .Values.clickhouse.secret.password }} diff --git a/build/charts/theia/templates/clickhouse/storageclass.yaml b/build/charts/theia/templates/clickhouse/storageclass.yaml new file mode 100644 index 000000000..a8608b2e8 --- /dev/null +++ b/build/charts/theia/templates/clickhouse/storageclass.yaml @@ -0,0 +1,10 @@ +{{- if and (.Values.clickhouse.persistentVolume.enable) (not (eq (.Values.clickhouse.persistentVolume.provisioner) "StorageClass")) }} +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: clickhouse-storage +provisioner: kubernetes.io/no-provisioner +volumeBindingMode: WaitForFirstConsumer +reclaimPolicy: Retain +allowVolumeExpansion: True +{{- end }} diff --git 
a/build/charts/theia/templates/grafana/dashboard-configmap.yaml b/build/charts/theia/templates/grafana/dashboard-configmap.yaml new file mode 100644 index 000000000..7720bfc08 --- /dev/null +++ b/build/charts/theia/templates/grafana/dashboard-configmap.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboard-config + namespace: {{ .Release.Namespace }} +data: +{{ (.Files.Glob "provisioning/dashboards/*.json").AsConfig | indent 2}} diff --git a/build/charts/theia/templates/grafana/dashboard-provider-configmap.yaml b/build/charts/theia/templates/grafana/dashboard-provider-configmap.yaml new file mode 100644 index 000000000..121073e0a --- /dev/null +++ b/build/charts/theia/templates/grafana/dashboard-provider-configmap.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboard-provider + namespace: {{ .Release.Namespace }} +data: + dashboard_provider.yaml: |- +{{ .Files.Get "provisioning/dashboards/dashboard_provider.yaml" | indent 4}} diff --git a/build/charts/theia/templates/grafana/datasource-provider-configmap.yaml b/build/charts/theia/templates/grafana/datasource-provider-configmap.yaml new file mode 100644 index 000000000..fd6a2e1c5 --- /dev/null +++ b/build/charts/theia/templates/grafana/datasource-provider-configmap.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-datasource-provider + namespace: {{ .Release.Namespace }} +data: + datasource_provider.yaml: |- +{{ tpl (.Files.Get "provisioning/datasources/datasource_provider.yaml") . 
| indent 4}} diff --git a/build/yamls/base/grafana.yml b/build/charts/theia/templates/grafana/deployment.yaml similarity index 63% rename from build/yamls/base/grafana.yml rename to build/charts/theia/templates/grafana/deployment.yaml index 50401af44..2761ece6d 100644 --- a/build/yamls/base/grafana.yml +++ b/build/charts/theia/templates/grafana/deployment.yaml @@ -1,92 +1,10 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: flow-visibility ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: grafana ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - labels: - app: flow-visibility - name: grafana-role -rules: - - apiGroups: - - "" - resources: - - services - verbs: - - get - - list - - watch ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - labels: - app: flow-visibility - name: grafana-role-binding -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: grafana-role -subjects: - - kind: ServiceAccount - name: grafana ---- -apiVersion: storage.k8s.io/v1 -kind: StorageClass -metadata: - name: grafana-storage -provisioner: kubernetes.io/no-provisioner -volumeBindingMode: WaitForFirstConsumer -reclaimPolicy: Delete -allowVolumeExpansion: True ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: grafana-pvc -spec: - storageClassName: grafana-storage - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 1Gi ---- -apiVersion: v1 -kind: PersistentVolume -metadata: - name: grafana-pv -spec: - storageClassName: grafana-storage - capacity: - storage: 2Gi - accessModes: - - ReadWriteOnce - hostPath: - path: "/data/grafana" ---- -apiVersion: v1 -kind: Secret -metadata: - name: grafana-secret -type: Opaque -stringData: - admin-username: admin - admin-password: admin ---- apiVersion: apps/v1 kind: Deployment metadata: labels: app: grafana name: grafana + namespace: {{ .Release.Namespace }} spec: selector: matchLabels: @@ -103,8 +21,8 @@ spec: - 0 containers: - name: grafana 
- image: projects.registry.vmware.com/antrea/theia-grafana:8.3.3 - imagePullPolicy: IfNotPresent + image: {{ .Values.grafana.image.repository }}:{{ .Values.grafana.image.tag }} + imagePullPolicy: {{ .Values.grafana.image.pullPolicy }} env: - name: GF_INSTALL_PLUGINS value: "https://downloads.antrea.io/artifacts/grafana-custom-plugins/theia-grafana-sankey-plugin-1.0.0.zip;theia-grafana-sankey-plugin,grafana-clickhouse-datasource 1.0.1" @@ -180,17 +98,3 @@ spec: - name: grafana-dashboard-config configMap: name: grafana-dashboard-config ---- -apiVersion: v1 -kind: Service -metadata: - name: grafana -spec: - ports: - - port: 3000 - protocol: TCP - targetPort: http-grafana - selector: - app: grafana - sessionAffinity: None - type: NodePort diff --git a/build/charts/theia/templates/grafana/persistentvolume.yaml b/build/charts/theia/templates/grafana/persistentvolume.yaml new file mode 100644 index 000000000..6d585cda1 --- /dev/null +++ b/build/charts/theia/templates/grafana/persistentvolume.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: grafana-pv +spec: + storageClassName: grafana-storage + capacity: + storage: 2Gi + accessModes: + - ReadWriteOnce + hostPath: + path: "/data/grafana" diff --git a/build/charts/theia/templates/grafana/persistentvolumeclaim.yaml b/build/charts/theia/templates/grafana/persistentvolumeclaim.yaml new file mode 100644 index 000000000..1ddf22a06 --- /dev/null +++ b/build/charts/theia/templates/grafana/persistentvolumeclaim.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: grafana-pvc + namespace: {{ .Release.Namespace }} +spec: + storageClassName: grafana-storage + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi diff --git a/build/charts/theia/templates/grafana/role.yaml b/build/charts/theia/templates/grafana/role.yaml new file mode 100644 index 000000000..8761ada07 --- /dev/null +++ b/build/charts/theia/templates/grafana/role.yaml @@ -0,0 +1,16 @@ 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app: {{ .Release.Namespace }} + name: grafana-role + namespace: {{ .Release.Namespace }} +rules: + - apiGroups: + - "" + resources: + - services + verbs: + - get + - list + - watch diff --git a/build/charts/theia/templates/grafana/rolebinding.yaml b/build/charts/theia/templates/grafana/rolebinding.yaml new file mode 100644 index 000000000..c3d8591a9 --- /dev/null +++ b/build/charts/theia/templates/grafana/rolebinding.yaml @@ -0,0 +1,15 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app: {{ .Release.Namespace }} + name: grafana-role-binding + namespace: {{ .Release.Namespace }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: grafana-role +subjects: + - kind: ServiceAccount + name: grafana + namespace: {{ .Release.Namespace }} diff --git a/build/charts/theia/templates/grafana/secret.yaml b/build/charts/theia/templates/grafana/secret.yaml new file mode 100644 index 000000000..bfa7d70c9 --- /dev/null +++ b/build/charts/theia/templates/grafana/secret.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Secret +metadata: + name: grafana-secret + namespace: {{ .Release.Namespace }} +type: Opaque +stringData: + admin-username: {{ .Values.grafana.secret.username }} + admin-password: {{ .Values.grafana.secret.password }} diff --git a/build/charts/theia/templates/grafana/service.yaml b/build/charts/theia/templates/grafana/service.yaml new file mode 100644 index 000000000..259f65f3a --- /dev/null +++ b/build/charts/theia/templates/grafana/service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + name: grafana + namespace: {{ .Release.Namespace }} +spec: + ports: + - port: {{ .Values.grafana.service.tcpPort }} + protocol: TCP + targetPort: http-grafana + selector: + app: grafana + sessionAffinity: None + type: {{ .Values.grafana.service.type }} diff --git a/build/charts/theia/templates/grafana/serviceaccount.yaml 
b/build/charts/theia/templates/grafana/serviceaccount.yaml new file mode 100644 index 000000000..9593ee43b --- /dev/null +++ b/build/charts/theia/templates/grafana/serviceaccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: grafana + namespace: {{ .Release.Namespace }} diff --git a/build/charts/theia/templates/grafana/storageclass.yaml b/build/charts/theia/templates/grafana/storageclass.yaml new file mode 100644 index 000000000..d6d06fbab --- /dev/null +++ b/build/charts/theia/templates/grafana/storageclass.yaml @@ -0,0 +1,8 @@ +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: grafana-storage +provisioner: kubernetes.io/no-provisioner +volumeBindingMode: WaitForFirstConsumer +reclaimPolicy: Delete +allowVolumeExpansion: True diff --git a/build/charts/theia/values.yaml b/build/charts/theia/values.yaml new file mode 100644 index 000000000..30e3c5dcc --- /dev/null +++ b/build/charts/theia/values.yaml @@ -0,0 +1,79 @@ +clickhouse: + # -- Container image to use for the ClickHouse. + image: + repository: "projects.registry.vmware.com/antrea/theia-clickhouse-server" + pullPolicy: "IfNotPresent" + tag: "21.11" + monitor: + # -- Determine whether to run a monitor to periodically check the ClickHouse + # memory usage and clean data. + enable: true + # -- The storage percentage at which the monitor starts to delete old records. + # Vary from 0 to 1. + threshold: 0.5 + # -- The percentage of records in ClickHouse that will be deleted when the + # storage grows above threshold. Vary from 0 to 1. + deletePercentage: 0.5 + # -- Container image to use for the ClickHouse Monitor. + image: + repository: "projects.registry.vmware.com/antrea/theia-clickhouse-monitor" + pullPolicy: "IfNotPresent" + tag: "latest" + secret: + # -- ClickHouse username. It will be stored in a secret. + username: "clickhouse_operator" + # -- ClickHouse password. It will be stored in a secret. 
+ password: "clickhouse_operator_password" + port: + # -- HTTP port number for the ClickHouse service. + http: 8123 + # -- TCP port number for the ClickHouse service. + tcp: 9000 + # -- ClickHouse storage size. Can be a plain integer or as a fixed-point + # number using one of these quantity suffixes: E, P, T, G, M, K. Or the + # power-of-two equivalents: Ei, Pi, Ti, Gi, Mi, Ki. + storageSize: "8Gi" + # -- Time to live in seconds for data in the ClickHouse. + ttl: 3600 + persistentVolume: + # -- Enable deploying the ClickHouse with Persistent Volumes. + enable: false + # -- Persistent Volume Provisioner. Required if Persistent Volumes is enable. + # It must be one of "StorageClass", "Local", "NFS". + provisioner: "Local" + # -- The local path. Required when Persistent Volumes is enable and the + # provisioner is "Local". + localPath: "/data" + # -- Affinity for the Local Persistent Volume. Required when Persistent + # Volumes is enable and the provisioner is "Local". + affinity: + required: + nodeSelectorTerms: + - matchExpressions: + - key: antrea.io/clickhouse-data-node + operator: Exists + # -- The NFS server hostname or IP address. Required when Persistent Volumes + # is enable the provisioner is "NFS". + nfsHost: "" + # -- The path exported on the NFS server. Required when Persistent Volumes + # is enable the provisioner is "NFS". + nfsPath: "" + # -- The StorageClass used to dynamically provision the Persistent Volume. + # Required when Persistent Volumes is enable the provisioner is "StorageClass". + storageClass: "" +grafana: + # -- Container image to use for the Grafana. + image: + repository: "projects.registry.vmware.com/antrea/theia-grafana" + pullPolicy: "IfNotPresent" + tag: "8.3.3" + secret: + # -- Grafana username. It will be stored in a secret. + username: "admin" + # -- Grafana password. It will be stored in a secret. + password: "admin" + service: + # -- The type of service exposes Grafana. It must be one of NodePort or LoadBalancer. 
+ type: NodePort + # -- TCP port number for the Grafana service. + tcpPort: 3000 diff --git a/build/yamls/base/clickhouse.yml b/build/yamls/base/clickhouse.yml deleted file mode 100644 index 70add9d97..000000000 --- a/build/yamls/base/clickhouse.yml +++ /dev/null @@ -1,76 +0,0 @@ -apiVersion: v1 -kind: Secret -metadata: - name: clickhouse-secret -type: Opaque -stringData: - username: clickhouse_operator - password: clickhouse_operator_password ---- -apiVersion: "clickhouse.altinity.com/v1" -kind: "ClickHouseInstallation" -metadata: - name: clickhouse - labels: - app: clickhouse -spec: - configuration: - users: - clickhouse_operator/k8s_secret_password: flow-visibility/clickhouse-secret/password - clickhouse_operator/networks/ip: "::/0" - clusters: - - name: "clickhouse" - layout: - shardsCount: 1 - replicasCount: 1 - defaults: - templates: - podTemplate: pod-template - serviceTemplate: service-template - templates: - serviceTemplates: - - name: service-template - spec: - ports: - - name: http - port: 8123 - - name: tcp - port: 9000 - podTemplates: - - name: pod-template - spec: - containers: - - name: clickhouse - image: projects.registry.vmware.com/antrea/theia-clickhouse-server:21.11 - volumeMounts: - - name: clickhouse-configmap-volume - mountPath: /docker-entrypoint-initdb.d - - name: clickhouse-storage-volume - mountPath: /var/lib/clickhouse - - name: clickhouse-monitor - image: clickhouse-monitor - env: - - name: CLICKHOUSE_USERNAME - valueFrom: - secretKeyRef: - name: clickhouse-secret - key: username - - name: CLICKHOUSE_PASSWORD - valueFrom: - secretKeyRef: - name: clickhouse-secret - key: password - - name: DB_URL - value: "tcp://localhost:9000" - - name: TABLE_NAME - value: "default.flows" - - name: MV_NAMES - value: "default.flows_pod_view default.flows_node_view default.flows_policy_view" - volumes: - - name: clickhouse-configmap-volume - configMap: - name: $(CLICKHOUSE_CONFIG_MAP_NAME) - - name: clickhouse-storage-volume - emptyDir: - medium: Memory - 
sizeLimit: 8Gi diff --git a/build/yamls/base/kustomization.yaml b/build/yamls/base/kustomization.yaml new file mode 100644 index 000000000..fa6ed17ad --- /dev/null +++ b/build/yamls/base/kustomization.yaml @@ -0,0 +1,7 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: flow-visibility + +resources: +- namespace.yaml +- manifest.yaml diff --git a/build/yamls/base/kustomization.yml b/build/yamls/base/kustomization.yml deleted file mode 100644 index a1d6c8294..000000000 --- a/build/yamls/base/kustomization.yml +++ /dev/null @@ -1,41 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -namespace: flow-visibility - -resources: -- clickhouse.yml -- grafana.yml - -configMapGenerator: -- name: grafana-datasource-provider - files: - - provisioning/datasources/datasource_provider.yml -- name: grafana-dashboard-provider - files: - - provisioning/dashboards/dashboard_provider.yml -- name: clickhouse-mounted-configmap - namespace: flow-visibility - files: - - provisioning/datasources/create_table.sh -- name: grafana-dashboard-config - files: - - provisioning/dashboards/flow_records_dashboard.json - - provisioning/dashboards/pod_to_pod_dashboard.json - - provisioning/dashboards/pod_to_service_dashboard.json - - provisioning/dashboards/pod_to_external_dashboard.json - - provisioning/dashboards/node_to_node_dashboard.json - - provisioning/dashboards/networkpolicy_allow_dashboard.json - -# CLICKHOUSE_CONFIG_MAP_NAME exports the value in `metadata.name` from `ConfigMap` named `clickhouse-mounted-configmap`, -# which is used for inserting the value to a CRD for an object of kind `ClickHouseInstallation` -vars: -- name: CLICKHOUSE_CONFIG_MAP_NAME - objref: - kind: ConfigMap - name: clickhouse-mounted-configmap - apiVersion: v1 - fieldref: - fieldpath: metadata.name - -configurations: -- kustomize-config.yml diff --git a/build/yamls/base/kustomize-config.yml b/build/yamls/base/kustomize-config.yml deleted file mode 100644 index 
3e103590d..000000000 --- a/build/yamls/base/kustomize-config.yml +++ /dev/null @@ -1,5 +0,0 @@ -# These are the "extra" (non built-in) paths where Kustomize needs to look for variable -# substitutions if needed. The value of the variable comes from the ConfigMap declaration. -varReference: -- path: spec/templates/podTemplates/spec/volumes/configMap/name - kind: ClickHouseInstallation diff --git a/build/yamls/base/namespace.yaml b/build/yamls/base/namespace.yaml new file mode 100644 index 000000000..f85217da1 --- /dev/null +++ b/build/yamls/base/namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: flow-visibility diff --git a/build/yamls/flow-visibility.yml b/build/yamls/flow-visibility.yml index 5d997e1d1..4a37ffff3 100644 --- a/build/yamls/flow-visibility.yml +++ b/build/yamls/flow-visibility.yml @@ -85,30 +85,31 @@ data: UInt64,\n throughputFromDestinationNode UInt64,\n reverseThroughputFromSourceNode UInt64,\n reverseThroughputFromDestinationNode UInt64,\n trusted UInt8 DEFAULT 0\n ) engine=MergeTree\n ORDER BY (timeInserted, flowEndSeconds)\n - \ TTL timeInserted + INTERVAL 1 HOUR\n SETTINGS merge_with_ttl_timeout = - 3600;\n\n CREATE MATERIALIZED VIEW flows_pod_view\n ENGINE = SummingMergeTree\n - \ ORDER BY (\n timeInserted,\n flowEndSeconds,\n flowEndSecondsFromSourceNode,\n - \ flowEndSecondsFromDestinationNode,\n sourcePodName,\n destinationPodName,\n - \ destinationIP,\n destinationServicePortName,\n flowType,\n - \ sourcePodNamespace,\n destinationPodNamespace)\n TTL timeInserted - + INTERVAL 1 HOUR\n SETTINGS merge_with_ttl_timeout = 3600\n POPULATE\n - \ AS SELECT\n timeInserted,\n flowEndSeconds,\n flowEndSecondsFromSourceNode,\n - \ flowEndSecondsFromDestinationNode,\n sourcePodName,\n destinationPodName,\n - \ destinationIP,\n destinationServicePortName,\n flowType,\n - \ sourcePodNamespace,\n destinationPodNamespace,\n sum(octetDeltaCount) - AS octetDeltaCount,\n sum(reverseOctetDeltaCount) AS reverseOctetDeltaCount,\n 
- \ sum(throughput) AS throughput,\n sum(reverseThroughput) AS reverseThroughput,\n - \ sum(throughputFromSourceNode) AS throughputFromSourceNode,\n sum(throughputFromDestinationNode) - AS throughputFromDestinationNode\n FROM flows\n GROUP BY\n timeInserted,\n + \ TTL timeInserted + INTERVAL 3600 SECOND\n SETTINGS merge_with_ttl_timeout + = 3600;\n\n CREATE MATERIALIZED VIEW IF NOT EXISTS flows_pod_view\n ENGINE + = SummingMergeTree\n ORDER BY (\n timeInserted,\n flowEndSeconds,\n + \ flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n + \ sourcePodName,\n destinationPodName,\n destinationIP,\n + \ destinationServicePortName,\n flowType,\n sourcePodNamespace,\n + \ destinationPodNamespace)\n TTL timeInserted + INTERVAL 3600 SECOND\n + \ SETTINGS merge_with_ttl_timeout = 3600\n POPULATE\n AS SELECT\n timeInserted,\n \ flowEndSeconds,\n flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n \ sourcePodName,\n destinationPodName,\n destinationIP,\n \ destinationServicePortName,\n flowType,\n sourcePodNamespace,\n - \ destinationPodNamespace;\n\n CREATE MATERIALIZED VIEW flows_node_view\n - \ ENGINE = SummingMergeTree\n ORDER BY (\n timeInserted,\n flowEndSeconds,\n + \ destinationPodNamespace,\n sum(octetDeltaCount) AS octetDeltaCount,\n + \ sum(reverseOctetDeltaCount) AS reverseOctetDeltaCount,\n sum(throughput) + AS throughput,\n sum(reverseThroughput) AS reverseThroughput,\n sum(throughputFromSourceNode) + AS throughputFromSourceNode,\n sum(throughputFromDestinationNode) AS throughputFromDestinationNode\n + \ FROM flows\n GROUP BY\n timeInserted,\n flowEndSeconds,\n \ flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n + \ sourcePodName,\n destinationPodName,\n destinationIP,\n + \ destinationServicePortName,\n flowType,\n sourcePodNamespace,\n + \ destinationPodNamespace;\n\n CREATE MATERIALIZED VIEW IF NOT EXISTS + flows_node_view\n ENGINE = SummingMergeTree\n ORDER BY (\n timeInserted,\n + \ flowEndSeconds,\n 
flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n \ sourceNodeName,\n destinationNodeName,\n sourcePodNamespace,\n - \ destinationPodNamespace)\n TTL timeInserted + INTERVAL 1 HOUR\n SETTINGS - merge_with_ttl_timeout = 3600\n POPULATE\n AS SELECT\n timeInserted,\n + \ destinationPodNamespace)\n TTL timeInserted + INTERVAL 3600 SECOND\n + \ SETTINGS merge_with_ttl_timeout = 3600\n POPULATE\n AS SELECT\n timeInserted,\n \ flowEndSeconds,\n flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n \ sourceNodeName,\n destinationNodeName,\n sourcePodNamespace,\n \ destinationPodNamespace,\n sum(octetDeltaCount) AS octetDeltaCount,\n @@ -120,13 +121,13 @@ data: AS reverseThroughputFromDestinationNode\n FROM flows\n GROUP BY\n timeInserted,\n \ flowEndSeconds,\n flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n \ sourceNodeName,\n destinationNodeName,\n sourcePodNamespace,\n - \ destinationPodNamespace;\n\n CREATE MATERIALIZED VIEW flows_policy_view\n - \ ENGINE = SummingMergeTree\n ORDER BY (\n timeInserted,\n flowEndSeconds,\n - \ flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n + \ destinationPodNamespace;\n\n CREATE MATERIALIZED VIEW IF NOT EXISTS + flows_policy_view\n ENGINE = SummingMergeTree\n ORDER BY (\n timeInserted,\n + \ flowEndSeconds,\n flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n \ egressNetworkPolicyName,\n egressNetworkPolicyRuleAction,\n ingressNetworkPolicyName,\n \ ingressNetworkPolicyRuleAction,\n sourcePodNamespace,\n destinationPodNamespace)\n - \ TTL timeInserted + INTERVAL 1 HOUR\n SETTINGS merge_with_ttl_timeout = - 3600\n POPULATE\n AS SELECT\n timeInserted,\n flowEndSeconds,\n + \ TTL timeInserted + INTERVAL 3600 SECOND\n SETTINGS merge_with_ttl_timeout + = 3600\n POPULATE\n AS SELECT\n timeInserted,\n flowEndSeconds,\n \ flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n \ egressNetworkPolicyName,\n egressNetworkPolicyRuleAction,\n 
ingressNetworkPolicyName,\n \ ingressNetworkPolicyRuleAction,\n sourcePodNamespace,\n destinationPodNamespace,\n @@ -142,10 +143,10 @@ data: \ ingressNetworkPolicyRuleAction,\n sourcePodNamespace,\n destinationPodNamespace;\n\n \ CREATE TABLE IF NOT EXISTS recommendations (\n id String,\n type String,\n timeCreated DateTime,\n yamls String\n ) engine=MergeTree\n - \ ORDER BY (timeCreated);\n \nEOSQL\n" + \ ORDER BY (timeCreated);\n \nEOSQL" kind: ConfigMap metadata: - name: clickhouse-mounted-configmap-dkbmg82ctg + name: clickhouse-mounted-configmap namespace: flow-visibility --- apiVersion: v1 @@ -4681,12 +4682,12 @@ data: } kind: ConfigMap metadata: - name: grafana-dashboard-config-h4fg25d7k9 + name: grafana-dashboard-config namespace: flow-visibility --- apiVersion: v1 data: - dashboard_provider.yml: | + dashboard_provider.yaml: |- apiVersion: 1 providers: - name: grafana-dashboards @@ -4697,12 +4698,12 @@ data: path: /var/lib/grafana/dashboards kind: ConfigMap metadata: - name: grafana-dashboard-provider-m7d5kfmmc6 + name: grafana-dashboard-provider namespace: flow-visibility --- apiVersion: v1 data: - datasource_provider.yml: | + datasource_provider.yaml: |- apiVersion: 1 datasources: - name: ClickHouse @@ -4718,7 +4719,7 @@ data: password: $CLICKHOUSE_PASSWORD kind: ConfigMap metadata: - name: grafana-datasource-provider-h868k56k95 + name: grafana-datasource-provider namespace: flow-visibility --- apiVersion: v1 @@ -4875,13 +4876,13 @@ spec: persistentVolumeClaim: claimName: grafana-pvc - configMap: - name: grafana-datasource-provider-h868k56k95 + name: grafana-datasource-provider name: grafana-datasource-provider - configMap: - name: grafana-dashboard-provider-m7d5kfmmc6 + name: grafana-dashboard-provider name: grafana-dashboard-provider - configMap: - name: grafana-dashboard-config-h4fg25d7k9 + name: grafana-dashboard-config name: grafana-dashboard-config --- apiVersion: clickhouse.altinity.com/v1 @@ -4911,6 +4912,7 @@ spec: spec: containers: - image: 
projects.registry.vmware.com/antrea/theia-clickhouse-server:21.11 + imagePullPolicy: IfNotPresent name: clickhouse volumeMounts: - mountPath: /docker-entrypoint-initdb.d @@ -4934,12 +4936,18 @@ spec: value: default.flows - name: MV_NAMES value: default.flows_pod_view default.flows_node_view default.flows_policy_view + - name: STORAGE_SIZE + value: 8Gi + - name: THRESHOLD + value: "0.5" + - name: DELETE_PERCENTAGE + value: "0.5" image: projects.registry.vmware.com/antrea/theia-clickhouse-monitor:latest imagePullPolicy: IfNotPresent name: clickhouse-monitor volumes: - configMap: - name: clickhouse-mounted-configmap-dkbmg82ctg + name: clickhouse-mounted-configmap name: clickhouse-configmap-volume - emptyDir: medium: Memory diff --git a/build/yamls/patches/dev/imagePullPolicy.yml b/build/yamls/patches/dev/imagePullPolicy.yml deleted file mode 100644 index 1045d54a7..000000000 --- a/build/yamls/patches/dev/imagePullPolicy.yml +++ /dev/null @@ -1,3 +0,0 @@ -- op: add - path: /spec/templates/podTemplates/0/spec/containers/1/imagePullPolicy - value: IfNotPresent diff --git a/build/yamls/patches/release/.gitignore b/build/yamls/patches/release/.gitignore deleted file mode 100644 index fdffa2a0f..000000000 --- a/build/yamls/patches/release/.gitignore +++ /dev/null @@ -1 +0,0 @@ -# placeholder diff --git a/docs/network-flow-visibility.md b/docs/network-flow-visibility.md index fcc460f96..eba788a55 100644 --- a/docs/network-flow-visibility.md +++ b/docs/network-flow-visibility.md @@ -10,6 +10,9 @@ - [Deployment Steps](#deployment-steps) - [Credentials Configuration](#credentials-configuration) - [ClickHouse Configuration](#clickhouse-configuration) + - [Service Customization](#service-customization) + - [Performance Configuration](#performance-configuration) + - [Persistent Volumes](#persistent-volumes) - [Pre-built Dashboards](#pre-built-dashboards) - [Flow Records Dashboard](#flow-records-dashboard) - [Pod-to-Pod Flows Dashboard](#pod-to-pod-flows-dashboard) @@ -56,14 +59,29 
@@ ClickHouse as the data storage, and use Grafana as the data visualization and mo ### Deployment Steps -To deploy the Grafana Flow Collector, the first step is to install the ClickHouse -Operator, which creates, configures and manages ClickHouse clusters. Check the [homepage](https://github.com/Altinity/clickhouse-operator) -for more information about the ClickHouse Operator. Current checked-in yaml is based on their -[v0.18.2](https://github.com/Altinity/clickhouse-operator/blob/refs/tags/0.18.2/deploy/operator/clickhouse-operator-install-bundle.yaml) released version. Running the following command -will install ClickHouse Operator into `kube-system` Namespace. +We support deploying the Grafana Flow Collector with Helm. Here is the +[Helm chart](../build/charts/theia/) for the Grafana Flow Collector. Please follow +the instructions from the Helm chart [README](../build/charts/theia/README.md) +to customize the installation. + +You can clone the repository and run the following command to install the Grafana +Flow Collector into Namespace `flow-visibility`. + +```bash +helm install theia ./build/charts/theia -n flow-visibility --create-namespace +``` + +We recommend using Helm to deploy the Grafana Flow Collector. But if you prefer +not to clone the repository, you can manually deploy it. The first step is to +install the ClickHouse Operator, which creates, configures and manages ClickHouse +clusters. Check the [homepage](https://github.com/Altinity/clickhouse-operator) +for more information about the ClickHouse Operator. Current checked-in yaml is +based on their [v0.18.2](https://github.com/Altinity/clickhouse-operator/blob/refs/tags/0.18.2/deploy/operator/clickhouse-operator-install-bundle.yaml) +released version. Running the following command will install ClickHouse Operator +into `kube-system` Namespace.
```bash -kubectl apply -f https://raw.githubusercontent.com/antrea-io/theia/main/build/yamls/clickhouse-operator-install-bundle.yaml +kubectl apply -f https://raw.githubusercontent.com/antrea-io/theia/main/build/charts/theia/crds/clickhouse-operator-install-bundle.yaml ``` To deploy a released version of the Grafana Flow Collector, find a deployment manifest @@ -151,11 +169,21 @@ You should be able to see a Grafana login page. Login credentials: - username: admin - password: admin -To stop the Grafana Flow Collector, run the following commands: +To stop the Grafana Flow Collector, run the following commands if you deploy it +by Helm: + +```shell +helm uninstall theia -n flow-visibility +kubectl delete namespace flow-visibility +kubectl delete -f https://raw.githubusercontent.com/antrea-io/theia/main/build/charts/theia/crds/clickhouse-operator-install-bundle.yaml -n kube-system +``` + +Run the following commands if you deploy it by the generated manifest available +online: ```shell kubectl delete -f flow-visibility.yml -kubectl delete -f https://raw.githubusercontent.com/antrea-io/theia/main/build/yamls/clickhouse-operator-install-bundle.yml -n kube-system +kubectl delete -f https://raw.githubusercontent.com/antrea-io/theia/main/build/charts/theia/crds/clickhouse-operator-install-bundle.yaml -n kube-system ``` #### Credentials Configuration @@ -230,6 +258,8 @@ Collector in production. #### ClickHouse Configuration +##### Service Customization + The ClickHouse database can be accessed through the Service `clickhouse-clickhouse`. The Pod exposes HTTP port at 8123 and TCP port at 9000 by default. The ports are specified in `flow-visibility.yml` as `serviceTemplates` of a `ClickHouseInstallation` @@ -278,6 +308,8 @@ metadata: namespace: flow-visibility ``` +##### Performance Configuration + The ClickHouse throughput depends on two factors - the storage size of the ClickHouse and the time interval between the batch commits to the ClickHouse. 
Larger storage size and longer commit interval provide higher throughput. @@ -297,11 +329,161 @@ storage size, please modify the `sizeLimit` in the following section. name: clickhouse-storage-volume ``` +To deploy ClickHouse with Persistent Volumes and limited storage size, please refer +to [Persistent Volumes](#persistent-volumes). + The time interval between the batch commits to the ClickHouse is specified in the [Flow Aggregator Configuration](https://github.com/antrea-io/antrea/blob/main/docs/network-flow-visibility.md#configuration-1) as `commitInterval`. The ClickHouse throughput grows sightly when the commit interval grows from 1s to 8s. A commit interval larger than 8s provides little improvement on the throughput. +##### Persistent Volumes + +By default, ClickHouse is deployed in memory. We support deploying ClickHouse with +Persistent Volumes. + +[PersistentVolume](https://kubernetes.io/docs/concepts/storage/persistent-volumes/) +(PV) is a piece of storage in the K8s cluster, which requires to be manually +provisioned by an administrator or dynamically provisioned using Storage Classes. +A PersistentVolumeClaim (PVC) is a request for storage which consumes PV. As +ClickHouse is deployed as a StatefulSet, the volume can be claimed using +`volumeClaimTemplate`. + +Please follow the steps below to deploy the ClickHouse with Persistent Volumes: + +1. Provision the PersistentVolume. K8s supports a great number of +[PersistentVolume types](https://kubernetes.io/docs/concepts/storage/persistent-volumes/#types-of-persistent-volumes). +You can provision your own PersistentVolume per your requirements. Here are +two simple examples for your reference. + + Before creating the PV manually, you need to create a `StorageClass` shown + in the section below. 
+ + ```yaml + apiVersion: storage.k8s.io/v1 + kind: StorageClass + metadata: + name: clickhouse-storage + provisioner: kubernetes.io/no-provisioner + volumeBindingMode: WaitForFirstConsumer + reclaimPolicy: Retain + allowVolumeExpansion: True + ``` + + After the `StorageClass` is created, you can create a Local PV or a NFS PV + by following the steps below. + + - Local PV allows you to store the ClickHouse data at a pre-defined path on + a specific Node. Refer to [createLocalPv.yml][local_pv_yaml] to create the + PV. Please replace `LOCAL_PATH` with the path to store the ClickHouse data + and label the Node used to store the ClickHouse data with + `antrea.io/clickhouse-data-node=`. + + ```yaml + apiVersion: v1 + kind: PersistentVolume + metadata: + name: clickhouse-pv + spec: + storageClassName: clickhouse-storage + capacity: + storage: 8Gi + accessModes: + - ReadWriteOnce + volumeMode: Filesystem + local: + path: LOCAL_PATH + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + - key: antrea.io/clickhouse-data-node + operator: Exists + ``` + + - NFS PV allows you to store the ClickHouse data on an existing NFS server. + Refer to the section below to create the PV. Please replace `NFS_SERVER_ADDRESS` + with the host name of the NFS server and `NFS_SERVER_PATH` with the exported + path on the NFS server. + + ```yaml + apiVersion: v1 + kind: PersistentVolume + metadata: + name: clickhouse-pv + spec: + storageClassName: clickhouse-storage + capacity: + storage: 8Gi + accessModes: + - ReadWriteOnce + volumeMode: Filesystem + nfs: + path: NFS_SERVER_PATH + server: NFS_SERVER_ADDRESS + ``` + + In both examples, you can set `.spec.capacity.storage` in PersistentVolume + to your storage size. This value is for informative purpose as K8s does not + enforce the capacity of PVs. If you want to limit the storage usage, you need + to ask for your storage system to enforce that. For example, you can create + a Local PV on a partition with the limited size. 
We recommend using a dedicated + saving space for the ClickHouse if you are going to run the Flow Collector in + production. + + As these examples do not use any dynamic provisioner, the reclaim policy + for the PVs is `Retain` by default. After stopping the Grafana Flow Collector, + if you no longer need the data for future use, you may need to manually clean + up the data on the local disk or NFS server. + +1. Request the PV for ClickHouse. Please add a `volumeClaimTemplates` section +under `.spec.templates` for the resource `ClickHouseInstallation` in +`flow-visibility.yml` as shown in the example below. `storageClassName` should +be set to your own `StorageClass` name, and `.resources.requests.storage` +should be set to your storage size. + + ```yaml + volumeClaimTemplates: + - name: clickhouse-storage-template + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 8Gi + storageClassName: clickhouse-storage + ``` + + Then add this template as `dataVolumeClaimTemplate` to the section below. + + ```yaml + defaults: + templates: + dataVolumeClaimTemplate: clickhouse-storage-template + podTemplate: pod-template + serviceTemplate: service-template + ``` + +1. Remove the in-memory related deployment options, by removing the appropriate +`volume` and `volumeMount` for the `ClickHouseInstallation` resource in +`flow-visibility.yml`. + + The `volumeMounts` entry to be removed is the following one: + + ```yaml + - mountPath: /var/lib/clickhouse + name: clickhouse-storage-volume + ``` + + The `volumes` entry to be removed is the following one: + + ```yaml + - emptyDir: + medium: Memory + sizeLimit: 8Gi + name: clickhouse-storage-volume + ``` + ### Pre-built Dashboards The following dashboards are pre-built and are recommended for Antrea flow @@ -411,26 +593,15 @@ are two ways to import the dashboard depending on your needs: like our pre-built dashboards, generate a deployment manifest with the changes by following the steps below: -1.
Clone the repository. Exported dashboard JSON files should be placed under `antrea/build/yamls/base/provisioning/dashboards`. -1. If a new dashboard is added, edit [kustomization.yml][flow_visibility_kustomization_yaml] -by adding the file in the following section: +1. Clone the repository. Exported dashboard JSON files should be placed under `theia/build/charts/theia/provisioning/dashboards`. +1. Deploy the Grafana Flow Collector with Helm by running - ```yaml - - name: grafana-dashboard-config - files: - - provisioning/dashboards/flow_records_dashboard.json - - provisioning/dashboards/pod_to_pod_dashboard.json - - provisioning/dashboards/pod_to_service_dashboard.json - - provisioning/dashboards/pod_to_external_dashboard.json - - provisioning/dashboards/node_to_node_dashboard.json - - provisioning/dashboards/networkpolicy_allow_dashboard.json - - provisioning/dashboards/[new_dashboard_name].json + ```bash + helm install theia ./build/charts/theia -n flow-visibility --create-namespace ``` -1.
Generate the new YAML manifest by running: + Or generate the new YAML manifest by running: -```bash -./hack/generate-manifest.sh > build/yamls/flow-visibility.yml -``` - -[flow_visibility_kustomization_yaml]: ../build/yamls/base/kustomization.yml + ```bash + ./hack/generate-manifest.sh > build/yamls/flow-visibility.yml + ``` diff --git a/go.mod b/go.mod index 21f4f033d..e7980aceb 100644 --- a/go.mod +++ b/go.mod @@ -50,7 +50,7 @@ require ( github.com/evanphx/json-patch v4.11.0+incompatible // indirect github.com/fatih/color v1.10.0 // indirect github.com/fsnotify/fsnotify v1.4.9 // indirect - github.com/go-logr/logr v0.4.0 // indirect + github.com/go-logr/logr v1.2.0 // indirect github.com/go-openapi/jsonpointer v0.19.3 // indirect github.com/go-openapi/jsonreference v0.19.3 // indirect github.com/go-openapi/spec v0.19.5 // indirect diff --git a/go.sum b/go.sum index 46e419b4b..3b04e5be0 100644 --- a/go.sum +++ b/go.sum @@ -245,8 +245,9 @@ github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= -github.com/go-logr/logr v0.4.0 h1:K7/B1jt6fIBQVd4Owv2MqGQClcgf0R266+7C/QjRcLc= github.com/go-logr/logr v0.4.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= +github.com/go-logr/logr v1.2.0 h1:QK40JKJyMdUDz+h+xvCsru/bJhvG0UxvePV0ufL/AcE= +github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/zapr v0.1.0/go.mod h1:tabnROwaDl0UNxkVeFRbY8bwB37GwRv0P8lg6aAiEnk= github.com/go-logr/zapr v0.4.0 h1:uc1uML3hRYL9/ZZPdgHS/n8Nzo+eaYL/Efxkkamf7OM= github.com/go-logr/zapr v0.4.0/go.mod h1:tabnROwaDl0UNxkVeFRbY8bwB37GwRv0P8lg6aAiEnk= diff --git a/hack/generate-manifest.sh b/hack/generate-manifest.sh index 48662c951..d1a241c20 100755 --- 
a/hack/generate-manifest.sh +++ b/hack/generate-manifest.sh @@ -21,15 +21,14 @@ function echoerr { } _usage="Usage: $0 [--mode (dev|release)] [--keep] [--help|-h] -Generate a YAML manifest for the Clickhouse-Grafana Flow-visibility Solution, using Kustomize, and -print it to stdout. +Generate a YAML manifest for the Clickhouse-Grafana Flow-visibility Solution, using Helm and +Kustomize, and print it to stdout. --mode (dev|release) Choose the configuration variant that you need (default is 'dev') - --keep Debug flag which will preserve the generated kustomization.yml - -This tool uses kustomize (https://github.com/kubernetes-sigs/kustomize) to generate manifests for -Clickhouse-Grafana Flow-visibility Solution. You can set the KUSTOMIZE environment variable to the -path of the kustomize binary you want us to use. Otherwise we will look for kustomize in your PATH -and your GOPATH. If we cannot find kustomize there, we will try to install it." +This tool uses Helm 3 (https://helm.sh/) and Kustomize (https://github.com/kubernetes-sigs/kustomize) +to generate manifests for Antrea. You can set the HELM and KUSTOMIZE environment variable to +the path of the helm and kustomize binary you want us to use. Otherwise we will download the +appropriate version of the helm and kustomize binary and use it (this is the recommended +approach since different versions of helm and kustomize may create different output YAMLs)." function print_usage { echoerr "$_usage" @@ -40,7 +39,6 @@ function print_help { } MODE="dev" -KEEP=false while [[ $# -gt 0 ]] do @@ -51,10 +49,6 @@ case $key in MODE="$2" shift 2 ;; - --keep) - KEEP=true - shift - ;; -h|--help) print_usage exit 0 @@ -86,6 +80,16 @@ fi THIS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +source $THIS_DIR/verify-helm.sh + +if [ -z "$HELM" ]; then + HELM="$(verify_helm)" +elif ! 
$HELM version > /dev/null 2>&1; then + echoerr "$HELM does not appear to be a valid helm binary" + print_help + exit 1 +fi + source $THIS_DIR/verify-kustomize.sh if [ -z "$KUSTOMIZE" ]; then @@ -96,36 +100,32 @@ elif ! $KUSTOMIZE version > /dev/null 2>&1; then exit 1 fi -KUSTOMIZATION_DIR=$THIS_DIR/../build/yamls - -TMP_DIR=$(mktemp -d $KUSTOMIZATION_DIR/overlays.XXXXXXXX) - -pushd $TMP_DIR > /dev/null - -BASE=../../base - -mkdir $MODE && cd $MODE -touch kustomization.yml -$KUSTOMIZE edit add base $BASE -# ../../patches/$MODE may be empty so we use find and not simply cp -find ../../patches/$MODE -name \*.yml -exec cp {} . \; - -if [ "$MODE" == "dev" ]; then - $KUSTOMIZE edit set image clickhouse-monitor=projects.registry.vmware.com/antrea/theia-clickhouse-monitor:latest - $KUSTOMIZE edit add patch --path imagePullPolicy.yml --group clickhouse.altinity.com --version v1 --kind ClickHouseInstallation --name clickhouse -fi +HELM_TMP_DIR=$(mktemp -d $THIS_DIR/../build/yamls/chart-values.XXXXXXXX) +EXTRA_VALUES="" if [ "$MODE" == "release" ]; then - $KUSTOMIZE edit set image clickhouse-monitor=$IMG_NAME:$IMG_TAG + EXTRA_VALUES="--set clickhouse.monitorImage.repository=$IMG_NAME,clickhouse.monitorImage.tag=$IMG_TAG" fi +THEIA_CHART=$THIS_DIR/../build/charts/theia +KUSTOMIZATION_DIR=$THIS_DIR/../build/yamls +# intermediate manifest +MANIFEST=$KUSTOMIZATION_DIR/base/manifest.yaml +# Suppress potential Helm warnings about invalid permissions for Kubeconfig file +# by throwing away related warnings. 
+$HELM template \ + --namespace flow-visibility \ + $EXTRA_VALUES \ + "$THEIA_CHART"\ + 2> >(grep -v 'This is insecure' >&2)\ + > $MANIFEST + +# Add flow-visibility Namespace resource by Kustomize +KUSTOMIZE_TMP_DIR=$(mktemp -d $KUSTOMIZATION_DIR/overlays.XXXXXXXX) +cd $KUSTOMIZATION_DIR/base $KUSTOMIZE build -popd > /dev/null - - -if $KEEP; then - echoerr "Kustomization file is at $TMP_DIR/$MODE/kustomization.yml" -else - rm -rf $TMP_DIR -fi +# clean +rm -rf $MANIFEST +rm -rf $HELM_TMP_DIR +rm -rf $KUSTOMIZE_TMP_DIR diff --git a/hack/verify-helm.sh b/hack/verify-helm.sh new file mode 100644 index 000000000..40f2929b5 --- /dev/null +++ b/hack/verify-helm.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash + +# Copyright 2022 Antrea Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +THIS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +_BINDIR="$THIS_DIR/.bin" +# Must be an exact match, as the generated YAMLs may not be consistent across +# versions +_HELM_VERSION="v3.8.1" + +# Ensure the helm tool exists and is the correct version, or install it +verify_helm() { + # Check if there is already a helm binary in $_BINDIR and if yes, check if + # the version matches the expected one. + local helm="$(PATH=$_BINDIR command -v helm)" + if [ -x "$helm" ]; then + # Verify version if helm was already installed. 
+ local helm_version="$($helm version --short 2> >(grep -v 'This is insecure' >&2))" + # Should work with: + # - v3.8.1 + # - v3.8.1+g5cb9af4 + helm_version="${helm_version%+*}" + if [ "${helm_version}" == "${_HELM_VERSION}" ]; then + # If version is exact match, stop here. + echo "$helm" + return 0 + fi + >&2 echo "Detected helm version ($helm_version) does not match expected one ($_HELM_VERSION), installing correct version" + fi + local ostype="" + if [[ "$OSTYPE" == "linux-gnu" ]]; then + ostype="linux" + elif [[ "$OSTYPE" == "darwin"* ]]; then + ostype="darwin" + else + >&2 echo "Unsupported OS type $OSTYPE" + return 1 + fi + rc=0 + local unameArch="$(uname -m)" || rc=$? + if [ $rc -ne 0 ]; then + >&2 echo "Cannot detect architecture type, uname not available?" + return 1 + fi + local arch="" + case "$unameArch" in + x86_64) arch="amd64";; + arm64) arch="arm64";; + *) >&2 echo "Unsupported architecture type $unameArch"; return 1;; + esac + + >&2 echo "Installing helm" + local helm_url="https://get.helm.sh/helm-${_HELM_VERSION}-${ostype}-${arch}.tar.gz" + curl -sLo helm.tar.gz "${helm_url}" || return 1 + mkdir -p "$_BINDIR" || return 1 + tar -xzf helm.tar.gz -C "$_BINDIR" --strip-components=1 "${ostype}-${arch}/helm" || return 1 + rm -f helm.tar.gz + helm="$_BINDIR/helm" + echo "$helm" + return 0 +} diff --git a/plugins/clickhouse-monitor/main.go b/plugins/clickhouse-monitor/main.go index cc1f6c61e..be1a72f64 100644 --- a/plugins/clickhouse-monitor/main.go +++ b/plugins/clickhouse-monitor/main.go @@ -18,19 +18,17 @@ import ( "database/sql" "fmt" "os" + "strconv" "strings" "time" "github.com/ClickHouse/clickhouse-go" + "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/klog/v2" ) const ( - // The storage percentage at which the monitor starts to delete old records. By default, if the storage usage is larger than 50%, it starts to delete the old records. 
- threshold = 0.5 - // The percentage of records in ClickHouse that will be deleted when the storage grows above threshold. - deletePercentage = 0.5 // The monitor stops for 3 intervals after a deletion to wait for the ClickHouse MergeTree Engine to release memory. skipRoundsNum = 3 // Connection to ClickHouse times out if it fails for 1 minute. @@ -48,18 +46,46 @@ const ( ) var ( + // Storage size allocated for the ClickHouse in number of bytes + allocatedSpace uint64 // The name of the table to store the flow records tableName = os.Getenv("TABLE_NAME") // The names of the materialized views mvNames = strings.Split(os.Getenv("MV_NAMES"), " ") // The remaining number of rounds to be skipped remainingRoundsNum = 0 + // The storage percentage at which the monitor starts to delete old records. + threshold float64 + // The percentage of records in ClickHouse that will be deleted when the storage grows above threshold. + deletePercentage float64 ) func main() { // Check environment variables - if len(tableName) == 0 || len(mvNames) == 0 { - klog.ErrorS(nil, "Unable to load environment variables, TABLE_NAME and MV_NAMES must be defined") + allocatedSpaceStr := os.Getenv("STORAGE_SIZE") + thresholdStr := os.Getenv("THRESHOLD") + deletePercentageStr := os.Getenv("DELETE_PERCENTAGE") + + if len(tableName) == 0 || len(mvNames) == 0 || len(allocatedSpaceStr) == 0 || len(thresholdStr) == 0 || len(deletePercentageStr) == 0 { + klog.ErrorS(nil, "Unable to load environment variables, TABLE_NAME, MV_NAMES, STORAGE_SIZE, THRESHOLD and DELETE_PERCENTAGE must be defined") + return + } + var err error + quantity, err := resource.ParseQuantity(allocatedSpaceStr) + if err != nil { + klog.ErrorS(err, "Error when parsing STORAGE_SIZE") + return + } + allocatedSpace = uint64(quantity.Value()) + + threshold, err = strconv.ParseFloat(thresholdStr, 64) + if err != nil { + klog.ErrorS(err, "Error when parsing THRESHOLD") + return + } + deletePercentage, err = 
strconv.ParseFloat(deletePercentageStr, 64) + if err != nil { + klog.ErrorS(err, "Error when parsing DELETE_PERCENTAGE") return } @@ -68,6 +94,7 @@ func main() { klog.ErrorS(err, "Error when connecting to ClickHouse") os.Exit(1) } + checkStorageCondition(connect) wait.Forever(func() { // The monitor stops working for several rounds after a deletion // as the release of memory space by the ClickHouse MergeTree engine requires time @@ -118,28 +145,69 @@ func connectLoop() (*sql.DB, error) { return connect, nil } -// Checks the memory usage in the ClickHouse, and deletes records when it exceeds the threshold. -func monitorMemory(connect *sql.DB) { +// Check if ClickHouse shares storage space with other software +func checkStorageCondition(connect *sql.DB) { var ( freeSpace uint64 + usedSpace uint64 totalSpace uint64 ) - // Get memory usage from ClickHouse system table + getDiskUsage(connect, &freeSpace, &totalSpace) + getClickHouseUsage(connect, &usedSpace) + availablePercentage := float64(freeSpace+usedSpace) / float64(totalSpace) + klog.InfoS("Low available percentage implies ClickHouse does not save data on a dedicated disk", "availablePercentage", availablePercentage) +} + +func getDiskUsage(connect *sql.DB, freeSpace *uint64, totalSpace *uint64) { + // Get free space from ClickHouse system table if err := wait.PollImmediate(queryRetryInterval, queryTimeout, func() (bool, error) { - if err := connect.QueryRow("SELECT free_space, total_space FROM system.disks").Scan(&freeSpace, &totalSpace); err != nil { - klog.ErrorS(err, "Failed to get memory usage for ClickHouse") + if err := connect.QueryRow("SELECT free_space, total_space FROM system.disks").Scan(freeSpace, totalSpace); err != nil { + klog.ErrorS(err, "Failed to get the disk usage") return false, nil } else { return true, nil } }); err != nil { - klog.ErrorS(err, "Failed to get memory usage for ClickHouse", "timeout", queryTimeout) + klog.ErrorS(err, "Failed to get the disk usage", "timeout", queryTimeout) 
return } +} + +func getClickHouseUsage(connect *sql.DB, usedSpace *uint64) { + // Get space usage from ClickHouse system table + if err := wait.PollImmediate(queryRetryInterval, queryTimeout, func() (bool, error) { + if err := connect.QueryRow("SELECT SUM(bytes) FROM system.parts").Scan(usedSpace); err != nil { + klog.ErrorS(err, "Failed to get the used space size by the ClickHouse") + return false, nil + } else { + return true, nil + } + }); err != nil { + klog.ErrorS(err, "Failed to get the used space size by the ClickHouse", "timeout", queryTimeout) + return + } +} + +// Checks the memory usage in the ClickHouse, and deletes records when it exceeds the threshold. +func monitorMemory(connect *sql.DB) { + var ( + freeSpace uint64 + usedSpace uint64 + totalSpace uint64 + ) + getDiskUsage(connect, &freeSpace, &totalSpace) + getClickHouseUsage(connect, &usedSpace) + + // Total space for ClickHouse is the smaller one of the user allocated space size and the actual space size on the disk + if (freeSpace + usedSpace) < allocatedSpace { + totalSpace = freeSpace + usedSpace + } else { + totalSpace = allocatedSpace + } // Calculate the memory usage - usagePercentage := float64(totalSpace-freeSpace) / float64(totalSpace) - klog.InfoS("Memory usage", "total", totalSpace, "used", totalSpace-freeSpace, "percentage", usagePercentage) + usagePercentage := float64(usedSpace) / float64(totalSpace) + klog.InfoS("Memory usage", "total", totalSpace, "used", usedSpace, "percentage", usagePercentage) // Delete records when memory usage is larger than threshold if usagePercentage > threshold { timeBoundary, err := getTimeBoundary(connect) @@ -169,7 +237,7 @@ func getTimeBoundary(connect *sql.DB) (time.Time, error) { if err != nil { return timeBoundary, err } - command := fmt.Sprintf("SELECT timeInserted FROM %s LIMIT 1 OFFSET %d", tableName, deleteRowNum) + command := fmt.Sprintf("SELECT timeInserted FROM %s LIMIT 1 OFFSET %d", tableName, deleteRowNum-1) if err := 
wait.PollImmediate(queryRetryInterval, queryTimeout, func() (bool, error) { if err := connect.QueryRow(command).Scan(&timeBoundary); err != nil { klog.ErrorS(err, "Failed to get timeInserted boundary", "table name", tableName) diff --git a/plugins/clickhouse-monitor/main_test.go b/plugins/clickhouse-monitor/main_test.go index 2de36cbfe..7a0ca57ce 100644 --- a/plugins/clickhouse-monitor/main_test.go +++ b/plugins/clickhouse-monitor/main_test.go @@ -45,11 +45,13 @@ func TestMonitor(t *testing.T) { func testMonitorMemoryWithDeletion(t *testing.T, db *sql.DB, mock sqlmock.Sqlmock) { baseTime := time.Now() diskRow := sqlmock.NewRows([]string{"free_space", "total_space"}).AddRow(4, 10) + partsRow := sqlmock.NewRows([]string{"SUM(bytes)"}).AddRow(5) countRow := sqlmock.NewRows([]string{"count"}).AddRow(10) timeRow := sqlmock.NewRows([]string{"timeInserted"}).AddRow(baseTime.Add(5 * time.Second)) mock.ExpectQuery("SELECT free_space, total_space FROM system.disks").WillReturnRows(diskRow) + mock.ExpectQuery("SELECT SUM(bytes) FROM system.parts").WillReturnRows(partsRow) mock.ExpectQuery("SELECT COUNT() FROM flows").WillReturnRows(countRow) - mock.ExpectQuery("SELECT timeInserted FROM flows LIMIT 1 OFFSET 5").WillReturnRows(timeRow) + mock.ExpectQuery("SELECT timeInserted FROM flows LIMIT 1 OFFSET 4").WillReturnRows(timeRow) for _, table := range []string{"flows", "flows_pod_view", "flows_node_view", "flows_policy_view"} { command := fmt.Sprintf("ALTER TABLE %s DELETE WHERE timeInserted < toDateTime('%v')", table, baseTime.Add(5*time.Second).Format(timeFormat)) mock.ExpectExec(command).WillReturnResult(sqlmock.NewResult(0, 5)) @@ -57,6 +59,10 @@ func testMonitorMemoryWithDeletion(t *testing.T, db *sql.DB, mock sqlmock.Sqlmoc tableName = "flows" mvNames = []string{"flows_pod_view", "flows_node_view", "flows_policy_view"} + allocatedSpace = 10 + threshold = 0.5 + deletePercentage = 0.5 + monitorMemory(db) if err := mock.ExpectationsWereMet(); err != nil { @@ -67,8 +73,13 @@ 
func testMonitorMemoryWithDeletion(t *testing.T, db *sql.DB, mock sqlmock.Sqlmoc func testMonitorMemoryWithoutDeletion(t *testing.T, db *sql.DB, mock sqlmock.Sqlmock) { diskRow := sqlmock.NewRows([]string{"free_space", "total_space"}).AddRow(6, 10) + partsRow := sqlmock.NewRows([]string{"SUM(bytes)"}).AddRow(5) mock.ExpectQuery("SELECT free_space, total_space FROM system.disks").WillReturnRows(diskRow) + mock.ExpectQuery("SELECT SUM(bytes) FROM system.parts").WillReturnRows(partsRow) + allocatedSpace = 10 + threshold = 0.5 + deletePercentage = 0.5 monitorMemory(db) if err := mock.ExpectationsWereMet(); err != nil {