Skip to content

Commit

Permalink
Antrea Prometheus integration (#236)
Browse files Browse the repository at this point in the history
Integrate with Prometheus monitoring solution.
Integration of the Prometheus client into Antrea controller and agent
allows the exposure of various metrics to Prometheus server.
In addition to Antrea's own set of metrics, the Prometheus client will also
expose metrics defined by other components that are part of the Antrea
ecosystem, e.g. the Go runtime and Prometheus itself.
  • Loading branch information
ksamoray committed Mar 22, 2020
1 parent 938169e commit e15a3e6
Show file tree
Hide file tree
Showing 12 changed files with 279 additions and 13 deletions.
35 changes: 30 additions & 5 deletions build/yamls/antrea-ipsec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ subjects:
---
apiVersion: v1
data:
antrea-agent.conf: |
antrea-agent.conf: |-
# Name of the OpenVSwitch bridge antrea-agent will create and use.
# Make sure it doesn't conflict with your existing OpenVSwitch bridges.
#ovsBridge: br-int
Expand Down Expand Up @@ -281,6 +281,15 @@ data:
# Underlying network must be capable of supporting Pod traffic across IP subnet.
# hybrid: noEncap if worker Nodes on same subnet, otherwise encap.
#trafficEncapMode: encap
# Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener
#enablePrometheusMetrics: false
# Enable golang metrics exposure via Prometheus.
#enablePrometheusGoMetrics: false
# Enable process metrics exposure via Prometheus.
#enablePrometheusProcessMetrics: false
antrea-cni.conf: |
{
"cniVersion":"0.3.0",
Expand All @@ -290,13 +299,21 @@ data:
"type": "host-local"
}
}
antrea-controller.conf: ""
antrea-controller.conf: |-
# Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener.
#enablePrometheusMetrics: false
# Enable golang metrics exposure via Prometheus.
#enablePrometheusGoMetrics: false
# Enable process metrics exposure via Prometheus.
#enablePrometheusProcessMetrics: false
kind: ConfigMap
metadata:
annotations: {}
labels:
app: antrea
name: antrea-config-ghc6hct4mg
name: antrea-config-ffmk8fd5g4
namespace: kube-system
---
apiVersion: v1
Expand All @@ -311,6 +328,10 @@ type: Opaque
apiVersion: v1
kind: Service
metadata:
annotations:
prometheus.io/port: "443"
prometheus.io/scheme: https
prometheus.io/scrape: "true"
labels:
app: antrea
name: antrea
Expand Down Expand Up @@ -398,7 +419,7 @@ spec:
key: node-role.kubernetes.io/master
volumes:
- configMap:
name: antrea-config-ghc6hct4mg
name: antrea-config-ffmk8fd5g4
name: antrea-config
---
apiVersion: apiregistration.k8s.io/v1
Expand Down Expand Up @@ -432,6 +453,10 @@ spec:
component: antrea-agent
template:
metadata:
annotations:
prometheus.io/port: "10443"
prometheus.io/scheme: https
prometheus.io/scrape: "true"
labels:
app: antrea
component: antrea-agent
Expand Down Expand Up @@ -598,7 +623,7 @@ spec:
operator: Exists
volumes:
- configMap:
name: antrea-config-ghc6hct4mg
name: antrea-config-ffmk8fd5g4
name: antrea-config
- hostPath:
path: /etc/cni/net.d
Expand Down
35 changes: 30 additions & 5 deletions build/yamls/antrea.yml
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ subjects:
---
apiVersion: v1
data:
antrea-agent.conf: |
antrea-agent.conf: |-
# Name of the OpenVSwitch bridge antrea-agent will create and use.
# Make sure it doesn't conflict with your existing OpenVSwitch bridges.
#ovsBridge: br-int
Expand Down Expand Up @@ -281,6 +281,15 @@ data:
# Underlying network must be capable of supporting Pod traffic across IP subnet.
# hybrid: noEncap if worker Nodes on same subnet, otherwise encap.
#trafficEncapMode: encap
# Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener
#enablePrometheusMetrics: false
# Enable golang metrics exposure via Prometheus.
#enablePrometheusGoMetrics: false
# Enable process metrics exposure via Prometheus.
#enablePrometheusProcessMetrics: false
antrea-cni.conf: |
{
"cniVersion":"0.3.0",
Expand All @@ -290,18 +299,30 @@ data:
"type": "host-local"
}
}
antrea-controller.conf: ""
antrea-controller.conf: |-
# Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener.
#enablePrometheusMetrics: false
# Enable golang metrics exposure via Prometheus.
#enablePrometheusGoMetrics: false
# Enable process metrics exposure via Prometheus.
#enablePrometheusProcessMetrics: false
kind: ConfigMap
metadata:
annotations: {}
labels:
app: antrea
name: antrea-config-4gb24b784b
name: antrea-config-k49g4578m4
namespace: kube-system
---
apiVersion: v1
kind: Service
metadata:
annotations:
prometheus.io/port: "443"
prometheus.io/scheme: https
prometheus.io/scrape: "true"
labels:
app: antrea
name: antrea
Expand Down Expand Up @@ -389,7 +410,7 @@ spec:
key: node-role.kubernetes.io/master
volumes:
- configMap:
name: antrea-config-4gb24b784b
name: antrea-config-k49g4578m4
name: antrea-config
---
apiVersion: apiregistration.k8s.io/v1
Expand Down Expand Up @@ -423,6 +444,10 @@ spec:
component: antrea-agent
template:
metadata:
annotations:
prometheus.io/port: "10443"
prometheus.io/scheme: https
prometheus.io/scrape: "true"
labels:
app: antrea
component: antrea-agent
Expand Down Expand Up @@ -557,7 +582,7 @@ spec:
operator: Exists
volumes:
- configMap:
name: antrea-config-4gb24b784b
name: antrea-config-k49g4578m4
name: antrea-config
- hostPath:
path: /etc/cni/net.d
Expand Down
4 changes: 4 additions & 0 deletions build/yamls/base/agent.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ spec:
type: RollingUpdate
template:
metadata:
annotations:
prometheus.io/port: "10443"
prometheus.io/scrape: "true"
prometheus.io/scheme: "https"
labels:
component: antrea-agent
spec:
Expand Down
9 changes: 9 additions & 0 deletions build/yamls/base/conf/antrea-agent.conf
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,12 @@
# Underlying network must be capable of supporting Pod traffic across IP subnet.
# hybrid: noEncap if worker Nodes on same subnet, otherwise encap.
#trafficEncapMode: encap

# Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener.
#enablePrometheusMetrics: false

# Enable golang metrics exposure via Prometheus.
#enablePrometheusGoMetrics: false

# Enable process metrics exposure via Prometheus.
#enablePrometheusProcessMetrics: false
8 changes: 8 additions & 0 deletions build/yamls/base/conf/antrea-controller.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener.
#enablePrometheusMetrics: false

# Enable golang metrics exposure via Prometheus.
#enablePrometheusGoMetrics: false

# Enable process metrics exposure via Prometheus.
#enablePrometheusProcessMetrics: false
4 changes: 4 additions & 0 deletions build/yamls/base/controller.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
apiVersion: v1
kind: Service
metadata:
annotations:
prometheus.io/port: "443"
prometheus.io/scrape: "true"
prometheus.io/scheme: "https"
name: antrea
spec:
ports:
Expand Down
8 changes: 8 additions & 0 deletions cmd/antrea-agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/vmware-tanzu/antrea/pkg/agent/controller/networkpolicy"
"github.com/vmware-tanzu/antrea/pkg/agent/controller/noderoute"
"github.com/vmware-tanzu/antrea/pkg/agent/interfacestore"
"github.com/vmware-tanzu/antrea/pkg/agent/metrics"
"github.com/vmware-tanzu/antrea/pkg/agent/openflow"
"github.com/vmware-tanzu/antrea/pkg/agent/route"
"github.com/vmware-tanzu/antrea/pkg/apis/networking/v1beta1"
Expand Down Expand Up @@ -146,6 +147,13 @@ func run(o *Options) error {

go networkPolicyController.Run(stopCh)

if o.config.EnablePrometheusMetrics {
go metrics.StartListener(
o.config.EnablePrometheusGoMetrics,
o.config.EnablePrometheusProcessMetrics,
o.config.OVSBridge, ifaceStore, ofClient)
}

agentMonitor := monitor.NewAgentMonitor(
crdClient,
o.config.OVSBridge,
Expand Down
9 changes: 9 additions & 0 deletions cmd/antrea-agent/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,13 @@ type AgentConfig struct {
// Underlying network must be capable of supporting Pod traffic across IP subnet.
// Hybrid: noEncap if worker Nodes on same subnet, otherwise encap.
TrafficEncapMode string `yaml:"trafficEncapMode,omitempty"`
// Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener.
// Defaults to false.
EnablePrometheusMetrics bool `yaml:"enablePrometheusMetrics,omitempty"`
// Enable golang metrics exposure via Prometheus.
// Defaults to false.
EnablePrometheusGoMetrics bool `yaml:"enablePrometheusGoMetrics,omitempty"`
// Enable process metrics exposure via Prometheus.
// Defaults to false.
EnablePrometheusProcessMetrics bool `yaml:"enablePrometheusProcessMetrics,omitempty"`
}
9 changes: 9 additions & 0 deletions cmd/antrea-controller/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,13 @@ type ControllerConfig struct {
// clientConnection specifies the kubeconfig file and client connection settings for the agent
// to communicate with the apiserver.
ClientConnection componentbaseconfig.ClientConnectionConfiguration `yaml:"clientConnection"`
// Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener.
// Defaults to false.
EnablePrometheusMetrics bool `yaml:"enablePrometheusMetrics,omitempty"`
// Enable golang metrics exposure via Prometheus.
// Defaults to false.
EnablePrometheusGoMetrics bool `yaml:"enablePrometheusGoMetrics,omitempty"`
// Enable process metrics exposure via Prometheus.
// Defaults to false.
EnablePrometheusProcessMetrics bool `yaml:"enablePrometheusProcessMetrics,omitempty"`
}
51 changes: 48 additions & 3 deletions cmd/antrea-controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,12 @@ package main
import (
"fmt"
"net"
"net/http"
"os"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
genericopenapi "k8s.io/apiserver/pkg/endpoints/openapi"
genericapiserver "k8s.io/apiserver/pkg/server"
genericoptions "k8s.io/apiserver/pkg/server/options"
Expand Down Expand Up @@ -71,7 +75,8 @@ func run(o *Options) error {
apiServerConfig, err := createAPIServerConfig(o.config.ClientConnection.Kubeconfig,
addressGroupStore,
appliedToGroupStore,
networkPolicyStore)
networkPolicyStore,
o.config.EnablePrometheusMetrics)
if err != nil {
return fmt.Errorf("error creating API server config: %v", err)
}
Expand All @@ -94,22 +99,62 @@ func run(o *Options) error {

go apiServer.GenericAPIServer.PrepareRun().Run(stopCh)

if o.config.EnablePrometheusMetrics {
	// Pass each toggle to its matching parameter: Go runtime metrics first,
	// process metrics second. Passing the Go toggle twice would make the
	// enablePrometheusProcessMetrics option ineffective.
	go createPrometheusMetricsListener(
		o.config.EnablePrometheusGoMetrics,
		o.config.EnablePrometheusProcessMetrics)
}

<-stopCh
klog.Info("Stopping Antrea controller")
return nil
}

// createPrometheusMetricsListener initializes Prometheus metrics collection for
// the Antrea controller and installs the "/metrics" handler on the default
// net/http mux. It does not itself start an HTTP server.
//
// It registers the "antrea_controller_host" gauge (constant value 1, labelled
// with the local hostname) in the default Prometheus registry.
//
// enablePrometheusGoMetrics: when false, the Go runtime collector is
// unregistered from the default registry.
// enablePrometheusProcessMetrics: when false, the process collector is
// unregistered from the default registry.
func createPrometheusMetricsListener(
	enablePrometheusGoMetrics bool,
	enablePrometheusProcessMetrics bool) {
	hostname, err := os.Hostname()
	if err != nil {
		// Best effort: keep going with an empty "host" label rather than abort.
		klog.Errorf("Failed to retrieve controller hostname, %v", err)
	}

	klog.Info("Initializing prometheus")
	gaugeHost := prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "antrea_controller_host",
		Help: "Antrea controller hostname (as a label), typically used in grouping/aggregating stats; " +
			"the label defaults to the hostname of the host but can be overridden by configuration. " +
			"The value of the gauge is always set to 1.",
		ConstLabels: prometheus.Labels{"host": hostname},
	})
	gaugeHost.Set(1)
	// MustRegister panics on a duplicate registration, so this function is
	// expected to run at most once per process.
	prometheus.MustRegister(gaugeHost)
	http.Handle("/metrics", promhttp.Handler())

	// Unregistering a freshly built collector removes the equivalent collector
	// from the default registry, since both describe the same metrics.
	if !enablePrometheusGoMetrics {
		klog.Info("Golang metrics are disabled")
		prometheus.Unregister(prometheus.NewGoCollector())
	}
	if !enablePrometheusProcessMetrics {
		klog.Info("Process metrics are disabled")
		prometheus.Unregister(prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{}))
	}
}

func createAPIServerConfig(kubeconfig string,
addressGroupStore storage.Interface,
appliedToGroupStore storage.Interface,
networkPolicyStore storage.Interface) (*apiserver.Config, error) {
networkPolicyStore storage.Interface,
enablePrometheusMetrics bool) (*apiserver.Config, error) {
// TODO:
// 1. Support user-provided certificate.
// 2. Support configurable https port.
secureServing := genericoptions.NewSecureServingOptions().WithLoopback()
authentication := genericoptions.NewDelegatingAuthenticationOptions()
authorization := genericoptions.NewDelegatingAuthorizationOptions()

if enablePrometheusMetrics {
authorization.WithAlwaysAllowPaths("/metrics")
}
// Set the PairName but leave certificate directory blank to generate in-memory by default
secureServing.ServerCert.CertDirectory = ""
secureServing.ServerCert.PairName = "antrea-apiserver"
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ require (
github.com/imdario/mergo v0.3.7 // indirect
github.com/j-keck/arping v1.0.0
github.com/kevinburke/ssh_config v0.0.0-20190725054713-01f96b0aa0cd
github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829
github.com/satori/go.uuid v1.2.0
github.com/spf13/cobra v0.0.5
github.com/spf13/pflag v1.0.3
Expand Down
Loading

0 comments on commit e15a3e6

Please sign in to comment.