Skip to content

Commit

Permalink
Metrics for Hetzner API calls
Browse files Browse the repository at this point in the history
Provide metrics for Hetzner API calls; this helps identify slowness, causes of throttling, and error bursts.

Signed-off-by: Maksim Paskal <[email protected]>
  • Loading branch information
maksim-paskal authored and LKaemmerling committed Aug 10, 2022
1 parent cabe5ba commit 7d1cd6a
Show file tree
Hide file tree
Showing 14 changed files with 181 additions and 3 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go 1.18

require (
github.com/hetznercloud/hcloud-go v1.35.0
github.com/prometheus/client_golang v1.12.1
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.8.0
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d
Expand Down Expand Up @@ -55,7 +56,6 @@ require (
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_golang v1.12.1 // indirect
github.com/prometheus/client_model v0.2.0 // indirect
github.com/prometheus/common v0.32.1 // indirect
github.com/prometheus/procfs v0.7.3 // indirect
Expand Down
14 changes: 14 additions & 0 deletions hcloud/cloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"strings"

"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/hcops"
"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/metrics"
"github.com/hetznercloud/hcloud-go/hcloud"
"github.com/hetznercloud/hcloud-go/hcloud/metadata"
cloudprovider "k8s.io/cloud-provider"
Expand All @@ -46,6 +47,8 @@ const (
hcloudLoadBalancersDisablePrivateIngress = "HCLOUD_LOAD_BALANCERS_DISABLE_PRIVATE_INGRESS"
hcloudLoadBalancersUsePrivateIP = "HCLOUD_LOAD_BALANCERS_USE_PRIVATE_IP"
hcloudLoadBalancersDisableIPv6 = "HCLOUD_LOAD_BALANCERS_DISABLE_IPV6"
hcloudMetricsEnabledENVVar = "HCLOUD_METRICS_ENABLED"
hcloudMetricsAddress = ":8233"
nodeNameENVVar = "NODE_NAME"
providerName = "hcloud"
providerVersion = "v1.9.1"
Expand All @@ -62,6 +65,7 @@ type cloud struct {

func newCloud(config io.Reader) (cloudprovider.Interface, error) {
const op = "hcloud/newCloud"
metrics.OperationCalled.WithLabelValues(op).Inc()

token := os.Getenv(hcloudTokenENVVar)
if token == "" {
Expand All @@ -79,6 +83,14 @@ func newCloud(config io.Reader) (cloudprovider.Interface, error) {
hcloud.WithToken(token),
hcloud.WithApplication("hcloud-cloud-controller", providerVersion),
}

// start metrics server if enabled (enabled by default)
if os.Getenv(hcloudMetricsEnabledENVVar) != "false" {
go metrics.Serve(hcloudMetricsAddress)

opts = append(opts, hcloud.WithInstrumentation(metrics.GetRegistry()))
}

if os.Getenv(hcloudDebugENVVar) == "true" {
opts = append(opts, hcloud.WithDebugWriter(os.Stderr))
}
Expand Down Expand Up @@ -244,6 +256,8 @@ func loadBalancerDefaultsFromEnv() (hcops.LoadBalancerDefaults, bool, bool, erro
// network.
func serverIsAttachedToNetwork(metadataClient *metadata.Client, networkID int) (bool, error) {
const op = "serverIsAttachedToNetwork"
metrics.OperationCalled.WithLabelValues(op).Inc()

serverPrivateNetworks, err := metadataClient.PrivateNetworks()
if err != nil {
return false, fmt.Errorf("%s: %s", op, err)
Expand Down
15 changes: 13 additions & 2 deletions hcloud/cloud_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ func TestNewCloud(t *testing.T) {
"HCLOUD_ENDPOINT", env.Server.URL,
"HCLOUD_TOKEN", "jr5g7ZHpPptyhJzZyHw2Pqu4g9gTqDvEceYpngPf79jN_NOT_VALID_dzhepnahq",
"NODE_NAME", "test",
"HCLOUD_METRICS_ENABLED", "false",
)
defer resetEnv()
env.Mux.HandleFunc("/servers", func(w http.ResponseWriter, r *http.Request) {
Expand All @@ -85,7 +86,10 @@ func TestNewCloud(t *testing.T) {
}

func TestNewCloudWrongTokenSize(t *testing.T) {
resetEnv := Setenv(t, "HCLOUD_TOKEN", "0123456789abcdef")
resetEnv := Setenv(t,
"HCLOUD_TOKEN", "0123456789abcdef",
"HCLOUD_METRICS_ENABLED", "false",
)
defer resetEnv()

var config bytes.Buffer
Expand All @@ -100,6 +104,7 @@ func TestNewCloudConnectionNotPossible(t *testing.T) {
"HCLOUD_ENDPOINT", "http://127.0.0.1:4711/v1",
"HCLOUD_TOKEN", "jr5g7ZHpPptyhJzZyHw2Pqu4g9gTqDvEceYpngPf79jN_NOT_VALID_dzhepnahq",
"NODE_NAME", "test",
"HCLOUD_METRICS_ENABLED", "false",
)
defer resetEnv()

Expand All @@ -116,6 +121,7 @@ func TestNewCloudInvalidToken(t *testing.T) {
"HCLOUD_ENDPOINT", env.Server.URL,
"HCLOUD_TOKEN", "jr5g7ZHpPptyhJzZyHw2Pqu4g9gTqDvEceYpngPf79jN_NOT_VALID_dzhepnahq",
"NODE_NAME", "test",
"HCLOUD_METRICS_ENABLED", "false",
)
defer resetEnv()
env.Mux.HandleFunc("/servers", func(w http.ResponseWriter, r *http.Request) {
Expand Down Expand Up @@ -143,6 +149,7 @@ func TestCloud(t *testing.T) {
"HCLOUD_ENDPOINT", env.Server.URL,
"HCLOUD_TOKEN", "jr5g7ZHpPptyhJzZyHw2Pqu4g9gTqDvEceYpngPf79jN_NOT_VALID_dzhepnahq",
"NODE_NAME", "test",
"HCLOUD_METRICS_ENABLED", "false",
)
defer resetEnv()
env.Mux.HandleFunc("/servers", func(w http.ResponseWriter, r *http.Request) {
Expand Down Expand Up @@ -233,7 +240,11 @@ func TestCloud(t *testing.T) {
})

t.Run("RoutesWithNetworks", func(t *testing.T) {
resetEnv := Setenv(t, "HCLOUD_NETWORK", "1", "HCLOUD_NETWORK_DISABLE_ATTACHED_CHECK", "true")
resetEnv := Setenv(t,
"HCLOUD_NETWORK", "1",
"HCLOUD_NETWORK_DISABLE_ATTACHED_CHECK", "true",
"HCLOUD_METRICS_ENABLED", "false",
)
defer resetEnv()

c, err := newCloud(&bytes.Buffer{})
Expand Down
9 changes: 9 additions & 0 deletions hcloud/instances.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"os"
"strconv"

"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/metrics"
"github.com/hetznercloud/hcloud-go/hcloud"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
Expand All @@ -47,6 +48,7 @@ func newInstances(client *hcloud.Client, addressFamily addressFamily) *instances

func (i *instances) NodeAddressesByProviderID(ctx context.Context, providerID string) ([]v1.NodeAddress, error) {
const op = "hcloud/instances.NodeAddressesByProviderID"
metrics.OperationCalled.WithLabelValues(op).Inc()

id, err := providerIDToServerID(providerID)
if err != nil {
Expand All @@ -62,6 +64,7 @@ func (i *instances) NodeAddressesByProviderID(ctx context.Context, providerID st

func (i *instances) NodeAddresses(ctx context.Context, nodeName types.NodeName) ([]v1.NodeAddress, error) {
const op = "hcloud/instances.NodeAddresses"
metrics.OperationCalled.WithLabelValues(op).Inc()

server, err := getServerByName(ctx, i.client, string(nodeName))
if err != nil {
Expand All @@ -72,6 +75,7 @@ func (i *instances) NodeAddresses(ctx context.Context, nodeName types.NodeName)

func (i *instances) ExternalID(ctx context.Context, nodeName types.NodeName) (string, error) {
const op = "hcloud/instances.ExternalID"
metrics.OperationCalled.WithLabelValues(op).Inc()

id, err := i.InstanceID(ctx, nodeName)
if err != nil {
Expand All @@ -82,6 +86,7 @@ func (i *instances) ExternalID(ctx context.Context, nodeName types.NodeName) (st

func (i *instances) InstanceID(ctx context.Context, nodeName types.NodeName) (string, error) {
const op = "hcloud/instances.InstanceID"
metrics.OperationCalled.WithLabelValues(op).Inc()

server, err := getServerByName(ctx, i.client, string(nodeName))
if err != nil {
Expand All @@ -92,6 +97,7 @@ func (i *instances) InstanceID(ctx context.Context, nodeName types.NodeName) (st

func (i *instances) InstanceType(ctx context.Context, nodeName types.NodeName) (string, error) {
const op = "hcloud/instances.InstanceType"
metrics.OperationCalled.WithLabelValues(op).Inc()

server, err := getServerByName(ctx, i.client, string(nodeName))
if err != nil {
Expand All @@ -102,6 +108,7 @@ func (i *instances) InstanceType(ctx context.Context, nodeName types.NodeName) (

func (i *instances) InstanceTypeByProviderID(ctx context.Context, providerID string) (string, error) {
const op = "hcloud/instances.InstanceTypeByProviderID"
metrics.OperationCalled.WithLabelValues(op).Inc()

id, err := providerIDToServerID(providerID)
if err != nil {
Expand All @@ -125,6 +132,7 @@ func (i *instances) CurrentNodeName(ctx context.Context, hostname string) (types

func (i instances) InstanceExistsByProviderID(ctx context.Context, providerID string) (bool, error) {
const op = "hcloud/instances.InstanceExistsByProviderID"
metrics.OperationCalled.WithLabelValues(op).Inc()

id, err := providerIDToServerID(providerID)
if err != nil {
Expand All @@ -140,6 +148,7 @@ func (i instances) InstanceExistsByProviderID(ctx context.Context, providerID st

func (i instances) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
const op = "hcloud/instances.InstanceShutdownByProviderID"
metrics.OperationCalled.WithLabelValues(op).Inc()

id, err := providerIDToServerID(providerID)
if err != nil {
Expand Down
7 changes: 7 additions & 0 deletions hcloud/load_balancers.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (

"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/annotation"
"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/hcops"
"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/metrics"
"github.com/hetznercloud/hcloud-go/hcloud"
v1 "k8s.io/api/core/v1"
cloudprovider "k8s.io/cloud-provider"
Expand Down Expand Up @@ -46,6 +47,7 @@ func (l *loadBalancers) GetLoadBalancer(
ctx context.Context, _ string, service *v1.Service,
) (status *v1.LoadBalancerStatus, exists bool, err error) {
const op = "hcloud/loadBalancers.GetLoadBalancer"
metrics.OperationCalled.WithLabelValues(op).Inc()

lb, err := l.lbOps.GetByK8SServiceUID(ctx, service)
if err != nil {
Expand Down Expand Up @@ -91,6 +93,8 @@ func (l *loadBalancers) EnsureLoadBalancer(
ctx context.Context, clusterName string, svc *v1.Service, nodes []*v1.Node,
) (*v1.LoadBalancerStatus, error) {
const op = "hcloud/loadBalancers.EnsureLoadBalancer"
metrics.OperationCalled.WithLabelValues(op).Inc()

var (
reload bool
lb *hcloud.LoadBalancer
Expand Down Expand Up @@ -228,6 +232,8 @@ func (l *loadBalancers) UpdateLoadBalancer(
ctx context.Context, clusterName string, svc *v1.Service, nodes []*v1.Node,
) error {
const op = "hcloud/loadBalancers.UpdateLoadBalancer"
metrics.OperationCalled.WithLabelValues(op).Inc()

var (
lb *hcloud.LoadBalancer
err error
Expand Down Expand Up @@ -267,6 +273,7 @@ func (l *loadBalancers) UpdateLoadBalancer(

func (l *loadBalancers) EnsureLoadBalancerDeleted(ctx context.Context, clusterName string, service *v1.Service) error {
const op = "hcloud/loadBalancers.EnsureLoadBalancerDeleted"
metrics.OperationCalled.WithLabelValues(op).Inc()

loadBalancer, err := l.lbOps.GetByK8SServiceUID(ctx, service)
if errors.Is(err, hcops.ErrNotFound) {
Expand Down
10 changes: 10 additions & 0 deletions hcloud/routes.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"time"

"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/hcops"
"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/metrics"
"github.com/hetznercloud/hcloud-go/hcloud"
"k8s.io/apimachinery/pkg/types"
cloudprovider "k8s.io/cloud-provider"
Expand All @@ -22,6 +23,7 @@ type routes struct {

func newRoutes(client *hcloud.Client, networkID int) (*routes, error) {
const op = "hcloud/newRoutes"
metrics.OperationCalled.WithLabelValues(op).Inc()

networkObj, _, err := client.Network.GetByID(context.Background(), networkID)
if err != nil {
Expand All @@ -40,6 +42,7 @@ func newRoutes(client *hcloud.Client, networkID int) (*routes, error) {

func (r *routes) reloadNetwork(ctx context.Context) error {
const op = "hcloud/reloadNetwork"
metrics.OperationCalled.WithLabelValues(op).Inc()

networkObj, _, err := r.client.Network.GetByID(ctx, r.network.ID)
if err != nil {
Expand All @@ -55,6 +58,7 @@ func (r *routes) reloadNetwork(ctx context.Context) error {
// ListRoutes lists all managed routes that belong to the specified clusterName
func (r *routes) ListRoutes(ctx context.Context, clusterName string) ([]*cloudprovider.Route, error) {
const op = "hcloud/ListRoutes"
metrics.OperationCalled.WithLabelValues(op).Inc()

if err := r.reloadNetwork(ctx); err != nil {
return nil, fmt.Errorf("%s: %w", op, err)
Expand Down Expand Up @@ -88,6 +92,7 @@ func (r *routes) ListRoutes(ctx context.Context, clusterName string) ([]*cloudpr
// to create a more user-meaningful name.
func (r *routes) CreateRoute(ctx context.Context, clusterName string, nameHint string, route *cloudprovider.Route) error {
const op = "hcloud/CreateRoute"
metrics.OperationCalled.WithLabelValues(op).Inc()

srv, err := r.serverCache.ByName(string(route.TargetNode))
if err != nil {
Expand Down Expand Up @@ -150,6 +155,7 @@ func (r *routes) CreateRoute(ctx context.Context, clusterName string, nameHint s
// Route should be as returned by ListRoutes
func (r *routes) DeleteRoute(ctx context.Context, clusterName string, route *cloudprovider.Route) error {
const op = "hcloud/DeleteRoute"
metrics.OperationCalled.WithLabelValues(op).Inc()

srv, err := r.serverCache.ByName(string(route.TargetNode))
if err != nil {
Expand All @@ -174,6 +180,8 @@ func (r *routes) DeleteRoute(ctx context.Context, clusterName string, route *clo

func (r *routes) deleteRouteFromHcloud(ctx context.Context, cidr *net.IPNet, ip net.IP) error {
const op = "hcloud/deleteRouteFromHcloud"
metrics.OperationCalled.WithLabelValues(op).Inc()

opts := hcloud.NetworkDeleteRouteOpts{
Route: hcloud.NetworkRoute{
Destination: cidr,
Expand Down Expand Up @@ -201,6 +209,7 @@ func (r *routes) deleteRouteFromHcloud(ctx context.Context, cidr *net.IPNet, ip

func (r *routes) hcloudRouteToRoute(route hcloud.NetworkRoute) (*cloudprovider.Route, error) {
const op = "hcloud/hcloudRouteToRoute"
metrics.OperationCalled.WithLabelValues(op).Inc()

srv, err := r.serverCache.ByPrivateIP(route.Gateway)
if err != nil {
Expand All @@ -217,6 +226,7 @@ func (r *routes) hcloudRouteToRoute(route hcloud.NetworkRoute) (*cloudprovider.R

func (r *routes) checkIfRouteAlreadyExists(ctx context.Context, route *cloudprovider.Route) (bool, error) {
const op = "hcloud/checkIfRouteAlreadyExists"
metrics.OperationCalled.WithLabelValues(op).Inc()

if err := r.reloadNetwork(ctx); err != nil {
return false, fmt.Errorf("%s: %w", op, err)
Expand Down
4 changes: 4 additions & 0 deletions hcloud/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,14 @@ import (

"k8s.io/klog/v2"

"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/metrics"
"github.com/hetznercloud/hcloud-go/hcloud"
cloudprovider "k8s.io/cloud-provider"
)

func getServerByName(ctx context.Context, c *hcloud.Client, name string) (*hcloud.Server, error) {
const op = "hcloud/getServerByName"
metrics.OperationCalled.WithLabelValues(op).Inc()

server, _, err := c.Server.GetByName(ctx, name)
if err != nil {
Expand All @@ -44,6 +46,7 @@ func getServerByName(ctx context.Context, c *hcloud.Client, name string) (*hclou

func getServerByID(ctx context.Context, c *hcloud.Client, id int) (*hcloud.Server, error) {
const op = "hcloud/getServerByName"
metrics.OperationCalled.WithLabelValues(op).Inc()

server, _, err := c.Server.GetByID(ctx, id)
if err != nil {
Expand All @@ -57,6 +60,7 @@ func getServerByID(ctx context.Context, c *hcloud.Client, id int) (*hcloud.Serve

func providerIDToServerID(providerID string) (int, error) {
const op = "hcloud/providerIDToServerID"
metrics.OperationCalled.WithLabelValues(op).Inc()

providerPrefix := providerName + "://"
if !strings.HasPrefix(providerID, providerPrefix) {
Expand Down
4 changes: 4 additions & 0 deletions hcloud/zones.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"fmt"

"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/metrics"
"github.com/hetznercloud/hcloud-go/hcloud"
"k8s.io/apimachinery/pkg/types"
cloudprovider "k8s.io/cloud-provider"
Expand All @@ -36,6 +37,7 @@ func newZones(client *hcloud.Client, nodeName string) *zones {

func (z zones) GetZone(ctx context.Context) (cloudprovider.Zone, error) {
const op = "hcloud/zones.GetZone"
metrics.OperationCalled.WithLabelValues(op).Inc()

server, err := getServerByName(ctx, z.client, z.nodeName)
if err != nil {
Expand All @@ -46,6 +48,7 @@ func (z zones) GetZone(ctx context.Context) (cloudprovider.Zone, error) {

func (z zones) GetZoneByProviderID(ctx context.Context, providerID string) (cloudprovider.Zone, error) {
const op = "hcloud/zones.GetZoneByProviderID"
metrics.OperationCalled.WithLabelValues(op).Inc()

id, err := providerIDToServerID(providerID)
if err != nil {
Expand All @@ -62,6 +65,7 @@ func (z zones) GetZoneByProviderID(ctx context.Context, providerID string) (clou

func (z zones) GetZoneByNodeName(ctx context.Context, nodeName types.NodeName) (cloudprovider.Zone, error) {
const op = "hcloud/zones.GetZoneByNodeName"
metrics.OperationCalled.WithLabelValues(op).Inc()

server, err := getServerByName(ctx, z.client, string(nodeName))
if err != nil {
Expand Down
2 changes: 2 additions & 0 deletions internal/annotation/load_balancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package annotation
import (
"fmt"

"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/metrics"
"github.com/hetznercloud/hcloud-go/hcloud"
v1 "k8s.io/api/core/v1"
)
Expand Down Expand Up @@ -199,6 +200,7 @@ const (
// from lb.
func LBToService(svc *v1.Service, lb *hcloud.LoadBalancer) error {
const op = "annotation/LBToService"
metrics.OperationCalled.WithLabelValues(op).Inc()

sa := &serviceAnnotator{Svc: svc}

Expand Down
Loading

0 comments on commit 7d1cd6a

Please sign in to comment.