Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

STOR-2040: CLI command to display bound pvc filesystem usage percentage #1854

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
385 changes: 385 additions & 0 deletions pkg/cli/admin/top/persistentvolumeclaims.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,385 @@
package top
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since you'll be the maintainer of this command, there should be a separate OWNERS file like this https://github.com/openshift/oc/blob/master/pkg/cli/admin/inspectalerts/OWNERS


import (
"bytes"
"context"
"crypto/tls"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strconv"
"strings"

"github.com/spf13/cobra"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
errorsutil "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/cli-runtime/pkg/genericiooptions"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/transport"
"k8s.io/klog/v2"
kcmdutil "k8s.io/kubectl/pkg/cmd/util"
"k8s.io/kubectl/pkg/util/templates"

routev1 "github.com/openshift/api/route/v1"
routev1client "github.com/openshift/client-go/route/clientset/versioned/typed/route/v1"
)

const (
localhostRecoveryTokenSecret = "localhost-recovery-client-token"
kubeApiserverNamespace = "openshift-kube-apiserver"
)

var (
topPersistentVolumeClaimsLong = templates.LongDesc(`
Experimental: Show usage statistics for bound persistentvolumeclaims.

This command analyzes all the bound persistentvolumeclaims managed by the platform and presents current usage statistics.
`)

topPersistentVolumeClaimsExample = templates.Examples(`
# Show usage statistics for all the bound persistentvolumeclaims across the cluster
oc adm top persistentvolumeclaims -A

# Show usage statistics for all the bound persistentvolumeclaims in a specific namespace
oc adm top persistentvolumeclaims -n default

# Show usage statistics for specific bound persistentvolumeclaims
oc adm top persistentvolumeclaims database-pvc app-pvc -n default

`)
)

// RouteGetter is a function that gets a Route.
type RouteGetter func(ctx context.Context, namespace string, name string, opts metav1.GetOptions) (*routev1.Route, error)

type options struct {
genericiooptions.IOStreams
getRoute RouteGetter
Namespace string
InsecureTLS bool
allNamespaces bool
ClientConfig *rest.Config
ClientSet kubernetes.Interface
BearerToken string
}

func newOptions(streams genericiooptions.IOStreams) *options {
return &options{
IOStreams: streams,
}
}

func NewCmdTopPersistentVolumeClaims(f kcmdutil.Factory, streams genericiooptions.IOStreams) *cobra.Command {
o := newOptions(streams)
cmd := &cobra.Command{
Use: "persistentvolumeclaims",
Aliases: []string{"persistentvolumeclaim", "pvc"},
Short: "Experimental: Show usage statistics for bound persistentvolumeclaims",
Long: topPersistentVolumeClaimsLong,
Example: topPersistentVolumeClaimsExample,
Run: func(cmd *cobra.Command, args []string) {
kcmdutil.CheckErr(o.Complete(f, cmd, args))
kcmdutil.CheckErr(o.Run(cmd.Context(), args))
},
}

cmd.Flags().BoolVarP(&o.allNamespaces, "all-namespaces", "A", o.allNamespaces, "If present, list the pvc usage across all namespaces. Namespace in current context is ignored even if specified with --namespace")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't -A (aka --all-namespaces) coming as built-in just like --namespace and we don't have to define in here additionally?.

cmd.Flags().BoolVar(&o.InsecureTLS, "insecure-skip-tls-verify", false, "If true, the server's certificate will not be checked for validity. This will make your HTTPS connections insecure")
gmeghnag marked this conversation as resolved.
Show resolved Hide resolved
cmd.Flags().MarkHidden("insecure-skip-tls-verify")
return cmd
}

func (o *options) Complete(f kcmdutil.Factory, cmd *cobra.Command, args []string) error {
var err error
o.Namespace, _, err = f.ToRawKubeConfigLoader().Namespace()
if err != nil {
return err
}
cfg, err := f.ToRESTConfig()
if err != nil {
return err
}

o.ClientConfig = cfg

routeClient, err := routev1client.NewForConfig(cfg)
if err != nil {
return err
}

o.ClientSet, err = kubernetes.NewForConfig(o.ClientConfig)
if err != nil {
return err
}
o.getRoute = func(ctx context.Context, namespace string, name string, opts metav1.GetOptions) (*routev1.Route, error) {
return routeClient.Routes(namespace).Get(ctx, name, opts)
}

if o.allNamespaces {
o.Namespace = metav1.NamespaceAll
gmeghnag marked this conversation as resolved.
Show resolved Hide resolved
} else {
namespace := cmd.Flag("namespace").Value.String()
if len(namespace) != 0 {
o.Namespace = namespace
}
}

if o.allNamespaces && len(args) != 0 {
gmeghnag marked this conversation as resolved.
Show resolved Hide resolved
return fmt.Errorf("a persistentvolumeclaim resource cannot be retrieved by name across all namespaces.")
}

return nil
}

type persistentVolumeClaimInfo struct {
Namespace string
Name string
UsagePercentage string
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason this field is exported, although persistentVolumeClaimInfo is not exported?

}

func (v persistentVolumeClaimInfo) PrintLine(out io.Writer) {
gmeghnag marked this conversation as resolved.
Show resolved Hide resolved
printValue(out, v.Namespace)
printValue(out, v.Name)
printValue(out, v.UsagePercentage)
}

func (o *options) Run(ctx context.Context, args []string) error {
o.BearerToken = o.ClientConfig.BearerToken
if len(o.ClientConfig.BearerToken) == 0 {
klog.V(4).Info(fmt.Sprintf(`no token is currently in use for this session, attempting to retrieve token from secret "%s" in namespace "%s"`, localhostRecoveryTokenSecret, kubeApiserverNamespace))
secret, err := o.ClientSet.CoreV1().Secrets(kubeApiserverNamespace).Get(context.TODO(), localhostRecoveryTokenSecret, metav1.GetOptions{})
if err != nil {
if errors.IsNotFound(err) {
klog.V(4).Info(fmt.Errorf("error retrieving secret: %s", err.Error()))
return fmt.Errorf("no token is currently in use for this session")
}
return fmt.Errorf("%s", err.Error())
}
localhostRecoveryToken, exist := secret.Data["token"]
if !exist {
return fmt.Errorf(`"token" key not found in secret "%s" in namespace "%s"`, localhostRecoveryTokenSecret, kubeApiserverNamespace)
}
o.BearerToken = string(localhostRecoveryToken)
}
persistentVolumeClaimsBytes, err := GetPersistentVolumeClaims(ctx, o.getRoute, o.BearerToken, o.Namespace, o.InsecureTLS, args)
if err != nil {
return err
}
promOutput := &PromOutput{}
err = json.Unmarshal([]byte(persistentVolumeClaimsBytes), &promOutput)
gmeghnag marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return err
}

if len(promOutput.Data.Result) == 0 {
if o.Namespace == "" {
return fmt.Errorf("no persistentvolumeclaims found.")
}
if len(args) == 0 {
return fmt.Errorf("no persistentvolumeclaims found in %s namespace.", o.Namespace)
}
return fmt.Errorf("persistentvolumeclaim %q not found in %s namespace.", args[0], o.Namespace)

}

// if more pvc are requested as args but one of them does not not exist
if len(args) != 0 && len(promOutput.Data.Result) != len(args) {
resultingPvc := make(map[string]bool)
for _, promOutputDataResult := range promOutput.Data.Result {
pvcName := promOutputDataResult.Metric["persistentvolumeclaim"]
resultingPvc[pvcName] = true
}
for _, arg := range args {
if _, ok := resultingPvc[arg]; !ok {
return fmt.Errorf("persistentvolumeclaim %q not found in %s namespace.", arg, o.Namespace)
}
}

}
headers := []string{"NAMESPACE", "NAME", "USAGE(%)"}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we have some additional fields? My suggestion would be NAMESPACE, NAME, VOLUME, CAPACITY, USED, USAGE(%).

VOLUME: name of the PV attached to the PVC so the user can easily see which volume it corresponds to.
CAPACITY: total capacity of the volume in multiple of bytes (same as oc get pv, for example 10Gi or 2Ti).
USED: total used space of the volume in multiple of bytes (same format as capacity bytes).

This would be useful to mention in the enhancement that @ardaguclu requested, in case anyone has other suggestions (or objections).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For VOLUME, CAPACITY, USED it would require code modification using a different approach, maybe we can do that in the future.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking about this one. The default outuput format should not change once this gets GA'd. It would efficiently become an API (a script parsing the output should not get broken by the update), so adding new columns at least to the default output might not be as simple.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ack, so the option we have in the future to implement these other column fields without changing the default output is to use -o wide.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This plan sounds fine to me, thanks for considering it.

pvcInfos := []persistentVolumeClaimInfo{}
infos := []Info{}
for _, promOutputDataResult := range promOutput.Data.Result {
namespaceName := promOutputDataResult.Metric["namespace"]
pvcName := promOutputDataResult.Metric["persistentvolumeclaim"]
usagePercentage := promOutputDataResult.Value[1]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do we know promOutputDataResult.Value's length is greater than 1 and we don't panic?

valueFloatLong, _ := strconv.ParseFloat(usagePercentage.(string), 64)
valueFloat := fmt.Sprintf("%.2f", valueFloatLong)
if len(pvcInfos) > 0 {
if !(namespaceName == pvcInfos[len(pvcInfos)-1].Namespace && pvcName == pvcInfos[len(pvcInfos)-1].Name) {
pvcInfos = append(pvcInfos, persistentVolumeClaimInfo{Namespace: namespaceName, Name: pvcName, UsagePercentage: valueFloat})
infos = append(infos, persistentVolumeClaimInfo{Namespace: namespaceName, Name: pvcName, UsagePercentage: valueFloat})
}
} else {
pvcInfos = append(pvcInfos, persistentVolumeClaimInfo{Namespace: namespaceName, Name: pvcName, UsagePercentage: valueFloat})
infos = append(infos, persistentVolumeClaimInfo{Namespace: namespaceName, Name: pvcName, UsagePercentage: valueFloat})
}
}

Print(o.Out, headers, infos)
return nil
}

func constructPrometheusQuery(namespace string, args []string) string {
query := ""
claimNames := ".*"
if namespace != "" {
if len(args) > 0 {
claimNames = strings.Join(args, "|")
}
query = fmt.Sprintf(`100*kubelet_volume_stats_used_bytes{persistentvolumeclaim=~"%s", namespace="%s"}/kubelet_volume_stats_capacity_bytes{persistentvolumeclaim=~"%s", namespace="%s"}`, claimNames, namespace, claimNames, namespace)
gmeghnag marked this conversation as resolved.
Show resolved Hide resolved
} else {
query = `100*kubelet_volume_stats_used_bytes{persistentvolumeclaim=~".*"}/kubelet_volume_stats_capacity_bytes{persistentvolumeclaim=~".*"}`
}
return query
}

func GetPersistentVolumeClaims(ctx context.Context, getRoute RouteGetter, bearerToken string, namespace string, insecureTLS bool, args []string) ([]byte, error) {
uri := &url.URL{
Scheme: "https",
Path: "/api/v1/query",
}
query := constructPrometheusQuery(namespace, args)
urlParams := url.Values{}
urlParams.Set("query", query)
uri.RawQuery = urlParams.Encode()

persistentVolumeClaimsBytes, err := getWithBearer(ctx, getRoute, "openshift-monitoring", "prometheus-k8s", uri, bearerToken, insecureTLS)
Comment on lines +248 to +252
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I get this error when using system:admin KUBECONFIG, but not for the other oc adm top subcommands:

$ ./oc adm top pvc
error: failed to get persistentvolumeclaims from Prometheus: no token is currently in use for this session

Is it possible to avoid this error by using the metrics API interface instead of constructing a raw request w/ bearer token?

func getMetricsFromMetricsAPI(metricsClient metricsclientset.Interface, namespace, resourceName string, allNamespaces bool, labelSelector labels.Selector, fieldSelector fields.Selector) (*metricsapi.PodMetricsList, error) {
var err error
ns := metav1.NamespaceAll
if !allNamespaces {
ns = namespace
}
versionedMetrics := &metricsv1beta1api.PodMetricsList{}
if resourceName != "" {
m, err := metricsClient.MetricsV1beta1().PodMetricses(ns).Get(context.TODO(), resourceName, metav1.GetOptions{})
if err != nil {
return nil, err
}
versionedMetrics.Items = []metricsv1beta1api.PodMetrics{*m}
} else {
versionedMetrics, err = metricsClient.MetricsV1beta1().PodMetricses(ns).List(context.TODO(), metav1.ListOptions{LabelSelector: labelSelector.String(), FieldSelector: fieldSelector.String()})
if err != nil {
return nil, err
}
}
metrics := &metricsapi.PodMetricsList{}
err = metricsv1beta1api.Convert_v1beta1_PodMetricsList_To_metrics_PodMetricsList(versionedMetrics, metrics, nil)
if err != nil {
return nil, err
}
return metrics, nil
}

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIU the metricsclientset API allows consumers to access only resource metrics (CPU and memory) for pods and nodes.

So if we strictly want system:admin users authenticated using the KUBECONFIG file to use that command we should evaluate:

  • getting a token from a different place, maybe a secret containing it?! (I will try to check if any exists)
  • using a different workflow to obtain this information rather than Prometheus API.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found out that the token from the secret localhost-recovery-client-token in openshift-kube-apiserver namespace is a valid one to use:

oc get secret -n openshift-kube-apiserver localhost-recovery-client-token -o json | jq '.data.token' -r | base64 -d

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll make this modification before next week so we can try to merge this PR, as @tsmetana agreed.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

o.BearerToken = o.RESTConfig.BearerToken
if len(o.RESTConfig.BearerToken) == 0 {
klog.V(4).Info("no token is currently in use for this session")
klog.V(5).Info("attempting to retrieve token from secret \"localhost-recovery-client-token\" in namespace \"openshift-kube-apiserver\"")
secret, err := o.ClientSet.CoreV1().Secrets("openshift-kube-apiserver").Get(context.TODO(), "localhost-recovery-client-token", metav1.GetOptions{})
if err != nil {
klog.V(4).Info(fmt.Errorf("error retrieving secret: %s", err.Error()))
return fmt.Errorf("no token is currently in use for this session")
}
localhostRecoveryToken, exist := secret.Data["token"]
if !exist {
klog.V(4).Info(fmt.Errorf("\"token\" key not found in secret \"localhost-recovery-client-token\" in namespace \"openshift-kube-apiserver\""))
return fmt.Errorf("no token is currently in use for this session")
}
o.BearerToken = string(localhostRecoveryToken)
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

if err != nil {
return persistentVolumeClaimsBytes, fmt.Errorf("failed to get persistentvolumeclaims from Prometheus: %w", err)
}

return persistentVolumeClaimsBytes, nil
}

// getWithBearer gets a Route by namespace/name, constructs a URI using
// status.ingress[].host and the path argument, and performs GETs on that
// URI using Bearer authentication with the token argument.
func getWithBearer(ctx context.Context, getRoute RouteGetter, namespace, name string, baseURI *url.URL, bearerToken string, InsecureTLS bool) ([]byte, error) {
if len(bearerToken) == 0 {
return nil, fmt.Errorf("no token is currently in use for this session")
}

route, err := getRoute(ctx, namespace, name, metav1.GetOptions{})
if err != nil {
return nil, err
}

httpTransport := &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: InsecureTLS},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd strongly against exposing insecureTLS, can we remove this flag entirely?.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with you that we should encourage secure TLS, but there might be cases in which this flag could be helpful (e.g., users who create development environments daily for testing).

So, what do you think about just hiding that flag? So users will be prone to use secure TLS but, if for any reason they will come back to us asking for a workaround we can tell them to use this hidden flag.

}

withDebugWrappers, err := transport.HTTPWrappersForConfig(
&transport.Config{
UserAgent: rest.DefaultKubernetesUserAgent() + "(top persistentvolumeclaims)",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason of modifying the UserAgent?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

took inspiration from inspect-alert.

BearerToken: bearerToken,
},
httpTransport,
)
if err != nil {
return nil, err
}

client := &http.Client{Transport: withDebugWrappers}
errs := make([]error, 0, len(route.Status.Ingress))
for _, ingress := range route.Status.Ingress {
baseURI.Host = ingress.Host
content, err := getMetrics(*baseURI, client)
if err == nil {
return content, nil
} else {
gmeghnag marked this conversation as resolved.
Show resolved Hide resolved
errs = append(errs, fmt.Errorf("%s, %w", ingress.Host, err))
}
}

if len(errs) == 1 {
return nil, fmt.Errorf("unable to get %s from URI in the %s/%s Route: %s", baseURI.Path, namespace, name, errorsutil.NewAggregate(errs))
}
return nil, fmt.Errorf("unable to get %s from any of %d URIs in the %s/%s Route: %s", baseURI.Path, len(errs), namespace, name, errorsutil.NewAggregate(errs))

}

func getMetrics(uri url.URL, client *http.Client) ([]byte, error) {
req, err := http.NewRequest("GET", uri.String(), nil)
if err != nil {
return nil, err
}

resp, err := client.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()

body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, err
}

glogBody("Response Body", body)

if resp.StatusCode != http.StatusOK {
return body, fmt.Errorf("GET status code=%d", resp.StatusCode)
gmeghnag marked this conversation as resolved.
Show resolved Hide resolved
}

return body, nil
}

type PromOutput struct {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any library that exposes these concrete types we can directly consume instead of copying and pasting?.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably, but I was not able to find them, so I created them handle, I'll check again.

Status string `json:"status"`
Data PromOutputData `json:"data"`
}

type PromOutputData struct {
ResultType string `json:"resultType"`
Result []PromOutputDataResult `json:"result"`
}

type PromOutputDataResult struct {
Metric map[string]string `json:"metric"`
Value FloatStringPair `json:"value"`
}

type FloatStringPair [2]interface{}

// glogBody and truncateBody taken from client-go Request
// https://github.com/openshift/oc/blob/4be3c8609f101a8c5867abc47bda33caae629113/vendor/k8s.io/client-go/rest/request.go#L1183-L1215
gmeghnag marked this conversation as resolved.
Show resolved Hide resolved

// truncateBody decides if the body should be truncated, based on the glog Verbosity.
func truncateBody(body string) string {
max := 0
switch {
case bool(klog.V(10).Enabled()):
return body
case bool(klog.V(9).Enabled()):
max = 10240
case bool(klog.V(8).Enabled()):
max = 1024
}

if len(body) <= max {
return body
}

return body[:max] + fmt.Sprintf(" [truncated %d chars]", len(body)-max)
}

// glogBody logs a body output that could be either JSON or protobuf. It explicitly guards against
// allocating a new string for the body output unless necessary. Uses a simple heuristic to determine
// whether the body is printable.
func glogBody(prefix string, body []byte) {
gmeghnag marked this conversation as resolved.
Show resolved Hide resolved
if klogV := klog.V(8); klogV.Enabled() {
gmeghnag marked this conversation as resolved.
Show resolved Hide resolved
if bytes.IndexFunc(body, func(r rune) bool {
return r < 0x0a
gmeghnag marked this conversation as resolved.
Show resolved Hide resolved
}) != -1 {
klogV.Infof("%s:\n%s", prefix, truncateBody(hex.Dump(body)))
} else {
klogV.Infof("%s: %s", prefix, truncateBody(string(body)))
}
}
}
1 change: 1 addition & 0 deletions pkg/cli/admin/top/top.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ func NewCommandTop(f kcmdutil.Factory, streams genericiooptions.IOStreams) *cobr

cmds.AddCommand(NewCmdTopImages(f, streams))
cmds.AddCommand(NewCmdTopImageStreams(f, streams))
cmds.AddCommand(NewCmdTopPersistentVolumeClaims(f, streams))
cmdTopNode.Long = templates.LongDesc(cmdTopNode.Long)
cmdTopNode.Example = templates.Examples(cmdTopNode.Example)
cmdTopPod.Long = templates.LongDesc(cmdTopPod.Long)
Expand Down