kubernetes · k8s-ci-robot · Jun 5, 2020 · May 20, 2020 · thockin · May 27, 2020
diff --git a/contributors/devel/sig-instrumentation/migration-to-structured-logging.md b/contributors/devel/sig-instrumentation/migration-to-structured-logging.md
@@ -0,0 +1,383 @@
+# Structured Logging migration instructions
+
+This document describes instructions for migration proposed by [Structured Logging KEP]. It describes new structured
+functions introduced in `klog` (Kubernetes logging library) and how log calls should be changed to utilize new features.
+This document was written for the initial migration of `kubernetes/kubernetes` repository proposed for Alpha stage, but
+should be applicable at later stages or for other projects using `klog` logging library.
+
+[Structured Logging KEP]: https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/1602-structured-logging
+
+## Goal of Alpha migration
+
+The first step is to introduce structure to a high percentage of logs generated in Kubernetes by changing only a
+small number of log API calls. Based on criteria described in the [selecting most important logs] section, the selected
+22 log calls are estimated to impact 99.9% of log volume. The up to date list of these log calls is provided in the
+[Enhancement Issue].
+
+[Enhancement Issue]: https://github.com/kubernetes/enhancements/issues/1602
+[selecting most important logs]: https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/1602-structured-logging#selecting-most-important-logs
+
+## Structured logging in Kubernetes
+
+With this enhancement a set of new functions was added to `klog`. Structured logging functions follow the interface
+based on [logr], which has a different design than other `klog` functions, which are based on [glog]. It is recommended
+to familiarize yourself with [logr].
+
+[logr]: https://github.com/go-logr/logr
+[glog]: https://github.com/golang/glog
+
+Here are the prototypes of functions added to `klog` that will be utilized during migration:
+```go
+package klog
+
+// InfoS structured logs to the INFO log.
+// The msg argument used to add constant description to the log line.
+// The key/value pairs would be join by "=" ; a newline is always appended.
+//
+// Examples:
+// >> klog.InfoS("Pod status updated", "pod", klog.KObj(pod), "status", "ready")
+// output:
+// >> I1025 00:15:15.525108       1 controller_utils.go:116] "Pod status updated" pod="kube-system/kubedns" status="ready"
+func InfoS(msg string, keysAndValues ...interface{})
+
+// ErrorS structured logs to the ERROR, WARNING, and INFO logs.
+// the err argument used as "err" field of log line.
+// The msg argument used to add constant description to the log line.
+// The key/value pairs would be join by "=" ; a newline is always appended.
+//
+// Examples:
+// >> klog.ErrorS(err, "Failed to update pod status")
+// output:
+// >> E1025 00:15:15.525108       1 controller_utils.go:114] "Failed to update pod status" err="timeout"
+func ErrorS(err error, msg string, keysAndValues ...interface{})
+
+// KObj is used to create ObjectRef when logging information about Kubernetes objects
+// Examples:
+// >> klog.InfoS("Pod status updated", "pod", klog.KObj(pod), "status", "ready")
+// output:
+// >> I1025 00:15:15.525108       1 controller_utils.go:116] "Pod status updated" pod="kube-system/kubedns" status="ready"
+func KObj(obj KMetadata) ObjectRef
+
+// KRef is used to create ObjectRef when logging information about Kubernetes objects without access to metav1.Object
+// Examples:
+// >> klog.InfoS("Pod status updated", "pod", klog.KRef(podNamespace, podName), "status", "ready")
+// output:
+// >> I1025 00:15:15.525108       1 controller_utils.go:116] "Pod status updated" pod="kube-system/kubedns" status="ready"
+func KRef(namespace, name string) ObjectRef
+
+// ObjectRef represents a reference to a kubernetes object used for logging purpose
+// In text logs it is serialized into "{namespace}/{name}" or "{name}" if namespace is empty
+type ObjectRef struct {
+	Name      string `json:"name"`
+	Namespace string `json:"namespace,omitempty"`
+}
+
+// KMetadata is a subset of the kubernetes k8s.io/apimachinery/pkg/apis/meta/v1.Object interface
+// this interface may expand in the future, but will always be a subset of the
+// kubernetes k8s.io/apimachinery/pkg/apis/meta/v1.Object interface
+type KMetadata interface {
+	GetName() string
+	GetNamespace() string
+}
+```
+
+## Migration
+
+1. Change log functions to structured equivalent
+1. Remove string formatting from log message
+1. Name arguments
+1. Use `klog.KObj` and `klog.KRef` for Kubernetes object references
+1. Verify log output
+
+## Change log functions to structured equivalent
+
+Structured logging functions follow a different logging interface design than other functions in `klog`. They follow
+the minimal design from [logr]; thus there is no one-to-one mapping.
+
+Simplified mapping between functions:
+* `klog.Infof`, `klog.Info`, `klog.Infoln`, `klog.InfoDepth` -> `klog.InfoS`
+* `klog.V(N).Infof`, `klog.V(N).Info`, `klog.V(N).Infoln` -> `klog.V(N).InfoS`
+* `klog.Warning`, `klog.Warningf`, `klog.Warningln`, `klog.WarningDepth` -> `klog.InfoS`
+* `klog.Error`, `klog.Errorf`, `klog.Errorln`, `klog.ErrorDepth` -> `klog.ErrorS`
+* `klog.Fatal`, `klog.Fatalf`, `klog.Fatalln`, `klog.FatalDepth` -> `klog.ErrorS`
+
+### Removing Depth
+
+Functions with depth (`klog.InfoDepth`, `klog.WarningDepth`, `klog.ErrorDepth`, `klog.FatalDepth`) are used to indicate
+that the source of the log (added as metadata in log) is different than the invocation of logging library. This is
+usually used when implementing logging util functions. As logr interface doesn't support depth, those functions should
+return logging arguments instead of calling `klog` directly.
+
+For example
+```go
+func Handle(w http.ResponseWriter, r *http.Request) {
+    logHTTPRequest(r)
+    handle(w, r)
+}
+
+func logHTTPRequest(r *http.Request) {
+    klog.InfoDepth(1, fmt.Sprintf("Received HTTP %s request", r.Method))
+}
+```
+should be replaced with
+```go
+func Handle(w http.ResponseWriter, r *http.Request) {
+    klog.InfoS("Received HTTP request", httpRequestLog(r)...)
+    handle(w, r)
+}
+
+func httpRequestLog(r *http.Request) []interface{} {
+    return []interface{}{
+        "verb", r.Method,
+    }
+}
+
+```
+
+### Using ErrorS
+
+With `klog` structured logging borrowing the interface from [logr], it also inherits its differences in the semantics of
+the error function. Logs generated by `ErrorS` may be enhanced with additional debug information
+(such as stack traces) or be additionally sent to special error recording tools. Errors should be used to indicate
+unexpected behaviours in code, like unexpected errors returned by subroutine function calls.
+
+Calling `ErrorS` with `nil` as error is semi-acceptable if there is an error condition that deserves a stack trace at this
+origin point. For expected errors (`errors` that can happen during routine operations) please consider using
+`klog.InfoS` and pass error in `err` key instead.
+
+### Replacing Fatal calls
+
+Use of Fatal should be discouraged and it's not available in new functions. Instead of depending on the logger to exit
+the process, you should call `os.Exit()` yourself.
+
+## Remove string formatting from log message
+
+With structured logging, log messages are no longer formatted, leaving argument marshalling up to the logging client
+implementation. This allows messages to be a static description of event.
+
+All string formatting (`%d`, `%v`, `%w`, `%s`) should be removed and log message string simplified.
+Describing arguments in log messages is no longer needed and should be removed leaving only a description of what
+happened.
+
+Additionally we can improve messages to comply with good practices:
+* Start from a capital letter.
+* Do not end the message with a period.
+* Use active voice. Use complete sentences when there is an acting subject ("A could not do B") or omit the subject if
+  the subject would be the program itself ("Could not do B").
+* Use past tense ("Could not delete B" instead of "Cannot delete B")
+* When referring to an object, state what type of object it is. ("Deleted pod" instead of "Deleted")
+
+For example
+```go
+klog.Infof("delete pod %s with propagation policy %s", ...)
+```
+should be changed to
+```go
+klog.InfoS("Deleted pod", ...)
+```
+
+Some logs are constructed solely from string formats. In those cases a message needs to be derived from the context of
+the log call.
+
+For example http access logs
+```go
+func LogHTTP(r *http.Request) {
+   klog.Infof("%s %s: (%v) %v%v%v [%s %s]", ...)
+}
+```
+should be changed to
+```go
+func LogHTTP(r *http.Request) {
+   klog.InfoS("Received HTTP request", ...)
+}
+```
+
+### Name arguments
+
+Even though new structured logging functions have very similar function prototype `func (string, ...interface{})` it
+has different meaning for variadic arguments. Instead of just passing arguments, now we are passing key value pairs of
+argument name and argument value. This means when migrating a log call we need to add an additional string before each
+argument that will be used as its name.
+
+For example
+```go
+func LogHTTP(r *http.Request) {
+   klog.Infof("Received HTTP request, path: %s, method: %s", r.Path, r.Method)
+}
+```
+should be changed to
+```go
+func LogHTTP(r *http.Request) {
+   klog.InfoS("Received HTTP request", "path", r.Path, "method", r.Method)
+}
+```
+
+Names of arguments should use [lowerCamelCase] and be alphanumeric. Arguments names in one log call should be unique.
+Names should be picked based on semantic meaning of value itself, not the context in which it is used (log message should
+imply the context). For example names like `status` should be used over (`desiredStatus`, `oldStatus`, `badStatus`) thus
+allowing to query and join different log lines of the `status` field.
+
+Kubernetes objects should be referenced using only their kind, no matter their api group or version. Example argument
+names: `deployment`, `pod`, `node`, `replicaSet`. For objects of unknown type, it is ok to log them under `object` key with
+addition of `apiVersion` and `kind` fields describing the k8s object type.
+
+In situations when we want to log a value of the same meaning twice (e.g. a transition between states) it is ok to name
+an additional argument based on context, while leaving the most current/correct value under the canonical name.
+
+Examples of keys (strongly suggested, will be extended when patterns emerge, no standard schema yet):
+* `err` - error when using `klog.InfoS`. Used for expected errors that are not `klog.ErrorS`.
+* `object` - reference to k8s objects of unknown type. Should be used with `kind` and `apiVersion`.
+* `kind` - kind of k8s object of unknown type.
+* `apiVersion` -  API version of k8s object of unknown type.
+
+Example:
+
+```go
+func ChangeStatus(newStatus, currentStatus string) {
+  err := changeStatus(newStatus)
+  if err != nil {
+    klog.ErrorS(err, "Failed changing status", "desiredStatus", newStatus, "status", currentStatus)
+    return
+  }
+  klog.InfoS("Changed status", "previousStatus", currentStatus, "status", newStatus)
+}
+```
+
+[lowerCamelCase]: https://en.wiktionary.org/wiki/lowerCamelCase
+
+### Use `klog.KObj` and `klog.KRef` for Kubernetes objects
+
+As part of structured logging migration we want to ensure that kubernetes objects references are consistent within the
+codebase. Two new utility functions were introduced to klog: `klog.KObj` and `klog.KRef`. Any reference
+(name, uid, namespace) to Kubernetes Object (Pod, Node, Deployment, CRD) should be rewritten to utilize those functions.
+In situations when the object `UID` would be beneficial for the log, it should be added as a separate field with `UID` suffix.
+
+For example
+```go
+func updatePod(pod *corev1.Pod) {
+   ...
+   klog.Infof("Updated pod %s in namespace %s", pod.Name, pod.Namespace)
+}
+```
+should be changed to
+```go
+func updatePod(pod *corev1.Pod) {
+   ...
+   klog.InfoS("Updated pod", "pod", klog.KObj(pod))
+}
+```
+And
+```go
+func updatePod(pod *corev1.Pod) {
+   ...
+   klog.Infof("Updated pod with uid: %s", pod.UID)
+}
+```
+should be changed to
+```go
+func updatePod(pod *corev1.Pod) {
+   ...
+   klog.InfoS("Updated pod", "pod", klog.KObj(pod), "podUID", pod.UID)
+}
+```
+
+`klog.KObj` requires passing a kubernetes object (struct implementing `metav1.Object` interface). In situations where
+the object is not available, we can use `klog.KRef`. Still it is suggested to rewrite the code to use object pointer
+instead of strings where possible.
+
+```go
+func updatePod(podName, podNamespace string) {
+   ...
+   klog.InfoS("Updated pod", "pod", klog.KRef(podNamespace, podName))
+}
+```
+
+For non-namespaced objects we can pass an empty string as the namespace argument.
+
+```go
+func updateNode(nodeName string) {
+   ...
+   klog.InfoS("Updated node", "node", klog.KRef("", nodeName))
+}
+```
+
+### Verify log output
+
+With the introduction of structured functions log arguments will be formatted automatically instead of depending on the
+caller. This means that we can remove the burden of picking the format by caller and ensure greater log consistency, but during
+migration it's important to ensure that we avoid degradation of log quality. We should ensure that during migration we
+preserve properties like:
+* meaning of event described by log
+* verbosity of stored information
+
+PRs migrating logs should include examples of outputted logs before and after the change, thus helping reviewers
+understand the impact of change.
+
+Example code to compare [httplog.go#168](https://github.com/kubernetes/kubernetes/blob/15c3f1b11/staging/src/k8s.io/apiserver/pkg/server/httplog/httplog.go#L168)
+```go
+package main
+
+import (
+	"fmt"
+	"k8s.io/klog/v2"
+	"net/http"
+	"time"
+)
+
+type respLogger struct {
+	status         int
+	statusStack    string
+	addedInfo      string
+	req *http.Request
+}
+
+func (rl *respLogger) Log(latency time.Duration) {
+	klog.InfoDepth(1, fmt.Sprintf("verb=%q URI=%q latency=%v resp=%v UserAgent=%q srcIP=%q: %v%v",
+		rl.req.Method, rl.req.RequestURI,
+		latency, rl.status,
+		rl.req.UserAgent(), rl.req.RemoteAddr,
+		rl.statusStack, rl.addedInfo,
+	))
+}
+
+func (rl *respLogger) LogArgs(latency time.Duration) []interface{} {
+    return []interface{}{
+        "verb", rl.req.Method,
+        "URI", rl.req.RequestURI,
+        "latency", latency,
+        "resp", rl.status,
+        "userAgent", rl.req.UserAgent(),
+        "srcIP", rl.req.RemoteAddr,
+    }
+}
+
+func main() {
+	klog.InitFlags(nil)
+
+    // Setup
+	rl := respLogger{
+		status:             200,
+		req:                &http.Request{
+			Method:           "GET",
+			Header:           map[string][]string{"User-Agent": {"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0. 2272.118 Safari/537.36."}},
+			RemoteAddr:       "127.0.0.1",
+			RequestURI:       "/metrics",
+		},
+	}
+	latency := time.Second
+
+    // Before migration
+    rl.Log(latency)
+
+    // After migration
+	klog.InfoS("Received HTTP request", rl.LogArgs(latency)...)
+}
+```
+
+Log output before migration
+```
+I0528 19:15:22.737538   47512 logtest.go:52] verb="GET" URI="/metrics" latency=1s resp=200 UserAgent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0. 2272.118 Safari/537.36." srcIP="127.0.0.1":
+```
+After
+```
+I0528 19:15:22.737588   47512 logtest.go:55] "Received HTTP request" verb="GET" URI="/metrics" latency="1s" resp=200 userAgent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0. 2272.118 Safari/537.36." srcIP="127.0.0.1"
+```