From 67090d939c0a9fca9105c4f62fcd6aecbee01fc2 Mon Sep 17 00:00:00 2001 From: Yeh-lei Wu Date: Mon, 4 Mar 2019 11:21:19 +0800 Subject: [PATCH] Support slow log tailing sidecar for tidb instance (#290) * Support slow log tailing sidecar for tidb instance --- .../templates/scripts/_start_tidb.sh.tpl | 5 + .../tidb-cluster/templates/tidb-cluster.yaml | 7 + charts/tidb-cluster/values.yaml | 10 ++ docs/operation-guide.md | 27 ++++ .../tidb-cluster-values.yaml | 1 + pkg/apis/pingcap.com/v1alpha1/types.go | 25 ++-- .../v1alpha1/zz_generated.deepcopy.go | 18 +++ pkg/controller/controller_utils.go | 9 ++ pkg/controller/controller_utils_test.go | 9 ++ pkg/manager/member/tidb_member_manager.go | 130 ++++++++++++------ .../member/tidb_member_manager_test.go | 12 ++ 11 files changed, 200 insertions(+), 53 deletions(-) diff --git a/charts/tidb-cluster/templates/scripts/_start_tidb.sh.tpl b/charts/tidb-cluster/templates/scripts/_start_tidb.sh.tpl index 06b60e58c8..f223992c4f 100644 --- a/charts/tidb-cluster/templates/scripts/_start_tidb.sh.tpl +++ b/charts/tidb-cluster/templates/scripts/_start_tidb.sh.tpl @@ -34,6 +34,11 @@ then ARGS="${ARGS} --enable-binlog=true" fi +if [[ ! -z "${SLOW_LOG_FILE:-}" ]] +then + ARGS="${ARGS} --log-slow-query=${SLOW_LOG_FILE:-}" +fi + echo "start tidb-server ..." 
echo "/tidb-server ${ARGS}" exec /tidb-server ${ARGS} diff --git a/charts/tidb-cluster/templates/tidb-cluster.yaml b/charts/tidb-cluster/templates/tidb-cluster.yaml index 0628b511b9..beb93718a0 100644 --- a/charts/tidb-cluster/templates/tidb-cluster.yaml +++ b/charts/tidb-cluster/templates/tidb-cluster.yaml @@ -76,3 +76,10 @@ spec: {{- end }} binlogEnabled: {{ .Values.binlog.pump.create | default false }} maxFailoverCount: {{ .Values.tidb.maxFailoverCount | default 3 }} + separateSlowLog: {{ .Values.tidb.separateSlowLog | default false }} + slowLogTailer: + image: {{ .Values.tidb.slowLogTailer.image }} + imagePullPolicy: {{ .Values.tidb.slowLogTailer.imagePullPolicy | default "IfNotPresent" }} + {{- if .Values.tidb.slowLogTailer.resources }} +{{ toYaml .Values.tidb.slowLogTailer.resources | indent 6 }} + {{- end }} diff --git a/charts/tidb-cluster/values.yaml b/charts/tidb-cluster/values.yaml index 5a6098862a..a9b007becf 100644 --- a/charts/tidb-cluster/values.yaml +++ b/charts/tidb-cluster/values.yaml @@ -175,6 +175,16 @@ tidb: exposeStatus: true # annotations: # cloud.google.com/load-balancer-type: Internal + # separateSlowLog: true + slowLogTailer: + image: busybox:1.26.2 + resources: + limits: + cpu: 100m + memory: 50Mi + requests: + cpu: 20m + memory: 5Mi # mysqlClient is used to set password for TiDB mysqlClient: diff --git a/docs/operation-guide.md b/docs/operation-guide.md index 973367fc8f..883b5bcc46 100644 --- a/docs/operation-guide.md +++ b/docs/operation-guide.md @@ -131,6 +131,33 @@ Then open your browser at http://localhost:3000 The default username and passwor The Grafana service is exposed as `NodePort` by default, you can change it to `LoadBalancer` if the underlining Kubernetes has load balancer support. And then view the dashboard via load balancer endpoint. +### View TiDB Slow Query Log + +For default setup, tidb is configured to export slow query log to STDOUT along with normal server logs. 
You can obtain the slow query log by running `grep` for the keyword `SLOW_QUERY`: + +```shell +$ kubectl logs -n ${namespace} ${tidbPodName} | grep SLOW_QUERY +``` + +Optionally, you can output the slow query log in a separate sidecar by enabling `separateSlowLog`: + +```yaml +# Uncomment the following line to enable separate output of the slow query log + # separateSlowLog: true +``` + +Run `helm upgrade` to apply the change, then you can obtain the slow query log from the sidecar named `slowlog`: + +```shell +$ kubectl logs -n ${namespace} ${tidbPodName} -c slowlog +``` + +To retrieve logs from multiple pods, [`stern`](https://github.com/wercker/stern) is recommended. + +```shell +$ stern -n ${namespace} tidb -c slowlog +``` + ## Backup Currently, TiDB Operator supports two kinds of backup: incremental backup via binlog and full backup(scheduled or ad-hoc) via [Mydumper](https://github.com/maxbube/mydumper). diff --git a/images/tidb-operator-e2e/tidb-cluster-values.yaml b/images/tidb-operator-e2e/tidb-cluster-values.yaml index e0845060c0..0248eba487 100644 --- a/images/tidb-operator-e2e/tidb-cluster-values.yaml +++ b/images/tidb-operator-e2e/tidb-cluster-values.yaml @@ -143,6 +143,7 @@ tidb: exposeStatus: true # annotations: # cloud.google.com/load-balancer-type: Internal + separateSlowLog: true # mysqlClient is used to set password for TiDB mysqlClient: diff --git a/pkg/apis/pingcap.com/v1alpha1/types.go b/pkg/apis/pingcap.com/v1alpha1/types.go index 78cc5de6c7..458543df41 100644 --- a/pkg/apis/pingcap.com/v1alpha1/types.go +++ b/pkg/apis/pingcap.com/v1alpha1/types.go @@ -42,8 +42,10 @@ const ( TiDBMemberType MemberType = "tidb" // TiKVMemberType is tikv container type TiKVMemberType MemberType = "tikv" - //PushGatewayMemberType is pushgateway container type + // PushGatewayMemberType is pushgateway container type PushGatewayMemberType MemberType = "pushgateway" + // SlowLogTailerMemberType is tidb log tailer container type + SlowLogTailerMemberType MemberType = "slowlog" // 
UnknownMemberType is unknown container type UnknownMemberType MemberType = "unknown" ) @@ -117,13 +119,20 @@ type PDSpec struct { // TiDBSpec contains details of PD member type TiDBSpec struct { ContainerSpec - Replicas int32 `json:"replicas"` - NodeSelector map[string]string `json:"nodeSelector,omitempty"` - NodeSelectorRequired bool `json:"nodeSelectorRequired,omitempty"` - StorageClassName string `json:"storageClassName,omitempty"` - Tolerations []corev1.Toleration `json:"tolerations,omitempty"` - BinlogEnabled bool `json:"binlogEnabled,omitempty"` - MaxFailoverCount int32 `json:"maxFailoverCount,omitempty"` + Replicas int32 `json:"replicas"` + NodeSelector map[string]string `json:"nodeSelector,omitempty"` + NodeSelectorRequired bool `json:"nodeSelectorRequired,omitempty"` + StorageClassName string `json:"storageClassName,omitempty"` + Tolerations []corev1.Toleration `json:"tolerations,omitempty"` + BinlogEnabled bool `json:"binlogEnabled,omitempty"` + MaxFailoverCount int32 `json:"maxFailoverCount,omitempty"` + SeparateSlowLog bool `json:"separateSlowLog,omitempty"` + SlowLogTailer TiDBSlowLogTailerSpec `json:"slowLogTailer,omitempty"` +} + +// TiDBSlowLogTailerSpec represents an optional log tailer sidecar with TiDB +type TiDBSlowLogTailerSpec struct { + ContainerSpec } // TiKVSpec contains details of PD member diff --git a/pkg/apis/pingcap.com/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/pingcap.com/v1alpha1/zz_generated.deepcopy.go index 66070042d2..9ae44e85c5 100644 --- a/pkg/apis/pingcap.com/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/pingcap.com/v1alpha1/zz_generated.deepcopy.go @@ -217,6 +217,23 @@ func (in *TiDBMember) DeepCopy() *TiDBMember { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *TiDBSlowLogTailerSpec) DeepCopyInto(out *TiDBSlowLogTailerSpec) { + *out = *in + in.ContainerSpec.DeepCopyInto(&out.ContainerSpec) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TiDBSlowLogTailerSpec. +func (in *TiDBSlowLogTailerSpec) DeepCopy() *TiDBSlowLogTailerSpec { + if in == nil { + return nil + } + out := new(TiDBSlowLogTailerSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *TiDBSpec) DeepCopyInto(out *TiDBSpec) { *out = *in @@ -235,6 +252,7 @@ func (in *TiDBSpec) DeepCopyInto(out *TiDBSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + in.SlowLogTailer.DeepCopyInto(&out.SlowLogTailer) return } diff --git a/pkg/controller/controller_utils.go b/pkg/controller/controller_utils.go index 868117a228..ece3327da6 100644 --- a/pkg/controller/controller_utils.go +++ b/pkg/controller/controller_utils.go @@ -36,6 +36,8 @@ var ( const ( // defaultPushgatewayImage is default image of pushgateway defaultPushgatewayImage = "prom/pushgateway:v0.3.1" + // defaultTiDBLogTailerImage is default image of tidb log tailer + defaultTiDBLogTailerImage = "busybox:1.26.2" ) // RequeueError is used to requeue the item, this error type should't be considered as a real error @@ -129,6 +131,13 @@ func GetPushgatewayImage(cluster *v1alpha1.TidbCluster) string { return defaultPushgatewayImage } +func GetSlowLogTailerImage(cluster *v1alpha1.TidbCluster) string { + if img := cluster.Spec.TiDB.SlowLogTailer.Image; img != "" { + return img + } + return defaultTiDBLogTailerImage +} + // PDMemberName returns pd member name func PDMemberName(clusterName string) string { return fmt.Sprintf("%s-pd", clusterName) diff --git a/pkg/controller/controller_utils_test.go b/pkg/controller/controller_utils_test.go index 8bc52abe51..23a50340d7 100644 --- a/pkg/controller/controller_utils_test.go +++ 
b/pkg/controller/controller_utils_test.go @@ -157,6 +157,15 @@ func TestGetPushgatewayImage(t *testing.T) { g.Expect(GetPushgatewayImage(tc)).To(Equal("image-1")) } +func TestGetSlowLogTailerImage(t *testing.T) { + g := NewGomegaWithT(t) + + tc := &v1alpha1.TidbCluster{} + g.Expect(GetSlowLogTailerImage(tc)).To(Equal(defaultTiDBLogTailerImage)) + tc.Spec.TiDB.SlowLogTailer.Image = "image-1" + g.Expect(GetSlowLogTailerImage(tc)).To(Equal("image-1")) +} + func TestPDMemberName(t *testing.T) { g := NewGomegaWithT(t) g.Expect(PDMemberName("demo")).To(Equal("demo-pd")) diff --git a/pkg/manager/member/tidb_member_manager.go b/pkg/manager/member/tidb_member_manager.go index 3b74381dbd..f2e713dee3 100644 --- a/pkg/manager/member/tidb_member_manager.go +++ b/pkg/manager/member/tidb_member_manager.go @@ -14,6 +14,7 @@ package member import ( + "fmt" "strconv" "github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1" @@ -30,6 +31,12 @@ import ( corelisters "k8s.io/client-go/listers/core/v1" ) +const ( + slowQueryLogVolumeName = "slowlog" + slowQueryLogDir = "/var/log/tidb" + slowQueryLogFile = slowQueryLogDir + "/slowlog" +) + type tidbMemberManager struct { setControl controller.StatefulSetControlInterface svcControl controller.ServiceControlInterface @@ -240,6 +247,83 @@ func (tmm *tidbMemberManager) getNewTiDBSetForTidbCluster(tc *v1alpha1.TidbClust }, } + var containers []corev1.Container + if tc.Spec.TiDB.SeparateSlowLog { + // mount a shared volume and tail the slow log to STDOUT using a sidecar. 
+ vols = append(vols, corev1.Volume{ + Name: slowQueryLogVolumeName, + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }) + volMounts = append(volMounts, corev1.VolumeMount{Name: slowQueryLogVolumeName, MountPath: slowQueryLogDir}) + containers = append(containers, corev1.Container{ + Name: v1alpha1.SlowLogTailerMemberType.String(), + Image: controller.GetSlowLogTailerImage(tc), + ImagePullPolicy: tc.Spec.TiDB.SlowLogTailer.ImagePullPolicy, + Resources: util.ResourceRequirement(tc.Spec.TiDB.SlowLogTailer.ContainerSpec), + VolumeMounts: []corev1.VolumeMount{ + {Name: slowQueryLogVolumeName, MountPath: slowQueryLogDir}, + }, + Command: []string{ + "sh", + "-c", + fmt.Sprintf("touch %s; tail -n0 -F %s;", slowQueryLogFile, slowQueryLogFile), + }, + }) + } + + envs := []corev1.EnvVar{ + { + Name: "CLUSTER_NAME", + Value: tc.GetName(), + }, + { + Name: "TZ", + Value: tc.Spec.Timezone, + }, + { + Name: "BINLOG_ENABLED", + Value: strconv.FormatBool(tc.Spec.TiDB.BinlogEnabled), + }, + } + if tc.Spec.TiDB.SeparateSlowLog { + envs = append(envs, corev1.EnvVar{ + Name: "SLOW_LOG_FILE", + Value: slowQueryLogFile, + }) + } + containers = append(containers, corev1.Container{ + Name: v1alpha1.TiDBMemberType.String(), + Image: tc.Spec.TiDB.Image, + Command: []string{"/bin/sh", "/usr/local/bin/tidb_start_script.sh"}, + ImagePullPolicy: tc.Spec.TiDB.ImagePullPolicy, + Ports: []corev1.ContainerPort{ + { + Name: "server", + ContainerPort: int32(4000), + Protocol: corev1.ProtocolTCP, + }, + { + Name: "status", // pprof, status, metrics + ContainerPort: int32(10080), + Protocol: corev1.ProtocolTCP, + }, + }, + VolumeMounts: volMounts, + Resources: util.ResourceRequirement(tc.Spec.TiDB.ContainerSpec), + Env: envs, + ReadinessProbe: &corev1.Probe{ + Handler: corev1.Handler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/status", + Port: intstr.FromInt(10080), + }, + }, + InitialDelaySeconds: int32(10), + }, + }) + tidbLabel := 
label.New().Instance(instanceName).TiDB() tidbSet := &apps.StatefulSet{ ObjectMeta: metav1.ObjectMeta{ @@ -264,51 +348,7 @@ func (tmm *tidbMemberManager) getNewTiDBSetForTidbCluster(tc *v1alpha1.TidbClust label.New().Instance(instanceName).TiDB(), tc.Spec.TiDB.NodeSelector, ), - Containers: []corev1.Container{ - { - Name: v1alpha1.TiDBMemberType.String(), - Image: tc.Spec.TiDB.Image, - Command: []string{"/bin/sh", "/usr/local/bin/tidb_start_script.sh"}, - ImagePullPolicy: tc.Spec.TiDB.ImagePullPolicy, - Ports: []corev1.ContainerPort{ - { - Name: "server", - ContainerPort: int32(4000), - Protocol: corev1.ProtocolTCP, - }, - { - Name: "status", // pprof, status, metrics - ContainerPort: int32(10080), - Protocol: corev1.ProtocolTCP, - }, - }, - VolumeMounts: volMounts, - Resources: util.ResourceRequirement(tc.Spec.TiDB.ContainerSpec), - Env: []corev1.EnvVar{ - { - Name: "CLUSTER_NAME", - Value: tc.GetName(), - }, - { - Name: "TZ", - Value: tc.Spec.Timezone, - }, - { - Name: "BINLOG_ENABLED", - Value: strconv.FormatBool(tc.Spec.TiDB.BinlogEnabled), - }, - }, - ReadinessProbe: &corev1.Probe{ - Handler: corev1.Handler{ - HTTPGet: &corev1.HTTPGetAction{ - Path: "/status", - Port: intstr.FromInt(10080), - }, - }, - InitialDelaySeconds: int32(10), - }, - }, - }, + Containers: containers, RestartPolicy: corev1.RestartPolicyAlways, Tolerations: tc.Spec.TiDB.Tolerations, Volumes: vols, diff --git a/pkg/manager/member/tidb_member_manager_test.go b/pkg/manager/member/tidb_member_manager_test.go index e75288a265..611ab7d475 100644 --- a/pkg/manager/member/tidb_member_manager_test.go +++ b/pkg/manager/member/tidb_member_manager_test.go @@ -209,6 +209,18 @@ func TestTiDBMemberManagerSyncUpdate(t *testing.T) { g.Expect(err).NotTo(HaveOccurred()) }, }, + { + name: "enable separate slowlog on the fly", + modify: func(tc *v1alpha1.TidbCluster) { + tc.Spec.TiDB.SeparateSlowLog = true + }, + errWhenUpdateStatefulSet: false, + err: false, + expectStatefulSetFn: func(g *GomegaWithT, set 
*apps.StatefulSet, err error) { + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(set.Spec.Template.Spec.Containers).To(HaveLen(2)) + }, + }, } for i := range tests {