diff --git a/charts/tidb-lightning/templates/job.yaml b/charts/tidb-lightning/templates/job.yaml index f9e636064f2..2f5e9a8ba7b 100644 --- a/charts/tidb-lightning/templates/job.yaml +++ b/charts/tidb-lightning/templates/job.yaml @@ -28,6 +28,9 @@ spec: {{ toYaml .Values.annotations | indent 8 }} {{- end }} spec: + {{- if .Values.serviceAccount }} + serviceAccountName: {{ .Values.serviceAccount }} + {{- end }} {{ if and .Values.dataSource.local.hostPath .Values.dataSource.local.nodeName -}} nodeName: {{ .Values.dataSource.local.nodeName }} {{ else if not .Values.dataSource.adhoc.pvcName -}} diff --git a/charts/tidb-lightning/values.yaml b/charts/tidb-lightning/values.yaml index e5a151e2050..fc95e7b367c 100644 --- a/charts/tidb-lightning/values.yaml +++ b/charts/tidb-lightning/values.yaml @@ -74,6 +74,9 @@ affinity: {} backend: importer # importer | tidb +# Specify a Service Account for lightning +# serviceAccount: + config: | [lightning] level = "info" diff --git a/ci/aws-clean-eks.sh b/ci/aws-clean-eks.sh index 69f11ec570d..fc9c94eb813 100755 --- a/ci/aws-clean-eks.sh +++ b/ci/aws-clean-eks.sh @@ -24,25 +24,52 @@ function get_stacks() { aws cloudformation list-stacks --stack-status-filter CREATE_COMPLETE DELETE_FAILED --query 'StackSummaries[*].StackName' --output text } +function delete_security_group() { + local sgId="$1" + echo "info: deleting security group '$sgId'" + for eni in $(aws ec2 describe-network-interfaces --filters "Name=group-id,Values=$sgId" --query 'NetworkInterfaces[*].NetworkInterfaceId' --output text); do + echo "info: clear leaked network interfaces '$eni'" + aws ec2 delete-network-interface --network-interface-id "$eni" + done + aws ec2 delete-security-group --group-id "$sgId" + if [ $? 
-eq 0 ]; then + echo "info: succesfully deleted security group '$sgId'" + else + echo "error: failed to deleted security group '$sgId'" + fi +} + +function delete_vpc() { + local vpcId="$1" + echo "info: deleting vpc '$vpcId'" + for sgId in $(aws ec2 describe-security-groups --filters "Name=vpc-id,Values=$vpcId" --query "SecurityGroups[?GroupName != 'default'].GroupId" --output text); do + delete_security_group "$sgId" + done + aws ec2 delete-vpc --vpc-id "$vpcId" + if [ $? -eq 0 ]; then + echo "info: succesfully deleted vpc '$vpcId'" + else + echo "error: failed to deleted vpc '$vpcId'" + fi +} + function fix_eks_mng_deletion_issues() { local cluster="$1" local mng="$2" while IFS=$'\n' read -r line; do read -r code resourceIds <<< $line if [ "$code" == "Ec2SecurityGroupDeletionFailure" ]; then - echo "info: clear security group '$resourceIds'" - for eni in $(aws ec2 describe-network-interfaces --filters "Name=group-id,Values=$resourceIds" --query 'NetworkInterfaces[*].NetworkInterfaceId' --output text); do - echo "info: clear leaked network interfaces '$eni'" - aws ec2 delete-network-interface --network-interface-id "$eni" + IFS=',' read -ra sgIds <<< "$resourceIds" + for sgId in ${sgIds[@]}; do + delete_security_group "$sgId" done - aws ec2 delete-security-group --group-id $resourceIds fi done <<< $(aws eks describe-nodegroup --cluster-name "$cluster" --nodegroup-name "$mng" --query 'nodegroup.health.issues' --output json | jq -r '.[].resourceIds |= join(",") | .[] | "\(.code)\t\(.resourceIds)"') } function clean_eks() { local CLUSTER="$1" - echo "info: deleting mng stack" + echo "info: searching mng stack" local regex='^'$CLUSTER'-mng-[0-9]+$' local mngStack= for stackName in $(get_stacks); do @@ -53,24 +80,15 @@ function clean_eks() { break done if [ -n "$mngStack" ]; then - echo "info: mng stack found '$mngStack', deleting it" - aws cloudformation delete-stack --stack-name $mngStack - aws cloudformation wait stack-delete-complete --stack-name $mngStack - if [ 
$? -ne 0 ]; then - echo "error: failed to delete mng stack '$mngStack', delete related resource first" - for mngName in $(aws eks list-nodegroups --cluster-name jenkins-tidb-operator-e2e2 --query 'nodegroups[*]' --output text); do - fix_eks_mng_deletion_issues "$CLUSTER" $mngName - done - aws cloudformation delete-stack --stack-name $mngStack - aws cloudformation wait stack-delete-complete --stack-name $mngStack - fi + echo "info: mng stack found '$mngStack'" else - echo "info: mng stack not found, skipped" + echo "info: mng stack not found" fi - echo "info: deleting cluster/cluster-role/mng-role/vpc stacks" + echo "info: deleting mng/cluster/cluster-role/mng-role/vpc stacks" local stacks=( - $CLUSTER-cluster + $mngStack + $CLUSTER-cluster $CLUSTER-role-cluster $CLUSTER-role-mng $CLUSTER-vpc @@ -79,6 +97,33 @@ function clean_eks() { echo "info: deleting stack $stack" aws cloudformation delete-stack --stack-name $stack aws cloudformation wait stack-delete-complete --stack-name $stack + if [ $? 
-ne 0 ]; then + echo "error: failed to delete stack '$stack'" + if [ "$stack" == "$mngStack" ]; then + echo "info: try to fix mng stack '$stack'" + for mngName in $(aws eks list-nodegroups --cluster-name "$CLUSTER" --query 'nodegroups[*]' --output text); do + fix_eks_mng_deletion_issues "$CLUSTER" $mngName + done + elif [ "$stack" == "$CLUSTER-vpc" ]; then + echo "info: try to fix vpc stack '$stack'" + while IFS=$'\n' read -r sgId; do + delete_security_group "$sgId" + done <<< $(aws cloudformation describe-stacks --stack-name "$stack" --query 'Stacks[*].Outputs[*]' --output json | jq -r '.[] | .[] | select(.OutputKey == "ControlPlaneSecurityGroupID") | .OutputValue') + while IFS=$'\n' read -r vpcId; do + delete_vpc "$vpcId" + done <<< $(aws cloudformation describe-stacks --stack-name "$stack" --query 'Stacks[*].Outputs[*]' --output json | jq -r '.[] | .[] | select(.OutputKey == "VPCID") | .OutputValue') + else + echo "fatal: unable to delete stack $stack" + exit 1 + fi + echo "info: try to delete the stack '$stack' again" + aws cloudformation delete-stack --stack-name $stack + aws cloudformation wait stack-delete-complete --stack-name $stack + if [ $? -ne 0 ]; then + echo "fatal: unable to delete stack $stack" + exit 1 + fi + fi done } diff --git a/ci/e2e_eks.groovy b/ci/e2e_eks.groovy index 0c1a967ed5c..1966b95d54b 100644 --- a/ci/e2e_eks.groovy +++ b/ci/e2e_eks.groovy @@ -40,6 +40,27 @@ spec: emptyDir: {} ''' +// Able to override default values in Jenkins job via environment variables. 
+if (!env.DEFAULT_GIT_REF) { + env.DEFAULT_GIT_REF = "master" +} + +if (!env.DEFAULT_GINKGO_NODES) { + env.DEFAULT_GINKGO_NODES = "8" +} + +if (!env.DEFAULT_E2E_ARGS) { + env.DEFAULT_E2E_ARGS = "--ginkgo.skip='\\[Serial\\]|\\[Stability\\]' --ginkgo.focus='\\[tidb-operator\\]'" +} + +if (!env.DEFAULT_CLUSTER) { + env.DEFAULT_CLUSTER = "jenkins-tidb-operator-e2e" +} + +if (!env.DEFAULT_AWS_REGION) { + env.DEFAULT_AWS_REGION = "us-west-2" +} + pipeline { agent { kubernetes { @@ -50,16 +71,17 @@ pipeline { } options { - timeout(time: 3, unit: 'HOURS') + timeout(time: 3, unit: 'HOURS') } parameters { string(name: 'GIT_URL', defaultValue: 'git@github.com:pingcap/tidb-operator.git', description: 'git repo url') - string(name: 'GIT_REF', defaultValue: 'master', description: 'git ref spec to checkout, e.g. master, release-1.1') + string(name: 'GIT_REF', defaultValue: env.DEFAULT_GIT_REF, description: 'git ref spec to checkout, e.g. master, release-1.1') string(name: 'PR_ID', defaultValue: '', description: 'pull request ID, this will override GIT_REF if set, e.g. 1889') - string(name: 'CLUSTER', defaultValue: 'jenkins-tidb-operator-e2e', description: 'the name of the cluster') - string(name: 'AWS_REGION', defaultValue: 'us-west-2', description: 'the AWS region') - string(name: 'GINKGO_NODES', defaultValue: '8', description: 'the number of ginkgo nodes') + string(name: 'GINKGO_NODES', defaultValue: env.DEFAULT_GINKGO_NODES, description: 'the number of ginkgo nodes') + string(name: 'E2E_ARGS', defaultValue: env.DEFAULT_E2E_ARGS, description: "e2e args, e.g. 
--ginkgo.focus='\\[Stability\\]'") + string(name: 'CLUSTER', defaultValue: env.DEFAULT_CLUSTER, description: 'the name of the cluster') + string(name: 'AWS_REGION', defaultValue: env.DEFAULT_AWS_REGION, description: 'the AWS region') } environment { @@ -117,7 +139,7 @@ pipeline { echo "info: try to clean the cluster created previously" ./ci/aws-clean-eks.sh \$CLUSTER echo "info: begin to run e2e" - ./hack/e2e.sh -- --ginkgo.skip='\\[Serial\\]' --ginkgo.focus='\\[tidb-operator\\]' + ./hack/e2e.sh -- ${params.E2E_ARGS} """ } } diff --git a/ci/e2e_gke.groovy b/ci/e2e_gke.groovy index 5575caae973..e85f5809ce1 100644 --- a/ci/e2e_gke.groovy +++ b/ci/e2e_gke.groovy @@ -40,6 +40,31 @@ spec: emptyDir: {} ''' +// Able to override default values in Jenkins job via environment variables. +if (!env.DEFAULT_GIT_REF) { + env.DEFAULT_GIT_REF = "master" +} + +if (!env.DEFAULT_GINKGO_NODES) { + env.DEFAULT_GINKGO_NODES = "8" +} + +if (!env.DEFAULT_E2E_ARGS) { + env.DEFAULT_E2E_ARGS = "--ginkgo.skip='\\[Serial\\]|\\[Stability\\]' --ginkgo.focus='\\[tidb-operator\\]'" +} + +if (!env.DEFAULT_CLUSTER) { + env.DEFAULT_CLUSTER = "jenkins-tidb-operator-e2e" +} + +if (!env.DEFAULT_GCP_PROJECT) { + env.DEFAULT_GCP_PROJECT = "" +} + +if (!env.DEFAULT_GCP_ZONE) { + env.DEFAULT_GCP_ZONE = "us-central1-b" +} + pipeline { agent { kubernetes { @@ -50,17 +75,18 @@ pipeline { } options { - timeout(time: 3, unit: 'HOURS') + timeout(time: 3, unit: 'HOURS') } parameters { string(name: 'GIT_URL', defaultValue: 'git@github.com:pingcap/tidb-operator.git', description: 'git repo url') - string(name: 'GIT_REF', defaultValue: 'master', description: 'git ref spec to checkout, e.g. master, release-1.1') + string(name: 'GIT_REF', defaultValue: env.DEFAULT_GIT_REF, description: 'git ref spec to checkout, e.g. master, release-1.1') string(name: 'PR_ID', defaultValue: '', description: 'pull request ID, this will override GIT_REF if set, e.g. 
1889') - string(name: 'CLUSTER', defaultValue: 'jenkins-tidb-operator-e2e', description: 'the name of the cluster') - string(name: 'GCP_PROJECT', defaultValue: 'smooth-tendril-207212', description: 'the GCP project ID') - string(name: 'GCP_ZONE', defaultValue: 'us-central1-b', description: 'the GCP zone') - string(name: 'GINKGO_NODES', defaultValue: '8', description: 'the number of ginkgo nodes') + string(name: 'GINKGO_NODES', defaultValue: env.DEFAULT_GINKGO_NODES, description: 'the number of ginkgo nodes') + string(name: 'E2E_ARGS', defaultValue: env.DEFAULT_E2E_ARGS, description: "e2e args, e.g. --ginkgo.focus='\\[Stability\\]'") + string(name: 'CLUSTER', defaultValue: env.DEFAULT_CLUSTER, description: 'the name of the cluster') + string(name: 'GCP_PROJECT', defaultValue: env.DEFAULT_GCP_PROJECT, description: 'the GCP project ID') + string(name: 'GCP_ZONE', defaultValue: env.DEFAULT_GCP_ZONE, description: 'the GCP zone') } environment { @@ -120,7 +146,7 @@ pipeline { echo "info: try to clean the cluster created previously" SKIP_BUILD=y SKIP_IMAGE_BUILD=y SKIP_UP=y SKIP_TEST=y ./hack/e2e.sh echo "info: begin to run e2e" - ./hack/e2e.sh -- --ginkgo.skip='\\[Serial\\]' --ginkgo.focus='\\[tidb-operator\\]' + ./hack/e2e.sh -- ${params.E2E_ARGS} """ } } diff --git a/examples/auto-scale/README.md b/examples/auto-scale/README.md new file mode 100644 index 00000000000..5bb7277d317 --- /dev/null +++ b/examples/auto-scale/README.md @@ -0,0 +1,50 @@ +# Deploying TidbCluster with Auto-scaling + +> **Note:** +> +> This setup is for test or demo purpose only and **IS NOT** applicable for critical environment. Refer to the [Documents](https://pingcap.com/docs/stable/tidb-in-kubernetes/deploy/prerequisites/) for production setup. + + +The following steps will create a TiDB cluster with monitoring and auto-scaler, the monitoring data is not persisted by default. + +**Prerequisites**: +- Has TiDB operator `v1.1.0-beta.2` or higher version installed. 
[Doc](https://pingcap.com/docs/stable/tidb-in-kubernetes/deploy/tidb-operator/) +- Has default `StorageClass` configured, and there are enough PVs (by default, 6 PVs are required) of that storageClass: + + This could be verified by the following command: + + ```bash + > kubectl get storageclass + ``` + + The output is similar to this: + + ```bash + NAME PROVISIONER AGE + standard (default) kubernetes.io/gce-pd 1d + gold kubernetes.io/gce-pd 1d + ``` + + Alternatively, you could specify the storageClass explicitly by modifying `tidb-cluster.yaml`. + + +## Enabling Auto-scaling + +> **Note:** +> +> The Auto-scaling feature is still in alpha. You should enable this feature in TiDB Operator by setting the following in values.yaml: + ```yaml +features: + AutoScaling=true +``` + +Auto-scale the cluster based on CPU load: +```bash +> kubectl -n <namespace> apply -f ./ +``` + +## Destroy + +```bash +> kubectl -n <namespace> delete -f ./ +``` diff --git a/examples/auto-scale/tidb-cluster-auto-scaler.yaml b/examples/auto-scale/tidb-cluster-auto-scaler.yaml new file mode 100644 index 00000000000..7727b8e0f2f --- /dev/null +++ b/examples/auto-scale/tidb-cluster-auto-scaler.yaml @@ -0,0 +1,31 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbClusterAutoScaler +metadata: + name: auto-scaling-demo +spec: + cluster: + name: auto-scaling-demo + monitor: + name: auto-scaling-demo + tikv: + minReplicas: 3 + maxReplicas: 4 + metricsTimeDuration: "1m" + metrics: + - type: "Resource" + resource: + name: "cpu" + target: + type: "Utilization" + averageUtilization: 80 + tidb: + minReplicas: 2 + maxReplicas: 3 + metricsTimeDuration: "1m" + metrics: + - type: "Resource" + resource: + name: "cpu" + target: + type: "Utilization" + averageUtilization: 80 diff --git a/examples/auto-scale/tidb-cluster.yaml b/examples/auto-scale/tidb-cluster.yaml new file mode 100644 index 00000000000..f46ba9a659a --- /dev/null +++ b/examples/auto-scale/tidb-cluster.yaml @@ -0,0 +1,26 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbCluster +metadata: +
name: auto-scaling-demo +spec: + version: v3.0.8 + timezone: UTC + pvReclaimPolicy: Delete + pd: + baseImage: pingcap/pd + replicas: 3 + requests: + storage: "1Gi" + config: {} + tikv: + baseImage: pingcap/tikv + replicas: 3 + requests: + storage: "1Gi" + config: {} + tidb: + baseImage: pingcap/tidb + replicas: 2 + service: + type: ClusterIP + config: {} diff --git a/examples/auto-scale/tidb-monitor.yaml b/examples/auto-scale/tidb-monitor.yaml new file mode 100644 index 00000000000..c1c99bc95df --- /dev/null +++ b/examples/auto-scale/tidb-monitor.yaml @@ -0,0 +1,20 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbMonitor +metadata: + name: auto-scaling-demo +spec: + clusters: + - name: auto-scaling-demo + prometheus: + baseImage: prom/prometheus + version: v2.11.1 + grafana: + baseImage: grafana/grafana + version: 6.0.1 + initializer: + baseImage: pingcap/tidb-monitor-initializer + version: v3.0.5 + reloader: + baseImage: pingcap/tidb-monitor-reloader + version: v1.0.1 + imagePullPolicy: IfNotPresent diff --git a/examples/initialize/README.md b/examples/initialize/README.md new file mode 100644 index 00000000000..6e4df651add --- /dev/null +++ b/examples/initialize/README.md @@ -0,0 +1,67 @@ +# Creating TidbCluster with Initialization + +> **Note:** +> +> This setup is for test or demo purpose only and **IS NOT** applicable for critical environment. Refer to the [Documents](https://pingcap.com/docs/stable/tidb-in-kubernetes/deploy/prerequisites/) for production setup. + + +The following steps will create a TiDB cluster with Initialization. + +**Prerequisites**: +- Has TiDB operator `v1.1.0-beta.1` or higher version installed. 
[Doc](https://pingcap.com/docs/stable/tidb-in-kubernetes/deploy/tidb-operator/) +- Has default `StorageClass` configured, and there are enough PVs (by default, 2 PVs are required) of that storageClass: + + This could be verified by the following command: + + ```bash + > kubectl get storageclass + ``` + + The output is similar to this: + + ```bash + NAME PROVISIONER AGE + standard (default) kubernetes.io/gce-pd 1d + gold kubernetes.io/gce-pd 1d + ``` + + Alternatively, you could specify the storageClass explicitly by modifying `tidb-cluster.yaml`. + + +## Initialize + + +> **Note:** +> +> Initialization should be performed once the TiDB cluster has been created. + +The following commands are assumed to be executed in this directory. + +You can create the root user and set its password by creating a secret and linking it to the Initializer: + +```bash +> kubectl create secret generic tidb-secret --from-literal=root=<root-password> --namespace=<namespace> +``` + +You can also create other users and set their passwords: +```bash +> kubectl create secret generic tidb-secret --from-literal=root=<root-password> --from-literal=developer=<developer-password> --namespace=<namespace> +``` + +Initialize the cluster to create the users and create the database named `hello`: + +```bash +> kubectl -n <namespace> apply -f ./ +``` + +Wait for the initialization job to complete: +```bash +$ kubectl get pod -n <namespace> | grep initialize-demo-tidb-initializer +initialize-demo-tidb-initializer-whzn7 0/1 Completed 0 57s +``` + +## Destroy + +```bash +> kubectl -n <namespace> delete -f ./ +``` diff --git a/examples/initialize/tidb-cluster.yaml b/examples/initialize/tidb-cluster.yaml new file mode 100644 index 00000000000..1ec543ea72d --- /dev/null +++ b/examples/initialize/tidb-cluster.yaml @@ -0,0 +1,26 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbCluster +metadata: + name: initialize-demo +spec: + version: v3.0.8 + timezone: UTC + pvReclaimPolicy: Delete + pd: + baseImage: pingcap/pd + replicas: 1 + requests: + storage: "1Gi" + config: {} + tikv: + baseImage: pingcap/tikv + replicas: 1 + requests: + storage: "1Gi"
+ config: {} + tidb: + baseImage: pingcap/tidb + replicas: 1 + service: + type: ClusterIP + config: {} diff --git a/examples/initialize/tidb-initializer.yaml b/examples/initialize/tidb-initializer.yaml new file mode 100644 index 00000000000..9067aff97bc --- /dev/null +++ b/examples/initialize/tidb-initializer.yaml @@ -0,0 +1,21 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbInitializer +metadata: + name: initialize-demo +spec: + image: tnir/mysqlclient + imagePullPolicy: IfNotPresent + cluster: + name: initialize-demo + initSql: "create database hello;" + # initSqlConfigMap: tidb-initsql + passwordSecret: "tidb-secret" + # permitHost: 172.6.5.8 + # resources: + # limits: + # cpu: 1000m + # memory: 500Mi + # requests: + # cpu: 100m + # memory: 50Mi + # timezone: "Asia/Shanghai"