From da3aa82dbcd4b88d33fdeb5ecbb92b75547ff8d3 Mon Sep 17 00:00:00 2001 From: Swapnil Mhamane Date: Fri, 12 Jul 2019 17:55:59 +0530 Subject: [PATCH 1/2] Restructure the documentation Signed-off-by: Swapnil Mhamane --- .ci/build | 20 +- .ci/check | 24 +-- .ci/integration_test | 21 +- .ci/unit_test | 19 +- Makefile | 4 + README.md | 226 ++------------------ build/Dockerfile | 2 +- doc/development/local_setup.md | 76 +++++++ doc/development/testing_and_dependencies.md | 31 +++ doc/{ => proposals}/design.md | 0 doc/{ => proposals}/validation.md | 0 doc/usage/getting_started.md | 89 ++++++++ doc/{ => usage}/metrics.md | 2 +- 13 files changed, 228 insertions(+), 286 deletions(-) create mode 100644 doc/development/local_setup.md create mode 100644 doc/development/testing_and_dependencies.md rename doc/{ => proposals}/design.md (100%) rename doc/{ => proposals}/validation.md (100%) create mode 100644 doc/usage/getting_started.md rename doc/{ => usage}/metrics.md (99%) diff --git a/.ci/build b/.ci/build index 3a9ad3298..69a57d01c 100755 --- a/.ci/build +++ b/.ci/build @@ -34,23 +34,9 @@ VCS="github.com" ORGANIZATION="gardener" PROJECT="etcd-backup-restore" REPOSITORY=${VCS}/${ORGANIZATION}/${PROJECT} +export GO111MODULE=on +cd "${SOURCE_PATH}" -# The `go ` commands requires to see the target repository to be part of a -# Go workspace. Thus, if we are not yet in a Go workspace, let's create one -# temporarily by using symbolic links. 
-if [[ "${SOURCE_PATH}" != *"src/${REPOSITORY}" ]]; then - SOURCE_SYMLINK_PATH="${SOURCE_PATH}/tmp/src/${REPOSITORY}" - if [[ -d "${SOURCE_PATH}/tmp" ]]; then - rm -rf "${SOURCE_PATH}/tmp" - fi - mkdir -p "${SOURCE_PATH}/tmp/src/${VCS}/${ORGANIZATION}" - ln -s "${SOURCE_PATH}" "${SOURCE_SYMLINK_PATH}" - cd "${SOURCE_SYMLINK_PATH}" - - export GOPATH="${SOURCE_PATH}/tmp" - export GOBIN="${SOURCE_PATH}/tmp/bin" - export PATH="${GOBIN}:${PATH}" -fi ############################################################################### @@ -63,6 +49,7 @@ GIT_SHA=$(git rev-parse --short HEAD || echo "GitNotFound") if [[ -z "$LOCAL_BUILD" ]]; then CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \ -a \ + -mod vendor \ -v \ -o ${BINARY_PATH}/linux-amd64/etcdbrctl \ -ldflags "-w -X ${REPOSITORY}/pkg/version.Version=${VERSION} -X ${REPOSITORY}/pkg/version.GitSHA=${GIT_SHA}" \ @@ -72,6 +59,7 @@ if [[ -z "$LOCAL_BUILD" ]]; then else go build \ -v \ + -mod vendor \ -o ${BINARY_PATH}/etcdbrctl \ -ldflags "-w -X ${REPOSITORY}/pkg/version.Version=${VERSION} -X ${REPOSITORY}/pkg/version.GitSHA=${GIT_SHA}" \ main.go diff --git a/.ci/check b/.ci/check index fbc99bc54..81e536a0d 100755 --- a/.ci/check +++ b/.ci/check @@ -27,26 +27,10 @@ VCS="github.com" ORGANIZATION="gardener" PROJECT="etcd-backup-restore" REPOSITORY=${VCS}/${ORGANIZATION}/${PROJECT} - -# The `go ` commands requires to see the target repository to be part of a -# Go workspace. Thus, if we are not yet in a Go workspace, let's create one -# temporarily by using symbolic links. 
-if [[ "${SOURCE_PATH}" != *"src/${REPOSITORY}" ]]; then - SOURCE_SYMLINK_PATH="${SOURCE_PATH}/tmp/src/${REPOSITORY}" - if [[ -d "${SOURCE_PATH}/tmp" ]]; then - rm -rf "${SOURCE_PATH}/tmp" - fi - mkdir -p "${SOURCE_PATH}/tmp/src/${VCS}/${ORGANIZATION}" - ln -s "${SOURCE_PATH}" "${SOURCE_SYMLINK_PATH}" - cd "${SOURCE_SYMLINK_PATH}" - - export GOPATH="${SOURCE_PATH}/tmp" - export GOBIN="${SOURCE_PATH}/tmp/bin" - export PATH="${GOBIN}:${PATH}" -fi +cd "${SOURCE_PATH}" # Install Golint (linting tool). -go get -u golang.org/x/lint/golint +GO111MODULE=off go get -u golang.org/x/lint/golint # Install Helm from binary. curl -LO https://git.io/get_helm.sh @@ -56,12 +40,12 @@ rm get_helm.sh ############################################################################### -PACKAGES="$(go list -e ./... | grep -vE '/tmp/|/vendor/')" +PACKAGES="$(GO111MODULE=on go list -mod=vendor -e ./...)" LINT_FOLDERS="$(echo ${PACKAGES} | sed "s|github.com/gardener/etcd-backup-restore|.|g")" HELM_CHART_PATH="${SOURCE_PATH}/chart/etcd-backup-restore" # Execute static code checks. -go vet ${PACKAGES} +GO111MODULE=on go vet -mod vendor ${PACKAGES} # Execute automatic code formatting directive. go fmt ${PACKAGES} diff --git a/.ci/integration_test b/.ci/integration_test index 595ed09c9..f899a7747 100755 --- a/.ci/integration_test +++ b/.ci/integration_test @@ -30,22 +30,9 @@ REPOSITORY=${VCS}/${ORGANIZATION}/${PROJECT} VERSION_FILE="$(readlink -f "${SOURCE_PATH}/VERSION")" VERSION="$(cat "${VERSION_FILE}")" -# The `go ` commands requires to see the target repository to be part of a -# Go workspace. Thus, if we are not yet in a Go workspace, let's create one -# temporarily by using symbolic links. 
-if [[ "${SOURCE_PATH}" != *"src/${REPOSITORY}" ]]; then - SOURCE_SYMLINK_PATH="${SOURCE_PATH}/tmp/src/${REPOSITORY}" - if [[ -d "${SOURCE_PATH}/tmp" ]]; then - rm -rf "${SOURCE_PATH}/tmp" - fi - mkdir -p "${SOURCE_PATH}/tmp/src/${VCS}/${ORGANIZATION}" - ln -s "${SOURCE_PATH}" "${SOURCE_SYMLINK_PATH}" - cd "${SOURCE_SYMLINK_PATH}" - - export GOPATH="${SOURCE_PATH}/tmp" - export GOBIN="${SOURCE_PATH}/tmp/bin" - export PATH="${GOBIN}:${PATH}" -fi +export GOBIN="${SOURCE_PATH}/bin" +export PATH="${GOBIN}:${PATH}" +cd "${SOURCE_PATH}" ############################################################################## @@ -64,7 +51,7 @@ function setup_test_enviornment() { function setup_ginkgo(){ echo "Installing Ginkgo..." - go get -u github.com/onsi/ginkgo/ginkgo + GO111MODULE=off go get -u github.com/onsi/ginkgo/ginkgo echo "Successfully installed Ginkgo." } diff --git a/.ci/unit_test b/.ci/unit_test index 938995771..33f9e0cf1 100755 --- a/.ci/unit_test +++ b/.ci/unit_test @@ -27,22 +27,7 @@ ORGANIZATION="gardener" PROJECT="etcd-backup-restore" REPOSITORY=${VCS}/${ORGANIZATION}/${PROJECT} -# The `go ` commands requires to see the target repository to be part of a -# Go workspace. Thus, if we are not yet in a Go workspace, let's create one -# temporarily by using symbolic links. -if [[ "${SOURCE_PATH}" != *"src/${REPOSITORY}" ]]; then - SOURCE_SYMLINK_PATH="${SOURCE_PATH}/tmp/src/${REPOSITORY}" - if [[ -d "${SOURCE_PATH}/tmp" ]]; then - rm -rf "${SOURCE_PATH}/tmp" - fi - mkdir -p "${SOURCE_PATH}/tmp/src/${VCS}/${ORGANIZATION}" - ln -s "${SOURCE_PATH}" "${SOURCE_SYMLINK_PATH}" - cd "${SOURCE_SYMLINK_PATH}" - - export GOPATH="${SOURCE_PATH}/tmp" - export GOBIN="${SOURCE_PATH}/tmp/bin" - export PATH="${GOBIN}:${PATH}" -fi +cd "${SOURCE_PATH}" # Install Ginkgo (test framework) to be able to execute the tests. 
go get -u github.com/onsi/ginkgo/ginkgo @@ -57,7 +42,7 @@ function test_with_coverage() { ginkgo $GINKGO_COMMON_FLAGS --coverprofile ${coverprofile_file} -covermode=set -outputdir ${output_dir} ${TEST_PACKAGES} sed -i '/mode: set/d' ${output_dir}/${coverprofile_file} {( echo "mode: set"; cat ${output_dir}/${coverprofile_file} )} > ${output_dir}/${coverprofile_file}.temp - mv ${output_dir}/${coverprofile_file}.temp ${output_dir}/${coverprofile_file} + mv ${output_dir}/${coverprofile_file}.temp ${output_dir}/${coverprofile_file} go tool cover -func ${output_dir}/${coverprofile_file} } diff --git a/Makefile b/Makefile index fefb39159..6bd4e1a5a 100644 --- a/Makefile +++ b/Makefile @@ -25,6 +25,10 @@ revendor: @env GO111MODULE=on go mod vendor -v @env GO111MODULE=on go mod tidy -v +.PHONY: update-dependencies +update-dependencies: + @env GO111MODULE=on go get -u + .PHONY: build build: @.ci/build diff --git a/README.md b/README.md index 132e35fba..8bc2ca93b 100644 --- a/README.md +++ b/README.md @@ -6,225 +6,23 @@ Etcd-backup-restore is collection of components to backup and restore the [etcd]. It also, provides the ability to validate the data directory, so that we could know the data directory is in good shape to bootstrap etcd successfully. 
-## Table of Contents +## Documentation Index -- [Getting started](#getting-started) - - [Prerequisites](#prerequisites) - - [Build](#build) -- [Design](#design) -- [Usage](#usage) - - [Cloud Provider Credentials](#cloud-provider-credentials) - - [Taking scheduled snapshot](#taking-scheduled-snapshot) - - [Etcd data directory initialization](#etcd-data-directory-initialization) - - [Etcdbrctl server](#etcdbrctl-server) -- [Dependency management](#dependency-management) - - [Updating dependencies](#updating-dependencies) -- [Testing](#testing) +### Usage -## Getting started +* [Getting started](doc/usage/getting_started.md) +* [Monitoring](doc/usage/metrics.md) -Currently there are no binary build available, but it is pretty straight forward to build it by following the steps mentioned below. +### Design and Proposals +* [Core design](doc/proposals/design.md) +* [Etcd data Validation ](doc/proposals/validation.md) +* [High watch events ingress rate issue](doc.proposals/high_watch_event_ingress_rate.md) -### Prerequisites +### Development -Although the following installation instructions are for Mac OS X, similar alternate commands could be found for any Linux distribution +* [Setting up a local development environment](doc.development/local_setup.md) +* [Testing and Dependency Management](doc.development/testing_and_dependencies.md) +* [Adding support for a new cloud provider](doc/development/new_cp_support.md) -#### Installing [Golang](https://golang.org/) environment - -Install the latest version of Golang (at least `v1.9.4` is required). For Mac OS, you could use [Homebrew](https://brew.sh/): - -```sh -brew install golang -``` - -For other OS, please check [Go installation documentation](https://golang.org/doc/install). - -Make sure to set your `$GOPATH` environment variable properly (conventionally, it points to `$HOME/go`). 
- -For your convenience, you can add the `bin` directory of the `$GOPATH` to your `$PATH`: `PATH=$PATH:$GOPATH/bin`, but it is not necessarily required. - -We use [Dep](https://github.com/golang/dep) for managing golang package dependencies. Please install it -on Mac OS via - -```sh -brew install dep -``` - -On other operating systems, please check the [Dep installation documentation](https://golang.github.io/dep/docs/installation.html) and the [Dep releases page](https://github.com/golang/dep/releases). After downloading the appropriate release in your `$GOPATH/bin` folder, you need to make it executable via `chmod +x ` and rename it to dep via `mv dep- dep`. - -#### [Golint](https://github.com/golang/lint) - -In order to perform linting on the Go source code, please install [Golint](https://github.com/golang/lint): - -```bash -go get -u github.com/golang/lint/golint -``` - -#### [Ginkgo](https://onsi.github.io/ginkgo/) and [Gomega](https://onsi.github.io/gomega/) - -In order to perform tests on the Go source code, please install [Ginkgo](https://onsi.github.io/ginkgo/) and [Gomega](http://onsi.github.io/gomega/). Please make yourself familiar with both frameworks and read their introductions after installation: - -```bash -go get -u github.com/onsi/ginkgo/ginkgo -go get -u github.com/onsi/gomega -``` - -#### Installing `git` - -We use `git` as VCS which you would need to install. - -On Mac OS run - -```sh -brew install git -``` - -#### Installing `gcloud` SDK (Optional) - -In case you have to create a new release or a new hotfix, you have to push the resulting Docker image into a Docker registry. Currently, we are using the Google Container Registry (this could change in the future). Please follow the official [installation instructions from Google](https://cloud.google.com/sdk/downloads). - -### Installing `Docker` (Optional) - -In case you want to build Docker images, you have to install Docker itself. 
We recommend using [Docker for Mac OS X](https://docs.docker.com/docker-for-mac/) which can be downloaded from [here](https://download.docker.com/mac/stable/Docker.dmg). - -### Build - -First, you need to create a target folder structure before cloning and building `etcdbrctl`. - -```sh - -mkdir -p ~/go/src/github.com/gardener -cd ~/go/src/github.com/gardener -git clone https://github.com/gardener/etcd-backup-restore.git -cd etcd-backup-restore -``` - -To build the binary in local machine environment, use `make` target `build-local`. - -```sh -make build-local -``` - -This will build the binary `etcdbrctl` under `bin` directory. - -Next you can make it available to use as shell command by moving the executable to `/usr/local/bin`. - -## Design - -Please find the design doc [here](doc/design.md). - -## Usage - -You can follow the `help` flag on `etcdbrctl` command and its sub-commands to know the usage details. Some of the common use cases are mentioned below. Although examples below uses AWS S3 as storage provider, we have added support for AWS, GCS, Azure, Openstack swift and Alicloud OSS object store. It also supports local disk as storage provider. - -### Cloud Provider Credentials - -The procedure to provide credentials to access the cloud provider object store varies for different providers. - -For `AWS S3`, the `credentials` file has to be provided in the `~/.aws` directory. - -For `GCP Containers`, the service account json file should be provided in the `~/.gcp` as a `service-account-file.json` file. - -For `Azure Blob storage`, `STORAGE_ACCOUNT` and `STORAGE_KEY` should be made available as environment variables. - -For `Openstack Swift`, `OS_USERNAME`, `OS_PASSWORD`, `OS_AUTH_URL`, `OS_TENANT_ID` and `OS_DOMAIN_ID` should be made available as environment variables. - -For `Alicloud OSS`, `ALICLOUD_ENDPOINT`, `ALICLOUD_ACCESS_KEY_ID`, `ALICLOUD_ACCESS_KEY_SECRET` should be made available as environment variables. 
- -### Taking scheduled snapshot - - - -`etcd` should already be running. One can apply standard cron format scheduling for regular backup of etcd. The cron schedule is used to take full backups. The delta snapshots are taken at regular intervals in the period in between full snapshots as indicated by the `delta-snapshot-period-seconds` flag. The default for the same is 10 seconds. - -etcd-backup-restore has two garbage collection policies to collect existing backups from the cloud bucket. The flag `garbage-collection-policy` is used to indicate the correct garbage collection policy. -1. `Exponential` -1. `LimitBased` - -If using `LimitBased` policy, the `max-backups` flag should be provided to indicate the number of recent backups to persist at each garbage collection cycle. - -```sh -$ ./bin/etcdbrctl snapshot --storage-provider="S3" --etcd-endpoints http://localhost:2379 --schedule "*/1 * * * *" --store-container="etcd-backup" --delta-snapshot-period-seconds=10 --max-backups=10 --garbage-collection-policy='LimitBased' -INFO[0000] Validating schedule... -INFO[0000] Job attempt: 1 -INFO[0000] Taking initial full snapshot at time: 2018-07-09 12:09:04.3567024 +0000 UTC -INFO[0000] Successfully opened snapshot reader on etcd -INFO[0000] Successfully saved full snapshot at: Backup-1531138145/Full-00000000-00000001-1531138145 -INFO[0000] Will take next full snapshot at time: 2018-07-09 12:10:00 +0000 UTC -INFO[0000] Applied watch on etcd from revision: 00000002 -INFO[0000] No events received to save snapshot. -``` - -The command mentioned above takes hourly snapshots and pushs it to S3 bucket named "etcd-backup". It is configured to keep only last 10 backups in bucket. - -`Exponential` policy stores the snapshots in a condensed manner as mentioned below: -- All full backups and delta backups for the previous hour. -- Latest full snapshot of each previous hour for the day. -- Latest full snapshot of each previous day for 7 days. 
-- Latest full snapshot of the previous 4 weeks. - -```sh -$ ./bin/etcdbrctl snapshot --storage-provider="S3" --etcd-endpoints http://localhost:2379 --schedule "*/1 * * * *" --store-container="etcd-backup" --delta-snapshot-period-seconds=10 --garbage-collection-policy='Exponential' -INFO[0000] Validating schedule... -INFO[0000] Job attempt: 1 -INFO[0000] Taking initial full snapshot at time: 2018-07-09 12:09:04.3567024 +0000 UTC -INFO[0000] Successfully opened snapshot reader on etcd -INFO[0000] Successfully saved full snapshot at: Backup-1531138145/Full-00000000-00000001-1531138145 -INFO[0000] Will take next full snapshot at time: 2018-07-09 12:10:00 +0000 UTC -INFO[0000] Applied watch on etcd from revision: 00000002 -INFO[0000] No events received to save snapshot. -``` - -The command mentioned above stores etcd snapshots as per the exponential policy mentioned above. - -### Etcd data directory initialization - -Sub-command `initialize` does the task of data directory validation. If the data directory is found to be corrupt, the controller will restore it from the latest snapshot in the cloud store. It restores the full snapshot first and then incrementally applies the delta snapshots. - -```sh -$ ./bin/etcdbrctl initialize --storage-provider="S3" --store-container="etcd-backup" --data-dir="default.etcd" -INFO[0000] Checking for data directory structure validity... -INFO[0000] Checking for data directory files corruption... -INFO[0000] Verifying snap directory... -Verifying Snapfile default.etcd/member/snap/0000000000000001-0000000000000001.snap. -INFO[0000] Verifying WAL directory... -INFO[0000] Verifying DB file... -INFO[0000] Data directory corrupt. Invalid db files: invalid database -INFO[0000] Removing data directory(default.etcd) for snapshot restoration. -INFO[0000] Finding latest snapshot... -INFO[0000] Restoring from latest snapshot: Full-00000000-00040010-1522152360... 
-2018-03-27 17:38:06.617280 I | etcdserver/membership: added member 8e9e05c52164694d [http://localhost:2380] to cluster cdf818194e3a8c32 -INFO[0000] Successfully restored the etcd data directory. -``` - -### Etcdbrctl server - -With sub-command `server` you can start a http server which exposes an endpoint to initialize etcd over REST interface. The server also keeps on backup schedule thread running to have periodic backups. This is mainly made available to manage an etcd instance running in a Kubernetes cluster. You can deploy the example [manifest](./example/etcd-statefulset.yaml) on a Kubernetes cluster to have an fault resilient etcd instance. - -## Dependency management - -We use [Dep](https://github.com/golang/dep) to manage golang dependencies.. In order to add a new package dependency to the project, you can perform `dep ensure -add ` or edit the `Gopkg.toml` file and append the package along with the version you want to use as a new `[[constraint]]`. - -### Updating dependencies - -The `Makefile` contains a rule called `revendor` which performs a `dep ensure -update` and a `dep prune` command. This updates all the dependencies to its latest versions (respecting the constraints specified in the `Gopkg.toml` file). The command also installs the packages which do not already exist in the `vendor` folder but are specified in the `Gopkg.toml` (in case you have added new ones). - -```sh -make revendor -``` - -The dependencies are installed into the `vendor` folder which **should be added** to the VCS. - -:warning: Make sure you test the code after you have updated the dependencies! - -## Testing - -We have created `make` target `verify` which will internally run different rule like `fmt` for formatting, `lint` for linting check and most importantly `test` which will check the code against predefined unit tests. 
Although, currently there are not enough test cases written to cover entire code, hence one should check for failure cases manually before raising pull request. We will eventually add the test cases for complete code coverage. - -```sh -make verify -``` - -By default, we try to run test in parallel without computing code coverage. To get the code coverage, you will have to set environment variable `COVER` to `true`. This will log the code coverage percentage at the end of test logs. Also, all cover profile files will accumulated under `test/output/coverprofile.out` directory. You can visualize exact code coverage using `make show-coverage`. [etcd]: https://github.com/coreos/etcd diff --git a/build/Dockerfile b/build/Dockerfile index 8ce5874ac..3bdf500c4 100644 --- a/build/Dockerfile +++ b/build/Dockerfile @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -FROM alpine:3.9.3 +FROM alpine:3.10.1 RUN apk add --update bash curl diff --git a/doc/development/local_setup.md b/doc/development/local_setup.md new file mode 100644 index 000000000..8caa5cc54 --- /dev/null +++ b/doc/development/local_setup.md @@ -0,0 +1,76 @@ +## Prerequisites + +Although the following installation instructions are for Mac OS X, similar alternate commands could be found for any Linux distribution + +### Installing [Golang](https://golang.org/) environment + +Install the latest version of Golang (at least `v1.12` is required). For Mac OS, you could use [Homebrew](https://brew.sh/): + +```sh +brew install golang +``` + +For other OS, please check [Go installation documentation](https://golang.org/doc/install). + +Make sure to set your `$GOPATH` environment variable properly (conventionally, it points to `$HOME/go`). + +For your convenience, you can add the `bin` directory of the `$GOPATH` to your `$PATH`: `PATH=$PATH:$GOPATH/bin`, but it is not necessarily required. 
+ +### [Golint](https://github.com/golang/lint) + +In order to perform linting on the Go source code, please install [Golint](https://github.com/golang/lint): + +```bash +go get -u golang.org/x/lint/golint +``` + +### [Ginkgo](https://onsi.github.io/ginkgo/) and [Gomega](https://onsi.github.io/gomega/) + +In order to perform tests on the Go source code, please install [Ginkgo](https://onsi.github.io/ginkgo/) and [Gomega](http://onsi.github.io/gomega/). Please make yourself familiar with both frameworks and read their introductions after installation: + +```bash +go get -u github.com/onsi/ginkgo/ginkgo +go get -u github.com/onsi/gomega +``` + +### Installing `git` + +We use `git` as VCS which you would need to install. + +On Mac OS run + +```sh +brew install git +``` + +### Installing `gcloud` SDK (Optional) + +In case you have to create a new release or a new hotfix, you have to push the resulting Docker image into a Docker registry. Currently, we are using the Google Container Registry (this could change in the future). Please follow the official [installation instructions from Google](https://cloud.google.com/sdk/downloads). + + +## Build + +Currently there are no binary build available, but it is pretty straight forward to build it by following the steps mentioned below. + +* First, you need to create a target folder structure before cloning and building `etcdbrctl`. + + ```sh + git clone https://github.com/gardener/etcd-backup-restore.git + cd etcd-backup-restore + ``` + +* To build the binary in local machine environment, use `make` target `build-local`. It will build the binary `etcdbrctl` under `bin` directory. + + ```sh + make build-local + ``` + +* Next you can make it available to use as shell command by moving the executable to `/usr/local/bin`. 
+You can verify the installation by running following command: + ```console + $ etcdbrctl -v + INFO[0000] etcd-backup-restore Version: v0.7.0-dev + INFO[0000] Git SHA: 38979f0 + INFO[0000] Go Version: go1.12 + INFO[0000] Go OS/Arch: darwin/amd64 + ``` \ No newline at end of file diff --git a/doc/development/testing_and_dependencies.md b/doc/development/testing_and_dependencies.md new file mode 100644 index 000000000..aca981665 --- /dev/null +++ b/doc/development/testing_and_dependencies.md @@ -0,0 +1,31 @@ +# Dependency management + +We use golang modules to manage golang dependencies. In order to add a new package dependency to the project, you can perform `go get @` or edit the `go.mod` file and append the package along with the version you want to use. + +### Updating dependencies + +The `Makefile` contains a rule called `revendor` which performs `go mod vendor` and `go mod tidy`. +* `go mod vendor` resets the main module's vendor directory to include all packages needed to build and test all the main module's packages. It does not include test code for vendored packages. +It does not include test code for vendored packages. +* `go mod tidy` makes sure go.mod matches the source code in the module. +It adds any missing modules necessary to build the current module's +packages and dependencies, and it removes unused modules that +don't provide any relevant packages. + +```sh +make revendor +``` + +The dependencies are installed into the `vendor` folder which **should be added** to the VCS. + +:warning: Make sure you test the code after you have updated the dependencies! + +# Testing + +We have created `make` target `verify` which will internally run different rule like `fmt` for formatting, `lint` for linting check and most importantly `test` which will check the code against predefined unit tests. Although, currently there are not enough test cases written to cover entire code, hence one should check for failure cases manually before raising pull request. 
We will eventually add the test cases for complete code coverage. + +```sh +make verify +``` + +By default, we try to run test in parallel without computing code coverage. To get the code coverage, you will have to set environment variable `COVER` to `true`. This will log the code coverage percentage at the end of test logs. Also, all cover profile files will accumulated under `test/output/coverprofile.out` directory. You can visualize exact code coverage using `make show-coverage`. diff --git a/doc/design.md b/doc/proposals/design.md similarity index 100% rename from doc/design.md rename to doc/proposals/design.md diff --git a/doc/validation.md b/doc/proposals/validation.md similarity index 100% rename from doc/validation.md rename to doc/proposals/validation.md diff --git a/doc/usage/getting_started.md b/doc/usage/getting_started.md new file mode 100644 index 000000000..cead71c71 --- /dev/null +++ b/doc/usage/getting_started.md @@ -0,0 +1,89 @@ +# Getting started + +Currently we don't publish the binary build with the release, but it is pretty straight forward to build it by following the steps mentioned [here](../development/local_setup.md#build). But we do publish the docker image with each release, please check the [release page](https://github.com/gardener/etcd-backup-restore/releases) for the same. + +## Usage + +You can follow the `help` flag on `etcdbrctl` command and its sub-commands to know the usage details. Some of the common use cases are mentioned below. Although examples below uses AWS S3 as storage provider, we have added support for AWS, GCS, Azure, Openstack swift and Alicloud OSS object store. It also supports local disk as storage provider. + +### Cloud Provider Credentials + +The procedure to provide credentials to access the cloud provider object store varies for different providers. + +* For `AWS S3`, the `credentials` file has to be provided in the `~/.aws` directory. 
+ +* For `GCP Containers`, the service account json file should be provided in the `~/.gcp` as a `service-account-file.json` file. + +* For `Azure Blob storage`, `STORAGE_ACCOUNT` and `STORAGE_KEY` should be made available as environment variables. + +* For `Openstack Swift`, `OS_USERNAME`, `OS_PASSWORD`, `OS_AUTH_URL`, `OS_TENANT_ID` and `OS_DOMAIN_ID` should be made available as environment variables. + +* For `Alicloud OSS`, `ALICLOUD_ENDPOINT`, `ALICLOUD_ACCESS_KEY_ID`, `ALICLOUD_ACCESS_KEY_SECRET` should be made available as environment variables. + +### Taking scheduled snapshot + +`etcd` should already be running. One can apply standard cron format scheduling for regular backup of etcd. The cron schedule is used to take full backups. The delta snapshots are taken at regular intervals in the period in between full snapshots as indicated by the `delta-snapshot-period-seconds` flag. The default for the same is 10 seconds. + +etcd-backup-restore has two garbage collection policies to collect existing backups from the cloud bucket. The flag `garbage-collection-policy` is used to indicate the correct garbage collection policy. +1. `Exponential` +1. `LimitBased` + +If using `LimitBased` policy, the `max-backups` flag should be provided to indicate the number of recent backups to persist at each garbage collection cycle. + +```console +$ ./bin/etcdbrctl snapshot --storage-provider="S3" --etcd-endpoints http://localhost:2379 --schedule "*/1 * * * *" --store-container="etcd-backup" --delta-snapshot-period-seconds=10 --max-backups=10 --garbage-collection-policy='LimitBased' +INFO[0000] Validating schedule... 
+INFO[0000] Job attempt: 1 +INFO[0000] Taking initial full snapshot at time: 2018-07-09 12:09:04.3567024 +0000 UTC +INFO[0000] Successfully opened snapshot reader on etcd +INFO[0000] Successfully saved full snapshot at: Backup-1531138145/Full-00000000-00000001-1531138145 +INFO[0000] Will take next full snapshot at time: 2018-07-09 12:10:00 +0000 UTC +INFO[0000] Applied watch on etcd from revision: 00000002 +INFO[0000] No events received to save snapshot. +``` + +The command mentioned above takes hourly snapshots and pushs it to S3 bucket named "etcd-backup". It is configured to keep only last 10 backups in bucket. + +`Exponential` policy stores the snapshots in a condensed manner as mentioned below: +- All full backups and delta backups for the previous hour. +- Latest full snapshot of each previous hour for the day. +- Latest full snapshot of each previous day for 7 days. +- Latest full snapshot of the previous 4 weeks. + +```console +$ ./bin/etcdbrctl snapshot --storage-provider="S3" --etcd-endpoints http://localhost:2379 --schedule "*/1 * * * *" --store-container="etcd-backup" --delta-snapshot-period-seconds=10 --garbage-collection-policy='Exponential' +INFO[0000] Validating schedule... +INFO[0000] Job attempt: 1 +INFO[0000] Taking initial full snapshot at time: 2018-07-09 12:09:04.3567024 +0000 UTC +INFO[0000] Successfully opened snapshot reader on etcd +INFO[0000] Successfully saved full snapshot at: Backup-1531138145/Full-00000000-00000001-1531138145 +INFO[0000] Will take next full snapshot at time: 2018-07-09 12:10:00 +0000 UTC +INFO[0000] Applied watch on etcd from revision: 00000002 +INFO[0000] No events received to save snapshot. +``` + +The command mentioned above stores etcd snapshots as per the exponential policy mentioned above. + +### Etcd data directory initialization + +Sub-command `initialize` does the task of data directory validation. 
If the data directory is found to be corrupt, the controller will restore it from the latest snapshot in the cloud store. It restores the full snapshot first and then incrementally applies the delta snapshots. + +```console +$ ./bin/etcdbrctl initialize --storage-provider="S3" --store-container="etcd-backup" --data-dir="default.etcd" +INFO[0000] Checking for data directory structure validity... +INFO[0000] Checking for data directory files corruption... +INFO[0000] Verifying snap directory... +Verifying Snapfile default.etcd/member/snap/0000000000000001-0000000000000001.snap. +INFO[0000] Verifying WAL directory... +INFO[0000] Verifying DB file... +INFO[0000] Data directory corrupt. Invalid db files: invalid database +INFO[0000] Removing data directory(default.etcd) for snapshot restoration. +INFO[0000] Finding latest snapshot... +INFO[0000] Restoring from latest snapshot: Full-00000000-00040010-1522152360... +2018-03-27 17:38:06.617280 I | etcdserver/membership: added member 8e9e05c52164694d [http://localhost:2380] to cluster cdf818194e3a8c32 +INFO[0000] Successfully restored the etcd data directory. +``` + +### Etcdbrctl server + +With sub-command `server` you can start a http server which exposes an endpoint to initialize etcd over REST interface. The server also keeps on backup schedule thread running to have periodic backups. This is mainly made available to manage an etcd instance running in a Kubernetes cluster. You can deploy the example [helm chart](../../chart/etcd-backup-restore) on a Kubernetes cluster to have an fault resilient etcd instance. diff --git a/doc/metrics.md b/doc/usage/metrics.md similarity index 99% rename from doc/metrics.md rename to doc/usage/metrics.md index a85a12350..b83c2b3f3 100644 --- a/doc/metrics.md +++ b/doc/usage/metrics.md @@ -1,4 +1,4 @@ -# Metrics +# Monitoring etcd-backup-restore uses [Prometheus][prometheus] for metrics reporting. The metrics can be used for real-time monitoring and debugging. 
It won't persist its metrics; if a member restarts, the metrics will be reset. From f620df130f017e1da281ef13714361cf95bdc6d3 Mon Sep 17 00:00:00 2001 From: Swapnil Mhamane Date: Wed, 31 Jul 2019 23:03:29 +0530 Subject: [PATCH 2/2] Apply suggestions from code review Co-Authored-By: Shreyas Rao <42259948+shreyas-s-rao@users.noreply.github.com> Signed-off-by: Swapnil Mhamane --- .ci/integration_test | 10 +- .ci/unit_test | 2 +- README.md | 8 +- doc/development/local_setup.md | 18 +-- doc/development/testing_and_dependencies.md | 11 +- doc/proposals/design.md | 14 +-- doc/usage/getting_started.md | 121 ++++++++++++++------ 7 files changed, 118 insertions(+), 66 deletions(-) diff --git a/.ci/integration_test b/.ci/integration_test index f899a7747..5cdd25690 100755 --- a/.ci/integration_test +++ b/.ci/integration_test @@ -27,7 +27,7 @@ VCS="github.com" ORGANIZATION="gardener" PROJECT="etcd-backup-restore" REPOSITORY=${VCS}/${ORGANIZATION}/${PROJECT} -VERSION_FILE="$(readlink -f "${SOURCE_PATH}/VERSION")" +VERSION_FILE="$(readlink -f "${SOURCE_PATH}/VERSION")" VERSION="$(cat "${VERSION_FILE}")" export GOBIN="${SOURCE_PATH}/bin" @@ -45,7 +45,7 @@ TEST_DIR= function setup_test_enviornment() { setup_ginkgo setup_etcd - setup_etcbrctl + setup_etcdbrctl setup_awscli } @@ -69,9 +69,9 @@ function setup_etcd(){ echo "Successfully installed etcd." } -function setup_etcbrctl(){ +function setup_etcdbrctl(){ echo "Installing etcdbrctl..." - go build \ + GO111MODULE=on go build \ -v \ -o ${GOBIN}/etcdbrctl \ -ldflags "-w -X ${REPOSITORY}/pkg/version.Version=${VERSION}" \ @@ -199,4 +199,4 @@ echo "Deleting test enviornment..." cleanup_test_environment echo "Successfully completed all tests." 
-exit $TEST_RESULT \ No newline at end of file +exit $TEST_RESULT diff --git a/.ci/unit_test b/.ci/unit_test index 33f9e0cf1..3afe97c4c 100755 --- a/.ci/unit_test +++ b/.ci/unit_test @@ -30,7 +30,7 @@ REPOSITORY=${VCS}/${ORGANIZATION}/${PROJECT} cd "${SOURCE_PATH}" # Install Ginkgo (test framework) to be able to execute the tests. -go get -u github.com/onsi/ginkgo/ginkgo +GO111MODULE=off go get github.com/onsi/ginkgo/ginkgo ############################################################################### diff --git a/README.md b/README.md index 8bc2ca93b..b7cf51f4d 100644 --- a/README.md +++ b/README.md @@ -15,13 +15,13 @@ Etcd-backup-restore is collection of components to backup and restore the [etcd] ### Design and Proposals * [Core design](doc/proposals/design.md) -* [Etcd data Validation ](doc/proposals/validation.md) -* [High watch events ingress rate issue](doc.proposals/high_watch_event_ingress_rate.md) +* [Etcd data validation ](doc/proposals/validation.md) +* [High watch events ingress rate issue](doc/proposals/high_watch_event_ingress_rate.md) ### Development -* [Setting up a local development environment](doc.development/local_setup.md) -* [Testing and Dependency Management](doc.development/testing_and_dependencies.md) +* [Setting up a local development environment](doc/development/local_setup.md) +* [Testing and Dependency Management](doc/development/testing_and_dependencies.md) * [Adding support for a new cloud provider](doc/development/new_cp_support.md) diff --git a/doc/development/local_setup.md b/doc/development/local_setup.md index 8caa5cc54..6d139a58e 100644 --- a/doc/development/local_setup.md +++ b/doc/development/local_setup.md @@ -1,20 +1,20 @@ ## Prerequisites -Although the following installation instructions are for Mac OS X, similar alternate commands could be found for any Linux distribution +Although the following installation instructions are for Mac OS X, similar alternate commands can be found for any Linux distribution. 
### Installing [Golang](https://golang.org/) environment -Install the latest version of Golang (at least `v1.12` is required). For Mac OS, you could use [Homebrew](https://brew.sh/): +Install the latest version of Golang (at least `v1.12` is required). For Mac OS, you may use [Homebrew](https://brew.sh/): ```sh brew install golang ``` -For other OS, please check [Go installation documentation](https://golang.org/doc/install). +For other OSes, please check [Go installation documentation](https://golang.org/doc/install). Make sure to set your `$GOPATH` environment variable properly (conventionally, it points to `$HOME/go`). -For your convenience, you can add the `bin` directory of the `$GOPATH` to your `$PATH`: `PATH=$PATH:$GOPATH/bin`, but it is not necessarily required. +For your convenience, you can add the `bin` directory of the `$GOPATH` to your `$PATH`: `PATH=$PATH:$GOPATH/bin`, but it is not mandatory. ### [Golint](https://github.com/golang/lint) @@ -45,12 +45,11 @@ brew install git ### Installing `gcloud` SDK (Optional) -In case you have to create a new release or a new hotfix, you have to push the resulting Docker image into a Docker registry. Currently, we are using the Google Container Registry (this could change in the future). Please follow the official [installation instructions from Google](https://cloud.google.com/sdk/downloads). - +In case you have to create a new release or a new hotfix, you have to push the resulting Docker image into a Docker registry. Currently, we use the Google Container Registry (this could change in the future). Please follow the official [installation instructions from Google](https://cloud.google.com/sdk/downloads). ## Build -Currently there are no binary build available, but it is pretty straight forward to build it by following the steps mentioned below. +Currently there are no binary builds available, but it is fairly simple to build it by following the steps mentioned below. 
* First, you need to create a target folder structure before cloning and building `etcdbrctl`. @@ -65,12 +64,13 @@ Currently there are no binary build available, but it is pretty straight forward make build-local ``` -* Next you can make it available to use as shell command by moving the executable to `/usr/local/bin`. +* Next you can make it available to use as a shell command by moving the executable to `/usr/local/bin`, or by optionally including the `bin` directory in your `$PATH` environment variable. You can verify the installation by running following command: + ```console $ etcdbrctl -v INFO[0000] etcd-backup-restore Version: v0.7.0-dev INFO[0000] Git SHA: 38979f0 INFO[0000] Go Version: go1.12 INFO[0000] Go OS/Arch: darwin/amd64 - ``` \ No newline at end of file + ``` diff --git a/doc/development/testing_and_dependencies.md b/doc/development/testing_and_dependencies.md index aca981665..d51cf837e 100644 --- a/doc/development/testing_and_dependencies.md +++ b/doc/development/testing_and_dependencies.md @@ -1,16 +1,13 @@ # Dependency management -We use golang modules to manage golang dependencies. In order to add a new package dependency to the project, you can perform `go get <PACKAGE>@<VERSION>` or edit the `go.mod` file and append the package along with the version you want to use. +We use Go modules to manage golang dependencies. In order to add a new package dependency to the project, you can perform `go get <PACKAGE>@<VERSION>` or edit the `go.mod` file and append the package along with the version you want to use. ### Updating dependencies The `Makefile` contains a rule called `revendor` which performs `go mod vendor` and `go mod tidy`. * `go mod vendor` resets the main module's vendor directory to include all packages needed to build and test all the main module's packages. It does not include test code for vendored packages. -It does not include test code for vendored packages. * `go mod tidy` makes sure go.mod matches the source code in the module.
-It adds any missing modules necessary to build the current module's -packages and dependencies, and it removes unused modules that -don't provide any relevant packages. +It adds any missing modules necessary to build the current module's packages and dependencies, and it removes unused modules that don't provide any relevant packages. ```sh make revendor @@ -22,10 +19,10 @@ The dependencies are installed into the `vendor` folder which **should be added* # Testing -We have created `make` target `verify` which will internally run different rule like `fmt` for formatting, `lint` for linting check and most importantly `test` which will check the code against predefined unit tests. Although, currently there are not enough test cases written to cover entire code, hence one should check for failure cases manually before raising pull request. We will eventually add the test cases for complete code coverage. +We have created `make` target `verify` which will internally run different rules like `fmt` for formatting, `lint` for linting check and most importantly `test` which will check the code against predefined unit tests. As currently there aren't enough test cases written to cover the entire code, you must check for failure cases manually and include test cases before raising pull request. We will eventually add more test cases for complete code coverage. ```sh make verify ``` -By default, we try to run test in parallel without computing code coverage. To get the code coverage, you will have to set environment variable `COVER` to `true`. This will log the code coverage percentage at the end of test logs. Also, all cover profile files will accumulated under `test/output/coverprofile.out` directory. You can visualize exact code coverage using `make show-coverage`. +By default, we run tests without computing code coverage. To get the code coverage, you can set the environment variable `COVER` to `true`. This will log the code coverage percentage at the end of test logs. 
Also, all cover profile files will be accumulated under `test/output/coverprofile.out` directory. You can visualize the exact code coverage by running `make show-coverage` after running `make verify` with code coverage enabled. diff --git a/doc/proposals/design.md b/doc/proposals/design.md index bde90fe84..79be75c3e 100644 --- a/doc/proposals/design.md +++ b/doc/proposals/design.md @@ -39,7 +39,7 @@ Main goal of this project to provide a solution to make [etcd] instance backing ## Architecture -![architecture](images/etcd-backup-restore.jpg) +![architecture](../images/etcd-backup-restore.jpg) We will have a StatefulSet for etcd with two containers in it. @@ -50,7 +50,7 @@ We will have a StatefulSet for etcd with two containers in it. ### ETCD Container - Request the sidecar to validate/initialize the data directory. -- The etcd process is started only if the `initialize` request to sidecar returns a success. +- The etcd process is started only if the `initialize` request to sidecar returns a success. ### Sidecar Container @@ -71,12 +71,12 @@ Sidecar container has two components - Probe is required to ensure that etcd is live before backups are triggered. - Schedule the backup operation (probably using cron library) which triggers full snapshot at regular intervals. - Store the snapshot in the configured cloud object store. - + **Init container is not used for the validation/restoration of etcd data directory. The rationale behind the decision was to avoid baking in pod restart logic in sidecar container in the event etcd process had died. In case etcd container died, init-container had to be run before etcd container was run to ensure that data directory was valid. This required the pod to be restarted. With the current design, the sidecar handles the data directory validation/restoration and periodic backups. 
Pod restart is not required.** ## Workflow -![sequence-diagram](images/etcd-backup-restore-sequence-diagram.jpg) +![sequence-diagram](../images/etcd-backup-restore-sequence-diagram.jpg) ### Etcd container @@ -92,15 +92,15 @@ Sidecar container has two components 1. In case of data directory corruption, restore data directory from the latest cloud snapshot. Return success. 2. In case data directory is valid, return success. 3. In all other cases, return failure. -3. Once the `initialize` request returns success, etcd process can be expected to start up in some time. The prober would then receive a successful probe of etcd's liveliness. -4. On successful probe, start taking periodic backup of etcd and store the snapshot to the cloud object store. Stop prober. +3. Once the `initialize` request returns success, etcd process can be expected to start up in some time. The prober would then receive a successful probe of etcd's liveliness. +4. On successful probe, start taking periodic backup of etcd and store the snapshot to the cloud object store. Stop prober. - In case of a failure to take a backup, exit with error. (Container restarts) ### Handling of Different Scenarios/Issues - DNS latency: Should not matter for single member Etcd cluster. - Etcd upgrade and downgrade for K8s compatibility: Should not be issue for v3.* series released so far. Simply restart pod. No data format change. -- Iaas issue: Issues like unreachable object store, will be taken care by init container and backup container. Both container will keep retrying to reach out object store with exponential timeouts. +- IaaS issue: Issues like unreachable object store, will be taken care by init container and backup container. Both container will keep retrying to reach out object store with exponential timeouts. - Corrupt backup: StatefulSet go in restart loop, and human operator will with customers concern delete the last corrupt backup from object store manually. 
So that, in next iteration it will recover from previous non-corrupt backup. ## Outlook diff --git a/doc/usage/getting_started.md b/doc/usage/getting_started.md index cead71c71..c6fcf44b9 100644 --- a/doc/usage/getting_started.md +++ b/doc/usage/getting_started.md @@ -1,10 +1,10 @@ # Getting started -Currently we don't publish the binary build with the release, but it is pretty straight forward to build it by following the steps mentioned [here](../development/local_setup.md#build). But we do publish the docker image with each release, please check the [release page](https://github.com/gardener/etcd-backup-restore/releases) for the same. +Currently we don't publish the binary build with the release, but it is pretty straightforward to build it by following the steps mentioned [here](../development/local_setup.md#build). But we do publish the docker image with each release; please check the [release page](https://github.com/gardener/etcd-backup-restore/releases) for the same. Currently, release docker images are pushed to the `eu.gcr.io/gardener-project/gardener/etcdbrctl` container registry. ## Usage -You can follow the `help` flag on `etcdbrctl` command and its sub-commands to know the usage details. Some of the common use cases are mentioned below. Although examples below uses AWS S3 as storage provider, we have added support for AWS, GCS, Azure, Openstack swift and Alicloud OSS object store. It also supports local disk as storage provider. +You can follow the `help` flag on `etcdbrctl` command and its sub-commands to know the usage details. Some of the common use cases are mentioned below. Although the examples below use AWS S3 as the storage provider, etcd-backup-restore supports AWS, GCS, Azure, Openstack swift and Alicloud OSS object store. It also supports local disk as a storage provider for development purposes, but it is not recommended to use this in a production environment.
### Cloud Provider Credentials @@ -22,24 +22,57 @@ The procedure to provide credentials to access the cloud provider object store v ### Taking scheduled snapshot -`etcd` should already be running. One can apply standard cron format scheduling for regular backup of etcd. The cron schedule is used to take full backups. The delta snapshots are taken at regular intervals in the period in between full snapshots as indicated by the `delta-snapshot-period-seconds` flag. The default for the same is 10 seconds. +Sub-command `snapshot` takes scheduled backups, or `snapshots`, of a running `etcd` cluster, which are pushed to one of the storage providers specified above (please note that `etcd` should already be running). One can apply standard cron format scheduling for regular backup of etcd. The cron schedule is used to take full backups. The delta snapshots are taken at regular intervals in the period in between full snapshots as indicated by the `delta-snapshot-period-seconds` flag. The default for the same is 10 seconds. + +etcd-backup-restore has two garbage collection policies to clean up existing backups from the cloud bucket. The flag `garbage-collection-policy` is used to indicate the desired garbage collection policy. -etcd-backup-restore has two garbage collection policies to collect existing backups from the cloud bucket. The flag `garbage-collection-policy` is used to indicate the correct garbage collection policy. 1. `Exponential` 1. `LimitBased` -If using `LimitBased` policy, the `max-backups` flag should be provided to indicate the number of recent backups to persist at each garbage collection cycle. +If using `LimitBased` policy, the `max-backups` flag should be provided to indicate the number of most recent backups to persist at each garbage collection cycle.
```console -$ ./bin/etcdbrctl snapshot --storage-provider="S3" --etcd-endpoints http://localhost:2379 --schedule "*/1 * * * *" --store-container="etcd-backup" --delta-snapshot-period-seconds=10 --max-backups=10 --garbage-collection-policy='LimitBased' +$ ./bin/etcdbrctl snapshot \ +--storage-provider="S3" \ +--etcd-endpoints http://localhost:2379 \ +--schedule "*/1 * * * *" \ +--store-container="etcd-backup" \ +--delta-snapshot-period-seconds=10 \ +--max-backups=10 \ +--garbage-collection-policy='LimitBased' + +INFO[0000] etcd-backup-restore Version: 0.7.0-dev +INFO[0000] Git SHA: c03f75c +INFO[0000] Go Version: go1.12.7 +INFO[0000] Go OS/Arch: darwin/amd64 INFO[0000] Validating schedule... -INFO[0000] Job attempt: 1 -INFO[0000] Taking initial full snapshot at time: 2018-07-09 12:09:04.3567024 +0000 UTC +INFO[0000] Defragmentation period :72 hours +INFO[0000] Taking scheduled snapshot for time: 2019-08-05 21:41:34.303439 +0530 IST INFO[0000] Successfully opened snapshot reader on etcd -INFO[0000] Successfully saved full snapshot at: Backup-1531138145/Full-00000000-00000001-1531138145 -INFO[0000] Will take next full snapshot at time: 2018-07-09 12:10:00 +0000 UTC -INFO[0000] Applied watch on etcd from revision: 00000002 -INFO[0000] No events received to save snapshot. 
+INFO[0001] Successfully initiated the multipart upload with upload ID : xhDeLNQsp9HAExmU1O4C3mCriUViVIRrrlPzdJ_.f4dtL046pNekEz54UD9GLYYOLjQUy.ZLZBLp4WeyNnFndDbvDZwhhCjAtwZQdqEbGw5.0HnX8fiP9Vvqk3_2j_Cf +INFO[0001] Uploading snapshot of size: 22028320, chunkSize: 5242880, noOfChunks: 5 +INFO[0001] Triggered chunk upload for all chunks, total: 5 +INFO[0001] No of Chunks:= 5 +INFO[0001] Uploading chunk with id: 2, offset: 5242880, attempt: 0 +INFO[0001] Uploading chunk with id: 4, offset: 15728640, attempt: 0 +INFO[0001] Uploading chunk with id: 5, offset: 20971520, attempt: 0 +INFO[0001] Uploading chunk with id: 1, offset: 0, attempt: 0 +INFO[0001] Uploading chunk with id: 3, offset: 10485760, attempt: 0 +INFO[0008] Received chunk result for id: 5, offset: 20971520 +INFO[0012] Received chunk result for id: 3, offset: 10485760 +INFO[0014] Received chunk result for id: 4, offset: 15728640 +INFO[0015] Received chunk result for id: 2, offset: 5242880 +INFO[0018] Received chunk result for id: 1, offset: 0 +INFO[0018] Received successful chunk result for all chunks. Stopping workers. +INFO[0018] Finishing the multipart upload with upload ID : xhDeLNQsp9HAExmU1O4C3mCriUViVIRrrlPzdJ_.f4dtL046pNekEz54UD9GLYYOLjQUy.ZLZBLp4WeyNnFndDbvDZwhhCjAtwZQdqEbGw5.0HnX8fiP9Vvqk3_2j_Cf +INFO[0018] Total time to save snapshot: 17.934609 seconds. +INFO[0018] Successfully saved full snapshot at: Backup-1565021494/Full-00000000-00009002-1565021494 +INFO[0018] Applied watch on etcd from revision: 9003 +INFO[0018] Stopping full snapshot... +INFO[0018] Resetting full snapshot to run after 7.742179s +INFO[0018] Will take next full snapshot at time: 2019-08-05 21:42:00 +0530 IST +INFO[0018] Taking delta snapshot for time: 2019-08-05 21:41:52.258109 +0530 IST +INFO[0018] No events received to save snapshot. Skipping delta snapshot. ``` The command mentioned above takes hourly snapshots and pushs it to S3 bucket named "etcd-backup". It is configured to keep only last 10 backups in bucket. 
@@ -51,15 +84,36 @@ The command mentioned above takes hourly snapshots and pushs it to S3 bucket nam - Latest full snapshot of the previous 4 weeks. ```console -$ ./bin/etcdbrctl snapshot --storage-provider="S3" --etcd-endpoints http://localhost:2379 --schedule "*/1 * * * *" --store-container="etcd-backup" --delta-snapshot-period-seconds=10 --garbage-collection-policy='Exponential' +$ ./bin/etcdbrctl snapshot \ +--storage-provider="S3" \ +--endpoints http://localhost:2379 \ +--schedule "*/1 * * * *" \ +--store-container="etcd-backup" \ +--delta-snapshot-period-seconds=10 \ +--garbage-collection-policy='Exponential' + +INFO[0000] etcd-backup-restore Version: 0.7.0-dev +INFO[0000] Git SHA: c03f75c +INFO[0000] Go Version: go1.12.7 +INFO[0000] Go OS/Arch: darwin/amd64 INFO[0000] Validating schedule... -INFO[0000] Job attempt: 1 -INFO[0000] Taking initial full snapshot at time: 2018-07-09 12:09:04.3567024 +0000 UTC -INFO[0000] Successfully opened snapshot reader on etcd -INFO[0000] Successfully saved full snapshot at: Backup-1531138145/Full-00000000-00000001-1531138145 -INFO[0000] Will take next full snapshot at time: 2018-07-09 12:10:00 +0000 UTC -INFO[0000] Applied watch on etcd from revision: 00000002 -INFO[0000] No events received to save snapshot. +INFO[0001] Taking scheduled snapshot for time: 2019-08-05 21:50:07.390127 +0530 IST +INFO[0001] Defragmentation period :72 hours +INFO[0001] There are no updates since last snapshot, skipping full snapshot. +INFO[0001] Applied watch on etcd from revision: 9003 +INFO[0001] Stopping full snapshot... +INFO[0001] Resetting full snapshot to run after 52.597795s +INFO[0001] Will take next full snapshot at time: 2019-08-05 21:51:00 +0530 IST +INFO[0001] Taking delta snapshot for time: 2019-08-05 21:50:07.402289 +0530 IST +INFO[0001] No events received to save snapshot. Skipping delta snapshot. +INFO[0001] Stopping delta snapshot... +INFO[0001] Resetting delta snapshot to run after 10 secs. 
+INFO[0011] Taking delta snapshot for time: 2019-08-05 21:50:17.403706 +0530 IST +INFO[0011] No events received to save snapshot. Skipping delta snapshot. +INFO[0011] Stopping delta snapshot... +INFO[0011] Resetting delta snapshot to run after 10 secs. +INFO[0021] Taking delta snapshot for time: 2019-08-05 21:50:27.406208 +0530 IST +INFO[0021] No events received to save snapshot. Skipping delta snapshot. ``` The command mentioned above stores etcd snapshots as per the exponential policy mentioned above. @@ -69,21 +123,22 @@ The command mentioned above stores etcd snapshots as per the exponential policy Sub-command `initialize` does the task of data directory validation. If the data directory is found to be corrupt, the controller will restore it from the latest snapshot in the cloud store. It restores the full snapshot first and then incrementally applies the delta snapshots. ```console -$ ./bin/etcdbrctl initialize --storage-provider="S3" --store-container="etcd-backup" --data-dir="default.etcd" +$ ./bin/etcdbrctl initialize \ +--storage-provider="S3" \ +--store-container="etcd-backup" \ +--data-dir="default.etcd" INFO[0000] Checking for data directory structure validity... -INFO[0000] Checking for data directory files corruption... -INFO[0000] Verifying snap directory... -Verifying Snapfile default.etcd/member/snap/0000000000000001-0000000000000001.snap. -INFO[0000] Verifying WAL directory... -INFO[0000] Verifying DB file... -INFO[0000] Data directory corrupt. Invalid db files: invalid database -INFO[0000] Removing data directory(default.etcd) for snapshot restoration. -INFO[0000] Finding latest snapshot... -INFO[0000] Restoring from latest snapshot: Full-00000000-00040010-1522152360... -2018-03-27 17:38:06.617280 I | etcdserver/membership: added member 8e9e05c52164694d [http://localhost:2380] to cluster cdf818194e3a8c32 -INFO[0000] Successfully restored the etcd data directory. +INFO[0000] Checking for revision consistency... 
+INFO[0000] Etcd revision inconsistent with latest snapshot revision: current etcd revision (770) is less than latest snapshot revision (9002): possible data loss +INFO[0000] Finding latest set of snapshot to recover from... +INFO[0001] Removing data directory(default.etcd.part) for snapshot restoration. +INFO[0001] Restoring from base snapshot: Backup-1565021494/Full-00000000-00009002-1565021494 +2019-08-05 21:45:49.646232 I | etcdserver/membership: added member 8e9e05c52164694d [http://localhost:2380] to cluster cdf818194e3a8c32 +INFO[0008] No delta snapshots present over base snapshot. +INFO[0008] Removing data directory(default.etcd) for snapshot restoration. +INFO[0008] Successfully restored the etcd data directory. ``` ### Etcdbrctl server -With sub-command `server` you can start a http server which exposes an endpoint to initialize etcd over REST interface. The server also keeps on backup schedule thread running to have periodic backups. This is mainly made available to manage an etcd instance running in a Kubernetes cluster. You can deploy the example [helm chart](../../chart/etcd-backup-restore) on a Kubernetes cluster to have an fault resilient etcd instance. +With sub-command `server` you can start an HTTP server which exposes an endpoint to initialize etcd over a REST interface. The server also keeps the backup schedule thread running to keep taking periodic backups. This is mainly made available to manage an etcd instance running in a Kubernetes cluster. You can deploy the example [helm chart](../../chart/etcd-backup-restore) on a Kubernetes cluster to have a fault-resilient, self-healing etcd cluster.