diff --git a/backend/coins/btc/db/transactionsdb/transactionsdb.go b/backend/coins/btc/db/transactionsdb/transactionsdb.go index 9ecfd5e2d2..90150b1e9e 100644 --- a/backend/coins/btc/db/transactionsdb/transactionsdb.go +++ b/backend/coins/btc/db/transactionsdb/transactionsdb.go @@ -22,12 +22,12 @@ import ( "github.com/btcsuite/btcd/chaincfg/chainhash" "github.com/btcsuite/btcd/wire" - bbolt "github.com/coreos/bbolt" "github.com/digitalbitbox/bitbox-wallet-app/backend/coins/btc/blockchain" "github.com/digitalbitbox/bitbox-wallet-app/backend/coins/btc/transactions" "github.com/digitalbitbox/bitbox-wallet-app/backend/coins/btc/types" "github.com/digitalbitbox/bitbox-wallet-app/backend/coins/btc/util" "github.com/digitalbitbox/bitbox-wallet-app/util/errp" + "go.etcd.io/bbolt" ) const ( diff --git a/backend/coins/eth/db/db.go b/backend/coins/eth/db/db.go index a20d9d0434..9f7b2bf966 100644 --- a/backend/coins/eth/db/db.go +++ b/backend/coins/eth/db/db.go @@ -19,10 +19,10 @@ import ( "encoding/json" "sort" - bbolt "github.com/coreos/bbolt" "github.com/digitalbitbox/bitbox-wallet-app/backend/coins/eth/types" "github.com/digitalbitbox/bitbox-wallet-app/util/errp" "github.com/digitalbitbox/bitbox-wallet-app/util/jsonp" + "go.etcd.io/bbolt" ) const ( diff --git a/backend/notifier.go b/backend/notifier.go index 37d6c1c1cd..76d1206e3c 100644 --- a/backend/notifier.go +++ b/backend/notifier.go @@ -17,9 +17,9 @@ package backend import ( "fmt" - bbolt "github.com/coreos/bbolt" "github.com/digitalbitbox/bitbox-wallet-app/backend/accounts" "github.com/digitalbitbox/bitbox-wallet-app/util/errp" + "go.etcd.io/bbolt" ) const ( diff --git a/backend/rates/bbolt_bench_test.go b/backend/rates/bbolt_bench_test.go index 7a9d039b9b..80f3a44f4f 100644 --- a/backend/rates/bbolt_bench_test.go +++ b/backend/rates/bbolt_bench_test.go @@ -10,10 +10,10 @@ import ( "testing" "time" - bbolt "github.com/coreos/bbolt" "github.com/digitalbitbox/bitbox-wallet-app/util/test" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "go.etcd.io/bbolt" ) // These benchmarks are independent of the actual rates package code. diff --git a/backend/rates/db.go b/backend/rates/db.go index a81a928a32..243824a818 100644 --- a/backend/rates/db.go +++ b/backend/rates/db.go @@ -6,7 +6,7 @@ import ( "path/filepath" "time" - bbolt "github.com/coreos/bbolt" + "go.etcd.io/bbolt" ) func openRatesDB(dir string) (*bbolt.DB, error) { diff --git a/backend/rates/rates.go b/backend/rates/rates.go index 2332d76a8e..3515d63956 100644 --- a/backend/rates/rates.go +++ b/backend/rates/rates.go @@ -28,13 +28,13 @@ import ( "sync" "time" - bbolt "github.com/coreos/bbolt" "github.com/digitalbitbox/bitbox-wallet-app/util/errp" "github.com/digitalbitbox/bitbox-wallet-app/util/logging" "github.com/digitalbitbox/bitbox-wallet-app/util/observable" "github.com/digitalbitbox/bitbox-wallet-app/util/observable/action" "github.com/digitalbitbox/bitbox-wallet-app/util/ratelimit" "github.com/sirupsen/logrus" + "go.etcd.io/bbolt" ) const ( diff --git a/go.mod b/go.mod index a167f63341..2ff8a4f27a 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,6 @@ require ( github.com/btcsuite/btcd v0.22.0-beta github.com/btcsuite/btcutil v1.0.3-0.20210929233259-9cdf59f60c51 github.com/cespare/cp v1.1.1 // indirect - github.com/coreos/bbolt v1.3.0 github.com/deckarep/golang-set v1.7.1 // indirect github.com/digitalbitbox/bitbox02-api-go v0.0.0-20211215112420-8cbc2cc44567 github.com/ethereum/go-ethereum v1.10.13 @@ -24,6 +23,7 @@ require ( github.com/stretchr/objx v0.2.0 // indirect github.com/stretchr/testify v1.7.0 github.com/tyler-smith/go-bip39 v1.0.2 + go.etcd.io/bbolt v1.3.6 golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2 golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d ) diff --git a/go.sum b/go.sum index 5a81f92da6..caa6da6dae 100644 --- a/go.sum +++ b/go.sum @@ -59,7 +59,6 @@ github.com/aws/smithy-go v1.1.0/go.mod h1:EzMw8dbp/YJL4A5/sbhGddag+NPT7q084agLbB github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/bmizerany/pat v0.0.0-20170815010413-6226ea591a40/go.mod h1:8rLXio+WjiTceGBHIoTvn60HIbs7Hm7bcHjyrSqYB9c= -github.com/boltdb/bolt v1.3.1 h1:JQmyP4ZBrce+ZQu0dY660FMfatumYDLun9hBCUVIkF4= github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps= github.com/btcsuite/btcd v0.20.1-beta/go.mod h1:wVuoA8VJLEcwgqHBwHmzLRazpKxTv13Px/pDuV7OomQ= github.com/btcsuite/btcd v0.22.0-beta h1:LTDpDKUM5EeOFBPM8IXpinEcmZ6FWfNZbE3lfrfdnWo= @@ -95,8 +94,6 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk github.com/cloudflare/cloudflare-go v0.14.0/go.mod h1:EnwdgGMaFOruiPZRFSgn+TsQ3hQ7C/YWzIGLeu5c304= github.com/consensys/bavard v0.1.8-0.20210406032232-f3452dc9b572/go.mod h1:Bpd0/3mZuaj6Sj+PqrmIquiOKy397AKGThQPaGzNXAQ= github.com/consensys/gnark-crypto v0.4.1-0.20210426202927-39ac3d4b3f1f/go.mod h1:815PAHg3wvysy0SyIqanF8gZ0Y1wjk/hrDHD/iT88+Q= -github.com/coreos/bbolt v1.3.0 h1:HIgH5xUWXT914HCI671AxuTTqjj64UOFr7pHn48LUTI= -github.com/coreos/bbolt v1.3.0/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/cyberdelia/templates v0.0.0-20141128023046-ca7fffd4298c/go.mod h1:GyV+0YP4qX0UQ7r2MoYZ+AvYDp12OF5yg4q8rGnyNh4= @@ -406,6 +403,8 @@ github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+ github.com/willf/bitset v1.1.3/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= github.com/xlab/treeprint v0.0.0-20180616005107-d6fb6747feb6/go.mod h1:ce1O1j6UtZfjr22oyGxGLbauSBp2YVXpARAosm7dHBg= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= +go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= @@ -523,6 +522,7 @@ golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200814200057-3d37ad5750ed/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200826173525-f9321e4c35a6/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/vendor/github.com/coreos/bbolt/Makefile b/vendor/github.com/coreos/bbolt/Makefile deleted file mode 100644 index e035e63adc..0000000000 --- a/vendor/github.com/coreos/bbolt/Makefile +++ /dev/null @@ -1,18 +0,0 @@ -BRANCH=`git rev-parse --abbrev-ref HEAD` -COMMIT=`git rev-parse --short HEAD` -GOLDFLAGS="-X main.branch $(BRANCH) -X main.commit $(COMMIT)" - -default: build - -race: - @go test -v -race -test.run="TestSimulate_(100op|1000op)" - -# go get github.com/kisielk/errcheck -errcheck: - @errcheck -ignorepkg=bytes -ignore=os:Remove github.com/boltdb/bolt - -test: - @go test -v -cover . - @go test -v ./cmd/bolt - -.PHONY: fmt test diff --git a/vendor/github.com/coreos/bbolt/appveyor.yml b/vendor/github.com/coreos/bbolt/appveyor.yml deleted file mode 100644 index 6e26e941d6..0000000000 --- a/vendor/github.com/coreos/bbolt/appveyor.yml +++ /dev/null @@ -1,18 +0,0 @@ -version: "{build}" - -os: Windows Server 2012 R2 - -clone_folder: c:\gopath\src\github.com\boltdb\bolt - -environment: - GOPATH: c:\gopath - -install: - - echo %PATH% - - echo %GOPATH% - - go version - - go env - - go get -v -t ./... - -build_script: - - go test -v ./... diff --git a/vendor/github.com/coreos/bbolt/freelist.go b/vendor/github.com/coreos/bbolt/freelist.go deleted file mode 100644 index 1b7ba91b2a..0000000000 --- a/vendor/github.com/coreos/bbolt/freelist.go +++ /dev/null @@ -1,248 +0,0 @@ -package bolt - -import ( - "fmt" - "sort" - "unsafe" -) - -// freelist represents a list of all pages that are available for allocation. -// It also tracks pages that have been freed but are still in use by open transactions. -type freelist struct { - ids []pgid // all free and available free page ids. - pending map[txid][]pgid // mapping of soon-to-be free page ids by tx. - cache map[pgid]bool // fast lookup of all free and pending page ids. -} - -// newFreelist returns an empty, initialized freelist. -func newFreelist() *freelist { - return &freelist{ - pending: make(map[txid][]pgid), - cache: make(map[pgid]bool), - } -} - -// size returns the size of the page after serialization. -func (f *freelist) size() int { - return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * f.count()) -} - -// count returns count of pages on the freelist -func (f *freelist) count() int { - return f.free_count() + f.pending_count() -} - -// free_count returns count of free pages -func (f *freelist) free_count() int { - return len(f.ids) -} - -// pending_count returns count of pending pages -func (f *freelist) pending_count() int { - var count int - for _, list := range f.pending { - count += len(list) - } - return count -} - -// all returns a list of all free ids and all pending ids in one sorted list. -func (f *freelist) all() []pgid { - m := make(pgids, 0) - - for _, list := range f.pending { - m = append(m, list...) - } - - sort.Sort(m) - return pgids(f.ids).merge(m) -} - -// allocate returns the starting page id of a contiguous list of pages of a given size. -// If a contiguous block cannot be found then 0 is returned. -func (f *freelist) allocate(n int) pgid { - if len(f.ids) == 0 { - return 0 - } - - var initial, previd pgid - for i, id := range f.ids { - if id <= 1 { - panic(fmt.Sprintf("invalid page allocation: %d", id)) - } - - // Reset initial page if this is not contiguous. - if previd == 0 || id-previd != 1 { - initial = id - } - - // If we found a contiguous block then remove it and return it. - if (id-initial)+1 == pgid(n) { - // If we're allocating off the beginning then take the fast path - // and just adjust the existing slice. This will use extra memory - // temporarily but the append() in free() will realloc the slice - // as is necessary. - if (i + 1) == n { - f.ids = f.ids[i+1:] - } else { - copy(f.ids[i-n+1:], f.ids[i+1:]) - f.ids = f.ids[:len(f.ids)-n] - } - - // Remove from the free cache. - for i := pgid(0); i < pgid(n); i++ { - delete(f.cache, initial+i) - } - - return initial - } - - previd = id - } - return 0 -} - -// free releases a page and its overflow for a given transaction id. -// If the page is already free then a panic will occur. -func (f *freelist) free(txid txid, p *page) { - if p.id <= 1 { - panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id)) - } - - // Free page and all its overflow pages. - var ids = f.pending[txid] - for id := p.id; id <= p.id+pgid(p.overflow); id++ { - // Verify that page is not already free. - if f.cache[id] { - panic(fmt.Sprintf("page %d already freed", id)) - } - - // Add to the freelist and cache. - ids = append(ids, id) - f.cache[id] = true - } - f.pending[txid] = ids -} - -// release moves all page ids for a transaction id (or older) to the freelist. -func (f *freelist) release(txid txid) { - m := make(pgids, 0) - for tid, ids := range f.pending { - if tid <= txid { - // Move transaction's pending pages to the available freelist. - // Don't remove from the cache since the page is still free. - m = append(m, ids...) - delete(f.pending, tid) - } - } - sort.Sort(m) - f.ids = pgids(f.ids).merge(m) -} - -// rollback removes the pages from a given pending tx. -func (f *freelist) rollback(txid txid) { - // Remove page ids from cache. - for _, id := range f.pending[txid] { - delete(f.cache, id) - } - - // Remove pages from pending list. - delete(f.pending, txid) -} - -// freed returns whether a given page is in the free list. -func (f *freelist) freed(pgid pgid) bool { - return f.cache[pgid] -} - -// read initializes the freelist from a freelist page. -func (f *freelist) read(p *page) { - // If the page.count is at the max uint16 value (64k) then it's considered - // an overflow and the size of the freelist is stored as the first element. - idx, count := 0, int(p.count) - if count == 0xFFFF { - idx = 1 - count = int(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0]) - } - - // Copy the list of page ids from the freelist. - if count == 0 { - f.ids = nil - } else { - ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx:count] - f.ids = make([]pgid, len(ids)) - copy(f.ids, ids) - - // Make sure they're sorted. - sort.Sort(pgids(f.ids)) - } - - // Rebuild the page cache. - f.reindex() -} - -// write writes the page ids onto a freelist page. All free and pending ids are -// saved to disk since in the event of a program crash, all pending ids will -// become free. -func (f *freelist) write(p *page) error { - // Combine the old free pgids and pgids waiting on an open transaction. - ids := f.all() - - // Update the header flag. - p.flags |= freelistPageFlag - - // The page.count can only hold up to 64k elements so if we overflow that - // number then we handle it by putting the size in the first element. - if len(ids) == 0 { - p.count = uint16(len(ids)) - } else if len(ids) < 0xFFFF { - p.count = uint16(len(ids)) - copy(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:], ids) - } else { - p.count = 0xFFFF - ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(len(ids)) - copy(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:], ids) - } - - return nil -} - -// reload reads the freelist from a page and filters out pending items. -func (f *freelist) reload(p *page) { - f.read(p) - - // Build a cache of only pending pages. - pcache := make(map[pgid]bool) - for _, pendingIDs := range f.pending { - for _, pendingID := range pendingIDs { - pcache[pendingID] = true - } - } - - // Check each page in the freelist and build a new available freelist - // with any pages not in the pending lists. - var a []pgid - for _, id := range f.ids { - if !pcache[id] { - a = append(a, id) - } - } - f.ids = a - - // Once the available list is rebuilt then rebuild the free cache so that - // it includes the available and pending free pages. - f.reindex() -} - -// reindex rebuilds the free cache based on available and pending free lists. -func (f *freelist) reindex() { - f.cache = make(map[pgid]bool) - for _, id := range f.ids { - f.cache[id] = true - } - for _, pendingIDs := range f.pending { - for _, pendingID := range pendingIDs { - f.cache[pendingID] = true - } - } -} diff --git a/vendor/github.com/coreos/bbolt/.gitignore b/vendor/go.etcd.io/bbolt/.gitignore similarity index 53% rename from vendor/github.com/coreos/bbolt/.gitignore rename to vendor/go.etcd.io/bbolt/.gitignore index c7bd2b7a5b..18312f0043 100644 --- a/vendor/github.com/coreos/bbolt/.gitignore +++ b/vendor/go.etcd.io/bbolt/.gitignore @@ -2,3 +2,6 @@ *.test *.swp /bin/ +cover.out +/.idea +*.iml diff --git a/vendor/go.etcd.io/bbolt/.travis.yml b/vendor/go.etcd.io/bbolt/.travis.yml new file mode 100644 index 0000000000..452601e49d --- /dev/null +++ b/vendor/go.etcd.io/bbolt/.travis.yml @@ -0,0 +1,18 @@ +language: go +go_import_path: go.etcd.io/bbolt + +sudo: false + +go: +- 1.15 + +before_install: +- go get -v golang.org/x/sys/unix +- go get -v honnef.co/go/tools/... +- go get -v github.com/kisielk/errcheck + +script: +- make fmt +- make test +- make race +# - make errcheck diff --git a/vendor/github.com/coreos/bbolt/LICENSE b/vendor/go.etcd.io/bbolt/LICENSE similarity index 100% rename from vendor/github.com/coreos/bbolt/LICENSE rename to vendor/go.etcd.io/bbolt/LICENSE diff --git a/vendor/go.etcd.io/bbolt/Makefile b/vendor/go.etcd.io/bbolt/Makefile new file mode 100644 index 0000000000..21ecf48f61 --- /dev/null +++ b/vendor/go.etcd.io/bbolt/Makefile @@ -0,0 +1,36 @@ +BRANCH=`git rev-parse --abbrev-ref HEAD` +COMMIT=`git rev-parse --short HEAD` +GOLDFLAGS="-X main.branch $(BRANCH) -X main.commit $(COMMIT)" + +race: + @TEST_FREELIST_TYPE=hashmap go test -v -race -test.run="TestSimulate_(100op|1000op)" + @echo "array freelist test" + @TEST_FREELIST_TYPE=array go test -v -race -test.run="TestSimulate_(100op|1000op)" + +fmt: + !(gofmt -l -s -d $(shell find . -name \*.go) | grep '[a-z]') + +# go get honnef.co/go/tools/simple +gosimple: + gosimple ./... + +# go get honnef.co/go/tools/unused +unused: + unused ./... + +# go get github.com/kisielk/errcheck +errcheck: + @errcheck -ignorepkg=bytes -ignore=os:Remove go.etcd.io/bbolt + +test: + TEST_FREELIST_TYPE=hashmap go test -timeout 20m -v -coverprofile cover.out -covermode atomic + # Note: gets "program not an importable package" in out of path builds + TEST_FREELIST_TYPE=hashmap go test -v ./cmd/bbolt + + @echo "array freelist test" + + @TEST_FREELIST_TYPE=array go test -timeout 20m -v -coverprofile cover.out -covermode atomic + # Note: gets "program not an importable package" in out of path builds + @TEST_FREELIST_TYPE=array go test -v ./cmd/bbolt + +.PHONY: race fmt errcheck test gosimple unused diff --git a/vendor/github.com/coreos/bbolt/README.md b/vendor/go.etcd.io/bbolt/README.md similarity index 79% rename from vendor/github.com/coreos/bbolt/README.md rename to vendor/go.etcd.io/bbolt/README.md index 8523e33773..f1b4a7b2bf 100644 --- a/vendor/github.com/coreos/bbolt/README.md +++ b/vendor/go.etcd.io/bbolt/README.md @@ -1,5 +1,18 @@ -Bolt [![Coverage Status](https://coveralls.io/repos/boltdb/bolt/badge.svg?branch=master)](https://coveralls.io/r/boltdb/bolt?branch=master) [![GoDoc](https://godoc.org/github.com/boltdb/bolt?status.svg)](https://godoc.org/github.com/boltdb/bolt) ![Version](https://img.shields.io/badge/version-1.2.1-green.svg) -==== +bbolt +===== + +[![Go Report Card](https://goreportcard.com/badge/github.com/etcd-io/bbolt?style=flat-square)](https://goreportcard.com/report/github.com/etcd-io/bbolt) +[![Coverage](https://codecov.io/gh/etcd-io/bbolt/branch/master/graph/badge.svg)](https://codecov.io/gh/etcd-io/bbolt) +[![Build Status Travis](https://img.shields.io/travis/etcd-io/bboltlabs.svg?style=flat-square&&branch=master)](https://travis-ci.com/etcd-io/bbolt) +[![Godoc](http://img.shields.io/badge/go-documentation-blue.svg?style=flat-square)](https://godoc.org/github.com/etcd-io/bbolt) +[![Releases](https://img.shields.io/github/release/etcd-io/bbolt/all.svg?style=flat-square)](https://github.com/etcd-io/bbolt/releases) +[![LICENSE](https://img.shields.io/github/license/etcd-io/bbolt.svg?style=flat-square)](https://github.com/etcd-io/bbolt/blob/master/LICENSE) + +bbolt is a fork of [Ben Johnson's][gh_ben] [Bolt][bolt] key/value +store. The purpose of this fork is to provide the Go community with an active +maintenance and development target for Bolt; the goal is improved reliability +and stability. bbolt includes bug fixes, performance enhancements, and features +not found in Bolt while preserving backwards compatibility with the Bolt API. Bolt is a pure Go key/value store inspired by [Howard Chu's][hyc_symas] [LMDB project][lmdb]. The goal of the project is to provide a simple, @@ -10,47 +23,55 @@ Since Bolt is meant to be used as such a low-level piece of functionality, simplicity is key. The API will be small and only focus on getting values and setting values. That's it. +[gh_ben]: https://github.com/benbjohnson +[bolt]: https://github.com/boltdb/bolt [hyc_symas]: https://twitter.com/hyc_symas [lmdb]: http://symas.com/mdb/ ## Project Status -Bolt is stable and the API is fixed. Full unit test coverage and randomized -black box testing are used to ensure database consistency and thread safety. -Bolt is currently in high-load production environments serving databases as -large as 1TB. Many companies such as Shopify and Heroku use Bolt-backed -services every day. +Bolt is stable, the API is fixed, and the file format is fixed. Full unit +test coverage and randomized black box testing are used to ensure database +consistency and thread safety. Bolt is currently used in high-load production +environments serving databases as large as 1TB. Many companies such as +Shopify and Heroku use Bolt-backed services every day. + +## Project versioning + +bbolt uses [semantic versioning](http://semver.org). +API should not change between patch and minor releases. +New minor versions may add additional features to the API. ## Table of Contents -- [Getting Started](#getting-started) - - [Installing](#installing) - - [Opening a database](#opening-a-database) - - [Transactions](#transactions) - - [Read-write transactions](#read-write-transactions) - - [Read-only transactions](#read-only-transactions) - - [Batch read-write transactions](#batch-read-write-transactions) - - [Managing transactions manually](#managing-transactions-manually) - - [Using buckets](#using-buckets) - - [Using key/value pairs](#using-keyvalue-pairs) - - [Autoincrementing integer for the bucket](#autoincrementing-integer-for-the-bucket) - - [Iterating over keys](#iterating-over-keys) - - [Prefix scans](#prefix-scans) - - [Range scans](#range-scans) - - [ForEach()](#foreach) - - [Nested buckets](#nested-buckets) - - [Database backups](#database-backups) - - [Statistics](#statistics) - - [Read-Only Mode](#read-only-mode) - - [Mobile Use (iOS/Android)](#mobile-use-iosandroid) -- [Resources](#resources) -- [Comparison with other databases](#comparison-with-other-databases) - - [Postgres, MySQL, & other relational databases](#postgres-mysql--other-relational-databases) - - [LevelDB, RocksDB](#leveldb-rocksdb) - - [LMDB](#lmdb) -- [Caveats & Limitations](#caveats--limitations) -- [Reading the Source](#reading-the-source) -- [Other Projects Using Bolt](#other-projects-using-bolt) + - [Getting Started](#getting-started) + - [Installing](#installing) + - [Opening a database](#opening-a-database) + - [Transactions](#transactions) + - [Read-write transactions](#read-write-transactions) + - [Read-only transactions](#read-only-transactions) + - [Batch read-write transactions](#batch-read-write-transactions) + - [Managing transactions manually](#managing-transactions-manually) + - [Using buckets](#using-buckets) + - [Using key/value pairs](#using-keyvalue-pairs) + - [Autoincrementing integer for the bucket](#autoincrementing-integer-for-the-bucket) + - [Iterating over keys](#iterating-over-keys) + - [Prefix scans](#prefix-scans) + - [Range scans](#range-scans) + - [ForEach()](#foreach) + - [Nested buckets](#nested-buckets) + - [Database backups](#database-backups) + - [Statistics](#statistics) + - [Read-Only Mode](#read-only-mode) + - [Mobile Use (iOS/Android)](#mobile-use-iosandroid) + - [Resources](#resources) + - [Comparison with other databases](#comparison-with-other-databases) + - [Postgres, MySQL, & other relational databases](#postgres-mysql--other-relational-databases) + - [LevelDB, RocksDB](#leveldb-rocksdb) + - [LMDB](#lmdb) + - [Caveats & Limitations](#caveats--limitations) + - [Reading the Source](#reading-the-source) + - [Other Projects Using Bolt](#other-projects-using-bolt) ## Getting Started @@ -59,13 +80,28 @@ services every day. To start using Bolt, install Go and run `go get`: ```sh -$ go get github.com/boltdb/bolt/... +$ go get go.etcd.io/bbolt/... ``` This will retrieve the library and install the `bolt` command line utility into your `$GOBIN` path. +### Importing bbolt + +To use bbolt as an embedded key-value store, import as: + +```go +import bolt "go.etcd.io/bbolt" + +db, err := bolt.Open(path, 0666, nil) +if err != nil { + return err +} +defer db.Close() +``` + + ### Opening a database The top-level object in Bolt is a `DB`. It is represented as a single file on @@ -79,7 +115,7 @@ package main import ( "log" - "github.com/boltdb/bolt" + bolt "go.etcd.io/bbolt" ) func main() { @@ -116,11 +152,12 @@ are not thread safe. To work with data in multiple goroutines you must start a transaction for each one or use locking to ensure only one goroutine accesses a transaction at a time. Creating transaction from the `DB` is thread safe. -Read-only transactions and read-write transactions should not depend on one -another and generally shouldn't be opened simultaneously in the same goroutine. -This can cause a deadlock as the read-write transaction needs to periodically -re-map the data file but it cannot do so while a read-only transaction is open. - +Transactions should not depend on one another and generally shouldn't be opened +simultaneously in the same goroutine. This can cause a deadlock as the read-write +transaction needs to periodically re-map the data file but it cannot do so while +any read-only transaction is open. Even a nested read-only transaction can cause +a deadlock, as the child transaction can block the parent transaction from releasing +its resources. #### Read-write transactions @@ -209,7 +246,7 @@ and then safely close your transaction if an error is returned. This is the recommended way to use Bolt transactions. However, sometimes you may want to manually start and end your transactions. -You can use the `Tx.Begin()` function directly but **please** be sure to close +You can use the `DB.Begin()` function directly but **please** be sure to close the transaction. ```go @@ -239,7 +276,7 @@ should be writable. ### Using buckets Buckets are collections of key/value pairs within the database. All keys in a -bucket must be unique. You can create a bucket using the `DB.CreateBucket()` +bucket must be unique. You can create a bucket using the `Tx.CreateBucket()` function: ```go @@ -395,7 +432,7 @@ db.View(func(tx *bolt.Tx) error { c := tx.Bucket([]byte("MyBucket")).Cursor() prefix := []byte("1234") - for k, v := c.Seek(prefix); bytes.HasPrefix(k, prefix); k, v = c.Next() { + for k, v := c.Seek(prefix); k != nil && bytes.HasPrefix(k, prefix); k, v = c.Next() { fmt.Printf("key=%s, value=%s\n", k, v) } @@ -448,6 +485,10 @@ db.View(func(tx *bolt.Tx) error { }) ``` +Please note that keys and values in `ForEach()` are only valid while +the transaction is open. If you need to use a key or value outside of +the transaction, you must use `copy()` to copy it to another byte +slice. ### Nested buckets @@ -460,6 +501,55 @@ func (*Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) func (*Bucket) DeleteBucket(key []byte) error ``` +Say you had a multi-tenant application where the root level bucket was the account bucket. Inside of this bucket was a sequence of accounts which themselves are buckets. And inside the sequence bucket you could have many buckets pertaining to the Account itself (Users, Notes, etc) isolating the information into logical groupings. + +```go + +// createUser creates a new user in the given account. +func createUser(accountID int, u *User) error { + // Start the transaction. + tx, err := db.Begin(true) + if err != nil { + return err + } + defer tx.Rollback() + + // Retrieve the root bucket for the account. + // Assume this has already been created when the account was set up. + root := tx.Bucket([]byte(strconv.FormatUint(accountID, 10))) + + // Setup the users bucket. + bkt, err := root.CreateBucketIfNotExists([]byte("USERS")) + if err != nil { + return err + } + + // Generate an ID for the new user. + userID, err := bkt.NextSequence() + if err != nil { + return err + } + u.ID = userID + + // Marshal and save the encoded user. + if buf, err := json.Marshal(u); err != nil { + return err + } else if err := bkt.Put([]byte(strconv.FormatUint(u.ID, 10)), buf); err != nil { + return err + } + + // Commit the transaction. + if err := tx.Commit(); err != nil { + return err + } + + return nil +} + +``` + + + ### Database backups @@ -469,7 +559,7 @@ this from a read-only transaction, it will perform a hot backup and not block your other database reads and writes. By default, it will use a regular file handle which will utilize the operating -system's page cache. See the [`Tx`](https://godoc.org/github.com/boltdb/bolt#Tx) +system's page cache. See the [`Tx`](https://godoc.org/go.etcd.io/bbolt#Tx) documentation for information about optimizing for larger-than-RAM datasets. One common use case is to backup over HTTP so you can use tools like `cURL` to @@ -715,6 +805,9 @@ Here are a few things to note when evaluating and using Bolt: can be reused by a new page or can be unmapped from virtual memory and you'll see an `unexpected fault address` panic when accessing it. +* Bolt uses an exclusive write lock on the database file so it cannot be + shared by multiple processes. + * Be careful when using `Bucket.FillPercent`. Setting a high fill percent for buckets that have random inserts will cause your database to have very poor page utilization. @@ -755,7 +848,7 @@ Here are a few things to note when evaluating and using Bolt: ## Reading the Source -Bolt is a relatively small code base (<3KLOC) for an embedded, serializable, +Bolt is a relatively small code base (<5KLOC) for an embedded, serializable, transactional key/value database so it can be a good starting point for people interested in how databases work. @@ -807,46 +900,59 @@ them via pull request. Below is a list of public, open source projects that use Bolt: -* [BoltDbWeb](https://github.com/evnix/boltdbweb) - A web based GUI for BoltDB files. -* [Operation Go: A Routine Mission](http://gocode.io) - An online programming game for Golang using Bolt for user accounts and a leaderboard. +* [Algernon](https://github.com/xyproto/algernon) - A HTTP/2 web server with built-in support for Lua. Uses BoltDB as the default database backend. * [Bazil](https://bazil.org/) - A file system that lets your data reside where it is most convenient for it to reside. -* [DVID](https://github.com/janelia-flyem/dvid) - Added Bolt as optional storage engine and testing it against Basho-tuned leveldb. -* [Skybox Analytics](https://github.com/skybox/skybox) - A standalone funnel analysis tool for web analytics. -* [Scuttlebutt](https://github.com/benbjohnson/scuttlebutt) - Uses Bolt to store and process all Twitter mentions of GitHub projects. -* [Wiki](https://github.com/peterhellberg/wiki) - A tiny wiki using Goji, BoltDB and Blackfriday. +* [bolter](https://github.com/hasit/bolter) - Command-line app for viewing BoltDB file in your terminal. +* [boltcli](https://github.com/spacewander/boltcli) - the redis-cli for boltdb with Lua script support. +* [BoltHold](https://github.com/timshannon/bolthold) - An embeddable NoSQL store for Go types built on BoltDB +* [BoltStore](https://github.com/yosssi/boltstore) - Session store using Bolt. +* [Boltdb Boilerplate](https://github.com/bobintornado/boltdb-boilerplate) - Boilerplate wrapper around bolt aiming to make simple calls one-liners. +* [BoltDbWeb](https://github.com/evnix/boltdbweb) - A web based GUI for BoltDB files. +* [BoltDB Viewer](https://github.com/zc310/rich_boltdb) - A BoltDB Viewer Can run on Windows、Linux、Android system. +* [bleve](http://www.blevesearch.com/) - A pure Go search engine similar to ElasticSearch that uses Bolt as the default storage backend. +* [btcwallet](https://github.com/btcsuite/btcwallet) - A bitcoin wallet. +* [buckets](https://github.com/joyrexus/buckets) - a bolt wrapper streamlining + simple tx and key scans. +* [cayley](https://github.com/google/cayley) - Cayley is an open-source graph database using Bolt as optional backend. * [ChainStore](https://github.com/pressly/chainstore) - Simple key-value interface to a variety of storage engines organized as a chain of operations. -* [MetricBase](https://github.com/msiebuhr/MetricBase) - Single-binary version of Graphite. -* [Gitchain](https://github.com/gitchain/gitchain) - Decentralized, peer-to-peer Git repositories aka "Git meets Bitcoin". +* [🌰 Chestnut](https://github.com/jrapoport/chestnut) - Chestnut is encrypted storage for Go. +* [Consul](https://github.com/hashicorp/consul) - Consul is service discovery and configuration made easy. Distributed, highly available, and datacenter-aware. +* [DVID](https://github.com/janelia-flyem/dvid) - Added Bolt as optional storage engine and testing it against Basho-tuned leveldb. +* [dcrwallet](https://github.com/decred/dcrwallet) - A wallet for the Decred cryptocurrency. +* [drive](https://github.com/odeke-em/drive) - drive is an unofficial Google Drive command line client for \*NIX operating systems. * [event-shuttle](https://github.com/sclasen/event-shuttle) - A Unix system service to collect and reliably deliver messages to Kafka. +* [Freehold](http://tshannon.bitbucket.org/freehold/) - An open, secure, and lightweight platform for your files and data. +* [Go Report Card](https://goreportcard.com/) - Go code quality report cards as a (free and open source) service. +* [GoWebApp](https://github.com/josephspurrier/gowebapp) - A basic MVC web application in Go using BoltDB. +* [GoShort](https://github.com/pankajkhairnar/goShort) - GoShort is a URL shortener written in Golang and BoltDB for persistent key/value storage and for routing it's using high performent HTTPRouter. +* [gopherpit](https://github.com/gopherpit/gopherpit) - A web service to manage Go remote import paths with custom domains +* [gokv](https://github.com/philippgille/gokv) - Simple key-value store abstraction and implementations for Go (Redis, Consul, etcd, bbolt, BadgerDB, LevelDB, Memcached, DynamoDB, S3, PostgreSQL, MongoDB, CockroachDB and many more) +* [Gitchain](https://github.com/gitchain/gitchain) - Decentralized, peer-to-peer Git repositories aka "Git meets Bitcoin". +* [InfluxDB](https://influxdata.com) - Scalable datastore for metrics, events, and real-time analytics. +* [ipLocator](https://github.com/AndreasBriese/ipLocator) - A fast ip-geo-location-server using bolt with bloom filters. * [ipxed](https://github.com/kelseyhightower/ipxed) - Web interface and api for ipxed. -* [BoltStore](https://github.com/yosssi/boltstore) - Session store using Bolt. -* [photosite/session](https://godoc.org/bitbucket.org/kardianos/photosite/session) - Sessions for a photo viewing site. +* [Ironsmith](https://github.com/timshannon/ironsmith) - A simple, script-driven continuous integration (build - > test -> release) tool, with no external dependencies +* [Kala](https://github.com/ajvb/kala) - Kala is a modern job scheduler optimized to run on a single node. It is persistent, JSON over HTTP API, ISO 8601 duration notation, and dependent jobs. +* [Key Value Access Langusge (KVAL)](https://github.com/kval-access-language) - A proposed grammar for key-value datastores offering a bbolt binding. * [LedisDB](https://github.com/siddontang/ledisdb) - A high performance NoSQL, using Bolt as optional storage. -* [ipLocator](https://github.com/AndreasBriese/ipLocator) - A fast ip-geo-location-server using bolt with bloom filters. -* [cayley](https://github.com/google/cayley) - Cayley is an open-source graph database using Bolt as optional backend. -* [bleve](http://www.blevesearch.com/) - A pure Go search engine similar to ElasticSearch that uses Bolt as the default storage backend. -* [tentacool](https://github.com/optiflows/tentacool) - REST api server to manage system stuff (IP, DNS, Gateway...) on a linux server. -* [Seaweed File System](https://github.com/chrislusf/seaweedfs) - Highly scalable distributed key~file system with O(1) disk read. -* [InfluxDB](https://influxdata.com) - Scalable datastore for metrics, events, and real-time analytics. -* [Freehold](http://tshannon.bitbucket.org/freehold/) - An open, secure, and lightweight platform for your files and data. +* [lru](https://github.com/crowdriff/lru) - Easy to use Bolt-backed Least-Recently-Used (LRU) read-through cache with chainable remote stores. +* [mbuckets](https://github.com/abhigupta912/mbuckets) - A Bolt wrapper that allows easy operations on multi level (nested) buckets. +* [MetricBase](https://github.com/msiebuhr/MetricBase) - Single-binary version of Graphite. +* [MuLiFS](https://github.com/dankomiocevic/mulifs) - Music Library Filesystem creates a filesystem to organise your music files. +* [NATS](https://github.com/nats-io/nats-streaming-server) - NATS Streaming uses bbolt for message and metadata storage. * [Prometheus Annotation Server](https://github.com/oliver006/prom_annotation_server) - Annotation server for PromDash & Prometheus service monitoring system. -* [Consul](https://github.com/hashicorp/consul) - Consul is service discovery and configuration made easy. Distributed, highly available, and datacenter-aware. -* [Kala](https://github.com/ajvb/kala) - Kala is a modern job scheduler optimized to run on a single node. It is persistent, JSON over HTTP API, ISO 8601 duration notation, and dependent jobs. -* [drive](https://github.com/odeke-em/drive) - drive is an unofficial Google Drive command line client for \*NIX operating systems. +* [Rain](https://github.com/cenkalti/rain) - BitTorrent client and library. +* [reef-pi](https://github.com/reef-pi/reef-pi) - reef-pi is an award winning, modular, DIY reef tank controller using easy to learn electronics based on a Raspberry Pi. +* [Request Baskets](https://github.com/darklynx/request-baskets) - A web service to collect arbitrary HTTP requests and inspect them via REST API or simple web UI, similar to [RequestBin](http://requestb.in/) service +* [Seaweed File System](https://github.com/chrislusf/seaweedfs) - Highly scalable distributed key~file system with O(1) disk read. * [stow](https://github.com/djherbis/stow) - a persistence manager for objects backed by boltdb. -* [buckets](https://github.com/joyrexus/buckets) - a bolt wrapper streamlining - simple tx and key scans. -* [mbuckets](https://github.com/abhigupta912/mbuckets) - A Bolt wrapper that allows easy operations on multi level (nested) buckets. -* [Request Baskets](https://github.com/darklynx/request-baskets) - A web service to collect arbitrary HTTP requests and inspect them via REST API or simple web UI, similar to [RequestBin](http://requestb.in/) service -* [Go Report Card](https://goreportcard.com/) - Go code quality report cards as a (free and open source) service. -* [Boltdb Boilerplate](https://github.com/bobintornado/boltdb-boilerplate) - Boilerplate wrapper around bolt aiming to make simple calls one-liners. -* [lru](https://github.com/crowdriff/lru) - Easy to use Bolt-backed Least-Recently-Used (LRU) read-through cache with chainable remote stores. * [Storm](https://github.com/asdine/storm) - Simple and powerful ORM for BoltDB. -* [GoWebApp](https://github.com/josephspurrier/gowebapp) - A basic MVC web application in Go using BoltDB. * [SimpleBolt](https://github.com/xyproto/simplebolt) - A simple way to use BoltDB. Deals mainly with strings. -* [Algernon](https://github.com/xyproto/algernon) - A HTTP/2 web server with built-in support for Lua. Uses BoltDB as the default database backend. -* [MuLiFS](https://github.com/dankomiocevic/mulifs) - Music Library Filesystem creates a filesystem to organise your music files. -* [GoShort](https://github.com/pankajkhairnar/goShort) - GoShort is a URL shortener written in Golang and BoltDB for persistent key/value storage and for routing it's using high performent HTTPRouter. +* [Skybox Analytics](https://github.com/skybox/skybox) - A standalone funnel analysis tool for web analytics. +* [Scuttlebutt](https://github.com/benbjohnson/scuttlebutt) - Uses Bolt to store and process all Twitter mentions of GitHub projects. +* [tentacool](https://github.com/optiflows/tentacool) - REST api server to manage system stuff (IP, DNS, Gateway...) on a linux server. +* [torrent](https://github.com/anacrolix/torrent) - Full-featured BitTorrent client package and utilities in Go. BoltDB is a storage backend in development. +* [Wiki](https://github.com/peterhellberg/wiki) - A tiny wiki using Goji, BoltDB and Blackfriday. If you are using Bolt in a project please send a pull request to add it to the list. diff --git a/vendor/github.com/coreos/bbolt/bolt_386.go b/vendor/go.etcd.io/bbolt/bolt_386.go similarity index 93% rename from vendor/github.com/coreos/bbolt/bolt_386.go rename to vendor/go.etcd.io/bbolt/bolt_386.go index e659bfb91f..aee25960ff 100644 --- a/vendor/github.com/coreos/bbolt/bolt_386.go +++ b/vendor/go.etcd.io/bbolt/bolt_386.go @@ -1,4 +1,4 @@ -package bolt +package bbolt // maxMapSize represents the largest mmap size supported by Bolt. const maxMapSize = 0x7FFFFFFF // 2GB diff --git a/vendor/github.com/coreos/bbolt/bolt_amd64.go b/vendor/go.etcd.io/bbolt/bolt_amd64.go similarity index 93% rename from vendor/github.com/coreos/bbolt/bolt_amd64.go rename to vendor/go.etcd.io/bbolt/bolt_amd64.go index cca6b7eb70..5dd8f3f2ae 100644 --- a/vendor/github.com/coreos/bbolt/bolt_amd64.go +++ b/vendor/go.etcd.io/bbolt/bolt_amd64.go @@ -1,4 +1,4 @@ -package bolt +package bbolt // maxMapSize represents the largest mmap size supported by Bolt. const maxMapSize = 0xFFFFFFFFFFFF // 256TB diff --git a/vendor/github.com/coreos/bbolt/bolt_arm.go b/vendor/go.etcd.io/bbolt/bolt_arm.go similarity index 93% rename from vendor/github.com/coreos/bbolt/bolt_arm.go rename to vendor/go.etcd.io/bbolt/bolt_arm.go index e659bfb91f..aee25960ff 100644 --- a/vendor/github.com/coreos/bbolt/bolt_arm.go +++ b/vendor/go.etcd.io/bbolt/bolt_arm.go @@ -1,4 +1,4 @@ -package bolt +package bbolt // maxMapSize represents the largest mmap size supported by Bolt. const maxMapSize = 0x7FFFFFFF // 2GB diff --git a/vendor/github.com/coreos/bbolt/bolt_arm64.go b/vendor/go.etcd.io/bbolt/bolt_arm64.go similarity index 94% rename from vendor/github.com/coreos/bbolt/bolt_arm64.go rename to vendor/go.etcd.io/bbolt/bolt_arm64.go index 6d2309352e..810dfd55c5 100644 --- a/vendor/github.com/coreos/bbolt/bolt_arm64.go +++ b/vendor/go.etcd.io/bbolt/bolt_arm64.go @@ -1,6 +1,6 @@ // +build arm64 -package bolt +package bbolt // maxMapSize represents the largest mmap size supported by Bolt. const maxMapSize = 0xFFFFFFFFFFFF // 256TB diff --git a/vendor/github.com/coreos/bbolt/bolt_linux.go b/vendor/go.etcd.io/bbolt/bolt_linux.go similarity index 91% rename from vendor/github.com/coreos/bbolt/bolt_linux.go rename to vendor/go.etcd.io/bbolt/bolt_linux.go index 2b67666140..7707bcacf0 100644 --- a/vendor/github.com/coreos/bbolt/bolt_linux.go +++ b/vendor/go.etcd.io/bbolt/bolt_linux.go @@ -1,4 +1,4 @@ -package bolt +package bbolt import ( "syscall" diff --git a/vendor/go.etcd.io/bbolt/bolt_mips64x.go b/vendor/go.etcd.io/bbolt/bolt_mips64x.go new file mode 100644 index 0000000000..dd8ffe1239 --- /dev/null +++ b/vendor/go.etcd.io/bbolt/bolt_mips64x.go @@ -0,0 +1,9 @@ +// +build mips64 mips64le + +package bbolt + +// maxMapSize represents the largest mmap size supported by Bolt. +const maxMapSize = 0x8000000000 // 512GB + +// maxAllocSize is the size used when creating array pointers. +const maxAllocSize = 0x7FFFFFFF diff --git a/vendor/go.etcd.io/bbolt/bolt_mipsx.go b/vendor/go.etcd.io/bbolt/bolt_mipsx.go new file mode 100644 index 0000000000..a669703a4e --- /dev/null +++ b/vendor/go.etcd.io/bbolt/bolt_mipsx.go @@ -0,0 +1,9 @@ +// +build mips mipsle + +package bbolt + +// maxMapSize represents the largest mmap size supported by Bolt. +const maxMapSize = 0x40000000 // 1GB + +// maxAllocSize is the size used when creating array pointers. +const maxAllocSize = 0xFFFFFFF diff --git a/vendor/github.com/coreos/bbolt/bolt_openbsd.go b/vendor/go.etcd.io/bbolt/bolt_openbsd.go similarity index 97% rename from vendor/github.com/coreos/bbolt/bolt_openbsd.go rename to vendor/go.etcd.io/bbolt/bolt_openbsd.go index 7058c3d734..d7f50358ef 100644 --- a/vendor/github.com/coreos/bbolt/bolt_openbsd.go +++ b/vendor/go.etcd.io/bbolt/bolt_openbsd.go @@ -1,4 +1,4 @@ -package bolt +package bbolt import ( "syscall" diff --git a/vendor/github.com/coreos/bbolt/bolt_ppc.go b/vendor/go.etcd.io/bbolt/bolt_ppc.go similarity index 93% rename from vendor/github.com/coreos/bbolt/bolt_ppc.go rename to vendor/go.etcd.io/bbolt/bolt_ppc.go index 645ddc3edc..84e545ef3e 100644 --- a/vendor/github.com/coreos/bbolt/bolt_ppc.go +++ b/vendor/go.etcd.io/bbolt/bolt_ppc.go @@ -1,6 +1,6 @@ // +build ppc -package bolt +package bbolt // maxMapSize represents the largest mmap size supported by Bolt. const maxMapSize = 0x7FFFFFFF // 2GB diff --git a/vendor/github.com/coreos/bbolt/bolt_ppc64.go b/vendor/go.etcd.io/bbolt/bolt_ppc64.go similarity index 94% rename from vendor/github.com/coreos/bbolt/bolt_ppc64.go rename to vendor/go.etcd.io/bbolt/bolt_ppc64.go index 2dc6be02e3..a76120908c 100644 --- a/vendor/github.com/coreos/bbolt/bolt_ppc64.go +++ b/vendor/go.etcd.io/bbolt/bolt_ppc64.go @@ -1,6 +1,6 @@ // +build ppc64 -package bolt +package bbolt // maxMapSize represents the largest mmap size supported by Bolt. const maxMapSize = 0xFFFFFFFFFFFF // 256TB diff --git a/vendor/github.com/coreos/bbolt/bolt_ppc64le.go b/vendor/go.etcd.io/bbolt/bolt_ppc64le.go similarity index 94% rename from vendor/github.com/coreos/bbolt/bolt_ppc64le.go rename to vendor/go.etcd.io/bbolt/bolt_ppc64le.go index 8351e129f6..c830f2fc77 100644 --- a/vendor/github.com/coreos/bbolt/bolt_ppc64le.go +++ b/vendor/go.etcd.io/bbolt/bolt_ppc64le.go @@ -1,6 +1,6 @@ // +build ppc64le -package bolt +package bbolt // maxMapSize represents the largest mmap size supported by Bolt. const maxMapSize = 0xFFFFFFFFFFFF // 256TB diff --git a/vendor/go.etcd.io/bbolt/bolt_riscv64.go b/vendor/go.etcd.io/bbolt/bolt_riscv64.go new file mode 100644 index 0000000000..c967613b00 --- /dev/null +++ b/vendor/go.etcd.io/bbolt/bolt_riscv64.go @@ -0,0 +1,9 @@ +// +build riscv64 + +package bbolt + +// maxMapSize represents the largest mmap size supported by Bolt. +const maxMapSize = 0xFFFFFFFFFFFF // 256TB + +// maxAllocSize is the size used when creating array pointers. +const maxAllocSize = 0x7FFFFFFF diff --git a/vendor/github.com/coreos/bbolt/bolt_s390x.go b/vendor/go.etcd.io/bbolt/bolt_s390x.go similarity index 94% rename from vendor/github.com/coreos/bbolt/bolt_s390x.go rename to vendor/go.etcd.io/bbolt/bolt_s390x.go index f4dd26bbba..ff2a560970 100644 --- a/vendor/github.com/coreos/bbolt/bolt_s390x.go +++ b/vendor/go.etcd.io/bbolt/bolt_s390x.go @@ -1,6 +1,6 @@ // +build s390x -package bolt +package bbolt // maxMapSize represents the largest mmap size supported by Bolt. const maxMapSize = 0xFFFFFFFFFFFF // 256TB diff --git a/vendor/github.com/coreos/bbolt/bolt_unix.go b/vendor/go.etcd.io/bbolt/bolt_unix.go similarity index 51% rename from vendor/github.com/coreos/bbolt/bolt_unix.go rename to vendor/go.etcd.io/bbolt/bolt_unix.go index cad62dda1e..4e5f65ccc8 100644 --- a/vendor/github.com/coreos/bbolt/bolt_unix.go +++ b/vendor/go.etcd.io/bbolt/bolt_unix.go @@ -1,41 +1,45 @@ -// +build !windows,!plan9,!solaris +// +build !windows,!plan9,!solaris,!aix -package bolt +package bbolt import ( "fmt" - "os" "syscall" "time" "unsafe" + + "golang.org/x/sys/unix" ) // flock acquires an advisory lock on a file descriptor. -func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error { +func flock(db *DB, exclusive bool, timeout time.Duration) error { var t time.Time + if timeout != 0 { + t = time.Now() + } + fd := db.file.Fd() + flag := syscall.LOCK_NB + if exclusive { + flag |= syscall.LOCK_EX + } else { + flag |= syscall.LOCK_SH + } for { - // If we're beyond our timeout then return an error. - // This can only occur after we've attempted a flock once. - if t.IsZero() { - t = time.Now() - } else if timeout > 0 && time.Since(t) > timeout { - return ErrTimeout - } - flag := syscall.LOCK_SH - if exclusive { - flag = syscall.LOCK_EX - } - - // Otherwise attempt to obtain an exclusive lock. - err := syscall.Flock(int(db.file.Fd()), flag|syscall.LOCK_NB) + // Attempt to obtain an exclusive lock. + err := syscall.Flock(int(fd), flag) if err == nil { return nil } else if err != syscall.EWOULDBLOCK { return err } + // If we timed out then return an error. + if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout { + return ErrTimeout + } + // Wait for a bit and try again. - time.Sleep(50 * time.Millisecond) + time.Sleep(flockRetryTimeout) } } @@ -47,13 +51,15 @@ func funlock(db *DB) error { // mmap memory maps a DB's data file. func mmap(db *DB, sz int) error { // Map the data file to memory. - b, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags) + b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags) if err != nil { return err } // Advise the kernel that the mmap is accessed randomly. - if err := madvise(b, syscall.MADV_RANDOM); err != nil { + err = unix.Madvise(b, syscall.MADV_RANDOM) + if err != nil && err != syscall.ENOSYS { + // Ignore not implemented error in kernel because it still works. return fmt.Errorf("madvise: %s", err) } @@ -72,18 +78,9 @@ func munmap(db *DB) error { } // Unmap using the original byte slice. - err := syscall.Munmap(db.dataref) + err := unix.Munmap(db.dataref) db.dataref = nil db.data = nil db.datasz = 0 return err } - -// NOTE: This function is copied from stdlib because it is not available on darwin. -func madvise(b []byte, advice int) (err error) { - _, _, e1 := syscall.Syscall(syscall.SYS_MADVISE, uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), uintptr(advice)) - if e1 != 0 { - err = e1 - } - return -} diff --git a/vendor/go.etcd.io/bbolt/bolt_unix_aix.go b/vendor/go.etcd.io/bbolt/bolt_unix_aix.go new file mode 100644 index 0000000000..a64c16f512 --- /dev/null +++ b/vendor/go.etcd.io/bbolt/bolt_unix_aix.go @@ -0,0 +1,90 @@ +// +build aix + +package bbolt + +import ( + "fmt" + "syscall" + "time" + "unsafe" + + "golang.org/x/sys/unix" +) + +// flock acquires an advisory lock on a file descriptor. +func flock(db *DB, exclusive bool, timeout time.Duration) error { + var t time.Time + if timeout != 0 { + t = time.Now() + } + fd := db.file.Fd() + var lockType int16 + if exclusive { + lockType = syscall.F_WRLCK + } else { + lockType = syscall.F_RDLCK + } + for { + // Attempt to obtain an exclusive lock. + lock := syscall.Flock_t{Type: lockType} + err := syscall.FcntlFlock(fd, syscall.F_SETLK, &lock) + if err == nil { + return nil + } else if err != syscall.EAGAIN { + return err + } + + // If we timed out then return an error. + if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout { + return ErrTimeout + } + + // Wait for a bit and try again. + time.Sleep(flockRetryTimeout) + } +} + +// funlock releases an advisory lock on a file descriptor. +func funlock(db *DB) error { + var lock syscall.Flock_t + lock.Start = 0 + lock.Len = 0 + lock.Type = syscall.F_UNLCK + lock.Whence = 0 + return syscall.FcntlFlock(uintptr(db.file.Fd()), syscall.F_SETLK, &lock) +} + +// mmap memory maps a DB's data file. +func mmap(db *DB, sz int) error { + // Map the data file to memory. + b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags) + if err != nil { + return err + } + + // Advise the kernel that the mmap is accessed randomly. + if err := unix.Madvise(b, syscall.MADV_RANDOM); err != nil { + return fmt.Errorf("madvise: %s", err) + } + + // Save the original byte slice and convert to a byte array pointer. + db.dataref = b + db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0])) + db.datasz = sz + return nil +} + +// munmap unmaps a DB's data file from memory. +func munmap(db *DB) error { + // Ignore the unmap if we have no mapped data. + if db.dataref == nil { + return nil + } + + // Unmap using the original byte slice. + err := unix.Munmap(db.dataref) + db.dataref = nil + db.data = nil + db.datasz = 0 + return err +} diff --git a/vendor/github.com/coreos/bbolt/bolt_unix_solaris.go b/vendor/go.etcd.io/bbolt/bolt_unix_solaris.go similarity index 70% rename from vendor/github.com/coreos/bbolt/bolt_unix_solaris.go rename to vendor/go.etcd.io/bbolt/bolt_unix_solaris.go index 307bf2b3ee..babad65786 100644 --- a/vendor/github.com/coreos/bbolt/bolt_unix_solaris.go +++ b/vendor/go.etcd.io/bbolt/bolt_unix_solaris.go @@ -1,8 +1,7 @@ -package bolt +package bbolt import ( "fmt" - "os" "syscall" "time" "unsafe" @@ -11,36 +10,35 @@ import ( ) // flock acquires an advisory lock on a file descriptor. -func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error { +func flock(db *DB, exclusive bool, timeout time.Duration) error { var t time.Time + if timeout != 0 { + t = time.Now() + } + fd := db.file.Fd() + var lockType int16 + if exclusive { + lockType = syscall.F_WRLCK + } else { + lockType = syscall.F_RDLCK + } for { - // If we're beyond our timeout then return an error. - // This can only occur after we've attempted a flock once. - if t.IsZero() { - t = time.Now() - } else if timeout > 0 && time.Since(t) > timeout { - return ErrTimeout - } - var lock syscall.Flock_t - lock.Start = 0 - lock.Len = 0 - lock.Pid = 0 - lock.Whence = 0 - lock.Pid = 0 - if exclusive { - lock.Type = syscall.F_WRLCK - } else { - lock.Type = syscall.F_RDLCK - } - err := syscall.FcntlFlock(db.file.Fd(), syscall.F_SETLK, &lock) + // Attempt to obtain an exclusive lock. + lock := syscall.Flock_t{Type: lockType} + err := syscall.FcntlFlock(fd, syscall.F_SETLK, &lock) if err == nil { return nil } else if err != syscall.EAGAIN { return err } + // If we timed out then return an error. + if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout { + return ErrTimeout + } + // Wait for a bit and try again. - time.Sleep(50 * time.Millisecond) + time.Sleep(flockRetryTimeout) } } diff --git a/vendor/github.com/coreos/bbolt/bolt_windows.go b/vendor/go.etcd.io/bbolt/bolt_windows.go similarity index 76% rename from vendor/github.com/coreos/bbolt/bolt_windows.go rename to vendor/go.etcd.io/bbolt/bolt_windows.go index d538e6afd7..fca178bd29 100644 --- a/vendor/github.com/coreos/bbolt/bolt_windows.go +++ b/vendor/go.etcd.io/bbolt/bolt_windows.go @@ -1,4 +1,4 @@ -package bolt +package bbolt import ( "fmt" @@ -16,8 +16,6 @@ var ( ) const ( - lockExt = ".lock" - // see https://msdn.microsoft.com/en-us/library/windows/desktop/aa365203(v=vs.85).aspx flagLockExclusive = 2 flagLockFailImmediately = 1 @@ -48,48 +46,47 @@ func fdatasync(db *DB) error { } // flock acquires an advisory lock on a file descriptor. -func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error { - // Create a separate lock file on windows because a process - // cannot share an exclusive lock on the same file. This is - // needed during Tx.WriteTo(). - f, err := os.OpenFile(db.path+lockExt, os.O_CREATE, mode) - if err != nil { - return err - } - db.lockfile = f - +func flock(db *DB, exclusive bool, timeout time.Duration) error { var t time.Time + if timeout != 0 { + t = time.Now() + } + var flag uint32 = flagLockFailImmediately + if exclusive { + flag |= flagLockExclusive + } for { - // If we're beyond our timeout then return an error. - // This can only occur after we've attempted a flock once. - if t.IsZero() { - t = time.Now() - } else if timeout > 0 && time.Since(t) > timeout { - return ErrTimeout - } - - var flag uint32 = flagLockFailImmediately - if exclusive { - flag |= flagLockExclusive - } + // Fix for https://github.com/etcd-io/bbolt/issues/121. Use byte-range + // -1..0 as the lock on the database file. + var m1 uint32 = (1 << 32) - 1 // -1 in a uint32 + err := lockFileEx(syscall.Handle(db.file.Fd()), flag, 0, 1, 0, &syscall.Overlapped{ + Offset: m1, + OffsetHigh: m1, + }) - err := lockFileEx(syscall.Handle(db.lockfile.Fd()), flag, 0, 1, 0, &syscall.Overlapped{}) if err == nil { return nil } else if err != errLockViolation { return err } + // If we timed oumercit then return an error. + if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout { + return ErrTimeout + } + // Wait for a bit and try again. - time.Sleep(50 * time.Millisecond) + time.Sleep(flockRetryTimeout) } } // funlock releases an advisory lock on a file descriptor. func funlock(db *DB) error { - err := unlockFileEx(syscall.Handle(db.lockfile.Fd()), 0, 1, 0, &syscall.Overlapped{}) - db.lockfile.Close() - os.Remove(db.path+lockExt) + var m1 uint32 = (1 << 32) - 1 // -1 in a uint32 + err := unlockFileEx(syscall.Handle(db.file.Fd()), 0, 1, 0, &syscall.Overlapped{ + Offset: m1, + OffsetHigh: m1, + }) return err } diff --git a/vendor/github.com/coreos/bbolt/boltsync_unix.go b/vendor/go.etcd.io/bbolt/boltsync_unix.go similarity index 91% rename from vendor/github.com/coreos/bbolt/boltsync_unix.go rename to vendor/go.etcd.io/bbolt/boltsync_unix.go index f50442523c..9587afefee 100644 --- a/vendor/github.com/coreos/bbolt/boltsync_unix.go +++ b/vendor/go.etcd.io/bbolt/boltsync_unix.go @@ -1,6 +1,6 @@ // +build !windows,!plan9,!linux,!openbsd -package bolt +package bbolt // fdatasync flushes written data to a file descriptor. func fdatasync(db *DB) error { diff --git a/vendor/github.com/coreos/bbolt/bucket.go b/vendor/go.etcd.io/bbolt/bucket.go similarity index 91% rename from vendor/github.com/coreos/bbolt/bucket.go rename to vendor/go.etcd.io/bbolt/bucket.go index d2f8c524e4..d8750b1487 100644 --- a/vendor/github.com/coreos/bbolt/bucket.go +++ b/vendor/go.etcd.io/bbolt/bucket.go @@ -1,4 +1,4 @@ -package bolt +package bbolt import ( "bytes" @@ -14,13 +14,6 @@ const ( MaxValueSize = (1 << 31) - 2 ) -const ( - maxUint = ^uint(0) - minUint = 0 - maxInt = int(^uint(0) >> 1) - minInt = -maxInt - 1 -) - const bucketHeaderSize = int(unsafe.Sizeof(bucket{})) const ( @@ -130,9 +123,19 @@ func (b *Bucket) Bucket(name []byte) *Bucket { func (b *Bucket) openBucket(value []byte) *Bucket { var child = newBucket(b.tx) + // Unaligned access requires a copy to be made. + const unalignedMask = unsafe.Alignof(struct { + bucket + page + }{}) - 1 + unaligned := uintptr(unsafe.Pointer(&value[0]))&unalignedMask != 0 + if unaligned { + value = cloneBytes(value) + } + // If this is a writable transaction then we need to copy the bucket entry. // Read-only transactions can point directly at the mmap entry. - if b.tx.writable { + if b.tx.writable && !unaligned { child.bucket = &bucket{} *child.bucket = *(*bucket)(unsafe.Pointer(&value[0])) } else { @@ -167,9 +170,8 @@ func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) { if bytes.Equal(key, k) { if (flags & bucketLeafFlag) != 0 { return nil, ErrBucketExists - } else { - return nil, ErrIncompatibleValue } + return nil, ErrIncompatibleValue } // Create empty, inline bucket. @@ -206,7 +208,7 @@ func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) { } // DeleteBucket deletes a bucket at the given key. -// Returns an error if the bucket does not exists, or if the key represents a non-bucket value. +// Returns an error if the bucket does not exist, or if the key represents a non-bucket value. func (b *Bucket) DeleteBucket(key []byte) error { if b.tx.db == nil { return ErrTxClosed @@ -228,7 +230,7 @@ func (b *Bucket) DeleteBucket(key []byte) error { // Recursively delete all child buckets. child := b.Bucket(key) err := child.ForEach(func(k, v []byte) error { - if v == nil { + if _, _, childFlags := child.Cursor().seek(k); (childFlags & bucketLeafFlag) != 0 { if err := child.DeleteBucket(k); err != nil { return fmt.Errorf("delete bucket: %s", err) } @@ -316,7 +318,12 @@ func (b *Bucket) Delete(key []byte) error { // Move cursor to correct position. c := b.Cursor() - _, _, flags := c.seek(key) + k, _, flags := c.seek(key) + + // Return nil if the key doesn't exist. + if !bytes.Equal(key, k) { + return nil + } // Return an error if there is already existing bucket value. if (flags & bucketLeafFlag) != 0 { @@ -329,6 +336,28 @@ func (b *Bucket) Delete(key []byte) error { return nil } +// Sequence returns the current integer for the bucket without incrementing it. +func (b *Bucket) Sequence() uint64 { return b.bucket.sequence } + +// SetSequence updates the sequence number for the bucket. +func (b *Bucket) SetSequence(v uint64) error { + if b.tx.db == nil { + return ErrTxClosed + } else if !b.Writable() { + return ErrTxNotWritable + } + + // Materialize the root node if it hasn't been already so that the + // bucket will be saved during commit. + if b.rootNode == nil { + _ = b.node(b.root, nil) + } + + // Increment and return the sequence. + b.bucket.sequence = v + return nil +} + // NextSequence returns an autoincrementing integer for the bucket. func (b *Bucket) NextSequence() (uint64, error) { if b.tx.db == nil { @@ -382,7 +411,7 @@ func (b *Bucket) Stats() BucketStats { if p.count != 0 { // If page has any elements, add all element headers. - used += leafPageElementSize * int(p.count-1) + used += leafPageElementSize * uintptr(p.count-1) // Add all element key, value sizes. // The computation takes advantage of the fact that the position @@ -390,16 +419,16 @@ func (b *Bucket) Stats() BucketStats { // of all previous elements' keys and values. // It also includes the last element's header. lastElement := p.leafPageElement(p.count - 1) - used += int(lastElement.pos + lastElement.ksize + lastElement.vsize) + used += uintptr(lastElement.pos + lastElement.ksize + lastElement.vsize) } if b.root == 0 { // For inlined bucket just update the inline stats - s.InlineBucketInuse += used + s.InlineBucketInuse += int(used) } else { // For non-inlined bucket update all the leaf stats s.LeafPageN++ - s.LeafInuse += used + s.LeafInuse += int(used) s.LeafOverflowN += int(p.overflow) // Collect stats from sub-buckets. @@ -420,13 +449,13 @@ func (b *Bucket) Stats() BucketStats { // used totals the used bytes for the page // Add header and all element headers. - used := pageHeaderSize + (branchPageElementSize * int(p.count-1)) + used := pageHeaderSize + (branchPageElementSize * uintptr(p.count-1)) // Add size of all keys and values. // Again, use the fact that last element's position equals to // the total of key, value sizes of all previous elements. - used += int(lastElement.pos + lastElement.ksize) - s.BranchInuse += used + used += uintptr(lastElement.pos + lastElement.ksize) + s.BranchInuse += int(used) s.BranchOverflowN += int(p.overflow) } @@ -566,7 +595,7 @@ func (b *Bucket) inlineable() bool { // our threshold for inline bucket size. var size = pageHeaderSize for _, inode := range n.inodes { - size += leafPageElementSize + len(inode.key) + len(inode.value) + size += leafPageElementSize + uintptr(len(inode.key)) + uintptr(len(inode.value)) if inode.flags&bucketLeafFlag != 0 { return false @@ -579,8 +608,8 @@ func (b *Bucket) inlineable() bool { } // Returns the maximum total size of a bucket to make it a candidate for inlining. -func (b *Bucket) maxInlineBucketSize() int { - return b.tx.db.pageSize / 4 +func (b *Bucket) maxInlineBucketSize() uintptr { + return uintptr(b.tx.db.pageSize / 4) } // write allocates and writes a bucket to a byte slice. diff --git a/vendor/go.etcd.io/bbolt/compact.go b/vendor/go.etcd.io/bbolt/compact.go new file mode 100644 index 0000000000..e4fe91b046 --- /dev/null +++ b/vendor/go.etcd.io/bbolt/compact.go @@ -0,0 +1,114 @@ +package bbolt + +// Compact will create a copy of the source DB and in the destination DB. This may +// reclaim space that the source database no longer has use for. txMaxSize can be +// used to limit the transactions size of this process and may trigger intermittent +// commits. A value of zero will ignore transaction sizes. +// TODO: merge with: https://github.com/etcd-io/etcd/blob/b7f0f52a16dbf83f18ca1d803f7892d750366a94/mvcc/backend/backend.go#L349 +func Compact(dst, src *DB, txMaxSize int64) error { + // commit regularly, or we'll run out of memory for large datasets if using one transaction. + var size int64 + tx, err := dst.Begin(true) + if err != nil { + return err + } + defer tx.Rollback() + + if err := walk(src, func(keys [][]byte, k, v []byte, seq uint64) error { + // On each key/value, check if we have exceeded tx size. + sz := int64(len(k) + len(v)) + if size+sz > txMaxSize && txMaxSize != 0 { + // Commit previous transaction. + if err := tx.Commit(); err != nil { + return err + } + + // Start new transaction. + tx, err = dst.Begin(true) + if err != nil { + return err + } + size = 0 + } + size += sz + + // Create bucket on the root transaction if this is the first level. + nk := len(keys) + if nk == 0 { + bkt, err := tx.CreateBucket(k) + if err != nil { + return err + } + if err := bkt.SetSequence(seq); err != nil { + return err + } + return nil + } + + // Create buckets on subsequent levels, if necessary. + b := tx.Bucket(keys[0]) + if nk > 1 { + for _, k := range keys[1:] { + b = b.Bucket(k) + } + } + + // Fill the entire page for best compaction. + b.FillPercent = 1.0 + + // If there is no value then this is a bucket call. + if v == nil { + bkt, err := b.CreateBucket(k) + if err != nil { + return err + } + if err := bkt.SetSequence(seq); err != nil { + return err + } + return nil + } + + // Otherwise treat it as a key/value pair. + return b.Put(k, v) + }); err != nil { + return err + } + + return tx.Commit() +} + +// walkFunc is the type of the function called for keys (buckets and "normal" +// values) discovered by Walk. keys is the list of keys to descend to the bucket +// owning the discovered key/value pair k/v. +type walkFunc func(keys [][]byte, k, v []byte, seq uint64) error + +// walk walks recursively the bolt database db, calling walkFn for each key it finds. +func walk(db *DB, walkFn walkFunc) error { + return db.View(func(tx *Tx) error { + return tx.ForEach(func(name []byte, b *Bucket) error { + return walkBucket(b, nil, name, nil, b.Sequence(), walkFn) + }) + }) +} + +func walkBucket(b *Bucket, keypath [][]byte, k, v []byte, seq uint64, fn walkFunc) error { + // Execute callback. + if err := fn(keypath, k, v, seq); err != nil { + return err + } + + // If this is not a bucket then stop. + if v != nil { + return nil + } + + // Iterate over each child key/value. + keypath = append(keypath, k) + return b.ForEach(func(k, v []byte) error { + if v == nil { + bkt := b.Bucket(k) + return walkBucket(bkt, keypath, k, nil, bkt.Sequence(), fn) + } + return walkBucket(b, keypath, k, v, b.Sequence(), fn) + }) +} diff --git a/vendor/github.com/coreos/bbolt/cursor.go b/vendor/go.etcd.io/bbolt/cursor.go similarity index 98% rename from vendor/github.com/coreos/bbolt/cursor.go rename to vendor/go.etcd.io/bbolt/cursor.go index 1be9f35e3e..98aeb449a4 100644 --- a/vendor/github.com/coreos/bbolt/cursor.go +++ b/vendor/go.etcd.io/bbolt/cursor.go @@ -1,4 +1,4 @@ -package bolt +package bbolt import ( "bytes" @@ -157,12 +157,6 @@ func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) { // Start from root page/node and traverse to correct page. c.stack = c.stack[:0] c.search(seek, c.bucket.root) - ref := &c.stack[len(c.stack)-1] - - // If the cursor is pointing to the end of page/node then return nil. - if ref.index >= ref.count() { - return nil, nil, 0 - } // If this is a bucket then return a nil value. return c.keyValue() @@ -339,6 +333,8 @@ func (c *Cursor) nsearch(key []byte) { // keyValue returns the key and value of the current leaf element. func (c *Cursor) keyValue() ([]byte, []byte, uint32) { ref := &c.stack[len(c.stack)-1] + + // If the cursor is pointing to the end of page/node then return nil. if ref.count() == 0 || ref.index >= ref.count() { return nil, nil, 0 } @@ -370,7 +366,7 @@ func (c *Cursor) node() *node { } for _, ref := range c.stack[:len(c.stack)-1] { _assert(!n.isLeaf, "expected branch node") - n = n.childAt(int(ref.index)) + n = n.childAt(ref.index) } _assert(n.isLeaf, "expected leaf node") return n diff --git a/vendor/github.com/coreos/bbolt/db.go b/vendor/go.etcd.io/bbolt/db.go similarity index 76% rename from vendor/github.com/coreos/bbolt/db.go rename to vendor/go.etcd.io/bbolt/db.go index 1223493ca7..a798c390a2 100644 --- a/vendor/github.com/coreos/bbolt/db.go +++ b/vendor/go.etcd.io/bbolt/db.go @@ -1,4 +1,4 @@ -package bolt +package bbolt import ( "errors" @@ -7,8 +7,7 @@ import ( "log" "os" "runtime" - "runtime/debug" - "strings" + "sort" "sync" "time" "unsafe" @@ -23,6 +22,8 @@ const version = 2 // Represents a marker value to indicate that a file is a Bolt DB. const magic uint32 = 0xED0CDAED +const pgidNoFreelist pgid = 0xffffffffffffffff + // IgnoreNoSync specifies whether the NoSync field of a DB is ignored when // syncing changes to a file. This is required as some operating systems, // such as OpenBSD, do not have a unified buffer cache (UBC) and writes @@ -39,6 +40,19 @@ const ( // default page size for db is set to the OS page size. var defaultPageSize = os.Getpagesize() +// The time elapsed between consecutive file locking attempts. +const flockRetryTimeout = 50 * time.Millisecond + +// FreelistType is the type of the freelist backend +type FreelistType string + +const ( + // FreelistArrayType indicates backend freelist type is array + FreelistArrayType = FreelistType("array") + // FreelistMapType indicates backend freelist type is hashmap + FreelistMapType = FreelistType("hashmap") +) + // DB represents a collection of buckets persisted to a file on disk. // All data access is performed through transactions which can be obtained through the DB. // All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called. @@ -61,6 +75,18 @@ type DB struct { // THIS IS UNSAFE. PLEASE USE WITH CAUTION. NoSync bool + // When true, skips syncing freelist to disk. This improves the database + // write performance under normal operation, but requires a full database + // re-sync during recovery. + NoFreelistSync bool + + // FreelistType sets the backend freelist type. There are two options. Array which is simple but endures + // dramatic performance degradation if database is large and framentation in freelist is common. + // The alternative one is using hashmap, it is faster in almost all circumstances + // but it doesn't guarantee that it offers the smallest page id available. In normal case it is safe. + // The default type is array + FreelistType FreelistType + // When true, skips the truncate call when growing the database. // Setting this to true is only safe on non-ext3/ext4 systems. // Skipping truncation avoids preallocation of hard drive space and @@ -94,10 +120,16 @@ type DB struct { // of truncate() and fsync() when growing the data file. AllocSize int + // Mlock locks database file in memory when set to true. + // It prevents major page faults, however used memory can't be reclaimed. + // + // Supported only on Unix via mlock/munlock syscalls. + Mlock bool + path string + openFile func(string, int, os.FileMode) (*os.File, error) file *os.File - lockfile *os.File // windows only - dataref []byte // mmap'ed readonly, write throws SEGV + dataref []byte // mmap'ed readonly, write throws SEGV data *[maxMapSize]byte datasz int filesz int // current on disk file size @@ -107,9 +139,11 @@ type DB struct { opened bool rwtx *Tx txs []*Tx - freelist *freelist stats Stats + freelist *freelist + freelistLoad sync.Once + pagePool sync.Pool batchMu sync.Mutex @@ -148,14 +182,19 @@ func (db *DB) String() string { // If the file does not exist then it will be created automatically. // Passing in nil options will cause Bolt to open the database with the default options. func Open(path string, mode os.FileMode, options *Options) (*DB, error) { - var db = &DB{opened: true} - + db := &DB{ + opened: true, + } // Set default options if no options are provided. if options == nil { options = DefaultOptions } + db.NoSync = options.NoSync db.NoGrowSync = options.NoGrowSync db.MmapFlags = options.MmapFlags + db.NoFreelistSync = options.NoFreelistSync + db.FreelistType = options.FreelistType + db.Mlock = options.Mlock // Set default values for later DB operations. db.MaxBatchSize = DefaultMaxBatchSize @@ -168,13 +207,18 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { db.readOnly = true } + db.openFile = options.OpenFile + if db.openFile == nil { + db.openFile = os.OpenFile + } + // Open data file and separate sync handler for metadata writes. - db.path = path var err error - if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil { + if db.file, err = db.openFile(path, flag|os.O_CREATE, mode); err != nil { _ = db.close() return nil, err } + db.path = db.file.Name() // Lock file so that other processes using Bolt in read-write mode cannot // use the database at the same time. This would cause corruption since @@ -183,7 +227,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { // if !options.ReadOnly. // The database file is locked using the shared lock (more than one process may // hold a lock at the same time) otherwise (options.ReadOnly is set). - if err := flock(db, mode, !db.readOnly, options.Timeout); err != nil { + if err := flock(db, !db.readOnly, options.Timeout); err != nil { _ = db.close() return nil, err } @@ -191,31 +235,41 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { // Default values for test hooks db.ops.writeAt = db.file.WriteAt + if db.pageSize = options.PageSize; db.pageSize == 0 { + // Set the default page size to the OS page size. + db.pageSize = defaultPageSize + } + // Initialize the database if it doesn't exist. if info, err := db.file.Stat(); err != nil { + _ = db.close() return nil, err } else if info.Size() == 0 { // Initialize new files with meta pages. if err := db.init(); err != nil { + // clean up file descriptor on initialization fail + _ = db.close() return nil, err } } else { // Read the first meta page to determine the page size. var buf [0x1000]byte - if _, err := db.file.ReadAt(buf[:], 0); err == nil { - m := db.pageInBuffer(buf[:], 0).meta() - if err := m.validate(); err != nil { - // If we can't read the page size, we can assume it's the same - // as the OS -- since that's how the page size was chosen in the - // first place. - // - // If the first page is invalid and this OS uses a different - // page size than what the database was created with then we - // are out of luck and cannot access the database. - db.pageSize = os.Getpagesize() - } else { + // If we can't read the page size, but can read a page, assume + // it's the same as the OS or one given -- since that's how the + // page size was chosen in the first place. + // + // If the first page is invalid and this OS uses a different + // page size than what the database was created with then we + // are out of luck and cannot access the database. + // + // TODO: scan for next page + if bw, err := db.file.ReadAt(buf[:], 0); err == nil && bw == len(buf) { + if m := db.pageInBuffer(buf[:], 0).meta(); m.validate() == nil { db.pageSize = int(m.pageSize) } + } else { + _ = db.close() + return nil, ErrInvalid } } @@ -232,14 +286,50 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { return nil, err } - // Read in the freelist. - db.freelist = newFreelist() - db.freelist.read(db.page(db.meta().freelist)) + if db.readOnly { + return db, nil + } + + db.loadFreelist() + + // Flush freelist when transitioning from no sync to sync so + // NoFreelistSync unaware boltdb can open the db later. + if !db.NoFreelistSync && !db.hasSyncedFreelist() { + tx, err := db.Begin(true) + if tx != nil { + err = tx.Commit() + } + if err != nil { + _ = db.close() + return nil, err + } + } // Mark the database as opened and return. return db, nil } +// loadFreelist reads the freelist if it is synced, or reconstructs it +// by scanning the DB if it is not synced. It assumes there are no +// concurrent accesses being made to the freelist. +func (db *DB) loadFreelist() { + db.freelistLoad.Do(func() { + db.freelist = newFreelist(db.FreelistType) + if !db.hasSyncedFreelist() { + // Reconstruct free list by scanning the DB. + db.freelist.readIDs(db.freepages()) + } else { + // Read free list from freelist page. + db.freelist.read(db.page(db.meta().freelist)) + } + db.stats.FreePageN = db.freelist.free_count() + }) +} + +func (db *DB) hasSyncedFreelist() bool { + return db.meta().freelist != pgidNoFreelist +} + // mmap opens the underlying memory-mapped file and initializes the meta references. // minsz is the minimum size that the new mmap can be. func (db *DB) mmap(minsz int) error { @@ -254,7 +344,8 @@ func (db *DB) mmap(minsz int) error { } // Ensure the size is at least the minimum size. - var size = int(info.Size()) + fileSize := int(info.Size()) + var size = fileSize if size < minsz { size = minsz } @@ -263,6 +354,13 @@ func (db *DB) mmap(minsz int) error { return err } + if db.Mlock { + // Unlock db memory + if err := db.munlock(fileSize); err != nil { + return err + } + } + // Dereference all mmap references before unmapping. if db.rwtx != nil { db.rwtx.root.dereference() @@ -278,6 +376,13 @@ func (db *DB) mmap(minsz int) error { return err } + if db.Mlock { + // Don't allow swapping of data file + if err := db.mlock(fileSize); err != nil { + return err + } + } + // Save references to the meta pages. db.meta0 = db.page(0).meta() db.meta1 = db.page(1).meta() @@ -339,15 +444,36 @@ func (db *DB) mmapSize(size int) (int, error) { return int(sz), nil } +func (db *DB) munlock(fileSize int) error { + if err := munlock(db, fileSize); err != nil { + return fmt.Errorf("munlock error: " + err.Error()) + } + return nil +} + +func (db *DB) mlock(fileSize int) error { + if err := mlock(db, fileSize); err != nil { + return fmt.Errorf("mlock error: " + err.Error()) + } + return nil +} + +func (db *DB) mrelock(fileSizeFrom, fileSizeTo int) error { + if err := db.munlock(fileSizeFrom); err != nil { + return err + } + if err := db.mlock(fileSizeTo); err != nil { + return err + } + return nil +} + // init creates a new database file and initializes its meta pages. func (db *DB) init() error { - // Set the page size to the OS page size. - db.pageSize = os.Getpagesize() - // Create two meta pages on a buffer. buf := make([]byte, db.pageSize*4) for i := 0; i < 2; i++ { - p := db.pageInBuffer(buf[:], pgid(i)) + p := db.pageInBuffer(buf, pgid(i)) p.id = pgid(i) p.flags = metaPageFlag @@ -364,13 +490,13 @@ func (db *DB) init() error { } // Write an empty freelist at page 3. - p := db.pageInBuffer(buf[:], pgid(2)) + p := db.pageInBuffer(buf, pgid(2)) p.id = pgid(2) p.flags = freelistPageFlag p.count = 0 // Write an empty leaf page at page 4. - p = db.pageInBuffer(buf[:], pgid(3)) + p = db.pageInBuffer(buf, pgid(3)) p.id = pgid(3) p.flags = leafPageFlag p.count = 0 @@ -382,12 +508,14 @@ func (db *DB) init() error { if err := fdatasync(db); err != nil { return err } + db.filesz = len(buf) return nil } // Close releases all database resources. -// All transactions must be closed before closing the database. +// It will block waiting for any open transactions to finish +// before closing the database and returning. func (db *DB) Close() error { db.rwlock.Lock() defer db.rwlock.Unlock() @@ -395,8 +523,8 @@ func (db *DB) Close() error { db.metalock.Lock() defer db.metalock.Unlock() - db.mmaplock.RLock() - defer db.mmaplock.RUnlock() + db.mmaplock.Lock() + defer db.mmaplock.Unlock() return db.close() } @@ -526,21 +654,36 @@ func (db *DB) beginRWTx() (*Tx, error) { t := &Tx{writable: true} t.init(db) db.rwtx = t + db.freePages() + return t, nil +} - // Free any pages associated with closed read-only transactions. - var minid txid = 0xFFFFFFFFFFFFFFFF - for _, t := range db.txs { - if t.meta.txid < minid { - minid = t.meta.txid - } +// freePages releases any pages associated with closed read-only transactions. +func (db *DB) freePages() { + // Free all pending pages prior to earliest open transaction. + sort.Sort(txsById(db.txs)) + minid := txid(0xFFFFFFFFFFFFFFFF) + if len(db.txs) > 0 { + minid = db.txs[0].meta.txid } if minid > 0 { db.freelist.release(minid - 1) } - - return t, nil + // Release unused txid extents. + for _, t := range db.txs { + db.freelist.releaseRange(minid, t.meta.txid-1) + minid = t.meta.txid + 1 + } + db.freelist.releaseRange(minid, txid(0xFFFFFFFFFFFFFFFF)) + // Any page both allocated and freed in an extent is safe to release. } +type txsById []*Tx + +func (t txsById) Len() int { return len(t) } +func (t txsById) Swap(i, j int) { t[i], t[j] = t[j], t[i] } +func (t txsById) Less(i, j int) bool { return t[i].meta.txid < t[j].meta.txid } + // removeTx removes a transaction from the database. func (db *DB) removeTx(tx *Tx) { // Release the read lock on the mmap. @@ -552,7 +695,10 @@ func (db *DB) removeTx(tx *Tx) { // Remove the transaction. for i, t := range db.txs { if t == tx { - db.txs = append(db.txs[:i], db.txs[i+1:]...) + last := len(db.txs) - 1 + db.txs[i] = db.txs[last] + db.txs[last] = nil + db.txs = db.txs[:last] break } } @@ -630,11 +776,7 @@ func (db *DB) View(fn func(*Tx) error) error { return err } - if err := t.Rollback(); err != nil { - return err - } - - return nil + return t.Rollback() } // Batch calls fn as part of a batch. It behaves similar to Update, @@ -734,9 +876,7 @@ retry: // pass success, or bolt internal errors, to all callers for _, c := range b.calls { - if c.err != nil { - c.err <- err - } + c.err <- err } break retry } @@ -823,7 +963,7 @@ func (db *DB) meta() *meta { } // allocate returns a contiguous block of memory starting at a given page. -func (db *DB) allocate(count int) (*page, error) { +func (db *DB) allocate(txid txid, count int) (*page, error) { // Allocate a temporary buffer for the page. var buf []byte if count == 1 { @@ -835,7 +975,7 @@ func (db *DB) allocate(count int) (*page, error) { p.overflow = uint32(count - 1) // Use pages from the freelist if they are available. - if p.id = db.freelist.allocate(count); p.id != 0 { + if p.id = db.freelist.allocate(txid, count); p.id != 0 { return p, nil } @@ -880,6 +1020,12 @@ func (db *DB) grow(sz int) error { if err := db.file.Sync(); err != nil { return fmt.Errorf("file sync error: %s", err) } + if db.Mlock { + // unlock old file and lock new one + if err := db.mrelock(db.filesz, sz); err != nil { + return fmt.Errorf("mlock/munlock error: %s", err) + } + } } db.filesz = sz @@ -890,6 +1036,38 @@ func (db *DB) IsReadOnly() bool { return db.readOnly } +func (db *DB) freepages() []pgid { + tx, err := db.beginTx() + defer func() { + err = tx.Rollback() + if err != nil { + panic("freepages: failed to rollback tx") + } + }() + if err != nil { + panic("freepages: failed to open read only tx") + } + + reachable := make(map[pgid]*page) + nofreed := make(map[pgid]bool) + ech := make(chan error) + go func() { + for e := range ech { + panic(fmt.Sprintf("freepages: failed to get all reachable pages (%v)", e)) + } + }() + tx.checkBucket(&tx.root, reachable, nofreed, ech) + close(ech) + + var fids []pgid + for i := pgid(2); i < db.meta().pgid; i++ { + if _, ok := reachable[i]; !ok { + fids = append(fids, i) + } + } + return fids +} + // Options represents the options that can be set when opening a database. type Options struct { // Timeout is the amount of time to wait to obtain a file lock. @@ -900,6 +1078,17 @@ type Options struct { // Sets the DB.NoGrowSync flag before memory mapping the file. NoGrowSync bool + // Do not sync freelist to disk. This improves the database write performance + // under normal operation, but requires a full database re-sync during recovery. + NoFreelistSync bool + + // FreelistType sets the backend freelist type. There are two options. Array which is simple but endures + // dramatic performance degradation if database is large and framentation in freelist is common. + // The alternative one is using hashmap, it is faster in almost all circumstances + // but it doesn't guarantee that it offers the smallest page id available. In normal case it is safe. + // The default type is array + FreelistType FreelistType + // Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to // grab a shared lock (UNIX). ReadOnly bool @@ -916,13 +1105,31 @@ type Options struct { // If initialMmapSize is smaller than the previous database size, // it takes no effect. InitialMmapSize int + + // PageSize overrides the default OS page size. + PageSize int + + // NoSync sets the initial value of DB.NoSync. Normally this can just be + // set directly on the DB itself when returned from Open(), but this option + // is useful in APIs which expose Options but not the underlying DB. + NoSync bool + + // OpenFile is used to open files. It defaults to os.OpenFile. This option + // is useful for writing hermetic tests. + OpenFile func(string, int, os.FileMode) (*os.File, error) + + // Mlock locks database file in memory when set to true. + // It prevents potential page faults, however + // used memory can't be reclaimed. (UNIX only) + Mlock bool } // DefaultOptions represent the options used if nil options are passed into Open(). // No timeout is used which will cause Bolt to wait indefinitely for a lock. var DefaultOptions = &Options{ - Timeout: 0, - NoGrowSync: false, + Timeout: 0, + NoGrowSync: false, + FreelistType: FreelistArrayType, } // Stats represents statistics about the database. @@ -952,15 +1159,11 @@ func (s *Stats) Sub(other *Stats) Stats { diff.PendingPageN = s.PendingPageN diff.FreeAlloc = s.FreeAlloc diff.FreelistInuse = s.FreelistInuse - diff.TxN = other.TxN - s.TxN + diff.TxN = s.TxN - other.TxN diff.TxStats = s.TxStats.Sub(&other.TxStats) return diff } -func (s *Stats) add(other *Stats) { - s.TxStats.add(&other.TxStats) -} - type Info struct { Data uintptr PageSize int @@ -999,7 +1202,8 @@ func (m *meta) copy(dest *meta) { func (m *meta) write(p *page) { if m.root.root >= m.pgid { panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid)) - } else if m.freelist >= m.pgid { + } else if m.freelist >= m.pgid && m.freelist != pgidNoFreelist { + // TODO: reject pgidNoFreeList if !NoFreelistSync panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid)) } @@ -1026,11 +1230,3 @@ func _assert(condition bool, msg string, v ...interface{}) { panic(fmt.Sprintf("assertion failed: "+msg, v...)) } } - -func warn(v ...interface{}) { fmt.Fprintln(os.Stderr, v...) } -func warnf(msg string, v ...interface{}) { fmt.Fprintf(os.Stderr, msg+"\n", v...) } - -func printstack() { - stack := strings.Join(strings.Split(string(debug.Stack()), "\n")[2:], "\n") - fmt.Fprintln(os.Stderr, stack) -} diff --git a/vendor/github.com/coreos/bbolt/doc.go b/vendor/go.etcd.io/bbolt/doc.go similarity index 94% rename from vendor/github.com/coreos/bbolt/doc.go rename to vendor/go.etcd.io/bbolt/doc.go index cc937845db..95f25f01c6 100644 --- a/vendor/github.com/coreos/bbolt/doc.go +++ b/vendor/go.etcd.io/bbolt/doc.go @@ -1,5 +1,5 @@ /* -Package bolt implements a low-level key/value store in pure Go. It supports +package bbolt implements a low-level key/value store in pure Go. It supports fully serializable transactions, ACID semantics, and lock-free MVCC with multiple readers and a single writer. Bolt can be used for projects that want a simple data store without the need to add large dependencies such as @@ -41,4 +41,4 @@ point to different data or can point to invalid memory which will cause a panic. */ -package bolt +package bbolt diff --git a/vendor/github.com/coreos/bbolt/errors.go b/vendor/go.etcd.io/bbolt/errors.go similarity index 99% rename from vendor/github.com/coreos/bbolt/errors.go rename to vendor/go.etcd.io/bbolt/errors.go index a3620a3ebb..48758ca577 100644 --- a/vendor/github.com/coreos/bbolt/errors.go +++ b/vendor/go.etcd.io/bbolt/errors.go @@ -1,4 +1,4 @@ -package bolt +package bbolt import "errors" diff --git a/vendor/go.etcd.io/bbolt/freelist.go b/vendor/go.etcd.io/bbolt/freelist.go new file mode 100644 index 0000000000..697a46968b --- /dev/null +++ b/vendor/go.etcd.io/bbolt/freelist.go @@ -0,0 +1,404 @@ +package bbolt + +import ( + "fmt" + "sort" + "unsafe" +) + +// txPending holds a list of pgids and corresponding allocation txns +// that are pending to be freed. +type txPending struct { + ids []pgid + alloctx []txid // txids allocating the ids + lastReleaseBegin txid // beginning txid of last matching releaseRange +} + +// pidSet holds the set of starting pgids which have the same span size +type pidSet map[pgid]struct{} + +// freelist represents a list of all pages that are available for allocation. +// It also tracks pages that have been freed but are still in use by open transactions. +type freelist struct { + freelistType FreelistType // freelist type + ids []pgid // all free and available free page ids. + allocs map[pgid]txid // mapping of txid that allocated a pgid. + pending map[txid]*txPending // mapping of soon-to-be free page ids by tx. + cache map[pgid]bool // fast lookup of all free and pending page ids. + freemaps map[uint64]pidSet // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size + forwardMap map[pgid]uint64 // key is start pgid, value is its span size + backwardMap map[pgid]uint64 // key is end pgid, value is its span size + allocate func(txid txid, n int) pgid // the freelist allocate func + free_count func() int // the function which gives you free page number + mergeSpans func(ids pgids) // the mergeSpan func + getFreePageIDs func() []pgid // get free pgids func + readIDs func(pgids []pgid) // readIDs func reads list of pages and init the freelist +} + +// newFreelist returns an empty, initialized freelist. +func newFreelist(freelistType FreelistType) *freelist { + f := &freelist{ + freelistType: freelistType, + allocs: make(map[pgid]txid), + pending: make(map[txid]*txPending), + cache: make(map[pgid]bool), + freemaps: make(map[uint64]pidSet), + forwardMap: make(map[pgid]uint64), + backwardMap: make(map[pgid]uint64), + } + + if freelistType == FreelistMapType { + f.allocate = f.hashmapAllocate + f.free_count = f.hashmapFreeCount + f.mergeSpans = f.hashmapMergeSpans + f.getFreePageIDs = f.hashmapGetFreePageIDs + f.readIDs = f.hashmapReadIDs + } else { + f.allocate = f.arrayAllocate + f.free_count = f.arrayFreeCount + f.mergeSpans = f.arrayMergeSpans + f.getFreePageIDs = f.arrayGetFreePageIDs + f.readIDs = f.arrayReadIDs + } + + return f +} + +// size returns the size of the page after serialization. +func (f *freelist) size() int { + n := f.count() + if n >= 0xFFFF { + // The first element will be used to store the count. See freelist.write. + n++ + } + return int(pageHeaderSize) + (int(unsafe.Sizeof(pgid(0))) * n) +} + +// count returns count of pages on the freelist +func (f *freelist) count() int { + return f.free_count() + f.pending_count() +} + +// arrayFreeCount returns count of free pages(array version) +func (f *freelist) arrayFreeCount() int { + return len(f.ids) +} + +// pending_count returns count of pending pages +func (f *freelist) pending_count() int { + var count int + for _, txp := range f.pending { + count += len(txp.ids) + } + return count +} + +// copyall copies a list of all free ids and all pending ids in one sorted list. +// f.count returns the minimum length required for dst. +func (f *freelist) copyall(dst []pgid) { + m := make(pgids, 0, f.pending_count()) + for _, txp := range f.pending { + m = append(m, txp.ids...) + } + sort.Sort(m) + mergepgids(dst, f.getFreePageIDs(), m) +} + +// arrayAllocate returns the starting page id of a contiguous list of pages of a given size. +// If a contiguous block cannot be found then 0 is returned. +func (f *freelist) arrayAllocate(txid txid, n int) pgid { + if len(f.ids) == 0 { + return 0 + } + + var initial, previd pgid + for i, id := range f.ids { + if id <= 1 { + panic(fmt.Sprintf("invalid page allocation: %d", id)) + } + + // Reset initial page if this is not contiguous. + if previd == 0 || id-previd != 1 { + initial = id + } + + // If we found a contiguous block then remove it and return it. + if (id-initial)+1 == pgid(n) { + // If we're allocating off the beginning then take the fast path + // and just adjust the existing slice. This will use extra memory + // temporarily but the append() in free() will realloc the slice + // as is necessary. + if (i + 1) == n { + f.ids = f.ids[i+1:] + } else { + copy(f.ids[i-n+1:], f.ids[i+1:]) + f.ids = f.ids[:len(f.ids)-n] + } + + // Remove from the free cache. + for i := pgid(0); i < pgid(n); i++ { + delete(f.cache, initial+i) + } + f.allocs[initial] = txid + return initial + } + + previd = id + } + return 0 +} + +// free releases a page and its overflow for a given transaction id. +// If the page is already free then a panic will occur. +func (f *freelist) free(txid txid, p *page) { + if p.id <= 1 { + panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id)) + } + + // Free page and all its overflow pages. + txp := f.pending[txid] + if txp == nil { + txp = &txPending{} + f.pending[txid] = txp + } + allocTxid, ok := f.allocs[p.id] + if ok { + delete(f.allocs, p.id) + } else if (p.flags & freelistPageFlag) != 0 { + // Freelist is always allocated by prior tx. + allocTxid = txid - 1 + } + + for id := p.id; id <= p.id+pgid(p.overflow); id++ { + // Verify that page is not already free. + if f.cache[id] { + panic(fmt.Sprintf("page %d already freed", id)) + } + // Add to the freelist and cache. + txp.ids = append(txp.ids, id) + txp.alloctx = append(txp.alloctx, allocTxid) + f.cache[id] = true + } +} + +// release moves all page ids for a transaction id (or older) to the freelist. +func (f *freelist) release(txid txid) { + m := make(pgids, 0) + for tid, txp := range f.pending { + if tid <= txid { + // Move transaction's pending pages to the available freelist. + // Don't remove from the cache since the page is still free. + m = append(m, txp.ids...) + delete(f.pending, tid) + } + } + f.mergeSpans(m) +} + +// releaseRange moves pending pages allocated within an extent [begin,end] to the free list. +func (f *freelist) releaseRange(begin, end txid) { + if begin > end { + return + } + var m pgids + for tid, txp := range f.pending { + if tid < begin || tid > end { + continue + } + // Don't recompute freed pages if ranges haven't updated. + if txp.lastReleaseBegin == begin { + continue + } + for i := 0; i < len(txp.ids); i++ { + if atx := txp.alloctx[i]; atx < begin || atx > end { + continue + } + m = append(m, txp.ids[i]) + txp.ids[i] = txp.ids[len(txp.ids)-1] + txp.ids = txp.ids[:len(txp.ids)-1] + txp.alloctx[i] = txp.alloctx[len(txp.alloctx)-1] + txp.alloctx = txp.alloctx[:len(txp.alloctx)-1] + i-- + } + txp.lastReleaseBegin = begin + if len(txp.ids) == 0 { + delete(f.pending, tid) + } + } + f.mergeSpans(m) +} + +// rollback removes the pages from a given pending tx. +func (f *freelist) rollback(txid txid) { + // Remove page ids from cache. + txp := f.pending[txid] + if txp == nil { + return + } + var m pgids + for i, pgid := range txp.ids { + delete(f.cache, pgid) + tx := txp.alloctx[i] + if tx == 0 { + continue + } + if tx != txid { + // Pending free aborted; restore page back to alloc list. + f.allocs[pgid] = tx + } else { + // Freed page was allocated by this txn; OK to throw away. + m = append(m, pgid) + } + } + // Remove pages from pending list and mark as free if allocated by txid. + delete(f.pending, txid) + f.mergeSpans(m) +} + +// freed returns whether a given page is in the free list. +func (f *freelist) freed(pgid pgid) bool { + return f.cache[pgid] +} + +// read initializes the freelist from a freelist page. +func (f *freelist) read(p *page) { + if (p.flags & freelistPageFlag) == 0 { + panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.id, p.typ())) + } + // If the page.count is at the max uint16 value (64k) then it's considered + // an overflow and the size of the freelist is stored as the first element. + var idx, count = 0, int(p.count) + if count == 0xFFFF { + idx = 1 + c := *(*pgid)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))) + count = int(c) + if count < 0 { + panic(fmt.Sprintf("leading element count %d overflows int", c)) + } + } + + // Copy the list of page ids from the freelist. + if count == 0 { + f.ids = nil + } else { + var ids []pgid + data := unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p), unsafe.Sizeof(ids[0]), idx) + unsafeSlice(unsafe.Pointer(&ids), data, count) + + // copy the ids, so we don't modify on the freelist page directly + idsCopy := make([]pgid, count) + copy(idsCopy, ids) + // Make sure they're sorted. + sort.Sort(pgids(idsCopy)) + + f.readIDs(idsCopy) + } +} + +// arrayReadIDs initializes the freelist from a given list of ids. +func (f *freelist) arrayReadIDs(ids []pgid) { + f.ids = ids + f.reindex() +} + +func (f *freelist) arrayGetFreePageIDs() []pgid { + return f.ids +} + +// write writes the page ids onto a freelist page. All free and pending ids are +// saved to disk since in the event of a program crash, all pending ids will +// become free. +func (f *freelist) write(p *page) error { + // Combine the old free pgids and pgids waiting on an open transaction. + + // Update the header flag. + p.flags |= freelistPageFlag + + // The page.count can only hold up to 64k elements so if we overflow that + // number then we handle it by putting the size in the first element. + l := f.count() + if l == 0 { + p.count = uint16(l) + } else if l < 0xFFFF { + p.count = uint16(l) + var ids []pgid + data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) + unsafeSlice(unsafe.Pointer(&ids), data, l) + f.copyall(ids) + } else { + p.count = 0xFFFF + var ids []pgid + data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) + unsafeSlice(unsafe.Pointer(&ids), data, l+1) + ids[0] = pgid(l) + f.copyall(ids[1:]) + } + + return nil +} + +// reload reads the freelist from a page and filters out pending items. +func (f *freelist) reload(p *page) { + f.read(p) + + // Build a cache of only pending pages. + pcache := make(map[pgid]bool) + for _, txp := range f.pending { + for _, pendingID := range txp.ids { + pcache[pendingID] = true + } + } + + // Check each page in the freelist and build a new available freelist + // with any pages not in the pending lists. + var a []pgid + for _, id := range f.getFreePageIDs() { + if !pcache[id] { + a = append(a, id) + } + } + + f.readIDs(a) +} + +// noSyncReload reads the freelist from pgids and filters out pending items. +func (f *freelist) noSyncReload(pgids []pgid) { + // Build a cache of only pending pages. + pcache := make(map[pgid]bool) + for _, txp := range f.pending { + for _, pendingID := range txp.ids { + pcache[pendingID] = true + } + } + + // Check each page in the freelist and build a new available freelist + // with any pages not in the pending lists. + var a []pgid + for _, id := range pgids { + if !pcache[id] { + a = append(a, id) + } + } + + f.readIDs(a) +} + +// reindex rebuilds the free cache based on available and pending free lists. +func (f *freelist) reindex() { + ids := f.getFreePageIDs() + f.cache = make(map[pgid]bool, len(ids)) + for _, id := range ids { + f.cache[id] = true + } + for _, txp := range f.pending { + for _, pendingID := range txp.ids { + f.cache[pendingID] = true + } + } +} + +// arrayMergeSpans try to merge list of pages(represented by pgids) with existing spans but using array +func (f *freelist) arrayMergeSpans(ids pgids) { + sort.Sort(ids) + f.ids = pgids(f.ids).merge(ids) +} diff --git a/vendor/go.etcd.io/bbolt/freelist_hmap.go b/vendor/go.etcd.io/bbolt/freelist_hmap.go new file mode 100644 index 0000000000..dbd67a1e73 --- /dev/null +++ b/vendor/go.etcd.io/bbolt/freelist_hmap.go @@ -0,0 +1,178 @@ +package bbolt + +import "sort" + +// hashmapFreeCount returns count of free pages(hashmap version) +func (f *freelist) hashmapFreeCount() int { + // use the forwardMap to get the total count + count := 0 + for _, size := range f.forwardMap { + count += int(size) + } + return count +} + +// hashmapAllocate serves the same purpose as arrayAllocate, but use hashmap as backend +func (f *freelist) hashmapAllocate(txid txid, n int) pgid { + if n == 0 { + return 0 + } + + // if we have a exact size match just return short path + if bm, ok := f.freemaps[uint64(n)]; ok { + for pid := range bm { + // remove the span + f.delSpan(pid, uint64(n)) + + f.allocs[pid] = txid + + for i := pgid(0); i < pgid(n); i++ { + delete(f.cache, pid+i) + } + return pid + } + } + + // lookup the map to find larger span + for size, bm := range f.freemaps { + if size < uint64(n) { + continue + } + + for pid := range bm { + // remove the initial + f.delSpan(pid, size) + + f.allocs[pid] = txid + + remain := size - uint64(n) + + // add remain span + f.addSpan(pid+pgid(n), remain) + + for i := pgid(0); i < pgid(n); i++ { + delete(f.cache, pid+i) + } + return pid + } + } + + return 0 +} + +// hashmapReadIDs reads pgids as input an initial the freelist(hashmap version) +func (f *freelist) hashmapReadIDs(pgids []pgid) { + f.init(pgids) + + // Rebuild the page cache. + f.reindex() +} + +// hashmapGetFreePageIDs returns the sorted free page ids +func (f *freelist) hashmapGetFreePageIDs() []pgid { + count := f.free_count() + if count == 0 { + return nil + } + + m := make([]pgid, 0, count) + for start, size := range f.forwardMap { + for i := 0; i < int(size); i++ { + m = append(m, start+pgid(i)) + } + } + sort.Sort(pgids(m)) + + return m +} + +// hashmapMergeSpans try to merge list of pages(represented by pgids) with existing spans +func (f *freelist) hashmapMergeSpans(ids pgids) { + for _, id := range ids { + // try to see if we can merge and update + f.mergeWithExistingSpan(id) + } +} + +// mergeWithExistingSpan merges pid to the existing free spans, try to merge it backward and forward +func (f *freelist) mergeWithExistingSpan(pid pgid) { + prev := pid - 1 + next := pid + 1 + + preSize, mergeWithPrev := f.backwardMap[prev] + nextSize, mergeWithNext := f.forwardMap[next] + newStart := pid + newSize := uint64(1) + + if mergeWithPrev { + //merge with previous span + start := prev + 1 - pgid(preSize) + f.delSpan(start, preSize) + + newStart -= pgid(preSize) + newSize += preSize + } + + if mergeWithNext { + // merge with next span + f.delSpan(next, nextSize) + newSize += nextSize + } + + f.addSpan(newStart, newSize) +} + +func (f *freelist) addSpan(start pgid, size uint64) { + f.backwardMap[start-1+pgid(size)] = size + f.forwardMap[start] = size + if _, ok := f.freemaps[size]; !ok { + f.freemaps[size] = make(map[pgid]struct{}) + } + + f.freemaps[size][start] = struct{}{} +} + +func (f *freelist) delSpan(start pgid, size uint64) { + delete(f.forwardMap, start) + delete(f.backwardMap, start+pgid(size-1)) + delete(f.freemaps[size], start) + if len(f.freemaps[size]) == 0 { + delete(f.freemaps, size) + } +} + +// initial from pgids using when use hashmap version +// pgids must be sorted +func (f *freelist) init(pgids []pgid) { + if len(pgids) == 0 { + return + } + + size := uint64(1) + start := pgids[0] + + if !sort.SliceIsSorted([]pgid(pgids), func(i, j int) bool { return pgids[i] < pgids[j] }) { + panic("pgids not sorted") + } + + f.freemaps = make(map[uint64]pidSet) + f.forwardMap = make(map[pgid]uint64) + f.backwardMap = make(map[pgid]uint64) + + for i := 1; i < len(pgids); i++ { + // continuous page + if pgids[i] == pgids[i-1]+1 { + size++ + } else { + f.addSpan(start, size) + + size = 1 + start = pgids[i] + } + } + + // init the tail + if size != 0 && start != 0 { + f.addSpan(start, size) + } +} diff --git a/vendor/go.etcd.io/bbolt/go.mod b/vendor/go.etcd.io/bbolt/go.mod new file mode 100644 index 0000000000..96355a69b9 --- /dev/null +++ b/vendor/go.etcd.io/bbolt/go.mod @@ -0,0 +1,5 @@ +module go.etcd.io/bbolt + +go 1.12 + +require golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d diff --git a/vendor/go.etcd.io/bbolt/go.sum b/vendor/go.etcd.io/bbolt/go.sum new file mode 100644 index 0000000000..c13f8f4700 --- /dev/null +++ b/vendor/go.etcd.io/bbolt/go.sum @@ -0,0 +1,2 @@ +golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d h1:L/IKR6COd7ubZrs2oTnTi73IhgqJ71c9s80WsQnh0Es= +golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/vendor/go.etcd.io/bbolt/mlock_unix.go b/vendor/go.etcd.io/bbolt/mlock_unix.go new file mode 100644 index 0000000000..6a6c7b3537 --- /dev/null +++ b/vendor/go.etcd.io/bbolt/mlock_unix.go @@ -0,0 +1,36 @@ +// +build !windows + +package bbolt + +import "golang.org/x/sys/unix" + +// mlock locks memory of db file +func mlock(db *DB, fileSize int) error { + sizeToLock := fileSize + if sizeToLock > db.datasz { + // Can't lock more than mmaped slice + sizeToLock = db.datasz + } + if err := unix.Mlock(db.dataref[:sizeToLock]); err != nil { + return err + } + return nil +} + +//munlock unlocks memory of db file +func munlock(db *DB, fileSize int) error { + if db.dataref == nil { + return nil + } + + sizeToUnlock := fileSize + if sizeToUnlock > db.datasz { + // Can't unlock more than mmaped slice + sizeToUnlock = db.datasz + } + + if err := unix.Munlock(db.dataref[:sizeToUnlock]); err != nil { + return err + } + return nil +} diff --git a/vendor/go.etcd.io/bbolt/mlock_windows.go b/vendor/go.etcd.io/bbolt/mlock_windows.go new file mode 100644 index 0000000000..b4a36a493d --- /dev/null +++ b/vendor/go.etcd.io/bbolt/mlock_windows.go @@ -0,0 +1,11 @@ +package bbolt + +// mlock locks memory of db file +func mlock(_ *DB, _ int) error { + panic("mlock is supported only on UNIX systems") +} + +//munlock unlocks memory of db file +func munlock(_ *DB, _ int) error { + panic("munlock is supported only on UNIX systems") +} diff --git a/vendor/github.com/coreos/bbolt/node.go b/vendor/go.etcd.io/bbolt/node.go similarity index 91% rename from vendor/github.com/coreos/bbolt/node.go rename to vendor/go.etcd.io/bbolt/node.go index 159318b229..73988b5c4c 100644 --- a/vendor/github.com/coreos/bbolt/node.go +++ b/vendor/go.etcd.io/bbolt/node.go @@ -1,4 +1,4 @@ -package bolt +package bbolt import ( "bytes" @@ -41,19 +41,19 @@ func (n *node) size() int { sz, elsz := pageHeaderSize, n.pageElementSize() for i := 0; i < len(n.inodes); i++ { item := &n.inodes[i] - sz += elsz + len(item.key) + len(item.value) + sz += elsz + uintptr(len(item.key)) + uintptr(len(item.value)) } - return sz + return int(sz) } // sizeLessThan returns true if the node is less than a given size. // This is an optimization to avoid calculating a large node when we only need // to know if it fits inside a certain page size. -func (n *node) sizeLessThan(v int) bool { +func (n *node) sizeLessThan(v uintptr) bool { sz, elsz := pageHeaderSize, n.pageElementSize() for i := 0; i < len(n.inodes); i++ { item := &n.inodes[i] - sz += elsz + len(item.key) + len(item.value) + sz += elsz + uintptr(len(item.key)) + uintptr(len(item.value)) if sz >= v { return false } @@ -62,7 +62,7 @@ func (n *node) sizeLessThan(v int) bool { } // pageElementSize returns the size of each page element based on the type of node. -func (n *node) pageElementSize() int { +func (n *node) pageElementSize() uintptr { if n.isLeaf { return leafPageElementSize } @@ -207,10 +207,17 @@ func (n *node) write(p *page) { } // Loop over each item and write it to the page. - b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[n.pageElementSize()*len(n.inodes):] + // off tracks the offset into p of the start of the next data. + off := unsafe.Sizeof(*p) + n.pageElementSize()*uintptr(len(n.inodes)) for i, item := range n.inodes { _assert(len(item.key) > 0, "write: zero-length inode key") + // Create a slice to write into of needed size and advance + // byte pointer for next iteration. + sz := len(item.key) + len(item.value) + b := unsafeByteSlice(unsafe.Pointer(p), off, 0, sz) + off += uintptr(sz) + // Write the page element. if n.isLeaf { elem := p.leafPageElement(uint16(i)) @@ -226,20 +233,9 @@ func (n *node) write(p *page) { _assert(elem.pgid != p.id, "write: circular dependency occurred") } - // If the length of key+value is larger than the max allocation size - // then we need to reallocate the byte array pointer. - // - // See: https://github.com/boltdb/bolt/pull/335 - klen, vlen := len(item.key), len(item.value) - if len(b) < klen+vlen { - b = (*[maxAllocSize]byte)(unsafe.Pointer(&b[0]))[:] - } - // Write data for the element to the end of the page. - copy(b[0:], item.key) - b = b[klen:] - copy(b[0:], item.value) - b = b[vlen:] + l := copy(b, item.key) + copy(b[l:], item.value) } // DEBUG ONLY: n.dump() @@ -247,7 +243,7 @@ func (n *node) write(p *page) { // split breaks up a node into multiple smaller nodes, if appropriate. // This should only be called from the spill() function. -func (n *node) split(pageSize int) []*node { +func (n *node) split(pageSize uintptr) []*node { var nodes []*node node := n @@ -270,7 +266,7 @@ func (n *node) split(pageSize int) []*node { // splitTwo breaks up a node into two smaller nodes, if appropriate. // This should only be called from the split() function. -func (n *node) splitTwo(pageSize int) (*node, *node) { +func (n *node) splitTwo(pageSize uintptr) (*node, *node) { // Ignore the split if the page doesn't have at least enough nodes for // two pages or if the nodes can fit in a single page. if len(n.inodes) <= (minKeysPerPage*2) || n.sizeLessThan(pageSize) { @@ -312,18 +308,18 @@ func (n *node) splitTwo(pageSize int) (*node, *node) { // splitIndex finds the position where a page will fill a given threshold. // It returns the index as well as the size of the first page. // This is only be called from split(). -func (n *node) splitIndex(threshold int) (index, sz int) { +func (n *node) splitIndex(threshold int) (index, sz uintptr) { sz = pageHeaderSize // Loop until we only have the minimum number of keys required for the second page. for i := 0; i < len(n.inodes)-minKeysPerPage; i++ { - index = i + index = uintptr(i) inode := n.inodes[i] - elsize := n.pageElementSize() + len(inode.key) + len(inode.value) + elsize := n.pageElementSize() + uintptr(len(inode.key)) + uintptr(len(inode.value)) // If we have at least the minimum number of keys and adding another // node would put us over the threshold then exit and return. - if i >= minKeysPerPage && sz+elsize > threshold { + if index >= minKeysPerPage && sz+elsize > uintptr(threshold) { break } @@ -356,7 +352,7 @@ func (n *node) spill() error { n.children = nil // Split nodes into appropriate sizes. The first node will always be n. - var nodes = n.split(tx.db.pageSize) + var nodes = n.split(uintptr(tx.db.pageSize)) for _, node := range nodes { // Add node's page to the freelist if it's not new. if node.pgid > 0 { @@ -365,7 +361,7 @@ func (n *node) spill() error { } // Allocate contiguous space for the node. - p, err := tx.allocate((node.size() / tx.db.pageSize) + 1) + p, err := tx.allocate((node.size() + tx.db.pageSize - 1) / tx.db.pageSize) if err != nil { return err } @@ -587,9 +583,11 @@ func (n *node) dump() { type nodes []*node -func (s nodes) Len() int { return len(s) } -func (s nodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func (s nodes) Less(i, j int) bool { return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1 } +func (s nodes) Len() int { return len(s) } +func (s nodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] } +func (s nodes) Less(i, j int) bool { + return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1 +} // inode represents an internal node inside of a node. // It can be used to point to elements in a page or point diff --git a/vendor/github.com/coreos/bbolt/page.go b/vendor/go.etcd.io/bbolt/page.go similarity index 66% rename from vendor/github.com/coreos/bbolt/page.go rename to vendor/go.etcd.io/bbolt/page.go index 7651a6bf7d..c9a158fb06 100644 --- a/vendor/github.com/coreos/bbolt/page.go +++ b/vendor/go.etcd.io/bbolt/page.go @@ -1,4 +1,4 @@ -package bolt +package bbolt import ( "fmt" @@ -7,12 +7,12 @@ import ( "unsafe" ) -const pageHeaderSize = int(unsafe.Offsetof(((*page)(nil)).ptr)) +const pageHeaderSize = unsafe.Sizeof(page{}) const minKeysPerPage = 2 -const branchPageElementSize = int(unsafe.Sizeof(branchPageElement{})) -const leafPageElementSize = int(unsafe.Sizeof(leafPageElement{})) +const branchPageElementSize = unsafe.Sizeof(branchPageElement{}) +const leafPageElementSize = unsafe.Sizeof(leafPageElement{}) const ( branchPageFlag = 0x01 @@ -32,7 +32,6 @@ type page struct { flags uint16 count uint16 overflow uint32 - ptr uintptr } // typ returns a human readable page type string used for debugging. @@ -51,13 +50,13 @@ func (p *page) typ() string { // meta returns a pointer to the metadata section of the page. func (p *page) meta() *meta { - return (*meta)(unsafe.Pointer(&p.ptr)) + return (*meta)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))) } // leafPageElement retrieves the leaf node by index func (p *page) leafPageElement(index uint16) *leafPageElement { - n := &((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[index] - return n + return (*leafPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p), + leafPageElementSize, int(index))) } // leafPageElements retrieves a list of leaf nodes. @@ -65,12 +64,16 @@ func (p *page) leafPageElements() []leafPageElement { if p.count == 0 { return nil } - return ((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[:] + var elems []leafPageElement + data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) + unsafeSlice(unsafe.Pointer(&elems), data, int(p.count)) + return elems } // branchPageElement retrieves the branch node by index func (p *page) branchPageElement(index uint16) *branchPageElement { - return &((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[index] + return (*branchPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p), + unsafe.Sizeof(branchPageElement{}), int(index))) } // branchPageElements retrieves a list of branch nodes. @@ -78,12 +81,15 @@ func (p *page) branchPageElements() []branchPageElement { if p.count == 0 { return nil } - return ((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[:] + var elems []branchPageElement + data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) + unsafeSlice(unsafe.Pointer(&elems), data, int(p.count)) + return elems } // dump writes n bytes of the page to STDERR as hex output. func (p *page) hexdump(n int) { - buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:n] + buf := unsafeByteSlice(unsafe.Pointer(p), 0, 0, n) fmt.Fprintf(os.Stderr, "%x\n", buf) } @@ -102,8 +108,7 @@ type branchPageElement struct { // key returns a byte slice of the node key. func (n *branchPageElement) key() []byte { - buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) - return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize] + return unsafeByteSlice(unsafe.Pointer(n), 0, int(n.pos), int(n.pos)+int(n.ksize)) } // leafPageElement represents a node on a leaf page. @@ -116,14 +121,16 @@ type leafPageElement struct { // key returns a byte slice of the node key. func (n *leafPageElement) key() []byte { - buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) - return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize:n.ksize] + i := int(n.pos) + j := i + int(n.ksize) + return unsafeByteSlice(unsafe.Pointer(n), 0, i, j) } // value returns a byte slice of the node value. func (n *leafPageElement) value() []byte { - buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) - return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos+n.ksize]))[:n.vsize:n.vsize] + i := int(n.pos) + int(n.ksize) + j := i + int(n.vsize) + return unsafeByteSlice(unsafe.Pointer(n), 0, i, j) } // PageInfo represents human readable information about a page. @@ -145,12 +152,33 @@ func (a pgids) merge(b pgids) pgids { // Return the opposite slice if one is nil. if len(a) == 0 { return b - } else if len(b) == 0 { + } + if len(b) == 0 { return a } + merged := make(pgids, len(a)+len(b)) + mergepgids(merged, a, b) + return merged +} + +// mergepgids copies the sorted union of a and b into dst. +// If dst is too small, it panics. +func mergepgids(dst, a, b pgids) { + if len(dst) < len(a)+len(b) { + panic(fmt.Errorf("mergepgids bad len %d < %d + %d", len(dst), len(a), len(b))) + } + // Copy in the opposite slice if one is nil. + if len(a) == 0 { + copy(dst, b) + return + } + if len(b) == 0 { + copy(dst, a) + return + } - // Create a list to hold all elements from both lists. - merged := make(pgids, 0, len(a)+len(b)) + // Merged will hold all elements from both lists. + merged := dst[:0] // Assign lead to the slice with a lower starting value, follow to the higher value. lead, follow := a, b @@ -172,7 +200,5 @@ func (a pgids) merge(b pgids) pgids { } // Append what's left in follow. - merged = append(merged, follow...) - - return merged + _ = append(merged, follow...) } diff --git a/vendor/github.com/coreos/bbolt/tx.go b/vendor/go.etcd.io/bbolt/tx.go similarity index 87% rename from vendor/github.com/coreos/bbolt/tx.go rename to vendor/go.etcd.io/bbolt/tx.go index 1cfb4cde85..869d412008 100644 --- a/vendor/github.com/coreos/bbolt/tx.go +++ b/vendor/go.etcd.io/bbolt/tx.go @@ -1,4 +1,4 @@ -package bolt +package bbolt import ( "fmt" @@ -126,10 +126,7 @@ func (tx *Tx) DeleteBucket(name []byte) error { // the error is returned to the caller. func (tx *Tx) ForEach(fn func(name []byte, b *Bucket) error) error { return tx.root.ForEach(func(k, v []byte) error { - if err := fn(k, tx.root.Bucket(k)); err != nil { - return err - } - return nil + return fn(k, tx.root.Bucket(k)) }) } @@ -169,28 +166,18 @@ func (tx *Tx) Commit() error { // Free the old root bucket. tx.meta.root.root = tx.root.root - opgid := tx.meta.pgid - - // Free the freelist and allocate new pages for it. This will overestimate - // the size of the freelist but not underestimate the size (which would be bad). - tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist)) - p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1) - if err != nil { - tx.rollback() - return err + // Free the old freelist because commit writes out a fresh freelist. + if tx.meta.freelist != pgidNoFreelist { + tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist)) } - if err := tx.db.freelist.write(p); err != nil { - tx.rollback() - return err - } - tx.meta.freelist = p.id - // If the high water mark has moved up then attempt to grow the database. - if tx.meta.pgid > opgid { - if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil { - tx.rollback() + if !tx.db.NoFreelistSync { + err := tx.commitFreelist() + if err != nil { return err } + } else { + tx.meta.freelist = pgidNoFreelist } // Write dirty pages to disk. @@ -201,7 +188,6 @@ func (tx *Tx) Commit() error { } // If strict mode is enabled then perform a consistency check. - // Only the first consistency error is reported in the panic. if tx.db.StrictMode { ch := tx.Check() var errs []string @@ -235,6 +221,31 @@ func (tx *Tx) Commit() error { return nil } +func (tx *Tx) commitFreelist() error { + // Allocate new pages for the new free list. This will overestimate + // the size of the freelist but not underestimate the size (which would be bad). + opgid := tx.meta.pgid + p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1) + if err != nil { + tx.rollback() + return err + } + if err := tx.db.freelist.write(p); err != nil { + tx.rollback() + return err + } + tx.meta.freelist = p.id + // If the high water mark has moved up then attempt to grow the database. + if tx.meta.pgid > opgid { + if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil { + tx.rollback() + return err + } + } + + return nil +} + // Rollback closes the transaction and ignores all previous updates. Read-only // transactions must be rolled back and not committed. func (tx *Tx) Rollback() error { @@ -242,17 +253,36 @@ func (tx *Tx) Rollback() error { if tx.db == nil { return ErrTxClosed } - tx.rollback() + tx.nonPhysicalRollback() return nil } +// nonPhysicalRollback is called when user calls Rollback directly, in this case we do not need to reload the free pages from disk. +func (tx *Tx) nonPhysicalRollback() { + if tx.db == nil { + return + } + if tx.writable { + tx.db.freelist.rollback(tx.meta.txid) + } + tx.close() +} + +// rollback needs to reload the free pages from disk in case some system error happens like fsync error. func (tx *Tx) rollback() { if tx.db == nil { return } if tx.writable { tx.db.freelist.rollback(tx.meta.txid) - tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist)) + if !tx.db.hasSyncedFreelist() { + // Reconstruct free page list by scanning the DB to get the whole free page list. + // Note: scaning the whole db is heavy if your db size is large in NoSyncFreeList mode. + tx.db.freelist.noSyncReload(tx.db.freepages()) + } else { + // Read free page list from freelist page. + tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist)) + } } tx.close() } @@ -291,7 +321,9 @@ func (tx *Tx) close() { } // Copy writes the entire database to a writer. -// This function exists for backwards compatibility. Use WriteTo() instead. +// This function exists for backwards compatibility. +// +// Deprecated; Use WriteTo() instead. func (tx *Tx) Copy(w io.Writer) error { _, err := tx.WriteTo(w) return err @@ -301,11 +333,15 @@ func (tx *Tx) Copy(w io.Writer) error { // If err == nil then exactly tx.Size() bytes will be written into the writer. func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) { // Attempt to open reader with WriteFlag - f, err := os.OpenFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0) + f, err := tx.db.openFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0) if err != nil { return 0, err } - defer func() { _ = f.Close() }() + defer func() { + if cerr := f.Close(); err == nil { + err = cerr + } + }() // Generate a meta page. We use the same page data for both meta pages. buf := make([]byte, tx.db.pageSize) @@ -333,7 +369,7 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) { } // Move past the meta pages in the file. - if _, err := f.Seek(int64(tx.db.pageSize*2), os.SEEK_SET); err != nil { + if _, err := f.Seek(int64(tx.db.pageSize*2), io.SeekStart); err != nil { return n, fmt.Errorf("seek: %s", err) } @@ -344,19 +380,19 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) { return n, err } - return n, f.Close() + return n, nil } // CopyFile copies the entire database to file at the given path. // A reader transaction is maintained during the copy so it is safe to continue // using the database while a copy is in progress. func (tx *Tx) CopyFile(path string, mode os.FileMode) error { - f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode) + f, err := tx.db.openFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode) if err != nil { return err } - err = tx.Copy(f) + _, err = tx.WriteTo(f) if err != nil { _ = f.Close() return err @@ -379,9 +415,14 @@ func (tx *Tx) Check() <-chan error { } func (tx *Tx) check(ch chan error) { + // Force loading free list if opened in ReadOnly mode. + tx.db.loadFreelist() + // Check if any pages are double freed. freed := make(map[pgid]bool) - for _, id := range tx.db.freelist.all() { + all := make([]pgid, tx.db.freelist.count()) + tx.db.freelist.copyall(all) + for _, id := range all { if freed[id] { ch <- fmt.Errorf("page %d: already freed", id) } @@ -392,8 +433,10 @@ func (tx *Tx) check(ch chan error) { reachable := make(map[pgid]*page) reachable[0] = tx.page(0) // meta0 reachable[1] = tx.page(1) // meta1 - for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ { - reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist) + if tx.meta.freelist != pgidNoFreelist { + for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ { + reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist) + } } // Recursively check buckets. @@ -451,7 +494,7 @@ func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bo // allocate returns a contiguous block of memory starting at a given page. func (tx *Tx) allocate(count int) (*page, error) { - p, err := tx.db.allocate(count) + p, err := tx.db.allocate(tx.meta.txid, count) if err != nil { return nil, err } @@ -460,7 +503,7 @@ func (tx *Tx) allocate(count int) (*page, error) { tx.pages[p.id] = p // Update statistics. - tx.stats.PageCount++ + tx.stats.PageCount += count tx.stats.PageAlloc += count * tx.db.pageSize return p, nil @@ -479,20 +522,18 @@ func (tx *Tx) write() error { // Write pages to disk in order. for _, p := range pages { - size := (int(p.overflow) + 1) * tx.db.pageSize + rem := (uint64(p.overflow) + 1) * uint64(tx.db.pageSize) offset := int64(p.id) * int64(tx.db.pageSize) + var written uintptr // Write out page in "max allocation" sized chunks. - ptr := (*[maxAllocSize]byte)(unsafe.Pointer(p)) for { - // Limit our write to our max allocation size. - sz := size + sz := rem if sz > maxAllocSize-1 { sz = maxAllocSize - 1 } + buf := unsafeByteSlice(unsafe.Pointer(p), written, 0, int(sz)) - // Write chunk to disk. - buf := ptr[:sz] if _, err := tx.db.ops.writeAt(buf, offset); err != nil { return err } @@ -501,14 +542,14 @@ func (tx *Tx) write() error { tx.stats.Write++ // Exit inner for loop if we've written all the chunks. - size -= sz - if size == 0 { + rem -= sz + if rem == 0 { break } // Otherwise move offset forward and move pointer to next chunk. offset += int64(sz) - ptr = (*[maxAllocSize]byte)(unsafe.Pointer(&ptr[sz])) + written += uintptr(sz) } } @@ -527,7 +568,7 @@ func (tx *Tx) write() error { continue } - buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:tx.db.pageSize] + buf := unsafeByteSlice(unsafe.Pointer(p), 0, 0, tx.db.pageSize) // See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1 for i := range buf { diff --git a/vendor/go.etcd.io/bbolt/unsafe.go b/vendor/go.etcd.io/bbolt/unsafe.go new file mode 100644 index 0000000000..c0e5037500 --- /dev/null +++ b/vendor/go.etcd.io/bbolt/unsafe.go @@ -0,0 +1,39 @@ +package bbolt + +import ( + "reflect" + "unsafe" +) + +func unsafeAdd(base unsafe.Pointer, offset uintptr) unsafe.Pointer { + return unsafe.Pointer(uintptr(base) + offset) +} + +func unsafeIndex(base unsafe.Pointer, offset uintptr, elemsz uintptr, n int) unsafe.Pointer { + return unsafe.Pointer(uintptr(base) + offset + uintptr(n)*elemsz) +} + +func unsafeByteSlice(base unsafe.Pointer, offset uintptr, i, j int) []byte { + // See: https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices + // + // This memory is not allocated from C, but it is unmanaged by Go's + // garbage collector and should behave similarly, and the compiler + // should produce similar code. Note that this conversion allows a + // subslice to begin after the base address, with an optional offset, + // while the URL above does not cover this case and only slices from + // index 0. However, the wiki never says that the address must be to + // the beginning of a C allocation (or even that malloc was used at + // all), so this is believed to be correct. + return (*[maxAllocSize]byte)(unsafeAdd(base, offset))[i:j:j] +} + +// unsafeSlice modifies the data, len, and cap of a slice variable pointed to by +// the slice parameter. This helper should be used over other direct +// manipulation of reflect.SliceHeader to prevent misuse, namely, converting +// from reflect.SliceHeader to a Go slice type. +func unsafeSlice(slice, data unsafe.Pointer, len int) { + s := (*reflect.SliceHeader)(slice) + s.Data = uintptr(data) + s.Cap = len + s.Len = len +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 5b7c9002cf..cc5033bafe 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -28,9 +28,6 @@ github.com/btcsuite/btcutil/hdkeychain github.com/btcsuite/btcutil/txsort # github.com/cespare/cp v1.1.1 ## explicit -# github.com/coreos/bbolt v1.3.0 -## explicit -github.com/coreos/bbolt # github.com/davecgh/go-spew v1.1.1 github.com/davecgh/go-spew/spew # github.com/deckarep/golang-set v1.7.1 @@ -146,6 +143,9 @@ github.com/tklauser/numcpus ## explicit github.com/tyler-smith/go-bip39 github.com/tyler-smith/go-bip39/wordlists +# go.etcd.io/bbolt v1.3.6 +## explicit +go.etcd.io/bbolt # golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2 ## explicit golang.org/x/crypto/blake2b