Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Legacy receipt converter tool #23454

Closed
wants to merge 32 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
9351cac
core/types: add legacy receipt encoding, conversion test
s1na Aug 11, 2021
6bae0aa
core/types: add convertLegacyStoredReceipt method
s1na Aug 11, 2021
3a3ca13
core/types: encode list of legacy receipts
s1na Aug 12, 2021
b250783
core/types: add method checking if receipt is legacy
s1na Aug 12, 2021
45b73a3
cmd/geth: add boilerplate for freezer-migrate cmd
s1na Aug 12, 2021
7474ef0
core/types: add method for converting list of legacy receipts
s1na Aug 17, 2021
39e5aee
cmd/geth: write converted receipts to tmp freezer file
s1na Aug 17, 2021
b1c67fd
cmd,core,ethdb: drop receipts table after copying over new receipts
s1na Aug 19, 2021
82f36c9
cmd,core,ethdb: move logic to transform table fn
s1na Aug 24, 2021
05dade6
core/rawdb: remove getFileBounds
s1na Aug 24, 2021
dce1336
core/rawdb: copy over rest of index bits
s1na Aug 27, 2021
16c55dc
core/rawdb: drop table append
s1na Nov 9, 2021
710b7d9
core/rawdb: use new freezer funcs
s1na Nov 10, 2021
16286fa
core/rawdb: forgot batch commit
s1na Nov 10, 2021
05812de
core/rawdb: comment, minor fixes
s1na Nov 15, 2021
99c1a32
core/rawdb: handle clean switch-over boundary
s1na Nov 16, 2021
66b5285
core/rawdb: fix off-by-one err
s1na Nov 16, 2021
8092cdd
core/rawdb: add tableFilePath method
s1na Nov 16, 2021
26ec367
cmd,core: rename files after switchover
s1na Nov 16, 2021
fd000bc
core/rawdb: fix file juggling
s1na Nov 17, 2021
e4028b6
core/rawdb: remove DropTable method
s1na Nov 17, 2021
e3b1daa
ethdb: minor
s1na Nov 17, 2021
0d22403
core/rawdb: improve readability
s1na Nov 17, 2021
8466887
core/rawdb: fix initial legacy check
s1na Nov 17, 2021
eb306b6
Add start param to migrateTable
s1na Nov 17, 2021
76426e3
check batch append err
s1na Nov 18, 2021
1279bb3
drop start param from migrateTable
s1na Nov 18, 2021
7338e21
add license to legacy file
s1na Nov 18, 2021
63fb341
rm helper funcs and their tests
s1na Nov 18, 2021
fa28df5
freezer: rm unneeded line
s1na Nov 30, 2021
6bef481
core, ethdb: add comments
s1na Nov 30, 2021
2b5b633
Merge remote-tracking branch 's1na/legacy-receipt-converter' into leg…
s1na Nov 30, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 98 additions & 0 deletions cmd/geth/dbcmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"github.com/ethereum/go-ethereum/common/hexutil"
"github.com/ethereum/go-ethereum/console/prompt"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/trie"
Expand Down Expand Up @@ -69,6 +70,7 @@ Remove blockchain and state databases`,
dbDumpFreezerIndex,
dbImportCmd,
dbExportCmd,
dbMigrateFreezer,
},
}
dbInspectCmd = cli.Command{
Expand Down Expand Up @@ -232,6 +234,21 @@ WARNING: This is a low-level operation which may cause database corruption!`,
},
Description: "Exports the specified chain data to an RLP encoded stream, optionally gzip-compressed.",
}
dbMigrateFreezer = cli.Command{
	Action:    utils.MigrateFlags(freezerMigrate),
	Name:      "freezer-migrate",
	Usage:     "Migrate legacy parts of the freezer. (WARNING: may take a long time)",
	ArgsUsage: "",
	Flags: []cli.Flag{
		utils.DataDirFlag,
		utils.SyncModeFlag,
		utils.MainnetFlag,
		utils.RopstenFlag,
		utils.RinkebyFlag,
		utils.GoerliFlag,
	},
	// Fixed: the description was copy-pasted from the db import command.
	Description: `The freezer-migrate command checks your database for receipts in a legacy format and updates those.
WARNING: please back-up the receipt files in your ancients before running this command.`,
}
)

func removeDB(ctx *cli.Context) error {
Expand Down Expand Up @@ -684,3 +701,84 @@ func exportChaindata(ctx *cli.Context) error {
db := utils.MakeChainDatabase(ctx, stack, true)
return utils.ExportChaindata(ctx.Args().Get(1), kind, exporter(db), stop)
}

func freezerMigrate(ctx *cli.Context) error {
stack, _ := makeConfigNode(ctx)
defer stack.Close()

db := utils.MakeChainDatabase(ctx, stack, false)
defer db.Close()

// Check first block for legacy receipt format
numAncients, err := db.Ancients()
if err != nil {
return err
}
if numAncients < 1 {
log.Info("No blocks in freezer to migrate")
return nil
}

// Find first block with non-empty receipt
firstIdx := uint64(0)
emptyRLPList := []byte{192}
for i := uint64(0); i < numAncients; i++ {
r, err := db.Ancient("receipts", i)
if err != nil {
return err
}
if len(r) == 0 {
continue
}
if !bytes.Equal(r, emptyRLPList) {
firstIdx = i
break
}
}
// Is first non-empty receipt legacy?
first, err := db.Ancient("receipts", firstIdx)
if err != nil {
return err
}
isFirstLegacy, err := types.IsLegacyStoredReceipts(first)
if err != nil {
return err
}
if !isFirstLegacy {
log.Info("No legacy receipts to migrate", "number", firstIdx)
return nil
}
log.Info("First legacy receipt", "number", firstIdx)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Afaict, this is the last time you make any use of firstIdx: the rest of the code starts over from 0 and feeds it into the transformer. And the transformer will (?) return stop=true for the first empty receipt list, so I don't see how this can work?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had to double-check to see why it's working :) so an empty receipt list is both legacy and non-legacy, and the transformer will ask "is this legacy?" — it is, so stop=false. stop will be true only for the first non-empty non-legacy receipt list.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I was planning to pass firstIdx to MigrateTable so we could start from there but we also need to copy over the first empty receipts so I decided to keep MigrateTable as is and only use firstIdx to determine whether the db is in the old format.


transformer := func(blob []byte) ([]byte, bool, error) {
// Stop when first v5 receipt is spotted.
// TODO: Combine detecting legacy and converting it
// to avoid 2 decoding.
legacy, err := types.IsLegacyStoredReceipts(blob)
if err != nil {
return nil, false, err
}
if !legacy {
return blob, true, nil
}

out, err := types.ConvertLegacyStoredReceipts(blob)
if err != nil {
return nil, false, err
}
return out, false, nil
}

log.Info("Starting migration", "ancients", numAncients)
start := time.Now()
if err := db.MigrateTable("receipts", transformer); err != nil {
return err
}

if err := db.Close(); err != nil {
return err
}
log.Info("Migration finished", "duration", time.Since(start))

return nil
}
4 changes: 4 additions & 0 deletions core/rawdb/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,10 @@ func (db *nofreezedb) ReadAncients(fn func(reader ethdb.AncientReader) error) (e
return fn(db)
}

// MigrateTable is unsupported on a database without a freezer: there is
// no ancient store whose entries could be converted.
func (db *nofreezedb) MigrateTable(_ string, _ TransformerFn) error {
	return errNotSupported
}

// NewDatabase creates a high level database on top of a given key-value data
// store without a freezer moving immutable chain segments into cold storage.
func NewDatabase(db ethdb.KeyValueStore) ethdb.Database {
Expand Down
167 changes: 167 additions & 0 deletions core/rawdb/freezer.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package rawdb
import (
"errors"
"fmt"
"io/ioutil"
"math"
"os"
"path/filepath"
Expand Down Expand Up @@ -546,3 +547,169 @@ func (f *freezer) freezeRange(nfdb *nofreezedb, number, limit uint64) (hashes []

return hashes, err
}

// TransformerFn takes a freezer entry in an older format and returns
// the same in a new format. The second return argument determines
// if the entry is not in the legacy format. Note that empty items can
// be considered both legacy and non-legacy.
type TransformerFn = func([]byte) ([]byte, bool, error)

// MigrateTable processes the entries in a given table in sequence
// converting them to a new format if they're of an old format.
//
// It writes the converted entries into a temporary "migration" table,
// copies over the remainder of the switch-over file verbatim, duplicates
// the rest of the old index with repaired file numbers, and finally
// swaps the migrated files into the ancients directory.
func (f *freezer) MigrateTable(kind string, fn TransformerFn) error {
	if f.readonly {
		return errReadOnly
	}
	f.writeLock.Lock()
	defer f.writeLock.Unlock()

	table, ok := f.tables[kind]
	if !ok {
		return errUnknownTable
	}
	ancientsPath := filepath.Dir(table.index.Name())
	// Set up new dir for the migrated table, the content of which
	// we'll at the end move over to the ancients dir.
	migrationPath := filepath.Join(ancientsPath, "migration")
	newTable, err := NewFreezerTable(migrationPath, kind, FreezerNoSnappy[kind])
	if err != nil {
		return err
	}
	batch := newTable.newBatch()

	numAncients, err := f.Ancients()
	if err != nil {
		return err
	}
	i := uint64(0)
	// Number of the file in which the first up-to-date entry appears.
	var filenum uint32
	copyOver := true
	// Iterate through entries and transform them
	// until reaching first non-legacy one.
	for ; i < numAncients; i++ {
		if i%500000 == 0 {
			log.Info("Processing legacy elements", "number", i)
		}
		blob, err := table.Retrieve(i)
		if err != nil {
			return err
		}
		out, stop, err := fn(blob)
		if err != nil {
			return err
		}
		if !stop {
			// Entry is legacy, push transformed version
			if err := batch.AppendRaw(i, out); err != nil {
				return err
			}
		} else {
			// Reached the first up-to-date entry.
			// Remember in which file the switch happens.
			indices, err := table.getIndices(i, 1)
			if err != nil {
				return err
			}
			// First non-legacy entry coincidentally is located in a new
			// file and we have a clean switch-over boundary. No need to
			// copy over further elements.
			if indices[0].filenum != indices[1].filenum {
				copyOver = false
			}
			filenum = indices[1].filenum
			log.Info("Found first non-legacy element", "number", i, "filenum", filenum, "copyOver", copyOver)
			break
		}
	}
	if copyOver {
		// Copy over left-over receipts in the file with last legacy receipt:
		// 1. loop getBounds until filenum exceeds threshold filenum
		// 2. copy verbatim to new table
		for ; i < numAncients; i++ {
			indices, err := table.getIndices(i, 1)
			if err != nil {
				return err
			}
			if indices[1].filenum > filenum {
				log.Info("Copied over rest of switch-over file", "number", i, "nextFile", indices[1].filenum)
				break
			}
			blob, err := table.Retrieve(i)
			if err != nil {
				return err
			}
			if err := batch.AppendRaw(i, blob); err != nil {
				return err
			}
		}
	}

	if err := batch.commit(); err != nil {
		return err
	}

	var fileCountDiff int32
	toRename := make(map[uint32]struct{})

	// 3. need to copy rest of old index and repair the filenum in the entries
	if i < numAncients {
		indices, err := table.getIndices(i, 1)
		if err != nil {
			return err
		}

		log.Info("Duplicating rest of index", "fromFile", indices[1].filenum, "toFile", newTable.headId+1)
		fileCountDiff = int32(newTable.headId+1) - int32(indices[1].filenum)
		for ; i < numAncients; i++ {
			indices, err := table.getIndices(i, 1)
			if err != nil {
				return err
			}
			idx := indexEntry{
				// (idx.filenum + fileCountDiff) is always > 0
				filenum: uint32(int32(indices[1].filenum) + fileCountDiff),
				offset:  indices[1].offset,
			}
			// Propagate index-write failures; previously this error was
			// silently dropped.
			if err := newTable.writeEntry(idx); err != nil {
				return err
			}
			toRename[indices[1].filenum] = struct{}{}
		}
	}

	log.Info("Replacing table files")
	// Warning: file juggling to follow.
	// First release open table files because we need to move some of them.
	table.releaseFilesAfter(0, false)
	// Move table files after the switchover point to migration dir.
	for k := range toRename {
		if err := os.Rename(table.tableFilePath(k), newTable.tableFilePath(uint32(int32(k)+fileCountDiff))); err != nil {
			return err
		}
	}

	// Now we can delete all the rest.
	if err := table.deleteFiles(); err != nil {
		return err
	}

	// Move migrated files to ancients dir.
	if err := newTable.Close(); err != nil {
		return err
	}
	files, err := ioutil.ReadDir(migrationPath)
	if err != nil {
		return err
	}
	for _, f := range files {
		// This will replace the index + table files up to and including the switchover file.
		if err := os.Rename(filepath.Join(migrationPath, f.Name()), filepath.Join(ancientsPath, f.Name())); err != nil {
			return err
		}
	}
	// Delete by now empty dir.
	if err := os.Remove(migrationPath); err != nil {
		return err
	}

	return nil
}
44 changes: 37 additions & 7 deletions core/rawdb/freezer_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -431,13 +431,8 @@ func (t *freezerTable) Close() error {
func (t *freezerTable) openFile(num uint32, opener func(string) (*os.File, error)) (f *os.File, err error) {
var exist bool
if f, exist = t.files[num]; !exist {
var name string
if t.noCompression {
name = fmt.Sprintf("%s.%04d.rdat", t.name, num)
} else {
name = fmt.Sprintf("%s.%04d.cdat", t.name, num)
}
f, err = opener(filepath.Join(t.path, name))
path := t.tableFilePath(num)
f, err = opener(path)
if err != nil {
return nil, err
}
Expand All @@ -446,6 +441,17 @@ func (t *freezerTable) openFile(num uint32, opener func(string) (*os.File, error
return f, err
}

// tableFilePath returns the path to the table file with the given index.
// (The comment previously named a non-existent "tableFileName", breaking
// the Go doc convention that a comment starts with the declared name.)
func (t *freezerTable) tableFilePath(num uint32) string {
	var name string
	if t.noCompression {
		name = fmt.Sprintf("%s.%04d.rdat", t.name, num)
	} else {
		name = fmt.Sprintf("%s.%04d.cdat", t.name, num)
	}
	return filepath.Join(t.path, name)
}

// releaseFile closes a file, and removes it from the open file cache.
// Assumes that the caller holds the write lock
func (t *freezerTable) releaseFile(num uint32) {
Expand All @@ -468,6 +474,20 @@ func (t *freezerTable) releaseFilesAfter(num uint32, remove bool) {
}
}

// deleteFiles removes the table's data files in the [tailId, headId)
// range. It assumes none of these files are currently open.
func (t *freezerTable) deleteFiles() error {
	t.lock.Lock()
	defer t.lock.Unlock()
	for num := t.tailId; num < t.headId; num++ {
		if err := os.Remove(t.tableFilePath(num)); err != nil {
			return err
		}
	}
	return nil
}

// getIndices returns the index entries for the given from-item, covering 'count' items.
// N.B: The actual number of returned indices for N items will always be N+1 (unless an
// error is returned).
Expand Down Expand Up @@ -740,3 +760,13 @@ func (t *freezerTable) dumpIndex(w io.Writer, start, stop int64) {
}
fmt.Fprintf(w, "|--------------------------|\n")
}

// writeEntry appends a raw index entry to the table's index file.
// Low-level helper: it does not increase the table's item counters
// and assumes the caller holds the write lock.
func (t *freezerTable) writeEntry(idx indexEntry) error {
	// Ensure the table is still accessible
	if t.index == nil {
		return errClosed
	}
	_, err := t.index.Write(idx.append([]byte{}))
	return err
}
6 changes: 6 additions & 0 deletions core/rawdb/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ func (t *table) Sync() error {
return t.db.Sync()
}

// MigrateTable processes the entries in a given table in sequence
// converting them to a new format if they're of an old format.
func (t *table) MigrateTable(kind string, fn TransformerFn) error {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Docstring

return t.db.MigrateTable(kind, fn)
}

// Put inserts the given value into the database at a prefixed version of the
// provided key.
func (t *table) Put(key []byte, value []byte) error {
Expand Down
Loading