From 61b2738d6a4e5eb7fdafe8dad82254839e617fa6 Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Tue, 13 Jul 2021 12:22:33 +0100 Subject: [PATCH 1/2] Unexport unecesserry index APIs and integrate with mulitcodec Use the codec dedicated to CAR index sorted when marshalling and unmarshalling `indexSorted`. Note, the code depends on a specific commit of `go-multicodec` `master` branch. This needs to be replaced with a tag once a release is made on the go-multicodec side later. Unexport index APIs that should not be exposed publicly. Remove `Builder` now that it is not needed anywhere. Move `insertionIndex` into `blockstore` package since that's the only place it is used. Introduce an index constructor that takes multicodec code and instantiates an index. Fix ignored errors in `indexsorted.go` during marshalling/unmarshlling. Rename index constructor functions to use consistent terminology; i.e. `new` instead if `mk`. Remove redundant TODOs in code. Relates to: - https://github.com/multiformats/go-multicodec/pull/46 --- v2/{index => blockstore}/insertionindex.go | 86 ++++++++++++---------- v2/blockstore/readwrite.go | 17 ++--- v2/go.mod | 1 + v2/go.sum | 2 + v2/index/errors.go | 8 +- v2/index/generator.go | 3 +- v2/index/index.go | 48 ++++++------ v2/index/indexgobhash.go | 10 ++- v2/index/indexhashed.go | 10 ++- v2/index/indexsorted.go | 23 ++++-- 10 files changed, 110 insertions(+), 98 deletions(-) rename v2/{index => blockstore}/insertionindex.go (58%) diff --git a/v2/index/insertionindex.go b/v2/blockstore/insertionindex.go similarity index 58% rename from v2/index/insertionindex.go rename to v2/blockstore/insertionindex.go index 78dace1d..c13d0ace 100644 --- a/v2/index/insertionindex.go +++ b/v2/blockstore/insertionindex.go @@ -1,29 +1,36 @@ -package index +package blockstore import ( "bytes" "encoding/binary" + "errors" "fmt" "io" + "github.com/ipld/go-car/v2/index" + "github.com/multiformats/go-multicodec" + "github.com/ipfs/go-cid" "github.com/multiformats/go-multihash" "github.com/petar/GoLLRB/llrb" cbor "github.com/whyrusleeping/cbor/go" ) -type InsertionIndex struct { - items llrb.LLRB -} +var ( + errUnsupported = errors.New("not supported") + insertionIndexCodec = 0x300003 +) -func (ii *InsertionIndex) InsertNoReplace(key cid.Cid, n uint64) { - ii.items.InsertNoReplace(mkRecordFromCid(key, n)) -} +type ( + insertionIndex struct { + items llrb.LLRB + } -type recordDigest struct { - digest []byte - Record -} + recordDigest struct { + digest []byte + index.Record + } +) func (r recordDigest) Less(than llrb.Item) bool { other, ok := than.(recordDigest) @@ -33,7 +40,7 @@ func (r recordDigest) Less(than llrb.Item) bool { return bytes.Compare(r.digest, other.digest) < 0 } -func mkRecord(r Record) recordDigest { +func newRecordDigest(r index.Record) recordDigest { d, err := multihash.Decode(r.Hash()) if err != nil { panic(err) @@ -42,16 +49,20 @@ func mkRecord(r Record) recordDigest { return recordDigest{d.Digest, r} } -func mkRecordFromCid(c cid.Cid, at uint64) recordDigest { +func newRecordFromCid(c cid.Cid, at uint64) recordDigest { d, err := multihash.Decode(c.Hash()) if err != nil { panic(err) } - return recordDigest{d.Digest, Record{Cid: c, Idx: at}} + return recordDigest{d.Digest, index.Record{Cid: c, Idx: at}} } -func (ii *InsertionIndex) Get(c cid.Cid) (uint64, error) { +func (ii *insertionIndex) insertNoReplace(key cid.Cid, n uint64) { + ii.items.InsertNoReplace(newRecordFromCid(key, n)) +} + +func (ii *insertionIndex) Get(c cid.Cid) (uint64, error) { d, err := multihash.Decode(c.Hash()) if err != nil { return 0, err @@ -59,7 +70,7 @@ func (ii *InsertionIndex) Get(c cid.Cid) (uint64, error) { entry := recordDigest{digest: d.Digest} e := ii.items.Get(entry) if e == nil { - return 0, ErrNotFound + return 0, index.ErrNotFound } r, ok := e.(recordDigest) if !ok { @@ -69,13 +80,11 @@ func (ii *InsertionIndex) Get(c cid.Cid) (uint64, error) { return r.Record.Idx, nil } -func (ii *InsertionIndex) Marshal(w io.Writer) error { +func (ii *insertionIndex) Marshal(w io.Writer) error { if err := binary.Write(w, binary.LittleEndian, int64(ii.items.Len())); err != nil { return err } - var err error - iter := func(i llrb.Item) bool { if err = cbor.Encode(w, i.(recordDigest).Record); err != nil { return false @@ -86,30 +95,29 @@ func (ii *InsertionIndex) Marshal(w io.Writer) error { return err } -func (ii *InsertionIndex) Unmarshal(r io.Reader) error { - var len int64 - if err := binary.Read(r, binary.LittleEndian, &len); err != nil { +func (ii *insertionIndex) Unmarshal(r io.Reader) error { + var length int64 + if err := binary.Read(r, binary.LittleEndian, &length); err != nil { return err } d := cbor.NewDecoder(r) - for i := int64(0); i < len; i++ { - var rec Record + for i := int64(0); i < length; i++ { + var rec index.Record if err := d.Decode(&rec); err != nil { return err } - ii.items.InsertNoReplace(mkRecord(rec)) + ii.items.InsertNoReplace(newRecordDigest(rec)) } return nil } -// Codec identifies this index format -func (ii *InsertionIndex) Codec() Codec { - return IndexInsertion +func (ii *insertionIndex) Codec() multicodec.Code { + return multicodec.Code(insertionIndexCodec) } -func (ii *InsertionIndex) Load(rs []Record) error { +func (ii *insertionIndex) Load(rs []index.Record) error { for _, r := range rs { - rec := mkRecord(r) + rec := newRecordDigest(r) if rec.digest == nil { return fmt.Errorf("invalid entry: %v", r) } @@ -118,15 +126,17 @@ func (ii *InsertionIndex) Load(rs []Record) error { return nil } -func mkInsertion() Index { - ii := InsertionIndex{} - return &ii +func newInsertionIndex() *insertionIndex { + return &insertionIndex{} } -// Flatten returns a 'indexsorted' formatted index for more efficient subsequent loading -func (ii *InsertionIndex) Flatten() (Index, error) { - si := BuildersByCodec[IndexSorted]() - rcrds := make([]Record, ii.items.Len()) +// flatten returns a 'indexsorted' formatted index for more efficient subsequent loading +func (ii *insertionIndex) flatten() (index.Index, error) { + si, err := index.NewFromCodec(multicodec.CarIndexSorted) + if err != nil { + return nil, err + } + rcrds := make([]index.Record, ii.items.Len()) idx := 0 iter := func(i llrb.Item) bool { @@ -142,7 +152,7 @@ func (ii *InsertionIndex) Flatten() (Index, error) { return si, nil } -func (ii *InsertionIndex) HasExactCID(c cid.Cid) bool { +func (ii *insertionIndex) hasExactCID(c cid.Cid) bool { d, err := multihash.Decode(c.Hash()) if err != nil { panic(err) diff --git a/v2/blockstore/readwrite.go b/v2/blockstore/readwrite.go index 9cfd6381..c4bb3e42 100644 --- a/v2/blockstore/readwrite.go +++ b/v2/blockstore/readwrite.go @@ -37,7 +37,7 @@ type ReadWrite struct { f *os.File carV1Writer *internalio.OffsetWriteSeeker ReadOnly - idx *index.InsertionIndex + idx *insertionIndex header carv2.Header dedupCids bool @@ -119,16 +119,11 @@ func NewReadWrite(path string, roots []cid.Cid, opts ...Option) (*ReadWrite, err } }() - idxBuilder, ok := index.BuildersByCodec[index.IndexInsertion] - if !ok { - return nil, fmt.Errorf("unknownindex codec: %#v", index.IndexInsertion) - } - // Instantiate block store. // Set the header fileld before applying options since padding options may modify header. rwbs := &ReadWrite{ f: f, - idx: (idxBuilder()).(*index.InsertionIndex), + idx: newInsertionIndex(), header: carv2.NewHeader(0), } for _, opt := range opts { @@ -235,7 +230,7 @@ func (b *ReadWrite) resumeWithRoots(roots []cid.Cid) error { if err != nil { return err } - b.idx.InsertNoReplace(c, uint64(frameOffset)) + b.idx.insertNoReplace(c, uint64(frameOffset)) // Seek to the next frame by skipping the block. // The frame length includes the CID, so subtract it. @@ -270,7 +265,7 @@ func (b *ReadWrite) PutMany(blks []blocks.Block) error { for _, bl := range blks { c := bl.Cid() - if b.dedupCids && b.idx.HasExactCID(c) { + if b.dedupCids && b.idx.hasExactCID(c) { continue } @@ -278,7 +273,7 @@ func (b *ReadWrite) PutMany(blks []blocks.Block) error { if err := util.LdWrite(b.carV1Writer, c.Bytes(), bl.RawData()); err != nil { return err } - b.idx.InsertNoReplace(c, n) + b.idx.insertNoReplace(c, n) } return nil } @@ -302,7 +297,7 @@ func (b *ReadWrite) Finalize() error { defer b.Close() // TODO if index not needed don't bother flattening it. - fi, err := b.idx.Flatten() + fi, err := b.idx.flatten() if err != nil { return err } diff --git a/v2/go.mod b/v2/go.mod index 75926c8c..093216d0 100644 --- a/v2/go.mod +++ b/v2/go.mod @@ -10,6 +10,7 @@ require ( github.com/ipfs/go-ipld-format v0.2.0 github.com/ipfs/go-merkledag v0.3.2 github.com/mattn/go-colorable v0.1.8 // indirect + github.com/multiformats/go-multicodec v0.2.1-0.20210713081508-b421db6850ae github.com/multiformats/go-multihash v0.0.15 github.com/multiformats/go-varint v0.0.6 github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 diff --git a/v2/go.sum b/v2/go.sum index deca2a5b..2d8a07c6 100644 --- a/v2/go.sum +++ b/v2/go.sum @@ -398,6 +398,8 @@ github.com/multiformats/go-multiaddr-net v0.0.1/go.mod h1:nw6HSxNmCIQH27XPGBuX+d github.com/multiformats/go-multibase v0.0.1/go.mod h1:bja2MqRZ3ggyXtZSEDKpl0uO/gviWFaSteVbWT51qgs= github.com/multiformats/go-multibase v0.0.3 h1:l/B6bJDQjvQ5G52jw4QGSYeOTZoAwIO77RblWplfIqk= github.com/multiformats/go-multibase v0.0.3/go.mod h1:5+1R4eQrT3PkYZ24C3W2Ue2tPwIdYQD509ZjSb5y9Oc= +github.com/multiformats/go-multicodec v0.2.1-0.20210713081508-b421db6850ae h1:wfljHPpiR0UDOjeqld9ds0Zxl3Nt/j+0wnvyBc01JgY= +github.com/multiformats/go-multicodec v0.2.1-0.20210713081508-b421db6850ae/go.mod h1:qGGaQmioCDh+TeFOnxrbU0DaIPw8yFgAZgFG0V7p1qQ= github.com/multiformats/go-multihash v0.0.1/go.mod h1:w/5tugSrLEbWqlcgJabL3oHFKTwfvkofsjW2Qa1ct4U= github.com/multiformats/go-multihash v0.0.5/go.mod h1:lt/HCbqlQwlPBz7lv0sQCdtfcMtlJvakRUn/0Ual8po= github.com/multiformats/go-multihash v0.0.10/go.mod h1:YSLudS+Pi8NHE7o6tb3D8vrpKa63epEDmG8nTduyAew= diff --git a/v2/index/errors.go b/v2/index/errors.go index fba7afba..1a10a984 100644 --- a/v2/index/errors.go +++ b/v2/index/errors.go @@ -2,9 +2,5 @@ package index import "errors" -var ( - // ErrNotFound signals a record is not found in the index. - ErrNotFound = errors.New("not found") - // errUnsupported signals unsupported operation by an index. - errUnsupported = errors.New("not supported") -) +// ErrNotFound signals a record is not found in the index. +var ErrNotFound = errors.New("not found") diff --git a/v2/index/generator.go b/v2/index/generator.go index 1781f8dc..f7c5075a 100644 --- a/v2/index/generator.go +++ b/v2/index/generator.go @@ -30,7 +30,6 @@ func Generate(v1 io.ReadSeeker) (Index, error) { return nil, fmt.Errorf("error reading car header: %w", err) } - // TODO: Generate should likely just take an io.ReadSeeker. // TODO: ensure the input's header version is 1. offset, err := carv1.HeaderSize(header) @@ -38,7 +37,7 @@ func Generate(v1 io.ReadSeeker) (Index, error) { return nil, err } - idx := mkSorted() + idx := newSorted() records := make([]Record, 0) diff --git a/v2/index/index.go b/v2/index/index.go index 53df02fb..97fbbd63 100644 --- a/v2/index/index.go +++ b/v2/index/index.go @@ -7,6 +7,8 @@ import ( "io" "os" + "github.com/multiformats/go-multicodec" + "github.com/multiformats/go-varint" internalio "github.com/ipld/go-car/v2/internal/io" @@ -16,22 +18,14 @@ import ( // Codec table is a first var-int in CAR indexes const ( - IndexSorted Codec = 0x0400 // as per https://github.com/multiformats/multicodec/pull/220 - - // TODO: unexport these before the final release, probably - IndexHashed Codec = 0x300000 + iota - IndexSingleSorted - IndexGobHashed - IndexInsertion + indexHashed codec = 0x300000 + iota + indexSingleSorted + indexGobHashed ) type ( - // Codec is used as a multicodec identifier for CAR index files - // TODO: use go-multicodec before the final release - Codec int - - // Builder is a constructor for an index type - Builder func() Index + // codec is used as a multicodec identifier for CAR index files + codec int // Record is a pre-processed record of a car item and location. Record struct { @@ -41,7 +35,7 @@ type ( // Index provides an interface for looking up byte offset of a given CID. Index interface { - Codec() Codec + Codec() multicodec.Code Marshal(w io.Writer) error Unmarshal(r io.Reader) error Get(cid.Cid) (uint64, error) @@ -49,14 +43,14 @@ type ( } ) -// BuildersByCodec holds known index formats -// TODO: turn this into a func before the final release? -var BuildersByCodec = map[Codec]Builder{ - IndexHashed: mkHashed, - IndexSorted: mkSorted, - IndexSingleSorted: mkSingleSorted, - IndexGobHashed: mkGobHashed, - IndexInsertion: mkInsertion, +// NewFromCodec constructs a new index corresponding to the given codec. +func NewFromCodec(codec multicodec.Code) (Index, error) { + switch codec { + case multicodec.CarIndexSorted: + return newSorted(), nil + default: + return nil, fmt.Errorf("unknwon index codec: %v", codec) + } } // Save writes a generated index into the given `path`. @@ -97,15 +91,15 @@ func WriteTo(idx Index, w io.Writer) error { // Returns error if the encoding is not known. func ReadFrom(r io.Reader) (Index, error) { reader := bufio.NewReader(r) - codec, err := varint.ReadUvarint(reader) + code, err := varint.ReadUvarint(reader) if err != nil { return nil, err } - builder, ok := BuildersByCodec[Codec(codec)] - if !ok { - return nil, fmt.Errorf("unknown codec: %d", codec) + codec := multicodec.Code(code) + idx, err := NewFromCodec(codec) + if err != nil { + return nil, err } - idx := builder() if err := idx.Unmarshal(reader); err != nil { return nil, err } diff --git a/v2/index/indexgobhash.go b/v2/index/indexgobhash.go index 9e61001f..a74e8b92 100644 --- a/v2/index/indexgobhash.go +++ b/v2/index/indexgobhash.go @@ -4,9 +4,12 @@ import ( "encoding/gob" "io" + "github.com/multiformats/go-multicodec" + "github.com/ipfs/go-cid" ) +//lint:ignore U1000 kept for potential future use. type mapGobIndex map[cid.Cid]uint64 func (m *mapGobIndex) Get(c cid.Cid) (uint64, error) { @@ -27,8 +30,8 @@ func (m *mapGobIndex) Unmarshal(r io.Reader) error { return d.Decode(m) } -func (m *mapGobIndex) Codec() Codec { - return IndexHashed +func (m *mapGobIndex) Codec() multicodec.Code { + return multicodec.Code(indexHashed) } func (m *mapGobIndex) Load(rs []Record) error { @@ -38,7 +41,8 @@ func (m *mapGobIndex) Load(rs []Record) error { return nil } -func mkGobHashed() Index { +//lint:ignore U1000 kept for potential future use. +func newGobHashed() Index { mi := make(mapGobIndex) return &mi } diff --git a/v2/index/indexhashed.go b/v2/index/indexhashed.go index b24a9014..84b0ad15 100644 --- a/v2/index/indexhashed.go +++ b/v2/index/indexhashed.go @@ -3,10 +3,13 @@ package index import ( "io" + "github.com/multiformats/go-multicodec" + "github.com/ipfs/go-cid" cbor "github.com/whyrusleeping/cbor/go" ) +//lint:ignore U1000 kept for potential future use. type mapIndex map[cid.Cid]uint64 func (m *mapIndex) Get(c cid.Cid) (uint64, error) { @@ -26,8 +29,8 @@ func (m *mapIndex) Unmarshal(r io.Reader) error { return d.Decode(m) } -func (m *mapIndex) Codec() Codec { - return IndexHashed +func (m *mapIndex) Codec() multicodec.Code { + return multicodec.Code(indexHashed) } func (m *mapIndex) Load(rs []Record) error { @@ -37,7 +40,8 @@ func (m *mapIndex) Load(rs []Record) error { return nil } -func mkHashed() Index { +//lint:ignore U1000 kept for potential future use. +func newHashed() Index { mi := make(mapIndex) return &mi } diff --git a/v2/index/indexsorted.go b/v2/index/indexsorted.go index b7d27510..a8f401ae 100644 --- a/v2/index/indexsorted.go +++ b/v2/index/indexsorted.go @@ -7,6 +7,8 @@ import ( "io" "sort" + "github.com/multiformats/go-multicodec" + "github.com/ipfs/go-cid" "github.com/multiformats/go-multihash" ) @@ -42,8 +44,8 @@ func (r recordSet) Swap(i, j int) { r[i], r[j] = r[j], r[i] } -func (s *singleWidthIndex) Codec() Codec { - return IndexSingleSorted +func (s *singleWidthIndex) Codec() multicodec.Code { + return multicodec.Code(indexSingleSorted) } func (s *singleWidthIndex) Marshal(w io.Writer) error { @@ -124,12 +126,14 @@ func (m *multiWidthIndex) Get(c cid.Cid) (uint64, error) { return 0, ErrNotFound } -func (m *multiWidthIndex) Codec() Codec { - return IndexSorted +func (m *multiWidthIndex) Codec() multicodec.Code { + return multicodec.CarIndexSorted } func (m *multiWidthIndex) Marshal(w io.Writer) error { - binary.Write(w, binary.LittleEndian, int32(len(*m))) + if err := binary.Write(w, binary.LittleEndian, int32(len(*m))); err != nil { + return err + } // The widths are unique, but ranging over a map isn't deterministic. // As per the CARv2 spec, we must order buckets by digest length. @@ -153,7 +157,9 @@ func (m *multiWidthIndex) Marshal(w io.Writer) error { func (m *multiWidthIndex) Unmarshal(r io.Reader) error { var l int32 - binary.Read(r, binary.LittleEndian, &l) + if err := binary.Read(r, binary.LittleEndian, &l); err != nil { + return err + } for i := 0; i < int(l); i++ { s := singleWidthIndex{} if err := s.Unmarshal(r); err != nil { @@ -199,12 +205,13 @@ func (m *multiWidthIndex) Load(items []Record) error { return nil } -func mkSorted() Index { +func newSorted() Index { m := make(multiWidthIndex) return &m } -func mkSingleSorted() Index { +//lint:ignore U1000 kept for potential future use. +func newSingleSorted() Index { s := singleWidthIndex{} return &s } From 6bbbf82e0b936114261dc3fb60a2c302ed6d235f Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Tue, 13 Jul 2021 12:39:42 +0100 Subject: [PATCH 2/2] Address review comments * Rename constructor of index by codec to a simpler name and update docs. * Use multicodec.Code as constant instead of wrappint unit64 every time. --- v2/blockstore/insertionindex.go | 6 +++--- v2/index/index.go | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/v2/blockstore/insertionindex.go b/v2/blockstore/insertionindex.go index c13d0ace..8e664eac 100644 --- a/v2/blockstore/insertionindex.go +++ b/v2/blockstore/insertionindex.go @@ -18,7 +18,7 @@ import ( var ( errUnsupported = errors.New("not supported") - insertionIndexCodec = 0x300003 + insertionIndexCodec = multicodec.Code(0x300003) ) type ( @@ -112,7 +112,7 @@ func (ii *insertionIndex) Unmarshal(r io.Reader) error { } func (ii *insertionIndex) Codec() multicodec.Code { - return multicodec.Code(insertionIndexCodec) + return insertionIndexCodec } func (ii *insertionIndex) Load(rs []index.Record) error { @@ -132,7 +132,7 @@ func newInsertionIndex() *insertionIndex { // flatten returns a 'indexsorted' formatted index for more efficient subsequent loading func (ii *insertionIndex) flatten() (index.Index, error) { - si, err := index.NewFromCodec(multicodec.CarIndexSorted) + si, err := index.New(multicodec.CarIndexSorted) if err != nil { return nil, err } diff --git a/v2/index/index.go b/v2/index/index.go index 97fbbd63..4c66ac13 100644 --- a/v2/index/index.go +++ b/v2/index/index.go @@ -43,8 +43,8 @@ type ( } ) -// NewFromCodec constructs a new index corresponding to the given codec. -func NewFromCodec(codec multicodec.Code) (Index, error) { +// New constructs a new index corresponding to the given CAR index codec. +func New(codec multicodec.Code) (Index, error) { switch codec { case multicodec.CarIndexSorted: return newSorted(), nil @@ -96,7 +96,7 @@ func ReadFrom(r io.Reader) (Index, error) { return nil, err } codec := multicodec.Code(code) - idx, err := NewFromCodec(codec) + idx, err := New(codec) if err != nil { return nil, err }