From 2a1cd355824c8eaa2bbca003e7ce3cdfdf41d294 Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Thu, 29 Sep 2022 20:33:58 +1000 Subject: [PATCH] feat: check links on setting and sanitise on encoding Attempt to keep ProtoNode always in a state that can be encoded without errors to avoid cases where the panicing methods may be forced to panic on error. go-codec-dagpb will error when encoding either of the following cases: * The Hash field in links should always be set, cannot be cid.Undef * The Tsize field needs to fit into an int64, otherwise it'll overflow to negative which is not allowed Error on cases where a user may attempt to set links that will eventually error on encode. Then when we do encode, silently handle these cases if they manage to slip through (e.g. if they come in from a decoded block with a bad form). --- coding.go | 19 ++++++--- merkledag_test.go | 101 +++++++++++++++++++++++++++++++++++++++++++++- node.go | 34 ++++++++++++++-- 3 files changed, 143 insertions(+), 11 deletions(-) diff --git a/coding.go b/coding.go index 0a71b0a..811e25f 100644 --- a/coding.go +++ b/coding.go @@ -80,13 +80,20 @@ func (n *ProtoNode) marshalImmutable() (*immutableProtoNode, error) { nd, err := qp.BuildMap(dagpb.Type.PBNode, 2, func(ma ipld.MapAssembler) { qp.MapEntry(ma, "Links", qp.List(int64(len(links)), func(la ipld.ListAssembler) { for _, link := range links { - qp.ListEntry(la, qp.Map(3, func(ma ipld.MapAssembler) { - if link.Cid.Defined() { + // it shouldn't be possible to get here with an undefined CID, but in + // case it is we're going to drop this link from the encoded form + // entirely + if link.Cid.Defined() { + qp.ListEntry(la, qp.Map(3, func(ma ipld.MapAssembler) { qp.MapEntry(ma, "Hash", qp.Link(cidlink.Link{Cid: link.Cid})) - } - qp.MapEntry(ma, "Name", qp.String(link.Name)) - qp.MapEntry(ma, "Tsize", qp.Int(int64(link.Size))) - })) + qp.MapEntry(ma, "Name", qp.String(link.Name)) + sz := int64(link.Size) + if sz < 0 { // overflow, >MaxInt64 is almost certainly an error + sz = 0 + } + qp.MapEntry(ma, "Tsize", qp.Int(sz)) + })) + } } })) if n.data != nil { diff --git a/merkledag_test.go b/merkledag_test.go index b433a32..e7ca4fb 100644 --- a/merkledag_test.go +++ b/merkledag_test.go @@ -3,10 +3,12 @@ package merkledag_test import ( "bytes" "context" + "encoding/hex" "encoding/json" "errors" "fmt" "io" + "math" "math/rand" "strings" "sync" @@ -28,6 +30,11 @@ import ( mh "github.com/multiformats/go-multihash" ) +var someCid cid.Cid = func() cid.Cid { + c, _ := cid.Cast([]byte{1, 85, 0, 5, 0, 1, 2, 3, 4}) + return c +}() + // makeDepthTestingGraph makes a small DAG with two levels. The level-two // nodes are both children of the root and of one of the level 1 nodes. // This is meant to test the Walk*Depth functions. @@ -108,6 +115,97 @@ func TestBadBuilderEncode(t *testing.T) { } } +func TestLinkChecking(t *testing.T) { + cases := []struct { + name string + fn func(*ProtoNode) error + }{ + { + name: "AddRawLink overflow Tsize", + fn: func(n *ProtoNode) error { + return n.AddRawLink("foo", &ipld.Link{Size: math.MaxUint64, Cid: someCid}) + }, + }, + + { + name: "AddRawLink undefined CID", + fn: func(n *ProtoNode) error { + return n.AddRawLink("foo", &ipld.Link{Cid: cid.Undef}) + }, + }, + + { + name: "SetLinks overflow Tsize", + fn: func(n *ProtoNode) error { + return n.SetLinks([]*ipld.Link{{Size: math.MaxUint64, Cid: someCid}}) + }, + }, + + { + name: "SetLinks undefined CID", + fn: func(n *ProtoNode) error { + return n.SetLinks([]*ipld.Link{{Cid: cid.Undef}}) + }, + }, + + { + name: "UnmarshalJSON overflow Tsize", + fn: func(n *ProtoNode) error { + return n.UnmarshalJSON([]byte(`{"data":null,"links":[{"Name":"","Size":18446744073709549568,"Cid":{"/":"QmNPWHBrVQiiV8FpyNuEPhB9E2rbvdy9Yx79EY1EJuyf9o"}}]}`)) + }, + }, + + { + name: "UnmarshalJSON undefined CID", + fn: func(n *ProtoNode) error { + return n.UnmarshalJSON([]byte(`{"data":null,"links":[{"Name":"","Size":100}]}`)) + }, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + n := NodeWithData([]byte("boop")) + err := tc.fn(n) + if err == nil { + t.Fatal("expected error") + } + }) + } + + t.Run("round-trip block with bad Tsize", func(t *testing.T) { + badblock, _ := hex.DecodeString("122f0a22122000bb3604d2ecd386227007c548249521fbb9a394e1e26460091d0a692888e7361880f0ffffffffffffff01") + n, err := DecodeProtobuf(badblock) + if err != nil { + t.Fatal(err) + } + // sanity + if len(n.Links()) != 1 { + t.Fatal("expected a link") + } + // sanity + if n.Links()[0].Size <= math.MaxInt64 { + t.Fatal("expected link Tsize to be oversized") + } + + // forced round-trip + byts, err := n.EncodeProtobuf(true) + if err != nil { + t.Fatal(err) + } + n, err = DecodeProtobuf(byts) + if err != nil { + t.Fatal(err) + } + if len(n.Links()) != 1 { + t.Fatal("expected a link") + } + if n.Links()[0].Size != 0 { + t.Fatal("expected link Tsize to be truncated on reencode") + } + }) +} + func TestNode(t *testing.T) { n1 := NodeWithData([]byte("beep")) @@ -604,7 +702,7 @@ func TestGetRawNodes(t *testing.T) { func TestProtoNodeResolve(t *testing.T) { nd := new(ProtoNode) - nd.SetLinks([]*ipld.Link{{Name: "foo"}}) + nd.SetLinks([]*ipld.Link{{Name: "foo", Cid: someCid}}) lnk, left, err := nd.ResolveLink([]string{"foo", "bar"}) if err != nil { @@ -959,7 +1057,6 @@ func TestLinkSorting(t *testing.T) { if err != nil { t.Fatal(err) } - someCid, _ := cid.Cast([]byte{1, 85, 0, 5, 0, 1, 2, 3, 4}) if err = node.AddRawLink("foo", &ipld.Link{ Size: 10, Cid: someCid, diff --git a/node.go b/node.go index 53fd0a4..1dad902 100644 --- a/node.go +++ b/node.go @@ -3,7 +3,9 @@ package merkledag import ( "context" "encoding/json" + "errors" "fmt" + "math" "sort" blocks "github.com/ipfs/go-block-format" @@ -163,11 +165,15 @@ func (n *ProtoNode) AddNodeLink(name string, that format.Node) error { // RemoveNodeLink for the same link, will not result in an identically encoded // form as the links will have been sorted. func (n *ProtoNode) AddRawLink(name string, l *format.Link) error { - n.links = append(n.links, &format.Link{ + lnk := &format.Link{ Name: name, Size: l.Size, Cid: l.Cid, - }) + } + if err := checkLink(lnk); err != nil { + return err + } + n.links = append(n.links, lnk) n.linksDirty = true // needs a sort n.encoded = nil return nil @@ -360,10 +366,26 @@ func (n *ProtoNode) UnmarshalJSON(b []byte) error { // them until we mutate this node since we're representing the current, // as-serialized state. So n.linksDirty is not set here. n.links = s.Links + for _, lnk := range s.Links { + if err := checkLink(lnk); err != nil { + return err + } + } + n.encoded = nil return nil } +func checkLink(lnk *format.Link) error { + if lnk.Size > math.MaxInt64 { + return fmt.Errorf("value of Tsize is too large: %d", lnk.Size) + } + if !lnk.Cid.Defined() { + return errors.New("link must have a value Cid value") + } + return nil +} + // MarshalJSON returns a JSON representation of the node. func (n *ProtoNode) MarshalJSON() ([]byte, error) { if n.linksDirty { @@ -429,10 +451,16 @@ func (n *ProtoNode) Links() []*format.Link { // SetLinks replaces the node links with a copy of the provided links. Sorting // will be applied to the list. -func (n *ProtoNode) SetLinks(links []*format.Link) { +func (n *ProtoNode) SetLinks(links []*format.Link) error { + for _, lnk := range links { + if err := checkLink(lnk); err != nil { + return err + } + } n.links = append([]*format.Link(nil), links...) n.linksDirty = true // needs a sort n.encoded = nil + return nil } // Resolve is an alias for ResolveLink.