Skip to content

Commit

Permalink
Merge pull request #358 from ipld/feat/selector-resume
Browse files Browse the repository at this point in the history
Implement option to start traversals at a path
  • Loading branch information
hannahhoward authored Mar 6, 2022
2 parents c10d43b + 0a71d45 commit 9e783aa
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 15 deletions.
6 changes: 4 additions & 2 deletions traversal/fns.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,17 @@ type Progress struct {
Path datamodel.Path
Link datamodel.Link
}
Budget *Budget // If present, tracks "budgets" for how many more steps we're willing to take before we should halt.
SeenLinks map[datamodel.Link]struct{} // Set used to remember which links have been visited before, if Cfg.LinkVisitOnlyOnce is true.
PastStartAtPath bool // Indicates whether the traversal has progressed past the StartAtPath in the config -- used to avoid path checks when inside a sub portion of a DAG that is entirely inside the "not-skipped" portion of a traversal
Budget *Budget // If present, tracks "budgets" for how many more steps we're willing to take before we should halt.
SeenLinks map[datamodel.Link]struct{} // Set used to remember which links have been visited before, if Cfg.LinkVisitOnlyOnce is true.
}

// Config holds the settings for a traversal: the context carried through it,
// the LinkSystem and node prototype chooser used when loading links, and
// options controlling whether links are revisited and at which path visiting
// begins.
type Config struct {
Ctx context.Context // Context carried through a traversal. Optional; use it if you need cancellation.
LinkSystem linking.LinkSystem // LinkSystem used for automatic link loading, and also any storing if mutation features (e.g. traversal.Transform) are used.
LinkTargetNodePrototypeChooser LinkTargetNodePrototypeChooser // Chooser for Node implementations to produce during automatic link traversal.
LinkVisitOnlyOnce bool // By default, we visit across links wherever we see them again, even if we've visited them before, because the reason for visiting might be different than it was before since we got to it via a different path. If set to true, track links we've seen before in Progress.SeenLinks and do not visit them again. Note that sufficiently complex selectors may require valid revisiting of some links, so setting this to true can change behavior noticeably and should be done with care.
StartAtPath datamodel.Path // If set, causes a traversal to skip forward until passing this path, and only then begins calling visit functions. Block loads will also be skipped wherever possible.
}

type Budget struct {
Expand Down
39 changes: 31 additions & 8 deletions traversal/walk.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,14 +181,16 @@ func (prog Progress) walkAdv(n datamodel.Node, s selector.Selector, fn AdvVisitF
n = rn
}

// Decide if this node is matched -- do callbacks as appropriate.
if s.Decide(n) {
if err := fn(prog, n, VisitReason_SelectionMatch); err != nil {
return err
}
} else {
if err := fn(prog, n, VisitReason_SelectionCandidate); err != nil {
return err
if prog.Path.Len() >= prog.Cfg.StartAtPath.Len() || !prog.PastStartAtPath {
// Decide if this node is matched -- do callbacks as appropriate.
if s.Decide(n) {
if err := fn(prog, n, VisitReason_SelectionMatch); err != nil {
return err
}
} else {
if err := fn(prog, n, VisitReason_SelectionCandidate); err != nil {
return err
}
}
}
// If we're handling scalars (e.g. not maps and lists) we can return now.
Expand All @@ -211,11 +213,23 @@ func (prog Progress) walkAdv(n datamodel.Node, s selector.Selector, fn AdvVisitF
}

func (prog Progress) walkAdv_iterateAll(n datamodel.Node, s selector.Selector, fn AdvVisitFn) error {
var reachedStartAtPath bool
for itr := selector.NewSegmentIterator(n); !itr.Done(); {
if reachedStartAtPath {
prog.PastStartAtPath = reachedStartAtPath
}
ps, v, err := itr.Next()
if err != nil {
return err
}
if prog.Path.Len() < prog.Cfg.StartAtPath.Len() && !prog.PastStartAtPath {
if ps.Equals(prog.Cfg.StartAtPath.Segments()[prog.Path.Len()]) {
reachedStartAtPath = true
}
if !reachedStartAtPath {
continue
}
}
sNext, err := s.Explore(n, ps)
if err != nil {
return err
Expand Down Expand Up @@ -252,7 +266,16 @@ func (prog Progress) walkAdv_iterateAll(n datamodel.Node, s selector.Selector, f
}

func (prog Progress) walkAdv_iterateSelective(n datamodel.Node, attn []datamodel.PathSegment, s selector.Selector, fn AdvVisitFn) error {
var reachedStartAtPath bool
for _, ps := range attn {
if prog.Path.Len() < prog.Cfg.StartAtPath.Len() {
if ps.Equals(prog.Cfg.StartAtPath.Segments()[prog.Path.Len()]) {
reachedStartAtPath = true
}
if !reachedStartAtPath {
continue
}
}
v, err := n.LookupBySegment(ps)
if err != nil {
continue
Expand Down
76 changes: 71 additions & 5 deletions traversal/walk_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,46 @@ func TestWalkMatching(t *testing.T) {
qt.Check(t, err, qt.IsNil)
qt.Check(t, order, qt.Equals, 7)
})

t.Run("no visiting of nodes before start path", func(t *testing.T) {
ss := ssb.ExploreFields(func(efsb builder.ExploreFieldsSpecBuilder) {
efsb.Insert("linkedList", ssb.ExploreAll(ssb.Matcher()))
efsb.Insert("linkedMap", ssb.ExploreRecursive(selector.RecursionLimitDepth(3), ssb.ExploreFields(func(efsb builder.ExploreFieldsSpecBuilder) {
efsb.Insert("foo", ssb.Matcher())
efsb.Insert("nonlink", ssb.Matcher())
efsb.Insert("alink", ssb.Matcher())
efsb.Insert("nested", ssb.ExploreRecursiveEdge())
})))
})
s, err := ss.Selector()
var order int
lsys := cidlink.DefaultLinkSystem()
lsys.SetReadStorage(&store)
err = traversal.Progress{
Cfg: &traversal.Config{
LinkSystem: lsys,
LinkTargetNodePrototypeChooser: basicnode.Chooser,
StartAtPath: datamodel.ParsePath("linkedMap/nested/nonlink"),
},
}.WalkMatching(rootNode, s, func(prog traversal.Progress, n datamodel.Node) error {
switch order {
case 0:
qt.Check(t, n, nodetests.NodeContentEquals, basicnode.NewString("zoo"))
qt.Check(t, prog.Path.String(), qt.Equals, "linkedMap/nested/nonlink")
qt.Check(t, prog.LastBlock.Path.String(), qt.Equals, "linkedMap")
qt.Check(t, prog.LastBlock.Link.String(), qt.Equals, middleMapNodeLnk.String())
case 1:
qt.Check(t, n, nodetests.NodeContentEquals, basicnode.NewString("alpha"))
qt.Check(t, prog.Path.String(), qt.Equals, "linkedMap/nested/alink")
qt.Check(t, prog.LastBlock.Path.String(), qt.Equals, "linkedMap/nested/alink")
qt.Check(t, prog.LastBlock.Link.String(), qt.Equals, leafAlphaLnk.String())
}
order++
return nil
})
qt.Check(t, err, qt.IsNil)
qt.Check(t, order, qt.Equals, 2)
})
}

func TestWalkBudgets(t *testing.T) {
Expand Down Expand Up @@ -390,6 +430,7 @@ func TestWalkBlockLoadOrder(t *testing.T) {
expected []datamodel.Link,
s datamodel.Node,
linkVisitOnce bool,
startAtPath datamodel.Path,
readFn func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error)) {

var count int
Expand All @@ -407,6 +448,7 @@ func TestWalkBlockLoadOrder(t *testing.T) {
LinkSystem: lsys,
LinkTargetNodePrototypeChooser: basicnode.Chooser,
LinkVisitOnlyOnce: linkVisitOnce,
StartAtPath: startAtPath,
},
}.WalkMatching(newRootNode, sel, func(prog traversal.Progress, n datamodel.Node) error {
return nil
Expand All @@ -417,14 +459,14 @@ func TestWalkBlockLoadOrder(t *testing.T) {

t.Run("CommonSelector_MatchAllRecursively", func(t *testing.T) {
s := selectorparse.CommonSelector_MatchAllRecursively
verifySelectorLoads(t, expectedAllBlocks, s, false, func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) {
verifySelectorLoads(t, expectedAllBlocks, s, false, datamodel.NewPath(nil), func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) {
return storage.GetStream(lctx.Ctx, &store, lnk.Binary())
})
})

t.Run("CommonSelector_ExploreAllRecursively", func(t *testing.T) {
s := selectorparse.CommonSelector_ExploreAllRecursively
verifySelectorLoads(t, expectedAllBlocks, s, false, func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) {
verifySelectorLoads(t, expectedAllBlocks, s, false, datamodel.NewPath(nil), func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) {
return storage.GetStream(lctx.Ctx, &store, lnk.Binary())
})
})
Expand All @@ -435,7 +477,7 @@ func TestWalkBlockLoadOrder(t *testing.T) {
s := ssb.ExploreRecursive(selector.RecursionLimitNone(),
ssb.ExploreAll(ssb.ExploreRecursiveEdge())).
Node()
verifySelectorLoads(t, expectedAllBlocks, s, false, func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) {
verifySelectorLoads(t, expectedAllBlocks, s, false, datamodel.NewPath(nil), func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) {
return storage.GetStream(lctx.Ctx, &store, lnk.Binary())
})
})
Expand Down Expand Up @@ -464,7 +506,7 @@ func TestWalkBlockLoadOrder(t *testing.T) {

s := selectorparse.CommonSelector_ExploreAllRecursively
visited := make(map[datamodel.Link]bool)
verifySelectorLoads(t, expectedSkipMeBlocks, s, false, func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) {
verifySelectorLoads(t, expectedSkipMeBlocks, s, false, datamodel.NewPath(nil), func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) {
log.Printf("load %v [%v]\n", l, visited[l])
if visited[l] {
return nil, traversal.SkipMe{}
Expand All @@ -486,10 +528,34 @@ func TestWalkBlockLoadOrder(t *testing.T) {
middleMapNodeLnk,
}
s := selectorparse.CommonSelector_ExploreAllRecursively
verifySelectorLoads(t, expectedLinkRevisitBlocks, s, true, func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) {
verifySelectorLoads(t, expectedLinkRevisitBlocks, s, true, datamodel.NewPath(nil), func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) {
return storage.GetStream(lctx.Ctx, &store, lnk.Binary())
})
})
t.Run("explore-all with duplicate traversal skip via load at path", func(t *testing.T) {
// when using LinkRevisit:false to skip duplicate block loads, our loader
// doesn't even get to see the load attempts (unlike SkipMe, where the
// loader signals the skips)
testPathsToBlocksSkipped := map[string][]datamodel.Link{
// 5th node in load sequence for rootNode
"0/linkedList/2": append([]datamodel.Link{rootNodeLnk, middleListNodeLnk}, expectedAllBlocks[4:]...),
// LinkedMap is the 7th node; foo doesn't affect loading
"0/linkedMap/foo": append([]datamodel.Link{rootNodeLnk}, expectedAllBlocks[6:]...),
// 8th node in load sequence for rootNode
"0/linkedMap/nested/alink": append([]datamodel.Link{rootNodeLnk, middleMapNodeLnk}, expectedAllBlocks[7:]...),
"0/linkedString": append([]datamodel.Link{rootNodeLnk}, expectedAllBlocks[8:]...),
// pass through all nodes of the first root block, then go load the middle list block
"1/2": append([]datamodel.Link{middleListNodeLnk}, expectedAllBlocks[len(rootNodeExpectedLinks)+3:]...),
"3/1": append([]datamodel.Link{middleListNodeLnk}, expectedAllBlocks[2*len(rootNodeExpectedLinks)+len(middleListNodeLinks)+2:]...),
}
for path, expectedLinkVisits := range testPathsToBlocksSkipped {
startAtPath := datamodel.ParsePath(path)
s := selectorparse.CommonSelector_ExploreAllRecursively
verifySelectorLoads(t, expectedLinkVisits, s, false, startAtPath, func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) {
return storage.GetStream(lctx.Ctx, &store, lnk.Binary())
})
}
})
}

func TestWalk_ADLs(t *testing.T) {
Expand Down

0 comments on commit 9e783aa

Please sign in to comment.