Add Files API root as best-effort pin #2872

Merged · 2 commits · Jun 22, 2016
2 changes: 1 addition & 1 deletion core/commands/pin.go
@@ -330,7 +330,7 @@ func pinLsAll(typeStr string, ctx context.Context, n *core.IpfsNode) (map[string
if err != nil {
return nil, err
}
- err = dag.EnumerateChildren(n.Context(), n.DAG, nd, ks)
+ err = dag.EnumerateChildren(n.Context(), n.DAG, nd, ks, false)
if err != nil {
return nil, err
}
25 changes: 23 additions & 2 deletions core/corerepo/gc.go
@@ -6,6 +6,7 @@ import (

key "github.com/ipfs/go-ipfs/blocks/key"
"github.com/ipfs/go-ipfs/core"
mfs "github.com/ipfs/go-ipfs/mfs"
gc "github.com/ipfs/go-ipfs/pin/gc"
repo "github.com/ipfs/go-ipfs/repo"
humanize "gx/ipfs/QmPSBJL4momYnE7DcUyk2DVhD6rH488ZmHBGLbxNdhU44K/go-humanize"
@@ -71,10 +72,26 @@ func NewGC(n *core.IpfsNode) (*GC, error) {
}, nil
}

+ func BestEffortRoots(filesRoot *mfs.Root) ([]key.Key, error) {
+ rootDag, err := filesRoot.GetValue().GetNode()
+ if err != nil {
+ return nil, err
+ }
+ rootKey, err := rootDag.Key()
+ if err != nil {
+ return nil, err
+ }
+ return []key.Key{rootKey}, nil
+ }

func GarbageCollect(n *core.IpfsNode, ctx context.Context) error {
ctx, cancel := context.WithCancel(ctx)
defer cancel() // in case error occurs during operation
- rmed, err := gc.GC(ctx, n.Blockstore, n.Pinning)
+ roots, err := BestEffortRoots(n.FilesRoot)
+ if err != nil {
+ return err
+ }
+ rmed, err := gc.GC(ctx, n.Blockstore, n.Pinning, roots)
if err != nil {
return err
}
@@ -93,7 +110,11 @@ func GarbageCollect(n *core.IpfsNode, ctx context.Context) error {
}

func GarbageCollectAsync(n *core.IpfsNode, ctx context.Context) (<-chan *KeyRemoved, error) {
- rmed, err := gc.GC(ctx, n.Blockstore, n.Pinning)
+ roots, err := BestEffortRoots(n.FilesRoot)
+ if err != nil {
+ return nil, err
+ }
+ rmed, err := gc.GC(ctx, n.Blockstore, n.Pinning, roots)
if err != nil {
return nil, err
}
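For orientation, here is a minimal sketch (not part of this PR) of how the two new pieces fit together: BestEffortRoots turns the MFS root into a key slice, and gc.GC now accepts that slice as its best-effort roots while streaming removed blocks on a channel. The runGC wrapper and the plain context import path are assumptions; go-ipfs at the time vendored context through gx.

```go
package example

import (
	"fmt"

	core "github.com/ipfs/go-ipfs/core"
	corerepo "github.com/ipfs/go-ipfs/core/corerepo"
	gc "github.com/ipfs/go-ipfs/pin/gc"

	context "golang.org/x/net/context" // assumption: the real tree vendors this via gx
)

// runGC is a hypothetical wrapper showing the call sequence introduced above.
func runGC(ctx context.Context, n *core.IpfsNode) error {
	// The files (MFS) root is only a best-effort root: blocks reachable from
	// it and present locally are kept, but missing children do not abort GC.
	roots, err := corerepo.BestEffortRoots(n.FilesRoot)
	if err != nil {
		return err
	}

	rmed, err := gc.GC(ctx, n.Blockstore, n.Pinning, roots)
	if err != nil {
		return err
	}

	// gc.GC reports the keys of removed blocks and closes the channel when
	// the sweep finishes.
	for k := range rmed {
		fmt.Println("removed", k)
	}
	return nil
}
```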
4 changes: 2 additions & 2 deletions core/coreunix/add_test.go
@@ -96,7 +96,7 @@ func TestAddGCLive(t *testing.T) {
gcstarted := make(chan struct{})
go func() {
defer close(gcstarted)
- gcchan, err := gc.GC(context.Background(), node.Blockstore, node.Pinning)
+ gcchan, err := gc.GC(context.Background(), node.Blockstore, node.Pinning, nil)
if err != nil {
log.Error("GC ERROR:", err)
errs <- err
@@ -155,7 +155,7 @@ func TestAddGCLive(t *testing.T) {
t.Fatal(err)
}

- err = dag.EnumerateChildren(ctx, node.DAG, root, key.NewKeySet())
+ err = dag.EnumerateChildren(ctx, node.DAG, root, key.NewKeySet(), false)
if err != nil {
t.Fatal(err)
}
10 changes: 7 additions & 3 deletions merkledag/merkledag.go
@@ -357,16 +357,20 @@ func (t *Batch) Commit() error {
// EnumerateChildren will walk the dag below the given root node and add all
// unseen children to the passed in set.
// TODO: parallelize to avoid disk latency perf hits?
- func EnumerateChildren(ctx context.Context, ds DAGService, root *Node, set key.KeySet) error {
+ func EnumerateChildren(ctx context.Context, ds DAGService, root *Node, set key.KeySet, bestEffort bool) error {
for _, lnk := range root.Links {
k := key.Key(lnk.Hash)
if !set.Has(k) {
set.Add(k)
child, err := ds.Get(ctx, k)
if err != nil {
- return err
+ if bestEffort && err == ErrNotFound {
+ continue
+ } else {
+ return err
+ }
}
- err = EnumerateChildren(ctx, ds, child, set)
+ err = EnumerateChildren(ctx, ds, child, set, bestEffort)
if err != nil {
return err
}
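To make the new flag's contract concrete, here is a small caller-side sketch (hypothetical helper, not part of this PR; import paths mirror the ones in the diffs, and the plain context path stands in for the gx-vendored one):

```go
package example

import (
	key "github.com/ipfs/go-ipfs/blocks/key"
	dag "github.com/ipfs/go-ipfs/merkledag"

	context "golang.org/x/net/context" // assumption: vendored via gx in the real tree
)

// collectReachable gathers the keys of root's descendants. With
// bestEffort=false a single missing child aborts the walk with ErrNotFound;
// with bestEffort=true missing children are silently skipped, which is the
// behaviour the GC relies on for the files-API root.
func collectReachable(ctx context.Context, ds dag.DAGService, root *dag.Node, bestEffort bool) (key.KeySet, error) {
	ks := key.NewKeySet()
	if err := dag.EnumerateChildren(ctx, ds, root, ks, bestEffort); err != nil {
		return nil, err
	}
	return ks, nil
}
```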
4 changes: 2 additions & 2 deletions merkledag/merkledag_test.go
@@ -292,7 +292,7 @@ func TestFetchGraph(t *testing.T) {
offline_ds := NewDAGService(bs)
ks := key.NewKeySet()

- err = EnumerateChildren(context.Background(), offline_ds, root, ks)
+ err = EnumerateChildren(context.Background(), offline_ds, root, ks, false)
if err != nil {
t.Fatal(err)
}
@@ -309,7 +309,7 @@ func TestEnumerateChildren(t *testing.T) {
}

ks := key.NewKeySet()
- err = EnumerateChildren(context.Background(), ds, root, ks)
+ err = EnumerateChildren(context.Background(), ds, root, ks, false)
if err != nil {
t.Fatal(err)
}
20 changes: 13 additions & 7 deletions pin/gc/gc.go
@@ -17,18 +17,19 @@ var log = logging.Logger("gc")
// GC performs a mark and sweep garbage collection of the blocks in the blockstore
// first, it creates a 'marked' set and adds to it the following:
// - all recursively pinned blocks, plus all of their descendants (recursively)
+ // - bestEffortRoots, plus all of its descendants (recursively)
// - all directly pinned blocks
// - all blocks utilized internally by the pinner
//
// The routine then iterates over every block in the blockstore and
// deletes any block that is not found in the marked set.
- func GC(ctx context.Context, bs bstore.GCBlockstore, pn pin.Pinner) (<-chan key.Key, error) {
+ func GC(ctx context.Context, bs bstore.GCBlockstore, pn pin.Pinner, bestEffortRoots []key.Key) (<-chan key.Key, error) {
unlocker := bs.GCLock()

bsrv := bserv.New(bs, offline.Exchange(bs))
ds := dag.NewDAGService(bsrv)

- gcs, err := ColoredSet(ctx, pn, ds)
+ gcs, err := ColoredSet(ctx, pn, ds, bestEffortRoots)
if err != nil {
return nil, err
}
@@ -69,7 +70,7 @@ func GC(ctx context.Context, bs bstore.GCBlockstore, pn pin.Pinner) (<-chan key.
return output, nil
}

- func Descendants(ctx context.Context, ds dag.DAGService, set key.KeySet, roots []key.Key) error {
+ func Descendants(ctx context.Context, ds dag.DAGService, set key.KeySet, roots []key.Key, bestEffort bool) error {
Review discussion:

jbenet (Member): I strongly dislike how this is implemented. Adding bestEffort here to this function ruins the whole abstraction. You can implement this "best effort" notion outside of this function, just in the GC function.

kevina (Contributor Author): @jbenet In this function (Descendants) and in EnumerateChildren, all that bestEffort means is to not abort if a child is unreadable. In order to collect the set of keys to keep on a "best effort" basis, I needed a way to collect all the children of the initial best-effort set that are local to the node.

I am not entirely clear what abstraction I am ruining, so I am not 100% sure how to fix this. I could create a new function to find the descendants of a key set that doesn't abort on an error, but to me it seemed better to extend the functionality of the existing function.

@whyrusleeping I would appreciate any insights you can offer.
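For reference, a rough sketch of the alternative @jbenet describes could live entirely inside pin/gc/gc.go, leaving Descendants and dag.EnumerateChildren with their original signatures. The helper below is hypothetical and uses only identifiers that already appear in the diffs (dag.ErrNotFound, key.Key, nd.Links, and the dag/key imports already present in that file); ColoredSet would call it once per best-effort root instead of threading a flag down the stack.

```go
// addBestEffort marks k and every locally available descendant of k,
// skipping (rather than erroring on) blocks that are not in the local store.
func addBestEffort(ctx context.Context, ds dag.DAGService, set key.KeySet, k key.Key) error {
	set.Add(k)
	nd, err := ds.Get(ctx, k)
	if err == dag.ErrNotFound {
		return nil // best effort: this block is not available locally
	}
	if err != nil {
		return err
	}
	for _, lnk := range nd.Links {
		ck := key.Key(lnk.Hash)
		if set.Has(ck) {
			continue
		}
		if err := addBestEffort(ctx, ds, set, ck); err != nil {
			return err
		}
	}
	return nil
}
```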

for _, k := range roots {
set.Add(k)
nd, err := ds.Get(ctx, k)
@@ -78,7 +79,7 @@ func Descendants(ctx context.Context, ds dag.DAGService, set key.KeySet, roots [
}

// EnumerateChildren recursively walks the dag and adds the keys to the given set
- err = dag.EnumerateChildren(ctx, ds, nd, set)
+ err = dag.EnumerateChildren(ctx, ds, nd, set, bestEffort)
if err != nil {
return err
}
@@ -87,11 +88,16 @@ func Descendants(ctx context.Context, ds dag.DAGService, set key.KeySet, roots [
return nil
}

- func ColoredSet(ctx context.Context, pn pin.Pinner, ds dag.DAGService) (key.KeySet, error) {
+ func ColoredSet(ctx context.Context, pn pin.Pinner, ds dag.DAGService, bestEffortRoots []key.Key) (key.KeySet, error) {
// KeySet currently implemented in memory, in the future, may be bloom filter or
// disk backed to conserve memory.
gcs := key.NewKeySet()
- err := Descendants(ctx, ds, gcs, pn.RecursiveKeys())
+ err := Descendants(ctx, ds, gcs, pn.RecursiveKeys(), false)
if err != nil {
return nil, err
}

+ err = Descendants(ctx, ds, gcs, bestEffortRoots, true)
+ if err != nil {
+ return nil, err
+ }
@@ -100,7 +106,7 @@ func ColoredSet(ctx context.Context, pn pin.Pinner, ds dag.DAGService) (key.KeyS
gcs.Add(k)
}

- err = Descendants(ctx, ds, gcs, pn.InternalPins())
+ err = Descendants(ctx, ds, gcs, pn.InternalPins(), false)
if err != nil {
return nil, err
}
45 changes: 45 additions & 0 deletions test/sharness/t0252-files-gc.sh
@@ -0,0 +1,45 @@
#!/bin/sh
#
# Copyright (c) 2016 Kevin Atkinson
# MIT Licensed; see the LICENSE file in this repository.
#

test_description="test how the unix files api interacts with the gc"

. lib/test-lib.sh

test_init_ipfs

test_expect_success "object not removed after gc" '
echo "hello world" | ipfs files write --create /hello.txt &&
ipfs repo gc &&
ipfs cat QmVib14uvPnCP73XaCDpwugRuwfTsVbGyWbatHAmLSdZUS
'

Review discussion:

Member: We should also add another test_expect_success to make sure that /hello.txt is still accessible through the files API here.

Member: So... if this is "best effort", under what conditions should gc remove these, and can we make sure to test that here?

kevina (Contributor Author): All "best effort" means is not to abort if a child is unreadable. For the GC to remove the object, it will need to be detached somehow from the MFS root.

whyrusleeping (Member): @kevina want to take this one on and add a few more tests?

  • Test that the read test below outputs the right text
  • Exercise a situation that uses the best-effort pin:
    • Add a directory containing a file with --pin=false
    • Pin the hash directly
    • Run a gc so that the file is no longer accessible
    • ipfs files cp <dirhash> /somewhere (this should work just fine)
    • ipfs repo gc should succeed
    • Re-add the file that was originally in that directory with --pin=false
    • Run ipfs repo gc again; that file shouldn't be removed

kevina (Contributor Author): OK, will do.

kevina (Contributor Author), Aug 30, 2016:

> @kevina want to take this one on and add a few more tests?

@whyrusleeping See #3151

test_expect_success "/hello.txt still accessible after gc" '
ipfs files read /hello.txt
'

ADIR_HASH=QmbCgoMYVuZq8m1vK31JQx9DorwQdLMF1M3sJ7kygLLqnW
FILE1_HASH=QmX4eaSJz39mNhdu5ACUwTDpyA6y24HmrQNnAape6u3buS

test_expect_success "gc okay after adding incomplete node -- prep" '
ipfs files mkdir /adir &&
echo "file1" | ipfs files write --create /adir/file1 &&
echo "file2" | ipfs files write --create /adir/file2 &&
ipfs cat $FILE1_HASH &&
ipfs pin add --recursive=false $ADIR_HASH &&
ipfs files rm -r /adir &&
ipfs repo gc && # will remove /adir/file1 and /adir/file2 but not /adir
test_must_fail ipfs cat $FILE1_HASH &&
ipfs files cp /ipfs/$ADIR_HASH /adir &&
ipfs pin rm $ADIR_HASH
'

test_expect_success "gc okay after adding incomplete node" '
ipfs refs $ADIR_HASH &&
ipfs repo gc &&
ipfs refs $ADIR_HASH
'

test_done