From 407d871e1fa65308042ff607a66ff3302e49d047 Mon Sep 17 00:00:00 2001 From: Xiaoxuan Wang Date: Wed, 29 Nov 2023 12:46:26 +0800 Subject: [PATCH 1/4] feat: dangling node detection for graph.Memory Signed-off-by: Xiaoxuan Wang --- content/oci/oci.go | 4 +- internal/graph/memory.go | 34 +++++++- internal/graph/memory_test.go | 157 ++++++++++++++++++++++++++++++++-- 3 files changed, 183 insertions(+), 12 deletions(-) diff --git a/content/oci/oci.go b/content/oci/oci.go index 5d4699a1..f9114706 100644 --- a/content/oci/oci.go +++ b/content/oci/oci.go @@ -156,9 +156,7 @@ func (s *Store) Delete(ctx context.Context, target ocispec.Descriptor) error { untagged = true } } - if err := s.graph.Remove(ctx, target); err != nil { - return err - } + s.graph.Remove(ctx, target) if untagged && s.AutoSaveIndex { err := s.saveIndex() if err != nil { diff --git a/internal/graph/memory.go b/internal/graph/memory.go index bbb57556..28e83211 100644 --- a/internal/graph/memory.go +++ b/internal/graph/memory.go @@ -31,7 +31,23 @@ import ( // Memory is a memory based PredecessorFinder. type Memory struct { - nodes map[descriptor.Descriptor]ocispec.Descriptor // nodes saves the map keys of ocispec.Descriptor + // properties and behaviors of the fields: + // - Memory.nodes: + // 1. a node exists in Memory.nodes if and only if it exists in the memory + // 2. Memory.nodes saves the ocispec.Descriptor map keys, which are used by + // the other fields. + // - Memory.predecessors: + // 1. a node exists in Memory.predecessors if it has at least one predecessor + // in the memory, regardless of whether or not the node itself exists in + // the memory. + // 2. a node does not exist in Memory.nodes, if it doesn't have any predecessors + // in the memory. + // - Memory.successors: + // 1. a node exists in Memory.successors if and only if it exists in the memory. + // 2. A node's entry in Memory.successors is always consistent with the actual + // content of the node, regardless of whether or not each successor exists + // in the memory. + nodes map[descriptor.Descriptor]ocispec.Descriptor predecessors map[descriptor.Descriptor]set.Set[descriptor.Descriptor] successors map[descriptor.Descriptor]set.Set[descriptor.Descriptor] lock sync.RWMutex @@ -101,12 +117,14 @@ func (m *Memory) Predecessors(_ context.Context, node ocispec.Descriptor) ([]oci return res, nil } -// Remove removes the node from its predecessors and successors. -func (m *Memory) Remove(ctx context.Context, node ocispec.Descriptor) error { +// Remove removes the node from its predecessors and successors, and returns the +// dangling nodes caused by the deletion. +func (m *Memory) Remove(ctx context.Context, node ocispec.Descriptor) []ocispec.Descriptor { m.lock.Lock() defer m.lock.Unlock() nodeKey := descriptor.FromOCI(node) + danglings := []ocispec.Descriptor{} // remove the node from its successors' predecessor list for successorKey := range m.successors[nodeKey] { predecessorEntry := m.predecessors[successorKey] @@ -116,11 +134,12 @@ func (m *Memory) Remove(ctx context.Context, node ocispec.Descriptor) error { // predecessors entry. Otherwise, we do not remove the entry. if len(predecessorEntry) == 0 { delete(m.predecessors, successorKey) + danglings = append(danglings, m.nodes[successorKey]) } } delete(m.successors, nodeKey) delete(m.nodes, nodeKey) - return nil + return danglings } // index indexes predecessors for each direct successor of the given node. @@ -152,3 +171,10 @@ func (m *Memory) index(ctx context.Context, fetcher content.Fetcher, node ocispe } return successors, nil } + +func (m *Memory) isDanglingNode(desc ocispec.Descriptor) bool { + key := descriptor.FromOCI(desc) + _, existsInMemory := m.nodes[key] + _, existsInPredecessors := m.predecessors[key] + return existsInMemory && !existsInPredecessors +} diff --git a/internal/graph/memory_test.go b/internal/graph/memory_test.go index f9f9e89d..d8700c63 100644 --- a/internal/graph/memory_test.go +++ b/internal/graph/memory_test.go @@ -21,6 +21,7 @@ import ( "encoding/json" "io" "reflect" + "sort" "testing" "github.com/opencontainers/go-digest" @@ -253,7 +254,7 @@ func TestMemory_IndexAndRemove(t *testing.T) { // still exist, since its predecessor A still exists predecessorsB, exists = testMemory.predecessors[nodeKeyB] if !exists { - t.Errorf("testDeletableMemory.predecessors should still contain the entry of %v", "B") + t.Errorf("testDeletableMemory.predecessors should still contain the entry of %s", "B") } if !predecessorsB.Contains(nodeKeyA) { t.Errorf("predecessors of %s should still contain %s", "B", "A") @@ -261,7 +262,7 @@ func TestMemory_IndexAndRemove(t *testing.T) { // 3. verify B' successors info: B's entry in testMemory.successors should no // longer exist if _, exists := testMemory.successors[nodeKeyB]; exists { - t.Errorf("testDeletableMemory.successors should not contain the entry of %v", "B") + t.Errorf("testDeletableMemory.successors should not contain the entry of %s", "B") } // 4. verify B' predecessors' successors info: B should still exist in A's // successors @@ -291,7 +292,7 @@ func TestMemory_IndexAndRemove(t *testing.T) { // 2. verify A' successors info: A's entry in testMemory.successors should no // longer exist if _, exists := testMemory.successors[nodeKeyA]; exists { - t.Errorf("testDeletableMemory.successors should not contain the entry of %v", "A") + t.Errorf("testDeletableMemory.successors should not contain the entry of %s", "A") } // 3. verify A' successors' predecessors info: D's entry in testMemory.predecessors // should no longer exist, since all predecessors of D are already deleted @@ -567,7 +568,7 @@ func TestMemory_IndexAllAndPredecessors(t *testing.T) { t.Errorf("successors of %s should still contain %s", "A", "C") } if _, exists := testMemory.successors[nodeKeyC]; exists { - t.Errorf("testMemory.successors should not contain the entry of %v", "C") + t.Errorf("testMemory.successors should not contain the entry of %s", "C") } if _, exists := testMemory.predecessors[nodeKeyC]; !exists { t.Errorf("entry %s in predecessors should still exists since it still has at least one predecessor node present", "C") @@ -585,7 +586,7 @@ func TestMemory_IndexAllAndPredecessors(t *testing.T) { t.Errorf("entry %s in predecessors should no longer exists", "D") } if _, exists := testMemory.successors[nodeKeyA]; exists { - t.Errorf("testDeletableMemory.successors should not contain the entry of %v", "A") + t.Errorf("testDeletableMemory.successors should not contain the entry of %s", "A") } // check that the Predecessors of node D is empty @@ -610,3 +611,149 @@ func TestMemory_IndexAllAndPredecessors(t *testing.T) { t.Errorf("incorrect predecessor result") } } + +// This test is for intermediate testing and may be removed when the pull request +// is finalized. +// +-----------------------------------------------+ +// | | +// | +-----------+ | +// | |A(manifest)| | +// | +-----+-----+ | +// | | | +// | +------------+------------+ | +// | | | | | +// | | | | | +// | v v v | +// | +---------+ +--------+ +--------+ | +// | |B(config)| |C(layer)| |D(layer)| | +// | +---------+ +--------+ +--------+ | +// | | +// | | +// +-----------------------------------------------+ +func TestMemory_isDanglingNode(t *testing.T) { + testFetcher := cas.NewMemory() + testMemory := NewMemory() + ctx := context.Background() + + // generate test content + var blobs [][]byte + var descs []ocispec.Descriptor + appendBlob := func(mediaType string, blob []byte) ocispec.Descriptor { + blobs = append(blobs, blob) + descs = append(descs, ocispec.Descriptor{ + MediaType: mediaType, + Digest: digest.FromBytes(blob), + Size: int64(len(blob)), + }) + return descs[len(descs)-1] + } + descB := appendBlob("layer node B", []byte("Node B is a config")) // blobs[0], layer "B" + descC := appendBlob("layer node C", []byte("Node C is a layer")) // blobs[1], layer "C" + descD := appendBlob("layer node D", []byte("Node D is a layer")) // blobs[2], layer "D" + + generateManifest := func(layers ...ocispec.Descriptor) ocispec.Descriptor { + manifest := ocispec.Manifest{ + Config: descB, + Layers: layers, + } + manifestJSON, err := json.Marshal(manifest) + if err != nil { + t.Fatal(err) + } + return appendBlob(ocispec.MediaTypeImageManifest, manifestJSON) + } + + descA := generateManifest(descs[1:3]...) // blobs[3], manifest "A" + + // prepare the content in the fetcher, so that it can be used to test Index + testContents := []ocispec.Descriptor{descB, descC, descD, descA} + for i := 0; i < len(blobs); i++ { + testFetcher.Push(ctx, testContents[i], bytes.NewReader(blobs[i])) + } + + // make sure that testFetcher works + rc, err := testFetcher.Fetch(ctx, descA) + if err != nil { + t.Errorf("testFetcher.Fetch() error = %v", err) + } + got, err := io.ReadAll(rc) + if err != nil { + t.Errorf("testFetcher.Fetch().Read() error = %v", err) + } + err = rc.Close() + if err != nil { + t.Errorf("testFetcher.Fetch().Close() error = %v", err) + } + if !bytes.Equal(got, blobs[3]) { + t.Errorf("testFetcher.Fetch() = %v, want %v", got, blobs[4]) + } + + nodeKeyA := descriptor.FromOCI(descA) + nodeKeyB := descriptor.FromOCI(descB) + nodeKeyC := descriptor.FromOCI(descC) + nodeKeyD := descriptor.FromOCI(descD) + nodeKeys := []descriptor.Descriptor{nodeKeyA, nodeKeyB, nodeKeyC, nodeKeyD} + + // index the nodes and verify the information + testMemory.IndexAll(ctx, testFetcher, descA) + + // check that all nodes exist in the memory + for i := 0; i < len(nodeKeys); i++ { + if _, exists := testMemory.nodes[nodeKeys[i]]; !exists { + t.Errorf("nodes entry of %v should exist", nodeKeys[i]) + } + } + + // check that predecessor of B,C,D is A + for i := 1; i < len(nodeKeys); i++ { + if len(testMemory.predecessors[nodeKeys[i]]) != 1 || !testMemory.predecessors[nodeKeys[i]].Contains(nodeKeyA) { + t.Errorf("the predecessor of %v should be A", nodeKeys[i]) + } + } + + // remove node A and verify the information + danglings := testMemory.Remove(ctx, descA) + + // check that A is no longer in the memory, while B,C,D still exist in memory + if _, exists := testMemory.nodes[nodeKeyA]; exists { + t.Errorf("nodes entry of %s should not exist", "A") + } + for i := 1; i < len(nodeKeys); i++ { + if _, exists := testMemory.nodes[nodeKeys[i]]; !exists { + t.Errorf("nodes entry of %v should exist", nodeKeys[i]) + } + } + + // check that B,C,D have no predecessors and are dangling nodes + if len(testMemory.predecessors) != 0 { + t.Errorf("testMemory.predecessors should be empty") + } + + // check that B,C,D are all garbage + for i := 1; i < len(nodeKeys); i++ { + if isGarbage := testMemory.isDanglingNode(testMemory.nodes[nodeKeys[i]]); !isGarbage { + t.Errorf("%v should be considered a dangling node", nodeKeys[i]) + } + } + + // check that danglings has the correct content + expectedDanglings := []ocispec.Descriptor{} + for i := 1; i < len(nodeKeys); i++ { + expectedDanglings = append(expectedDanglings, testMemory.nodes[nodeKeys[i]]) + } + + sort.SliceStable(expectedDanglings, func(i, j int) bool { return expectedDanglings[i].Digest <= expectedDanglings[j].Digest }) + sort.SliceStable(danglings, func(i, j int) bool { return danglings[i].Digest <= danglings[j].Digest }) + + if len(danglings) != len(expectedDanglings) { + t.Errorf("danglings has length = %d, want length %d", len(danglings), len(expectedDanglings)) + } + for i := 0; i < len(expectedDanglings); i++ { + if !reflect.DeepEqual(danglings, expectedDanglings) { + t.Errorf("danglings[i] = %v, want %v", danglings[i], expectedDanglings[i]) + } + if isGarbage := testMemory.isDanglingNode(danglings[i]); !isGarbage { + t.Errorf("%v should be considered a dangling node", danglings[i]) + } + } +} From 20ca6880c2681acf69e81a27c8d1e7b95c977bd4 Mon Sep 17 00:00:00 2001 From: Xiaoxuan Wang Date: Wed, 29 Nov 2023 13:08:21 +0800 Subject: [PATCH 2/4] fixed a bug Signed-off-by: Xiaoxuan Wang --- internal/graph/memory.go | 7 +++++-- internal/graph/memory_test.go | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/internal/graph/memory.go b/internal/graph/memory.go index 28e83211..8759fc4f 100644 --- a/internal/graph/memory.go +++ b/internal/graph/memory.go @@ -131,10 +131,13 @@ func (m *Memory) Remove(ctx context.Context, node ocispec.Descriptor) []ocispec. predecessorEntry.Delete(nodeKey) // if none of the predecessors of the node still exists, we remove the - // predecessors entry. Otherwise, we do not remove the entry. + // predecessors entry and return it as a dangling node. Otherwise, we do + // not remove the entry. if len(predecessorEntry) == 0 { delete(m.predecessors, successorKey) - danglings = append(danglings, m.nodes[successorKey]) + if _, exists := m.nodes[successorKey]; exists { + danglings = append(danglings, m.nodes[successorKey]) + } } } delete(m.successors, nodeKey) diff --git a/internal/graph/memory_test.go b/internal/graph/memory_test.go index d8700c63..7d222093 100644 --- a/internal/graph/memory_test.go +++ b/internal/graph/memory_test.go @@ -630,7 +630,7 @@ func TestMemory_IndexAllAndPredecessors(t *testing.T) { // | | // | | // +-----------------------------------------------+ -func TestMemory_isDanglingNode(t *testing.T) { +func TestMemory_trackDanglingNodes(t *testing.T) { testFetcher := cas.NewMemory() testMemory := NewMemory() ctx := context.Background() From 6f9f338c99f03f2deab31f0b1fa8385165709003 Mon Sep 17 00:00:00 2001 From: Xiaoxuan Wang Date: Wed, 29 Nov 2023 16:43:53 +0800 Subject: [PATCH 3/4] made isDangling public Signed-off-by: Xiaoxuan Wang --- internal/graph/memory.go | 2 +- internal/graph/memory_test.go | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/internal/graph/memory.go b/internal/graph/memory.go index 8759fc4f..1390e790 100644 --- a/internal/graph/memory.go +++ b/internal/graph/memory.go @@ -175,7 +175,7 @@ func (m *Memory) index(ctx context.Context, fetcher content.Fetcher, node ocispe return successors, nil } -func (m *Memory) isDanglingNode(desc ocispec.Descriptor) bool { +func (m *Memory) IsDanglingNode(desc ocispec.Descriptor) bool { key := descriptor.FromOCI(desc) _, existsInMemory := m.nodes[key] _, existsInPredecessors := m.predecessors[key] diff --git a/internal/graph/memory_test.go b/internal/graph/memory_test.go index 7d222093..011389b1 100644 --- a/internal/graph/memory_test.go +++ b/internal/graph/memory_test.go @@ -662,7 +662,6 @@ func TestMemory_trackDanglingNodes(t *testing.T) { } return appendBlob(ocispec.MediaTypeImageManifest, manifestJSON) } - descA := generateManifest(descs[1:3]...) // blobs[3], manifest "A" // prepare the content in the fetcher, so that it can be used to test Index @@ -731,7 +730,7 @@ func TestMemory_trackDanglingNodes(t *testing.T) { // check that B,C,D are all garbage for i := 1; i < len(nodeKeys); i++ { - if isGarbage := testMemory.isDanglingNode(testMemory.nodes[nodeKeys[i]]); !isGarbage { + if isGarbage := testMemory.IsDanglingNode(testMemory.nodes[nodeKeys[i]]); !isGarbage { t.Errorf("%v should be considered a dangling node", nodeKeys[i]) } } @@ -752,7 +751,7 @@ func TestMemory_trackDanglingNodes(t *testing.T) { if !reflect.DeepEqual(danglings, expectedDanglings) { t.Errorf("danglings[i] = %v, want %v", danglings[i], expectedDanglings[i]) } - if isGarbage := testMemory.isDanglingNode(danglings[i]); !isGarbage { + if isGarbage := testMemory.IsDanglingNode(danglings[i]); !isGarbage { t.Errorf("%v should be considered a dangling node", danglings[i]) } } From 38fd16c1abea0f0a6f2bf135fc941f398165a7b6 Mon Sep 17 00:00:00 2001 From: Xiaoxuan Wang Date: Thu, 30 Nov 2023 12:54:51 +0800 Subject: [PATCH 4/4] added doc comments Signed-off-by: Xiaoxuan Wang --- internal/graph/memory.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/graph/memory.go b/internal/graph/memory.go index 1390e790..6c23ca99 100644 --- a/internal/graph/memory.go +++ b/internal/graph/memory.go @@ -175,6 +175,8 @@ func (m *Memory) index(ctx context.Context, fetcher content.Fetcher, node ocispe return successors, nil } +// IsDanglingNode decides whether a node is dangling. A node is considered +// dangling if it is present and has no predecessors. func (m *Memory) IsDanglingNode(desc ocispec.Descriptor) bool { key := descriptor.FromOCI(desc) _, existsInMemory := m.nodes[key]