From 0cf8ccdd397e748f69ebf7bc6442523b72cec0f7 Mon Sep 17 00:00:00 2001 From: Ludovico de Nittis Date: Wed, 20 Sep 2023 15:44:53 +0200 Subject: [PATCH 1/3] Add information about the cache When using `desync info` it might be useful to have additional information regarding your local cache. For example, knowing the number of chunks that can be picked up by your cache can be used as a metric to ensure the cache is actually working as expected. With this commit we add `in-cache` to print the number of chunks that the cache already has, `not-in-seed-nor-cache` with the number of chunks that need to be downloaded from the store and finally `dedup-size-not-in-seed-nor-cache` with the size of the chunks that need to be downloaded from the store. Signed-off-by: Ludovico de Nittis --- cmd/desync/info.go | 51 +++++++++++++++++++++++++++++++++-------- cmd/desync/info_test.go | 47 +++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 10 deletions(-) diff --git a/cmd/desync/info.go b/cmd/desync/info.go index 33f2662..7bfcb81 100644 --- a/cmd/desync/info.go +++ b/cmd/desync/info.go @@ -14,6 +14,7 @@ type infoOptions struct { cmdStoreOptions stores []string seeds []string + cache string printFormat string } @@ -38,6 +39,7 @@ in the seeds are also shown. 
Use '-' to read the index from STDIN.`, flags := cmd.Flags() flags.StringSliceVarP(&opt.stores, "store", "s", nil, "source store(s)") flags.StringSliceVar(&opt.seeds, "seed", nil, "seed indexes") + flags.StringVarP(&opt.cache, "cache", "c", "", "store to be used as cache") flags.StringVarP(&opt.printFormat, "format", "f", "json", "output format, plain or json") addStoreOptions(&opt.cmdStoreOptions, flags) return cmd @@ -55,15 +57,18 @@ func runInfo(ctx context.Context, opt infoOptions, args []string) error { } var results struct { - Total int `json:"total"` - Unique int `json:"unique"` - InStore uint64 `json:"in-store"` - InSeed uint64 `json:"in-seed"` - Size uint64 `json:"size"` - SizeNotInSeed uint64 `json:"dedup-size-not-in-seed"` - ChunkSizeMin uint64 `json:"chunk-size-min"` - ChunkSizeAvg uint64 `json:"chunk-size-avg"` - ChunkSizeMax uint64 `json:"chunk-size-max"` + Total int `json:"total"` + Unique int `json:"unique"` + InStore uint64 `json:"in-store"` + InSeed uint64 `json:"in-seed"` + InCache uint64 `json:"in-cache"` + NotInSeedNorCache uint64 `json:"not-in-seed-nor-cache"` + Size uint64 `json:"size"` + SizeNotInSeed uint64 `json:"dedup-size-not-in-seed"` + SizeNotInSeedNorCache uint64 `json:"dedup-size-not-in-seed-nor-cache"` + ChunkSizeMin uint64 `json:"chunk-size-min"` + ChunkSizeAvg uint64 `json:"chunk-size-avg"` + ChunkSizeMax uint64 `json:"chunk-size-max"` } dedupedSeeds := make(map[desync.ChunkID]struct{}) @@ -93,6 +98,14 @@ func runInfo(ctx context.Context, opt infoOptions, args []string) error { results.ChunkSizeAvg = c.Index.ChunkSizeAvg results.ChunkSizeMax = c.Index.ChunkSizeMax + var cache desync.WriteStore + if opt.cache != "" { + cache, err = WritableStore(opt.cache, opt.cmdStoreOptions) + if err != nil { + return err + } + } + // Go through each chunk from the index to count them, de-dup each chunks // with a map and calculate the size of the chunks that are not available // in seed @@ -110,14 +123,29 @@ func runInfo(ctx context.Context, opt 
infoOptions, args []string) error { continue } + inSeed := false + inCache := false deduped[chunk.ID] = struct{}{} if _, isAvailable := dedupedSeeds[chunk.ID]; isAvailable { // This chunk is available in the seed results.InSeed++ - } else { + inSeed = true + } + if cache != nil { + if hasChunk, _ := cache.HasChunk(chunk.ID); hasChunk { + results.InCache++ + inCache = true + } + } + + if !inSeed { // The seed doesn't have this chunk, sum its size results.SizeNotInSeed += chunk.Size } + if !inSeed && !inCache { + results.NotInSeedNorCache++ + results.SizeNotInSeedNorCache += chunk.Size + } } results.Unique = len(deduped) @@ -156,10 +184,13 @@ func runInfo(ctx context.Context, opt infoOptions, args []string) error { case "plain": fmt.Println("Blob size:", results.Size) fmt.Println("Size of deduplicated chunks not in seed:", results.SizeNotInSeed) + fmt.Println("Size of deduplicated chunks not in seed nor cache:", results.SizeNotInSeedNorCache) fmt.Println("Total chunks:", results.Total) fmt.Println("Unique chunks:", results.Unique) fmt.Println("Chunks in store:", results.InStore) fmt.Println("Chunks in seed:", results.InSeed) + fmt.Println("Chunks in cache:", results.InCache) + fmt.Println("Chunks not in seed nor cache:", results.NotInSeedNorCache) fmt.Println("Chunk size min:", results.ChunkSizeMin) fmt.Println("Chunk size avg:", results.ChunkSizeAvg) fmt.Println("Chunk size max:", results.ChunkSizeMax) diff --git a/cmd/desync/info_test.go b/cmd/desync/info_test.go index b98d3a9..12c4b01 100644 --- a/cmd/desync/info_test.go +++ b/cmd/desync/info_test.go @@ -16,8 +16,11 @@ func TestInfoCommand(t *testing.T) { "unique": 131, "in-store": 131, "in-seed": 0, + "in-cache": 0, + "not-in-seed-nor-cache": 131, "size": 2097152, "dedup-size-not-in-seed": 1114112, + "dedup-size-not-in-seed-nor-cache": 1114112, "chunk-size-min": 2048, "chunk-size-avg": 8192, "chunk-size-max": 32768 @@ -49,8 +52,11 @@ func TestInfoCommandWithSeed(t *testing.T) { "unique": 131, "in-store": 131, 
"in-seed": 124, + "in-cache": 0, + "not-in-seed-nor-cache": 7, "size": 2097152, "dedup-size-not-in-seed": 80029, + "dedup-size-not-in-seed-nor-cache": 80029, "chunk-size-min": 2048, "chunk-size-avg": 8192, "chunk-size-max": 32768 @@ -79,3 +85,44 @@ func TestInfoCommandWithSeed(t *testing.T) { require.NoError(t, err) require.Equal(t, exp, got) } + +func TestInfoCommandWithSeedAndCache(t *testing.T) { + expectedOutput := []byte(`{ + "total": 161, + "unique": 131, + "in-store": 131, + "in-seed": 124, + "in-cache": 18, + "not-in-seed-nor-cache": 5, + "size": 2097152, + "dedup-size-not-in-seed": 80029, + "dedup-size-not-in-seed-nor-cache": 67099, + "chunk-size-min": 2048, + "chunk-size-avg": 8192, + "chunk-size-max": 32768 + }`) + exp := make(map[string]interface{}) + err := json.Unmarshal(expectedOutput, &exp) + require.NoError(t, err) + + cmd := newInfoCommand(context.Background()) + cmd.SetArgs([]string{ + "-s", "testdata/blob2.store", + "--seed", "testdata/blob1.caibx", + "--cache", "testdata/blob2.cache", + "testdata/blob2.caibx", + }) + b := new(bytes.Buffer) + + // Redirect the command's output + stdout = b + cmd.SetOutput(ioutil.Discard) + _, err = cmd.ExecuteC() + require.NoError(t, err) + + // Decode the output and compare to what's expected + got := make(map[string]interface{}) + err = json.Unmarshal(b.Bytes(), &got) + require.NoError(t, err) + require.Equal(t, exp, got) +} From fd69d09c53f8dc0fc85157ad827b17e305cb2543 Mon Sep 17 00:00:00 2001 From: Ludovico de Nittis Date: Wed, 20 Sep 2023 15:45:33 +0200 Subject: [PATCH 2/3] Reduce code duplication in info_test Signed-off-by: Ludovico de Nittis --- cmd/desync/info_test.go | 184 ++++++++++++++++------------------------ 1 file changed, 73 insertions(+), 111 deletions(-) diff --git a/cmd/desync/info_test.go b/cmd/desync/info_test.go index 12c4b01..59920cf 100644 --- a/cmd/desync/info_test.go +++ b/cmd/desync/info_test.go @@ -11,118 +11,80 @@ import ( ) func TestInfoCommand(t *testing.T) { - expectedOutput := 
[]byte(`{ - "total": 161, - "unique": 131, - "in-store": 131, - "in-seed": 0, - "in-cache": 0, - "not-in-seed-nor-cache": 131, - "size": 2097152, - "dedup-size-not-in-seed": 1114112, - "dedup-size-not-in-seed-nor-cache": 1114112, - "chunk-size-min": 2048, - "chunk-size-avg": 8192, - "chunk-size-max": 32768 - }`) - exp := make(map[string]interface{}) - err := json.Unmarshal(expectedOutput, &exp) - require.NoError(t, err) + for _, test := range []struct { + name string + args []string + expectedOutput []byte + }{ + {"info command with store", + []string{"-s", "testdata/blob1.store", "testdata/blob1.caibx"}, + []byte(`{ + "total": 161, + "unique": 131, + "in-store": 131, + "in-seed": 0, + "in-cache": 0, + "not-in-seed-nor-cache": 131, + "size": 2097152, + "dedup-size-not-in-seed": 1114112, + "dedup-size-not-in-seed-nor-cache": 1114112, + "chunk-size-min": 2048, + "chunk-size-avg": 8192, + "chunk-size-max": 32768 + }`)}, + {"info command with seed", + []string{"-s", "testdata/blob1.store", "--seed", "testdata/blob2.caibx", "testdata/blob1.caibx"}, + []byte(`{ + "total": 161, + "unique": 131, + "in-store": 131, + "in-seed": 124, + "in-cache": 0, + "not-in-seed-nor-cache": 7, + "size": 2097152, + "dedup-size-not-in-seed": 80029, + "dedup-size-not-in-seed-nor-cache": 80029, + "chunk-size-min": 2048, + "chunk-size-avg": 8192, + "chunk-size-max": 32768 + }`)}, + {"info command with seed and cache", + []string{"-s", "testdata/blob2.store", "--seed", "testdata/blob1.caibx", "--cache", "testdata/blob2.cache", "testdata/blob2.caibx"}, + []byte(`{ + "total": 161, + "unique": 131, + "in-store": 131, + "in-seed": 124, + "in-cache": 18, + "not-in-seed-nor-cache": 5, + "size": 2097152, + "dedup-size-not-in-seed": 80029, + "dedup-size-not-in-seed-nor-cache": 67099, + "chunk-size-min": 2048, + "chunk-size-avg": 8192, + "chunk-size-max": 32768 + }`)}, + } { + t.Run(test.name, func(t *testing.T) { + exp := make(map[string]interface{}) + err := json.Unmarshal(test.expectedOutput, &exp) + 
require.NoError(t, err) - cmd := newInfoCommand(context.Background()) - cmd.SetArgs([]string{"-s", "testdata/blob1.store", "testdata/blob1.caibx"}) - b := new(bytes.Buffer) + cmd := newInfoCommand(context.Background()) + cmd.SetArgs(test.args) + b := new(bytes.Buffer) - // Redirect the command's output - stdout = b - cmd.SetOutput(ioutil.Discard) - _, err = cmd.ExecuteC() - require.NoError(t, err) + // Redirect the command's output + stdout = b + cmd.SetOutput(ioutil.Discard) + _, err = cmd.ExecuteC() + require.NoError(t, err) - // Decode the output and compare to what's expected - got := make(map[string]interface{}) - err = json.Unmarshal(b.Bytes(), &got) - require.NoError(t, err) - require.Equal(t, exp, got) -} - -func TestInfoCommandWithSeed(t *testing.T) { - expectedOutput := []byte(`{ - "total": 161, - "unique": 131, - "in-store": 131, - "in-seed": 124, - "in-cache": 0, - "not-in-seed-nor-cache": 7, - "size": 2097152, - "dedup-size-not-in-seed": 80029, - "dedup-size-not-in-seed-nor-cache": 80029, - "chunk-size-min": 2048, - "chunk-size-avg": 8192, - "chunk-size-max": 32768 - }`) - exp := make(map[string]interface{}) - err := json.Unmarshal(expectedOutput, &exp) - require.NoError(t, err) - - cmd := newInfoCommand(context.Background()) - cmd.SetArgs([]string{ - "-s", "testdata/blob1.store", - "--seed", "testdata/blob2.caibx", - "testdata/blob1.caibx", - }) - b := new(bytes.Buffer) - - // Redirect the command's output - stdout = b - cmd.SetOutput(ioutil.Discard) - _, err = cmd.ExecuteC() - require.NoError(t, err) - - // Decode the output and compare to what's expected - got := make(map[string]interface{}) - err = json.Unmarshal(b.Bytes(), &got) - require.NoError(t, err) - require.Equal(t, exp, got) -} - -func TestInfoCommandWithSeedAndCache(t *testing.T) { - expectedOutput := []byte(`{ - "total": 161, - "unique": 131, - "in-store": 131, - "in-seed": 124, - "in-cache": 18, - "not-in-seed-nor-cache": 5, - "size": 2097152, - "dedup-size-not-in-seed": 80029, - 
"dedup-size-not-in-seed-nor-cache": 67099, - "chunk-size-min": 2048, - "chunk-size-avg": 8192, - "chunk-size-max": 32768 - }`) - exp := make(map[string]interface{}) - err := json.Unmarshal(expectedOutput, &exp) - require.NoError(t, err) - - cmd := newInfoCommand(context.Background()) - cmd.SetArgs([]string{ - "-s", "testdata/blob2.store", - "--seed", "testdata/blob1.caibx", - "--cache", "testdata/blob2.cache", - "testdata/blob2.caibx", - }) - b := new(bytes.Buffer) - - // Redirect the command's output - stdout = b - cmd.SetOutput(ioutil.Discard) - _, err = cmd.ExecuteC() - require.NoError(t, err) - - // Decode the output and compare to what's expected - got := make(map[string]interface{}) - err = json.Unmarshal(b.Bytes(), &got) - require.NoError(t, err) - require.Equal(t, exp, got) + // Decode the output and compare to what's expected + got := make(map[string]interface{}) + err = json.Unmarshal(b.Bytes(), &got) + require.NoError(t, err) + require.Equal(t, exp, got) + }) + } } From 03797c5a5be4245b5cdd17ed5efc67d0b9ae03ec Mon Sep 17 00:00:00 2001 From: Ludovico de Nittis Date: Wed, 20 Sep 2023 15:51:30 +0200 Subject: [PATCH 3/3] Add info test with cache and without a seed Signed-off-by: Ludovico de Nittis --- cmd/desync/info_test.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/cmd/desync/info_test.go b/cmd/desync/info_test.go index 59920cf..17960bb 100644 --- a/cmd/desync/info_test.go +++ b/cmd/desync/info_test.go @@ -64,6 +64,22 @@ func TestInfoCommand(t *testing.T) { "chunk-size-avg": 8192, "chunk-size-max": 32768 }`)}, + {"info command with cache", + []string{"-s", "testdata/blob2.store", "--cache", "testdata/blob2.cache", "testdata/blob2.caibx"}, + []byte(`{ + "total": 161, + "unique": 131, + "in-store": 131, + "in-seed": 0, + "in-cache": 18, + "not-in-seed-nor-cache": 113, + "size": 2097152, + "dedup-size-not-in-seed": 1114112, + "dedup-size-not-in-seed-nor-cache": 950410, + "chunk-size-min": 2048, + "chunk-size-avg": 8192, + 
"chunk-size-max": 32768 + }`)}, } { t.Run(test.name, func(t *testing.T) { exp := make(map[string]interface{})