Skip to content

Commit

Permalink
Add new inspect-chunks command for more detailed chunks info
Browse files Browse the repository at this point in the history
Add a new `inspect-chunks` command that can provide additional
information about the chunks in an index, including their compressed
size (if a local store is provided).

One use case is for the clients to download the server's inspect-chunks
JSON and feed it into `desync info`, to calculate the precise download
size of an update.

Signed-off-by: Ludovico de Nittis <[email protected]>
  • Loading branch information
RyuzakiKK committed Nov 19, 2024
1 parent 6c5d7aa commit ba044f9
Show file tree
Hide file tree
Showing 12 changed files with 2,421 additions and 27 deletions.
10 changes: 10 additions & 0 deletions chunkadditionalinfo.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package desync

// ChunkAdditionalInfo contains detailed information about a particular chunk.
// Some of this information, e.g. CompressedSize, is only exact for the store
// used when generating it, because other stores could potentially use
// different compression levels.
type ChunkAdditionalInfo struct {
	// ID is the chunk's identifier (content hash).
	ID ChunkID `json:"id"`
	// UncompressedSize is the size of the chunk's raw data in bytes.
	UncompressedSize uint64 `json:"uncompressed_size"`
	// CompressedSize is the chunk's size as stored in the store this info was
	// generated from. Zero/omitted when the compressed size is unknown.
	CompressedSize int64 `json:"compressed_size,omitempty"`
}
3 changes: 2 additions & 1 deletion chunker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ func TestChunkerLargeFile(t *testing.T) {
if err != nil {
t.Fatal(err)
}
hash := ChunkID(sha512.Sum512_256(buf)).String()
chunkID := ChunkID(sha512.Sum512_256(buf))
hash := (&chunkID).String()
if hash != e.ID {
t.Fatalf("chunk #%d, unexpected hash %s, expected %s", i+1, hash, e.ID)
}
Expand Down
77 changes: 62 additions & 15 deletions cmd/desync/info.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ package main

import (
"context"
"encoding/json"
"fmt"
"os"
"sync"
"sync/atomic"

Expand All @@ -16,6 +18,7 @@ type infoOptions struct {
seeds []string
cache string
printFormat string
chunksInfo string
}

func newInfoCommand(ctx context.Context) *cobra.Command {
Expand All @@ -27,10 +30,13 @@ func newInfoCommand(ctx context.Context) *cobra.Command {
Long: `Displays information about the provided index, such as the number of chunks
and the total size of unique chunks that are not available in the seed. If a
store is provided, it'll also show how many of the chunks are present in the
store. If one or more seed indexes are provided, the number of chunks available
store. By providing a chunks info file, generated by 'inspect-chunks', additional
information will be shown, like the size of compressed chunks not in the seed nor cache.
If one or more seed indexes are provided, the number of chunks available
in the seeds are also shown. Use '-' to read the index from STDIN.`,
Example: ` desync info -s /path/to/local --format=json file.caibx`,
Args: cobra.ExactArgs(1),
Example: ` desync info -s /path/to/local --format=json file.caibx
desync info --seed http://192.168.1.1/rootfs2.caibx --chunks-info chunks.json --format=json rootfs.caibx`,
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
return runInfo(ctx, opt, args)
},
Expand All @@ -41,6 +47,7 @@ in the seeds are also shown. Use '-' to read the index from STDIN.`,
flags.StringSliceVar(&opt.seeds, "seed", nil, "seed indexes")
flags.StringVarP(&opt.cache, "cache", "c", "", "store to be used as cache")
flags.StringVarP(&opt.printFormat, "format", "f", "json", "output format, plain or json")
flags.StringVar(&opt.chunksInfo, "chunks-info", "", "json file with additional chunks info")
addStoreOptions(&opt.cmdStoreOptions, flags)
return cmd
}
Expand All @@ -57,18 +64,37 @@ func runInfo(ctx context.Context, opt infoOptions, args []string) error {
}

var results struct {
Total int `json:"total"`
Unique int `json:"unique"`
InStore uint64 `json:"in-store"`
InSeed uint64 `json:"in-seed"`
InCache uint64 `json:"in-cache"`
NotInSeedNorCache uint64 `json:"not-in-seed-nor-cache"`
Size uint64 `json:"size"`
SizeNotInSeed uint64 `json:"dedup-size-not-in-seed"`
SizeNotInSeedNorCache uint64 `json:"dedup-size-not-in-seed-nor-cache"`
ChunkSizeMin uint64 `json:"chunk-size-min"`
ChunkSizeAvg uint64 `json:"chunk-size-avg"`
ChunkSizeMax uint64 `json:"chunk-size-max"`
Total int `json:"total"`
Unique int `json:"unique"`
InStore uint64 `json:"in-store"`
InSeed uint64 `json:"in-seed"`
InCache uint64 `json:"in-cache"`
NotInSeedNorCache uint64 `json:"not-in-seed-nor-cache"`
Size uint64 `json:"size"`
SizeNotInSeed uint64 `json:"dedup-size-not-in-seed"`
SizeNotInSeedNorCache uint64 `json:"dedup-size-not-in-seed-nor-cache"`
SizeNotInSeedNorCacheCompressed uint64 `json:"dedup-size-not-in-seed-nor-cache-compressed"`
ChunkSizeMin uint64 `json:"chunk-size-min"`
ChunkSizeAvg uint64 `json:"chunk-size-avg"`
ChunkSizeMax uint64 `json:"chunk-size-max"`
}

var estimateCompressedSize = opt.chunksInfo != ""
var chunksInfo []desync.ChunkAdditionalInfo
if opt.chunksInfo != "" {
b, err := os.ReadFile(opt.chunksInfo)
if err != nil {
return err
}
err = json.Unmarshal(b, &chunksInfo)
if err != nil {
return err
}
}

chunkIDMap := make(map[desync.ChunkID]desync.ChunkAdditionalInfo)
for _, info := range chunksInfo {
chunkIDMap[info.ID] = info
}

dedupedSeeds := make(map[desync.ChunkID]struct{})
Expand Down Expand Up @@ -145,6 +171,26 @@ func runInfo(ctx context.Context, opt infoOptions, args []string) error {
if !inSeed && !inCache {
results.NotInSeedNorCache++
results.SizeNotInSeedNorCache += chunk.Size
if estimateCompressedSize {
if chunkInfo, found := chunkIDMap[chunk.ID]; found {
results.SizeNotInSeedNorCacheCompressed += uint64(chunkInfo.CompressedSize)
if chunkInfo.CompressedSize == 0 {
// We don't have the compressed info for at least one chunk. We shouldn't print that info
// because it would not be accurate.
estimateCompressedSize = false
results.SizeNotInSeedNorCacheCompressed = 0
}
if chunkInfo.UncompressedSize != chunk.Size {
return fmt.Errorf("the chunks info file has an unexpected size for the chunk %s: %d instead of %d",
chunk.ID, chunkInfo.UncompressedSize, chunk.Size)
}
} else {
// If the provided chunks info file is missing some chunks we stop estimating the size.
// Otherwise, the shown value at the end could end up being wrong.
estimateCompressedSize = false
results.SizeNotInSeedNorCacheCompressed = 0
}
}
}
}
results.Unique = len(deduped)
Expand Down Expand Up @@ -191,6 +237,7 @@ func runInfo(ctx context.Context, opt infoOptions, args []string) error {
fmt.Println("Chunks in seed:", results.InSeed)
fmt.Println("Chunks in cache:", results.InCache)
fmt.Println("Chunks not in seed nor cache:", results.NotInSeedNorCache)
fmt.Println("Compressed chunks not in seed nor cache:", results.SizeNotInSeedNorCacheCompressed)
fmt.Println("Chunk size min:", results.ChunkSizeMin)
fmt.Println("Chunk size avg:", results.ChunkSizeAvg)
fmt.Println("Chunk size max:", results.ChunkSizeMax)
Expand Down
26 changes: 23 additions & 3 deletions cmd/desync/info_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"github.com/stretchr/testify/require"
)


func TestInfoCommand(t *testing.T) {
for _, test := range []struct {
name string
Expand All @@ -29,6 +28,7 @@ func TestInfoCommand(t *testing.T) {
"size": 2097152,
"dedup-size-not-in-seed": 1114112,
"dedup-size-not-in-seed-nor-cache": 1114112,
"dedup-size-not-in-seed-nor-cache-compressed": 0,
"chunk-size-min": 2048,
"chunk-size-avg": 8192,
"chunk-size-max": 32768
Expand All @@ -45,12 +45,13 @@ func TestInfoCommand(t *testing.T) {
"size": 2097152,
"dedup-size-not-in-seed": 80029,
"dedup-size-not-in-seed-nor-cache": 80029,
"dedup-size-not-in-seed-nor-cache-compressed": 0,
"chunk-size-min": 2048,
"chunk-size-avg": 8192,
"chunk-size-max": 32768
}`)},
{"info command with seed and cache",
[]string{"-s", "testdata/blob2.store", "--seed", "testdata/blob1.caibx", "--cache", "testdata/blob2.cache", "testdata/blob2.caibx"},
[]string{"-s", "testdata/blob2.store", "--seed", "testdata/blob1.caibx", "--cache", "testdata/blob2.cache", "--chunks-info", "testdata/blob2_chunks_info.json", "testdata/blob2.caibx"},
[]byte(`{
"total": 161,
"unique": 131,
Expand All @@ -61,12 +62,13 @@ func TestInfoCommand(t *testing.T) {
"size": 2097152,
"dedup-size-not-in-seed": 80029,
"dedup-size-not-in-seed-nor-cache": 80029,
"dedup-size-not-in-seed-nor-cache-compressed": 76000,
"chunk-size-min": 2048,
"chunk-size-avg": 8192,
"chunk-size-max": 32768
}`)},
{"info command with cache",
[]string{"-s", "testdata/blob2.store", "--cache", "testdata/blob2.cache", "testdata/blob2.caibx"},
[]string{"-s", "testdata/blob2.store", "--cache", "testdata/blob2.cache", "--chunks-info", "testdata/blob2_chunks_info.json", "testdata/blob2.caibx"},
[]byte(`{
"total": 161,
"unique": 131,
Expand All @@ -77,6 +79,24 @@ func TestInfoCommand(t *testing.T) {
"size": 2097152,
"dedup-size-not-in-seed": 1114112,
"dedup-size-not-in-seed-nor-cache": 853943,
"dedup-size-not-in-seed-nor-cache-compressed": 818145,
"chunk-size-min": 2048,
"chunk-size-avg": 8192,
"chunk-size-max": 32768
}`)},
{"info command with chunks info that doesn't have the compressed size for all chunk",
[]string{"-s", "testdata/blob2.store", "--chunks-info", "testdata/blob2_chunks_info_missing.json", "testdata/blob2.caibx"},
[]byte(`{
"total": 161,
"unique": 131,
"in-store": 131,
"in-seed": 0,
"in-cache": 0,
"not-in-seed-nor-cache": 131,
"size": 2097152,
"dedup-size-not-in-seed": 1114112,
"dedup-size-not-in-seed-nor-cache": 1114112,
"dedup-size-not-in-seed-nor-cache-compressed": 0,
"chunk-size-min": 2048,
"chunk-size-avg": 8192,
"chunk-size-max": 32768
Expand Down
106 changes: 106 additions & 0 deletions cmd/desync/inspectchunks.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package main

import (
"context"
"fmt"
"github.com/folbricht/desync"
"github.com/spf13/cobra"
"io"
"os"
)

// inspectChunksOptions holds the command-line options of the inspect-chunks
// command.
type inspectChunksOptions struct {
	cmdStoreOptions
	// store is the location of an optional local store used to look up
	// compressed chunk sizes.
	store string
}

// newinspectChunksCommand builds the cobra command for "inspect-chunks",
// which prints per-chunk details for an index, optionally enriched with
// compressed sizes read from a local store.
func newinspectChunksCommand(ctx context.Context) *cobra.Command {
	var options inspectChunksOptions

	command := &cobra.Command{
		Use:   "inspect-chunks <index> [<output>]",
		Short: "Inspect chunks from an index and an optional local store",
		Long: `Prints a detailed JSON with information about chunks stored in an index file.
By using the '--store' option to provide a local store, the generated JSON will include, if
available, the chunks compressed size info from that particular store.`,
		Example: ` desync inspect-chunks file.caibx
desync inspect-chunks --store /mnt/store file.caibx inspect_result.json`,
		Args: cobra.RangeArgs(1, 2),
		RunE: func(cmd *cobra.Command, args []string) error {
			return runInspectChunks(ctx, options, args)
		},
		SilenceUsage: true,
	}
	command.Flags().StringVarP(&options.store, "store", "s", "", "local source store")
	addStoreOptions(&options.cmdStoreOptions, command.Flags())
	return command
}

// runInspectChunks prints a JSON document with details for every chunk
// referenced by the index in args[0]: the chunk ID, its uncompressed size
// and, when a local store is provided via --store, its compressed size in
// that store. If args[1] is present the JSON is written to that file,
// otherwise to stdout. Returns an error if the index can't be read, the
// output file can't be created, or --store does not point to a local store.
func runInspectChunks(ctx context.Context, opt inspectChunksOptions, args []string) error {
	if err := opt.cmdStoreOptions.validate(); err != nil {
		return err
	}

	// Default to stdout; switch to a file when an output path was given.
	outFile := io.Writer(stdout)
	if len(args) == 2 {
		f, err := os.Create(args[1])
		if err != nil {
			return err
		}
		// Close the output file on every return path; the original code
		// leaked this handle. Write errors still surface through printJSON.
		defer f.Close()
		outFile = f
	}

	// Read the input index.
	c, err := readCaibxFile(args[0], opt.cmdStoreOptions)
	if err != nil {
		return err
	}

	var (
		chunksInfo []desync.ChunkAdditionalInfo
		s          desync.LocalStore
	)

	if opt.store != "" {
		sr, err := storeFromLocation(opt.store, opt.cmdStoreOptions)
		if err != nil {
			return err
		}

		// We expect a local store, it is an error to provide something different.
		var ok bool
		s, ok = sr.(desync.LocalStore)
		if !ok {
			return fmt.Errorf("'%s' is not a local store", opt.store)
		}
	}

	for _, chunk := range c.Chunks {
		// See if we're meant to stop, before doing any more work.
		select {
		case <-ctx.Done():
			// NOTE(review): cancellation is reported as success and no JSON is
			// written; callers cannot distinguish this from an empty result.
			// Kept as-is to preserve the existing behavior — confirm whether
			// ctx.Err() should be returned instead.
			return nil
		default:
		}

		// Get the compressed size only if the store actually has compressed
		// chunks. A failed lookup deliberately reports a size of 0, which
		// consumers treat as "compressed size unknown".
		var size int64
		if opt.store != "" && !s.Opt.Uncompressed {
			size, _ = s.GetChunkSize(chunk.ID)
		}

		chunksInfo = append(chunksInfo, desync.ChunkAdditionalInfo{
			ID:               chunk.ID,
			UncompressedSize: chunk.Size,
			CompressedSize:   size,
		})
	}

	return printJSON(outFile, chunksInfo)
}
58 changes: 58 additions & 0 deletions cmd/desync/inspectchunks_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package main

import (
"bytes"
"context"
"encoding/json"
"io/ioutil"
"os"
"testing"

"github.com/folbricht/desync"
"github.com/stretchr/testify/require"
)

func TestInspectChunksCommand(t *testing.T) {
for _, test := range []struct {
name string
args []string
expectedOutputJSON string
}{
{"inspect the chunks info with a local store",
[]string{"-s", "testdata/blob2.store", "testdata/blob2.caibx"},
"testdata/blob2_chunks_info.json",
},
{"run inspect with a seed that doesn't have all the compressed chunks",
[]string{"-s", "testdata/blob2.cache", "testdata/blob2.caibx"},
"testdata/blob2_chunks_info_missing.json",
},
{"inspect the chunks info without any stores",
[]string{"testdata/blob2.caibx"},
"testdata/blob2_chunks_info_no_store.json",
},
} {
t.Run(test.name, func(t *testing.T) {
var exp []desync.ChunkAdditionalInfo
be, err := os.ReadFile(test.expectedOutputJSON)
require.NoError(t, err)
err = json.Unmarshal(be, &exp)
require.NoError(t, err)

cmd := newinspectChunksCommand(context.Background())
cmd.SetArgs(test.args)
b := new(bytes.Buffer)

// Redirect the command's output
stdout = b
cmd.SetOutput(ioutil.Discard)
_, err = cmd.ExecuteC()
require.NoError(t, err)

// Decode the output and compare to what's expected
var got []desync.ChunkAdditionalInfo
err = json.Unmarshal(b.Bytes(), &got)
require.NoError(t, err)
require.Equal(t, exp, got)
})
}
}
1 change: 1 addition & 0 deletions cmd/desync/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ func main() {
newChopCommand(ctx),
newChunkCommand(ctx),
newInfoCommand(ctx),
newinspectChunksCommand(ctx),
newListCommand(ctx),
newMountIndexCommand(ctx),
newPruneCommand(ctx),
Expand Down
Loading

0 comments on commit ba044f9

Please sign in to comment.