Skip to content

Commit

Permalink
Implement zstd compression
Browse files Browse the repository at this point in the history
Zstd compression can be enabled by providing `-zstd` or `-zstd-level <level>`
to `init`, `add`, or `backup`. With `-zstd` the compression level will be
`default`, and with `-zstd-level` the level can be any of `fastest`, `default`,
`better`, or `best`.
  • Loading branch information
gilbertchen committed Mar 27, 2023
1 parent 9f27604 commit 53b0f3f
Show file tree
Hide file tree
Showing 6 changed files with 138 additions and 5 deletions.
54 changes: 52 additions & 2 deletions duplicacy/duplicacy_main.go
Original file line number Diff line number Diff line change
Expand Up @@ -371,15 +371,15 @@ func configRepository(context *cli.Context, init bool) {
} else if existingConfig.CompressionLevel != 100 {
duplicacy.LOG_ERROR("STORAGE_COMPRESSION", "This storage is configured with an invalid compression level %d", existingConfig.CompressionLevel)
return
} else if existingConfig.CompressionLevel != duplicacy.DEFAULT_COMPRESSION_LEVEL {
duplicacy.LOG_INFO("STORAGE_COMPRESSION", "Compression level: %d", existingConfig.CompressionLevel)
}

// Don't print config in the background mode
if !duplicacy.RunInBackground {
existingConfig.Print()
}
} else {
compressionLevel := 100

averageChunkSize := duplicacy.AtoSize(context.String("chunk-size"))
if averageChunkSize == 0 {
fmt.Fprintf(context.App.Writer, "Invalid average chunk size: %s.\n\n", context.String("chunk-size"))
Expand Down Expand Up @@ -487,6 +487,18 @@ func configRepository(context *cli.Context, init bool) {
}
}

compressionLevel := 100
zstdLevel := context.String("zstd-level")
if zstdLevel != "" {
if level, found := duplicacy.ZSTD_COMPRESSION_LEVELS[zstdLevel]; found {
compressionLevel = level
} else {
duplicacy.LOG_ERROR("STORAGE_COMPRESSION", "Invalid zstd compression level: %s", zstdLevel)
}
} else if context.Bool("zstd") {
compressionLevel = duplicacy.ZSTD_COMPRESSION_LEVEL_DEFAULT
}

duplicacy.ConfigStorage(storage, iterations, compressionLevel, averageChunkSize, maximumChunkSize,
minimumChunkSize, storagePassword, otherConfig, bitCopy, context.String("key"), dataShards, parityShards)
}
Expand Down Expand Up @@ -786,6 +798,17 @@ func backupRepository(context *cli.Context) {
backupManager.SetupSnapshotCache(preference.Name)
backupManager.SetDryRun(dryRun)

zstdLevel := context.String("zstd-level")
if zstdLevel != "" {
if level, found := duplicacy.ZSTD_COMPRESSION_LEVELS[zstdLevel]; found {
backupManager.SetCompressionLevel(level)
} else {
duplicacy.LOG_ERROR("STORAGE_COMPRESSION", "Invalid zstd compression level: %s", zstdLevel)
}
} else if context.Bool("zstd") {
backupManager.SetCompressionLevel(duplicacy.ZSTD_COMPRESSION_LEVEL_DEFAULT)
}

metadataChunkSize := context.Int("metadata-chunk-size")
maximumInMemoryEntries := context.Int("max-in-memory-entries")
backupManager.Backup(repository, quickMode, threads, context.String("t"), showStatistics, enableVSS, vssTimeout, enumOnly, metadataChunkSize, maximumInMemoryEntries)
Expand Down Expand Up @@ -1428,6 +1451,15 @@ func main() {
Usage: "the minimum size of chunks (defaults to chunk-size/4)",
Argument: "<size>",
},
cli.StringFlag{
Name: "zstd-level",
Usage: "set zstd compression level (fast, default, better, or best)",
Argument: "<level>",
},
cli.BoolFlag{
Name: "zstd",
Usage: "short for -zstd default",
},
cli.IntFlag{
Name: "iterations",
Usage: "the number of iterations used in storage key derivation (default is 16384)",
Expand Down Expand Up @@ -1495,6 +1527,15 @@ func main() {
Name: "dry-run",
Usage: "dry run for testing, don't backup anything. Use with -stats and -d",
},
cli.StringFlag{
Name: "zstd-level",
Usage: "set zstd compression level (fast, default, better, or best)",
Argument: "<level>",
},
cli.BoolFlag{
Name: "zstd",
Usage: "short for -zstd default",
},
cli.BoolFlag{
Name: "vss",
Usage: "enable the Volume Shadow Copy service (Windows and macOS using APFS only)",
Expand Down Expand Up @@ -1938,6 +1979,15 @@ func main() {
Usage: "the minimum size of chunks (default is chunk-size/4)",
Argument: "<size>",
},
cli.StringFlag{
Name: "zstd-level",
Usage: "set zstd compression level (fast, default, better, or best)",
Argument: "<level>",
},
cli.BoolFlag{
Name: "zstd",
Usage: "short for -zstd default",
},
cli.IntFlag{
Name: "iterations",
Usage: "the number of iterations used in storage key derivation (default is 16384)",
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ require (
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect
github.com/googleapis/gax-go/v2 v2.0.5 // indirect
github.com/jmespath/go-jmespath v0.3.0 // indirect
github.com/klauspost/compress v1.16.3 // indirect
github.com/klauspost/cpuid v1.3.1 // indirect
github.com/kr/fs v0.1.0 // indirect
github.com/kr/text v0.2.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCV
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY=
github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/klauspost/cpuid v1.2.4/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/cpuid v1.3.1 h1:5JNjFYYQrZeKRJ0734q51WCEEn2huer72Dc7K+R/b6s=
github.com/klauspost/cpuid v1.3.1/go.mod h1:bYW4mA6ZgKPob1/Dlai2LviZJO7KGI3uoWLd42rAQw4=
Expand Down
6 changes: 6 additions & 0 deletions src/duplicacy_backupmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ func (manager *BackupManager) SetDryRun(dryRun bool) {
manager.config.dryRun = dryRun
}

func (manager *BackupManager) SetCompressionLevel(level int) {
manager.config.CompressionLevel = level
}

// CreateBackupManager creates a backup manager using the specified 'storage'. 'snapshotID' is a unique id to
// identify snapshots created for this repository. 'top' is the top directory of the repository. 'password' is the
// master key which can be nil if encryption is not enabled.
Expand Down Expand Up @@ -138,6 +142,8 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta

LOG_DEBUG("BACKUP_PARAMETERS", "top: %s, quick: %t, tag: %s", top, quickMode, tag)

manager.config.PrintCompressionLevel()

if manager.config.DataShards != 0 && manager.config.ParityShards != 0 {
LOG_INFO("BACKUP_ERASURECODING", "Erasure coding is enabled with %d data shards and %d parity shards",
manager.config.DataShards, manager.config.ParityShards)
Expand Down
52 changes: 51 additions & 1 deletion src/duplicacy_chunk.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/bkaradzic/go-lz4"
"github.com/minio/highwayhash"
"github.com/klauspost/reedsolomon"
"github.com/klauspost/compress/zstd"

// This is a fork of github.com/minio/highwayhash at 1.0.1 that computes incorrect hash on
// arm64 machines. We need this fork to be able to read the chunks created by Duplicacy
Expand Down Expand Up @@ -267,6 +268,38 @@ func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isMetada
deflater, _ := zlib.NewWriterLevel(encryptedBuffer, chunk.config.CompressionLevel)
deflater.Write(chunk.buffer.Bytes())
deflater.Close()
} else if chunk.config.CompressionLevel >= ZSTD_COMPRESSION_LEVEL_FASTEST && chunk.config.CompressionLevel <= ZSTD_COMPRESSION_LEVEL_BEST {
encryptedBuffer.Write([]byte("ZSTD"))

compressionLevel := zstd.SpeedDefault
if chunk.config.CompressionLevel == ZSTD_COMPRESSION_LEVEL_FASTEST {
compressionLevel = zstd.SpeedFastest
} else if chunk.config.CompressionLevel == ZSTD_COMPRESSION_LEVEL_BETTER {
compressionLevel = zstd.SpeedBetterCompression
} else if chunk.config.CompressionLevel == ZSTD_COMPRESSION_LEVEL_BEST {
compressionLevel = zstd.SpeedBestCompression
}

deflater, err := zstd.NewWriter(encryptedBuffer, zstd.WithEncoderLevel(compressionLevel))
if err != nil {
return err
}

// Make sure we have enough space in encryptedBuffer
availableLength := encryptedBuffer.Cap() - len(encryptedBuffer.Bytes())
maximumLength := deflater.MaxEncodedSize(chunk.buffer.Len())
if availableLength < maximumLength {
encryptedBuffer.Grow(maximumLength - availableLength)
}
_, err = deflater.Write(chunk.buffer.Bytes())
if err != nil {
return fmt.Errorf("ZSTD compression error: %v", err)
}

err = deflater.Close()
if err != nil {
return fmt.Errorf("ZSTD compression error: %v", err)
}
} else if chunk.config.CompressionLevel == DEFAULT_COMPRESSION_LEVEL {
encryptedBuffer.Write([]byte("LZ4 "))
// Make sure we have enough space in encryptedBuffer
Expand Down Expand Up @@ -361,7 +394,6 @@ func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isMetada
chunk.buffer.Write(header)

return nil

}

// This is to ensure compatibility with Vertical Backup, which still uses HMAC-SHA256 (instead of HMAC-BLAKE2) to
Expand Down Expand Up @@ -633,6 +665,24 @@ func (chunk *Chunk) Decrypt(encryptionKey []byte, derivationKey string) (err err
chunk.hash = nil
return nil, rewriteNeeded
}

if len(compressed) > 4 && string(compressed[:4]) == "ZSTD" {
chunk.buffer.Reset()
chunk.hasher = chunk.config.NewKeyedHasher(chunk.config.HashKey)
chunk.hash = nil

encryptedBuffer.Read(encryptedBuffer.Bytes()[:4])
inflater, err := zstd.NewReader(encryptedBuffer)
if err != nil {
return err, false
}
defer inflater.Close()
if _, err = io.Copy(chunk, inflater); err != nil {
return err, false
}
return nil, rewriteNeeded
}

inflater, err := zlib.NewReader(encryptedBuffer)
if err != nil {
return err, false
Expand Down
28 changes: 26 additions & 2 deletions src/duplicacy_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,19 @@ var DEFAULT_KEY = []byte("duplicacy")
// standard zlib levels of -1 to 9.
var DEFAULT_COMPRESSION_LEVEL = 100

// zstd compression levels starting from 200
var ZSTD_COMPRESSION_LEVEL_FASTEST = 200
var ZSTD_COMPRESSION_LEVEL_DEFAULT = 201
var ZSTD_COMPRESSION_LEVEL_BETTER = 202
var ZSTD_COMPRESSION_LEVEL_BEST = 203

var ZSTD_COMPRESSION_LEVELS = map[string]int {
"fastest": ZSTD_COMPRESSION_LEVEL_FASTEST,
"default": ZSTD_COMPRESSION_LEVEL_DEFAULT,
"better": ZSTD_COMPRESSION_LEVEL_BETTER,
"best": ZSTD_COMPRESSION_LEVEL_BEST,
}

// The new banner of the config file (to differentiate from the old format where the salt and iterations are fixed)
var CONFIG_BANNER = "duplicacy\001"

Expand Down Expand Up @@ -202,6 +215,14 @@ func (config *Config) Print() {

}

func (config *Config) PrintCompressionLevel() {
for name, level := range ZSTD_COMPRESSION_LEVELS {
if level == config.CompressionLevel {
LOG_INFO("COMPRESSION_LEVEL", "Zstd compression is enabled (level: %s)", name)
}
}
}

func CreateConfigFromParameters(compressionLevel int, averageChunkSize int, maximumChunkSize int, mininumChunkSize int,
isEncrypted bool, copyFrom *Config, bitCopy bool) (config *Config) {

Expand Down Expand Up @@ -294,7 +315,10 @@ func (config *Config) PutChunk(chunk *Chunk) {
}

func (config *Config) NewKeyedHasher(key []byte) hash.Hash {
if config.CompressionLevel == DEFAULT_COMPRESSION_LEVEL {
// Early versions of Duplicacy used SHA256 as the hash function for chunk IDs at the time when
// only zlib compression was supported. Later SHA256 was replaced by Blake2b and LZ4 was used
// for compression (with compression level set to 100).
if config.CompressionLevel >= DEFAULT_COMPRESSION_LEVEL {
hasher, err := blake2.New(&blake2.Config{Size: 32, Key: key})
if err != nil {
LOG_ERROR("HASH_KEY", "Invalid hash key: %x", key)
Expand Down Expand Up @@ -339,7 +363,7 @@ func (hasher *DummyHasher) BlockSize() int {
func (config *Config) NewFileHasher() hash.Hash {
if SkipFileHash {
return &DummyHasher{}
} else if config.CompressionLevel == DEFAULT_COMPRESSION_LEVEL {
} else if config.CompressionLevel >= DEFAULT_COMPRESSION_LEVEL {
hasher, _ := blake2.New(&blake2.Config{Size: 32})
return hasher
} else {
Expand Down

3 comments on commit 53b0f3f

@gilbertchen
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This commit has been mentioned on Duplicacy Forum. There might be relevant details there:

https://forum.duplicacy.com/t/zstd-compression-implemented/7469/1

@gilbertchen
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This commit has been mentioned on Duplicacy Forum. There might be relevant details there:

https://forum.duplicacy.com/t/cli-release-3-2-0-is-now-available/7952/1

@gilbertchen
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This commit has been mentioned on Duplicacy Forum. There might be relevant details there:

https://forum.duplicacy.com/t/duplicacy-cli-3-2-2-release/8019/1

Please sign in to comment.