AzCopy Release 10.10.0
Merge pull request #1403 from Azure/dev
nakulkar-msft authored Apr 14, 2021
2 parents 4caa643 + 4431d5b commit 067b2fc
Showing 182 changed files with 1,541 additions and 559 deletions.
18 changes: 18 additions & 0 deletions ChangeLog.md
@@ -1,6 +1,24 @@

# Change Log

## Version 10.10.0

### New features
1. Support sync for Local/Blob <-> Azure File.
1. Download to a temporary file path (.azDownload-[jobID]-[name]) before renaming to the original path (see the sketch just after this list).
1. Support CPK by name and CPK by value.
1. Offer a knob to disable application logging (Syslog/Windows Event Log).
1. Trust the zonal DNS suffix for OAuth by default.
1. Add an include-directory-stub flag to the copy command, to allow copying of blobs with the metadata `hdi_isfolder:true`.
1. Display more fields for the list command; refer to the help message for examples.
1. Provide an environment variable to set the request try timeout, to allow faster retries.
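
A minimal Go sketch of the temporary-path rename in the download feature above; the helper and variable names are illustrative, not AzCopy's actual implementation (assumes "os" and "path/filepath" are imported):

func finalizeDownload(dir, jobID, name string) error {
	// the blob is written under a temporary name first, so an interrupted
	// download never leaves a partial file at the destination name
	tmpPath := filepath.Join(dir, ".azDownload-"+jobID+"-"+name)
	// ... download body is written to tmpPath ...
	return os.Rename(tmpPath, filepath.Join(dir, name)) // rename once the download completes
}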

### Bug fixes
1. Improve the job progress update mechanism, to scale better for large jobs.
1. Time-limit the container creation step, to avoid hangs and improve UX.
1. Set SMB info/permissions again after file upload and copy, to fully preserve the integrity of the permission string and last-write-time.
1. Fixed module import problem for V10.

## Version 10.9.0

### New features
11 changes: 8 additions & 3 deletions azbfs/zc_policy_request_log.go
@@ -22,6 +22,11 @@ type RequestLogOptions struct {
// LogWarningIfTryOverThreshold logs a warning if a tried operation takes longer than the specified
// duration (-1=no logging; 0=default threshold).
LogWarningIfTryOverThreshold time.Duration

// SyslogDisabled indicates whether logging to Syslog/Windows Event Log is disabled.
// By default we log to Syslog/Windows Event Log; if SyslogDisabled is not set
// explicitly, it defaults to false.
SyslogDisabled bool
}

func (o RequestLogOptions) defaults() RequestLogOptions {
@@ -64,18 +69,18 @@ func NewRequestLogPolicyFactory_Deprecated(o RequestLogOptions) pipeline.Factory
// If the response took too long, we'll upgrade to warning.
if o.LogWarningIfTryOverThreshold > 0 && tryDuration > o.LogWarningIfTryOverThreshold {
// Log a warning if the try duration exceeded the specified threshold
logLevel, forceLog = pipeline.LogWarning, true
logLevel, forceLog = pipeline.LogWarning, !o.SyslogDisabled
}

if err == nil { // We got a response from the service
sc := response.Response().StatusCode
if ((sc >= 400 && sc <= 499) && sc != http.StatusNotFound && sc != http.StatusConflict && sc != http.StatusPreconditionFailed && sc != http.StatusRequestedRangeNotSatisfiable) || (sc >= 500 && sc <= 599) {
logLevel, forceLog = pipeline.LogError, true // Promote to Error any 4xx (except those listed is an error) or any 5xx
logLevel, forceLog = pipeline.LogError, !o.SyslogDisabled // Promote any 4xx (except those excluded above) or any 5xx to Error
} else {
// For other status codes, we leave the level as is.
}
} else { // This error did not get an HTTP response from the service; upgrade the severity to Error
logLevel, forceLog = pipeline.LogError, true
logLevel, forceLog = pipeline.LogError, !o.SyslogDisabled
}

if shouldLog := po.ShouldLog(logLevel); forceLog || shouldLog {
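
With this change, log levels are still promoted on slow tries and on error responses, but the forced write to Syslog/Windows Event Log is suppressed when SyslogDisabled is true. A minimal usage sketch, using only names from this file (the surrounding pipeline wiring is elided):

requestLogFactory := NewRequestLogPolicyFactory_Deprecated(RequestLogOptions{
	LogWarningIfTryOverThreshold: 4 * time.Second, // warn when a single try exceeds 4s
	SyslogDisabled:               true,            // promote severity, but never force a syslog write
})
// requestLogFactory is then installed into the azbfs request pipeline alongside retry, auth, etc.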
2 changes: 1 addition & 1 deletion azbfs/zt_retry_reader_test.go
@@ -5,7 +5,7 @@ import (
"crypto/rand"
"errors"
"fmt"
"github.com/Azure/azure-storage-azcopy/azbfs"
"github.com/Azure/azure-storage-azcopy/v10/azbfs"
"io"
"net"
"net/http"
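
This one-line import change, and the many like it below, are the changelog's "Fixed module import problem for V10": Go's semantic import versioning requires a module at major version 2 or above to carry the major version in its module path. The module line in go.mod should therefore read (a sketch; go.mod itself is not shown in this excerpt):

module github.com/Azure/azure-storage-azcopy/v10

and every internal import must use the /v10 path to match.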
2 changes: 1 addition & 1 deletion azbfs/zt_test.go
@@ -10,7 +10,7 @@ import (
"testing"
"time"

"github.com/Azure/azure-storage-azcopy/azbfs"
"github.com/Azure/azure-storage-azcopy/v10/azbfs"
chk "gopkg.in/check.v1"
)

2 changes: 1 addition & 1 deletion azbfs/zt_url_directory_test.go
@@ -2,7 +2,7 @@ package azbfs_test

import (
"context"
"github.com/Azure/azure-storage-azcopy/azbfs"
"github.com/Azure/azure-storage-azcopy/v10/azbfs"
chk "gopkg.in/check.v1"
"net/http"
)
2 changes: 1 addition & 1 deletion azbfs/zt_url_file_test.go
@@ -14,7 +14,7 @@ import (
"net/url"
//"strings"

"github.com/Azure/azure-storage-azcopy/azbfs"
"github.com/Azure/azure-storage-azcopy/v10/azbfs"
chk "gopkg.in/check.v1" // go get gopkg.in/check.v1
"io/ioutil"
"net/http"
2 changes: 1 addition & 1 deletion azbfs/zt_url_filesystem_test.go
@@ -4,7 +4,7 @@ import (
"context"
"os"

"github.com/Azure/azure-storage-azcopy/azbfs"
"github.com/Azure/azure-storage-azcopy/v10/azbfs"
chk "gopkg.in/check.v1"
"net/http"
"net/url"
4 changes: 2 additions & 2 deletions cmd/benchmark.go
@@ -28,8 +28,8 @@ import (
"strconv"
"strings"

"github.com/Azure/azure-storage-azcopy/azbfs"
"github.com/Azure/azure-storage-azcopy/common"
"github.com/Azure/azure-storage-azcopy/v10/azbfs"
"github.com/Azure/azure-storage-azcopy/v10/common"
"github.com/Azure/azure-storage-blob-go/azblob"
"github.com/Azure/azure-storage-file-go/azfile"
"github.com/spf13/cobra"
2 changes: 1 addition & 1 deletion cmd/cancel.go
@@ -24,7 +24,7 @@ import (
"errors"
"fmt"

"github.com/Azure/azure-storage-azcopy/common"
"github.com/Azure/azure-storage-azcopy/v10/common"
"github.com/spf13/cobra"
)

67 changes: 61 additions & 6 deletions cmd/copy.go
@@ -40,8 +40,8 @@ import (
"github.com/Azure/azure-storage-blob-go/azblob"
"github.com/spf13/cobra"

"github.com/Azure/azure-storage-azcopy/common"
"github.com/Azure/azure-storage-azcopy/ste"
"github.com/Azure/azure-storage-azcopy/v10/common"
"github.com/Azure/azure-storage-azcopy/v10/ste"
)

const pipingUploadParallelism = 5
@@ -146,6 +146,15 @@ type rawCopyCmdArgs struct {

// whether to include blobs that have metadata 'hdi_isfolder = true'
includeDirectoryStubs bool

// Optional flag to encrypt user data with a user-provided key.
// The key is provided in the REST request itself.
// The provided key (EncryptionKey) and its hash (EncryptionKeySHA256) are fetched from environment variables.
// EncryptionAlgorithm is set to "AES256" by default.
cpkInfo bool
// The key is stored in Azure Key Vault, and the Key Vault is linked to the storage account.
// The provided key name is fetched from Azure Key Vault and used to encrypt the data.
cpkScopeInfo string
}

func (raw *rawCopyCmdArgs) parsePatterns(pattern string) (cookedPatterns []string) {
@@ -525,6 +534,38 @@ func (raw rawCopyCmdArgs) cook() (cookedCopyCmdArgs, error) {
}
}

cpkOptions := common.CpkOptions{}
// Setting CPK-N
if raw.cpkScopeInfo != "" {
if raw.cpkInfo {
return cooked, errors.New("cannot use both cpk-by-name and cpk-by-value at the same time")
}
cpkOptions.CpkScopeInfo = raw.cpkScopeInfo
}

// Setting CPK-V
// Get the key (EncryptionKey and EncryptionKeySHA256) value from environment variables when required.
cpkOptions.CpkInfo = raw.cpkInfo

if cpkOptions.CpkScopeInfo != "" || cpkOptions.CpkInfo {
// We only support transfers from a source encrypted with a user-provided key when the user is downloading.
// Due to a service limitation, S2S transfer is not supported when the source is encrypted with a user-provided key.
if cooked.fromTo.IsDownload() {
glcm.Info("Client Provided Key (CPK) for encryption/decryption is provided for download scenario. " +
"Assuming source is encrypted.")
cpkOptions.IsSourceEncrypted = true
}

// TODO: Remove these warnings once the service starts supporting this
if cooked.blockBlobTier != common.EBlockBlobTier.None() || cooked.pageBlobTier != common.EPageBlobTier.None() {
glcm.Info("Tier was provided explicitly by the user. Ignoring it because the Azure service currently does" +
" not support setting the tier when client-provided keys are involved.")
}
}

cooked.cpkOptions = cpkOptions

// Make sure the given input is the one of the enums given by the blob SDK
err = cooked.deleteSnapshotsOption.Parse(raw.deleteSnapshotsOption)
if err != nil {
@@ -1012,6 +1053,8 @@ type cookedCopyCmdArgs struct {

// whether to include blobs that have metadata 'hdi_isfolder = true'
includeDirectoryStubs bool

cpkOptions common.CpkOptions
}

func (cca *cookedCopyCmdArgs) isRedirection() bool {
@@ -1069,7 +1112,7 @@ func (cca *cookedCopyCmdArgs) processRedirectionDownload(blobResource common.Res
// The isPublic flag is useful in S2S transfers but doesn't much matter for download. Fortunately, no S2S happens here.
// This means that if there's auth, there's auth. We're happy and can move on.
// getCredentialInfoForLocation also populates oauth token fields... so, it's very easy.
credInfo, _, err := getCredentialInfoForLocation(ctx, common.ELocation.Blob(), blobResource.Value, blobResource.SAS, true)
credInfo, _, err := getCredentialInfoForLocation(ctx, common.ELocation.Blob(), blobResource.Value, blobResource.SAS, true, cca.cpkOptions)

if err != nil {
return fmt.Errorf("fatal: cannot find auth on source blob URL: %s", err.Error())
@@ -1089,7 +1132,11 @@

// step 3: start download
blobURL := azblob.NewBlobURL(*u, p)
blobStream, err := blobURL.Download(ctx, 0, azblob.CountToEnd, azblob.BlobAccessConditions{}, false, azblob.ClientProvidedKeyOptions{})
clientProvidedKey := azblob.ClientProvidedKeyOptions{}
if cca.cpkOptions.IsSourceEncrypted {
clientProvidedKey = common.GetClientProvidedKey(cca.cpkOptions)
}
blobStream, err := blobURL.Download(ctx, 0, azblob.CountToEnd, azblob.BlobAccessConditions{}, false, clientProvidedKey)
if err != nil {
return fmt.Errorf("fatal: cannot download blob due to error: %s", err.Error())
}
@@ -1115,7 +1162,7 @@ func (cca *cookedCopyCmdArgs) processRedirectionUpload(blobResource common.Resou
}

// getCredentialInfoForLocation populates oauth token fields... so, it's very easy.
credInfo, _, err := getCredentialInfoForLocation(ctx, common.ELocation.Blob(), blobResource.Value, blobResource.SAS, false)
credInfo, _, err := getCredentialInfoForLocation(ctx, common.ELocation.Blob(), blobResource.Value, blobResource.SAS, false, cca.cpkOptions)

if err != nil {
return fmt.Errorf("fatal: cannot find auth on source blob URL: %s", err.Error())
@@ -1161,7 +1208,7 @@ func (cca *cookedCopyCmdArgs) processCopyJobPartOrders() (err error) {
destination: cca.destination.Value,
sourceSAS: cca.source.SAS,
destinationSAS: cca.destination.SAS,
}); err != nil {
}, cca.cpkOptions); err != nil {
return err
}

@@ -1707,6 +1754,7 @@ func init() {
cpCmd.PersistentFlags().StringVar(&raw.listOfVersionIDs, "list-of-versions", "", "Specifies a file where each version id is listed on a separate line. Ensure that the source must point to a single blob and all the version ids specified in the file using this flag must belong to the source blob only. AzCopy will download the specified versions in the destination folder provided.")
cpCmd.PersistentFlags().StringVar(&raw.blobTags, "blob-tags", "", "Set tags on blobs to categorize data in your storage account")
cpCmd.PersistentFlags().BoolVar(&raw.s2sPreserveBlobTags, "s2s-preserve-blob-tags", false, "Preserve index tags during service to service transfer from one blob storage to another")
cpCmd.PersistentFlags().BoolVar(&raw.includeDirectoryStubs, "include-directory-stub", false, "False by default to ignore directory stubs. Directory stubs are blobs with metadata 'hdi_isfolder:true'. Setting value to true will preserve directory stubs during transfers.")
// s2sGetPropertiesInBackend is an optional flag for controlling whether S3 object's or Azure file's full properties are get during enumerating in frontend or
// right before transferring in ste(backend).
// The traditional behavior of all existing enumerator is to get full properties during enumerating(more specifically listing),
@@ -1717,6 +1765,13 @@ func init() {
// The usage of this hidden flag is to provide fallback to traditional behavior, when service supports returning full properties during list.
cpCmd.PersistentFlags().BoolVar(&raw.s2sGetPropertiesInBackend, "s2s-get-properties-in-backend", true, "get S3 objects' or Azure files' properties in backend, if properties need to be accessed. Properties need to be accessed if s2s-preserve-properties is true, and in certain other cases where we need the properties for modification time checks or MD5 checks")

// Public Documentation: https://docs.microsoft.com/en-us/azure/storage/blobs/encryption-customer-provided-keys
// Clients making requests against Azure Blob storage have the option to provide an encryption key on a per-request basis.
// Including the encryption key on the request provides granular control over encryption settings for Blob storage operations.
// Customer-provided keys can be stored in Azure Key Vault or in another key store linked to the storage account.
cpCmd.PersistentFlags().StringVar(&raw.cpkScopeInfo, "cpk-by-name", "", "Client-provided key by name gives clients making requests against Azure Blob storage the option to provide an encryption key on a per-request basis. The named key will be fetched from Azure Key Vault and used to encrypt the data")
cpCmd.PersistentFlags().BoolVar(&raw.cpkInfo, "cpk-by-value", false, "Client-provided key by value gives clients making requests against Azure Blob storage the option to provide an encryption key on a per-request basis. The key and its hash will be fetched from environment variables")

// permanently hidden
// Hide the list-of-files flag since it is implemented only for Storage Explorer.
cpCmd.PersistentFlags().MarkHidden("list-of-files")
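
Taken together, the copy.go changes wire two mutually exclusive flags into cookedCopyCmdArgs: --cpk-by-name carries a Key Vault key name, while --cpk-by-value signals that the key material is read from the environment. A sketch of how that by-value lookup might build the azblob options; the environment-variable names and SDK field names here are assumptions, not confirmed by this diff:

// hypothetical helper; AzCopy's real lookup may differ (assumes "os" and azblob are imported)
func cpkFromEnvironment() azblob.ClientProvidedKeyOptions {
	key := os.Getenv("CPK_ENCRYPTION_KEY")              // assumed name: base64-encoded AES-256 key
	keySHA256 := os.Getenv("CPK_ENCRYPTION_KEY_SHA256") // assumed name: base64-encoded SHA-256 of the key
	return azblob.ClientProvidedKeyOptions{
		EncryptionKey:       &key,
		EncryptionKeySha256: &keySHA256,
		EncryptionAlgorithm: "AES256",
	}
}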
23 changes: 16 additions & 7 deletions cmd/copyEnumeratorHelper.go
@@ -3,11 +3,11 @@ package cmd
import (
"fmt"
"github.com/Azure/azure-pipeline-go/pipeline"
"github.com/Azure/azure-storage-azcopy/ste"
"github.com/Azure/azure-storage-azcopy/v10/ste"
"math/rand"
"strings"

"github.com/Azure/azure-storage-azcopy/common"
"github.com/Azure/azure-storage-azcopy/v10/common"
)

var enumerationParallelism = 1
@@ -22,8 +22,8 @@ func addTransfer(e *common.CopyJobPartOrderRequest, transfer common.CopyTransfer
// dispatch the transfers once the number reaches NumOfFilesPerDispatchJobPart
// we do this so that in the case of large transfer, the transfer engine can get started
// while the frontend is still gathering more transfers
if len(e.Transfers) == NumOfFilesPerDispatchJobPart {
shuffleTransfers(e.Transfers)
if len(e.Transfers.List) == NumOfFilesPerDispatchJobPart {
shuffleTransfers(e.Transfers.List)
resp := common.CopyJobPartOrderResponse{}

Rpc(common.ERpcCmd.CopyJobPartOrder(), (*common.CopyJobPartOrderRequest)(e), &resp)
@@ -35,13 +35,22 @@
if e.PartNum == 0 {
cca.waitUntilJobCompletion(false)
}
e.Transfers = []common.CopyTransfer{}
e.Transfers = common.Transfers{}
e.PartNum++
}

// only append the transfer after we've checked and dispatched a part
// so that there is at least one transfer for the final part
e.Transfers = append(e.Transfers, transfer)
{
// Should this block be a function? (a possible extraction is sketched after this file's diff)
e.Transfers.List = append(e.Transfers.List, transfer)
e.Transfers.TotalSizeInBytes += uint64(transfer.SourceSize)
if transfer.EntityType == common.EEntityType.File() {
e.Transfers.FileTransferCount++
} else {
e.Transfers.FolderTransferCount++
}
}

return nil
}
@@ -56,7 +65,7 @@ func shuffleTransfers(transfers []common.CopyTransfer) {
// we need to send a last part with isFinalPart set to true, along with whatever transfers that still haven't been sent
// dispatchFinalPart sends a last part with isFinalPart set to true, along with whatever transfers still haven't been sent.
func dispatchFinalPart(e *common.CopyJobPartOrderRequest, cca *cookedCopyCmdArgs) error {
shuffleTransfers(e.Transfers)
shuffleTransfers(e.Transfers.List)
e.IsFinalPart = true
var resp common.CopyJobPartOrderResponse
Rpc(common.ERpcCmd.CopyJobPartOrder(), (*common.CopyJobPartOrderRequest)(e), &resp)
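
The hunks above switch e.Transfers from a bare []common.CopyTransfer to a richer common.Transfers value that tracks aggregate counts alongside the list. From its usage here, the struct presumably looks roughly like this (the field names are the ones used above; the exact types are assumptions):

type Transfers struct {
	List                []CopyTransfer
	TotalSizeInBytes    uint64
	FileTransferCount   uint64
	FolderTransferCount uint64
}

And the inline accounting block could indeed be factored out, e.g. as a hypothetical helper (not part of this commit):

func appendTransfer(t *common.Transfers, transfer common.CopyTransfer) {
	t.List = append(t.List, transfer)
	t.TotalSizeInBytes += uint64(transfer.SourceSize)
	if transfer.EntityType == common.EEntityType.File() {
		t.FileTransferCount++
	} else {
		t.FolderTransferCount++
	}
}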
6 changes: 3 additions & 3 deletions cmd/copyEnumeratorHelper_test.go
@@ -21,7 +21,7 @@
package cmd

import (
"github.com/Azure/azure-storage-azcopy/common"
"github.com/Azure/azure-storage-azcopy/v10/common"
chk "gopkg.in/check.v1"
)

@@ -58,6 +58,6 @@ func (s *copyEnumeratorHelperTestSuite) TestAddTransferPathRootsTrimmed(c *chk.C

// assert
c.Assert(err, chk.IsNil)
c.Assert(request.Transfers[0].Source, chk.Equals, "c.txt")
c.Assert(request.Transfers[0].Destination, chk.Equals, "c.txt")
c.Assert(request.Transfers.List[0].Source, chk.Equals, "c.txt")
c.Assert(request.Transfers.List[0].Destination, chk.Equals, "c.txt")
}