From 82568528d8444ceefd9591dba35c6e132cdbb601 Mon Sep 17 00:00:00 2001 From: Ishaan Verma <99612568+tiverma-msft@users.noreply.github.com> Date: Thu, 12 May 2022 22:03:24 +0530 Subject: [PATCH 01/26] adding input watcher to jobs resume (#1773) --- cmd/jobsResume.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cmd/jobsResume.go b/cmd/jobsResume.go index 7a84717d8..165aa84aa 100644 --- a/cmd/jobsResume.go +++ b/cmd/jobsResume.go @@ -188,6 +188,11 @@ func init() { return errors.New("this command requires jobId to be passed as argument") } resumeCmdArgs.jobID = args[0] + + glcm.EnableInputWatcher() + if cancelFromStdin { + glcm.EnableCancelFromStdIn() + } return nil }, Run: func(cmd *cobra.Command, args []string) { From aad8cad3e8e65c8194d2dffec29df1d9b64c41c1 Mon Sep 17 00:00:00 2001 From: Nitin Singla Date: Wed, 6 Apr 2022 18:41:37 +0530 Subject: [PATCH 02/26] Exported azcopyAppPathFolder variable. --- cmd/credentialUtil.go | 4 ++-- cmd/root.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/credentialUtil.go b/cmd/credentialUtil.go index 8ab2b6eee..f160a9651 100644 --- a/cmd/credentialUtil.go +++ b/cmd/credentialUtil.go @@ -58,11 +58,11 @@ const oauthLoginSessionCacheAccountName = "AzCopyOAuthTokenCache" // Note: Currently, only support to have TokenManager for one user mapping to one tenantID. func GetUserOAuthTokenManagerInstance() *common.UserOAuthTokenManager { once.Do(func() { - if azcopyAppPathFolder == "" { + if AzcopyAppPathFolder == "" { panic("invalid state, azcopyAppPathFolder should be initialized by root") } currentUserOAuthTokenManager = common.NewUserOAuthTokenManagerInstance(common.CredCacheOptions{ - DPAPIFilePath: azcopyAppPathFolder, + DPAPIFilePath: AzcopyAppPathFolder, KeyName: oauthLoginSessionCacheKeyName, ServiceName: oauthLoginSessionCacheServiceName, AccountName: oauthLoginSessionCacheAccountName, diff --git a/cmd/root.go b/cmd/root.go index f7fd4344f..589b83da1 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -40,7 +40,7 @@ import ( "github.com/spf13/cobra" ) -var azcopyAppPathFolder string +var AzcopyAppPathFolder string var azcopyLogPathFolder string var azcopyMaxFileAndSocketHandles int var outputFormatRaw string @@ -156,7 +156,7 @@ var glcmSwapOnce = &sync.Once{} // Execute adds all child commands to the root command and sets flags appropriately. // This is called by main.main(). It only needs to happen once to the rootCmd. func Execute(azsAppPathFolder, logPathFolder string, jobPlanFolder string, maxFileAndSocketHandles int, jobID common.JobID) { - azcopyAppPathFolder = azsAppPathFolder + AzcopyAppPathFolder = azsAppPathFolder azcopyLogPathFolder = logPathFolder common.AzcopyJobPlanFolder = jobPlanFolder azcopyMaxFileAndSocketHandles = maxFileAndSocketHandles From cefe16f3f8e134bdc326a997c306a4629e606eb7 Mon Sep 17 00:00:00 2001 From: Nitin Singla Date: Wed, 6 Apr 2022 18:46:08 +0530 Subject: [PATCH 03/26] Fix for large file blocksize calculation. - This fix in respect to support 190TB files. As of now due to wrong calculation after 24T azcopy fails. --- ste/mgr-JobPartTransferMgr.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ste/mgr-JobPartTransferMgr.go b/ste/mgr-JobPartTransferMgr.go index cf3e24e5d..ee66c2057 100644 --- a/ste/mgr-JobPartTransferMgr.go +++ b/ste/mgr-JobPartTransferMgr.go @@ -345,7 +345,11 @@ func (jptm *jobPartTransferMgr) Info() TransferInfo { * we can have 4 blocks in core, waiting for a disk or n/w operation. 
Any higher block size would *sort of* * serialize n/w and disk operations, and is better avoided. */ - blockSize = sourceSize / common.MaxNumberOfBlocksPerBlob + if (sourceSize % common.MaxNumberOfBlocksPerBlob == 0) { + blockSize = sourceSize/common.MaxNumberOfBlocksPerBlob + } else { + blockSize = sourceSize/common.MaxNumberOfBlocksPerBlob +1 + } break } } From 1bc304d49819eb1f3b3690e62aebb1bbbde50bb6 Mon Sep 17 00:00:00 2001 From: Nitin Singla Date: Wed, 6 Apr 2022 18:47:53 +0530 Subject: [PATCH 04/26] Handle panic error when plan file is empty. --- jobsAdmin/JobsAdmin.go | 6 +++--- jobsAdmin/init.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/jobsAdmin/JobsAdmin.go b/jobsAdmin/JobsAdmin.go index aa10e5489..87130391c 100644 --- a/jobsAdmin/JobsAdmin.go +++ b/jobsAdmin/JobsAdmin.go @@ -323,12 +323,12 @@ func (ja *jobsAdmin) SuccessfulBytesInActiveFiles() uint64 { func (ja *jobsAdmin) ResurrectJob(jobId common.JobID, sourceSAS string, destinationSAS string) bool { // Search the existing plan files for the PartPlans for the given jobId - // only the files which have JobId has prefix and DataSchemaVersion as Suffix + // only the files which are not empty and have JobId has prefix and DataSchemaVersion as Suffix // are include in the result files := func(prefix, ext string) []os.FileInfo { var files []os.FileInfo filepath.Walk(ja.planDir, func(path string, fileInfo os.FileInfo, _ error) error { - if !fileInfo.IsDir() && strings.HasPrefix(fileInfo.Name(), prefix) && strings.HasSuffix(fileInfo.Name(), ext) { + if !fileInfo.IsDir() && fileInfo.Size() != 0 && strings.HasPrefix(fileInfo.Name(), prefix) && strings.HasSuffix(fileInfo.Name(), ext) { files = append(files, fileInfo) } return nil @@ -365,7 +365,7 @@ func (ja *jobsAdmin) ResurrectJobParts() { files := func(ext string) []os.FileInfo { var files []os.FileInfo filepath.Walk(ja.planDir, func(path string, fileInfo os.FileInfo, _ error) error { - if !fileInfo.IsDir() && strings.HasSuffix(fileInfo.Name(), ext) { + if !fileInfo.IsDir() && fileInfo.Size() != 0 && strings.HasSuffix(fileInfo.Name(), ext) { files = append(files, fileInfo) } return nil diff --git a/jobsAdmin/init.go b/jobsAdmin/init.go index 195671bc2..390805a3c 100644 --- a/jobsAdmin/init.go +++ b/jobsAdmin/init.go @@ -776,7 +776,7 @@ func GetJobFromTo(r common.GetJobFromToRequest) common.GetJobFromToResponse { // Search the plan files in Azcopy folder and resurrect the Job. 
if !JobsAdmin.ResurrectJob(r.JobID, EMPTY_SAS_STRING, EMPTY_SAS_STRING) { return common.GetJobFromToResponse{ - ErrorMsg: fmt.Sprintf("no job with JobID %v exists", r.JobID), + ErrorMsg: fmt.Sprintf("Job with JobID %v does not exist or is invalid", r.JobID), } } jm, _ = JobsAdmin.JobMgr(r.JobID) From b5e2fd068093a0bd80e4f5c02bc7bc2e362a7216 Mon Sep 17 00:00:00 2001 From: "microsoft-github-policy-service[bot]" <77245923+microsoft-github-policy-service[bot]@users.noreply.github.com> Date: Wed, 18 May 2022 22:04:43 +0000 Subject: [PATCH 05/26] Microsoft mandatory file --- SECURITY.md | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 000000000..869fdfe2b --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,41 @@ + + +## Security + +Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). + +If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. + +## Reporting Security Issues + +**Please do not report security vulnerabilities through public GitHub issues.** + +Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). + +If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). + +You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). + +Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: + + * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) + * Full paths of source file(s) related to the manifestation of the issue + * The location of the affected source code (tag/branch/commit or direct URL) + * Any special configuration required to reproduce the issue + * Step-by-step instructions to reproduce the issue + * Proof-of-concept or exploit code (if possible) + * Impact of the issue, including how an attacker might exploit the issue + +This information will help us triage your report more quickly. + +If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. + +## Preferred Languages + +We prefer all communications to be in English. + +## Policy + +Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 
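
Editor's note: the block-size fix in patch 03 above is easier to see outside the diff context. Below is a minimal, self-contained Go sketch of the same ceiling-division idea; the constant value and the helper name are illustrative assumptions, not code taken verbatim from the AzCopy source.

package main

import "fmt"

// maxBlocksPerBlob plays the role of common.MaxNumberOfBlocksPerBlob
// (block blobs allow at most 50,000 blocks); the exact value here is an
// assumption for illustration.
const maxBlocksPerBlob = 50_000

// ceilBlockSize returns the smallest block size that fits sourceSize into
// at most maxBlocksPerBlob blocks. Plain integer division rounds down, so
// for very large files (the ~190 TB case the commit message mentions) the
// computed block size can be too small to cover the whole file; rounding up
// when the size is not an exact multiple avoids that.
func ceilBlockSize(sourceSize int64) int64 {
	if sourceSize%maxBlocksPerBlob == 0 {
		return sourceSize / maxBlocksPerBlob
	}
	return sourceSize/maxBlocksPerBlob + 1
}

func main() {
	// A ~190 TiB source: integer division alone would undersize each block.
	const size = int64(190) << 40
	fmt.Println(ceilBlockSize(size))
}

The same rounding could also be written as (sourceSize + maxBlocksPerBlob - 1) / maxBlocksPerBlob; the patch keeps the explicit modulo check, which reads more naturally next to the surrounding comments. Patch 04 above applies a related defensive idea on the enumeration side: plan files with fileInfo.Size() == 0 are skipped during resurrection so an empty plan file no longer causes a panic.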
+ + From 82845b5f7ddc677d011db25f0560dafc83b6d77b Mon Sep 17 00:00:00 2001 From: Ishaan Verma <99612568+tiverma-msft@users.noreply.github.com> Date: Thu, 26 May 2022 11:14:03 +0530 Subject: [PATCH 06/26] adding time based flags to remove (#1787) --- cmd/remove.go | 2 ++ cmd/removeEnumerator.go | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/cmd/remove.go b/cmd/remove.go index e31191ede..0d28d4ab7 100644 --- a/cmd/remove.go +++ b/cmd/remove.go @@ -116,4 +116,6 @@ func init() { deleteCmd.PersistentFlags().BoolVar(&raw.dryrun, "dry-run", false, "Prints the path files that would be removed by the command. This flag does not trigger the removal of the files.") deleteCmd.PersistentFlags().StringVar(&raw.fromTo, "from-to", "", "Optionally specifies the source destination combination. For Example: BlobTrash, FileTrash, BlobFSTrash") deleteCmd.PersistentFlags().StringVar(&raw.permanentDeleteOption, "permanent-delete", "none", "This is a preview feature that PERMANENTLY deletes soft-deleted snapshots/versions. Possible values include 'snapshots', 'versions', 'snapshotsandversions', 'none'.") + deleteCmd.PersistentFlags().StringVar(&raw.includeBefore, common.IncludeBeforeFlagName, "", "Include only those files modified before or on the given date/time. The value should be in ISO8601 format. If no timezone is specified, the value is assumed to be in the local timezone of the machine running AzCopy. E.g. '2020-08-19T15:04:00Z' for a UTC time, or '2020-08-19' for midnight (00:00) in the local timezone. As of AzCopy 10.7, this flag applies only to files, not folders, so folder properties won't be copied when using this flag with --preserve-smb-info or --preserve-smb-permissions.") + deleteCmd.PersistentFlags().StringVar(&raw.includeAfter, common.IncludeAfterFlagName, "", "Include only those files modified on or after the given date/time. The value should be in ISO8601 format. If no timezone is specified, the value is assumed to be in the local timezone of the machine running AzCopy. E.g. '2020-08-19T15:04:00Z' for a UTC time, or '2020-08-19' for midnight (00:00) in the local timezone. As of AzCopy 10.5, this flag applies only to files, not folders, so folder properties won't be copied when using this flag with --preserve-smb-info or --preserve-smb-permissions.") } diff --git a/cmd/removeEnumerator.go b/cmd/removeEnumerator.go index c05fe1a4e..29505120c 100644 --- a/cmd/removeEnumerator.go +++ b/cmd/removeEnumerator.go @@ -66,6 +66,13 @@ func newRemoveEnumerator(cca *CookedCopyCmdArgs) (enumerator *CopyEnumerator, er filters := append(includeFilters, excludeFilters...) filters = append(filters, excludePathFilters...) filters = append(filters, includeSoftDelete...) + if cca.IncludeBefore != nil { + filters = append(filters, &IncludeBeforeDateFilter{Threshold: *cca.IncludeBefore}) + } + + if cca.IncludeAfter != nil { + filters = append(filters, &IncludeAfterDateFilter{Threshold: *cca.IncludeAfter}) + } // decide our folder transfer strategy // (Must enumerate folders when deleting from a folder-aware location. 
Can't do folder deletion just based on file From 3652e2336afd59f312eaa1b3ca719e87589959a7 Mon Sep 17 00:00:00 2001 From: Ishaan Verma <99612568+tiverma-msft@users.noreply.github.com> Date: Thu, 26 May 2022 23:28:18 +0530 Subject: [PATCH 07/26] output-level-tiverma (#1772) * output-level=default;essential;quiet * default value of flag * solving conversations * adding quiet mode (output-level) to rm and sync * adding quiet mode (output-level) to rm and sync * adding dry-run vs output-level validation to copy, rm, sync * move setting outputVerbosity to root.go * removing redundant code from Prompt() * cleaning code in processOutputMessage() --- cmd/copy.go | 11 +++++++++++ cmd/root.go | 9 +++++++++ cmd/sync.go | 12 ++++++++++++ cmd/zt_interceptors_for_test.go | 3 +++ common/fe-ste-models.go | 24 ++++++++++++++++++++++-- common/lifecyleMgr.go | 26 ++++++++++++++++++++++++++ 6 files changed, 83 insertions(+), 2 deletions(-) diff --git a/cmd/copy.go b/cmd/copy.go index e57628473..a38580a35 100644 --- a/cmd/copy.go +++ b/cmd/copy.go @@ -853,6 +853,17 @@ func (raw rawCopyCmdArgs) cook() (CookedCopyCmdArgs, error) { cooked.dryrunMode = raw.dryrun + if azcopyOutputVerbosity == common.EOutputVerbosity.Quiet() || azcopyOutputVerbosity == common.EOutputVerbosity.Essential() { + if cooked.ForceWrite == common.EOverwriteOption.Prompt() { + err = fmt.Errorf("cannot set output level '%s' with overwrite option '%s'", azcopyOutputVerbosity.String(), cooked.ForceWrite.String()) + } else if cooked.dryrunMode { + err = fmt.Errorf("cannot set output level '%s' with dry-run mode", azcopyOutputVerbosity.String()) + } + } + if err != nil { + return cooked, err + } + return cooked, nil } diff --git a/cmd/root.go b/cmd/root.go index 589b83da1..bbe6f676f 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -44,8 +44,10 @@ var AzcopyAppPathFolder string var azcopyLogPathFolder string var azcopyMaxFileAndSocketHandles int var outputFormatRaw string +var outputVerbosityRaw string var cancelFromStdin bool var azcopyOutputFormat common.OutputFormat +var azcopyOutputVerbosity common.OutputVerbosity var cmdLineCapMegaBitsPerSecond float64 var azcopyAwaitContinue bool var azcopyAwaitAllowOpenFiles bool @@ -87,6 +89,12 @@ var rootCmd = &cobra.Command{ return err } + err = azcopyOutputVerbosity.Parse(outputVerbosityRaw) + glcm.SetOutputVerbosity(azcopyOutputVerbosity) + if err != nil { + return err + } + glcm.SetForceLogging() // warn Windows users re quoting (since our docs all use single quotes, but CMD needs double) @@ -185,6 +193,7 @@ func init() { rootCmd.PersistentFlags().Float64Var(&cmdLineCapMegaBitsPerSecond, "cap-mbps", 0, "Caps the transfer rate, in megabits per second. Moment-by-moment throughput might vary slightly from the cap. If this option is set to zero, or it is omitted, the throughput isn't capped.") rootCmd.PersistentFlags().StringVar(&outputFormatRaw, "output-type", "text", "Format of the command's output. The choices include: text, json. The default value is 'text'.") + rootCmd.PersistentFlags().StringVar(&outputVerbosityRaw, "output-level", "default", "Define the output verbosity. Available levels: essential, quiet.") rootCmd.PersistentFlags().StringVar(&cmdLineExtraSuffixesAAD, trustedSuffixesNameAAD, "", "Specifies additional domain suffixes where Azure Active Directory login tokens may be sent. The default is '"+ trustedSuffixesAAD+"'. Any listed here are added to the default. For security, you should only put Microsoft Azure domains here. 
Separate multiple entries with semi-colons.") diff --git a/cmd/sync.go b/cmd/sync.go index a37150ce2..a210ea7a0 100644 --- a/cmd/sync.go +++ b/cmd/sync.go @@ -329,6 +329,17 @@ func (raw *rawSyncCmdArgs) cook() (cookedSyncCmdArgs, error) { cooked.dryrunMode = raw.dryrun + if azcopyOutputVerbosity == common.EOutputVerbosity.Quiet() || azcopyOutputVerbosity == common.EOutputVerbosity.Essential() { + if cooked.deleteDestination == common.EDeleteDestination.Prompt() { + err = fmt.Errorf("cannot set output level '%s' with delete-destination option '%s'", azcopyOutputVerbosity.String(), cooked.deleteDestination.String()) + } else if cooked.dryrunMode { + err = fmt.Errorf("cannot set output level '%s' with dry-run mode", azcopyOutputVerbosity.String()) + } + } + if err != nil { + return cooked, err + } + return cooked, nil } @@ -718,6 +729,7 @@ func init() { if err != nil { glcm.Error("error parsing the input given by the user. Failed with error " + err.Error()) } + cooked.commandString = copyHandlerUtil{}.ConstructCommandStringFromArgs() err = cooked.process() if err != nil { diff --git a/cmd/zt_interceptors_for_test.go b/cmd/zt_interceptors_for_test.go index b4945f25c..35eb16331 100644 --- a/cmd/zt_interceptors_for_test.go +++ b/cmd/zt_interceptors_for_test.go @@ -89,6 +89,9 @@ func (m *mockedLifecycleManager) DownloadToTempPath() bool { func (m *mockedLifecycleManager) ReportAllJobPartsDone() { } +func (m *mockedLifecycleManager) SetOutputVerbosity(mode common.OutputVerbosity) { +} + func (m *mockedLifecycleManager) Progress(o common.OutputBuilder) { select { case m.progressLog <- o(common.EOutputFormat.Text()): diff --git a/common/fe-ste-models.go b/common/fe-ste-models.go index 51270bdc8..cbc0ee865 100644 --- a/common/fe-ste-models.go +++ b/common/fe-ste-models.go @@ -843,6 +843,28 @@ func (ct *CredentialType) Parse(s string) error { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +var EOutputVerbosity = OutputVerbosity(0) + +type OutputVerbosity uint8 + +func (OutputVerbosity) Default() OutputVerbosity { return OutputVerbosity(0) } +func (OutputVerbosity) Essential() OutputVerbosity { return OutputVerbosity(1) } // no progress, no info, no prompts. 
Print everything else +func (OutputVerbosity) Quiet() OutputVerbosity { return OutputVerbosity(2) } // nothing at all + +func (qm *OutputVerbosity) Parse(s string) error { + val, err := enum.ParseInt(reflect.TypeOf(qm), s, true, true) + if err == nil { + *qm = val.(OutputVerbosity) + } + return err +} + +func (qm OutputVerbosity) String() string { + return enum.StringInt(qm, reflect.TypeOf(qm)) +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + var EHashValidationOption = HashValidationOption(0) var DefaultHashValidationOption = EHashValidationOption.FailIfDifferent() @@ -1495,5 +1517,3 @@ func GetClientProvidedKey(options CpkOptions) azblob.ClientProvidedKeyOptions { _cpkScopeInfo := GetCpkScopeInfo(options.CpkScopeInfo) return ToClientProvidedKeyOptions(_cpkInfo, _cpkScopeInfo) } - - diff --git a/common/lifecyleMgr.go b/common/lifecyleMgr.go index 83c484dc0..02d57fedd 100644 --- a/common/lifecyleMgr.go +++ b/common/lifecyleMgr.go @@ -74,6 +74,7 @@ type LifecycleMgr interface { DownloadToTempPath() bool MsgHandlerChannel() <-chan *LCMMsg ReportAllJobPartsDone() + SetOutputVerbosity(mode OutputVerbosity) } func GetLifecycleMgr() LifecycleMgr { @@ -100,6 +101,7 @@ type lifecycleMgr struct { disableSyslog bool waitForUserResponse chan bool msgHandlerChannel chan *LCMMsg + OutputVerbosityType OutputVerbosity } type userInput struct { @@ -276,6 +278,7 @@ func (lcm *lifecycleMgr) Info(msg string) { } func (lcm *lifecycleMgr) Prompt(message string, details PromptDetails) ResponseOption { + expectedInputChannel := make(chan string, 1) lcm.msgQueue <- outputMessage{ msgContent: message, @@ -400,6 +403,10 @@ func (lcm *lifecycleMgr) processOutputMessage() { for { msgToPrint := <-lcm.msgQueue + if shouldQuietMessage(msgToPrint, lcm.OutputVerbosityType) { + lcm.processNoneOutput(msgToPrint) + continue + } switch lcm.outputFormat { case EOutputFormat.Json(): lcm.processJSONOutput(msgToPrint) @@ -668,9 +675,28 @@ func (lcm *lifecycleMgr) ReportAllJobPartsDone() { lcm.doneChannel <- true } +func (lcm *lifecycleMgr) SetOutputVerbosity(mode OutputVerbosity) { + lcm.OutputVerbosityType = mode +} + // captures the common logic of exiting if there's an expected error func PanicIfErr(err error) { if err != nil { panic(err) } } + +func shouldQuietMessage(msgToOutput outputMessage, quietMode OutputVerbosity) bool { + messageType := msgToOutput.msgType + + switch quietMode { + case EOutputVerbosity.Default(): + return false + case EOutputVerbosity.Essential(): + return messageType == eOutputMessageType.Progress() || messageType == eOutputMessageType.Info() || messageType == eOutputMessageType.Prompt() + case EOutputVerbosity.Quiet(): + return true + default: + return false + } +} From 698f69aed3f7274a3415fa999822029fed288e47 Mon Sep 17 00:00:00 2001 From: adreed-msft <49764384+adreed-msft@users.noreply.github.com> Date: Tue, 31 May 2022 23:30:58 -0700 Subject: [PATCH 08/26] Linux/POSIX properties persistence (#1780) * SIP Implementation * Opt for an interface instead of a struct * Implement upload * Implement file creation * Implement property setting; todo: error handling * Error handling * Add upload support to append/page blob * Implement flag * Somewhat fix download * Handle folder uploading/copying * Remove download * Fix getUNIXProperties * Remove device filtering * Convert flag to bool * Attempt to fix accidental folder transfers * Fix test function signatures * Add preserve-posix-properties to sync * Limit auto-setting to sync 
and copy * Default to false * Potentially fix testing * Add check for flag * Shift back to single-call * Handle comments on PR * Add includeDirectoryStubs to folder property considerations * Address final comments --- cmd/copy.go | 18 + cmd/copyEnumeratorInit.go | 16 +- cmd/list.go | 4 +- cmd/removeEnumerator.go | 7 +- cmd/sync.go | 40 ++- cmd/syncEnumerator.go | 13 +- cmd/syncProcessor.go | 1 + cmd/zc_enumerator.go | 5 +- cmd/zc_traverser_blob.go | 8 +- cmd/zc_traverser_blob_account.go | 4 +- cmd/zc_traverser_list.go | 4 +- cmd/zt_copy_blob_download_test.go | 32 +- cmd/zt_generic_service_traverser_test.go | 6 +- cmd/zt_generic_traverser_test.go | 15 +- common/fe-ste-models.go | 22 +- common/rpc-models.go | 1 + common/unixStatAdapter.go | 421 +++++++++++++++++++++++ e2etest/declarativeHelpers.go | 1 + e2etest/declarativeScenario.go | 2 +- e2etest/runner.go | 3 + e2etest/validator.go | 4 +- ste/JobPartPlan.go | 9 +- ste/JobPartPlanFileName.go | 5 +- ste/downloader-blob.go | 11 +- ste/mgr-JobPartTransferMgr.go | 18 +- ste/s2sCopier-URLToBlob.go | 4 + ste/sender-appendBlobFromLocal.go | 18 +- ste/sender-blobFolders.go | 177 ++++++++++ ste/sender-blobFolders_linux.go | 21 ++ ste/sender-blobFolders_other.go | 7 + ste/sender-blockBlobFromLocal.go | 15 + ste/sender-pageBlobFromLocal.go | 18 +- ste/sender.go | 18 + ste/sourceInfoProvider-Blob.go | 25 ++ ste/sourceInfoProvider-Local_linux.go | 163 +++++++++ ste/sourceInfoProvider.go | 9 +- ste/xfer-anyToRemote-folder.go | 12 +- ste/xfer-remoteToLocal-file.go | 33 +- 38 files changed, 1062 insertions(+), 128 deletions(-) create mode 100644 common/unixStatAdapter.go create mode 100644 ste/sender-blobFolders.go create mode 100644 ste/sender-blobFolders_linux.go create mode 100644 ste/sender-blobFolders_other.go create mode 100644 ste/sourceInfoProvider-Local_linux.go diff --git a/cmd/copy.go b/cmd/copy.go index a38580a35..d52562367 100644 --- a/cmd/copy.go +++ b/cmd/copy.go @@ -129,6 +129,8 @@ type rawCopyCmdArgs struct { // Opt-in flag to persist additional SMB properties to Azure Files. Named ...info instead of ...properties // because the latter was similar enough to preserveSMBPermissions to induce user error preserveSMBInfo bool + // Opt-in flag to persist additional POSIX properties + preservePOSIXProperties bool // Opt-in flag to preserve the blob index tags during service to service transfer. s2sPreserveBlobTags bool // Flag to enable Window's special privileges @@ -638,6 +640,11 @@ func (raw rawCopyCmdArgs) cook() (CookedCopyCmdArgs, error) { cooked.preserveSMBInfo = false } + cooked.preservePOSIXProperties = raw.preservePOSIXProperties + if cooked.preservePOSIXProperties && !areBothLocationsPOSIXAware(cooked.FromTo) { + return cooked, fmt.Errorf("in order to use --preserve-posix-properties, both the source and destination must be POSIX-aware (Linux->Blob, Blob->Linux, Blob->Blob)") + } + if err = validatePreserveSMBPropertyOption(cooked.preserveSMBInfo, cooked.FromTo, &cooked.ForceWrite, "preserve-smb-info"); err != nil { return cooked, err } @@ -918,6 +925,14 @@ func areBothLocationsSMBAware(fromTo common.FromTo) bool { } } +func areBothLocationsPOSIXAware(fromTo common.FromTo) bool { + // POSIX properties are stored in blob metadata-- They don't need a special persistence strategy for BlobBlob. 
+ return runtime.GOOS == "linux" && ( + // fromTo == common.EFromTo.BlobLocal() || TODO + fromTo == common.EFromTo.LocalBlob()) || + fromTo == common.EFromTo.BlobBlob() +} + func validatePreserveSMBPropertyOption(toPreserve bool, fromTo common.FromTo, overwrite *common.OverwriteOption, flagName string) error { if toPreserve && !(fromTo == common.EFromTo.LocalFile() || fromTo == common.EFromTo.FileLocal() || @@ -1115,6 +1130,8 @@ type CookedCopyCmdArgs struct { preservePermissions common.PreservePermissionsOption // Whether the user wants to preserve the SMB properties ... preserveSMBInfo bool + // Whether the user wants to preserve the POSIX properties ... + preservePOSIXProperties bool // Whether to enable Windows special privileges backupMode bool @@ -1880,6 +1897,7 @@ func init() { cpCmd.PersistentFlags().BoolVar(&raw.asSubdir, "as-subdir", true, "True by default. Places folder sources as subdirectories under the destination.") cpCmd.PersistentFlags().BoolVar(&raw.preserveOwner, common.PreserveOwnerFlagName, common.PreserveOwnerDefault, "Only has an effect in downloads, and only when --preserve-smb-permissions is used. If true (the default), the file Owner and Group are preserved in downloads. If set to false, --preserve-smb-permissions will still preserve ACLs but Owner and Group will be based on the user running AzCopy") cpCmd.PersistentFlags().BoolVar(&raw.preserveSMBInfo, "preserve-smb-info", true, "For SMB-aware locations, flag will be set to true by default. Preserves SMB property info (last write time, creation time, attribute bits) between SMB-aware resources (Windows and Azure Files). Only the attribute bits supported by Azure Files will be transferred; any others will be ignored. This flag applies to both files and folders, unless a file-only filter is specified (e.g. include-pattern). The info transferred for folders is the same as that for files, except for Last Write Time which is never preserved for folders.") + cpCmd.PersistentFlags().BoolVar(&raw.preservePOSIXProperties, "preserve-posix-properties", false, "'Preserves' property info gleaned from stat or statx into object metadata.") cpCmd.PersistentFlags().BoolVar(&raw.forceIfReadOnly, "force-if-read-only", false, "When overwriting an existing file on Windows or Azure Files, force the overwrite to work even if the existing file has its read-only attribute set") cpCmd.PersistentFlags().BoolVar(&raw.backupMode, common.BackupModeFlagName, false, "Activates Windows' SeBackupPrivilege for uploads, or SeRestorePrivilege for downloads, to allow AzCopy to see read all files, regardless of their file system permissions, and to restore all permissions. Requires that the account running AzCopy already has these permissions (e.g. has Administrator rights or is a member of the 'Backup Operators' group). All this flag does is activate privileges that the account already has") cpCmd.PersistentFlags().BoolVar(&raw.putMd5, "put-md5", false, "Create an MD5 hash of each file, and save the hash as the Content-MD5 property of the destination blob or file. (By default the hash is NOT created.) 
Only available when uploading.") diff --git a/cmd/copyEnumeratorInit.go b/cmd/copyEnumeratorInit.go index 870f972db..4f484383c 100644 --- a/cmd/copyEnumeratorInit.go +++ b/cmd/copyEnumeratorInit.go @@ -65,6 +65,7 @@ func (cca *CookedCopyCmdArgs) initEnumerator(jobPartOrder common.CopyJobPartOrde jobPartOrder.CpkOptions = cca.CpkOptions jobPartOrder.PreserveSMBPermissions = cca.preservePermissions jobPartOrder.PreserveSMBInfo = cca.preserveSMBInfo + jobPartOrder.PreservePOSIXProperties = cca.preservePOSIXProperties // Infer on download so that we get LMT and MD5 on files download // On S2S transfers the following rules apply: @@ -80,10 +81,7 @@ func (cca *CookedCopyCmdArgs) initEnumerator(jobPartOrder common.CopyJobPartOrde jobPartOrder.S2SInvalidMetadataHandleOption = cca.s2sInvalidMetadataHandleOption jobPartOrder.S2SPreserveBlobTags = cca.S2sPreserveBlobTags - traverser, err = InitResourceTraverser(cca.Source, cca.FromTo.From(), &ctx, &srcCredInfo, - &cca.FollowSymlinks, cca.ListOfFilesChannel, cca.Recursive, getRemoteProperties, - cca.IncludeDirectoryStubs, cca.permanentDeleteOption, func(common.EntityType) {}, cca.ListOfVersionIDs, - cca.S2sPreserveBlobTags, cca.LogVerbosity.ToPipelineLogLevel(), cca.CpkOptions) + traverser, err = InitResourceTraverser(cca.Source, cca.FromTo.From(), &ctx, &srcCredInfo, &cca.FollowSymlinks, cca.ListOfFilesChannel, cca.Recursive, getRemoteProperties, cca.IncludeDirectoryStubs, cca.permanentDeleteOption, func(common.EntityType) {}, cca.ListOfVersionIDs, cca.S2sPreserveBlobTags, cca.LogVerbosity.ToPipelineLogLevel(), cca.CpkOptions) if err != nil { return nil, err @@ -236,7 +234,7 @@ func (cca *CookedCopyCmdArgs) initEnumerator(jobPartOrder common.CopyJobPartOrde // decide our folder transfer strategy var message string - jobPartOrder.Fpo, message = newFolderPropertyOption(cca.FromTo, cca.Recursive, cca.StripTopDir, filters, cca.preserveSMBInfo, cca.preservePermissions.IsTruthy(), cca.isHNStoHNS, strings.EqualFold(cca.Destination.Value, common.Dev_Null)) + jobPartOrder.Fpo, message = newFolderPropertyOption(cca.FromTo, cca.Recursive, cca.StripTopDir, filters, cca.preserveSMBInfo, cca.preservePermissions.IsTruthy(), cca.preservePOSIXProperties, cca.isHNStoHNS, strings.EqualFold(cca.Destination.Value, common.Dev_Null), cca.IncludeDirectoryStubs) if !cca.dryrunMode { glcm.Info(message) } @@ -354,9 +352,7 @@ func (cca *CookedCopyCmdArgs) isDestDirectory(dst common.ResourceString, ctx *co return false } - rt, err := InitResourceTraverser(dst, cca.FromTo.To(), ctx, &dstCredInfo, nil, - nil, false, false, false, common.EPermanentDeleteOption.None(), - func(common.EntityType) {}, cca.ListOfVersionIDs, false, pipeline.LogNone, cca.CpkOptions) + rt, err := InitResourceTraverser(dst, cca.FromTo.To(), ctx, &dstCredInfo, nil, nil, false, false, false, common.EPermanentDeleteOption.None(), func(common.EntityType) {}, cca.ListOfVersionIDs, false, pipeline.LogNone, cca.CpkOptions) if err != nil { return false @@ -678,7 +674,7 @@ func (cca *CookedCopyCmdArgs) MakeEscapedRelativePath(source bool, dstIsDir bool } // we assume that preserveSmbPermissions and preserveSmbInfo have already been validated, such that they are only true if both resource types support them -func newFolderPropertyOption(fromTo common.FromTo, recursive bool, stripTopDir bool, filters []ObjectFilter, preserveSmbInfo, preserveSmbPermissions, isDfsDfs, isDstNull bool) (common.FolderPropertyOption, string) { +func newFolderPropertyOption(fromTo common.FromTo, recursive, stripTopDir bool, filters 
[]ObjectFilter, preserveSmbInfo, preserveSmbPermissions, preservePosixProperties, isDfsDfs, isDstNull, includeDirectoryStubs bool) (common.FolderPropertyOption, string) { getSuffix := func(willProcess bool) string { willProcessString := common.IffString(willProcess, "will be processed", "will not be processed") @@ -696,7 +692,7 @@ func newFolderPropertyOption(fromTo common.FromTo, recursive bool, stripTopDir b } } - bothFolderAware := (fromTo.AreBothFolderAware() || isDfsDfs) && !isDstNull // Copying folders to dev null doesn't make sense. + bothFolderAware := (fromTo.AreBothFolderAware() || isDfsDfs || preservePosixProperties || includeDirectoryStubs) && !isDstNull isRemoveFromFolderAware := fromTo == common.EFromTo.FileTrash() if bothFolderAware || isRemoveFromFolderAware { if !recursive { diff --git a/cmd/list.go b/cmd/list.go index f0aafff40..bb9a3e92b 100644 --- a/cmd/list.go +++ b/cmd/list.go @@ -218,9 +218,7 @@ func (cooked cookedListCmdArgs) HandleListContainerCommand() (err error) { } } - traverser, err := InitResourceTraverser(source, cooked.location, &ctx, &credentialInfo, nil, nil, - true, false, false, common.EPermanentDeleteOption.None(), func(common.EntityType) {}, - nil, false, pipeline2.LogNone, common.CpkOptions{}) + traverser, err := InitResourceTraverser(source, cooked.location, &ctx, &credentialInfo, nil, nil, true, false, false, common.EPermanentDeleteOption.None(), func(common.EntityType) {}, nil, false, pipeline2.LogNone, common.CpkOptions{}) if err != nil { return fmt.Errorf("failed to initialize traverser: %s", err.Error()) diff --git a/cmd/removeEnumerator.go b/cmd/removeEnumerator.go index 29505120c..3d271a7bc 100644 --- a/cmd/removeEnumerator.go +++ b/cmd/removeEnumerator.go @@ -47,10 +47,7 @@ func newRemoveEnumerator(cca *CookedCopyCmdArgs) (enumerator *CopyEnumerator, er ctx := context.WithValue(context.TODO(), ste.ServiceAPIVersionOverride, ste.DefaultServiceApiVersion) // Include-path is handled by ListOfFilesChannel. - sourceTraverser, err = InitResourceTraverser(cca.Source, cca.FromTo.From(), &ctx, &cca.credentialInfo, - nil, cca.ListOfFilesChannel, cca.Recursive, false, cca.IncludeDirectoryStubs, - cca.permanentDeleteOption, func(common.EntityType) {}, cca.ListOfVersionIDs, false, - cca.LogVerbosity.ToPipelineLogLevel(), cca.CpkOptions) + sourceTraverser, err = InitResourceTraverser(cca.Source, cca.FromTo.From(), &ctx, &cca.credentialInfo, nil, cca.ListOfFilesChannel, cca.Recursive, false, cca.IncludeDirectoryStubs, cca.permanentDeleteOption, func(common.EntityType) {}, cca.ListOfVersionIDs, false, cca.LogVerbosity.ToPipelineLogLevel(), cca.CpkOptions) // report failure to create traverser if err != nil { @@ -78,7 +75,7 @@ func newRemoveEnumerator(cca *CookedCopyCmdArgs) (enumerator *CopyEnumerator, er // (Must enumerate folders when deleting from a folder-aware location. Can't do folder deletion just based on file // deletion, because that would not handle folders that were empty at the start of the job). // isHNStoHNS is IGNORED here, because BlobFS locations don't take this route currently. 
- fpo, message := newFolderPropertyOption(cca.FromTo, cca.Recursive, cca.StripTopDir, filters, false, false, false, false) + fpo, message := newFolderPropertyOption(cca.FromTo, cca.Recursive, cca.StripTopDir, filters, false, false, false, false, false, cca.IncludeDirectoryStubs) // do not print Info message if in dry run mode if !cca.dryrunMode { glcm.Info(message) diff --git a/cmd/sync.go b/cmd/sync.go index a210ea7a0..573f2050a 100644 --- a/cmd/sync.go +++ b/cmd/sync.go @@ -56,14 +56,15 @@ type rawSyncCmdArgs struct { includeRegex string excludeRegex string - preservePermissions bool - preserveSMBPermissions bool // deprecated and synonymous with preservePermissions - preserveOwner bool - preserveSMBInfo bool - followSymlinks bool - backupMode bool - putMd5 bool - md5ValidationOption string + preservePermissions bool + preserveSMBPermissions bool // deprecated and synonymous with preservePermissions + preserveOwner bool + preserveSMBInfo bool + preservePOSIXProperties bool + followSymlinks bool + backupMode bool + putMd5 bool + md5ValidationOption string // this flag indicates the user agreement with respect to deleting the extra files at the destination // which do not exists at source. With this flag turned on/off, users will not be asked for permission. // otherwise the user is prompted to make a decision @@ -270,6 +271,11 @@ func (raw *rawSyncCmdArgs) cook() (cookedSyncCmdArgs, error) { cooked.isHNSToHNS = true // override HNS settings, since if a user is tx'ing blob->blob and copying permissions, it's DEFINITELY going to be HNS (since perms don't exist w/o HNS). } + cooked.preservePOSIXProperties = raw.preservePOSIXProperties + if cooked.preservePOSIXProperties && !areBothLocationsPOSIXAware(cooked.fromTo) { + return cooked, fmt.Errorf("in order to use --preserve-posix-properties, both the source and destination must be POSIX-aware (valid pairings are Linux->Blob, Blob->Linux, Blob->Blob)") + } + cooked.putMd5 = raw.putMd5 if err = validatePutMd5(cooked.putMd5, cooked.fromTo); err != nil { return cooked, err @@ -380,14 +386,15 @@ type cookedSyncCmdArgs struct { excludeRegex []string // options - preservePermissions common.PreservePermissionsOption - preserveSMBInfo bool - putMd5 bool - md5ValidationOption common.HashValidationOption - blockSize int64 - logVerbosity common.LogLevel - forceIfReadOnly bool - backupMode bool + preservePermissions common.PreservePermissionsOption + preserveSMBInfo bool + preservePOSIXProperties bool + putMd5 bool + md5ValidationOption common.HashValidationOption + blockSize int64 + logVerbosity common.LogLevel + forceIfReadOnly bool + backupMode bool // commandString hold the user given command which is logged to the Job log file commandString string @@ -751,6 +758,7 @@ func init() { // smb info/permissions can be persisted in the scenario of File -> File syncCmd.PersistentFlags().BoolVar(&raw.preserveSMBPermissions, "preserve-smb-permissions", false, "False by default. Preserves SMB ACLs between aware resources (Azure Files). This flag applies to both files and folders, unless a file-only filter is specified (e.g. include-pattern).") syncCmd.PersistentFlags().BoolVar(&raw.preserveSMBInfo, "preserve-smb-info", true, "For SMB-aware locations, flag will be set to true by default. Preserves SMB property info (last write time, creation time, attribute bits) between SMB-aware resources (Azure Files). This flag applies to both files and folders, unless a file-only filter is specified (e.g. include-pattern). 
The info transferred for folders is the same as that for files, except for Last Write Time which is not preserved for folders. ") + syncCmd.PersistentFlags().BoolVar(&raw.preservePOSIXProperties, "preserve-posix-properties", false, "'Preserves' property info gleaned from stat or statx into object metadata.") // TODO: enable when we support local <-> File // syncCmd.PersistentFlags().BoolVar(&raw.forceIfReadOnly, "force-if-read-only", false, "When overwriting an existing file on Windows or Azure Files, force the overwrite to work even if the existing file has its read-only attribute set") diff --git a/cmd/syncEnumerator.go b/cmd/syncEnumerator.go index 42424bbf5..b42649a95 100644 --- a/cmd/syncEnumerator.go +++ b/cmd/syncEnumerator.go @@ -58,12 +58,11 @@ func (cca *cookedSyncCmdArgs) initEnumerator(ctx context.Context) (enumerator *s // TODO: enable symlink support in a future release after evaluating the implications // GetProperties is enabled by default as sync supports both upload and download. // This property only supports Files and S3 at the moment, but provided that Files sync is coming soon, enable to avoid stepping on Files sync work - sourceTraverser, err := InitResourceTraverser(cca.source, cca.fromTo.From(), &ctx, &srcCredInfo, nil, - nil, cca.recursive, true, cca.isHNSToHNS, common.EPermanentDeleteOption.None(), func(entityType common.EntityType) { - if entityType == common.EEntityType.File() { - atomic.AddUint64(&cca.atomicSourceFilesScanned, 1) - } - }, nil, cca.s2sPreserveBlobTags, cca.logVerbosity.ToPipelineLogLevel(), cca.cpkOptions) + sourceTraverser, err := InitResourceTraverser(cca.source, cca.fromTo.From(), &ctx, &srcCredInfo, nil, nil, cca.recursive, true, cca.isHNSToHNS, common.EPermanentDeleteOption.None(), func(entityType common.EntityType) { + if entityType == common.EEntityType.File() { + atomic.AddUint64(&cca.atomicSourceFilesScanned, 1) + } + }, nil, cca.s2sPreserveBlobTags, cca.logVerbosity.ToPipelineLogLevel(), cca.cpkOptions) if err != nil { return nil, err @@ -124,7 +123,7 @@ func (cca *cookedSyncCmdArgs) initEnumerator(ctx context.Context) (enumerator *s } // decide our folder transfer strategy - fpo, folderMessage := newFolderPropertyOption(cca.fromTo, cca.recursive, true, filters, cca.preserveSMBInfo, cca.preservePermissions.IsTruthy(), cca.isHNSToHNS, strings.EqualFold(cca.destination.Value, common.Dev_Null)) // sync always acts like stripTopDir=true + fpo, folderMessage := newFolderPropertyOption(cca.fromTo, cca.recursive, true, filters, cca.preserveSMBInfo, cca.preservePermissions.IsTruthy(), false, cca.isHNSToHNS, strings.EqualFold(cca.destination.Value, common.Dev_Null), false) // sync always acts like stripTopDir=true if !cca.dryrunMode { glcm.Info(folderMessage) } diff --git a/cmd/syncProcessor.go b/cmd/syncProcessor.go index 05daa0bf8..71e9f3eb3 100644 --- a/cmd/syncProcessor.go +++ b/cmd/syncProcessor.go @@ -58,6 +58,7 @@ func newSyncTransferProcessor(cca *cookedSyncCmdArgs, numOfTransfersPerPart int, LogLevel: cca.logVerbosity, PreserveSMBPermissions: cca.preservePermissions, PreserveSMBInfo: cca.preserveSMBInfo, + PreservePOSIXProperties: cca.preservePOSIXProperties, S2SSourceChangeValidation: true, DestLengthValidation: true, S2SGetPropertiesInBackend: true, diff --git a/cmd/zc_enumerator.go b/cmd/zc_enumerator.go index a2f54c49c..c2f91c52c 100644 --- a/cmd/zc_enumerator.go +++ b/cmd/zc_enumerator.go @@ -305,10 +305,7 @@ type enumerationCounterFunc func(entityType common.EntityType) // followSymlinks is only required for local resources 
(defaults to false) // errorOnDirWOutRecursive is used by copy. -func InitResourceTraverser(resource common.ResourceString, location common.Location, ctx *context.Context, - credential *common.CredentialInfo, followSymlinks *bool, listOfFilesChannel chan string, recursive, getProperties, - includeDirectoryStubs bool, permanentDeleteOption common.PermanentDeleteOption, incrementEnumerationCounter enumerationCounterFunc, listOfVersionIds chan string, - s2sPreserveBlobTags bool, logLevel pipeline.LogLevel, cpkOptions common.CpkOptions) (ResourceTraverser, error) { +func InitResourceTraverser(resource common.ResourceString, location common.Location, ctx *context.Context, credential *common.CredentialInfo, followSymlinks *bool, listOfFilesChannel chan string, recursive, getProperties, includeDirectoryStubs bool, permanentDeleteOption common.PermanentDeleteOption, incrementEnumerationCounter enumerationCounterFunc, listOfVersionIds chan string, s2sPreserveBlobTags bool, logLevel pipeline.LogLevel, cpkOptions common.CpkOptions) (ResourceTraverser, error) { var output ResourceTraverser var p *pipeline.Pipeline diff --git a/cmd/zc_traverser_blob.go b/cmd/zc_traverser_blob.go index b1d56d67b..ea8fc415a 100644 --- a/cmd/zc_traverser_blob.go +++ b/cmd/zc_traverser_blob.go @@ -175,7 +175,7 @@ func (t *blobTraverser) Traverse(preprocessor objectMorpher, processor objectPro preprocessor, getObjectNameOnly(strings.TrimSuffix(blobUrlParts.BlobName, common.AZCOPY_PATH_SEPARATOR_STRING)), "", - common.EEntityType.File(), + common.EntityType(common.IffUint8(isBlob, uint8(common.EEntityType.File()), uint8(common.EEntityType.Folder()))), blobProperties.LastModified(), blobProperties.ContentLength(), blobProperties, @@ -261,7 +261,7 @@ func (t *blobTraverser) parallelList(containerURL azblob.ContainerURL, container preprocessor, getObjectNameOnly(strings.TrimSuffix(virtualDir.Name, common.AZCOPY_PATH_SEPARATOR_STRING)), folderRelativePath, - common.EEntityType.File(), // folder stubs are treated like files in in the serial lister as well + common.EEntityType.Folder(), resp.LastModified(), resp.ContentLength(), resp, @@ -363,11 +363,13 @@ func (t *blobTraverser) parallelList(containerURL azblob.ContainerURL, container func (t *blobTraverser) createStoredObjectForBlob(preprocessor objectMorpher, blobInfo azblob.BlobItemInternal, relativePath string, containerName string) StoredObject { adapter := blobPropertiesAdapter{blobInfo.Properties} + + _, isFolder := blobInfo.Metadata["hdi_isfolder"] object := newStoredObject( preprocessor, getObjectNameOnly(blobInfo.Name), relativePath, - common.EEntityType.File(), + common.EntityType(common.IffUint8(isFolder, uint8(common.EEntityType.Folder()), uint8(common.EEntityType.File()))), blobInfo.Properties.LastModified, *blobInfo.Properties.ContentLength, adapter, diff --git a/cmd/zc_traverser_blob_account.go b/cmd/zc_traverser_blob_account.go index 114ef7384..10c27637a 100644 --- a/cmd/zc_traverser_blob_account.go +++ b/cmd/zc_traverser_blob_account.go @@ -115,9 +115,7 @@ func (t *blobAccountTraverser) Traverse(preprocessor objectMorpher, processor ob return nil } -func newBlobAccountTraverser(rawURL *url.URL, p pipeline.Pipeline, ctx context.Context, - includeDirectoryStubs bool, incrementEnumerationCounter enumerationCounterFunc, - s2sPreserveSourceTags bool, cpkOptions common.CpkOptions) (t *blobAccountTraverser) { +func newBlobAccountTraverser(rawURL *url.URL, p pipeline.Pipeline, ctx context.Context, includeDirectoryStubs bool, incrementEnumerationCounter 
enumerationCounterFunc, s2sPreserveSourceTags bool, cpkOptions common.CpkOptions) (t *blobAccountTraverser) { bURLParts := azblob.NewBlobURLParts(*rawURL) cPattern := bURLParts.ContainerName diff --git a/cmd/zc_traverser_list.go b/cmd/zc_traverser_list.go index d6f0cfe96..cb61a44dd 100644 --- a/cmd/zc_traverser_list.go +++ b/cmd/zc_traverser_list.go @@ -107,9 +107,7 @@ func newListTraverser(parent common.ResourceString, parentType common.Location, } // Construct a traverser that goes through the child - traverser, err := InitResourceTraverser(source, parentType, ctx, credential, &followSymlinks, - nil, recursive, getProperties, includeDirectoryStubs, common.EPermanentDeleteOption.None(), incrementEnumerationCounter, - nil, s2sPreserveBlobTags, logLevel, cpkOptions) + traverser, err := InitResourceTraverser(source, parentType, ctx, credential, &followSymlinks, nil, recursive, getProperties, includeDirectoryStubs, common.EPermanentDeleteOption.None(), incrementEnumerationCounter, nil, s2sPreserveBlobTags, logLevel, cpkOptions) if err != nil { return nil, err } diff --git a/cmd/zt_copy_blob_download_test.go b/cmd/zt_copy_blob_download_test.go index 2c42b091d..62b488ee4 100644 --- a/cmd/zt_copy_blob_download_test.go +++ b/cmd/zt_copy_blob_download_test.go @@ -494,7 +494,7 @@ func (s *cmdIntegrationSuite) TestDownloadBlobContainerWithRegexInclude(c *chk.C c.Assert(err, chk.IsNil) // validate that the right number of transfers were scheduled c.Assert(len(mockedRPC.transfers), chk.Equals, len(blobsToInclude)) - //comparing is names of files match + // comparing is names of files match actualTransfer := []string{} for i := 0; i < len(mockedRPC.transfers); i++ { actualTransfer = append(actualTransfer, strings.Trim(mockedRPC.transfers[i].Source, "/")) @@ -509,7 +509,7 @@ func (s *cmdIntegrationSuite) TestDownloadBlobContainerWithRegexInclude(c *chk.C }) } -//test multiple regular expression with include +// test multiple regular expression with include func (s *cmdIntegrationSuite) TestDownloadBlobContainerWithMultRegexInclude(c *chk.C) { bsu := getBSU() @@ -547,7 +547,7 @@ func (s *cmdIntegrationSuite) TestDownloadBlobContainerWithMultRegexInclude(c *c c.Assert(len(mockedRPC.transfers), chk.Equals, len(blobsToInclude)) // validate that the right transfers were sent - //comparing is names of files, since not in order need to sort each string and the compare them + // comparing is names of files, since not in order need to sort each string and the compare them actualTransfer := []string{} for i := 0; i < len(mockedRPC.transfers); i++ { actualTransfer = append(actualTransfer, strings.Trim(mockedRPC.transfers[i].Source, "/")) @@ -561,13 +561,13 @@ func (s *cmdIntegrationSuite) TestDownloadBlobContainerWithMultRegexInclude(c *c }) } -//testing empty expressions for both include and exclude +// testing empty expressions for both include and exclude func (s *cmdIntegrationSuite) TestDownloadBlobContainerWithEmptyRegex(c *chk.C) { bsu := getBSU() // set up the container with blobs containerURL, containerName := createNewContainer(c, bsu) - //test empty regex flag so all blobs will be included since there is no filter + // test empty regex flag so all blobs will be included since there is no filter blobsToInclude := scenarioHelper{}.generateCommonRemoteScenarioForBlob(c, containerURL, "") defer deleteContainer(c, containerURL) c.Assert(containerURL, chk.NotNil) @@ -595,14 +595,14 @@ func (s *cmdIntegrationSuite) TestDownloadBlobContainerWithEmptyRegex(c *chk.C) c.Assert(err, chk.IsNil) // validate that the 
right number of transfers were scheduled c.Assert(len(mockedRPC.transfers), chk.Equals, len(blobsToInclude)) - //do not need to check file names since all files for blobsToInclude are passed bc flags are empty + // do not need to check file names since all files for blobsToInclude are passed bc flags are empty // validate that the right transfers were sent validateDownloadTransfersAreScheduled(c, common.AZCOPY_PATH_SEPARATOR_STRING, common.AZCOPY_PATH_SEPARATOR_STRING, blobsToInclude, mockedRPC) }) } -//testing exclude with one regular expression +// testing exclude with one regular expression func (s *cmdIntegrationSuite) TestDownloadBlobContainerWithRegexExclude(c *chk.C) { bsu := getBSU() @@ -638,7 +638,7 @@ func (s *cmdIntegrationSuite) TestDownloadBlobContainerWithRegexExclude(c *chk.C c.Assert(err, chk.IsNil) // validate that only blobsTo c.Assert(len(mockedRPC.transfers), chk.Equals, len(blobsToInclude)) - //comparing is names of files, since not in order need to sort each string and the compare them + // comparing is names of files, since not in order need to sort each string and the compare them actualTransfer := []string{} for i := 0; i < len(mockedRPC.transfers); i++ { actualTransfer = append(actualTransfer, strings.Trim(mockedRPC.transfers[i].Destination, "/")) @@ -653,7 +653,7 @@ func (s *cmdIntegrationSuite) TestDownloadBlobContainerWithRegexExclude(c *chk.C }) } -//testing exclude with multiple regular expressions +// testing exclude with multiple regular expressions func (s *cmdIntegrationSuite) TestDownloadBlobContainerWithMultRegexExclude(c *chk.C) { bsu := getBSU() @@ -689,7 +689,7 @@ func (s *cmdIntegrationSuite) TestDownloadBlobContainerWithMultRegexExclude(c *c c.Assert(err, chk.IsNil) // validate that the right number of transfers were scheduled c.Assert(len(mockedRPC.transfers), chk.Equals, len(blobsToInclude)) - //comparing is names of files, since not in order need to sort each string and the compare them + // comparing is names of files, since not in order need to sort each string and the compare them actualTransfer := []string{} for i := 0; i < len(mockedRPC.transfers); i++ { actualTransfer = append(actualTransfer, strings.Trim(mockedRPC.transfers[i].Destination, "/")) @@ -723,7 +723,7 @@ func (s *cmdIntegrationSuite) TestDryrunCopyLocalToBlob(c *chk.C) { mockedRPC := interceptor{} Rpc = mockedRPC.intercept mockedLcm := mockedLifecycleManager{dryrunLog: make(chan string, 50)} - mockedLcm.SetOutputFormat(common.EOutputFormat.Text()) //text format + mockedLcm.SetOutputFormat(common.EOutputFormat.Text()) // text format glcm = &mockedLcm // construct the raw input to simulate user input @@ -767,7 +767,7 @@ func (s *cmdIntegrationSuite) TestDryrunCopyBlobToBlob(c *chk.C) { mockedRPC := interceptor{} Rpc = mockedRPC.intercept mockedLcm := mockedLifecycleManager{dryrunLog: make(chan string, 50)} - mockedLcm.SetOutputFormat(common.EOutputFormat.Text()) //text format + mockedLcm.SetOutputFormat(common.EOutputFormat.Text()) // text format glcm = &mockedLcm // construct the raw input to simulate user input @@ -811,7 +811,7 @@ func (s *cmdIntegrationSuite) TestDryrunCopyBlobToBlobJson(c *chk.C) { mockedRPC := interceptor{} Rpc = mockedRPC.intercept mockedLcm := mockedLifecycleManager{dryrunLog: make(chan string, 50)} - mockedLcm.SetOutputFormat(common.EOutputFormat.Json()) //json format + mockedLcm.SetOutputFormat(common.EOutputFormat.Json()) // json format glcm = &mockedLcm // construct the raw input to simulate user input @@ -830,7 +830,7 @@ func (s *cmdIntegrationSuite) 
TestDryrunCopyBlobToBlobJson(c *chk.C) { copyMessage := common.CopyTransfer{} errMarshal := json.Unmarshal([]byte(msg), ©Message) c.Assert(errMarshal, chk.IsNil) - //comparing some values of copyMessage + // comparing some values of copyMessage c.Check(strings.Compare(strings.Trim(copyMessage.Source, "/"), blobsToInclude[0]), chk.Equals, 0) c.Check(strings.Compare(strings.Trim(copyMessage.Destination, "/"), blobsToInclude[0]), chk.Equals, 0) c.Check(strings.Compare(copyMessage.EntityType.String(), common.EEntityType.File().String()), chk.Equals, 0) @@ -859,7 +859,7 @@ func (s *cmdIntegrationSuite) TestDryrunCopyS3toBlob(c *chk.C) { mockedRPC := interceptor{} Rpc = mockedRPC.intercept mockedLcm := mockedLifecycleManager{dryrunLog: make(chan string, 50)} - mockedLcm.SetOutputFormat(common.EOutputFormat.Text()) //text format + mockedLcm.SetOutputFormat(common.EOutputFormat.Text()) // text format glcm = &mockedLcm // construct the raw input to simulate user input @@ -905,7 +905,7 @@ func (s *cmdIntegrationSuite) TestDryrunCopyGCPtoBlob(c *chk.C) { mockedRPC := interceptor{} Rpc = mockedRPC.intercept mockedLcm := mockedLifecycleManager{dryrunLog: make(chan string, 50)} - mockedLcm.SetOutputFormat(common.EOutputFormat.Text()) //text format + mockedLcm.SetOutputFormat(common.EOutputFormat.Text()) // text format glcm = &mockedLcm // construct the raw input to simulate user input diff --git a/cmd/zt_generic_service_traverser_test.go b/cmd/zt_generic_service_traverser_test.go index c4b63bd08..06683819f 100644 --- a/cmd/zt_generic_service_traverser_test.go +++ b/cmd/zt_generic_service_traverser_test.go @@ -182,8 +182,7 @@ func (s *genericTraverserSuite) TestServiceTraverserWithManyObjects(c *chk.C) { // construct a blob account traverser blobPipeline := azblob.NewPipeline(azblob.NewAnonymousCredential(), azblob.PipelineOptions{}) rawBSU := scenarioHelper{}.getRawBlobServiceURLWithSAS(c) - blobAccountTraverser := newBlobAccountTraverser(&rawBSU, blobPipeline, ctx, false, - func(common.EntityType) {}, false, common.CpkOptions{}) + blobAccountTraverser := newBlobAccountTraverser(&rawBSU, blobPipeline, ctx, false, func(common.EntityType) {}, false, common.CpkOptions{}) // invoke the blob account traversal with a dummy processor blobDummyProcessor := dummyProcessor{} @@ -368,8 +367,7 @@ func (s *genericTraverserSuite) TestServiceTraverserWithWildcards(c *chk.C) { blobPipeline := azblob.NewPipeline(azblob.NewAnonymousCredential(), azblob.PipelineOptions{}) rawBSU := scenarioHelper{}.getRawBlobServiceURLWithSAS(c) rawBSU.Path = "/objectmatch*" // set the container name to contain a wildcard - blobAccountTraverser := newBlobAccountTraverser(&rawBSU, blobPipeline, ctx, false, - func(common.EntityType) {}, false, common.CpkOptions{}) + blobAccountTraverser := newBlobAccountTraverser(&rawBSU, blobPipeline, ctx, false, func(common.EntityType) {}, false, common.CpkOptions{}) // invoke the blob account traversal with a dummy processor blobDummyProcessor := dummyProcessor{} diff --git a/cmd/zt_generic_traverser_test.go b/cmd/zt_generic_traverser_test.go index ac1a09056..a92c20c5c 100644 --- a/cmd/zt_generic_traverser_test.go +++ b/cmd/zt_generic_traverser_test.go @@ -496,8 +496,7 @@ func (s *genericTraverserSuite) TestTraverserWithSingleObject(c *chk.C) { ctx := context.WithValue(context.TODO(), ste.ServiceAPIVersionOverride, ste.DefaultServiceApiVersion) p := azblob.NewPipeline(azblob.NewAnonymousCredential(), azblob.PipelineOptions{}) rawBlobURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, 
blobList[0]) - blobTraverser := newBlobTraverser(&rawBlobURLWithSAS, p, ctx, false, false, - func(common.EntityType) {}, false, common.CpkOptions{}, false, false, false) + blobTraverser := newBlobTraverser(&rawBlobURLWithSAS, p, ctx, false, false, func(common.EntityType) {}, false, common.CpkOptions{}, false, false, false) // invoke the blob traversal with a dummy processor blobDummyProcessor := dummyProcessor{} @@ -657,8 +656,7 @@ func (s *genericTraverserSuite) TestTraverserContainerAndLocalDirectory(c *chk.C ctx := context.WithValue(context.TODO(), ste.ServiceAPIVersionOverride, ste.DefaultServiceApiVersion) p := azblob.NewPipeline(azblob.NewAnonymousCredential(), azblob.PipelineOptions{}) rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) - blobTraverser := newBlobTraverser(&rawContainerURLWithSAS, p, ctx, isRecursiveOn, false, - func(common.EntityType) {}, false, common.CpkOptions{}, false, false, false) + blobTraverser := newBlobTraverser(&rawContainerURLWithSAS, p, ctx, isRecursiveOn, false, func(common.EntityType) {}, false, common.CpkOptions{}, false, false, false) // invoke the local traversal with a dummy processor blobDummyProcessor := dummyProcessor{} @@ -819,8 +817,7 @@ func (s *genericTraverserSuite) TestTraverserWithVirtualAndLocalDirectory(c *chk ctx := context.WithValue(context.TODO(), ste.ServiceAPIVersionOverride, ste.DefaultServiceApiVersion) p := azblob.NewPipeline(azblob.NewAnonymousCredential(), azblob.PipelineOptions{}) rawVirDirURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, virDirName) - blobTraverser := newBlobTraverser(&rawVirDirURLWithSAS, p, ctx, isRecursiveOn, false, - func(common.EntityType) {}, false, common.CpkOptions{}, false, false, false) + blobTraverser := newBlobTraverser(&rawVirDirURLWithSAS, p, ctx, isRecursiveOn, false, func(common.EntityType) {}, false, common.CpkOptions{}, false, false, false) // invoke the local traversal with a dummy processor blobDummyProcessor := dummyProcessor{} @@ -928,12 +925,10 @@ func (s *genericTraverserSuite) TestSerialAndParallelBlobTraverser(c *chk.C) { ctx := context.WithValue(context.TODO(), ste.ServiceAPIVersionOverride, ste.DefaultServiceApiVersion) p := azblob.NewPipeline(azblob.NewAnonymousCredential(), azblob.PipelineOptions{}) rawVirDirURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, virDirName) - parallelBlobTraverser := newBlobTraverser(&rawVirDirURLWithSAS, p, ctx, isRecursiveOn, false, - func(common.EntityType) {}, false, common.CpkOptions{}, false, false, false) + parallelBlobTraverser := newBlobTraverser(&rawVirDirURLWithSAS, p, ctx, isRecursiveOn, false, func(common.EntityType) {}, false, common.CpkOptions{}, false, false, false) // construct a serial blob traverser - serialBlobTraverser := newBlobTraverser(&rawVirDirURLWithSAS, p, ctx, isRecursiveOn, false, - func(common.EntityType) {}, false, common.CpkOptions{}, false, false, false) + serialBlobTraverser := newBlobTraverser(&rawVirDirURLWithSAS, p, ctx, isRecursiveOn, false, func(common.EntityType) {}, false, common.CpkOptions{}, false, false, false) serialBlobTraverser.parallelListing = false // invoke the parallel traversal with a dummy processor diff --git a/common/fe-ste-models.go b/common/fe-ste-models.go index cbc0ee865..18124e828 100644 --- a/common/fe-ste-models.go +++ b/common/fe-ste-models.go @@ -50,7 +50,7 @@ const ( // this is the perm that AzCopy has used throughout its preview. 
So, while we considered relaxing it to 0666 // we decided that the best option was to leave it as is, and only relax it if user feedback so requires. - DEFAULT_FILE_PERM = 0644 + DEFAULT_FILE_PERM = 0644 // the os package will handle base-10 for us. // Since we haven't updated the Go SDKs to handle CPK just yet, we need to detect CPK related errors // and inform the user that we don't support CPK yet. @@ -72,7 +72,7 @@ func NewJobID() JobID { return JobID(NewUUID()) } -//var EmptyJobId JobID = JobID{} +// var EmptyJobId JobID = JobID{} func (j JobID) IsEmpty() bool { return j == JobID{} } @@ -977,7 +977,7 @@ type CopyTransfer struct { Source string Destination string EntityType EntityType - LastModifiedTime time.Time //represents the last modified time of source which ensures that source hasn't changed while transferring + LastModifiedTime time.Time // represents the last modified time of source which ensures that source hasn't changed while transferring SourceSize int64 // size of the source entity in bytes. // Properties for service to service copy (some also used in upload or download too) @@ -1004,6 +1004,16 @@ type CopyTransfer struct { // Metadata used in AzCopy. type Metadata map[string]string +func (m Metadata) Clone() Metadata { + out := make(Metadata) + + for k, v := range m { + out[k] = v + } + + return out +} + // ToAzBlobMetadata converts metadata to azblob's metadata. func (m Metadata) ToAzBlobMetadata() azblob.Metadata { return azblob.Metadata(m) @@ -1107,9 +1117,9 @@ func (bt BlobTags) ToAzBlobTagsMap() azblob.BlobTagsMap { } //// FromAzBlobTagsMapToCommonBlobTags converts azblob's BlobTagsMap to common BlobTags -//func FromAzBlobTagsMapToCommonBlobTags(azbt azblob.BlobTagsMap) BlobTags { +// func FromAzBlobTagsMapToCommonBlobTags(azbt azblob.BlobTagsMap) BlobTags { // return BlobTags(azbt) -//} +// } func (bt BlobTags) ToString() string { lst := make([]string, 0) @@ -1321,7 +1331,7 @@ const SizePerFileParam = "size-per-file" const FileCountParam = "file-count" const FileCountDefault = 100 -//BenchMarkMode enumerates values for Azcopy bench command. Valid values Upload or Download +// BenchMarkMode enumerates values for Azcopy bench command. 
Valid values Upload or Download type BenchMarkMode uint8 var EBenchMarkMode = BenchMarkMode(0) diff --git a/common/rpc-models.go b/common/rpc-models.go index 7e1f3a9e1..588881aed 100644 --- a/common/rpc-models.go +++ b/common/rpc-models.go @@ -142,6 +142,7 @@ type CopyJobPartOrderRequest struct { PreserveSMBPermissions PreservePermissionsOption PreserveSMBInfo bool + PreservePOSIXProperties bool S2SGetPropertiesInBackend bool S2SSourceChangeValidation bool DestLengthValidation bool diff --git a/common/unixStatAdapter.go b/common/unixStatAdapter.go new file mode 100644 index 000000000..c55a3c744 --- /dev/null +++ b/common/unixStatAdapter.go @@ -0,0 +1,421 @@ +package common + +import ( + "github.com/Azure/azure-storage-blob-go/azblob" + "strconv" + "time" +) + +const ( // POSIX property metadata + POSIXNlinkMeta = "posix_nlink" + POSIXINodeMeta = "posix_ino" + POSIXCTimeMeta = "posix_ctime" + LINUXBTimeMeta = "linux_btime" + POSIXBlockDeviceMeta = "is_block_dev" // todo: read & use these + POSIXCharDeviceMeta = "is_char_dev" + POSIXSocketMeta = "is_socket" + POSIXFIFOMeta = "is_fifo" + POSIXDevMeta = "posix_dev" + POSIXRDevMeta = "posix_rdev" + POSIXATimeMeta = "posix_atime" + POSIXFolderMeta = "hdi_isfolder" // todo: read & use these + POSIXSymlinkMeta = "is_symlink" + POSIXOwnerMeta = "posix_owner" + POSIXGroupMeta = "posix_group" + POSIXModeMeta = "permissions" + POSIXModTimeMeta = "modtime" + LINUXAttributeMeta = "linux_attribute" + LINUXAttributeMaskMeta = "linux_attribute_mask" + LINUXStatxMaskMeta = "linux_statx_mask" +) + +var AllLinuxProperties = []string{ + POSIXNlinkMeta, + POSIXINodeMeta, + LINUXBTimeMeta, + POSIXBlockDeviceMeta, + POSIXCharDeviceMeta, + POSIXSocketMeta, + POSIXFIFOMeta, + POSIXDevMeta, + POSIXRDevMeta, + POSIXATimeMeta, + POSIXFolderMeta, + POSIXSymlinkMeta, + POSIXOwnerMeta, + POSIXGroupMeta, + POSIXModeMeta, +} + +//goland:noinspection GoCommentStart +type UnixStatAdapter interface { + Extended() bool // Did this call come from StatX? + + // Statx properties + StatxMask() uint32 // Mask determines the availability of all stat/stax properties (except the device ID!). It is only used in statx though. + Attribute() uint64 // Attribute is masked by AttributeMask. + AttributeMask() uint64 + BTime() time.Time // BTime may not always be available on every filesystem. It's important to check Mask first! + // ========== + + // Base Stat properties + NLink() uint64 + Owner() uint32 + Group() uint32 + FileMode() uint32 // Mode may not always be available to check in a Statx call (though it should be, since we requested it.) Best safe than sorry; check Mask! + INode() uint64 + Device() uint64 + RDevice() uint64 // RDevice is ONLY useful when Mode has S_IFCHR or S_IFBLK; as those determine if the file is a representitive of a block or character device. + ATime() time.Time + MTime() time.Time + CTime() time.Time +} + +type UnixStatContainer struct { // Created for downloads + statx bool // Does the call contain extended properties (attributes, birthTime)? 
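+	// The remaining fields mirror stat(2)/statx(2) output; on the download path
+	// they are populated from blob metadata by ReadStatFromMetadata below.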
+ + mask uint32 + attributes uint64 + numLinks uint64 + ownerUID uint32 + groupGID uint32 + mode uint32 + + iNode uint64 + size uint64 + attributesMask uint64 + + accessTime time.Time // atime + birthTime time.Time // btime, statx only + changeTime time.Time // ctime + modTime time.Time // mtime + + repDevID uint64 + devID uint64 +} + +func (u UnixStatContainer) Extended() bool { + return u.statx +} + +func (u UnixStatContainer) StatxMask() uint32 { + return u.mask +} + +func (u UnixStatContainer) Attribute() uint64 { + return u.attributes +} + +func (u UnixStatContainer) AttributeMask() uint64 { + return u.attributesMask +} + +func (u UnixStatContainer) BTime() time.Time { + return u.birthTime +} + +func (u UnixStatContainer) NLink() uint64 { + return u.numLinks +} + +func (u UnixStatContainer) Owner() uint32 { + return u.ownerUID +} + +func (u UnixStatContainer) Group() uint32 { + return u.groupGID +} + +func (u UnixStatContainer) FileMode() uint32 { + return u.mode +} + +func (u UnixStatContainer) INode() uint64 { + return u.iNode +} + +func (u UnixStatContainer) Device() uint64 { + return u.devID +} + +func (u UnixStatContainer) RDevice() uint64 { + return u.repDevID +} + +func (u UnixStatContainer) ATime() time.Time { + return u.accessTime +} + +func (u UnixStatContainer) MTime() time.Time { + return u.modTime +} + +func (u UnixStatContainer) CTime() time.Time { + return u.changeTime +} + +// ReadStatFromMetadata is not fault-tolerant. If any given article does not parse, +// it will throw an error instead of continuing on, as it may be considered incorrect to attempt to persist the rest of the data. +// despite this function being used only in Downloads at the current moment, it still attempts to re-create as complete of a UnixStatAdapter as possible. 
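+// A minimal sketch of the round trip these helpers support (statAdapter is
+// assumed to come from a source info provider on the upload side):
+//
+//	meta := azblob.Metadata{}
+//	AddStatToBlobMetadata(statAdapter, meta)                  // persist POSIX properties as blob metadata
+//	adapter, err := ReadStatFromMetadata(meta, contentLength) // recover them on download
+//	if err != nil {
+//		// a single unparsable value fails the whole read, by design
+//	}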
+func ReadStatFromMetadata(metadata azblob.Metadata, contentLength int64) (UnixStatAdapter, error) { + s := UnixStatContainer{size: uint64(contentLength)} + + if mask, ok := metadata[LINUXStatxMaskMeta]; ok { + m, err := strconv.ParseUint(mask, 10, 32) + if err != nil { + return s, err + } + s.statx = true + s.mask = uint32(m) + } + + // cover additional statx properties here + if attr, ok := metadata[LINUXAttributeMeta]; ok { + a, err := strconv.ParseUint(attr, 10, 64) + if err != nil { + return s, err + } + s.attributes = a + } + + if attr, ok := metadata[LINUXAttributeMaskMeta]; ok { + a, err := strconv.ParseUint(attr, 10, 64) + if err != nil { + return s, err + } + s.attributesMask = a + } + + if btime, ok := metadata[LINUXBTimeMeta]; ok { + b, err := strconv.ParseInt(btime, 10, 64) + if err != nil { + return s, err + } + s.birthTime = time.Unix(0, b) + } + + // base stat properties + if nlink, ok := metadata[POSIXNlinkMeta]; ok { + n, err := strconv.ParseUint(nlink, 10, 64) + if err != nil { + return s, err + } + s.numLinks = n + } + + if owner, ok := metadata[POSIXOwnerMeta]; ok { + o, err := strconv.ParseUint(owner, 10, 32) + if err != nil { + return s, err + } + s.ownerUID = uint32(o) + } + + if group, ok := metadata[POSIXGroupMeta]; ok { + g, err := strconv.ParseUint(group, 10, 32) + if err != nil { + return s, err + } + s.groupGID = uint32(g) + } + + if mode, ok := metadata[POSIXModeMeta]; ok { + m, err := strconv.ParseUint(mode, 10, 32) + if err != nil { + return s, err + } + + s.mode = uint32(m) + } + + if inode, ok := metadata[POSIXINodeMeta]; ok { + ino, err := strconv.ParseUint(inode, 10, 64) + if err != nil { + return s, err + } + + s.iNode = ino + } + + if dev, ok := metadata[POSIXDevMeta]; ok { + d, err := strconv.ParseUint(dev, 10, 64) + if err != nil { + return s, err + } + + s.devID = d + } + + if rdev, ok := metadata[POSIXRDevMeta]; ok { + rd, err := strconv.ParseUint(rdev, 10, 64) + if err != nil { + return s, err + } + + s.repDevID = rd + } + + if atime, ok := metadata[POSIXATimeMeta]; ok { + at, err := strconv.ParseInt(atime, 10, 64) + if err != nil { + return s, err + } + + s.accessTime = time.Unix(0, at) + } + + if mtime, ok := metadata[POSIXModTimeMeta]; ok { + mt, err := strconv.ParseInt(mtime, 10, 64) + if err != nil { + return s, err + } + + s.modTime = time.Unix(0, mt) + } + + if ctime, ok := metadata[POSIXCTimeMeta]; ok { + ct, err := strconv.ParseInt(ctime, 10, 64) + if err != nil { + return s, err + } + + s.changeTime = time.Unix(0, ct) + } + + return s, nil +} + +const ( // Values cloned from x/sys/unix to avoid dependency + STATX_ALL = 0xfff + STATX_ATIME = 0x20 + STATX_ATTR_APPEND = 0x20 + STATX_ATTR_AUTOMOUNT = 0x1000 + STATX_ATTR_COMPRESSED = 0x4 + STATX_ATTR_DAX = 0x200000 + STATX_ATTR_ENCRYPTED = 0x800 + STATX_ATTR_IMMUTABLE = 0x10 + STATX_ATTR_MOUNT_ROOT = 0x2000 + STATX_ATTR_NODUMP = 0x40 + STATX_ATTR_VERITY = 0x100000 + STATX_BASIC_STATS = 0x7ff + STATX_BLOCKS = 0x400 + STATX_BTIME = 0x800 + STATX_CTIME = 0x80 + STATX_GID = 0x10 + STATX_INO = 0x100 + STATX_MNT_ID = 0x1000 + STATX_MODE = 0x2 + STATX_MTIME = 0x40 + STATX_NLINK = 0x4 + STATX_SIZE = 0x200 + STATX_TYPE = 0x1 + STATX_UID = 0x8 + + S_IFSOCK = 0xc000 + S_IFBLK = 0x6000 + S_IFCHR = 0x2000 + S_IFDIR = 0x4000 + S_IFIFO = 0x1000 + S_IFLNK = 0xa000 + + S_IRUSR = 0x400 + S_IWUSR = 0x200 + S_IXUSR = 0x100 + S_IRGRP = 0x040 + S_IWGRP = 0x020 + S_IXGRP = 0x010 + S_IROTH = 0x004 + S_IWOTH = 0x002 + S_IXOTH = 0x001 + + S_ALLPERM = 0x777 +) + +func ClearStatFromBlobMetadata(metadata 
azblob.Metadata) { + for _, v := range AllLinuxProperties { + delete(metadata, v) + } +} + +func AddStatToBlobMetadata(s UnixStatAdapter, metadata azblob.Metadata) { + // TODO: File mode properties (hdi_isfolder, etc.) + if s.Extended() { // try to poll the other properties + mask := s.StatxMask() + + tryAddMetadata(metadata, LINUXStatxMaskMeta, strconv.FormatUint(uint64(mask), 10)) + tryAddMetadata(metadata, LINUXAttributeMeta, strconv.FormatUint(s.Attribute()&s.AttributeMask(), 10)) // AttributesMask indicates what attributes are supported by the filesystem + tryAddMetadata(metadata, LINUXAttributeMaskMeta, strconv.FormatUint(s.AttributeMask(), 10)) + + if StatXReturned(mask, STATX_BTIME) { + tryAddMetadata(metadata, LINUXBTimeMeta, strconv.FormatInt(s.BTime().UnixNano(), 10)) + } + + if StatXReturned(mask, STATX_MODE) { + tryAddMetadata(metadata, POSIXNlinkMeta, strconv.FormatUint(s.NLink(), 10)) + } + + if StatXReturned(mask, STATX_UID) { + tryAddMetadata(metadata, POSIXOwnerMeta, strconv.FormatUint(uint64(s.Owner()), 10)) + } + + if StatXReturned(mask, STATX_GID) { + tryAddMetadata(metadata, POSIXGroupMeta, strconv.FormatUint(uint64(s.Group()), 10)) + } + + if StatXReturned(mask, STATX_MODE) { + tryAddMetadata(metadata, POSIXModeMeta, strconv.FormatUint(uint64(s.FileMode()), 10)) + } + + if StatXReturned(mask, STATX_INO) { + tryAddMetadata(metadata, POSIXINodeMeta, strconv.FormatUint(s.INode(), 10)) + } + + // This is not optional. + tryAddMetadata(metadata, POSIXDevMeta, strconv.FormatUint(s.Device(), 10)) + + if StatXReturned(mask, STATX_MODE) && ((s.FileMode()&S_IFCHR) == S_IFCHR || (s.FileMode()&S_IFBLK) == S_IFBLK) { + tryAddMetadata(metadata, POSIXRDevMeta, strconv.FormatUint(s.RDevice(), 10)) + } + + // Sometimes, the filesystem will return ATime, but the vfs layer will overwrite it in the mask. It's still accurate, so we can use it. + // e.g. ext4+noatime will still return & properly store atimes, but won't be included in the statx mask. + if StatXReturned(mask, STATX_ATIME) || s.ATime().UnixNano() > 0 { + tryAddMetadata(metadata, POSIXATimeMeta, strconv.FormatInt(s.ATime().UnixNano(), 10)) + } + + if StatXReturned(mask, STATX_MTIME) { + tryAddMetadata(metadata, POSIXModTimeMeta, strconv.FormatInt(s.MTime().UnixNano(), 10)) + } + + if StatXReturned(mask, STATX_CTIME) { + tryAddMetadata(metadata, POSIXCTimeMeta, strconv.FormatInt(s.CTime().UnixNano(), 10)) + } + } else { + tryAddMetadata(metadata, POSIXNlinkMeta, strconv.FormatUint(s.NLink(), 10)) + tryAddMetadata(metadata, POSIXOwnerMeta, strconv.FormatUint(uint64(s.Owner()), 10)) + tryAddMetadata(metadata, POSIXGroupMeta, strconv.FormatUint(uint64(s.Group()), 10)) + tryAddMetadata(metadata, POSIXModeMeta, strconv.FormatUint(uint64(s.FileMode()), 10)) + tryAddMetadata(metadata, POSIXINodeMeta, strconv.FormatUint(s.INode(), 10)) + tryAddMetadata(metadata, POSIXDevMeta, strconv.FormatUint(s.Device(), 10)) + + if (s.FileMode()&S_IFCHR) == S_IFCHR || (s.FileMode()&S_IFBLK) == S_IFBLK { // this is not relevant unless the file is a block or character device. 
+ tryAddMetadata(metadata, POSIXRDevMeta, strconv.FormatUint(s.RDevice(), 10)) + } + + tryAddMetadata(metadata, POSIXATimeMeta, strconv.FormatInt(s.ATime().UnixNano(), 10)) + tryAddMetadata(metadata, POSIXModTimeMeta, strconv.FormatInt(s.MTime().UnixNano(), 10)) + tryAddMetadata(metadata, POSIXCTimeMeta, strconv.FormatInt(s.CTime().UnixNano(), 10)) + } +} + +func StatXReturned(mask uint32, want uint32) bool { + return (mask & want) == want +} + +func tryAddMetadata(metadata azblob.Metadata, key, value string) { + if _, ok := metadata[key]; ok { + return // Don't overwrite the user's metadata + } + + metadata[key] = value +} diff --git a/e2etest/declarativeHelpers.go b/e2etest/declarativeHelpers.go index e0812acd3..ea01fb8af 100644 --- a/e2etest/declarativeHelpers.go +++ b/e2etest/declarativeHelpers.go @@ -158,6 +158,7 @@ type params struct { backupMode bool preserveSMBPermissions bool preserveSMBInfo bool + preservePOSIXProperties bool relativeSourcePath string blobTags string blobType string diff --git a/e2etest/declarativeScenario.go b/e2etest/declarativeScenario.go index 160bb28d3..7f27c9708 100644 --- a/e2etest/declarativeScenario.go +++ b/e2etest/declarativeScenario.go @@ -293,7 +293,7 @@ func (s *scenario) validateTransferStates() { actualTransfers, err := s.state.result.GetTransferList(statusToTest) s.a.AssertNoErr(err) - Validator{}.ValidateCopyTransfersAreScheduled(s.a, isSrcEncoded, isDstEncoded, srcRoot, dstRoot, expectedTransfers, actualTransfers, statusToTest, s.FromTo()) + Validator{}.ValidateCopyTransfersAreScheduled(s.a, isSrcEncoded, isDstEncoded, srcRoot, dstRoot, expectedTransfers, actualTransfers, statusToTest, s.FromTo(), s.srcAccountType, s.destAccountType) // TODO: how are we going to validate folder transfers???? } diff --git a/e2etest/runner.go b/e2etest/runner.go index 4a4a61e10..895ce3c65 100644 --- a/e2etest/runner.go +++ b/e2etest/runner.go @@ -95,6 +95,9 @@ func (t *TestRunner) SetAllFlags(p params, o Operation) { set("check-md5", p.checkMd5.String(), "FailIfDifferent") if o == eOperation.Copy() { set("s2s-preserve-access-tier", p.s2sPreserveAccessTier, true) + set("preserve-posix-properties", p.preservePOSIXProperties, "") + } else if o == eOperation.Sync() { + set("preserve-posix-properties", p.preservePOSIXProperties, false) } } diff --git a/e2etest/validator.go b/e2etest/validator.go index 3396fac98..c72a2445c 100644 --- a/e2etest/validator.go +++ b/e2etest/validator.go @@ -55,7 +55,7 @@ func (Validator) ValidateRemoveTransfer(c asserter, isSrcEncoded bool, isDstEnco // TODO: Think of how to validate files in case of remove } func (Validator) ValidateCopyTransfersAreScheduled(c asserter, isSrcEncoded bool, isDstEncoded bool, - sourcePrefix string, destinationPrefix string, expectedTransfers []*testObject, actualTransfers []common.TransferDetail, statusToTest common.TransferStatus, fromTo common.FromTo) { + sourcePrefix string, destinationPrefix string, expectedTransfers []*testObject, actualTransfers []common.TransferDetail, statusToTest common.TransferStatus, fromTo common.FromTo, srcAccountType, dstAccountType AccountType) { sourcePrefix = makeSlashesComparable(sourcePrefix) destinationPrefix = makeSlashesComparable(destinationPrefix) @@ -83,7 +83,7 @@ func (Validator) ValidateCopyTransfersAreScheduled(c asserter, isSrcEncoded bool return s + "/" } lookupMap := scenarioHelper{}.convertListToMap(expectedTransfers, func(to *testObject) string { - if to.isFolder() && fromTo != common.EFromTo.BlobBlob() { // Blob has no concept of folders, except in ADLSG2. 
However, internally, they're treated as blobs anyway. + if to.isFolder() && (fromTo.To() != common.ELocation.Blob() || dstAccountType == EAccountType.HierarchicalNamespaceEnabled()) { return addFolderSuffix(to.name) } else { return to.name diff --git a/ste/JobPartPlan.go b/ste/JobPartPlan.go index 6a4223ff7..acefdaf8f 100644 --- a/ste/JobPartPlan.go +++ b/ste/JobPartPlan.go @@ -63,8 +63,9 @@ type JobPartPlanHeader struct { DstBlobData JobPartPlanDstBlob // Additional data for blob destinations DstLocalData JobPartPlanDstLocal // Additional data for local destinations - PreservePermissions common.PreservePermissionsOption - PreserveSMBInfo bool + PreservePermissions common.PreservePermissionsOption + PreserveSMBInfo bool + PreservePOSIXProperties bool // S2SGetPropertiesInBackend represents whether to enable get S3 objects' or Azure files' properties during s2s copy in backend. S2SGetPropertiesInBackend bool // S2SSourceChangeValidation represents whether user wants to check if source has changed after enumerating. @@ -80,7 +81,7 @@ type JobPartPlanHeader struct { // jobStatus_doNotUse represents the current status of JobPartPlan // jobStatus_doNotUse is a private member whose value can be accessed by Status and SetJobStatus // jobStatus_doNotUse should not be directly accessed anywhere except by the Status and SetJobStatus - atomicJobStatus common.JobStatus + atomicJobStatus common.JobStatus atomicPartStatus common.JobStatus // For delete operation specify what to do with snapshots @@ -106,7 +107,7 @@ func (jpph *JobPartPlanHeader) JobPartStatus() common.JobStatus { func (jpph *JobPartPlanHeader) SetJobPartStatus(newJobStatus common.JobStatus) { jpph.atomicPartStatus.AtomicStore(newJobStatus) -} +} // Transfer api gives memory map JobPartPlanTransfer header for given index func (jpph *JobPartPlanHeader) Transfer(transferIndex uint32) *JobPartPlanTransfer { diff --git a/ste/JobPartPlanFileName.go b/ste/JobPartPlanFileName.go index 2652835cd..5a04fb25c 100644 --- a/ste/JobPartPlanFileName.go +++ b/ste/JobPartPlanFileName.go @@ -206,8 +206,9 @@ func (jpfn JobPartPlanFileName) Create(order common.CopyJobPartOrderRequest) { PreserveLastModifiedTime: order.BlobAttributes.PreserveLastModifiedTime, MD5VerificationOption: order.BlobAttributes.MD5ValidationOption, // here because it relates to downloads (file destination) }, - PreservePermissions: order.PreserveSMBPermissions, - PreserveSMBInfo: order.PreserveSMBInfo, + PreservePermissions: order.PreserveSMBPermissions, + PreserveSMBInfo: order.PreserveSMBInfo, + PreservePOSIXProperties: order.PreservePOSIXProperties, // For S2S copy, per JobPartPlan info S2SGetPropertiesInBackend: order.S2SGetPropertiesInBackend, S2SSourceChangeValidation: order.S2SSourceChangeValidation, diff --git a/ste/downloader-blob.go b/ste/downloader-blob.go index e037977d9..2be80b73b 100644 --- a/ste/downloader-blob.go +++ b/ste/downloader-blob.go @@ -37,16 +37,25 @@ type blobDownloader struct { // used to avoid downloading zero ranges of page blobs pageRangeOptimizer *pageRangeOptimizer + + // used to avoid re-setting file mode + setMode bool + + jptm IJobPartTransferMgr + txInfo TransferInfo + fileMode uint32 } func newBlobDownloader() downloader { return &blobDownloader{ filePacer: NewNullAutoPacer(), // defer creation of real one, if needed, to Prologue } - } func (bd *blobDownloader) Prologue(jptm IJobPartTransferMgr, srcPipeline pipeline.Pipeline) { + bd.txInfo = jptm.Info() + bd.jptm = jptm + if jptm.Info().SrcBlobType == azblob.BlobPageBlob { // page blobs need a 
file-specific pacer // See comments in uploader-pageBlob for the reasons, since the same reasons apply are are explained there diff --git a/ste/mgr-JobPartTransferMgr.go b/ste/mgr-JobPartTransferMgr.go index ee66c2057..e7ddea7cf 100644 --- a/ste/mgr-JobPartTransferMgr.go +++ b/ste/mgr-JobPartTransferMgr.go @@ -95,14 +95,15 @@ type IJobPartTransferMgr interface { } type TransferInfo struct { - JobID common.JobID - BlockSize int64 - Source string - SourceSize int64 - Destination string - EntityType common.EntityType - PreserveSMBPermissions common.PreservePermissionsOption - PreserveSMBInfo bool + JobID common.JobID + BlockSize int64 + Source string + SourceSize int64 + Destination string + EntityType common.EntityType + PreserveSMBPermissions common.PreservePermissionsOption + PreserveSMBInfo bool + PreservePOSIXProperties bool // Transfer info for S2S copy SrcProperties @@ -375,6 +376,7 @@ func (jptm *jobPartTransferMgr) Info() TransferInfo { EntityType: entityType, PreserveSMBPermissions: plan.PreservePermissions, PreserveSMBInfo: plan.PreserveSMBInfo, + PreservePOSIXProperties: plan.PreservePOSIXProperties, S2SGetPropertiesInBackend: s2sGetPropertiesInBackend, S2SSourceChangeValidation: s2sSourceChangeValidation, S2SInvalidMetadataHandleOption: s2sInvalidMetadataHandleOption, diff --git a/ste/s2sCopier-URLToBlob.go b/ste/s2sCopier-URLToBlob.go index 2db98ed77..750717ce8 100644 --- a/ste/s2sCopier-URLToBlob.go +++ b/ste/s2sCopier-URLToBlob.go @@ -77,6 +77,10 @@ func newURLToBlobCopier(jptm IJobPartTransferMgr, destination string, p pipeline fmt.Sprintf("BlobType %q is set for destination blob.", targetBlobType)) } + if jptm.Info().IsFolderPropertiesTransfer() { + return newBlobFolderSender(jptm, destination, p, pacer, srcInfoProvider) + } + switch targetBlobType { case azblob.BlobBlockBlob: return newURLToBlockBlobCopier(jptm, destination, p, pacer, srcInfoProvider) diff --git a/ste/sender-appendBlobFromLocal.go b/ste/sender-appendBlobFromLocal.go index c1be06af5..bf08649c2 100644 --- a/ste/sender-appendBlobFromLocal.go +++ b/ste/sender-appendBlobFromLocal.go @@ -30,6 +30,22 @@ type appendBlobUploader struct { appendBlobSenderBase md5Channel chan []byte + sip ISourceInfoProvider +} + +func (u *appendBlobUploader) Prologue(ps common.PrologueState) (destinationModified bool) { + if u.jptm.Info().PreservePOSIXProperties { + if unixSIP, ok := u.sip.(IUNIXPropertyBearingSourceInfoProvider); ok { + statAdapter, err := unixSIP.GetUNIXProperties() + if err != nil { + u.jptm.FailActiveSend("GetUNIXProperties", err) + } + + common.AddStatToBlobMetadata(statAdapter, u.metadataToApply) + } + } + + return u.appendBlobSenderBase.Prologue(ps) } func newAppendBlobUploader(jptm IJobPartTransferMgr, destination string, p pipeline.Pipeline, pacer pacer, sip ISourceInfoProvider) (sender, error) { @@ -38,7 +54,7 @@ func newAppendBlobUploader(jptm IJobPartTransferMgr, destination string, p pipel return nil, err } - return &appendBlobUploader{appendBlobSenderBase: *senderBase, md5Channel: newMd5Channel()}, nil + return &appendBlobUploader{appendBlobSenderBase: *senderBase, md5Channel: newMd5Channel(), sip: sip}, nil } func (u *appendBlobUploader) Md5Channel() chan<- []byte { diff --git a/ste/sender-blobFolders.go b/ste/sender-blobFolders.go new file mode 100644 index 000000000..151c9efda --- /dev/null +++ b/ste/sender-blobFolders.go @@ -0,0 +1,177 @@ +package ste + +import ( + "fmt" + "github.com/Azure/azure-pipeline-go/pipeline" + "github.com/Azure/azure-storage-azcopy/v10/common" + 
"github.com/Azure/azure-storage-blob-go/azblob" + "net/url" + "strings" + "time" +) + +type blobFolderSender struct { + destination azblob.BlockBlobURL // We'll treat all folders as block blobs + jptm IJobPartTransferMgr + sip ISourceInfoProvider + metadataToApply azblob.Metadata + headersToAppply azblob.BlobHTTPHeaders + blobTagsToApply azblob.BlobTagsMap + cpkToApply azblob.ClientProvidedKeyOptions +} + +func newBlobFolderSender(jptm IJobPartTransferMgr, destination string, p pipeline.Pipeline, pacer pacer, sip ISourceInfoProvider) (sender, error) { + destURL, err := url.Parse(destination) + if err != nil { + return nil, err + } + + destBlockBlobURL := azblob.NewBlockBlobURL(*destURL, p) + + props, err := sip.Properties() + if err != nil { + return nil, err + } + + var out sender + fsend := blobFolderSender{ + jptm: jptm, + sip: sip, + destination: destBlockBlobURL, + metadataToApply: props.SrcMetadata.Clone().ToAzBlobMetadata(), // We're going to modify it, so we should clone it. + headersToAppply: props.SrcHTTPHeaders.ToAzBlobHTTPHeaders(), + blobTagsToApply: props.SrcBlobTags.ToAzBlobTagsMap(), + cpkToApply: common.ToClientProvidedKeyOptions(jptm.CpkInfo(), jptm.CpkScopeInfo()), + } + fromTo := jptm.FromTo() + if fromTo.IsUpload() { + out = &dummyUploader{fsend} + } else { + out = &dummys2sCopier{fsend} + } + + return out, nil +} + +func (b *blobFolderSender) EnsureFolderExists() error { + t := b.jptm.GetFolderCreationTracker() + + _, err := b.destination.GetProperties(b.jptm.Context(), azblob.BlobAccessConditions{}, b.cpkToApply) + if err != nil { + if stgErr, ok := err.(azblob.StorageError); !(ok && stgErr.ServiceCode() == azblob.ServiceCodeBlobNotFound) { + return fmt.Errorf("when checking if blob exists: %w", err) + } + } else { + /* + There's a low likelihood of a blob ending in a / being anything but a folder, but customers can do questionable + things with their own time and money. So, we should safeguard against that. Rather than simply writing to the + destination blob a set of properties, we should be responsible and check if overwriting is intended. + + If so, we should delete the old blob, and create a new one in it's place with all of our fancy new properties. + */ + if t.ShouldSetProperties(b.DirUrlToString(), b.jptm.GetOverwriteOption(), b.jptm.GetOverwritePrompter()) { + _, err := b.destination.Delete(b.jptm.Context(), azblob.DeleteSnapshotsOptionNone, azblob.BlobAccessConditions{}) + if err != nil { + return fmt.Errorf("when deleting existing blob: %w", err) + } + } else { + /* + We don't want to prompt the user again, and we're not going to write properties. So, we should kill the + transfer where it stands and prevent the process from going further. + This will be caught by ShouldSetProperties in the folder property tracker. + */ + return folderPropertiesNotOverwroteInCreation{} + } + } + + b.metadataToApply["hdi_isfolder"] = "true" // Set folder metadata flag + err = b.getExtraProperties() + if err != nil { + return fmt.Errorf("when getting additional folder properties: %w", err) + } + + _, err = b.destination.Upload(b.jptm.Context(), + strings.NewReader(""), + b.headersToAppply, + b.metadataToApply, + azblob.BlobAccessConditions{}, + azblob.DefaultAccessTier, // It doesn't make sense to use a special access tier, the blob will be 0 bytes. 
+ b.blobTagsToApply, + b.cpkToApply, + azblob.ImmutabilityPolicyOptions{}) + if err != nil { + return fmt.Errorf("when creating folder: %w", err) + } + + t.RecordCreation(b.DirUrlToString()) + + return folderPropertiesSetInCreation{} +} + +func (b *blobFolderSender) SetFolderProperties() error { + return nil // unnecessary, all properties were set on creation. +} + +func (b *blobFolderSender) DirUrlToString() string { + url := b.destination.URL() + url.RawQuery = "" + return url.String() +} + +// ===== Implement sender so that it can be returned in newBlobUploader. ===== +/* + It's OK to just panic all of these out, as they will never get called in a folder transfer. +*/ + +func (b *blobFolderSender) ChunkSize() int64 { + panic("this sender only sends folders.") +} + +func (b *blobFolderSender) NumChunks() uint32 { + panic("this sender only sends folders.") +} + +func (b *blobFolderSender) RemoteFileExists() (bool, time.Time, error) { + panic("this sender only sends folders.") +} + +func (b *blobFolderSender) Prologue(state common.PrologueState) (destinationModified bool) { + panic("this sender only sends folders.") +} + +func (b *blobFolderSender) Epilogue() { + panic("this sender only sends folders.") +} + +func (b *blobFolderSender) Cleanup() { + panic("this sender only sends folders.") +} + +func (b *blobFolderSender) GetDestinationLength() (int64, error) { + panic("this sender only sends folders.") +} + +// implement uploader to handle commonSenderCompletion + +type dummyUploader struct { + blobFolderSender +} + +func (d dummyUploader) GenerateUploadFunc(chunkID common.ChunkID, blockIndex int32, reader common.SingleChunkReader, chunkIsWholeFile bool) chunkFunc { + panic("this sender only sends folders.") +} + +func (d dummyUploader) Md5Channel() chan<- []byte { + panic("this sender only sends folders.") +} + +// ditto for s2sCopier + +type dummys2sCopier struct { + blobFolderSender +} + +func (d dummys2sCopier) GenerateCopyFunc(chunkID common.ChunkID, blockIndex int32, adjustedChunkSize int64, chunkIsWholeFile bool) chunkFunc { + // TODO implement me + panic("implement me") +} diff --git a/ste/sender-blobFolders_linux.go b/ste/sender-blobFolders_linux.go new file mode 100644 index 000000000..4ad0e941c --- /dev/null +++ b/ste/sender-blobFolders_linux.go @@ -0,0 +1,21 @@ +// +build linux + +package ste + +import "github.com/Azure/azure-storage-azcopy/v10/common" + +func (b blobFolderSender) getExtraProperties() error { + if b.jptm.Info().PreservePOSIXProperties { + if sip, ok := b.sip.(*localFileSourceInfoProvider); ok { // has UNIX properties for sure; Blob metadata gets handled as expected. 
+ statAdapter, err := sip.GetUNIXProperties() + + if err != nil { + return err + } + + common.AddStatToBlobMetadata(statAdapter, b.metadataToApply) + } + } + + return nil +} diff --git a/ste/sender-blobFolders_other.go b/ste/sender-blobFolders_other.go new file mode 100644 index 000000000..7680a7a14 --- /dev/null +++ b/ste/sender-blobFolders_other.go @@ -0,0 +1,7 @@ +// +build !linux + +package ste + +func (b blobFolderSender) getExtraProperties() error { + return nil +} diff --git a/ste/sender-blockBlobFromLocal.go b/ste/sender-blockBlobFromLocal.go index d93ed3d31..b2682b3ab 100644 --- a/ste/sender-blockBlobFromLocal.go +++ b/ste/sender-blockBlobFromLocal.go @@ -45,6 +45,21 @@ func newBlockBlobUploader(jptm IJobPartTransferMgr, destination string, p pipeli return &blockBlobUploader{blockBlobSenderBase: *senderBase, md5Channel: newMd5Channel()}, nil } +func (s *blockBlobUploader) Prologue(ps common.PrologueState) (destinationModified bool) { + if s.jptm.Info().PreservePOSIXProperties { + if unixSIP, ok := s.sip.(IUNIXPropertyBearingSourceInfoProvider); ok { + statAdapter, err := unixSIP.GetUNIXProperties() + if err != nil { + s.jptm.FailActiveSend("GetUNIXProperties", err) + } + + common.AddStatToBlobMetadata(statAdapter, s.metadataToApply) + } + } + + return s.blockBlobSenderBase.Prologue(ps) +} + func (u *blockBlobUploader) Md5Channel() chan<- []byte { return u.md5Channel } diff --git a/ste/sender-pageBlobFromLocal.go b/ste/sender-pageBlobFromLocal.go index 3fe6e6fa6..2dd1ebf6c 100644 --- a/ste/sender-pageBlobFromLocal.go +++ b/ste/sender-pageBlobFromLocal.go @@ -32,6 +32,7 @@ type pageBlobUploader struct { pageBlobSenderBase md5Channel chan []byte + sip ISourceInfoProvider } func newPageBlobUploader(jptm IJobPartTransferMgr, destination string, p pipeline.Pipeline, pacer pacer, sip ISourceInfoProvider) (sender, error) { @@ -40,7 +41,22 @@ func newPageBlobUploader(jptm IJobPartTransferMgr, destination string, p pipelin return nil, err } - return &pageBlobUploader{pageBlobSenderBase: *senderBase, md5Channel: newMd5Channel()}, nil + return &pageBlobUploader{pageBlobSenderBase: *senderBase, md5Channel: newMd5Channel(), sip: sip}, nil +} + +func (u *pageBlobUploader) Prologue(ps common.PrologueState) (destinationModified bool) { + if u.jptm.Info().PreservePOSIXProperties { + if unixSIP, ok := u.sip.(IUNIXPropertyBearingSourceInfoProvider); ok { + statAdapter, err := unixSIP.GetUNIXProperties() + if err != nil { + u.jptm.FailActiveSend("GetUNIXProperties", err) + } + + common.AddStatToBlobMetadata(statAdapter, u.metadataToApply) + } + } + + return u.pageBlobSenderBase.Prologue(ps) } func (u *pageBlobUploader) Md5Channel() chan<- []byte { diff --git a/ste/sender.go b/ste/sender.go index fb7b7644f..446ecdc1d 100644 --- a/ste/sender.go +++ b/ste/sender.go @@ -75,6 +75,20 @@ type folderSender interface { DirUrlToString() string // This is only used in folder tracking, so this should trim the SAS token. } +// We wrote properties at creation time. +type folderPropertiesSetInCreation struct{} + +func (f folderPropertiesSetInCreation) Error() string { + panic("Not a real error") +} + +// ShouldSetProperties was called in creation and we got back a no. 
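+// Like folderPropertiesSetInCreation above, this is a sentinel rather than a
+// real failure. Callers of EnsureFolderExists are expected to switch on both
+// values, roughly as the anyToRemote folder transfer handles it below:
+//
+//	switch err := sender.EnsureFolderExists(); err {
+//	case folderPropertiesSetInCreation{}:
+//		// properties were written together with the placeholder blob
+//	case folderPropertiesNotOverwroteInCreation{}:
+//		// overwrite was declined, so skip setting folder properties
+//	default:
+//		// nil, or a genuine error
+//	}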
+type folderPropertiesNotOverwroteInCreation struct{} + +func (f folderPropertiesNotOverwroteInCreation) Error() string { + panic("Not a real error") +} + type senderFactory func(jptm IJobPartTransferMgr, destination string, p pipeline.Pipeline, pacer pacer, sip ISourceInfoProvider) (sender, error) ///////////////////////////////////////////////////////////////////////////////////////////////// @@ -193,6 +207,10 @@ func newBlobUploader(jptm IJobPartTransferMgr, destination string, p pipeline.Pi // TODO: Perhaps we should log it only if it isn't a block blob? } + if jptm.Info().IsFolderPropertiesTransfer() { + return newBlobFolderSender(jptm, destination, p, pacer, sip) + } + switch intendedType { case azblob.BlobBlockBlob: return newBlockBlobUploader(jptm, destination, p, pacer, sip) diff --git a/ste/sourceInfoProvider-Blob.go b/ste/sourceInfoProvider-Blob.go index d078a63a3..4e9b3d27a 100644 --- a/ste/sourceInfoProvider-Blob.go +++ b/ste/sourceInfoProvider-Blob.go @@ -35,6 +35,31 @@ type blobSourceInfoProvider struct { defaultRemoteSourceInfoProvider } +func (p *blobSourceInfoProvider) GetUNIXProperties() (common.UnixStatAdapter, error) { + prop, err := p.Properties() + if err != nil { + return nil, err + } + + return common.ReadStatFromMetadata(prop.SrcMetadata.ToAzBlobMetadata(), p.SourceSize()) +} + +func (p *blobSourceInfoProvider) HasUNIXProperties() bool { + prop, err := p.Properties() + if err != nil { + return false // This transfer is probably going to fail anyway. + } + + for _, v := range common.AllLinuxProperties { + _, ok := prop.SrcMetadata[v] + if ok { + return true + } + } + + return false +} + func newBlobSourceInfoProvider(jptm IJobPartTransferMgr) (ISourceInfoProvider, error) { base, err := newDefaultRemoteSourceInfoProvider(jptm) if err != nil { diff --git a/ste/sourceInfoProvider-Local_linux.go b/ste/sourceInfoProvider-Local_linux.go new file mode 100644 index 000000000..1007a23b8 --- /dev/null +++ b/ste/sourceInfoProvider-Local_linux.go @@ -0,0 +1,163 @@ +// +build linux + +package ste + +import ( + "github.com/Azure/azure-storage-azcopy/v10/common" + "golang.org/x/sys/unix" + "time" +) + +func (f localFileSourceInfoProvider) HasUNIXProperties() bool { + return true +} + +func (f localFileSourceInfoProvider) GetUNIXProperties() (common.UnixStatAdapter, error) { + { // attempt to call statx, if ENOSYS is returned, statx is unavailable + var stat unix.Statx_t + // dirfd is a null pointer, because we should only ever be passing relative paths here, and directories will be passed via transferInfo.Source. + // AT_SYMLINK_NOFOLLOW is not used, because we automagically resolve symlinks. TODO: Add option to not follow symlinks, and use AT_SYMLINK_NOFOLLOW when resolving is disabled. 
+ err := unix.Statx(0, f.transferInfo.Source, + unix.AT_STATX_SYNC_AS_STAT, + unix.STATX_ALL, + &stat) + + if err != nil && err != unix.ENOSYS { + return nil, err + } else if err == nil { + return statxTAdapter(stat), nil + } + } + + var stat unix.Stat_t + err := unix.Stat(f.transferInfo.Source, &stat) + if err != nil { + return nil, err + } + + return statTAdapter(stat), nil +} + +type statxTAdapter unix.Statx_t + +func (s statxTAdapter) Extended() bool { + return true +} + +func (s statxTAdapter) StatxMask() uint32 { + return s.Mask +} + +func (s statxTAdapter) Attribute() uint64 { + return s.Attributes +} + +func (s statxTAdapter) AttributeMask() uint64 { + return s.Attributes_mask +} + +func (s statxTAdapter) BTime() time.Time { + return time.Unix(s.Btime.Sec, int64(s.Btime.Nsec)) +} + +func (s statxTAdapter) NLink() uint64 { + return uint64(s.Nlink) +} + +func (s statxTAdapter) Owner() uint32 { + return s.Uid +} + +func (s statxTAdapter) Group() uint32 { + return s.Gid +} + +func (s statxTAdapter) FileMode() uint32 { + return uint32(s.Mode) +} + +func (s statxTAdapter) INode() uint64 { + return s.Ino +} + +func (s statxTAdapter) Device() uint64 { + return unix.Mkdev(s.Dev_major, s.Dev_minor) +} + +func (s statxTAdapter) RDevice() uint64 { + return unix.Mkdev(s.Rdev_major, s.Rdev_minor) +} + +func (s statxTAdapter) ATime() time.Time { + return time.Unix(s.Atime.Sec, int64(s.Atime.Nsec)) +} + +func (s statxTAdapter) MTime() time.Time { + return time.Unix(s.Mtime.Sec, int64(s.Mtime.Nsec)) +} + +func (s statxTAdapter) CTime() time.Time { + return time.Unix(s.Btime.Sec, int64(s.Ctime.Nsec)) +} + +type statTAdapter unix.Stat_t + +func (s statTAdapter) Extended() bool { + return false +} + +func (s statTAdapter) StatxMask() uint32 { + return 0 +} + +func (s statTAdapter) Attribute() uint64 { + return 0 +} + +func (s statTAdapter) AttributeMask() uint64 { + return 0 +} + +func (s statTAdapter) BTime() time.Time { + return time.Time{} +} + +func (s statTAdapter) NLink() uint64 { + return s.Nlink +} + +func (s statTAdapter) Owner() uint32 { + return s.Uid +} + +func (s statTAdapter) Group() uint32 { + return s.Gid +} + +func (s statTAdapter) FileMode() uint32 { + return s.Mode +} + +func (s statTAdapter) INode() uint64 { + return s.Ino +} + +func (s statTAdapter) Device() uint64 { + return s.Dev +} + +func (s statTAdapter) RDevice() uint64 { + return s.Rdev +} + +func (s statTAdapter) ATime() time.Time { + return time.Unix(s.Atim.Unix()) +} + +func (s statTAdapter) MTime() time.Time { + return time.Unix(s.Mtim.Unix()) +} + +func (s statTAdapter) CTime() time.Time { + return time.Unix(s.Ctim.Unix()) +} diff --git a/ste/sourceInfoProvider.go b/ste/sourceInfoProvider.go index 8a5baea1b..c0b552214 100644 --- a/ste/sourceInfoProvider.go +++ b/ste/sourceInfoProvider.go @@ -91,6 +91,13 @@ type ISMBPropertyBearingSourceInfoProvider interface { GetSMBProperties() (TypedSMBPropertyHolder, error) } +type IUNIXPropertyBearingSourceInfoProvider interface { + ISourceInfoProvider + + GetUNIXProperties() (common.UnixStatAdapter, error) + HasUNIXProperties() bool +} + type ICustomLocalOpener interface { ISourceInfoProvider Open(path string) (*os.File, error) @@ -98,7 +105,7 @@ type ICustomLocalOpener interface { type sourceInfoProviderFactory func(jptm IJobPartTransferMgr) (ISourceInfoProvider, error) -///////////////////////////////////////////////////////////////////////////////////////////////// +// /////////////////////////////////////////////////////////////////////////////////////////////// // Default 
copy remote source info provider which provides info sourced from transferInfo. // It implements all methods of ISourceInfoProvider except for GetFreshLastModifiedTime. // It's never correct to implement that based on the transfer info, because the whole point is that it should diff --git a/ste/xfer-anyToRemote-folder.go b/ste/xfer-anyToRemote-folder.go index b94af5954..a58362345 100644 --- a/ste/xfer-anyToRemote-folder.go +++ b/ste/xfer-anyToRemote-folder.go @@ -69,7 +69,17 @@ func anyToRemote_folder(jptm IJobPartTransferMgr, info TransferInfo, p pipeline. // of those issues apply to folders. err = s.EnsureFolderExists() // we may create it here, or possible there's already a file transfer for the folder that has created it, or maybe it already existed before this job if err != nil { - jptm.FailActiveSend("ensuring destination folder exists", err) + switch err { + case folderPropertiesSetInCreation{}: + // Continue to standard completion. + case folderPropertiesNotOverwroteInCreation{}: + jptm.LogAtLevelForCurrentTransfer(pipeline.LogWarning, "Folder already exists, so due to the --overwrite option, its properties won't be set") + jptm.SetStatus(common.ETransferStatus.SkippedEntityAlreadyExists()) // using same status for both files and folders, for simplicity + jptm.ReportTransferDone() + return + default: + jptm.FailActiveSend("ensuring destination folder exists", err) + } } else { t := jptm.GetFolderCreationTracker() diff --git a/ste/xfer-remoteToLocal-file.go b/ste/xfer-remoteToLocal-file.go index 5b93c8a26..84283d1bd 100644 --- a/ste/xfer-remoteToLocal-file.go +++ b/ste/xfer-remoteToLocal-file.go @@ -110,6 +110,17 @@ func remoteToLocal_file(jptm IJobPartTransferMgr, p pipeline.Pipeline, pacer pac // step 4a: mark destination as modified before we take our first action there (which is to create the destination file) jptm.SetDestinationIsModified() + writeThrough := false + // TODO: consider cases where we might set it to true. It might give more predictable and understandable disk throughput. + // But can't be used in the cases shown in the if statement below (one of which is only pseudocode, at this stage) + // if fileSize <= 1*1024*1024 || jptm.JobHasLowFileCount() || { + // // but, for very small files, testing indicates that we can need it in at least some cases. (Presumably just can't get enough queue depth to physical disk without it.) + // // And also, for very low file counts, we also need it. Presumably for same reasons of queue depth (given our sequential write strategy as at March 2019) + // // And for very short-running jobs, it looks and feels faster for the user to just let the OS cache flush out after the job appears to have finished. + // writeThrough = false + // } + + var dstFile io.WriteCloser // step 4b: special handling for empty files if fileSize == 0 { if strings.EqualFold(info.Destination, common.Dev_Null) { @@ -135,15 +146,6 @@ func remoteToLocal_file(jptm IJobPartTransferMgr, p pipeline.Pipeline, pacer pac } // step 4c: normal file creation when source has content - writeThrough := false - // TODO: consider cases where we might set it to true. It might give more predictable and understandable disk throughput. - // But can't be used in the cases shown in the if statement below (one of which is only pseudocode, at this stage) - // if fileSize <= 1*1024*1024 || jptm.JobHasLowFileCount() || { - // // but, for very small files, testing indicates that we can need it in at least some cases. (Presumably just can't get enough queue depth to physical disk without it.) 
- // // And also, for very low file counts, we also need it. Presumably for same reasons of queue depth (given our sequential write strategy as at March 2019) - // // And for very short-running jobs, it looks and feels faster for the user to just let the OS cache flush out after the job appears to have finished. - // writeThrough = false - // } failFileCreation := func(err error) { jptm.LogDownloadError(info.Source, info.Destination, "File Creation Error "+err.Error(), 0) @@ -158,7 +160,6 @@ func remoteToLocal_file(jptm IJobPartTransferMgr, p pipeline.Pipeline, pacer pac return } - var dstFile io.WriteCloser if strings.EqualFold(info.Destination, common.Dev_Null) { // the user wants to discard the downloaded data dstFile = devNullWriter{} @@ -351,11 +352,11 @@ func epilogueWithCleanupDownload(jptm IJobPartTransferMgr, dl downloader, active } } - //check if we need to rename back to original name. At this point, we're sure the file is completely - //downloaded and not corrupt. Infact, post this point we should only log errors and - //not fail the transfer. - renameNecessary := !strings.EqualFold(info.getDownloadPath(), info.Destination) && - !strings.EqualFold(info.Destination, common.Dev_Null) + // check if we need to rename back to original name. At this point, we're sure the file is completely + // downloaded and not corrupt. Infact, post this point we should only log errors and + // not fail the transfer. + renameNecessary := !strings.EqualFold(info.getDownloadPath(), info.Destination) && + !strings.EqualFold(info.Destination, common.Dev_Null) if err == nil && renameNecessary { renameErr := os.Rename(info.getDownloadPath(), info.Destination) if renameErr != nil { @@ -471,7 +472,7 @@ func tryDeleteFile(info TransferInfo, jptm IJobPartTransferMgr) { } // Returns the path of file to be downloaded. 
If we want to -// download to a temp path we return a temp paht in format +// download to a temp path we return a temp paht in format // /actual/parent/path/.azDownload-- func (info *TransferInfo) getDownloadPath() string { if common.GetLifecycleMgr().DownloadToTempPath() { From 9af831ab21592c3065c75c90aca1efeaa29f6d18 Mon Sep 17 00:00:00 2001 From: Ishaan Verma <99612568+tiverma-msft@users.noreply.github.com> Date: Thu, 2 Jun 2022 16:16:10 +0530 Subject: [PATCH 09/26] Moving log-level to root.go (#1804) * location of bug * moving log-level to root command * making logVerbosity variable in root.go * passing correct values while making logger * removing logVerbosity from raw and cooked * removing logVerbosity from zt files raw args * some prob with conflict resolution --- cmd/benchmark.go | 21 +++++++-------------- cmd/copy.go | 12 ++---------- cmd/copyEnumeratorInit.go | 11 +++++++---- cmd/remove.go | 1 - cmd/removeEnumerator.go | 7 +++++-- cmd/removeProcessor.go | 2 +- cmd/root.go | 19 +++++++++++++++++-- cmd/sync.go | 11 ++--------- cmd/syncEnumerator.go | 13 +++++++------ cmd/syncProcessor.go | 4 ++-- cmd/zt_copy_s2smigration_test.go | 1 - cmd/zt_scenario_helpers_for_test.go | 3 --- 12 files changed, 50 insertions(+), 55 deletions(-) diff --git a/cmd/benchmark.go b/cmd/benchmark.go index eaab9b7b0..234dac301 100644 --- a/cmd/benchmark.go +++ b/cmd/benchmark.go @@ -47,13 +47,12 @@ type rawBenchmarkCmdArgs struct { numOfFolders uint // options from flags - blockSizeMB float64 - putMd5 bool - checkLength bool - blobType string - output string - logVerbosity string - mode string + blockSizeMB float64 + putMd5 bool + checkLength bool + blobType string + output string + mode string } const ( @@ -153,7 +152,6 @@ func (raw rawBenchmarkCmdArgs) cook() (CookedCopyCmdArgs, error) { c.CheckLength = raw.checkLength c.blobType = raw.blobType c.output = raw.output - c.logVerbosity = raw.logVerbosity cooked, err := c.cook() if err != nil { @@ -168,7 +166,7 @@ func (raw rawBenchmarkCmdArgs) cook() (CookedCopyCmdArgs, error) { if !downloadMode && raw.deleteTestData { // set up automatic cleanup - cooked.followupJobArgs, err = raw.createCleanupJobArgs(cooked.Destination, raw.logVerbosity) + cooked.followupJobArgs, err = raw.createCleanupJobArgs(cooked.Destination, logVerbosityRaw) if err != nil { return dummyCooked, err } @@ -225,7 +223,6 @@ func (raw rawBenchmarkCmdArgs) createCleanupJobArgs(benchmarkDest common.Resourc u, _ := benchmarkDest.FullURL() // don't check error, because it was parsed already in main job rc.src = u.String() // the SOURCE for the deletion is the the dest from the benchmark rc.recursive = true - rc.logVerbosity = logVerbosity switch InferArgumentLocation(rc.src) { case common.ELocation.Blob(): @@ -345,8 +342,4 @@ func init() { benchCmd.PersistentFlags().BoolVar(&raw.putMd5, "put-md5", false, "create an MD5 hash of each file, and save the hash as the Content-MD5 property of the destination blob/file. (By default the hash is NOT created.) Identical to the same-named parameter in the copy command") benchCmd.PersistentFlags().BoolVar(&raw.checkLength, "check-length", true, "Check the length of a file on the destination after the transfer. If there is a mismatch between source and destination, the transfer is marked as failed.") benchCmd.PersistentFlags().StringVar(&raw.mode, "mode", "upload", "Defines if Azcopy should test uploads or downloads from this target. Valid values are 'upload' and 'download'. 
Defaulted option is 'upload'.") - - // TODO use constant for default value or, better, move loglevel param to root cmd? - benchCmd.PersistentFlags().StringVar(&raw.logVerbosity, "log-level", "INFO", "define the log verbosity for the log file, available levels: INFO(all requests/responses), WARNING(slow responses), ERROR(only failed requests), and NONE(no output logs).") - } diff --git a/cmd/copy.go b/cmd/copy.go index d52562367..7c6face3b 100644 --- a/cmd/copy.go +++ b/cmd/copy.go @@ -117,7 +117,6 @@ type rawCopyCmdArgs struct { blockBlobTier string pageBlobTier string output string // TODO: Is this unused now? replaced with param at root level? - logVerbosity string // list of blobTypes to exclude while enumerating the transfer excludeBlobType string // Opt-in flag to persist SMB ACLs to Azure Files. @@ -257,13 +256,8 @@ func (raw rawCopyCmdArgs) cook() (CookedCopyCmdArgs, error) { jobID: azcopyCurrentJobID, } - err := cooked.LogVerbosity.Parse(raw.logVerbosity) - if err != nil { - return cooked, err - } - // set up the front end scanning logger - azcopyScanningLogger = common.NewJobLogger(azcopyCurrentJobID, cooked.LogVerbosity, azcopyLogPathFolder, "-scanning") + azcopyScanningLogger = common.NewJobLogger(azcopyCurrentJobID, azcopyLogVerbosity, azcopyLogPathFolder, "-scanning") azcopyScanningLogger.OpenLog() glcm.RegisterCloseFunc(func() { azcopyScanningLogger.CloseLog() @@ -1102,7 +1096,6 @@ type CookedCopyCmdArgs struct { putMd5 bool md5ValidationOption common.HashValidationOption CheckLength bool - LogVerbosity common.LogLevel // commandString hold the user given command which is logged to the Job log file commandString string @@ -1384,7 +1377,7 @@ func (cca *CookedCopyCmdArgs) processCopyJobPartOrders() (err error) { ForceIfReadOnly: cca.ForceIfReadOnly, AutoDecompress: cca.autoDecompress, Priority: common.EJobPriority.Normal(), - LogLevel: cca.LogVerbosity, + LogLevel: azcopyLogVerbosity, ExcludeBlobType: cca.excludeBlobType, BlobAttributes: common.BlobTransferAttributes{ BlobType: cca.blobType, @@ -1880,7 +1873,6 @@ func init() { "or the account. Use of this flag is not applicable for copying data from non azure-service to service. More than one blob should be separated by ';'. ") // options change how the transfers are performed cpCmd.PersistentFlags().Float64Var(&raw.blockSizeMB, "block-size-mb", 0, "Use this block size (specified in MiB) when uploading to Azure Storage, and downloading from Azure Storage. The default value is automatically calculated based on file size. Decimal fractions are allowed (For example: 0.25).") - cpCmd.PersistentFlags().StringVar(&raw.logVerbosity, "log-level", "INFO", "Define the log verbosity for the log file, available levels: INFO(all requests/responses), WARNING(slow responses), ERROR(only failed requests), and NONE(no output logs). (default 'INFO').") cpCmd.PersistentFlags().StringVar(&raw.blobType, "blob-type", "Detect", "Defines the type of blob at the destination. This is used for uploading blobs and when copying between accounts (default 'Detect'). Valid values include 'Detect', 'BlockBlob', 'PageBlob', and 'AppendBlob'. "+ "When copying between accounts, a value of 'Detect' causes AzCopy to use the type of source blob to determine the type of the destination blob. When uploading a file, 'Detect' determines if the file is a VHD or a VHDX file based on the file extension. 
If the file is either a VHD or VHDX file, AzCopy treats the file as a page blob.") cpCmd.PersistentFlags().StringVar(&raw.blockBlobTier, "block-blob-tier", "None", "upload block blob to Azure Storage using this blob tier.") diff --git a/cmd/copyEnumeratorInit.go b/cmd/copyEnumeratorInit.go index 4f484383c..98f228a4b 100644 --- a/cmd/copyEnumeratorInit.go +++ b/cmd/copyEnumeratorInit.go @@ -81,7 +81,10 @@ func (cca *CookedCopyCmdArgs) initEnumerator(jobPartOrder common.CopyJobPartOrde jobPartOrder.S2SInvalidMetadataHandleOption = cca.s2sInvalidMetadataHandleOption jobPartOrder.S2SPreserveBlobTags = cca.S2sPreserveBlobTags - traverser, err = InitResourceTraverser(cca.Source, cca.FromTo.From(), &ctx, &srcCredInfo, &cca.FollowSymlinks, cca.ListOfFilesChannel, cca.Recursive, getRemoteProperties, cca.IncludeDirectoryStubs, cca.permanentDeleteOption, func(common.EntityType) {}, cca.ListOfVersionIDs, cca.S2sPreserveBlobTags, cca.LogVerbosity.ToPipelineLogLevel(), cca.CpkOptions) + traverser, err = InitResourceTraverser(cca.Source, cca.FromTo.From(), &ctx, &srcCredInfo, + &cca.FollowSymlinks, cca.ListOfFilesChannel, cca.Recursive, getRemoteProperties, + cca.IncludeDirectoryStubs, cca.permanentDeleteOption, func(common.EntityType) {}, cca.ListOfVersionIDs, + cca.S2sPreserveBlobTags, azcopyLogVerbosity.ToPipelineLogLevel(), cca.CpkOptions) if err != nil { return nil, err @@ -159,7 +162,7 @@ func (cca *CookedCopyCmdArgs) initEnumerator(jobPartOrder common.CopyJobPartOrde // only create the destination container in S2S scenarios if cca.FromTo.From().IsRemote() && dstContainerName != "" { // if the destination has a explicit container name // Attempt to create the container. If we fail, fail silently. - err = cca.createDstContainer(dstContainerName, cca.Destination, ctx, existingContainers, cca.LogVerbosity) + err = cca.createDstContainer(dstContainerName, cca.Destination, ctx, existingContainers, azcopyLogVerbosity) // check against seenFailedContainers so we don't spam the job log with initialization failed errors if _, ok := seenFailedContainers[dstContainerName]; err != nil && jobsAdmin.JobsAdmin != nil && !ok { @@ -193,7 +196,7 @@ func (cca *CookedCopyCmdArgs) initEnumerator(jobPartOrder common.CopyJobPartOrde continue } - err = cca.createDstContainer(bucketName, cca.Destination, ctx, existingContainers, cca.LogVerbosity) + err = cca.createDstContainer(bucketName, cca.Destination, ctx, existingContainers, azcopyLogVerbosity) // if JobsAdmin is nil, we're probably in testing mode. // As a result, container creation failures are expected as we don't give the SAS tokens adequate permissions. 
@@ -216,7 +219,7 @@ func (cca *CookedCopyCmdArgs) initEnumerator(jobPartOrder common.CopyJobPartOrde resName, err := containerResolver.ResolveName(cName) if err == nil { - err = cca.createDstContainer(resName, cca.Destination, ctx, existingContainers, cca.LogVerbosity) + err = cca.createDstContainer(resName, cca.Destination, ctx, existingContainers, azcopyLogVerbosity) if _, ok := seenFailedContainers[dstContainerName]; err != nil && jobsAdmin.JobsAdmin != nil && !ok { logDstContainerCreateFailureOnce.Do(func() { diff --git a/cmd/remove.go b/cmd/remove.go index 0d28d4ab7..7f37454d8 100644 --- a/cmd/remove.go +++ b/cmd/remove.go @@ -102,7 +102,6 @@ func init() { rootCmd.AddCommand(deleteCmd) deleteCmd.PersistentFlags().BoolVar(&raw.recursive, "recursive", false, "Look into sub-directories recursively when syncing between directories.") - deleteCmd.PersistentFlags().StringVar(&raw.logVerbosity, "log-level", "INFO", "Define the log verbosity for the log file. Available levels include: INFO(all requests/responses), WARNING(slow responses), ERROR(only failed requests), and NONE(no output logs). (default 'INFO')") deleteCmd.PersistentFlags().StringVar(&raw.include, "include-pattern", "", "Include only files where the name matches the pattern list. For example: *.jpg;*.pdf;exactName") deleteCmd.PersistentFlags().StringVar(&raw.includePath, "include-path", "", "Include only these paths when removing. "+ "This option does not support wildcard characters (*). Checks relative path prefix. For example: myFolder;myFolder/subDirName/file.pdf") diff --git a/cmd/removeEnumerator.go b/cmd/removeEnumerator.go index 3d271a7bc..53e112591 100644 --- a/cmd/removeEnumerator.go +++ b/cmd/removeEnumerator.go @@ -47,7 +47,10 @@ func newRemoveEnumerator(cca *CookedCopyCmdArgs) (enumerator *CopyEnumerator, er ctx := context.WithValue(context.TODO(), ste.ServiceAPIVersionOverride, ste.DefaultServiceApiVersion) // Include-path is handled by ListOfFilesChannel. 
- sourceTraverser, err = InitResourceTraverser(cca.Source, cca.FromTo.From(), &ctx, &cca.credentialInfo, nil, cca.ListOfFilesChannel, cca.Recursive, false, cca.IncludeDirectoryStubs, cca.permanentDeleteOption, func(common.EntityType) {}, cca.ListOfVersionIDs, false, cca.LogVerbosity.ToPipelineLogLevel(), cca.CpkOptions) + sourceTraverser, err = InitResourceTraverser(cca.Source, cca.FromTo.From(), &ctx, &cca.credentialInfo, + nil, cca.ListOfFilesChannel, cca.Recursive, false, cca.IncludeDirectoryStubs, + cca.permanentDeleteOption, func(common.EntityType) {}, cca.ListOfVersionIDs, false, + azcopyLogVerbosity.ToPipelineLogLevel(), cca.CpkOptions) // report failure to create traverser if err != nil { @@ -132,7 +135,7 @@ func removeBfsResources(cca *CookedCopyCmdArgs) (err error) { } // create bfs pipeline - p, err := createBlobFSPipeline(ctx, cca.credentialInfo, cca.LogVerbosity.ToPipelineLogLevel()) + p, err := createBlobFSPipeline(ctx, cca.credentialInfo, azcopyLogVerbosity.ToPipelineLogLevel()) if err != nil { return err } diff --git a/cmd/removeProcessor.go b/cmd/removeProcessor.go index 964957824..ea95d2535 100644 --- a/cmd/removeProcessor.go +++ b/cmd/removeProcessor.go @@ -36,7 +36,7 @@ func newRemoveTransferProcessor(cca *CookedCopyCmdArgs, numOfTransfersPerPart in ForceIfReadOnly: cca.ForceIfReadOnly, // flags - LogLevel: cca.LogVerbosity, + LogLevel: azcopyLogVerbosity, BlobAttributes: common.BlobTransferAttributes{DeleteSnapshotsOption: cca.deleteSnapshotsOption, PermanentDeleteOption: cca.permanentDeleteOption}, } diff --git a/cmd/root.go b/cmd/root.go index bbe6f676f..9ce8401ce 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -45,15 +45,23 @@ var azcopyLogPathFolder string var azcopyMaxFileAndSocketHandles int var outputFormatRaw string var outputVerbosityRaw string +var logVerbosityRaw string var cancelFromStdin bool var azcopyOutputFormat common.OutputFormat var azcopyOutputVerbosity common.OutputVerbosity +var azcopyLogVerbosity common.LogLevel +var loggerInfo jobLoggerInfo var cmdLineCapMegaBitsPerSecond float64 var azcopyAwaitContinue bool var azcopyAwaitAllowOpenFiles bool var azcopyScanningLogger common.ILoggerResetable var azcopyCurrentJobID common.JobID +type jobLoggerInfo struct { + jobID common.JobID + logFileFolder string +} + // It's not pretty that this one is read directly by credential util. // But doing otherwise required us passing it around in many places, even though really // it can be thought of as an "ambient" property. That's the (weak?) 
justification for implementing @@ -95,6 +103,13 @@ var rootCmd = &cobra.Command{ return err } + err = azcopyLogVerbosity.Parse(logVerbosityRaw) + if err != nil { + return err + } + common.AzcopyCurrentJobLogger = common.NewJobLogger(loggerInfo.jobID, azcopyLogVerbosity, loggerInfo.logFileFolder, "") + common.AzcopyCurrentJobLogger.OpenLog() + glcm.SetForceLogging() // warn Windows users re quoting (since our docs all use single quotes, but CMD needs double) @@ -169,8 +184,7 @@ func Execute(azsAppPathFolder, logPathFolder string, jobPlanFolder string, maxFi common.AzcopyJobPlanFolder = jobPlanFolder azcopyMaxFileAndSocketHandles = maxFileAndSocketHandles azcopyCurrentJobID = jobID - common.AzcopyCurrentJobLogger = common.NewJobLogger(jobID, common.ELogLevel.Debug(), logPathFolder, "") - common.AzcopyCurrentJobLogger.OpenLog() + loggerInfo = jobLoggerInfo{jobID, logPathFolder} if err := rootCmd.Execute(); err != nil { glcm.Error(err.Error()) @@ -194,6 +208,7 @@ func init() { rootCmd.PersistentFlags().Float64Var(&cmdLineCapMegaBitsPerSecond, "cap-mbps", 0, "Caps the transfer rate, in megabits per second. Moment-by-moment throughput might vary slightly from the cap. If this option is set to zero, or it is omitted, the throughput isn't capped.") rootCmd.PersistentFlags().StringVar(&outputFormatRaw, "output-type", "text", "Format of the command's output. The choices include: text, json. The default value is 'text'.") rootCmd.PersistentFlags().StringVar(&outputVerbosityRaw, "output-level", "default", "Define the output verbosity. Available levels: essential, quiet.") + rootCmd.PersistentFlags().StringVar(&logVerbosityRaw, "log-level", "INFO", "Define the log verbosity for the log file, available levels: INFO(all requests/responses), WARNING(slow responses), ERROR(only failed requests), and NONE(no output logs). (default 'INFO').") rootCmd.PersistentFlags().StringVar(&cmdLineExtraSuffixesAAD, trustedSuffixesNameAAD, "", "Specifies additional domain suffixes where Azure Active Directory login tokens may be sent. The default is '"+ trustedSuffixesAAD+"'. Any listed here are added to the default. For security, you should only put Microsoft Azure domains here. 
Separate multiple entries with semi-colons.") diff --git a/cmd/sync.go b/cmd/sync.go index 573f2050a..37e9f657c 100644 --- a/cmd/sync.go +++ b/cmd/sync.go @@ -45,7 +45,6 @@ type rawSyncCmdArgs struct { // options from flags blockSizeMB float64 - logVerbosity string include string exclude string excludePath string @@ -123,13 +122,8 @@ func (raw *rawSyncCmdArgs) validateURLIsNotServiceLevel(url string, location com func (raw *rawSyncCmdArgs) cook() (cookedSyncCmdArgs, error) { cooked := cookedSyncCmdArgs{} - err := cooked.logVerbosity.Parse(raw.logVerbosity) - if err != nil { - return cooked, err - } - // set up the front end scanning logger - azcopyScanningLogger = common.NewJobLogger(azcopyCurrentJobID, cooked.logVerbosity, azcopyLogPathFolder, "-scanning") + azcopyScanningLogger = common.NewJobLogger(azcopyCurrentJobID, azcopyLogVerbosity, azcopyLogPathFolder, "-scanning") azcopyScanningLogger.OpenLog() glcm.RegisterCloseFunc(func() { azcopyScanningLogger.CloseLog() @@ -155,6 +149,7 @@ func (raw *rawSyncCmdArgs) cook() (cookedSyncCmdArgs, error) { cooked.isHNSToHNS = srcHNS && dstHNS + var err error cooked.fromTo, err = ValidateFromTo(raw.src, raw.dst, raw.fromTo) if err != nil { return cooked, err @@ -392,7 +387,6 @@ type cookedSyncCmdArgs struct { putMd5 bool md5ValidationOption common.HashValidationOption blockSize int64 - logVerbosity common.LogLevel forceIfReadOnly bool backupMode bool @@ -774,7 +768,6 @@ func init() { syncCmd.PersistentFlags().StringVar(&raw.excludeFileAttributes, "exclude-attributes", "", "(Windows only) Exclude files whose attributes match the attribute list. For example: A;S;R") syncCmd.PersistentFlags().StringVar(&raw.includeRegex, "include-regex", "", "Include the relative path of the files that match with the regular expressions. Separate regular expressions with ';'.") syncCmd.PersistentFlags().StringVar(&raw.excludeRegex, "exclude-regex", "", "Exclude the relative path of the files that match with the regular expressions. Separate regular expressions with ';'.") - syncCmd.PersistentFlags().StringVar(&raw.logVerbosity, "log-level", "INFO", "Define the log verbosity for the log file, available levels: INFO(all requests and responses), WARNING(slow responses), ERROR(only failed requests), and NONE(no output logs). (default INFO).") syncCmd.PersistentFlags().StringVar(&raw.deleteDestination, "delete-destination", "false", "Defines whether to delete extra files from the destination that are not present at the source. Could be set to true, false, or prompt. "+ "If set to prompt, the user will be asked a question before scheduling files and blobs for deletion. (default 'false').") syncCmd.PersistentFlags().BoolVar(&raw.putMd5, "put-md5", false, "Create an MD5 hash of each file, and save the hash as the Content-MD5 property of the destination blob or file. (By default the hash is NOT created.) Only available when uploading.") diff --git a/cmd/syncEnumerator.go b/cmd/syncEnumerator.go index b42649a95..13ff2d88c 100644 --- a/cmd/syncEnumerator.go +++ b/cmd/syncEnumerator.go @@ -58,11 +58,12 @@ func (cca *cookedSyncCmdArgs) initEnumerator(ctx context.Context) (enumerator *s // TODO: enable symlink support in a future release after evaluating the implications // GetProperties is enabled by default as sync supports both upload and download. 
// This property only supports Files and S3 at the moment, but provided that Files sync is coming soon, enable to avoid stepping on Files sync work - sourceTraverser, err := InitResourceTraverser(cca.source, cca.fromTo.From(), &ctx, &srcCredInfo, nil, nil, cca.recursive, true, cca.isHNSToHNS, common.EPermanentDeleteOption.None(), func(entityType common.EntityType) { - if entityType == common.EEntityType.File() { - atomic.AddUint64(&cca.atomicSourceFilesScanned, 1) - } - }, nil, cca.s2sPreserveBlobTags, cca.logVerbosity.ToPipelineLogLevel(), cca.cpkOptions) + sourceTraverser, err := InitResourceTraverser(cca.source, cca.fromTo.From(), &ctx, &srcCredInfo, nil, + nil, cca.recursive, true, cca.isHNSToHNS, common.EPermanentDeleteOption.None(), func(entityType common.EntityType) { + if entityType == common.EEntityType.File() { + atomic.AddUint64(&cca.atomicSourceFilesScanned, 1) + } + }, nil, cca.s2sPreserveBlobTags, azcopyLogVerbosity.ToPipelineLogLevel(), cca.cpkOptions) if err != nil { return nil, err @@ -83,7 +84,7 @@ func (cca *cookedSyncCmdArgs) initEnumerator(ctx context.Context) (enumerator *s if entityType == common.EEntityType.File() { atomic.AddUint64(&cca.atomicDestinationFilesScanned, 1) } - }, nil, cca.s2sPreserveBlobTags, cca.logVerbosity.ToPipelineLogLevel(), cca.cpkOptions) + }, nil, cca.s2sPreserveBlobTags, azcopyLogVerbosity.ToPipelineLogLevel(), cca.cpkOptions) if err != nil { return nil, err } diff --git a/cmd/syncProcessor.go b/cmd/syncProcessor.go index 71e9f3eb3..364cf5b83 100644 --- a/cmd/syncProcessor.go +++ b/cmd/syncProcessor.go @@ -55,7 +55,7 @@ func newSyncTransferProcessor(cca *cookedSyncCmdArgs, numOfTransfersPerPart int, BlockSizeInBytes: cca.blockSize}, ForceWrite: common.EOverwriteOption.True(), // once we decide to transfer for a sync operation, we overwrite the destination regardless ForceIfReadOnly: cca.forceIfReadOnly, - LogLevel: cca.logVerbosity, + LogLevel: azcopyLogVerbosity, PreserveSMBPermissions: cca.preservePermissions, PreserveSMBInfo: cca.preserveSMBInfo, PreservePOSIXProperties: cca.preservePOSIXProperties, @@ -258,7 +258,7 @@ func newSyncDeleteProcessor(cca *cookedSyncCmdArgs) (*interactiveDeleteProcessor ctx := context.WithValue(context.TODO(), ste.ServiceAPIVersionOverride, ste.DefaultServiceApiVersion) - p, err := InitPipeline(ctx, cca.fromTo.To(), cca.credentialInfo, cca.logVerbosity.ToPipelineLogLevel()) + p, err := InitPipeline(ctx, cca.fromTo.To(), cca.credentialInfo, azcopyLogVerbosity.ToPipelineLogLevel()) if err != nil { return nil, err } diff --git a/cmd/zt_copy_s2smigration_test.go b/cmd/zt_copy_s2smigration_test.go index 97807507e..2e90b2b22 100644 --- a/cmd/zt_copy_s2smigration_test.go +++ b/cmd/zt_copy_s2smigration_test.go @@ -76,7 +76,6 @@ func getDefaultRawCopyInput(src, dst string) rawCopyCmdArgs { src: src, dst: dst, recursive: true, - logVerbosity: defaultLogVerbosityForCopy, output: defaultOutputFormatForCopy, blobType: defaultBlobTypeForCopy, blockBlobTier: defaultBlockBlobTierForCopy, diff --git a/cmd/zt_scenario_helpers_for_test.go b/cmd/zt_scenario_helpers_for_test.go index 5b82241d8..eb70d3b9b 100644 --- a/cmd/zt_scenario_helpers_for_test.go +++ b/cmd/zt_scenario_helpers_for_test.go @@ -865,7 +865,6 @@ func getDefaultSyncRawInput(src, dst string) rawSyncCmdArgs { src: src, dst: dst, recursive: true, - logVerbosity: defaultLogVerbosityForSync, deleteDestination: deleteDestination.String(), md5ValidationOption: common.DefaultHashValidationOption.String(), } @@ -875,7 +874,6 @@ func getDefaultCopyRawInput(src string, dst 
string) rawCopyCmdArgs { return rawCopyCmdArgs{ src: src, dst: dst, - logVerbosity: defaultLogVerbosityForSync, blobType: common.EBlobType.Detect().String(), blockBlobTier: common.EBlockBlobTier.None().String(), pageBlobTier: common.EPageBlobTier.None().String(), @@ -900,7 +898,6 @@ func getDefaultRemoveRawInput(src string) rawCopyCmdArgs { return rawCopyCmdArgs{ src: src, fromTo: fromTo.String(), - logVerbosity: defaultLogVerbosityForSync, blobType: common.EBlobType.Detect().String(), blockBlobTier: common.EBlockBlobTier.None().String(), pageBlobTier: common.EPageBlobTier.None().String(), From 8346fe90989e181808fe32e937e3da6ca8efa100 Mon Sep 17 00:00:00 2001 From: Ishaan Verma <99612568+tiverma-msft@users.noreply.github.com> Date: Thu, 2 Jun 2022 16:17:33 +0530 Subject: [PATCH 10/26] Avoid creating .azcopy under HOME if plan/log location is specified elsewhere (#1782) * avoiding creation of .azcopy if log and plan files have user defined loc * making azcopyAppPathFolder string irrespective of folder creation * accessToken to be created where plan files are * adding TODO * making same changes in main_unix.go * getting rid of azcopyAppPathFolder * making azcopyAppPathFolder before making plan folder to avoid err --- cmd/credentialUtil.go | 6 +++--- cmd/root.go | 4 ++-- main.go | 15 ++++++++++----- main_unix.go | 5 +---- main_windows.go | 5 +---- 5 files changed, 17 insertions(+), 18 deletions(-) diff --git a/cmd/credentialUtil.go b/cmd/credentialUtil.go index f160a9651..b727b6c2f 100644 --- a/cmd/credentialUtil.go +++ b/cmd/credentialUtil.go @@ -58,11 +58,11 @@ const oauthLoginSessionCacheAccountName = "AzCopyOAuthTokenCache" // Note: Currently, only support to have TokenManager for one user mapping to one tenantID. func GetUserOAuthTokenManagerInstance() *common.UserOAuthTokenManager { once.Do(func() { - if AzcopyAppPathFolder == "" { - panic("invalid state, azcopyAppPathFolder should be initialized by root") + if common.AzcopyJobPlanFolder == "" { + panic("invalid state, AzcopyJobPlanFolder should not be an empty string") } currentUserOAuthTokenManager = common.NewUserOAuthTokenManagerInstance(common.CredCacheOptions{ - DPAPIFilePath: AzcopyAppPathFolder, + DPAPIFilePath: common.AzcopyJobPlanFolder, KeyName: oauthLoginSessionCacheKeyName, ServiceName: oauthLoginSessionCacheServiceName, AccountName: oauthLoginSessionCacheAccountName, diff --git a/cmd/root.go b/cmd/root.go index 9ce8401ce..f09b40997 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -178,8 +178,8 @@ var glcmSwapOnce = &sync.Once{} // Execute adds all child commands to the root command and sets flags appropriately. // This is called by main.main(). It only needs to happen once to the rootCmd. 
-func Execute(azsAppPathFolder, logPathFolder string, jobPlanFolder string, maxFileAndSocketHandles int, jobID common.JobID) { - AzcopyAppPathFolder = azsAppPathFolder + +func Execute(logPathFolder, jobPlanFolder string, maxFileAndSocketHandles int, jobID common.JobID) { azcopyLogPathFolder = logPathFolder common.AzcopyJobPlanFolder = jobPlanFolder azcopyMaxFileAndSocketHandles = maxFileAndSocketHandles diff --git a/main.go b/main.go index 4fbeb75a8..c47c38aaf 100644 --- a/main.go +++ b/main.go @@ -42,12 +42,14 @@ func main() { rand.Seed(time.Now().UnixNano()) // make sure our random numbers actually are random (but remember, use crypto/rand for anything where strong/reliable randomness is required + azcopyLogPathFolder := common.GetLifecycleMgr().GetEnvironmentVariable(common.EEnvironmentVariable.LogLocation()) // user specified location for log files + azcopyJobPlanFolder := common.GetLifecycleMgr().GetEnvironmentVariable(common.EEnvironmentVariable.JobPlanLocation()) // user specified location for plan files + // note: azcopyAppPathFolder is the default location for all AzCopy data (logs, job plans, oauth token on Windows) - // but both logs and job plans can be put elsewhere as they can become very large + // but all the above can be put elsewhere as they can become very large azcopyAppPathFolder := GetAzCopyAppPath() // the user can optionally put the log files somewhere else - azcopyLogPathFolder := common.GetLifecycleMgr().GetEnvironmentVariable(common.EEnvironmentVariable.LogLocation()) if azcopyLogPathFolder == "" { azcopyLogPathFolder = azcopyAppPathFolder } @@ -56,8 +58,11 @@ func main() { } // the user can optionally put the plan files somewhere else - azcopyJobPlanFolder := common.GetLifecycleMgr().GetEnvironmentVariable(common.EEnvironmentVariable.JobPlanLocation()) if azcopyJobPlanFolder == "" { + // make the app path folder ".azcopy" first so we can make a plans folder in it + if err := os.Mkdir(azcopyAppPathFolder, os.ModeDir); err != nil && !os.IsExist(err) { + common.PanicIfErr(err) + } azcopyJobPlanFolder = path.Join(azcopyAppPathFolder, "plans") } if err := os.Mkdir(azcopyJobPlanFolder, os.ModeDir|os.ModePerm); err != nil && !os.IsExist(err) { @@ -67,7 +72,7 @@ func main() { jobID := common.NewJobID() // If insufficient arguments, show usage & terminate if len(os.Args) == 1 { - cmd.Execute(azcopyAppPathFolder, azcopyLogPathFolder, azcopyJobPlanFolder, 0, jobID) + cmd.Execute(azcopyLogPathFolder, azcopyJobPlanFolder, 0, jobID) return } @@ -80,7 +85,7 @@ func main() { log.Fatalf("initialization failed: %v", err) } - cmd.Execute(azcopyAppPathFolder, azcopyLogPathFolder, azcopyJobPlanFolder, maxFileAndSocketHandles, jobID) + cmd.Execute(azcopyLogPathFolder, azcopyJobPlanFolder, maxFileAndSocketHandles, jobID) glcm.Exit(nil, common.EExitCode.Success()) } diff --git a/main_unix.go b/main_unix.go index 64c553a4f..a83400df3 100644 --- a/main_unix.go +++ b/main_unix.go @@ -1,3 +1,4 @@ +//go:build linux || darwin // +build linux darwin // Copyright © 2017 Microsoft @@ -24,7 +25,6 @@ package main import ( "math" - "os" "path" "syscall" @@ -74,8 +74,5 @@ func GetAzCopyAppPath() string { lcm := common.GetLifecycleMgr() localAppData := lcm.GetEnvironmentVariable(common.EEnvironmentVariable.UserDir()) azcopyAppDataFolder := path.Join(localAppData, ".azcopy") - if err := os.Mkdir(azcopyAppDataFolder, os.ModeDir|os.ModePerm); err != nil && !os.IsExist(err) { - return "" - } return azcopyAppDataFolder } diff --git a/main_windows.go b/main_windows.go index 72119c3aa..169780bb6 100644 --- 
a/main_windows.go +++ b/main_windows.go @@ -23,7 +23,6 @@ package main import ( "math" "net/http" - "os" "os/exec" "path" "strings" @@ -61,9 +60,7 @@ func GetAzCopyAppPath() string { lcm := common.GetLifecycleMgr() userProfile := lcm.GetEnvironmentVariable(common.EEnvironmentVariable.UserDir()) azcopyAppDataFolder := strings.ReplaceAll(path.Join(userProfile, ".azcopy"), "/", `\`) - if err := os.Mkdir(azcopyAppDataFolder, os.ModeDir); err != nil && !os.IsExist(err) { - return "" - } + return azcopyAppDataFolder } From 09106260bd653f824a5173f7c580afd97b0171e0 Mon Sep 17 00:00:00 2001 From: Ishaan Verma <99612568+tiverma-msft@users.noreply.github.com> Date: Mon, 6 Jun 2022 08:29:20 +0530 Subject: [PATCH 11/26] set-properties command (#1778) * adding new files setProperties * updating copy.go and helpMessages.go * updating cmd files for setProperties * updating ste files for setProperties * updating ste files for setProperties * solving issues with code * fixing problem with rehydrate priority * adding archiveStatus to list * adding test files * adding dry-run mode * reformatting dry-run code * adding metadata and blobtier to tests * removing redundant code from makeTransferEnum() * validating metadata * assuring empty metadata is handled in set-props * assuring tier is not archive if BlobFS * Files and SAS token not allowed * Files and SAS token not allowed * adding tests for empty metadata and blob tags * printing unescaped URL in dry-run * showing failed when using auto-login resolution * the specified blob does not exist error and not hang * totalBytesTransferred = 0 * oopsie * showing failed when using auto-login resolution - 2 * empty blob tags given to clear existing tags * removing done TODO * moving resetSourceSize so that progress is displayed correctly * complying with pulled changes * introducing "flush" to flush blobtags and metadata * avoiding import cycle * getting up to date with latest merges in dev * getting up to date with latest merges in dev -2 * printing metadata if error in it * printing metadata if error in it * moving flushflag to fe-ste-models.go * only metadata key should be a C# identifier * changing flush flag to clear flag * using strings.EqualFold for case insensitivity * pretty output * description and checkIfChangesPossible() change * metadata can't be set if blob is set to be archived * doing TODOs and silenting ValidateTier() * adding short and long descr. 
and examples * fixing usage definition * bytes transferred = 0 not showing correctly, fix * removing invalid comment --- cmd/copy.go | 54 +- cmd/helpMessages.go | 40 + cmd/list.go | 5 +- cmd/pathUtils.go | 3 +- cmd/setProperties.go | 172 +++ cmd/setPropertiesEnumerator.go | 105 ++ cmd/setPropertiesProcessor.go | 60 + cmd/zc_enumerator.go | 3 + cmd/zc_newobjectadapters.go | 12 + cmd/zc_processor.go | 45 +- cmd/zc_traverser_blob.go | 1 + cmd/zt_scenario_helpers_for_test.go | 55 + cmd/zt_set_properties_test.go | 1707 +++++++++++++++++++++++++++ common/fe-ste-models.go | 88 +- common/rpc-models.go | 2 + ste/JobPartPlan.go | 4 + ste/JobPartPlanFileName.go | 2 + ste/mgr-JobPartMgr.go | 16 +- ste/mgr-JobPartTransferMgr.go | 24 +- ste/sender-blockBlob.go | 2 +- ste/sender-blockBlobFromLocal.go | 2 +- ste/sender-blockBlobFromURL.go | 4 +- ste/sender-pageBlob.go | 2 +- ste/xfer-anyToRemote-file.go | 6 +- ste/xfer-setProperties.go | 214 ++++ ste/xfer.go | 8 +- 26 files changed, 2602 insertions(+), 34 deletions(-) create mode 100644 cmd/setProperties.go create mode 100644 cmd/setPropertiesEnumerator.go create mode 100644 cmd/setPropertiesProcessor.go create mode 100644 cmd/zt_set_properties_test.go create mode 100644 ste/xfer-setProperties.go diff --git a/cmd/copy.go b/cmd/copy.go index 7c6face3b..83f8951ec 100644 --- a/cmd/copy.go +++ b/cmd/copy.go @@ -171,6 +171,10 @@ type rawCopyCmdArgs struct { // Optional flag that permanently deletes soft-deleted snapshots/versions permanentDeleteOption string + + // Optional. Indicates the priority with which to rehydrate an archived blob. Valid values are High/Standard. + rehydratePriority string + // The priority setting can be changed from Standard to High by calling Set Blob Tier with this header set to High and setting x-ms-access-tier to the same value as previously set. The priority setting cannot be lowered from High to Standard. } func (raw *rawCopyCmdArgs) parsePatterns(pattern string) (cookedPatterns []string) { @@ -372,6 +376,14 @@ func (raw rawCopyCmdArgs) cook() (CookedCopyCmdArgs, error) { return cooked, err } + if raw.rehydratePriority == "" { + raw.rehydratePriority = "standard" + } + err = cooked.rehydratePriority.Parse(raw.rehydratePriority) + if err != nil { + return cooked, err + } + // Everything uses the new implementation of list-of-files now. // This handles both list-of-files and include-path as a list enumerator. // This saves us time because we know *exactly* what we're looking for right off the bat. 
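For reference, a minimal standalone sketch of the default-then-parse behaviour that the rehydrate-priority handling in the hunk above introduces: an empty flag value falls back to "standard", and anything else must parse as a known priority. The type and helper names here are illustrative only and are not part of the AzCopy codebase.

// parseRehydratePriority mirrors the pattern sketched above: default empty input,
// then reject anything that is not a recognised priority value.
package main

import (
	"fmt"
	"strings"
)

type rehydratePriority int

const (
	rehydrateStandard rehydratePriority = iota
	rehydrateHigh
)

func parseRehydratePriority(raw string) (rehydratePriority, error) {
	if raw == "" {
		raw = "standard" // same fallback the cook() hunk applies before parsing
	}
	switch strings.ToLower(raw) {
	case "standard":
		return rehydrateStandard, nil
	case "high":
		return rehydrateHigh, nil
	default:
		return 0, fmt.Errorf("invalid rehydrate priority %q: expected Standard or High", raw)
	}
}

func main() {
	for _, in := range []string{"", "High", "low"} {
		p, err := parseRehydratePriority(in)
		fmt.Println(in, p, err)
	}
}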
@@ -530,7 +542,13 @@ func (raw rawCopyCmdArgs) cook() (CookedCopyCmdArgs, error) { cooked.ListOfVersionIDs = versionsChan } + if cooked.FromTo.To() == common.ELocation.None() && strings.EqualFold(raw.metadata, common.MetadataAndBlobTagsClearFlag) { // in case of Blob, BlobFS and Files + glcm.Info("*** WARNING *** Metadata will be cleared because of input --metadata=clear ") + } cooked.metadata = raw.metadata + if err = validateMetadataString(cooked.metadata); err != nil { + return cooked, err + } cooked.contentType = raw.contentType cooked.contentEncoding = raw.contentEncoding cooked.contentLanguage = raw.contentLanguage @@ -540,9 +558,12 @@ func (raw rawCopyCmdArgs) cook() (CookedCopyCmdArgs, error) { cooked.preserveLastModifiedTime = raw.preserveLastModifiedTime cooked.disableAutoDecoding = raw.disableAutoDecoding - if cooked.FromTo.To() != common.ELocation.Blob() && raw.blobTags != "" { + if !(cooked.FromTo.To() == common.ELocation.Blob() || cooked.FromTo == common.EFromTo.BlobNone() || cooked.FromTo != common.EFromTo.BlobFSNone()) && raw.blobTags != "" { return cooked, errors.New("blob tags can only be set when transferring to blob storage") } + if cooked.FromTo.To() == common.ELocation.None() && strings.EqualFold(raw.blobTags, common.MetadataAndBlobTagsClearFlag) { // in case of Blob and BlobFS + glcm.Info("*** WARNING *** BlobTags will be cleared because of input --blob-tags=clear ") + } blobTags := common.ToCommonBlobTagsMap(raw.blobTags) err = validateBlobTagsKeyValue(blobTags) if err != nil { @@ -1040,6 +1061,23 @@ func validateBlobTagsKeyValue(bt common.BlobTags) error { return nil } +func validateMetadataString(metadata string) error { + if strings.EqualFold(metadata, common.MetadataAndBlobTagsClearFlag) { + return nil + } + metadataMap, err := common.StringToMetadata(metadata) + if err != nil { + return err + } + for k, _ := range metadataMap { + if strings.ContainsAny(k, " !#$%^&*,<>{}|\\:.()+'\"?/") { + return fmt.Errorf("invalid metadata key value '%s': can't have spaces or special characters", k) + } + } + + return nil +} + // represents the processed copy command input from the user type CookedCopyCmdArgs struct { // from arguments @@ -1171,6 +1209,12 @@ type CookedCopyCmdArgs struct { // Optional flag that permanently deletes soft deleted blobs permanentDeleteOption common.PermanentDeleteOption + + // Optional flag that sets rehydrate priority for rehydration + rehydratePriority common.RehydratePriorityType + + // Bitmasked uint checking which properties to transfer + propertiesToTransfer common.SetPropertiesFlags } func (cca *CookedCopyCmdArgs) isRedirection() bool { @@ -1464,6 +1508,14 @@ func (cca *CookedCopyCmdArgs) processCopyJobPartOrders() (err error) { // case common.EFromTo.FileBlob(): // e := copyFileToNEnumerator(jobPartOrder) // err = e.enumerate(cca) + + case common.EFromTo.BlobNone(), common.EFromTo.BlobFSNone(), common.EFromTo.FileNone(): + e, createErr := setPropertiesEnumerator(cca) + if createErr != nil { + return createErr + } + err = e.enumerate() + default: return fmt.Errorf("copy direction %v is not supported\n", cca.FromTo) } diff --git a/cmd/helpMessages.go b/cmd/helpMessages.go index 64caa5ada..59c788d7e 100644 --- a/cmd/helpMessages.go +++ b/cmd/helpMessages.go @@ -525,3 +525,43 @@ Run an upload that does not delete the transferred files. (These files can then - azcopy bench "https://[account].blob.core.windows.net/[container]?" 
--file-count 100 --delete-test-data=false ` + +// ===================================== SET-PROPERTIES COMMAND ===================================== // + +const setPropertiesCmdShortDescription = "Given a location, change all the valid system properties of that storage (blob or file)" + +const setPropertiesCmdLongDescription = ` +Sets properties of Blob, BlobFS, and File storage. The properties currently supported by this command are: + + Blobs -> Tier, Metadata, Tags + BlobFS -> Tier, Metadata, Tags + Files -> Metadata +` + +const setPropertiesCmdExample = ` +Change tier of blob to hot: + - azcopy setprops "https://[account].blob.core.windows.net/[container]/[path/to/blob]" --block-blob-tier=hot + +Change tier of blob from archive to cool with rehydrate priority set to high: + - azcopy setprops "https://[account].blob.core.windows.net/[container]/[path/to/blob]" --block-blob-tier=cool --rehydrate-priority=high + +Change tier of all files in a directory to archive: + - azcopy setprops "https://[account].blob.core.windows.net/[container]/[path/to/virtual/dir]" --block-blob-tier=archive --recursive=true + +Change metadata of blob to {key = "abc", val = "def"} and {key = "ghi", val = "jkl"}: + - azcopy setprops "https://[account].blob.core.windows.net/[container]/[path/to/blob]" --metadata=abc=def;ghi=jkl + +Change metadata of all files in a directory to {key = "abc", val = "def"} and {key = "ghi", val = "jkl"}: + - azcopy setprops "https://[account].blob.core.windows.net/[container]/[path/to/virtual/dir]" --metadata=abc=def;ghi=jkl --recursive=true + +Clear all existing metadata of blob: + - azcopy setprops "https://[account].blob.core.windows.net/[container]/[path/to/blob]" --metadata=clear + +Change blob-tags of blob to {key = "abc", val = "def"} and {key = "ghi", val = "jkl"}: + - azcopy setprops "https://[account].blob.core.windows.net/[container]/[path/to/blob]" --blob-tags=abc=def&ghi=jkl + - While setting tags on the blobs, there are additional permissions('t' for tags) in SAS without which the service will give authorization error back. + +Clear all existing blob-tags of blob: + - azcopy setprops "https://[account].blob.core.windows.net/[container]/[path/to/blob]" --blob-tags=clear + - While setting tags on the blobs, there are additional permissions('t' for tags) in SAS without which the service will give authorization error back. +` diff --git a/cmd/list.go b/cmd/list.go index bb9a3e92b..cf799fad1 100644 --- a/cmd/list.go +++ b/cmd/list.go @@ -56,12 +56,13 @@ const ( leaseState validProperty = "LeaseState" leaseDuration validProperty = "LeaseDuration" leaseStatus validProperty = "LeaseStatus" + archiveStatus validProperty = "ArchiveStatus" ) // validProperties returns an array of possible values for the validProperty const type. 
func validProperties() []validProperty { return []validProperty{lastModifiedTime, versionId, blobType, blobAccessTier, - contentType, contentEncoding, leaseState, leaseDuration, leaseStatus} + contentType, contentEncoding, leaseState, leaseDuration, leaseStatus, archiveStatus} } func (raw *rawListCmdArgs) parseProperties(rawProperties string) []validProperty { @@ -181,6 +182,8 @@ func (cooked cookedListCmdArgs) processProperties(object StoredObject) string { builder.WriteString(propertyStr + ": " + string(object.leaseStatus) + "; ") case leaseDuration: builder.WriteString(propertyStr + ": " + string(object.leaseDuration) + "; ") + case archiveStatus: + builder.WriteString(propertyStr + ": " + string(object.archiveStatus) + "; ") } } return builder.String() diff --git a/cmd/pathUtils.go b/cmd/pathUtils.go index 1ca2655a9..d12aa87ee 100644 --- a/cmd/pathUtils.go +++ b/cmd/pathUtils.go @@ -221,7 +221,8 @@ func splitAuthTokenFromResource(resource string, location common.Location) (reso case common.ELocation.GCP(): return resource, "", nil case common.ELocation.Benchmark(), // cover for benchmark as we generate data for that - common.ELocation.Unknown(): // cover for unknown as we treat that as garbage + common.ELocation.Unknown(), // cover for unknown as we treat that as garbage + common.ELocation.None(): // Local and S3 don't feature URL-embedded tokens return resource, "", nil diff --git a/cmd/setProperties.go b/cmd/setProperties.go new file mode 100644 index 000000000..44032d4c0 --- /dev/null +++ b/cmd/setProperties.go @@ -0,0 +1,172 @@ +// Copyright © 2017 Microsoft +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
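The new command in the file below records which properties a transfer should change in a bit-flag set (common.SetPropertiesFlags, populated in makeTransferEnum and queried via ShouldTransferTier and friends). A minimal standalone sketch of that pattern, using illustrative names rather than the real AzCopy types:

// Each property that set-properties can change gets its own bit; a transfer can
// then carry any combination of "set tier", "set metadata" and "set blob tags".
package main

import "fmt"

type setPropertiesFlags uint32

const (
	setTier     setPropertiesFlags = 1 << iota // change the access tier
	setMetadata                                // replace the metadata
	setBlobTags                                // replace the blob tags
)

func (f setPropertiesFlags) shouldTransferTier() bool     { return f&setTier != 0 }
func (f setPropertiesFlags) shouldTransferMetadata() bool { return f&setMetadata != 0 }
func (f setPropertiesFlags) shouldTransferBlobTags() bool { return f&setBlobTags != 0 }

func main() {
	var props setPropertiesFlags
	props |= setTier
	props |= setBlobTags

	fmt.Println(props.shouldTransferTier())     // true
	fmt.Println(props.shouldTransferMetadata()) // false
	fmt.Println(props.shouldTransferBlobTags()) // true
}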
+ +package cmd + +import ( + "fmt" + "github.com/Azure/azure-storage-azcopy/v10/common" + "github.com/spf13/cobra" + "strings" +) + +func (raw *rawCopyCmdArgs) setMandatoryDefaultsForSetProperties() { + raw.blobType = common.EBlobType.Detect().String() + raw.md5ValidationOption = common.DefaultHashValidationOption.String() + raw.s2sInvalidMetadataHandleOption = common.DefaultInvalidMetadataHandleOption.String() + raw.forceWrite = common.EOverwriteOption.True().String() + raw.preserveOwner = common.PreserveOwnerDefault +} + +func (cca *CookedCopyCmdArgs) checkIfChangesPossible() error { + // tier or tags can't be set on files + if cca.FromTo.From() == common.ELocation.File() { + if cca.propertiesToTransfer.ShouldTransferTier() { + return fmt.Errorf("changing tier is not available for File Storage") + } + if cca.propertiesToTransfer.ShouldTransferBlobTags() { + return fmt.Errorf("blob tags are not available for File Storage") + } + } + + // tier of a BlobFS can't be set to Archive + if cca.FromTo.From() == common.ELocation.BlobFS() && cca.blockBlobTier == common.EBlockBlobTier.Archive() { + return fmt.Errorf("tier of a BlobFS can't be set to Archive") + } + + // metadata can't be set if blob is set to be archived (note that tags can still be set) + if cca.blockBlobTier == common.EBlockBlobTier.Archive() && cca.propertiesToTransfer.ShouldTransferMetaData() { + return fmt.Errorf("metadata can't be set if blob is set to be archived") + } + + return nil +} + +func (cca *CookedCopyCmdArgs) makeTransferEnum() error { + // ACCESS TIER + if cca.blockBlobTier != common.EBlockBlobTier.None() || cca.pageBlobTier != common.EPageBlobTier.None() { + cca.propertiesToTransfer |= common.ESetPropertiesFlags.SetTier() + } + + // METADATA + if cca.metadata != "" { + cca.propertiesToTransfer |= common.ESetPropertiesFlags.SetMetadata() + if strings.EqualFold(cca.metadata, common.MetadataAndBlobTagsClearFlag) { + cca.metadata = "" + } + } + + // BLOB TAGS + if cca.blobTags != nil { + // the fact that fromto is not filenone is taken care of by the cook function + cca.propertiesToTransfer |= common.ESetPropertiesFlags.SetBlobTags() + } + + return cca.checkIfChangesPossible() +} + +func init() { + raw := rawCopyCmdArgs{} + + setPropCmd := &cobra.Command{ + Use: "set-properties [source]", + Aliases: []string{"set-props", "sp", "setprops"}, + SuggestFor: []string{"props", "prop", "set"}, + Short: setPropertiesCmdShortDescription, + Long: setPropertiesCmdLongDescription, + Example: setPropertiesCmdExample, + Args: func(cmd *cobra.Command, args []string) error { + // we only want one arg, which is the source + if len(args) != 1 { + return fmt.Errorf("set-properties command only takes 1 argument (src). Passed %d argument(s)", len(args)) + } + + //the resource to set properties of is set as src + raw.src = args[0] + + srcLocationType := InferArgumentLocation(raw.src) + if raw.fromTo == "" { + switch srcLocationType { + case common.ELocation.Blob(): + raw.fromTo = common.EFromTo.BlobNone().String() + case common.ELocation.BlobFS(): + raw.fromTo = common.EFromTo.BlobFSNone().String() + case common.ELocation.File(): + raw.fromTo = common.EFromTo.FileNone().String() + default: + return fmt.Errorf("invalid source type %s. azcopy supports set-properties of blobs/files/adls gen2", srcLocationType.String()) + } + } else { + err := strings.Contains(raw.fromTo, "None") + if !err { + return fmt.Errorf("invalid destination. Please enter a valid destination, i.e. 
BlobNone, FileNone, BlobFSNone") + } + } + raw.setMandatoryDefaultsForSetProperties() + return nil + }, + Run: func(cmd *cobra.Command, args []string) { + glcm.EnableInputWatcher() + if cancelFromStdin { + glcm.EnableCancelFromStdIn() + } + + cooked, err := raw.cook() + if err == nil { // do this only if error is nil. We would not want to overwrite err = nil if there was error in cook() + err = cooked.makeTransferEnum() // makes transfer enum and performs some checks that are specific to set-properties + } + + if err != nil { + glcm.Error("failed to parse user input due to error: " + err.Error()) + } + + cooked.commandString = copyHandlerUtil{}.ConstructCommandStringFromArgs() + err = cooked.process() + + if err != nil { + glcm.Error("failed to perform set-properties command due to error: " + err.Error()) + } + + if cooked.dryrunMode { + glcm.Exit(nil, common.EExitCode.Success()) + } + + glcm.SurrenderControl() + }, + } + + rootCmd.AddCommand(setPropCmd) + + setPropCmd.PersistentFlags().StringVar(&raw.metadata, "metadata", "", "Set the given location with these key-value pairs (separated by ';') as metadata.") + setPropCmd.PersistentFlags().StringVar(&raw.fromTo, "from-to", "", "Optionally specifies the source destination combination. Valid values : BlobNone, FileNone, BlobFSNone") + setPropCmd.PersistentFlags().StringVar(&raw.include, "include-pattern", "", "Include only files where the name matches the pattern list. For example: *.jpg;*.pdf;exactName") + setPropCmd.PersistentFlags().StringVar(&raw.includePath, "include-path", "", "Include only these paths when setting property. "+ + "This option does not support wildcard characters (*). Checks relative path prefix. For example: myFolder;myFolder/subDirName/file.pdf") + setPropCmd.PersistentFlags().StringVar(&raw.exclude, "exclude-pattern", "", "Exclude files where the name matches the pattern list. For example: *.jpg;*.pdf;exactName") + setPropCmd.PersistentFlags().StringVar(&raw.excludePath, "exclude-path", "", "Exclude these paths when removing. "+ + "This option does not support wildcard characters (*). Checks relative path prefix. For example: myFolder;myFolder/subDirName/file.pdf") + setPropCmd.PersistentFlags().StringVar(&raw.listOfFilesToCopy, "list-of-files", "", "Defines the location of text file which has the list of only files to be copied.") + setPropCmd.PersistentFlags().StringVar(&raw.blockBlobTier, "block-blob-tier", "None", "Changes the access tier of the blobs to the given tier") + setPropCmd.PersistentFlags().StringVar(&raw.pageBlobTier, "page-blob-tier", "None", "Upload page blob to Azure Storage using this blob tier. (default 'None').") + setPropCmd.PersistentFlags().BoolVar(&raw.recursive, "recursive", false, "Look into sub-directories recursively when uploading from local file system.") + setPropCmd.PersistentFlags().StringVar(&raw.rehydratePriority, "rehydrate-priority", "Standard", "Optional flag that sets rehydrate priority for rehydration. Valid values: Standard, High. Default- standard") + setPropCmd.PersistentFlags().BoolVar(&raw.dryrun, "dry-run", false, "Prints the file paths that would be affected by this command. 
This flag does not affect the actual files.") + setPropCmd.PersistentFlags().StringVar(&raw.blobTags, "blob-tags", "", "Set tags on blobs to categorize data in your storage account (separated by '&')") +} diff --git a/cmd/setPropertiesEnumerator.go b/cmd/setPropertiesEnumerator.go new file mode 100644 index 000000000..6656e1637 --- /dev/null +++ b/cmd/setPropertiesEnumerator.go @@ -0,0 +1,105 @@ +// Copyright © 2017 Microsoft +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package cmd + +import ( + "context" + "errors" + "github.com/Azure/azure-pipeline-go/pipeline" + "github.com/Azure/azure-storage-azcopy/v10/common" + "github.com/Azure/azure-storage-azcopy/v10/jobsAdmin" + "github.com/Azure/azure-storage-azcopy/v10/ste" + "strings" +) + +// provides an enumerator that lists a given resource and schedules setProperties on them + +func setPropertiesEnumerator(cca *CookedCopyCmdArgs) (enumerator *CopyEnumerator, err error) { + var sourceTraverser ResourceTraverser + + ctx := context.WithValue(context.TODO(), ste.ServiceAPIVersionOverride, ste.DefaultServiceApiVersion) + + srcCredInfo := common.CredentialInfo{} + + if srcCredInfo, _, err = GetCredentialInfoForLocation(ctx, cca.FromTo.From(), cca.Source.Value, cca.Source.SAS, true, cca.CpkOptions); err != nil { + return nil, err + } + if cca.FromTo == common.EFromTo.FileNone() && (srcCredInfo.CredentialType == common.ECredentialType.Anonymous() && cca.Source.SAS == "") { + return nil, errors.New("a SAS token (or S3 access key) is required as a part of the input for set-properties on File Storage") + } + + // Include-path is handled by ListOfFilesChannel. + sourceTraverser, err = InitResourceTraverser(cca.Source, cca.FromTo.From(), &ctx, &cca.credentialInfo, + nil, cca.ListOfFilesChannel, cca.Recursive, false, cca.IncludeDirectoryStubs, + cca.permanentDeleteOption, func(common.EntityType) {}, cca.ListOfVersionIDs, false, + azcopyLogVerbosity.ToPipelineLogLevel(), cca.CpkOptions) + + // report failure to create traverser + if err != nil { + return nil, err + } + + includeFilters := buildIncludeFilters(cca.IncludePatterns) + excludeFilters := buildExcludeFilters(cca.ExcludePatterns, false) + excludePathFilters := buildExcludeFilters(cca.ExcludePathPatterns, true) + includeSoftDelete := buildIncludeSoftDeleted(cca.permanentDeleteOption) + + // set up the filters in the right order + filters := append(includeFilters, excludeFilters...) + filters = append(filters, excludePathFilters...) 
+ filters = append(filters, includeSoftDelete...) + + fpo, message := newFolderPropertyOption(cca.FromTo, cca.Recursive, cca.StripTopDir, filters, false, false, false, cca.isHNStoHNS, strings.EqualFold(cca.Destination.Value, common.Dev_Null), cca.IncludeDirectoryStubs) + // do not print Info message if in dry run mode + if !cca.dryrunMode { + glcm.Info(message) + } + if jobsAdmin.JobsAdmin != nil { + jobsAdmin.JobsAdmin.LogToJobLog(message, pipeline.LogInfo) + } + + transferScheduler := setPropertiesTransferProcessor(cca, NumOfFilesPerDispatchJobPart, fpo) + + finalize := func() error { + jobInitiated, err := transferScheduler.dispatchFinalPart() + if err != nil { + if cca.dryrunMode { + return nil + } else if err == NothingScheduledError { + // No log file needed. Logging begins as a part of awaiting job completion. + return NothingToRemoveError + } + + return err + } + + if !jobInitiated { + if cca.isCleanupJob { + glcm.Error("Cleanup completed (nothing needed to be deleted)") + } else { + glcm.Error("Nothing to delete. Please verify that recursive flag is set properly if targeting a directory.") + } + } + + return nil + } + return NewCopyEnumerator(sourceTraverser, filters, transferScheduler.scheduleCopyTransfer, finalize), nil +} diff --git a/cmd/setPropertiesProcessor.go b/cmd/setPropertiesProcessor.go new file mode 100644 index 000000000..721d1b1a6 --- /dev/null +++ b/cmd/setPropertiesProcessor.go @@ -0,0 +1,60 @@ +// Copyright © 2017 Microsoft +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
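The processor in the file below carries metadata on the job order as a raw string of "key=value" pairs separated by ';' (with the literal "clear" meaning wipe the existing metadata), which is later expanded into a map when transfers are scheduled in zc_processor.go. A minimal sketch of that expansion, assuming an illustrative helper name rather than the real AzCopy code:

// parseMetadataString expands "abc=def;ghi=jkl" into a map; an empty result is
// later interpreted as "clear whatever metadata the blob currently has".
package main

import (
	"fmt"
	"strings"
)

func parseMetadataString(s string) (map[string]string, error) {
	meta := map[string]string{}
	if s == "" || strings.EqualFold(s, "clear") {
		return meta, nil
	}
	for _, pair := range strings.Split(s, ";") {
		kv := strings.SplitN(pair, "=", 2)
		if len(kv) != 2 {
			return nil, fmt.Errorf("invalid metadata pair %q: expected key=value", pair)
		}
		meta[kv[0]] = kv[1]
	}
	return meta, nil
}

func main() {
	m, err := parseMetadataString("abc=def;ghi=jkl")
	fmt.Println(m, err) // map[abc:def ghi:jkl] <nil>
}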
+ +package cmd + +import ( + "github.com/Azure/azure-storage-azcopy/v10/common" +) + +func setPropertiesTransferProcessor(cca *CookedCopyCmdArgs, numOfTransfersPerPart int, fpo common.FolderPropertyOption) *copyTransferProcessor { + copyJobTemplate := &common.CopyJobPartOrderRequest{ + JobID: cca.jobID, + CommandString: cca.commandString, + FromTo: cca.FromTo, + Fpo: fpo, + SourceRoot: cca.Source.CloneWithConsolidatedSeparators(), + CredentialInfo: cca.credentialInfo, + ForceIfReadOnly: cca.ForceIfReadOnly, + + // flags + LogLevel: azcopyLogVerbosity, + BlobAttributes: common.BlobTransferAttributes{ + BlockBlobTier: cca.blockBlobTier, + PageBlobTier: cca.pageBlobTier, + Metadata: cca.metadata, + BlobTagsString: cca.blobTags.ToString(), + RehydratePriority: cca.rehydratePriority, + }, + SetPropertiesFlags: cca.propertiesToTransfer, + } + + reportFirstPart := func(jobStarted bool) { + if jobStarted { + cca.waitUntilJobCompletion(false) + } + } + reportFinalPart := func() { cca.isEnumerationComplete = true } + + // note that the source and destination, along with the template are given to the generic processor's constructor + // this means that given an object with a relative path, this processor already knows how to schedule the right kind of transfers + return newCopyTransferProcessor(copyJobTemplate, numOfTransfersPerPart, cca.Source, cca.Destination, + reportFirstPart, reportFinalPart, false, cca.dryrunMode) +} diff --git a/cmd/zc_enumerator.go b/cmd/zc_enumerator.go index c2f91c52c..dad37376a 100644 --- a/cmd/zc_enumerator.go +++ b/cmd/zc_enumerator.go @@ -77,6 +77,7 @@ type StoredObject struct { DstContainerName string // access tier, only included by blob traverser. blobAccessTier azblob.AccessTierType + archiveStatus azblob.ArchiveStatusType // metadata, included in S2S transfers Metadata common.Metadata blobVersionID string @@ -214,6 +215,7 @@ type blobPropsProvider interface { LeaseStatus() azblob.LeaseStatusType LeaseDuration() azblob.LeaseDurationType LeaseState() azblob.LeaseStateType + ArchiveStatus() azblob.ArchiveStatusType } // a constructor is used so that in case the StoredObject has to change, the callers would get a compilation error @@ -233,6 +235,7 @@ func newStoredObject(morpher objectMorpher, name string, relativePath string, en md5: props.ContentMD5(), blobType: blobProps.BlobType(), blobAccessTier: blobProps.AccessTier(), + archiveStatus: blobProps.ArchiveStatus(), Metadata: meta, ContainerName: containerName, // Additional lease properties. 
To be used in listing diff --git a/cmd/zc_newobjectadapters.go b/cmd/zc_newobjectadapters.go index c064254c4..b3dfb13a1 100644 --- a/cmd/zc_newobjectadapters.go +++ b/cmd/zc_newobjectadapters.go @@ -65,6 +65,10 @@ func (e emptyPropertiesAdapter) AccessTier() azblob.AccessTierType { return azblob.AccessTierNone } +func (e emptyPropertiesAdapter) ArchiveStatus() azblob.ArchiveStatusType { + return azblob.ArchiveStatusNone +} + func (e emptyPropertiesAdapter) LeaseDuration() azblob.LeaseDurationType { return azblob.LeaseDurationNone } @@ -97,6 +101,10 @@ func (a blobPropertiesResponseAdapter) AccessTier() azblob.AccessTierType { return azblob.AccessTierType(a.BlobGetPropertiesResponse.AccessTier()) } +func (a blobPropertiesResponseAdapter) ArchiveStatus() azblob.ArchiveStatusType { + return azblob.ArchiveStatusType(a.BlobGetPropertiesResponse.ArchiveStatus()) +} + // blobPropertiesAdapter adapts a BlobProperties object to both the // contentPropsProvider and blobPropsProvider interfaces type blobPropertiesAdapter struct { @@ -149,3 +157,7 @@ func (a blobPropertiesAdapter) LeaseState() azblob.LeaseStateType { func (a blobPropertiesAdapter) LeaseStatus() azblob.LeaseStatusType { return a.BlobProperties.LeaseStatus } + +func (a blobPropertiesAdapter) ArchiveStatus() azblob.ArchiveStatusType { + return a.BlobProperties.ArchiveStatus +} diff --git a/cmd/zc_processor.go b/cmd/zc_processor.go index 8853b299f..2d4720fe6 100644 --- a/cmd/zc_processor.go +++ b/cmd/zc_processor.go @@ -24,6 +24,7 @@ import ( "encoding/json" "fmt" "github.com/Azure/azure-storage-azcopy/v10/jobsAdmin" + "net/url" "runtime" "strings" @@ -80,6 +81,22 @@ func (s *copyTransferProcessor) scheduleCopyTransfer(storedObject StoredObject) s.folderPropertiesOption, ) + if s.copyJobTemplate.FromTo.To() == common.ELocation.None() { + copyTransfer.BlobTier = s.copyJobTemplate.BlobAttributes.BlockBlobTier.ToAccessTierType() + + metadataString := s.copyJobTemplate.BlobAttributes.Metadata + metadataMap := common.Metadata{} + if len(metadataString) > 0 { + for _, keyAndValue := range strings.Split(metadataString, ";") { // key/value pairs are separated by ';' + kv := strings.Split(keyAndValue, "=") // key/value are separated by '=' + metadataMap[kv[0]] = kv[1] + } + } + copyTransfer.Metadata = metadataMap + + copyTransfer.BlobTags = common.ToCommonBlobTagsMap(s.copyJobTemplate.BlobAttributes.BlobTagsString) + } + if !shouldSendToSte { return nil // skip this one } @@ -91,39 +108,49 @@ func (s *copyTransferProcessor) scheduleCopyTransfer(storedObject StoredObject) common.PanicIfErr(err) return string(jsonOutput) } else { + prettySrcRelativePath, err := url.QueryUnescape(srcRelativePath) + common.PanicIfErr(err) + prettyDstRelativePath, err := url.QueryUnescape(dstRelativePath) + common.PanicIfErr(err) + // if remove then To() will equal to common.ELocation.Unknown() if s.copyJobTemplate.FromTo.To() == common.ELocation.Unknown() { //remove return fmt.Sprintf("DRYRUN: remove %v/%v", s.copyJobTemplate.SourceRoot.Value, - srcRelativePath) + prettySrcRelativePath) + } + if s.copyJobTemplate.FromTo.To() == common.ELocation.None() { //set-properties + return fmt.Sprintf("DRYRUN: set-properties %v/%v", + s.copyJobTemplate.SourceRoot.Value, + prettySrcRelativePath) } else { //copy for sync if s.copyJobTemplate.FromTo.From() == common.ELocation.Local() { // formatting from local source dryrunValue := fmt.Sprintf("DRYRUN: copy %v", common.ToShortPath(s.copyJobTemplate.SourceRoot.Value)) if runtime.GOOS == "windows" { - dryrunValue += "\\" + 
strings.ReplaceAll(srcRelativePath, "/", "\\") + dryrunValue += "\\" + strings.ReplaceAll(prettySrcRelativePath, "/", "\\") } else { //linux and mac - dryrunValue += "/" + srcRelativePath + dryrunValue += "/" + prettySrcRelativePath } - dryrunValue += fmt.Sprintf(" to %v/%v", strings.Trim(s.copyJobTemplate.DestinationRoot.Value, "/"), dstRelativePath) + dryrunValue += fmt.Sprintf(" to %v/%v", strings.Trim(s.copyJobTemplate.DestinationRoot.Value, "/"), prettyDstRelativePath) return dryrunValue } else if s.copyJobTemplate.FromTo.To() == common.ELocation.Local() { // formatting to local source dryrunValue := fmt.Sprintf("DRYRUN: copy %v/%v to %v", - strings.Trim(s.copyJobTemplate.SourceRoot.Value, "/"), srcRelativePath, + strings.Trim(s.copyJobTemplate.SourceRoot.Value, "/"), prettySrcRelativePath, common.ToShortPath(s.copyJobTemplate.DestinationRoot.Value)) if runtime.GOOS == "windows" { - dryrunValue += "\\" + strings.ReplaceAll(dstRelativePath, "/", "\\") + dryrunValue += "\\" + strings.ReplaceAll(prettyDstRelativePath, "/", "\\") } else { //linux and mac - dryrunValue += "/" + dstRelativePath + dryrunValue += "/" + prettyDstRelativePath } return dryrunValue } else { return fmt.Sprintf("DRYRUN: copy %v/%v to %v/%v", s.copyJobTemplate.SourceRoot.Value, - srcRelativePath, + prettySrcRelativePath, s.copyJobTemplate.DestinationRoot.Value, - dstRelativePath) + prettyDstRelativePath) } } } diff --git a/cmd/zc_traverser_blob.go b/cmd/zc_traverser_blob.go index ea8fc415a..ca98db05c 100644 --- a/cmd/zc_traverser_blob.go +++ b/cmd/zc_traverser_blob.go @@ -269,6 +269,7 @@ func (t *blobTraverser) parallelList(containerURL azblob.ContainerURL, container common.FromAzBlobMetadataToCommonMetadata(resp.NewMetadata()), containerName, ) + storedObject.archiveStatus = azblob.ArchiveStatusType(resp.ArchiveStatus()) if t.s2sPreserveSourceTags { var BlobTags *azblob.BlobTags diff --git a/cmd/zt_scenario_helpers_for_test.go b/cmd/zt_scenario_helpers_for_test.go index eb70d3b9b..3e66a9691 100644 --- a/cmd/zt_scenario_helpers_for_test.go +++ b/cmd/zt_scenario_helpers_for_test.go @@ -768,6 +768,9 @@ func runSyncAndVerify(c *chk.C, raw rawSyncCmdArgs, verifier func(err error)) { func runCopyAndVerify(c *chk.C, raw rawCopyCmdArgs, verifier func(err error)) { // the simulated user input should parse properly cooked, err := raw.cook() + if err == nil { + err = cooked.makeTransferEnum() + } if err != nil { verifier(err) return @@ -908,3 +911,55 @@ func getDefaultRemoveRawInput(src string) rawCopyCmdArgs { includeDirectoryStubs: true, } } + +func getDefaultSetPropertiesRawInput(src string, params transferParams) rawCopyCmdArgs { + fromTo := common.EFromTo.BlobNone() + srcURL, _ := url.Parse(src) + + srcLocationType := InferArgumentLocation(src) + switch srcLocationType { + case common.ELocation.Blob(): + fromTo = common.EFromTo.BlobNone() + case common.ELocation.BlobFS(): + fromTo = common.EFromTo.BlobFSNone() + case common.ELocation.File(): + fromTo = common.EFromTo.FileNone() + default: + panic(fmt.Sprintf("invalid source type %s to delete. 
azcopy support removing blobs/files/adls gen2", srcLocationType.String())) + + } + + if strings.Contains(srcURL.Host, "file") { + fromTo = common.EFromTo.FileNone() + } else if strings.Contains(srcURL.Host, "dfs") { + fromTo = common.EFromTo.BlobFSNone() + } + + rawArgs := rawCopyCmdArgs{ + src: src, + fromTo: fromTo.String(), + blobType: common.EBlobType.Detect().String(), + blockBlobTier: common.EBlockBlobTier.None().String(), + pageBlobTier: common.EPageBlobTier.None().String(), + md5ValidationOption: common.DefaultHashValidationOption.String(), + s2sInvalidMetadataHandleOption: defaultS2SInvalideMetadataHandleOption.String(), + forceWrite: common.EOverwriteOption.True().String(), + preserveOwner: common.PreserveOwnerDefault, + includeDirectoryStubs: true, + } + + if params.blockBlobTier != common.EBlockBlobTier.None() { + rawArgs.blockBlobTier = params.blockBlobTier.String() + } + if params.pageBlobTier != common.EPageBlobTier.None() { + rawArgs.pageBlobTier = params.pageBlobTier.String() + } + if params.metadata != "" { + rawArgs.metadata = params.metadata + } + if params.blobTags != nil { + rawArgs.blobTags = params.blobTags.ToString() + } + + return rawArgs +} diff --git a/cmd/zt_set_properties_test.go b/cmd/zt_set_properties_test.go new file mode 100644 index 000000000..37c944d6a --- /dev/null +++ b/cmd/zt_set_properties_test.go @@ -0,0 +1,1707 @@ +// Copyright © 2017 Microsoft +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
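The tests in the file below also exercise blob tags, which the command accepts as "key=value" pairs separated by '&' (or "clear" to remove all existing tags, per the help text above). A minimal sketch of how such a string can be expanded into a tag map; parseBlobTagsString is an illustrative helper, not an AzCopy function:

// parseBlobTagsString expands "abc=def&ghi=jkl" into a tag map; an empty result
// stands for "remove all existing tags", matching the documented --blob-tags=clear behaviour.
package main

import (
	"fmt"
	"strings"
)

func parseBlobTagsString(s string) map[string]string {
	tags := map[string]string{}
	if s == "" || strings.EqualFold(s, "clear") {
		return tags
	}
	for _, pair := range strings.Split(s, "&") {
		if kv := strings.SplitN(pair, "=", 2); len(kv) == 2 {
			tags[kv[0]] = kv[1]
		}
	}
	return tags
}

func main() {
	fmt.Println(parseBlobTagsString("abc=def&ghi=jkl")) // map[abc:def ghi:jkl]
	fmt.Println(parseBlobTagsString("clear"))           // map[]
}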
+ +package cmd + +import ( + "github.com/Azure/azure-storage-azcopy/v10/common" + "github.com/Azure/azure-storage-blob-go/azblob" + chk "gopkg.in/check.v1" + "net/url" + "strings" + "time" +) + +type transferParams struct { + blockBlobTier common.BlockBlobTier + pageBlobTier common.PageBlobTier + metadata string + blobTags common.BlobTags +} + +func (tp transferParams) getMetadata() common.Metadata { + metadataString := tp.metadata + + metadataMap, err := common.StringToMetadata(metadataString) + if err != nil { + panic("unable to form Metadata from string: " + err.Error()) + } + return metadataMap +} + +func (scenarioHelper) generateBlobsFromListWithAccessTier(c *chk.C, containerURL azblob.ContainerURL, blobList []string, data string, accessTier azblob.AccessTierType) { + for _, blobName := range blobList { + blob := containerURL.NewBlockBlobURL(blobName) + cResp, err := blob.Upload(ctx, strings.NewReader(data), azblob.BlobHTTPHeaders{}, + nil, azblob.BlobAccessConditions{}, accessTier, nil, azblob.ClientProvidedKeyOptions{}, azblob.ImmutabilityPolicyOptions{}) + c.Assert(err, chk.IsNil) + c.Assert(cResp.StatusCode(), chk.Equals, 201) + } + + // sleep a bit so that the blobs' lmts are guaranteed to be in the past + time.Sleep(time.Millisecond * 1050) +} + +func createNewBlockBlobWithAccessTier(c *chk.C, container azblob.ContainerURL, prefix string, accessTier azblob.AccessTierType) (blob azblob.BlockBlobURL, name string) { + blob, name = getBlockBlobURL(c, container, prefix) + + cResp, err := blob.Upload(ctx, strings.NewReader(blockBlobDefaultData), azblob.BlobHTTPHeaders{}, + nil, azblob.BlobAccessConditions{}, accessTier, nil, azblob.ClientProvidedKeyOptions{}, azblob.ImmutabilityPolicyOptions{}) + + c.Assert(err, chk.IsNil) + c.Assert(cResp.StatusCode(), chk.Equals, 201) + + return +} + +func (scenarioHelper) generateCommonRemoteScenarioForBlobWithAccessTier(c *chk.C, containerURL azblob.ContainerURL, prefix string, accessTier azblob.AccessTierType) (blobList []string) { + blobList = make([]string, 50) + + for i := 0; i < 10; i++ { + _, blobName1 := createNewBlockBlobWithAccessTier(c, containerURL, prefix+"top", accessTier) + _, blobName2 := createNewBlockBlobWithAccessTier(c, containerURL, prefix+"sub1/", accessTier) + _, blobName3 := createNewBlockBlobWithAccessTier(c, containerURL, prefix+"sub2/", accessTier) + _, blobName4 := createNewBlockBlobWithAccessTier(c, containerURL, prefix+"sub1/sub3/sub5/", accessTier) + _, blobName5 := createNewBlockBlobWithAccessTier(c, containerURL, prefix+specialNames[i], accessTier) + + blobList[5*i] = blobName1 + blobList[5*i+1] = blobName2 + blobList[5*i+2] = blobName3 + blobList[5*i+3] = blobName4 + blobList[5*i+4] = blobName5 + } + + // sleep a bit so that the blobs' lmts are guaranteed to be in the past + time.Sleep(time.Millisecond * 1050) + return +} + +func checkMapsEqual(c *chk.C, mapA map[string]string, mapB map[string]string) { + c.Assert(len(mapA), chk.Equals, len(mapB)) + for k, v := range mapA { + c.Assert(mapB[k], chk.Equals, v) + } +} + +func validateSetPropertiesTransfersAreScheduled(c *chk.C, isSrcEncoded bool, expectedTransfers []string, transferParams transferParams, mockedRPC interceptor) { + + // validate that the right number of transfers were scheduled + c.Assert(len(mockedRPC.transfers), chk.Equals, len(expectedTransfers)) + + // validate that the right transfers were sent + lookupMap := scenarioHelper{}.convertListToMap(expectedTransfers) + for _, transfer := range mockedRPC.transfers { + srcRelativeFilePath := 
transfer.Source + c.Assert(transfer.BlobTier, chk.Equals, transferParams.blockBlobTier.ToAccessTierType()) + checkMapsEqual(c, transfer.Metadata, transferParams.getMetadata()) + checkMapsEqual(c, transfer.BlobTags, transferParams.blobTags) + + if isSrcEncoded { + srcRelativeFilePath, _ = url.PathUnescape(srcRelativeFilePath) + } + + // look up the source from the expected transfers, make sure it exists + _, srcExist := lookupMap[srcRelativeFilePath] + c.Assert(srcExist, chk.Equals, true) + + delete(lookupMap, srcRelativeFilePath) + } +} + +func (s *cmdIntegrationSuite) TestSetPropertiesSingleBlobForBlobTier(c *chk.C) { + bsu := getBSU() + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + + for _, blobName := range []string{"top/mid/low/singleblobisbest", "打麻将.txt", "%4509%4254$85140&"} { + // set up the container with a single blob + blobList := []string{blobName} + + // upload the data with given accessTier + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobList, blockBlobDefaultData, azblob.AccessTierHot) + c.Assert(containerURL, chk.NotNil) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawBlobURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, blobList[0]) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.Cool(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "abc=def;metadata=value", + blobTags: common.BlobTags{"abc": "fgd"}, + } + raw := getDefaultSetPropertiesRawInput(rawBlobURLWithSAS.String(), transferParams) + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // note that when we are targeting single blobs, the relative path is empty ("") since the root path already points to the blob + validateSetPropertiesTransfersAreScheduled(c, true, []string{""}, transferParams, mockedRPC) + }) + } +} + +func (s *cmdIntegrationSuite) TestSetPropertiesBlobsUnderContainerForBlobTier(c *chk.C) { + bsu := getBSU() + + // set up the container with numerous blobs + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + blobList := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + c.Assert(containerURL, chk.NotNil) + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.Cool(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{}, + } + raw := getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), transferParams) + raw.recursive = true + raw.includeDirectoryStubs = false // The test target is a DFS account, which coincidentally created our directory stubs. Thus, we mustn't include them, since this is a test of blob. 
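+
+ // runCopyAndVerify runs the command with the raw arguments above and hands the resulting error to
+ // the verification callback. Since Rpc points at the interceptor, nothing is modified on the
+ // service here; the assertions only inspect which transfers were scheduled.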
+ + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // validate that the right number of transfers were scheduled + c.Assert(len(mockedRPC.transfers), chk.Equals, len(blobList)) + + // validate that the right transfers were sent + validateSetPropertiesTransfersAreScheduled(c, true, blobList, transferParams, mockedRPC) + //TODO: I don't think we need to change ^ this function from remove, do we? + }) + + // turn off recursive, this time only top blobs should be changed + raw.recursive = false + mockedRPC.reset() + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + c.Assert(len(mockedRPC.transfers), chk.Not(chk.Equals), len(blobList)) + + for _, transfer := range mockedRPC.transfers { + c.Assert(strings.Contains(transfer.Source, common.AZCOPY_PATH_SEPARATOR_STRING), chk.Equals, false) + } + }) +} + +// TODO: func (s *cmdIntegrationSuite) TestRemoveBlobsUnderVirtualDir(c *chk.C) + +func (s *cmdIntegrationSuite) TestSetPropertiesWithIncludeFlagForBlobTier(c *chk.C) { + bsu := getBSU() + + // set up the container with numerous blobs + containerURL, containerName := createNewContainer(c, bsu) + blobList := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // add special blobs that we wish to include + blobsToInclude := []string{"important.pdf", "includeSub/amazing.jpeg", "exactName"} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobsToInclude, blockBlobDefaultData, azblob.AccessTierHot) + includeString := "*.pdf;*.jpeg;exactName" + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.Cool(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{}, + } + raw := getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), transferParams) + raw.include = includeString + raw.recursive = true + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + validateDownloadTransfersAreScheduled(c, "", "", blobsToInclude, mockedRPC) + validateSetPropertiesTransfersAreScheduled(c, true, blobsToInclude, transferParams, mockedRPC) + }) +} + +func (s *cmdIntegrationSuite) TestSetPropertiesWithExcludeFlagForBlobTier(c *chk.C) { + bsu := getBSU() + + // set up the container with numerous blobs + containerURL, containerName := createNewContainer(c, bsu) + blobList := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // add special blobs that we wish to exclude + blobsToExclude := []string{"notGood.pdf", "excludeSub/lame.jpeg", "exactName"} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobsToExclude, blockBlobDefaultData, azblob.AccessTierHot) + excludeString := "*.pdf;*.jpeg;exactName" + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, 
containerName) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.Cool(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{}, + } + + raw := getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), transferParams) + raw.exclude = excludeString + raw.recursive = true + raw.includeDirectoryStubs = false // The test target is a DFS account, which coincidentally created our directory stubs. Thus, we mustn't include them, since this is a test of blob. + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + validateDownloadTransfersAreScheduled(c, "", "", blobList, mockedRPC) + validateSetPropertiesTransfersAreScheduled(c, true, blobList, transferParams, mockedRPC) + }) +} + +func (s *cmdIntegrationSuite) TestSetPropertiesWithIncludeAndExcludeFlagForBlobTier(c *chk.C) { + bsu := getBSU() + + // set up the container with numerous blobs + containerURL, containerName := createNewContainer(c, bsu) + blobList := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // add special blobs that we wish to include + blobsToInclude := []string{"important.pdf", "includeSub/amazing.jpeg"} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobsToInclude, blockBlobDefaultData, azblob.AccessTierHot) + includeString := "*.pdf;*.jpeg;exactName" + + // add special blobs that we wish to exclude + // note that the excluded files also match the include string + blobsToExclude := []string{"sorry.pdf", "exclude/notGood.jpeg", "exactName", "sub/exactName"} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobsToExclude, blockBlobDefaultData, azblob.AccessTierHot) + excludeString := "so*;not*;exactName" + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.Cool(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{}, + } + + raw := getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), transferParams) + raw.include = includeString + raw.exclude = excludeString + raw.recursive = true + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + validateDownloadTransfersAreScheduled(c, "", "", blobsToInclude, mockedRPC) + validateSetPropertiesTransfersAreScheduled(c, true, blobsToInclude, transferParams, mockedRPC) + }) +} + +// note: list-of-files flag is used +func (s *cmdIntegrationSuite) TestSetPropertiesListOfBlobsAndVirtualDirsForBlobTier(c *chk.C) { + c.Skip("Enable after setting Account to non-HNS") + bsu := getBSU() + vdirName := "megadir" + + // set up the container with numerous blobs and a vdir + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + blobListPart1 := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + blobListPart2 := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, vdirName+"/", azblob.AccessTierHot) + + blobList := append(blobListPart1, blobListPart2...) 
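+ // blobList now covers the top-level blobs plus everything under megadir/, which is exactly what the
+ // recursive, list-of-files-driven run below is expected to schedule.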
+ c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.Cool(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{}, + } + + raw := getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), transferParams) + raw.recursive = true + + // make the input for list-of-files + listOfFiles := append(blobListPart1, vdirName) + + // add some random files that don't actually exist + listOfFiles = append(listOfFiles, "WUTAMIDOING") + listOfFiles = append(listOfFiles, "DONTKNOW") + raw.listOfFilesToCopy = scenarioHelper{}.generateListOfFiles(c, listOfFiles) + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // validate that the right number of transfers were scheduled + c.Assert(len(mockedRPC.transfers), chk.Equals, len(blobList)) + + // validate that the right transfers were sent + validateSetPropertiesTransfersAreScheduled(c, true, blobList, transferParams, mockedRPC) + }) + + // turn off recursive, this time only top blobs should be deleted + raw.recursive = false + mockedRPC.reset() + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + c.Assert(len(mockedRPC.transfers), chk.Not(chk.Equals), len(blobList)) + + for _, transfer := range mockedRPC.transfers { + source, err := url.PathUnescape(transfer.Source) + c.Assert(err, chk.IsNil) + + // if the transfer is under the given dir, make sure only the top level files were scheduled + if strings.HasPrefix(source, vdirName) { + trimmedSource := strings.TrimPrefix(source, vdirName+"/") + c.Assert(strings.Contains(trimmedSource, common.AZCOPY_PATH_SEPARATOR_STRING), chk.Equals, false) + } + } + }) +} + +func (s *cmdIntegrationSuite) TestSetPropertiesListOfBlobsWithIncludeAndExcludeForBlobTier(c *chk.C) { + bsu := getBSU() + vdirName := "megadir" + + // set up the container with numerous blobs and a vdir + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + blobListPart1 := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, vdirName+"/", azblob.AccessTierHot) + + // add special blobs that we wish to include + blobsToInclude := []string{"important.pdf", "includeSub/amazing.jpeg"} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobsToInclude, blockBlobDefaultData, azblob.AccessTierHot) + + includeString := "*.pdf;*.jpeg;exactName" + + // add special blobs that we wish to exclude + // note that the excluded files also match the include string + blobsToExclude := []string{"sorry.pdf", "exclude/notGood.jpeg", "exactName", "sub/exactName"} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobsToExclude, blockBlobDefaultData, azblob.AccessTierHot) + excludeString := "so*;not*;exactName" + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + transferParams := transferParams{ + blockBlobTier: 
common.EBlockBlobTier.Cool(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{}, + } + + raw := getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), transferParams) + + raw.recursive = true + raw.include = includeString + raw.exclude = excludeString + + // make the input for list-of-files + listOfFiles := append(blobListPart1, vdirName) + + // add some random files that don't actually exist + listOfFiles = append(listOfFiles, "WUTAMIDOING") + listOfFiles = append(listOfFiles, "DONTKNOW") + + // add files to both include and exclude + listOfFiles = append(listOfFiles, blobsToInclude...) + listOfFiles = append(listOfFiles, blobsToExclude...) + raw.listOfFilesToCopy = scenarioHelper{}.generateListOfFiles(c, listOfFiles) + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // validate that the right number of transfers were scheduled + c.Assert(len(mockedRPC.transfers), chk.Equals, len(blobsToInclude)) + + // validate that the right transfers were sent + validateSetPropertiesTransfersAreScheduled(c, true, blobsToInclude, transferParams, mockedRPC) + }) +} + +func (s *cmdIntegrationSuite) TestSetPropertiesSingleBlobWithFromToForBlobTier(c *chk.C) { + bsu := getBSU() + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + + for _, blobName := range []string{"top/mid/low/singleblobisbest", "打麻将.txt", "%4509%4254$85140&"} { + // set up the container with a single blob + blobList := []string{blobName} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobList, blockBlobDefaultData, azblob.AccessTierHot) + c.Assert(containerURL, chk.NotNil) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawBlobURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, blobList[0]) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.Cool(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{}, + } + + raw := getDefaultSetPropertiesRawInput(rawBlobURLWithSAS.String(), transferParams) + raw.fromTo = "BlobNone" + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // note that when we are targeting single blobs, the relative path is empty ("") since the root path already points to the blob + validateSetPropertiesTransfersAreScheduled(c, true, []string{""}, transferParams, mockedRPC) + }) + } +} + +func (s *cmdIntegrationSuite) TestSetPropertiesBlobsUnderContainerWithFromToForBlobTier(c *chk.C) { + bsu := getBSU() + + // set up the container with numerous blobs + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + blobList := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + + c.Assert(containerURL, chk.NotNil) + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.Cool(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{}, + } + + raw := getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), 
transferParams) + raw.fromTo = "BlobNone" + raw.recursive = true + raw.includeDirectoryStubs = false // The test target is a DFS account, which coincidentally created our directory stubs. Thus, we mustn't include them, since this is a test of blob. + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // validate that the right number of transfers were scheduled + c.Assert(len(mockedRPC.transfers), chk.Equals, len(blobList)) + + // validate that the right transfers were sent + validateSetPropertiesTransfersAreScheduled(c, true, blobList, transferParams, mockedRPC) + }) + + // turn off recursive, this time only top blobs should be deleted + raw.recursive = false + mockedRPC.reset() + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + c.Assert(len(mockedRPC.transfers), chk.Not(chk.Equals), len(blobList)) + + for _, transfer := range mockedRPC.transfers { + c.Assert(strings.Contains(transfer.Source, common.AZCOPY_PATH_SEPARATOR_STRING), chk.Equals, false) + } + }) +} + +func (s *cmdIntegrationSuite) TestSetPropertiesBlobsUnderVirtualDirWithFromToForBlobTier(c *chk.C) { + c.Skip("Enable after setting Account to non-HNS") + bsu := getBSU() + vdirName := "vdir1/vdir2/vdir3/" + + // set up the container with numerous blobs + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + blobList := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, vdirName, azblob.AccessTierHot) + + c.Assert(containerURL, chk.NotNil) + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawVirtualDirectoryURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, vdirName) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.Cool(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{}, + } + + raw := getDefaultSetPropertiesRawInput(rawVirtualDirectoryURLWithSAS.String(), transferParams) + raw.fromTo = "BlobNone" + raw.recursive = true + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // validate that the right number of transfers were scheduled + c.Assert(len(mockedRPC.transfers), chk.Equals, len(blobList)) + + // validate that the right transfers were sent + expectedTransfers := scenarioHelper{}.shaveOffPrefix(blobList, vdirName) + validateSetPropertiesTransfersAreScheduled(c, true, expectedTransfers, transferParams, mockedRPC) + }) + + // turn off recursive, this time only top blobs should be deleted + raw.recursive = false + mockedRPC.reset() + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + c.Assert(len(mockedRPC.transfers), chk.Not(chk.Equals), len(blobList)) + + for _, transfer := range mockedRPC.transfers { + c.Assert(strings.Contains(transfer.Source, common.AZCOPY_PATH_SEPARATOR_STRING), chk.Equals, false) + } + }) +} + +///////////////////////////////// METADATA ///////////////////////////////// + +func (s *cmdIntegrationSuite) TestSetPropertiesSingleBlobForMetadata(c *chk.C) { + bsu := getBSU() + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + + for _, blobName := range []string{"top/mid/low/singleblobisbest", "打麻将.txt", "%4509%4254$85140&"} { + // set up the container with a single blob + blobList := []string{blobName} + + // upload the data with given 
accessTier + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobList, blockBlobDefaultData, azblob.AccessTierHot) + c.Assert(containerURL, chk.NotNil) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawBlobURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, blobList[0]) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "abc=def;metadata=value", + blobTags: common.BlobTags{}, + } + raw := getDefaultSetPropertiesRawInput(rawBlobURLWithSAS.String(), transferParams) + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // note that when we are targeting single blobs, the relative path is empty ("") since the root path already points to the blob + validateSetPropertiesTransfersAreScheduled(c, true, []string{""}, transferParams, mockedRPC) + }) + } +} + +func (s *cmdIntegrationSuite) TestSetPropertiesSingleBlobForEmptyMetadata(c *chk.C) { + bsu := getBSU() + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + + for _, blobName := range []string{"top/mid/low/singleblobisbest", "打麻将.txt", "%4509%4254$85140&"} { + // set up the container with a single blob + blobList := []string{blobName} + + // upload the data with given accessTier + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobList, blockBlobDefaultData, azblob.AccessTierHot) + c.Assert(containerURL, chk.NotNil) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawBlobURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, blobList[0]) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{}, + } + raw := getDefaultSetPropertiesRawInput(rawBlobURLWithSAS.String(), transferParams) + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // note that when we are targeting single blobs, the relative path is empty ("") since the root path already points to the blob + validateSetPropertiesTransfersAreScheduled(c, true, []string{""}, transferParams, mockedRPC) + }) + } +} + +func (s *cmdIntegrationSuite) TestSetPropertiesBlobsUnderContainerForMetadata(c *chk.C) { + bsu := getBSU() + + // set up the container with numerous blobs + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + blobList := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + c.Assert(containerURL, chk.NotNil) + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "abc=def;metadata=value", + blobTags: common.BlobTags{}, + } + raw := getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), transferParams) + raw.recursive = true + raw.includeDirectoryStubs = false // The test target is a DFS account, which 
coincidentally created our directory stubs. Thus, we mustn't include them, since this is a test of blob. + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // validate that the right number of transfers were scheduled + c.Assert(len(mockedRPC.transfers), chk.Equals, len(blobList)) + + // validate that the right transfers were sent + validateSetPropertiesTransfersAreScheduled(c, true, blobList, transferParams, mockedRPC) + }) + + // turn off recursive, this time only top blobs should be changed + raw.recursive = false + mockedRPC.reset() + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + c.Assert(len(mockedRPC.transfers), chk.Not(chk.Equals), len(blobList)) + + for _, transfer := range mockedRPC.transfers { + c.Assert(strings.Contains(transfer.Source, common.AZCOPY_PATH_SEPARATOR_STRING), chk.Equals, false) + } + }) +} + +func (s *cmdIntegrationSuite) TestSetPropertiesWithIncludeFlagForMetadata(c *chk.C) { + bsu := getBSU() + + // set up the container with numerous blobs + containerURL, containerName := createNewContainer(c, bsu) + blobList := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // add special blobs that we wish to include + blobsToInclude := []string{"important.pdf", "includeSub/amazing.jpeg", "exactName"} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobsToInclude, blockBlobDefaultData, azblob.AccessTierHot) + includeString := "*.pdf;*.jpeg;exactName" + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "abc=def;metadata=value", + blobTags: common.BlobTags{}, + } + raw := getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), transferParams) + raw.include = includeString + raw.recursive = true + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + validateDownloadTransfersAreScheduled(c, "", "", blobsToInclude, mockedRPC) + validateSetPropertiesTransfersAreScheduled(c, true, blobsToInclude, transferParams, mockedRPC) + }) +} + +func (s *cmdIntegrationSuite) TestSetPropertiesWithExcludeFlagForMetadata(c *chk.C) { + bsu := getBSU() + + // set up the container with numerous blobs + containerURL, containerName := createNewContainer(c, bsu) + blobList := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // add special blobs that we wish to exclude + blobsToExclude := []string{"notGood.pdf", "excludeSub/lame.jpeg", "exactName"} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobsToExclude, blockBlobDefaultData, azblob.AccessTierHot) + excludeString := "*.pdf;*.jpeg;exactName" + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + transferParams := 
transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "abc=def;metadata=value", + blobTags: common.BlobTags{}, + } + + raw := getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), transferParams) + raw.exclude = excludeString + raw.recursive = true + raw.includeDirectoryStubs = false // The test target is a DFS account, which coincidentally created our directory stubs. Thus, we mustn't include them, since this is a test of blob. + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + validateDownloadTransfersAreScheduled(c, "", "", blobList, mockedRPC) + validateSetPropertiesTransfersAreScheduled(c, true, blobList, transferParams, mockedRPC) + }) +} + +func (s *cmdIntegrationSuite) TestSetPropertiesWithIncludeAndExcludeFlagForMetadata(c *chk.C) { + bsu := getBSU() + + // set up the container with numerous blobs + containerURL, containerName := createNewContainer(c, bsu) + blobList := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // add special blobs that we wish to include + blobsToInclude := []string{"important.pdf", "includeSub/amazing.jpeg"} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobsToInclude, blockBlobDefaultData, azblob.AccessTierHot) + includeString := "*.pdf;*.jpeg;exactName" + + // add special blobs that we wish to exclude + // note that the excluded files also match the include string + blobsToExclude := []string{"sorry.pdf", "exclude/notGood.jpeg", "exactName", "sub/exactName"} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobsToExclude, blockBlobDefaultData, azblob.AccessTierHot) + excludeString := "so*;not*;exactName" + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "abc=def;metadata=value", + blobTags: common.BlobTags{}, + } + + raw := getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), transferParams) + raw.include = includeString + raw.exclude = excludeString + raw.recursive = true + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + validateDownloadTransfersAreScheduled(c, "", "", blobsToInclude, mockedRPC) + validateSetPropertiesTransfersAreScheduled(c, true, blobsToInclude, transferParams, mockedRPC) + }) +} + +// note: list-of-files flag is used +func (s *cmdIntegrationSuite) TestSetPropertiesListOfBlobsAndVirtualDirsForMetadata(c *chk.C) { + c.Skip("Enable after setting Account to non-HNS") + bsu := getBSU() + vdirName := "megadir" + + // set up the container with numerous blobs and a vdir + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + blobListPart1 := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + blobListPart2 := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, vdirName+"/", azblob.AccessTierHot) + + blobList := append(blobListPart1, blobListPart2...) 
+ c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "abc=def;metadata=value", + blobTags: common.BlobTags{}, + } + + raw := getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), transferParams) + raw.recursive = true + + // make the input for list-of-files + listOfFiles := append(blobListPart1, vdirName) + + // add some random files that don't actually exist + listOfFiles = append(listOfFiles, "WUTAMIDOING") + listOfFiles = append(listOfFiles, "DONTKNOW") + raw.listOfFilesToCopy = scenarioHelper{}.generateListOfFiles(c, listOfFiles) + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // validate that the right number of transfers were scheduled + c.Assert(len(mockedRPC.transfers), chk.Equals, len(blobList)) + + // validate that the right transfers were sent + validateSetPropertiesTransfersAreScheduled(c, true, blobList, transferParams, mockedRPC) + }) + + // turn off recursive, this time only top blobs should be deleted + raw.recursive = false + mockedRPC.reset() + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + c.Assert(len(mockedRPC.transfers), chk.Not(chk.Equals), len(blobList)) + + for _, transfer := range mockedRPC.transfers { + source, err := url.PathUnescape(transfer.Source) + c.Assert(err, chk.IsNil) + + // if the transfer is under the given dir, make sure only the top level files were scheduled + if strings.HasPrefix(source, vdirName) { + trimmedSource := strings.TrimPrefix(source, vdirName+"/") + c.Assert(strings.Contains(trimmedSource, common.AZCOPY_PATH_SEPARATOR_STRING), chk.Equals, false) + } + } + }) +} + +func (s *cmdIntegrationSuite) TestSetPropertiesListOfBlobsWithIncludeAndExcludeForMetadata(c *chk.C) { + bsu := getBSU() + vdirName := "megadir" + + // set up the container with numerous blobs and a vdir + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + blobListPart1 := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, vdirName+"/", azblob.AccessTierHot) + + // add special blobs that we wish to include + blobsToInclude := []string{"important.pdf", "includeSub/amazing.jpeg"} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobsToInclude, blockBlobDefaultData, azblob.AccessTierHot) + + includeString := "*.pdf;*.jpeg;exactName" + + // add special blobs that we wish to exclude + // note that the excluded files also match the include string + blobsToExclude := []string{"sorry.pdf", "exclude/notGood.jpeg", "exactName", "sub/exactName"} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobsToExclude, blockBlobDefaultData, azblob.AccessTierHot) + excludeString := "so*;not*;exactName" + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + transferParams := transferParams{ + 
blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "abc=def;metadata=value", + blobTags: common.BlobTags{}, + } + + raw := getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), transferParams) + + raw.recursive = true + raw.include = includeString + raw.exclude = excludeString + + // make the input for list-of-files + listOfFiles := append(blobListPart1, vdirName) + + // add some random files that don't actually exist + listOfFiles = append(listOfFiles, "WUTAMIDOING") + listOfFiles = append(listOfFiles, "DONTKNOW") + + // add files to both include and exclude + listOfFiles = append(listOfFiles, blobsToInclude...) + listOfFiles = append(listOfFiles, blobsToExclude...) + raw.listOfFilesToCopy = scenarioHelper{}.generateListOfFiles(c, listOfFiles) + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // validate that the right number of transfers were scheduled + c.Assert(len(mockedRPC.transfers), chk.Equals, len(blobsToInclude)) + + // validate that the right transfers were sent + validateSetPropertiesTransfersAreScheduled(c, true, blobsToInclude, transferParams, mockedRPC) + }) +} + +func (s *cmdIntegrationSuite) TestSetPropertiesSingleBlobWithFromToForMetadata(c *chk.C) { + bsu := getBSU() + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + + for _, blobName := range []string{"top/mid/low/singleblobisbest", "打麻将.txt", "%4509%4254$85140&"} { + // set up the container with a single blob + blobList := []string{blobName} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobList, blockBlobDefaultData, azblob.AccessTierHot) + c.Assert(containerURL, chk.NotNil) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawBlobURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, blobList[0]) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "abc=def;metadata=value", + blobTags: common.BlobTags{}, + } + + raw := getDefaultSetPropertiesRawInput(rawBlobURLWithSAS.String(), transferParams) + raw.fromTo = "BlobNone" + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // note that when we are targeting single blobs, the relative path is empty ("") since the root path already points to the blob + validateSetPropertiesTransfersAreScheduled(c, true, []string{""}, transferParams, mockedRPC) + }) + } +} + +func (s *cmdIntegrationSuite) TestSetPropertiesBlobsUnderContainerWithFromToForMetadata(c *chk.C) { + bsu := getBSU() + + // set up the container with numerous blobs + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + blobList := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + + c.Assert(containerURL, chk.NotNil) + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "abc=def;metadata=value", + blobTags: common.BlobTags{}, + } + + raw 
:= getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), transferParams) + raw.fromTo = "BlobNone" + raw.recursive = true + raw.includeDirectoryStubs = false // The test target is a DFS account, which coincidentally created our directory stubs. Thus, we mustn't include them, since this is a test of blob. + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // validate that the right number of transfers were scheduled + c.Assert(len(mockedRPC.transfers), chk.Equals, len(blobList)) + + // validate that the right transfers were sent + validateSetPropertiesTransfersAreScheduled(c, true, blobList, transferParams, mockedRPC) + }) + + // turn off recursive, this time only top blobs should be deleted + raw.recursive = false + mockedRPC.reset() + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + c.Assert(len(mockedRPC.transfers), chk.Not(chk.Equals), len(blobList)) + + for _, transfer := range mockedRPC.transfers { + c.Assert(strings.Contains(transfer.Source, common.AZCOPY_PATH_SEPARATOR_STRING), chk.Equals, false) + } + }) +} + +func (s *cmdIntegrationSuite) TestSetPropertiesBlobsUnderVirtualDirWithFromToForMetadata(c *chk.C) { + c.Skip("Enable after setting Account to non-HNS") + bsu := getBSU() + vdirName := "vdir1/vdir2/vdir3/" + + // set up the container with numerous blobs + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + blobList := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, vdirName, azblob.AccessTierHot) + + c.Assert(containerURL, chk.NotNil) + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawVirtualDirectoryURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, vdirName) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "abc=def;metadata=value", + blobTags: common.BlobTags{}, + } + + raw := getDefaultSetPropertiesRawInput(rawVirtualDirectoryURLWithSAS.String(), transferParams) + raw.fromTo = "BlobNone" + raw.recursive = true + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // validate that the right number of transfers were scheduled + c.Assert(len(mockedRPC.transfers), chk.Equals, len(blobList)) + + // validate that the right transfers were sent + expectedTransfers := scenarioHelper{}.shaveOffPrefix(blobList, vdirName) + validateSetPropertiesTransfersAreScheduled(c, true, expectedTransfers, transferParams, mockedRPC) + }) + + // turn off recursive, this time only top blobs should be deleted + raw.recursive = false + mockedRPC.reset() + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + c.Assert(len(mockedRPC.transfers), chk.Not(chk.Equals), len(blobList)) + + for _, transfer := range mockedRPC.transfers { + c.Assert(strings.Contains(transfer.Source, common.AZCOPY_PATH_SEPARATOR_STRING), chk.Equals, false) + } + }) +} + +///////////////////////////////// TAGS ///////////////////////////////// + +func (s *cmdIntegrationSuite) TestSetPropertiesSingleBlobForBlobTags(c *chk.C) { + bsu := getBSU() + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + + for _, blobName := range []string{"top/mid/low/singleblobisbest", "打麻将.txt", "%4509%4254$85140&"} { + // set up the container with a 
single blob + blobList := []string{blobName} + + // upload the data with given accessTier + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobList, blockBlobDefaultData, azblob.AccessTierHot) + c.Assert(containerURL, chk.NotNil) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawBlobURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, blobList[0]) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{"abc": "fgd"}, + } + raw := getDefaultSetPropertiesRawInput(rawBlobURLWithSAS.String(), transferParams) + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // note that when we are targeting single blobs, the relative path is empty ("") since the root path already points to the blob + validateSetPropertiesTransfersAreScheduled(c, true, []string{""}, transferParams, mockedRPC) + }) + } +} + +func (s *cmdIntegrationSuite) TestSetPropertiesSingleBlobForEmptyBlobTags(c *chk.C) { + bsu := getBSU() + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + + for _, blobName := range []string{"top/mid/low/singleblobisbest", "打麻将.txt", "%4509%4254$85140&"} { + // set up the container with a single blob + blobList := []string{blobName} + + // upload the data with given accessTier + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobList, blockBlobDefaultData, azblob.AccessTierHot) + c.Assert(containerURL, chk.NotNil) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawBlobURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, blobList[0]) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{}, + } + raw := getDefaultSetPropertiesRawInput(rawBlobURLWithSAS.String(), transferParams) + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // note that when we are targeting single blobs, the relative path is empty ("") since the root path already points to the blob + validateSetPropertiesTransfersAreScheduled(c, true, []string{""}, transferParams, mockedRPC) + }) + } +} + +func (s *cmdIntegrationSuite) TestSetPropertiesBlobsUnderContainerForBlobTags(c *chk.C) { + bsu := getBSU() + + // set up the container with numerous blobs + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + blobList := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + c.Assert(containerURL, chk.NotNil) + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{"abc": "fgd"}, + } + raw := getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), transferParams) + raw.recursive = true + 
raw.includeDirectoryStubs = false // The test target is a DFS account, which coincidentally created our directory stubs. Thus, we mustn't include them, since this is a test of blob. + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // validate that the right number of transfers were scheduled + c.Assert(len(mockedRPC.transfers), chk.Equals, len(blobList)) + + // validate that the right transfers were sent + validateSetPropertiesTransfersAreScheduled(c, true, blobList, transferParams, mockedRPC) + }) + + // turn off recursive, this time only top blobs should be changed + raw.recursive = false + mockedRPC.reset() + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + c.Assert(len(mockedRPC.transfers), chk.Not(chk.Equals), len(blobList)) + + for _, transfer := range mockedRPC.transfers { + c.Assert(strings.Contains(transfer.Source, common.AZCOPY_PATH_SEPARATOR_STRING), chk.Equals, false) + } + }) +} + +func (s *cmdIntegrationSuite) TestSetPropertiesWithIncludeFlagForBlobTags(c *chk.C) { + bsu := getBSU() + + // set up the container with numerous blobs + containerURL, containerName := createNewContainer(c, bsu) + blobList := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // add special blobs that we wish to include + blobsToInclude := []string{"important.pdf", "includeSub/amazing.jpeg", "exactName"} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobsToInclude, blockBlobDefaultData, azblob.AccessTierHot) + includeString := "*.pdf;*.jpeg;exactName" + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{"abc": "fgd"}, + } + raw := getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), transferParams) + raw.include = includeString + raw.recursive = true + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + validateDownloadTransfersAreScheduled(c, "", "", blobsToInclude, mockedRPC) + validateSetPropertiesTransfersAreScheduled(c, true, blobsToInclude, transferParams, mockedRPC) + }) +} + +func (s *cmdIntegrationSuite) TestSetPropertiesWithExcludeFlagForBlobTags(c *chk.C) { + bsu := getBSU() + + // set up the container with numerous blobs + containerURL, containerName := createNewContainer(c, bsu) + blobList := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // add special blobs that we wish to exclude + blobsToExclude := []string{"notGood.pdf", "excludeSub/lame.jpeg", "exactName"} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobsToExclude, blockBlobDefaultData, azblob.AccessTierHot) + excludeString := "*.pdf;*.jpeg;exactName" + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := 
scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{"abc": "fgd"}, + } + + raw := getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), transferParams) + raw.exclude = excludeString + raw.recursive = true + raw.includeDirectoryStubs = false // The test target is a DFS account, which coincidentally created our directory stubs. Thus, we mustn't include them, since this is a test of blob. + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + validateDownloadTransfersAreScheduled(c, "", "", blobList, mockedRPC) + validateSetPropertiesTransfersAreScheduled(c, true, blobList, transferParams, mockedRPC) + }) +} + +func (s *cmdIntegrationSuite) TestSetPropertiesWithIncludeAndExcludeFlagForBlobTags(c *chk.C) { + bsu := getBSU() + + // set up the container with numerous blobs + containerURL, containerName := createNewContainer(c, bsu) + blobList := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // add special blobs that we wish to include + blobsToInclude := []string{"important.pdf", "includeSub/amazing.jpeg"} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobsToInclude, blockBlobDefaultData, azblob.AccessTierHot) + includeString := "*.pdf;*.jpeg;exactName" + + // add special blobs that we wish to exclude + // note that the excluded files also match the include string + blobsToExclude := []string{"sorry.pdf", "exclude/notGood.jpeg", "exactName", "sub/exactName"} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobsToExclude, blockBlobDefaultData, azblob.AccessTierHot) + excludeString := "so*;not*;exactName" + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{"abc": "fgd"}, + } + + raw := getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), transferParams) + raw.include = includeString + raw.exclude = excludeString + raw.recursive = true + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + validateDownloadTransfersAreScheduled(c, "", "", blobsToInclude, mockedRPC) + validateSetPropertiesTransfersAreScheduled(c, true, blobsToInclude, transferParams, mockedRPC) + }) +} + +// note: list-of-files flag is used +func (s *cmdIntegrationSuite) TestSetPropertiesListOfBlobsAndVirtualDirsForBlobTags(c *chk.C) { + c.Skip("Enable after setting Account to non-HNS") + bsu := getBSU() + vdirName := "megadir" + + // set up the container with numerous blobs and a vdir + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + blobListPart1 := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + blobListPart2 := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, vdirName+"/", azblob.AccessTierHot) + + 
blobList := append(blobListPart1, blobListPart2...) + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{"abc": "fgd"}, + } + + raw := getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), transferParams) + raw.recursive = true + + // make the input for list-of-files + listOfFiles := append(blobListPart1, vdirName) + + // add some random files that don't actually exist + listOfFiles = append(listOfFiles, "WUTAMIDOING") + listOfFiles = append(listOfFiles, "DONTKNOW") + raw.listOfFilesToCopy = scenarioHelper{}.generateListOfFiles(c, listOfFiles) + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // validate that the right number of transfers were scheduled + c.Assert(len(mockedRPC.transfers), chk.Equals, len(blobList)) + + // validate that the right transfers were sent + validateSetPropertiesTransfersAreScheduled(c, true, blobList, transferParams, mockedRPC) + }) + + // turn off recursive, this time only top blobs should be deleted + raw.recursive = false + mockedRPC.reset() + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + c.Assert(len(mockedRPC.transfers), chk.Not(chk.Equals), len(blobList)) + + for _, transfer := range mockedRPC.transfers { + source, err := url.PathUnescape(transfer.Source) + c.Assert(err, chk.IsNil) + + // if the transfer is under the given dir, make sure only the top level files were scheduled + if strings.HasPrefix(source, vdirName) { + trimmedSource := strings.TrimPrefix(source, vdirName+"/") + c.Assert(strings.Contains(trimmedSource, common.AZCOPY_PATH_SEPARATOR_STRING), chk.Equals, false) + } + } + }) +} + +func (s *cmdIntegrationSuite) TestSetPropertiesListOfBlobsWithIncludeAndExcludeForBlobTags(c *chk.C) { + bsu := getBSU() + vdirName := "megadir" + + // set up the container with numerous blobs and a vdir + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + blobListPart1 := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, vdirName+"/", azblob.AccessTierHot) + + // add special blobs that we wish to include + blobsToInclude := []string{"important.pdf", "includeSub/amazing.jpeg"} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobsToInclude, blockBlobDefaultData, azblob.AccessTierHot) + + includeString := "*.pdf;*.jpeg;exactName" + + // add special blobs that we wish to exclude + // note that the excluded files also match the include string + blobsToExclude := []string{"sorry.pdf", "exclude/notGood.jpeg", "exactName", "sub/exactName"} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobsToExclude, blockBlobDefaultData, azblob.AccessTierHot) + excludeString := "so*;not*;exactName" + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) 
+ transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{"abc": "fgd"}, + } + + raw := getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), transferParams) + + raw.recursive = true + raw.include = includeString + raw.exclude = excludeString + + // make the input for list-of-files + listOfFiles := append(blobListPart1, vdirName) + + // add some random files that don't actually exist + listOfFiles = append(listOfFiles, "WUTAMIDOING") + listOfFiles = append(listOfFiles, "DONTKNOW") + + // add files to both include and exclude + listOfFiles = append(listOfFiles, blobsToInclude...) + listOfFiles = append(listOfFiles, blobsToExclude...) + raw.listOfFilesToCopy = scenarioHelper{}.generateListOfFiles(c, listOfFiles) + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // validate that the right number of transfers were scheduled + c.Assert(len(mockedRPC.transfers), chk.Equals, len(blobsToInclude)) + + // validate that the right transfers were sent + validateSetPropertiesTransfersAreScheduled(c, true, blobsToInclude, transferParams, mockedRPC) + }) +} + +func (s *cmdIntegrationSuite) TestSetPropertiesSingleBlobWithFromToForBlobTags(c *chk.C) { + bsu := getBSU() + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + + for _, blobName := range []string{"top/mid/low/singleblobisbest", "打麻将.txt", "%4509%4254$85140&"} { + // set up the container with a single blob + blobList := []string{blobName} + scenarioHelper{}.generateBlobsFromListWithAccessTier(c, containerURL, blobList, blockBlobDefaultData, azblob.AccessTierHot) + c.Assert(containerURL, chk.NotNil) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawBlobURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, blobList[0]) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{"abc": "fgd"}, + } + + raw := getDefaultSetPropertiesRawInput(rawBlobURLWithSAS.String(), transferParams) + raw.fromTo = "BlobNone" + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // note that when we are targeting single blobs, the relative path is empty ("") since the root path already points to the blob + validateSetPropertiesTransfersAreScheduled(c, true, []string{""}, transferParams, mockedRPC) + }) + } +} + +func (s *cmdIntegrationSuite) TestSetPropertiesBlobsUnderContainerWithFromToForBlobTags(c *chk.C) { + bsu := getBSU() + + // set up the container with numerous blobs + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + blobList := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, "", azblob.AccessTierHot) + + c.Assert(containerURL, chk.NotNil) + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{"abc": "fgd"}, + } + 
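+ // The explicit fromTo ("BlobNone") set below exercises the property-only FromTo values added in
+ // common/fe-ste-models.go (Blob/BlobFS/File to None), which mark jobs that change properties in
+ // place rather than copying data.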
+ raw := getDefaultSetPropertiesRawInput(rawContainerURLWithSAS.String(), transferParams) + raw.fromTo = "BlobNone" + raw.recursive = true + raw.includeDirectoryStubs = false // The test target is a DFS account, which coincidentally created our directory stubs. Thus, we mustn't include them, since this is a test of blob. + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // validate that the right number of transfers were scheduled + c.Assert(len(mockedRPC.transfers), chk.Equals, len(blobList)) + + // validate that the right transfers were sent + validateSetPropertiesTransfersAreScheduled(c, true, blobList, transferParams, mockedRPC) + }) + + // turn off recursive, this time only top blobs should be deleted + raw.recursive = false + mockedRPC.reset() + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + c.Assert(len(mockedRPC.transfers), chk.Not(chk.Equals), len(blobList)) + + for _, transfer := range mockedRPC.transfers { + c.Assert(strings.Contains(transfer.Source, common.AZCOPY_PATH_SEPARATOR_STRING), chk.Equals, false) + } + }) +} + +func (s *cmdIntegrationSuite) TestSetPropertiesBlobsUnderVirtualDirWithFromToForBlobTags(c *chk.C) { + c.Skip("Enable after setting Account to non-HNS") + bsu := getBSU() + vdirName := "vdir1/vdir2/vdir3/" + + // set up the container with numerous blobs + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + blobList := scenarioHelper{}.generateCommonRemoteScenarioForBlobWithAccessTier(c, containerURL, vdirName, azblob.AccessTierHot) + + c.Assert(containerURL, chk.NotNil) + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawVirtualDirectoryURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, vdirName) + transferParams := transferParams{ + blockBlobTier: common.EBlockBlobTier.None(), + pageBlobTier: common.EPageBlobTier.None(), + metadata: "", + blobTags: common.BlobTags{"abc": "fgd"}, + } + + raw := getDefaultSetPropertiesRawInput(rawVirtualDirectoryURLWithSAS.String(), transferParams) + raw.fromTo = "BlobNone" + raw.recursive = true + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + + // validate that the right number of transfers were scheduled + c.Assert(len(mockedRPC.transfers), chk.Equals, len(blobList)) + + // validate that the right transfers were sent + expectedTransfers := scenarioHelper{}.shaveOffPrefix(blobList, vdirName) + validateSetPropertiesTransfersAreScheduled(c, true, expectedTransfers, transferParams, mockedRPC) + }) + + // turn off recursive, this time only top blobs should be deleted + raw.recursive = false + mockedRPC.reset() + + runCopyAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + c.Assert(len(mockedRPC.transfers), chk.Not(chk.Equals), len(blobList)) + + for _, transfer := range mockedRPC.transfers { + c.Assert(strings.Contains(transfer.Source, common.AZCOPY_PATH_SEPARATOR_STRING), chk.Equals, false) + } + }) +} diff --git a/common/fe-ste-models.go b/common/fe-ste-models.go index 18124e828..d7b827d69 100644 --- a/common/fe-ste-models.go +++ b/common/fe-ste-models.go @@ -451,6 +451,7 @@ func (Location) BlobFS() Location { return Location(5) } func (Location) S3() Location { return Location(6) } func (Location) Benchmark() Location { return Location(7) } func (Location) GCP() Location { return Location(8) } +func (Location) 
None() Location { return Location(9) } // None is used in case we're transferring properties func (l Location) String() string { return enum.StringInt(l, reflect.TypeOf(l)) @@ -479,7 +480,7 @@ func (l Location) IsRemote() bool { switch l { case ELocation.BlobFS(), ELocation.Blob(), ELocation.File(), ELocation.S3(), ELocation.GCP(): return true - case ELocation.Local(), ELocation.Benchmark(), ELocation.Pipe(), ELocation.Unknown(): + case ELocation.Local(), ELocation.Benchmark(), ELocation.Pipe(), ELocation.Unknown(), ELocation.None(): return false default: panic("unexpected location, please specify if it is remote") @@ -500,7 +501,7 @@ func (l Location) IsFolderAware() bool { switch l { case ELocation.BlobFS(), ELocation.File(), ELocation.Local(): return true - case ELocation.Blob(), ELocation.S3(), ELocation.GCP(), ELocation.Benchmark(), ELocation.Pipe(), ELocation.Unknown(): + case ELocation.Blob(), ELocation.S3(), ELocation.GCP(), ELocation.Benchmark(), ELocation.Pipe(), ELocation.Unknown(), ELocation.None(): return false default: panic("unexpected location, please specify if it is folder-aware") @@ -538,6 +539,9 @@ func (FromTo) BlobFile() FromTo { return FromTo(fromToValue(ELocation.Blob(), func (FromTo) FileFile() FromTo { return FromTo(fromToValue(ELocation.File(), ELocation.File())) } func (FromTo) S3Blob() FromTo { return FromTo(fromToValue(ELocation.S3(), ELocation.Blob())) } func (FromTo) GCPBlob() FromTo { return FromTo(fromToValue(ELocation.GCP(), ELocation.Blob())) } +func (FromTo) BlobNone() FromTo { return fromToValue(ELocation.Blob(), ELocation.None()) } +func (FromTo) BlobFSNone() FromTo { return fromToValue(ELocation.BlobFS(), ELocation.None()) } +func (FromTo) FileNone() FromTo { return fromToValue(ELocation.File(), ELocation.None()) } // todo: to we really want these? Starts to look like a bit of a combinatorial explosion func (FromTo) BenchmarkBlob() FromTo { @@ -598,6 +602,10 @@ func (ft *FromTo) AreBothFolderAware() bool { return ft.From().IsFolderAware() && ft.To().IsFolderAware() } +func (ft *FromTo) IsPropertyOnlyTransfer() bool { + return *ft == EFromTo.BlobNone() || *ft == EFromTo.BlobFSNone() || *ft == EFromTo.FileNone() +} + // TODO: deletes are not covered by the above Is* routines var BenchmarkLmt = time.Date(1900, 1, 1, 0, 0, 0, 0, time.UTC) @@ -1002,6 +1010,8 @@ type CopyTransfer struct { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Metadata used in AzCopy. +const MetadataAndBlobTagsClearFlag = "clear" // clear flag used for metadata and tags + type Metadata map[string]string func (m Metadata) Clone() Metadata { @@ -1057,6 +1067,21 @@ func UnMarshalToCommonMetadata(metadataString string) (Metadata, error) { return result, nil } +func StringToMetadata(metadataString string) (Metadata, error) { + metadataMap := Metadata{} + if len(metadataString) > 0 { + for _, keyAndValue := range strings.Split(metadataString, ";") { // key/value pairs are separated by ';' + kv := strings.Split(keyAndValue, "=") // key/value are separated by '=' + // what if '=' not present? + if len(kv) != 2 { + return metadataMap, fmt.Errorf("invalid metadata string passed") + } + metadataMap[kv[0]] = kv[1] + } + } + return metadataMap, nil +} + // isValidMetadataKey checks if the given string is a valid metadata key for Azure. // For Azure, metadata key must adhere to the naming rules for C# identifiers. // As testing, reserved keywords for C# identifiers are also valid metadata key. (e.g. 
this, int) @@ -1130,9 +1155,12 @@ func (bt BlobTags) ToString() string { } func ToCommonBlobTagsMap(blobTagsString string) BlobTags { - if blobTagsString == "" { + if blobTagsString == "" { // default empty value set by coder return nil } + if strings.EqualFold(blobTagsString, MetadataAndBlobTagsClearFlag) { // "clear" value given by user as input (to signify clearing of tags in set-props cmd) + return BlobTags{} + } blobTagsMap := BlobTags{} for _, keyAndValue := range strings.Split(blobTagsString, "&") { // key/value pairs are separated by '&' @@ -1527,3 +1555,57 @@ func GetClientProvidedKey(options CpkOptions) azblob.ClientProvidedKeyOptions { _cpkScopeInfo := GetCpkScopeInfo(options.CpkScopeInfo) return ToClientProvidedKeyOptions(_cpkInfo, _cpkScopeInfo) } + +//////////////////////////////////////////////////////////////////////////////// +type SetPropertiesFlags uint32 // [0000000000...32 times] + +var ESetPropertiesFlags = SetPropertiesFlags(0) + +// functions to set values +func (SetPropertiesFlags) None() SetPropertiesFlags { return SetPropertiesFlags(0) } +func (SetPropertiesFlags) SetTier() SetPropertiesFlags { return SetPropertiesFlags(1) } +func (SetPropertiesFlags) SetMetadata() SetPropertiesFlags { return SetPropertiesFlags(2) } +func (SetPropertiesFlags) SetBlobTags() SetPropertiesFlags { return SetPropertiesFlags(4) } + +// functions to get values (to be used in sde) +// If Y is inside X then X & Y == Y +func (op *SetPropertiesFlags) ShouldTransferTier() bool { + return (*op)&ESetPropertiesFlags.SetTier() == ESetPropertiesFlags.SetTier() +} +func (op *SetPropertiesFlags) ShouldTransferMetaData() bool { + return (*op)&ESetPropertiesFlags.SetMetadata() == ESetPropertiesFlags.SetMetadata() +} +func (op *SetPropertiesFlags) ShouldTransferBlobTags() bool { + return (*op)&ESetPropertiesFlags.SetBlobTags() == ESetPropertiesFlags.SetBlobTags() +} + +//////////////////////////////////////////////////////////////////////////////// +type RehydratePriorityType uint8 + +var ERehydratePriorityType = RehydratePriorityType(0) // setting default as none + +func (RehydratePriorityType) None() RehydratePriorityType { return RehydratePriorityType(0) } +func (RehydratePriorityType) Standard() RehydratePriorityType { return RehydratePriorityType(1) } +func (RehydratePriorityType) High() RehydratePriorityType { return RehydratePriorityType(2) } + +func (rpt *RehydratePriorityType) Parse(s string) error { + val, err := enum.ParseInt(reflect.TypeOf(rpt), s, true, true) + if err == nil { + *rpt = val.(RehydratePriorityType) + } + return err +} +func (rpt RehydratePriorityType) String() string { + return enum.StringInt(rpt, reflect.TypeOf(rpt)) +} + +func (rpt RehydratePriorityType) ToRehydratePriorityType() azblob.RehydratePriorityType { + switch rpt { + case ERehydratePriorityType.None(), ERehydratePriorityType.Standard(): + return azblob.RehydratePriorityStandard + case ERehydratePriorityType.High(): + return azblob.RehydratePriorityHigh + default: + return azblob.RehydratePriorityStandard + } +} diff --git a/common/rpc-models.go b/common/rpc-models.go index 588881aed..accb27e48 100644 --- a/common/rpc-models.go +++ b/common/rpc-models.go @@ -149,6 +149,7 @@ type CopyJobPartOrderRequest struct { S2SInvalidMetadataHandleOption InvalidMetadataHandleOption S2SPreserveBlobTags bool CpkOptions CpkOptions + SetPropertiesFlags SetPropertiesFlags // S2SSourceCredentialType will override CredentialInfo.CredentialType for use on the source. 
// As a result, CredentialInfo.OAuthTokenInfo may end up being fulfilled even _if_ CredentialInfo.CredentialType is _not_ OAuth. @@ -219,6 +220,7 @@ type BlobTransferAttributes struct { DeleteSnapshotsOption DeleteSnapshotsOption // when deleting, specify what to do with the snapshots BlobTagsString string // when user explicitly provides blob tags PermanentDeleteOption PermanentDeleteOption // Permanently deletes soft-deleted snapshots when indicated by user + RehydratePriority RehydratePriorityType // rehydrate priority of blob } type JobIDDetails struct { diff --git a/ste/JobPartPlan.go b/ste/JobPartPlan.go index acefdaf8f..bf9bbd975 100644 --- a/ste/JobPartPlan.go +++ b/ste/JobPartPlan.go @@ -89,6 +89,8 @@ type JobPartPlanHeader struct { // Determine what to do with soft-deleted snapshots PermanentDeleteOption common.PermanentDeleteOption + + RehydratePriority common.RehydratePriorityType } // Status returns the job status stored in JobPartPlanHeader in thread-safe manner @@ -325,6 +327,8 @@ type JobPartPlanDstBlob struct { // Specifies the maximum size of block which determines the number of chunks and chunk size of a transfer BlockSize int64 + + SetPropertiesFlags common.SetPropertiesFlags } // ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/ste/JobPartPlanFileName.go b/ste/JobPartPlanFileName.go index 5a04fb25c..13ac8cfe7 100644 --- a/ste/JobPartPlanFileName.go +++ b/ste/JobPartPlanFileName.go @@ -201,6 +201,7 @@ func (jpfn JobPartPlanFileName) Create(order common.CopyJobPartOrderRequest) { CpkInfo: order.CpkOptions.CpkInfo, CpkScopeInfoLength: uint16(len(order.CpkOptions.CpkScopeInfo)), IsSourceEncrypted: order.CpkOptions.IsSourceEncrypted, + SetPropertiesFlags: order.SetPropertiesFlags, }, DstLocalData: JobPartPlanDstLocal{ PreserveLastModifiedTime: order.BlobAttributes.PreserveLastModifiedTime, @@ -217,6 +218,7 @@ func (jpfn JobPartPlanFileName) Create(order common.CopyJobPartOrderRequest) { atomicJobStatus: common.EJobStatus.InProgress(), // We default to InProgress DeleteSnapshotsOption: order.BlobAttributes.DeleteSnapshotsOption, PermanentDeleteOption: order.BlobAttributes.PermanentDeleteOption, + RehydratePriority: order.BlobAttributes.RehydratePriority, } // Copy any strings into their respective fields diff --git a/ste/mgr-JobPartMgr.go b/ste/mgr-JobPartMgr.go index ceda812c7..44cbf7b10 100644 --- a/ste/mgr-JobPartMgr.go +++ b/ste/mgr-JobPartMgr.go @@ -62,6 +62,7 @@ type IJobPartMgr interface { IsSourceEncrypted() bool /* Status Manager Updates */ SendXferDoneMsg(msg xferDoneMsg) + PropertiesToTransfer() common.SetPropertiesFlags } type serviceAPIVersionOverride struct{} @@ -314,6 +315,10 @@ type jobPartMgr struct { cpkOptions common.CpkOptions closeOnCompletion chan struct{} + + SetPropertiesFlags common.SetPropertiesFlags + + RehydratePriority common.RehydratePriorityType } func (jpm *jobPartMgr) getOverwritePrompter() *overwritePrompter { @@ -392,6 +397,9 @@ func (jpm *jobPartMgr) ScheduleTransfers(jobCtx context.Context) { IsSourceEncrypted: dstData.IsSourceEncrypted, } + jpm.SetPropertiesFlags = dstData.SetPropertiesFlags + jpm.RehydratePriority = plan.RehydratePriority + jpm.preserveLastModifiedTime = plan.DstLocalData.PreserveLastModifiedTime jpm.blobTypeOverride = plan.DstBlobData.BlobType @@ -609,7 +617,7 @@ func (jpm *jobPartMgr) createPipelines(ctx context.Context) { // Create pipeline for data transfer. 
switch fromTo { case common.EFromTo.BlobTrash(), common.EFromTo.BlobLocal(), common.EFromTo.LocalBlob(), common.EFromTo.BenchmarkBlob(), - common.EFromTo.BlobBlob(), common.EFromTo.FileBlob(), common.EFromTo.S3Blob(), common.EFromTo.GCPBlob(): + common.EFromTo.BlobBlob(), common.EFromTo.FileBlob(), common.EFromTo.S3Blob(), common.EFromTo.GCPBlob(), common.EFromTo.BlobNone(), common.EFromTo.BlobFSNone(): credential := common.CreateBlobCredential(ctx, credInfo, credOption) jpm.Log(pipeline.LogInfo, fmt.Sprintf("JobID=%v, credential type: %v", jpm.Plan().JobID, credInfo.CredentialType)) jpm.pipeline = NewBlobPipeline( @@ -660,7 +668,7 @@ func (jpm *jobPartMgr) createPipelines(ctx context.Context) { jpm.jobMgr.PipelineNetworkStats()) // Create pipeline for Azure File. case common.EFromTo.FileTrash(), common.EFromTo.FileLocal(), common.EFromTo.LocalFile(), common.EFromTo.BenchmarkFile(), - common.EFromTo.FileFile(), common.EFromTo.BlobFile(): + common.EFromTo.FileFile(), common.EFromTo.BlobFile(), common.EFromTo.FileNone(): jpm.pipeline = NewFilePipeline( azfile.NewAnonymousCredential(), azfile.PipelineOptions{ @@ -795,6 +803,10 @@ func (jpm *jobPartMgr) IsSourceEncrypted() bool { return jpm.cpkOptions.IsSourceEncrypted } +func (jpm *jobPartMgr) PropertiesToTransfer() common.SetPropertiesFlags { + return jpm.SetPropertiesFlags +} + func (jpm *jobPartMgr) ShouldPutMd5() bool { return jpm.putMd5 } diff --git a/ste/mgr-JobPartTransferMgr.go b/ste/mgr-JobPartTransferMgr.go index e7ddea7cf..fa9f8ba8f 100644 --- a/ste/mgr-JobPartTransferMgr.go +++ b/ste/mgr-JobPartTransferMgr.go @@ -92,6 +92,8 @@ type IJobPartTransferMgr interface { CpkScopeInfo() common.CpkScopeInfo IsSourceEncrypted() bool GetS2SSourceBlobTokenCredential() azblob.TokenCredential + PropertiesToTransfer() common.SetPropertiesFlags + ResetSourceSize() // sets source size to 0 (made to be used by setProperties command to make number of bytes transferred = 0) } type TransferInfo struct { @@ -118,7 +120,8 @@ type TransferInfo struct { // NumChunks is the number of chunks in which transfer will be split into while uploading the transfer. // NumChunks is not used in case of AppendBlob transfer. - NumChunks uint16 + NumChunks uint16 + RehydratePriority azblob.RehydratePriorityType } func (i TransferInfo) IsFolderPropertiesTransfer() bool { @@ -346,10 +349,10 @@ func (jptm *jobPartTransferMgr) Info() TransferInfo { * we can have 4 blocks in core, waiting for a disk or n/w operation. Any higher block size would *sort of* * serialize n/w and disk operations, and is better avoided. 
*/ - if (sourceSize % common.MaxNumberOfBlocksPerBlob == 0) { - blockSize = sourceSize/common.MaxNumberOfBlocksPerBlob + if sourceSize%common.MaxNumberOfBlocksPerBlob == 0 { + blockSize = sourceSize / common.MaxNumberOfBlocksPerBlob } else { - blockSize = sourceSize/common.MaxNumberOfBlocksPerBlob +1 + blockSize = sourceSize/common.MaxNumberOfBlocksPerBlob + 1 } break } @@ -386,8 +389,9 @@ func (jptm *jobPartTransferMgr) Info() TransferInfo { SrcMetadata: srcMetadata, SrcBlobTags: srcBlobTags, }, - SrcBlobType: srcBlobType, - S2SSrcBlobTier: srcBlobTier, + SrcBlobType: srcBlobType, + S2SSrcBlobTier: srcBlobTier, + RehydratePriority: plan.RehydratePriority.ToRehydratePriorityType(), } return *jptm.transferInfo @@ -540,6 +544,14 @@ func (jptm *jobPartTransferMgr) IsSourceEncrypted() bool { return jptm.jobPartMgr.IsSourceEncrypted() } +func (jptm *jobPartTransferMgr) PropertiesToTransfer() common.SetPropertiesFlags { + return jptm.jobPartMgr.PropertiesToTransfer() +} + +func (jptm *jobPartTransferMgr) ResetSourceSize() { + jptm.transferInfo.SourceSize = 0 +} + // JobHasLowFileCount returns an estimate of whether we only have a very small number of files in the overall job // (An "estimate" because it actually only looks at the current job part) func (jptm *jobPartTransferMgr) JobHasLowFileCount() bool { diff --git a/ste/sender-blockBlob.go b/ste/sender-blockBlob.go index 06f43780f..3a0085b5f 100644 --- a/ste/sender-blockBlob.go +++ b/ste/sender-blockBlob.go @@ -195,7 +195,7 @@ func (s *blockBlobSenderBase) Epilogue() { jptm.Log(pipeline.LogDebug, fmt.Sprintf("Conclude Transfer with BlockList %s", blockIDs)) // commit the blocks. - if !ValidateTier(jptm, s.destBlobTier, s.destBlockBlobURL.BlobURL, s.jptm.Context()) { + if !ValidateTier(jptm, s.destBlobTier, s.destBlockBlobURL.BlobURL, s.jptm.Context(), false) { s.destBlobTier = azblob.DefaultAccessTier } diff --git a/ste/sender-blockBlobFromLocal.go b/ste/sender-blockBlobFromLocal.go index b2682b3ab..618146e5a 100644 --- a/ste/sender-blockBlobFromLocal.go +++ b/ste/sender-blockBlobFromLocal.go @@ -109,7 +109,7 @@ func (u *blockBlobUploader) generatePutWholeBlob(id common.ChunkID, blockIndex i // Upload the blob jptm.LogChunkStatus(id, common.EWaitReason.Body()) var err error - if !ValidateTier(jptm, u.destBlobTier, u.destBlockBlobURL.BlobURL, u.jptm.Context()) { + if !ValidateTier(jptm, u.destBlobTier, u.destBlockBlobURL.BlobURL, u.jptm.Context(), false) { u.destBlobTier = azblob.DefaultAccessTier } diff --git a/ste/sender-blockBlobFromURL.go b/ste/sender-blockBlobFromURL.go index f8e8500e1..4a872ff79 100644 --- a/ste/sender-blockBlobFromURL.go +++ b/ste/sender-blockBlobFromURL.go @@ -91,7 +91,7 @@ func (c *urlToBlockBlobCopier) generateCreateEmptyBlob(id common.ChunkID) chunkF jptm.LogChunkStatus(id, common.EWaitReason.S2SCopyOnWire()) // Create blob and finish. - if !ValidateTier(jptm, c.destBlobTier, c.destBlockBlobURL.BlobURL, c.jptm.Context()) { + if !ValidateTier(jptm, c.destBlobTier, c.destBlockBlobURL.BlobURL, c.jptm.Context(), false) { c.destBlobTier = azblob.DefaultAccessTier } @@ -154,7 +154,7 @@ func (c *urlToBlockBlobCopier) generateStartPutBlobFromURL(id common.ChunkID, bl c.jptm.LogChunkStatus(id, common.EWaitReason.S2SCopyOnWire()) // Create blob and finish. 
- if !ValidateTier(c.jptm, c.destBlobTier, c.destBlockBlobURL.BlobURL, c.jptm.Context()) { + if !ValidateTier(c.jptm, c.destBlobTier, c.destBlockBlobURL.BlobURL, c.jptm.Context(), false) { c.destBlobTier = azblob.DefaultAccessTier } diff --git a/ste/sender-pageBlob.go b/ste/sender-pageBlob.go index ab320c752..8b52e95bf 100644 --- a/ste/sender-pageBlob.go +++ b/ste/sender-pageBlob.go @@ -226,7 +226,7 @@ func (s *pageBlobSenderBase) Prologue(ps common.PrologueState) (destinationModif } destBlobTier := azblob.PremiumPageBlobAccessTierType(s.destBlobTier) - if !ValidateTier(s.jptm, s.destBlobTier, s.destPageBlobURL.BlobURL, s.jptm.Context()) { + if !ValidateTier(s.jptm, s.destBlobTier, s.destPageBlobURL.BlobURL, s.jptm.Context(), false) { destBlobTier = azblob.DefaultPremiumBlobAccessTier } diff --git a/ste/xfer-anyToRemote-file.go b/ste/xfer-anyToRemote-file.go index 02d72f654..673aa00cc 100644 --- a/ste/xfer-anyToRemote-file.go +++ b/ste/xfer-anyToRemote-file.go @@ -120,7 +120,7 @@ func BlobTierAllowed(destTier azblob.AccessTierType) bool { } } -func ValidateTier(jptm IJobPartTransferMgr, blobTier azblob.AccessTierType, blobURL azblob.BlobURL, ctx context.Context) (isValid bool) { +func ValidateTier(jptm IJobPartTransferMgr, blobTier azblob.AccessTierType, blobURL azblob.BlobURL, ctx context.Context, performQuietly bool) (isValid bool) { if jptm.IsLive() && blobTier != azblob.AccessTierNone { @@ -135,13 +135,13 @@ func ValidateTier(jptm IJobPartTransferMgr, blobTier azblob.AccessTierType, blob if tierAvailable { return true - } else { + } else if !performQuietly { tierNotAllowedFailure.Do(func() { glcm := common.GetLifecycleMgr() glcm.Info("Destination could not accommodate the tier " + string(blobTier) + ". Going ahead with the default tier. In case of service to service transfer, consider setting the flag --s2s-preserve-access-tier=false.") }) - return false } + return false } else { return false } diff --git a/ste/xfer-setProperties.go b/ste/xfer-setProperties.go new file mode 100644 index 000000000..a5d22451e --- /dev/null +++ b/ste/xfer-setProperties.go @@ -0,0 +1,214 @@ +package ste + +import ( + "fmt" + "github.com/Azure/azure-pipeline-go/pipeline" + "github.com/Azure/azure-storage-azcopy/v10/common" + "github.com/Azure/azure-storage-blob-go/azblob" + "github.com/Azure/azure-storage-file-go/azfile" + "net/http" + "net/url" + "strings" +) + +func SetProperties(jptm IJobPartTransferMgr, p pipeline.Pipeline, pacer pacer) { + // If the transfer was cancelled, then reporting transfer as done and increasing the bytes transferred by the size of the source. + if jptm.WasCanceled() { + jptm.ReportTransferDone() + return + } + + // schedule the work as a chunk, so it will run on the main goroutine pool, instead of the + // smaller "transfer initiation pool", where this code runs. 
+ id := common.NewChunkID(jptm.Info().Source, 0, 0) + cf := createChunkFunc(true, jptm, id, func() { + to := jptm.FromTo() + switch to.From() { + case common.ELocation.Blob(): + setPropertiesBlob(jptm, p) + case common.ELocation.BlobFS(): + setPropertiesBlobFS(jptm, p) + case common.ELocation.File(): + setPropertiesFile(jptm, p) + default: + panic("Attempting set-properties on invalid location: " + to.From().String()) + } + }) + jptm.ScheduleChunks(cf) +} + +func setPropertiesBlob(jptm IJobPartTransferMgr, p pipeline.Pipeline) { + info := jptm.Info() + // Get the source blob url of blob to set properties on + u, _ := url.Parse(info.Source) + srcBlobURL := azblob.NewBlobURL(*u, p) + + // Internal function which checks the transfer status and logs the msg respectively. + // Sets the transfer status and Reports Transfer as Done. + // Internal function is created to avoid redundancy of the above steps from several places in the api. + transferDone := func(status common.TransferStatus, err error) { + if status == common.ETransferStatus.Failed() { + jptm.LogError(info.Source, "SET-PROPERTIES FAILED with error: ", err) + } else { + jptm.Log(pipeline.LogInfo, fmt.Sprintf("SET-PROPERTIES SUCCESSFUL: %s", strings.Split(info.Destination, "?")[0])) + } + + jptm.SetStatus(status) + jptm.ResetSourceSize() // sets source size to 0 (made to be used by setProperties command to make number of bytes transferred = 0) + jptm.ReportTransferDone() + } + + PropertiesToTransfer := jptm.PropertiesToTransfer() + _, metadata, blobTags, _ := jptm.ResourceDstData(nil) + + if PropertiesToTransfer.ShouldTransferTier() { + rehydratePriority := info.RehydratePriority + blockBlobTier, pageBlobTier := jptm.BlobTiers() + + var err error = nil + if jptm.Info().SrcBlobType == azblob.BlobBlockBlob && blockBlobTier != common.EBlockBlobTier.None() && ValidateTier(jptm, blockBlobTier.ToAccessTierType(), srcBlobURL, jptm.Context(), true) { + _, err = srcBlobURL.SetTier(jptm.Context(), blockBlobTier.ToAccessTierType(), azblob.LeaseAccessConditions{}, rehydratePriority) + } + // cannot return true for >1, therefore only one of these will run + if jptm.Info().SrcBlobType == azblob.BlobPageBlob && pageBlobTier != common.EPageBlobTier.None() && ValidateTier(jptm, pageBlobTier.ToAccessTierType(), srcBlobURL, jptm.Context(), true) { + _, err = srcBlobURL.SetTier(jptm.Context(), pageBlobTier.ToAccessTierType(), azblob.LeaseAccessConditions{}, rehydratePriority) + } + + if err != nil { + errorHandlerForXferSetProperties(err, jptm, transferDone) + return + } + // don't mark it a success just yet, because more properties might need to be changed + } + + if PropertiesToTransfer.ShouldTransferMetaData() { + _, err := srcBlobURL.SetMetadata(jptm.Context(), metadata.ToAzBlobMetadata(), azblob.BlobAccessConditions{}, azblob.ClientProvidedKeyOptions{}) + //TODO the canonical thingi in this is changing key value to upper case. How to go around it? 
+ if err != nil { + errorHandlerForXferSetProperties(err, jptm, transferDone) + return + } + } + if PropertiesToTransfer.ShouldTransferBlobTags() { + _, err := srcBlobURL.SetTags(jptm.Context(), nil, nil, nil, blobTags.ToAzBlobTagsMap()) + if err != nil { + errorHandlerForXferSetProperties(err, jptm, transferDone) + return + } + } + // marking it a successful flow, as no property has resulted in err != nil + transferDone(common.ETransferStatus.Success(), nil) +} + +func setPropertiesBlobFS(jptm IJobPartTransferMgr, p pipeline.Pipeline) { + info := jptm.Info() + // Get the source blob url of blob to delete + u, _ := url.Parse(info.Source) + srcBlobURL := azblob.NewBlobURL(*u, p) + + // Internal function which checks the transfer status and logs the msg respectively. + // Sets the transfer status and Report Transfer as Done. + // Internal function is created to avoid redundancy of the above steps from several places in the api. + transferDone := func(status common.TransferStatus, err error) { + if status == common.ETransferStatus.Failed() { + jptm.LogError(info.Source, "SET-PROPERTIES ERROR ", err) + } else { + jptm.Log(pipeline.LogInfo, fmt.Sprintf("SET-PROPERTIES SUCCESSFUL: %s", strings.Split(info.Destination, "?")[0])) + } + + jptm.SetStatus(status) + jptm.ResetSourceSize() // sets source size to 0 (made to be used by setProperties command to make number of bytes transferred = 0) + jptm.ReportTransferDone() + } + + PropertiesToTransfer := jptm.PropertiesToTransfer() + _, metadata, blobTags, _ := jptm.ResourceDstData(nil) + + if PropertiesToTransfer.ShouldTransferTier() { + rehydratePriority := info.RehydratePriority + _, pageBlobTier := jptm.BlobTiers() + var err error = nil + if ValidateTier(jptm, pageBlobTier.ToAccessTierType(), srcBlobURL, jptm.Context(), false) { + _, err = srcBlobURL.SetTier(jptm.Context(), pageBlobTier.ToAccessTierType(), azblob.LeaseAccessConditions{}, rehydratePriority) + } + + if err != nil { + errorHandlerForXferSetProperties(err, jptm, transferDone) + return + } + // don't mark it a success just yet, because more properties might need to be changed + } + + if PropertiesToTransfer.ShouldTransferMetaData() { + _, err := srcBlobURL.SetMetadata(jptm.Context(), metadata.ToAzBlobMetadata(), azblob.BlobAccessConditions{}, azblob.ClientProvidedKeyOptions{}) + if err != nil { + errorHandlerForXferSetProperties(err, jptm, transferDone) + return + } + } + if PropertiesToTransfer.ShouldTransferBlobTags() { + _, err := srcBlobURL.SetTags(jptm.Context(), nil, nil, nil, blobTags.ToAzBlobTagsMap()) + if err != nil { + errorHandlerForXferSetProperties(err, jptm, transferDone) + return + } + } + + // marking it a successful flow, as no property has resulted in err != nil + transferDone(common.ETransferStatus.Success(), nil) +} + +func setPropertiesFile(jptm IJobPartTransferMgr, p pipeline.Pipeline) { + info := jptm.Info() + u, _ := url.Parse(info.Source) + srcFileURL := azfile.NewFileURL(*u, p) + _ = srcFileURL + // Internal function which checks the transfer status and logs the msg respectively. + // Sets the transfer status and Report Transfer as Done. + // Internal function is created to avoid redundancy of the above steps from several places in the api. 
+	transferDone := func(status common.TransferStatus, err error) {
+		if status == common.ETransferStatus.Failed() {
+			jptm.LogError(info.Source, "SET-PROPERTIES ERROR ", err)
+		} else {
+			jptm.Log(pipeline.LogInfo, fmt.Sprintf("SET-PROPERTIES SUCCESSFUL: %s", strings.Split(info.Destination, "?")[0]))
+		}
+
+		jptm.SetStatus(status)
+		jptm.ResetSourceSize() // sets source size to 0 (made to be used by setProperties command to make number of bytes transferred = 0)
+		jptm.ReportTransferDone()
+	}
+
+	PropertiesToTransfer := jptm.PropertiesToTransfer()
+	_, metadata, _, _ := jptm.ResourceDstData(nil)
+
+	if PropertiesToTransfer.ShouldTransferTier() {
+		// this case should have been caught by the front end and reported as an error (changing the tier is not supported for File storage)
+		err := fmt.Errorf("trying to change tier of file")
+		transferDone(common.ETransferStatus.Failed(), err)
+	}
+	if PropertiesToTransfer.ShouldTransferMetaData() {
+		_, err := srcFileURL.SetMetadata(jptm.Context(), metadata.ToAzFileMetadata())
+		if err != nil {
+			errorHandlerForXferSetProperties(err, jptm, transferDone)
+			return
+		}
+	}
+	// TAGS NOT AVAILABLE FOR FILES
+	transferDone(common.ETransferStatus.Success(), nil)
+}
+
+func errorHandlerForXferSetProperties(err error, jptm IJobPartTransferMgr, transferDone func(status common.TransferStatus, err error)) {
+	if strErr, ok := err.(azblob.StorageError); ok {
+
+		// If the status code was 403, it means there was an authentication error, and we exit.
+		// The user can resume the job with a new SAS once it has been completely ordered.
+		if strErr.Response().StatusCode == http.StatusForbidden {
+			errMsg := fmt.Sprintf("Authentication Failed. The SAS is not correct or expired or does not have the correct permission %s", err.Error())
+			jptm.Log(pipeline.LogError, errMsg)
+			common.GetLifecycleMgr().Error(errMsg)
+		}
+	}
+
+	// in all other cases, mark the transfer as failed
+	transferDone(common.ETransferStatus.Failed(), err)
+}
diff --git a/ste/xfer.go b/ste/xfer.go
index 6013309d7..d125c4191 100644
--- a/ste/xfer.go
+++ b/ste/xfer.go
@@ -150,11 +150,13 @@ func computeJobXfer(fromTo common.FromTo, blobType common.BlobType) newJobXfer {
 	}
 
 	// main computeJobXfer logic
-	switch {
-	case fromTo == common.EFromTo.BlobTrash():
+	switch fromTo {
+	case common.EFromTo.BlobTrash():
 		return DeleteBlob
-	case fromTo == common.EFromTo.FileTrash():
+	case common.EFromTo.FileTrash():
 		return DeleteFile
+	case common.EFromTo.BlobNone(), common.EFromTo.BlobFSNone(), common.EFromTo.FileNone():
+		return SetProperties
 	default:
 		if fromTo.IsDownload() {
 			return parameterizeDownload(remoteToLocal, getDownloader(fromTo.From()))

From 9908569ff432d79a9779f3f773d33f51e547408b Mon Sep 17 00:00:00 2001
From: Arnav Prasad <36070960+Strikerzee@users.noreply.github.com>
Date: Tue, 7 Jun 2022 17:06:58 +0530
Subject: [PATCH 12/26] Job Manager changes (#1820)

* Combine all JobMgr changes into one.

JobMgr cleanup.
- Close all the goroutines once the job is done.

Fix for the Report Handler routine cleanup.

Memory leak fix.
- Set references to nil to release the allocated memory.
- Fix some of the memory issues.
- JobMgr was freed without setting its channels to nil.
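[Editor's note] The cleanup changes in this patch follow a done-channel shutdown pattern: each long-lived goroutine selects on a small buffered "done" channel in addition to its work channels, and the cleanup path signals that channel so the loop can return and its memory can be reclaimed (in the diff below, jstm.done, poolSizingChannels.done, closeTransferCh, and scheduleCloseCh play this role). A minimal, hypothetical sketch of the pattern follows; the names here are illustrative only and are not AzCopy APIs.

    package main

    import "fmt"

    // statusManager stands in for a long-running worker goroutine.
    type statusManager struct {
        updates chan string
        done    chan struct{} // buffered so that cleanup never blocks
    }

    func newStatusManager() *statusManager {
        return &statusManager{
            updates: make(chan string, 100),
            done:    make(chan struct{}, 1),
        }
    }

    // run processes updates until cleanup signals the done channel.
    func (s *statusManager) run() {
        for {
            select {
            case u := <-s.updates:
                fmt.Println("update:", u)
            case <-s.done:
                return // exit so the goroutine and everything it references can be collected
            }
        }
    }

    // cleanup signals the worker to stop; intended to be called once.
    func (s *statusManager) cleanup() {
        s.done <- struct{}{}
    }

    func main() {
        sm := newStatusManager()
        go sm.run()
        // ... job runs, parts complete ...
        sm.cleanup() // signal the worker to exit
    }
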
Co-authored-by: Nitin Singla --- common/chunkStatusLogger.go | 17 +++ common/singleChunkReader.go | 22 +++- jobsAdmin/JobsAdmin.go | 69 +++++++++-- ste/jobStatusManager.go | 19 ++- ste/mgr-JobMgr.go | 226 +++++++++++++++++++++++++++--------- ste/mgr-JobPartMgr.go | 12 ++ 6 files changed, 292 insertions(+), 73 deletions(-) mode change 100644 => 100755 jobsAdmin/JobsAdmin.go mode change 100644 => 100755 ste/jobStatusManager.go mode change 100644 => 100755 ste/mgr-JobMgr.go diff --git a/common/chunkStatusLogger.go b/common/chunkStatusLogger.go index 134be24a7..573bfebf1 100644 --- a/common/chunkStatusLogger.go +++ b/common/chunkStatusLogger.go @@ -241,6 +241,7 @@ type ChunkStatusLoggerCloser interface { GetCounts(td TransferDirection) []chunkStatusCount GetPrimaryPerfConstraint(td TransferDirection, rc RetryCounter) PerfConstraint FlushLog() // not close, because we had issues with writes coming in after this // TODO: see if that issue still exists + CloseLogger() } type RetryCounter interface { @@ -315,6 +316,18 @@ func (csl *chunkStatusLogger) FlushLog() { } } +// CloseLogger close the chunklogger thread. +func (csl *chunkStatusLogger) CloseLogger() { + // Once logger is closed, we log no more chunks. + csl.outputEnabled = false + + /* + * No more chunks will ever be written, let the main logger know about this. + * On closing this channel the main logger will exit from its for-range loop. + */ + close(csl.unsavedEntries) +} + func (csl *chunkStatusLogger) main(chunkLogPath string) { f, err := os.Create(chunkLogPath) if err != nil { @@ -332,18 +345,22 @@ func (csl *chunkStatusLogger) main(chunkLogPath string) { defer doFlush() alwaysFlushFromNowOn := false + + // We will exit the following for-range loop after CloseLogger() closes the csl.unsavedEntries channel. for x := range csl.unsavedEntries { if x == nil { alwaysFlushFromNowOn = true doFlush() csl.flushDone <- struct{}{} continue // TODO can become break (or be moved to later if we close unsaved entries, once we figure out how we got stuff written to us after CloseLog was called) + } _, _ = w.WriteString(fmt.Sprintf("%s,%d,%s,%s\n", x.Name, x.OffsetInFile(), x.reason, x.waitStart)) if alwaysFlushFromNowOn { // TODO: remove when we figure out how we got stuff written to us after CloseLog was called. For now, this should handle those cases (if they still exist) doFlush() } + } } diff --git a/common/singleChunkReader.go b/common/singleChunkReader.go index 2e9d2f8d8..d362c2ea8 100644 --- a/common/singleChunkReader.go +++ b/common/singleChunkReader.go @@ -227,14 +227,20 @@ func (cr *singleChunkReader) blockingPrefetch(fileReader io.ReaderAt, isRetry bo // Must use "relaxed" RAM limit IFF this is a retry. Else, we can, in theory, get deadlock with all active goroutines blocked // here doing retries, but no RAM _will_ become available because its // all used by queued chunkfuncs (that can't be processed because all goroutines are active). - cr.chunkLogger.LogChunkStatus(cr.chunkId, EWaitReason.RAMToSchedule()) + if cr.chunkLogger != nil { + cr.chunkLogger.LogChunkStatus(cr.chunkId, EWaitReason.RAMToSchedule()) + } + err := cr.cacheLimiter.WaitUntilAdd(cr.ctx, cr.length, func() bool { return isRetry }) if err != nil { return err } // prepare to read - cr.chunkLogger.LogChunkStatus(cr.chunkId, EWaitReason.DiskIO()) + if cr.chunkLogger != nil { + cr.chunkLogger.LogChunkStatus(cr.chunkId, EWaitReason.DiskIO()) + } + targetBuffer := cr.slicePool.RentSlice(cr.length) // read WITHOUT holding the "close" lock. 
While we don't have the lock, we mutate ONLY local variables, no instance state. @@ -412,6 +418,18 @@ func (cr *singleChunkReader) Close() error { // do the real work cr.closeBuffer() cr.isClosed = true + + /* + * Set chunkLogger to nil, so that chunkStatusLogger can be GC'ed. + * + * TODO: We should not need to explicitly set this to nil but today we have a yet-unknown ref on cr which + * is leaking this "big" chunkStatusLogger memory, so we cause that to be freed by force dropping this ref. + * + * Note: We are force setting this to nil and we safe guard against this by checking chunklogger not nil at respective places. + * At present this is called only from blockingPrefetch(). + */ + cr.chunkLogger = nil + return nil } diff --git a/jobsAdmin/JobsAdmin.go b/jobsAdmin/JobsAdmin.go old mode 100644 new mode 100755 index 87130391c..f71a48e68 --- a/jobsAdmin/JobsAdmin.go +++ b/jobsAdmin/JobsAdmin.go @@ -24,7 +24,6 @@ import ( "context" "encoding/json" "fmt" - "github.com/Azure/azure-storage-azcopy/v10/ste" "os" "path/filepath" "runtime" @@ -35,6 +34,8 @@ import ( "sync/atomic" "time" + "github.com/Azure/azure-storage-azcopy/v10/ste" + "github.com/Azure/azure-pipeline-go/pipeline" "github.com/Azure/azure-storage-azcopy/v10/common" ) @@ -99,6 +100,9 @@ var JobsAdmin interface { TryGetPerformanceAdvice(bytesInJob uint64, filesInJob uint32, fromTo common.FromTo, dir common.TransferDirection, p *ste.PipelineNetworkStats) []common.PerformanceAdvice SetConcurrencySettingsToAuto() + + // JobMgrCleanUp do the JobMgr cleanup. + JobMgrCleanUp(jobId common.JobID) } func initJobsAdmin(appCtx context.Context, concurrency ste.ConcurrencySettings, targetRateInMegaBitsPerSec float64, azcopyJobPlanFolder string, azcopyLogPathFolder string, providePerfAdvice bool) { @@ -241,19 +245,20 @@ type jobsAdmin struct { logger common.ILoggerCloser jobIDToJobMgr jobIDToJobMgr // Thread-safe map from each JobID to its JobInfo // Other global state can be stored in more fields here... - logDir string // Where log files are stored - planDir string // Initialize to directory where Job Part Plans are stored - appCtx context.Context - pacer ste.PacerAdmin - slicePool common.ByteSlicePooler - cacheLimiter common.CacheLimiter - fileCountLimiter common.CacheLimiter - concurrencyTuner ste.ConcurrencyTuner - commandLineMbpsCap float64 + logDir string // Where log files are stored + planDir string // Initialize to directory where Job Part Plans are stored + appCtx context.Context + pacer ste.PacerAdmin + slicePool common.ByteSlicePooler + cacheLimiter common.CacheLimiter + fileCountLimiter common.CacheLimiter + concurrencyTuner ste.ConcurrencyTuner + commandLineMbpsCap float64 provideBenchmarkResults bool cpuMonitor common.CPUMonitor jobLogger common.ILoggerResetable } + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// func (ja *jobsAdmin) NewJobPartPlanFileName(jobID common.JobID, partNumber common.PartNumber) ste.JobPartPlanFileName { @@ -296,6 +301,39 @@ func (ja *jobsAdmin) JobMgrEnsureExists(jobID common.JobID, }) } +// JobMgrCleanup cleans up the jobMgr identified by the given jobId. It undoes what NewJobMgr() does, basically it does the following: +// 1. Stop all go routines started to process this job. +// 2. Release the memory allocated for this JobMgr instance. +// Note: this is not thread safe and only one goroutine should call this for a job. +func (ja *jobsAdmin) JobMgrCleanUp(jobId common.JobID) { + // First thing get the jobMgr. 
+ jm, found := ja.JobMgr(jobId) + + if found { + /* + * Change log level to Info, so that we can capture these messages in job log file. + * These log messages useful in debuggability and tells till what stage cleanup done. + */ + jm.Log(pipeline.LogInfo, "JobMgrCleanUp Enter") + + // Delete the jobMgr from jobIDtoJobMgr map, so that next call will fail. + ja.DeleteJob(jobId) + + jm.Log(pipeline.LogInfo, "Job deleted from jobMgr map") + + /* + * Rest of jobMgr related cleanup done by DeferredCleanupJobMgr function. + * Now that we have removed the jobMgr from the map, no new caller will find it and hence cannot start any + * new activity using the jobMgr. We cleanup the resources of the jobMgr in a deferred manner as a safety net + * to allow processing any messages that may be in transit. + * + * NOTE: This is not really required but we don't want to miss any in-transit messages as some of the TODOs in + * the code suggest. + */ + go jm.DeferredCleanupJobMgr() + } +} + func (ja *jobsAdmin) BytesOverWire() int64 { return ja.pacer.GetTotalTraffic() } @@ -507,6 +545,13 @@ func (ja *jobsAdmin) TryGetPerformanceAdvice(bytesInJob uint64, filesInJob uint3 a := ste.NewPerformanceAdvisor(p, ja.commandLineMbpsCap, int64(megabitsPerSec), finalReason, finalConcurrency, dir, averageBytesPerFile, isToAzureFiles) return a.GetAdvice() } + +//Structs for messageHandler + +/* PerfAdjustment message. */ +type jaPerfAdjustmentMsg struct { + Throughput int64 `json:"cap-mbps,string"` +} func (ja *jobsAdmin) messageHandler(inputChan <-chan *common.LCMMsg) { toBitsPerSec := func(megaBitsPerSec int64) int64 { @@ -516,7 +561,7 @@ func (ja *jobsAdmin) messageHandler(inputChan <-chan *common.LCMMsg) { const minIntervalBetweenPerfAdjustment = time.Minute lastPerfAdjustTime := time.Now().Add(-2 * minIntervalBetweenPerfAdjustment) var err error - + for { msg := <-inputChan var msgType common.LCMMsgType @@ -534,7 +579,7 @@ func (ja *jobsAdmin) messageHandler(inputChan <-chan *common.LCMMsg) { if e := json.Unmarshal([]byte(msg.Req.Value), &perfAdjustmentReq); e != nil { err = fmt.Errorf("parsing %s failed with %s", msg.Req.Value, e.Error()) } - + if perfAdjustmentReq.Throughput < 0 { err = fmt.Errorf("invalid value %d for cap-mbps. 
cap-mpbs should be greater than 0", perfAdjustmentReq.Throughput) diff --git a/ste/jobStatusManager.go b/ste/jobStatusManager.go old mode 100644 new mode 100755 index 6b264d9bc..f4114dcc7 --- a/ste/jobStatusManager.go +++ b/ste/jobStatusManager.go @@ -23,15 +23,16 @@ package ste import ( "time" + "github.com/Azure/azure-pipeline-go/pipeline" "github.com/Azure/azure-storage-azcopy/v10/common" ) type JobPartCreatedMsg struct { - TotalTransfers uint32 - IsFinalPart bool + TotalTransfers uint32 + IsFinalPart bool TotalBytesEnumerated uint64 - FileTransfers uint32 - FolderTransfer uint32 + FileTransfers uint32 + FolderTransfer uint32 } type xferDoneMsg = common.TransferDetail @@ -41,6 +42,7 @@ type jobStatusManager struct { listReq chan bool partCreated chan JobPartCreatedMsg xferDone chan xferDoneMsg + done chan struct{} } /* These functions should not fail */ @@ -61,6 +63,11 @@ func (jm *jobMgr) ResurrectSummary(js common.ListJobSummaryResponse) { jm.jstm.js = js } +func (jm *jobMgr) CleanupJobStatusMgr() { + jm.Log(pipeline.LogInfo, "CleanJobStatusMgr called.") + jm.jstm.done <- struct{}{} +} + func (jm *jobMgr) handleStatusUpdateMessage() { jstm := jm.jstm js := &jstm.js @@ -106,6 +113,10 @@ func (jm *jobMgr) handleStatusUpdateMessage() { // There is no need to keep sending the same items over and over again js.FailedTransfers = []common.TransferDetail{} js.SkippedTransfers = []common.TransferDetail{} + + case <-jstm.done: + jm.Log(pipeline.LogInfo, "Cleanup JobStatusmgr.") + return } } } diff --git a/ste/mgr-JobMgr.go b/ste/mgr-JobMgr.go old mode 100644 new mode 100755 index 4ff6fabe2..2920b83ae --- a/ste/mgr-JobMgr.go +++ b/ste/mgr-JobMgr.go @@ -101,6 +101,10 @@ type IJobMgr interface { SuccessfulBytesInActiveFiles() uint64 CancelPauseJobOrder(desiredJobStatus common.JobStatus) common.CancelPauseResumeResponse IsDaemon() bool + + // Cleanup Functions + DeferredCleanupJobMgr() + CleanupJobStatusMgr() } // ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -137,6 +141,12 @@ func NewJobMgr(concurrency ConcurrencySettings, jobID common.JobID, appCtx conte jstm.listReq = make(chan bool) jstm.partCreated = make(chan JobPartCreatedMsg, 100) jstm.xferDone = make(chan xferDoneMsg, 1000) + jstm.done = make(chan struct{}, 1) + // Different logger for each job. 
+ if jobLogger == nil { + jobLogger = common.NewJobLogger(jobID, common.ELogLevel.Debug(), logFileFolder, "" /* logFileNameSuffix */) + jobLogger.OpenLog() + } jm := jobMgr{jobID: jobID, jobPartMgrs: newJobPartToJobPartMgr(), include: map[string]int{}, exclude: map[string]int{}, httpClient: NewAzcopyHTTPClient(concurrency.MaxIdleConnections), @@ -147,6 +157,7 @@ func NewJobMgr(concurrency ConcurrencySettings, jobID common.JobID, appCtx conte pipelineNetworkStats: newPipelineNetworkStats(tuner), // let the stats coordinate with the concurrency tuner initMu: &sync.Mutex{}, jobPartProgress: jobPartProgressCh, + reportCancelCh: make(chan struct{}, 1), coordinatorChannels: CoordinatorChannels{ partsChannel: partsCh, normalTransferCh: normalTransferCh, @@ -158,12 +169,15 @@ func NewJobMgr(concurrency ConcurrencySettings, jobID common.JobID, appCtx conte lowTransferCh: lowTransferCh, normalChunckCh: normalChunkCh, lowChunkCh: lowChunkCh, + closeTransferCh: make(chan struct{}, 100), + scheduleCloseCh: make(chan struct{}, 1), }, poolSizingChannels: poolSizingChannels{ // all deliberately unbuffered, because pool sizer routine works in lock-step with these - processing them as they happen, never catching up on populated buffer later entryNotificationCh: make(chan struct{}), exitNotificationCh: make(chan struct{}), scalebackRequestCh: make(chan struct{}), requestSlowTuneCh: make(chan struct{}), + done: make(chan struct{}, 1), }, concurrencyTuner: tuner, pacer: pacer, @@ -288,6 +302,10 @@ type jobMgr struct { httpClient *http.Client jobPartMgrs jobPartToJobPartMgr // The map of part #s to JobPartMgrs + + // reportCancelCh to close the report thread. + reportCancelCh chan struct{} + // partsDone keep the count of completed part of the Job. partsDone uint32 // throughput common.CountPerSecond // TODO: Set LastCheckedTime to now @@ -586,62 +604,76 @@ func (jm *jobMgr) reportJobPartDoneHandler() { shouldLog := jm.ShouldLog(pipeline.LogInfo) for { - partProgressInfo := <-jm.jobPartProgress - jobPart0Mgr, ok := jm.jobPartMgrs.Get(0) - if !ok { - jm.Panic(fmt.Errorf("Failed to find Job %v, Part #0", jm.jobID)) - } - part0Plan := jobPart0Mgr.Plan() - jobStatus := part0Plan.JobStatus() // status of part 0 is status of job as a whole - partsDone := atomic.AddUint32(&jm.partsDone, 1) - jobProgressInfo.transfersCompleted += partProgressInfo.transfersCompleted - jobProgressInfo.transfersSkipped += partProgressInfo.transfersSkipped - jobProgressInfo.transfersFailed += partProgressInfo.transfersFailed - - if partProgressInfo.completionChan != nil { - close(partProgressInfo.completionChan) - } + select { + case <-jm.reportCancelCh: + jobPart0Mgr, ok := jm.jobPartMgrs.Get(0) + if ok { + part0plan := jobPart0Mgr.Plan() + if part0plan.JobStatus() == common.EJobStatus.InProgress() || + part0plan.JobStatus() == common.EJobStatus.Cancelling() { + jm.Panic(fmt.Errorf("reportCancelCh received cancel event while job still not completed, Job(%s) in state: %s", + jm.jobID.String(), part0plan.JobStatus())) + } + } else { + jm.Log(pipeline.LogError, "part0Plan of job invalid") + } + jm.Log(pipeline.LogInfo, "reportJobPartDoneHandler done called") + return - // If the last part is still awaited or other parts all still not complete, - // JobPart 0 status is not changed (unless we are cancelling) - haveFinalPart = atomic.LoadInt32(&jm.atomicFinalPartOrderedIndicator) == 1 - allKnownPartsDone := partsDone == jm.jobPartMgrs.Count() - isCancelling := jobStatus == common.EJobStatus.Cancelling() - shouldComplete := allKnownPartsDone && 
(haveFinalPart || isCancelling) - if shouldComplete { - partDescription := "all parts of entire Job" - if !haveFinalPart { - partDescription = "known parts of incomplete Job" + case partProgressInfo := <-jm.jobPartProgress: + jobPart0Mgr, ok := jm.jobPartMgrs.Get(0) + if !ok { + jm.Panic(fmt.Errorf("Failed to find Job %v, Part #0", jm.jobID)) } - if shouldLog { - jm.Log(pipeline.LogInfo, fmt.Sprintf("%s %s successfully completed, cancelled or paused", partDescription, jm.jobID.String())) + part0Plan := jobPart0Mgr.Plan() + jobStatus := part0Plan.JobStatus() // status of part 0 is status of job as a whole + partsDone := atomic.AddUint32(&jm.partsDone, 1) + jobProgressInfo.transfersCompleted += partProgressInfo.transfersCompleted + jobProgressInfo.transfersSkipped += partProgressInfo.transfersSkipped + jobProgressInfo.transfersFailed += partProgressInfo.transfersFailed + + if partProgressInfo.completionChan != nil { + close(partProgressInfo.completionChan) } - switch part0Plan.JobStatus() { - case common.EJobStatus.Cancelling(): - part0Plan.SetJobStatus(common.EJobStatus.Cancelled()) + // If the last part is still awaited or other parts all still not complete, + // JobPart 0 status is not changed (unless we are cancelling) + haveFinalPart = atomic.LoadInt32(&jm.atomicFinalPartOrderedIndicator) == 1 + allKnownPartsDone := partsDone == jm.jobPartMgrs.Count() + isCancelling := jobStatus == common.EJobStatus.Cancelling() + shouldComplete := allKnownPartsDone && (haveFinalPart || isCancelling) + if shouldComplete { + partDescription := "all parts of entire Job" + if !haveFinalPart { + partDescription = "known parts of incomplete Job" + } if shouldLog { - jm.Log(pipeline.LogInfo, fmt.Sprintf("%s %v successfully cancelled", partDescription, jm.jobID)) + jm.Log(pipeline.LogInfo, fmt.Sprintf("%s %s successfully completed, cancelled or paused", partDescription, jm.jobID.String())) } - case common.EJobStatus.InProgress(): - part0Plan.SetJobStatus((common.EJobStatus).EnhanceJobStatusInfo(jobProgressInfo.transfersSkipped > 0, - jobProgressInfo.transfersFailed > 0, - jobProgressInfo.transfersCompleted > 0)) - } - // reset counters - atomic.StoreUint32(&jm.partsDone, 0) - jobProgressInfo = jobPartProgressInfo{} + switch part0Plan.JobStatus() { + case common.EJobStatus.Cancelling(): + part0Plan.SetJobStatus(common.EJobStatus.Cancelled()) + if shouldLog { + jm.Log(pipeline.LogInfo, fmt.Sprintf("%s %v successfully cancelled", partDescription, jm.jobID)) + } + case common.EJobStatus.InProgress(): + part0Plan.SetJobStatus((common.EJobStatus).EnhanceJobStatusInfo(jobProgressInfo.transfersSkipped > 0, + jobProgressInfo.transfersFailed > 0, + jobProgressInfo.transfersCompleted > 0)) + } - // flush logs - jm.chunkStatusLogger.FlushLog() // TODO: remove once we sort out what will be calling CloseLog (currently nothing) - if allKnownPartsDone { - common.GetLifecycleMgr().ReportAllJobPartsDone() - } - } // Else log and wait for next part to complete + // reset counters + atomic.StoreUint32(&jm.partsDone, 0) + jobProgressInfo = jobPartProgressInfo{} - if shouldLog { - jm.Log(pipeline.LogInfo, fmt.Sprintf("is part of Job which %d total number of parts done ", partsDone)) + // flush logs + jm.chunkStatusLogger.FlushLog() // TODO: remove once we sort out what will be calling CloseLog (currently nothing) + } //Else log and wait for next part to complete + + if shouldLog { + jm.Log(pipeline.LogInfo, fmt.Sprintf("is part of Job which %d total number of parts done ", partsDone)) + } } } } @@ -672,10 +704,54 @@ func (jm 
*jobMgr) CloseLog() {
 	jm.chunkStatusLogger.FlushLog()
 }
 
+// DeferredCleanupJobMgr cleans up all the jobMgr resources.
+// Warning: DeferredCleanupJobMgr should only be called from JobMgrCleanup().
+// This function is neither thread-safe nor idempotent: if DeferredCleanupJobMgr is called
+// multiple times, it may get stuck because the receiving channels have already been closed, whereas JobMgrCleanup()
+// is safe in that sense and will do the cleanup only once.
+//
+// TODO: Add a JobsAdmin reference to each JobMgr so that under no circumstances is JobsAdmin freed
+//       while a jobMgr is still running, and so that JobsAdmin can track the number of JobMgrs running at any time.
+//       At that point DeferredCleanupJobMgr() will delete the jobMgr from the jobsAdmin map.
+func (jm *jobMgr) DeferredCleanupJobMgr() {
+	jm.Log(pipeline.LogInfo, "DeferredCleanupJobMgr called")
+
+	time.Sleep(60 * time.Second)
+
+	jm.Log(pipeline.LogInfo, "DeferredCleanupJobMgr out of sleep")
+
+	// Call jm.Cancel to signal the routines that the work is done.
+	// This will take care of any jobPartMgr release.
+	jm.Cancel()
+
+	// Clean up the JobStatusMgr goroutine.
+	jm.CleanupJobStatusMgr()
+
+	// Transfer thread cleanup.
+	jm.cleanupTransferRoutine()
+
+	// Remove the JobPartMgrs from the jobPartToJobPartMgr map.
+	jm.deleteJobPartsMgrs()
+
+	// Close the chunk status logger.
+	jm.cleanupChunkStatusLogger()
+	jm.Log(pipeline.LogInfo, "DeferredCleanupJobMgr Exit, Closing the log")
+
+	// Sleep for some time so that all goroutines finish their cleanup and log their progress in the job log.
+	time.Sleep(60 * time.Second)
+
+	jm.logger.CloseLog()
+}
+
 func (jm *jobMgr) ChunkStatusLogger() common.ChunkStatusLogger {
 	return jm.chunkStatusLogger
 }
 
+func (jm *jobMgr) cleanupChunkStatusLogger() {
+	jm.chunkStatusLogger.FlushLog()
+	jm.chunkStatusLogger.CloseLogger()
+}
+
 // PartsDone returns the number of the Job's parts that are either completed or failed
 // func (jm *jobMgr) PartsDone() uint32 { return atomic.LoadUint32(&jm.partsDone) }
 
@@ -697,6 +773,8 @@ type XferChannels struct {
 	lowTransferCh  <-chan IJobPartTransferMgr // Read-only
 	normalChunckCh chan chunkFunc             // Read-write
 	lowChunkCh     chan chunkFunc             // Read-write
+	closeTransferCh chan struct{}
+	scheduleCloseCh chan struct{}
 }
 
 type poolSizingChannels struct {
@@ -704,6 +782,7 @@ type poolSizingChannels struct {
 	exitNotificationCh  chan struct{}
 	scalebackRequestCh  chan struct{}
 	requestSlowTuneCh   chan struct{}
+	done                chan struct{}
 }
 
 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -744,6 +823,26 @@ func (jm *jobMgr) QueueJobParts(jpm IJobPartMgr) {
 	jm.coordinatorChannels.partsChannel <- jpm
 }
 
+// deleteJobPartsMgrs removes the jobPartMgrs from the jobPartToJobPartMgr map.
+func (jm *jobMgr) deleteJobPartsMgrs() {
+	jm.Log(pipeline.LogInfo, "deleteJobPartsMgrs enter")
+	jm.jobPartMgrs.Iterate(false, func(k common.PartNumber, v IJobPartMgr) {
+		v.Close()
+		delete(jm.jobPartMgrs.m, k)
+	})
+	jm.Log(pipeline.LogInfo, "deleteJobPartsMgrs exit")
+}
+
+// cleanupTransferRoutine closes all the transfer threads.
+// Note: The channels are buffered so that, even if a thread is somehow missing (down), the send does not get stuck.
+func (jm *jobMgr) cleanupTransferRoutine() { + jm.reportCancelCh <- struct{}{} + jm.xferChannels.scheduleCloseCh <- struct{}{} + for cc := 0; cc < jm.concurrency.TransferInitiationPoolSize.Value; cc++ { + jm.xferChannels.closeTransferCh <- struct{}{} + } +} + // worker that sizes the chunkProcessor pool, dynamically if necessary func (jm *jobMgr) poolSizer() { @@ -784,10 +883,15 @@ func (jm *jobMgr) poolSizer() { } else if actualConcurrency > targetConcurrency { hasHadTimeToStablize = false jm.poolSizingChannels.scalebackRequestCh <- struct{}{} + } else if actualConcurrency == 0 && targetConcurrency == 0 { + jm.Log(pipeline.LogInfo, "Exits Pool sizer") + return } // wait for something to happen (maybe ack from the worker of the change, else a timer interval) select { + case <-jm.poolSizingChannels.done: + targetConcurrency = 0 case <-jm.poolSizingChannels.entryNotificationCh: // new worker has started actualConcurrency++ @@ -802,7 +906,7 @@ func (jm *jobMgr) poolSizer() { throughputMonitoringInterval = expandedMonitoringInterval slowTuneCh = nil // so we won't keep running this case at the expense of others) case <-time.After(throughputMonitoringInterval): - if actualConcurrency == targetConcurrency { // scalebacks can take time. Don't want to do any tuning if actual is not yet aligned to target + if targetConcurrency != 0 && actualConcurrency == targetConcurrency { // scalebacks can take time. Don't want to do any tuning if actual is not yet aligned to target bytesOnWire := jm.pacer.GetTotalTraffic() if hasHadTimeToStablize { // throughput has had time to stabilize since last change, so we can meaningfully measure and act on throughput @@ -840,15 +944,22 @@ func (jm *jobMgr) RequestTuneSlowly() { func (jm *jobMgr) scheduleJobParts() { startedPoolSizer := false for { - jobPart := <-jm.xferChannels.partsChannel + select { + case <-jm.xferChannels.scheduleCloseCh: + jm.Log(pipeline.LogInfo, "ScheduleJobParts done called") + jm.poolSizingChannels.done <- struct{}{} + return + + case jobPart := <-jm.xferChannels.partsChannel: - if !startedPoolSizer { - // spin up a GR to co-ordinate dynamic sizing of the main pool - // It will automatically spin up the right number of chunk processors - go jm.poolSizer() - startedPoolSizer = true + if !startedPoolSizer { + // spin up a GR to co-ordinate dynamic sizing of the main pool + // It will automatically spin up the right number of chunk processors + go jm.poolSizer() + startedPoolSizer = true + } + jobPart.ScheduleTransfers(jm.Context()) } - jobPart.ScheduleTransfers(jm.Context()) } } @@ -905,8 +1016,13 @@ func (jm *jobMgr) transferProcessor(workerID int) { for { // No scaleback check here, because this routine runs only in a small number of goroutines, so no need to kill them off select { + case <-jm.xferChannels.closeTransferCh: + jm.Log(pipeline.LogInfo, "transferProcessor done called") + return + case jptm := <-jm.xferChannels.normalTransferCh: startTransfer(jptm) + default: select { case jptm := <-jm.xferChannels.lowTransferCh: diff --git a/ste/mgr-JobPartMgr.go b/ste/mgr-JobPartMgr.go index 44cbf7b10..b34ecf671 100644 --- a/ste/mgr-JobPartMgr.go +++ b/ste/mgr-JobPartMgr.go @@ -889,6 +889,18 @@ func (jpm *jobPartMgr) Close() { jpm.httpHeaders = common.ResourceHTTPHeaders{} jpm.metadata = common.Metadata{} jpm.preserveLastModifiedTime = false + + /* + * Set pipeline to nil, so that jpm/JobMgr can be GC'ed. 
+ * + * TODO: We should not need to explicitly set this to nil but today we have a yet-unknown ref on pipeline which + * is leaking JobMgr memory, so we cause that to be freed by force dropping this ref. + * + * Note: Force setting this to nil can technically result in crashes since the containing object is still around, + * but we should be protected against that since we do this Close in a deferred manner, at least few minutes after the job completes. + */ + jpm.pipeline = nil + // TODO: Delete file? /*if err := os.Remove(jpm.planFile.Name()); err != nil { jpm.Panic(fmt.Errorf("error removing Job Part Plan file %s. Error=%v", jpm.planFile.Name(), err)) From a56e3eb3a8e4556919172171ffd687c99628428b Mon Sep 17 00:00:00 2001 From: adreed-msft <49764384+adreed-msft@users.noreply.github.com> Date: Wed, 8 Jun 2022 18:46:12 -0700 Subject: [PATCH 13/26] Fix some oopsies in the unix properties PR (#1818) * Fix some oopsies in the unix properties PR * Apply changes to append/page blob --- ste/sender-appendBlobFromLocal.go | 3 +++ ste/sender-blockBlobFromLocal.go | 4 ++++ ste/sender-pageBlobFromLocal.go | 3 +++ ste/sourceInfoProvider-Local_linux.go | 2 +- 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/ste/sender-appendBlobFromLocal.go b/ste/sender-appendBlobFromLocal.go index bf08649c2..a2c8fb950 100644 --- a/ste/sender-appendBlobFromLocal.go +++ b/ste/sender-appendBlobFromLocal.go @@ -36,6 +36,9 @@ type appendBlobUploader struct { func (u *appendBlobUploader) Prologue(ps common.PrologueState) (destinationModified bool) { if u.jptm.Info().PreservePOSIXProperties { if unixSIP, ok := u.sip.(IUNIXPropertyBearingSourceInfoProvider); ok { + // Clone the metadata before we write to it, we shouldn't be writing to the same metadata as every other blob. + u.metadataToApply = common.Metadata(u.metadataToApply).Clone().ToAzBlobMetadata() + statAdapter, err := unixSIP.GetUNIXProperties() if err != nil { u.jptm.FailActiveSend("GetUNIXProperties", err) diff --git a/ste/sender-blockBlobFromLocal.go b/ste/sender-blockBlobFromLocal.go index 618146e5a..fdaf3f460 100644 --- a/ste/sender-blockBlobFromLocal.go +++ b/ste/sender-blockBlobFromLocal.go @@ -47,7 +47,11 @@ func newBlockBlobUploader(jptm IJobPartTransferMgr, destination string, p pipeli func (s *blockBlobUploader) Prologue(ps common.PrologueState) (destinationModified bool) { if s.jptm.Info().PreservePOSIXProperties { + if unixSIP, ok := s.sip.(IUNIXPropertyBearingSourceInfoProvider); ok { + // Clone the metadata before we write to it, we shouldn't be writing to the same metadata as every other blob. + s.metadataToApply = common.Metadata(s.metadataToApply).Clone().ToAzBlobMetadata() + statAdapter, err := unixSIP.GetUNIXProperties() if err != nil { s.jptm.FailActiveSend("GetUNIXProperties", err) diff --git a/ste/sender-pageBlobFromLocal.go b/ste/sender-pageBlobFromLocal.go index 2dd1ebf6c..bcb7aef05 100644 --- a/ste/sender-pageBlobFromLocal.go +++ b/ste/sender-pageBlobFromLocal.go @@ -47,6 +47,9 @@ func newPageBlobUploader(jptm IJobPartTransferMgr, destination string, p pipelin func (u *pageBlobUploader) Prologue(ps common.PrologueState) (destinationModified bool) { if u.jptm.Info().PreservePOSIXProperties { if unixSIP, ok := u.sip.(IUNIXPropertyBearingSourceInfoProvider); ok { + // Clone the metadata before we write to it, we shouldn't be writing to the same metadata as every other blob. 
+ u.metadataToApply = common.Metadata(u.metadataToApply).Clone().ToAzBlobMetadata() + statAdapter, err := unixSIP.GetUNIXProperties() if err != nil { u.jptm.FailActiveSend("GetUNIXProperties", err) diff --git a/ste/sourceInfoProvider-Local_linux.go b/ste/sourceInfoProvider-Local_linux.go index 1007a23b8..d1a01a94d 100644 --- a/ste/sourceInfoProvider-Local_linux.go +++ b/ste/sourceInfoProvider-Local_linux.go @@ -97,7 +97,7 @@ func (s statxTAdapter) MTime() time.Time { } func (s statxTAdapter) CTime() time.Time { - return time.Unix(s.Btime.Sec, int64(s.Ctime.Nsec)) + return time.Unix(s.Ctime.Sec, int64(s.Ctime.Nsec)) } type statTAdapter unix.Stat_t From 583e6e782e6c1e57f9c8c58ebe897d0e31a90ed5 Mon Sep 17 00:00:00 2001 From: Arnav Prasad <36070960+Strikerzee@users.noreply.github.com> Date: Thu, 9 Jun 2022 18:23:36 +0530 Subject: [PATCH 14/26] Fix symlink (#1798) * Squash "Fix the symlink" into one commit. Fix the symlink. - Calling filepath.evalsymlinks in case of linux. - Explicit checking "." (current directory) for symlink. Remove snippet of code not relevant to the cherry-picked commits. * Fix build error. * Fix rebase build errors. Co-authored-by: Nitin Singla --- cmd/zc_traverser_local.go | 71 ++++++++++++++++++++++++++++----------- jobsAdmin/init.go | 3 +- ste/jobStatusManager.go | 1 + 3 files changed, 55 insertions(+), 20 deletions(-) mode change 100644 => 100755 cmd/zc_traverser_local.go mode change 100644 => 100755 jobsAdmin/init.go diff --git a/cmd/zc_traverser_local.go b/cmd/zc_traverser_local.go old mode 100644 new mode 100755 index f7968d8dc..64c85b43e --- a/cmd/zc_traverser_local.go +++ b/cmd/zc_traverser_local.go @@ -27,6 +27,7 @@ import ( "os" "path" "path/filepath" + "runtime" "strings" "github.com/Azure/azure-pipeline-go/pipeline" @@ -34,6 +35,8 @@ import ( "github.com/Azure/azure-storage-azcopy/v10/common/parallel" ) +const MAX_SYMLINKS_TO_FOLLOW = 40 + type localTraverser struct { fullPath string recursive bool @@ -72,8 +75,14 @@ func (t *localTraverser) getInfoIfSingleFile() (os.FileInfo, bool, error) { } func UnfurlSymlinks(symlinkPath string) (result string, err error) { + var count uint32 unfurlingPlan := []string{symlinkPath} + // We need to do some special UNC path handling for windows. + if runtime.GOOS != "windows" { + return filepath.EvalSymlinks(symlinkPath) + } + for len(unfurlingPlan) > 0 { item := unfurlingPlan[0] @@ -93,7 +102,7 @@ func UnfurlSymlinks(symlinkPath string) (result string, err error) { // Previously, we'd try to detect if the read link was a relative path by appending and starting the item // However, it seems to be a fairly unlikely and hard to reproduce scenario upon investigation (Couldn't manage to reproduce the scenario) // So it was dropped. However, on the off chance, we'll still do it if syntactically it makes sense. - if len(result) == 0 || result[0] == '.' { // A relative path being "" or "." likely (and in the latter case, on our officially supported OSes, always) means that it's just the same folder. + if result == "" || result == "." { // A relative path being "" or "." likely (and in the latter case, on our officially supported OSes, always) means that it's just the same folder. 
result = filepath.Dir(item) } else if !os.IsPathSeparator(result[0]) { // We can assume that a relative path won't start with a separator possiblyResult := filepath.Join(filepath.Dir(item), result) @@ -104,12 +113,21 @@ func UnfurlSymlinks(symlinkPath string) (result string, err error) { result = common.ToExtendedPath(result) + /* + * We could either store every symlink seen so far for this path, or simply count iterations to detect a cyclic loop. + * We choose the counting method and restrict the number of links to 40, the limit the Linux kernel adheres to. + */ + if count >= MAX_SYMLINKS_TO_FOLLOW { + return "", errors.New("failed to unfurl symlink: too many links") + } + unfurlingPlan = append(unfurlingPlan, result) } else { return item, nil } unfurlingPlan = unfurlingPlan[1:] + count++ } return "", errors.New("failed to unfurl symlink: exited loop early") @@ -197,10 +215,33 @@ func WalkWithSymlinks(fullPath string, walkFunc filepath.WalkFunc, followSymlink computedRelativePath = "" } + // TODO: Later we might want to transfer these special files as such. + unsupportedFileTypes := (os.ModeSocket | os.ModeNamedPipe | os.ModeIrregular | os.ModeDevice) + + if fileInfo == nil { + err := fmt.Errorf("fileInfo is nil for file %s", filePath) + WarnStdoutAndScanningLog(err.Error()) + return nil + } + + if (fileInfo.Mode() & unsupportedFileTypes) != 0 { + err := fmt.Errorf("Unsupported file type %s: %v", filePath, fileInfo.Mode()) + WarnStdoutAndScanningLog(err.Error()) + return nil + } + if fileInfo.Mode()&os.ModeSymlink != 0 { if !followSymlinks { return nil // skip it } + + /* + * There is one case where a symlink can point outside of the share (i.e. the symlink is an absolute path). In that case + * we need to throw an error: it is very unlikely that the same file or folder is present on the agent side, + * so an error would be thrown there anyway. + * + * TODO: Need to handle this case. + */ result, err := UnfurlSymlinks(filePath) if err != nil { @@ -246,24 +287,16 @@ func WalkWithSymlinks(fullPath string, walkFunc filepath.WalkFunc, followSymlink WarnStdoutAndScanningLog(fmt.Sprintf("Ignored already linked directory pointed at %s (link at %s)", result, common.GenerateFullPath(fullPath, computedRelativePath))) } } else { - WarnStdoutAndScanningLog(fmt.Sprintf("Symlinks to individual files are not currently supported, so will ignore file at %s (link at %s)", result, common.GenerateFullPath(fullPath, computedRelativePath))) - // TODO: remove the above info call and enable the below, with suitable multi-OS testing - // including enable the test: TestWalkWithSymlinks_ToFile - /* - // It's a symlink to a file. Just process the file because there's no danger of cycles with links to individual files. - // (this does create the inconsistency that if there are two symlinks to the same file we will process it twice, - // but if there are two symlinks to the same directory we will process it only once. Because only directories are - // deduped to break cycles. For now, we are living with the inconsistency. The alternative would be to "burn" more - // RAM by putting filepaths into seenDirs too, but that could be a non-trivial amount of RAM in big directories trees). 
Maybe just detect if the target of a file symlink its itself a symlink - // and skip those cases with an error message? - // Make file info that has name of source, and stats of dest (to mirror what os.Stat calls on source will give us later) - targetFi := symlinkTargetFileInfo{rStat, fileInfo.Name()} - return walkFunc(common.GenerateFullPath(fullPath, computedRelativePath), targetFi, fileError) - */ + // It's a symlink to a file and we handle cyclic symlinks. + // (this does create the inconsistency that if there are two symlinks to the same file we will process it twice, + // but if there are two symlinks to the same directory we will process it only once. Because only directories are + // deduped to break cycles. For now, we are living with the inconsistency. The alternative would be to "burn" more + // RAM by putting filepaths into seenDirs too, but that could be a non-trivial amount of RAM in big directories trees). + targetFi := symlinkTargetFileInfo{rStat, fileInfo.Name()} + + err := walkFunc(common.GenerateFullPath(fullPath, computedRelativePath), targetFi, fileError) + _, err = getProcessingError(err) + return err } return nil } else { diff --git a/jobsAdmin/init.go b/jobsAdmin/init.go old mode 100644 new mode 100755 index 390805a3c..c1d3a1f48 --- a/jobsAdmin/init.go +++ b/jobsAdmin/init.go @@ -24,14 +24,15 @@ import ( "context" "encoding/json" "fmt" - "github.com/Azure/azure-storage-azcopy/v10/ste" "io/ioutil" "math" "net/http" "time" "github.com/Azure/azure-pipeline-go/pipeline" + "github.com/Azure/azure-storage-azcopy/v10/common" + "github.com/Azure/azure-storage-azcopy/v10/ste" ) var steCtx = context.Background() diff --git a/ste/jobStatusManager.go b/ste/jobStatusManager.go index f4114dcc7..edd5f556f 100755 --- a/ste/jobStatusManager.go +++ b/ste/jobStatusManager.go @@ -24,6 +24,7 @@ import ( "time" "github.com/Azure/azure-pipeline-go/pipeline" + "github.com/Azure/azure-storage-azcopy/v10/common" ) From bf94c3dc06a9b268048611960aa8cd0173ca25f9 Mon Sep 17 00:00:00 2001 From: Ze Qian Zhang Date: Thu, 9 Jun 2022 12:01:34 -0700 Subject: [PATCH 15/26] Upgrade version of Python SDK used in distributed mutext script (#1826) --- azure-pipelines.yml | 8 ++------ tool_distributed_mutex.py | 8 ++++---- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 780729c27..051fdbd13 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -183,9 +183,7 @@ jobs: inputs: secureFile: 'ci-gcs-dev.json' - script: | - pip install azure-storage-blob==12.0.0b3 - # the recent release 1.0.0b4 has a breaking change - pip install azure-core==1.0.0b3 + pip install azure-storage-blob==12.12.0 # acquire the mutex before running live tests to avoid conflicts python ./tool_distributed_mutex.py lock "$(MUTEX_URL)" # set the variable to indicate that the mutex was actually acquired @@ -241,9 +239,7 @@ jobs: S2S_SRC_GCP_SERVICE_URL: $(S2S_SRC_GCP_SERVICE_URL) SHARE_SAS_URL: $(SHARE_SAS_URL) - script: | - pip install azure-storage-blob==12.0.0b3 - # the recent release 1.0.0b4 has a breaking change - pip install azure-core==1.0.0b3 + pip install azure-storage-blob==12.12.0 python ./tool_distributed_mutex.py unlock "$(MUTEX_URL)" name: 'Release_the_distributed_mutex' # this runs even if the job was canceled (only if the mutex was acquired by this job) diff --git a/tool_distributed_mutex.py b/tool_distributed_mutex.py index 564a7a320..e02fe75c1 100644 --- a/tool_distributed_mutex.py +++ b/tool_distributed_mutex.py @@ -5,10 +5,10 @@ # note that the track 2 
python SDK is being used here from azure.storage.blob import ( BlobClient, - LeaseClient, + BlobLeaseClient, ) -from azure.core import ( +from azure.core.exceptions import ( HttpResponseError, ) @@ -31,7 +31,7 @@ def process(): action, mutex_url = get_raw_input() # check whether the blob exists, if not quit right away to avoid wasting time - blob_client = BlobClient(mutex_url) + blob_client = BlobClient.from_blob_url(mutex_url) try: blob_client.get_blob_properties() print("INFO: validated mutex url") @@ -39,7 +39,7 @@ def process(): raise ValueError('please provide an existing and valid blob URL, failed to get properties with error: ' + e) # get a handle on the lease - lease_client = LeaseClient(blob_client) + lease_client = BlobLeaseClient(blob_client) if action == UNLOCK: # make the lease free as soon as possible lease_client.break_lease(lease_break_period=1) From 0e4c1a72129a27d96dac63d2e69abcef1cbfedf8 Mon Sep 17 00:00:00 2001 From: Arnav Prasad <36070960+Strikerzee@users.noreply.github.com> Date: Tue, 14 Jun 2022 09:38:11 +0530 Subject: [PATCH 16/26] Allow calling app to cancel enumeration and handle scanning directory errors in traverser (#1803) * This patch for enabling calling app to cancel enumeration. - Used appCtx in parallel-walk so that when app want to cancel the enumeration it can do. - It doesn't effect azcopy working. * Replace context pointer with context and nil with context.TODO() * Handle scanning directory errors in traverser. * Fail visibly when scanning fails for files and directories * add in more error paths * fix processOneDirectory changes and add more comments * Minor fix. * Fix nil ptr dereference. * Resolve comments. * Replace err with err.Error() as per comments. * Minor fix. Co-authored-by: Nitin Singla --- cmd/copyEnumeratorInit.go | 9 ++- cmd/list.go | 7 ++- cmd/removeEnumerator.go | 5 +- cmd/setPropertiesEnumerator.go | 2 +- cmd/syncEnumerator.go | 8 ++- cmd/zc_enumerator.go | 14 ++++- cmd/zc_traverser_list.go | 4 +- cmd/zc_traverser_local.go | 56 ++++++++++++++----- cmd/zt_generic_service_traverser_test.go | 8 ++- cmd/zt_generic_traverser_test.go | 26 ++++----- common/parallel/FileSystemCrawler.go | 38 +++++++++---- .../parallel/zt_FileSystemCrawlerTest_test.go | 9 +-- 12 files changed, 127 insertions(+), 59 deletions(-) mode change 100644 => 100755 cmd/copyEnumeratorInit.go mode change 100644 => 100755 cmd/list.go mode change 100644 => 100755 cmd/removeEnumerator.go mode change 100644 => 100755 cmd/setPropertiesEnumerator.go mode change 100644 => 100755 cmd/syncEnumerator.go mode change 100644 => 100755 cmd/zc_enumerator.go mode change 100644 => 100755 cmd/zc_traverser_list.go diff --git a/cmd/copyEnumeratorInit.go b/cmd/copyEnumeratorInit.go old mode 100644 new mode 100755 index 98f228a4b..4ed629090 --- a/cmd/copyEnumeratorInit.go +++ b/cmd/copyEnumeratorInit.go @@ -5,7 +5,6 @@ import ( "encoding/json" "errors" "fmt" - "github.com/Azure/azure-storage-azcopy/v10/jobsAdmin" "log" "net/url" "os" @@ -15,6 +14,8 @@ import ( "sync" "time" + "github.com/Azure/azure-storage-azcopy/v10/jobsAdmin" + "github.com/Azure/azure-pipeline-go/pipeline" "github.com/Azure/azure-storage-blob-go/azblob" @@ -84,7 +85,7 @@ func (cca *CookedCopyCmdArgs) initEnumerator(jobPartOrder common.CopyJobPartOrde traverser, err = InitResourceTraverser(cca.Source, cca.FromTo.From(), &ctx, &srcCredInfo, &cca.FollowSymlinks, cca.ListOfFilesChannel, cca.Recursive, getRemoteProperties, cca.IncludeDirectoryStubs, cca.permanentDeleteOption, func(common.EntityType) {}, cca.ListOfVersionIDs, - 
cca.S2sPreserveBlobTags, azcopyLogVerbosity.ToPipelineLogLevel(), cca.CpkOptions) + cca.S2sPreserveBlobTags, azcopyLogVerbosity.ToPipelineLogLevel(), cca.CpkOptions, nil /* errorChannel */) if err != nil { return nil, err @@ -355,7 +356,9 @@ func (cca *CookedCopyCmdArgs) isDestDirectory(dst common.ResourceString, ctx *co return false } - rt, err := InitResourceTraverser(dst, cca.FromTo.To(), ctx, &dstCredInfo, nil, nil, false, false, false, common.EPermanentDeleteOption.None(), func(common.EntityType) {}, cca.ListOfVersionIDs, false, pipeline.LogNone, cca.CpkOptions) + rt, err := InitResourceTraverser(dst, cca.FromTo.To(), ctx, &dstCredInfo, nil, + nil, false, false, false, common.EPermanentDeleteOption.None(), + func(common.EntityType) {}, cca.ListOfVersionIDs, false, pipeline.LogNone, cca.CpkOptions, nil /* errorChannel */) if err != nil { return false diff --git a/cmd/list.go b/cmd/list.go old mode 100644 new mode 100755 index cf799fad1..8d82034d4 --- a/cmd/list.go +++ b/cmd/list.go @@ -24,10 +24,11 @@ import ( "context" "errors" "fmt" - pipeline2 "github.com/Azure/azure-pipeline-go/pipeline" "strconv" "strings" + "github.com/Azure/azure-pipeline-go/pipeline" + "github.com/spf13/cobra" "github.com/Azure/azure-storage-azcopy/v10/common" @@ -221,7 +222,9 @@ func (cooked cookedListCmdArgs) HandleListContainerCommand() (err error) { } } - traverser, err := InitResourceTraverser(source, cooked.location, &ctx, &credentialInfo, nil, nil, true, false, false, common.EPermanentDeleteOption.None(), func(common.EntityType) {}, nil, false, pipeline2.LogNone, common.CpkOptions{}) + traverser, err := InitResourceTraverser(source, cooked.location, &ctx, &credentialInfo, nil, nil, + true, false, false, common.EPermanentDeleteOption.None(), func(common.EntityType) {}, + nil, false, pipeline.LogNone, common.CpkOptions{}, nil /* errorChannel */) if err != nil { return fmt.Errorf("failed to initialize traverser: %s", err.Error()) diff --git a/cmd/removeEnumerator.go b/cmd/removeEnumerator.go old mode 100644 new mode 100755 index 53e112591..c598a30cf --- a/cmd/removeEnumerator.go +++ b/cmd/removeEnumerator.go @@ -25,9 +25,10 @@ import ( "encoding/json" "errors" "fmt" - "github.com/Azure/azure-storage-azcopy/v10/jobsAdmin" "strings" + "github.com/Azure/azure-storage-azcopy/v10/jobsAdmin" + "github.com/Azure/azure-pipeline-go/pipeline" "github.com/Azure/azure-storage-azcopy/v10/azbfs" @@ -50,7 +51,7 @@ func newRemoveEnumerator(cca *CookedCopyCmdArgs) (enumerator *CopyEnumerator, er sourceTraverser, err = InitResourceTraverser(cca.Source, cca.FromTo.From(), &ctx, &cca.credentialInfo, nil, cca.ListOfFilesChannel, cca.Recursive, false, cca.IncludeDirectoryStubs, cca.permanentDeleteOption, func(common.EntityType) {}, cca.ListOfVersionIDs, false, - azcopyLogVerbosity.ToPipelineLogLevel(), cca.CpkOptions) + azcopyLogVerbosity.ToPipelineLogLevel(), cca.CpkOptions, nil /* errorChannel */) // report failure to create traverser if err != nil { diff --git a/cmd/setPropertiesEnumerator.go b/cmd/setPropertiesEnumerator.go old mode 100644 new mode 100755 index 6656e1637..6c6f730ba --- a/cmd/setPropertiesEnumerator.go +++ b/cmd/setPropertiesEnumerator.go @@ -50,7 +50,7 @@ func setPropertiesEnumerator(cca *CookedCopyCmdArgs) (enumerator *CopyEnumerator sourceTraverser, err = InitResourceTraverser(cca.Source, cca.FromTo.From(), &ctx, &cca.credentialInfo, nil, cca.ListOfFilesChannel, cca.Recursive, false, cca.IncludeDirectoryStubs, cca.permanentDeleteOption, func(common.EntityType) {}, cca.ListOfVersionIDs, false, - 
azcopyLogVerbosity.ToPipelineLogLevel(), cca.CpkOptions) + azcopyLogVerbosity.ToPipelineLogLevel(), cca.CpkOptions, nil /* errorChannel */) // report failure to create traverser if err != nil { diff --git a/cmd/syncEnumerator.go b/cmd/syncEnumerator.go old mode 100644 new mode 100755 index 13ff2d88c..1a2f08bcc --- a/cmd/syncEnumerator.go +++ b/cmd/syncEnumerator.go @@ -24,11 +24,12 @@ import ( "context" "errors" "fmt" - "github.com/Azure/azure-storage-azcopy/v10/jobsAdmin" "runtime" "strings" "sync/atomic" + "github.com/Azure/azure-storage-azcopy/v10/jobsAdmin" + "github.com/Azure/azure-pipeline-go/pipeline" "github.com/Azure/azure-storage-azcopy/v10/common" @@ -56,6 +57,7 @@ func (cca *cookedSyncCmdArgs) initEnumerator(ctx context.Context) (enumerator *s } // TODO: enable symlink support in a future release after evaluating the implications + // TODO: Consider passing an errorChannel so that enumeration errors during sync can be conveyed to the caller. // GetProperties is enabled by default as sync supports both upload and download. // This property only supports Files and S3 at the moment, but provided that Files sync is coming soon, enable to avoid stepping on Files sync work sourceTraverser, err := InitResourceTraverser(cca.source, cca.fromTo.From(), &ctx, &srcCredInfo, nil, @@ -63,7 +65,7 @@ func (cca *cookedSyncCmdArgs) initEnumerator(ctx context.Context) (enumerator *s if entityType == common.EEntityType.File() { atomic.AddUint64(&cca.atomicSourceFilesScanned, 1) } - }, nil, cca.s2sPreserveBlobTags, azcopyLogVerbosity.ToPipelineLogLevel(), cca.cpkOptions) + }, nil, cca.s2sPreserveBlobTags, azcopyLogVerbosity.ToPipelineLogLevel(), cca.cpkOptions, nil /* errorChannel */) if err != nil { return nil, err @@ -84,7 +86,7 @@ func (cca *cookedSyncCmdArgs) initEnumerator(ctx context.Context) (enumerator *s if entityType == common.EEntityType.File() { atomic.AddUint64(&cca.atomicDestinationFilesScanned, 1) } - }, nil, cca.s2sPreserveBlobTags, azcopyLogVerbosity.ToPipelineLogLevel(), cca.cpkOptions) + }, nil, cca.s2sPreserveBlobTags, azcopyLogVerbosity.ToPipelineLogLevel(), cca.cpkOptions, nil /* errorChannel */) if err != nil { return nil, err } diff --git a/cmd/zc_enumerator.go b/cmd/zc_enumerator.go old mode 100644 new mode 100755 index dad37376a..577ddddc5 --- a/cmd/zc_enumerator.go +++ b/cmd/zc_enumerator.go @@ -307,8 +307,12 @@ type enumerationCounterFunc func(entityType common.EntityType) // ctx, pipeline are only required for remote resources. // followSymlinks is only required for local resources (defaults to false) // errorOnDirWOutRecursive is used by copy. - -func InitResourceTraverser(resource common.ResourceString, location common.Location, ctx *context.Context, credential *common.CredentialInfo, followSymlinks *bool, listOfFilesChannel chan string, recursive, getProperties, includeDirectoryStubs bool, permanentDeleteOption common.PermanentDeleteOption, incrementEnumerationCounter enumerationCounterFunc, listOfVersionIds chan string, s2sPreserveBlobTags bool, logLevel pipeline.LogLevel, cpkOptions common.CpkOptions) (ResourceTraverser, error) { +// If errorChannel is non-nil, all errors encountered during enumeration will be conveyed through this channel. +// To avoid slowdowns, use a buffered channel of enough capacity. 
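A minimal standalone sketch of the buffered error-channel contract described above (hypothetical names; the real traverser plumbs the channel through InitResourceTraverser): the caller allocates the channel with generous capacity and drains it concurrently, so enumeration never blocks on error reporting.

package main

import (
	"errors"
	"fmt"
	"sync"
)

// enumerationError mirrors, in simplified form, the ErrorFileInfo type the traverser reports.
type enumerationError struct {
	FilePath string
	ErrorMsg error
}

func main() {
	// Buffered with enough capacity so the producer (enumeration) rarely blocks.
	errorChannel := make(chan enumerationError, 1000)

	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		// The caller drains the channel concurrently with enumeration.
		for e := range errorChannel {
			fmt.Printf("enumeration failure at %s: %v\n", e.FilePath, e.ErrorMsg)
		}
	}()

	// The traverser would report failures as it hits them.
	errorChannel <- enumerationError{FilePath: "/some/unreadable/dir", ErrorMsg: errors.New("permission denied")}
	close(errorChannel)
	wg.Wait()
}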
+func InitResourceTraverser(resource common.ResourceString, location common.Location, ctx *context.Context, + credential *common.CredentialInfo, followSymlinks *bool, listOfFilesChannel chan string, recursive, getProperties, + includeDirectoryStubs bool, permanentDeleteOption common.PermanentDeleteOption, incrementEnumerationCounter enumerationCounterFunc, listOfVersionIds chan string, + s2sPreserveBlobTags bool, logLevel pipeline.LogLevel, cpkOptions common.CpkOptions, errorChannel chan ErrorFileInfo) (ResourceTraverser, error) { var output ResourceTraverser var p *pipeline.Pipeline @@ -392,7 +396,11 @@ func InitResourceTraverser(resource common.ResourceString, location common.Locat output = newListTraverser(baseResource, location, nil, nil, recursive, toFollow, getProperties, globChan, includeDirectoryStubs, incrementEnumerationCounter, s2sPreserveBlobTags, logLevel, cpkOptions) } else { - output = newLocalTraverser(resource.ValueLocal(), recursive, toFollow, incrementEnumerationCounter) + if ctx != nil { + output = newLocalTraverser(*ctx, resource.ValueLocal(), recursive, toFollow, incrementEnumerationCounter, errorChannel) + } else { + output = newLocalTraverser(context.TODO(), resource.ValueLocal(), recursive, toFollow, incrementEnumerationCounter, errorChannel) + } } case common.ELocation.Benchmark(): ben, err := newBenchmarkTraverser(resource.Value, incrementEnumerationCounter) diff --git a/cmd/zc_traverser_list.go b/cmd/zc_traverser_list.go old mode 100644 new mode 100755 index cb61a44dd..791639248 --- a/cmd/zc_traverser_list.go +++ b/cmd/zc_traverser_list.go @@ -107,7 +107,9 @@ func newListTraverser(parent common.ResourceString, parentType common.Location, } // Construct a traverser that goes through the child - traverser, err := InitResourceTraverser(source, parentType, ctx, credential, &followSymlinks, nil, recursive, getProperties, includeDirectoryStubs, common.EPermanentDeleteOption.None(), incrementEnumerationCounter, nil, s2sPreserveBlobTags, logLevel, cpkOptions) + traverser, err := InitResourceTraverser(source, parentType, ctx, credential, &followSymlinks, + nil, recursive, getProperties, includeDirectoryStubs, common.EPermanentDeleteOption.None(), incrementEnumerationCounter, + nil, s2sPreserveBlobTags, logLevel, cpkOptions, nil /* errorChannel */) if err != nil { return nil, err } diff --git a/cmd/zc_traverser_local.go b/cmd/zc_traverser_local.go index 64c85b43e..a5980a7b0 100755 --- a/cmd/zc_traverser_local.go +++ b/cmd/zc_traverser_local.go @@ -21,6 +21,7 @@ package cmd import ( + "context" "errors" "fmt" "io/ioutil" @@ -41,9 +42,10 @@ type localTraverser struct { fullPath string recursive bool followSymlinks bool - + appCtx context.Context // a generic function to notify that a new stored object has been enumerated incrementEnumerationCounter enumerationCounterFunc + errorChannel chan ErrorFileInfo } func (t *localTraverser) IsDirectory(bool) bool { @@ -164,15 +166,28 @@ type symlinkTargetFileInfo struct { name string } +// ErrorFileInfo holds information about files and folders that failed enumeration. +type ErrorFileInfo struct { + FilePath string + FileInfo os.FileInfo + ErrorMsg error +} + func (s symlinkTargetFileInfo) Name() string { return s.name // override the name } +func writeToErrorChannel(errorChannel chan ErrorFileInfo, err ErrorFileInfo) { + if errorChannel != nil { + errorChannel <- err + } +} + // WalkWithSymlinks is a symlinks-aware, parallelized, version of filePath.Walk. 
// Separate this from the traverser for two purposes: // 1) Cleaner code // 2) Easier to test individually than to test the entire traverser. -func WalkWithSymlinks(fullPath string, walkFunc filepath.WalkFunc, followSymlinks bool) (err error) { +func WalkWithSymlinks(appCtx context.Context, fullPath string, walkFunc filepath.WalkFunc, followSymlinks bool, errorChannel chan ErrorFileInfo) (err error) { // We want to re-queue symlinks up in their evaluated form because filepath.Walk doesn't evaluate them for us. // So, what is the plan of attack? @@ -202,9 +217,10 @@ func WalkWithSymlinks(fullPath string, walkFunc filepath.WalkFunc, followSymlink walkQueue = walkQueue[1:] // walk contents of this queueItem in parallel // (for simplicity of coding, we don't parallelize across multiple queueItems) - parallel.Walk(queueItem.fullPath, EnumerationParallelism, EnumerationParallelStatFiles, func(filePath string, fileInfo os.FileInfo, fileError error) error { + parallel.Walk(appCtx, queueItem.fullPath, EnumerationParallelism, EnumerationParallelStatFiles, func(filePath string, fileInfo os.FileInfo, fileError error) error { if fileError != nil { - WarnStdoutAndScanningLog(fmt.Sprintf("Accessing '%s' failed with error: %s", filePath, fileError)) + WarnStdoutAndScanningLog(fmt.Sprintf("Accessing '%s' failed with error: %s", filePath, fileError.Error())) + writeToErrorChannel(errorChannel, ErrorFileInfo{FilePath: filePath, FileInfo: fileInfo, ErrorMsg: fileError}) return nil } computedRelativePath := strings.TrimPrefix(cleanLocalPath(filePath), cleanLocalPath(queueItem.fullPath)) @@ -245,25 +261,33 @@ func WalkWithSymlinks(fullPath string, walkFunc filepath.WalkFunc, followSymlink result, err := UnfurlSymlinks(filePath) if err != nil { - WarnStdoutAndScanningLog(fmt.Sprintf("Failed to resolve symlink %s: %s", filePath, err)) + err = fmt.Errorf("Failed to resolve symlink %s: %s", filePath, err.Error()) + WarnStdoutAndScanningLog(err.Error()) + writeToErrorChannel(errorChannel, ErrorFileInfo{FilePath: filePath, FileInfo: fileInfo, ErrorMsg: err}) return nil } result, err = filepath.Abs(result) if err != nil { - WarnStdoutAndScanningLog(fmt.Sprintf("Failed to get absolute path of symlink result %s: %s", filePath, err)) + err = fmt.Errorf("Failed to get absolute path of symlink result %s: %s", filePath, err.Error()) + WarnStdoutAndScanningLog(err.Error()) + writeToErrorChannel(errorChannel, ErrorFileInfo{FilePath: filePath, FileInfo: fileInfo, ErrorMsg: err}) return nil } slPath, err := filepath.Abs(filePath) if err != nil { - WarnStdoutAndScanningLog(fmt.Sprintf("Failed to get absolute path of %s: %s", filePath, err)) + err = fmt.Errorf("Failed to get absolute path of %s: %s", filePath, err.Error()) + WarnStdoutAndScanningLog(err.Error()) + writeToErrorChannel(errorChannel, ErrorFileInfo{FilePath: filePath, FileInfo: fileInfo, ErrorMsg: err}) return nil } rStat, err := os.Stat(result) if err != nil { - WarnStdoutAndScanningLog(fmt.Sprintf("Failed to get properties of symlink target at %s: %s", result, err)) + err = fmt.Errorf("Failed to get properties of symlink target at %s: %s", result, err.Error()) + WarnStdoutAndScanningLog(err.Error()) + writeToErrorChannel(errorChannel, ErrorFileInfo{FilePath: filePath, FileInfo: fileInfo, ErrorMsg: err}) return nil } @@ -304,7 +328,9 @@ func WalkWithSymlinks(fullPath string, walkFunc filepath.WalkFunc, followSymlink result, err := filepath.Abs(filePath) if err != nil { - WarnStdoutAndScanningLog(fmt.Sprintf("Failed to get absolute path of %s: %s", filePath, err)) + 
err = fmt.Errorf("Failed to get absolute path of %s: %s", filePath, err.Error()) + WarnStdoutAndScanningLog(err.Error()) + writeToErrorChannel(errorChannel, ErrorFileInfo{FilePath: filePath, FileInfo: fileInfo, ErrorMsg: err}) return nil } @@ -370,7 +396,7 @@ func (t *localTraverser) Traverse(preprocessor objectMorpher, processor objectPr if t.recursive { processFile := func(filePath string, fileInfo os.FileInfo, fileError error) error { if fileError != nil { - WarnStdoutAndScanningLog(fmt.Sprintf("Accessing %s failed with error: %s", filePath, fileError)) + WarnStdoutAndScanningLog(fmt.Sprintf("Accessing %s failed with error: %s", filePath, fileError.Error())) return nil } @@ -378,7 +404,7 @@ func (t *localTraverser) Traverse(preprocessor objectMorpher, processor objectPr if fileInfo.IsDir() { newFileInfo, err := WrapFolder(filePath, fileInfo) if err != nil { - WarnStdoutAndScanningLog(fmt.Sprintf("Failed to get last change of target at %s: %s", filePath, err)) + WarnStdoutAndScanningLog(fmt.Sprintf("Failed to get last change of target at %s: %s", filePath, err.Error())) } else { // fileInfo becomes nil in case we fail to wrap folder. fileInfo = newFileInfo @@ -417,7 +443,7 @@ func (t *localTraverser) Traverse(preprocessor objectMorpher, processor objectPr } // note: Walk includes root, so no need here to separately create StoredObject for root (as we do for other folder-aware sources) - return WalkWithSymlinks(t.fullPath, processFile, t.followSymlinks) + return WalkWithSymlinks(t.appCtx, t.fullPath, processFile, t.followSymlinks, t.errorChannel) } else { // if recursive is off, we only need to scan the files immediately under the fullPath // We don't transfer any directory properties here, not even the root. (Because the root's @@ -495,12 +521,14 @@ func (t *localTraverser) Traverse(preprocessor objectMorpher, processor objectPr return } -func newLocalTraverser(fullPath string, recursive bool, followSymlinks bool, incrementEnumerationCounter enumerationCounterFunc) *localTraverser { +func newLocalTraverser(ctx context.Context, fullPath string, recursive bool, followSymlinks bool, incrementEnumerationCounter enumerationCounterFunc, errorChannel chan ErrorFileInfo) *localTraverser { traverser := localTraverser{ fullPath: cleanLocalPath(fullPath), recursive: recursive, followSymlinks: followSymlinks, - incrementEnumerationCounter: incrementEnumerationCounter} + appCtx: ctx, + incrementEnumerationCounter: incrementEnumerationCounter, + errorChannel: errorChannel} return &traverser } diff --git a/cmd/zt_generic_service_traverser_test.go b/cmd/zt_generic_service_traverser_test.go index 06683819f..a593db99f 100644 --- a/cmd/zt_generic_service_traverser_test.go +++ b/cmd/zt_generic_service_traverser_test.go @@ -1,6 +1,8 @@ package cmd import ( + "context" + "github.com/Azure/azure-storage-blob-go/azblob" "github.com/Azure/azure-storage-file-go/azfile" chk "gopkg.in/check.v1" @@ -56,7 +58,7 @@ func (s *genericTraverserSuite) TestBlobFSServiceTraverserWithManyObjects(c *chk scenarioHelper{}.generateLocalFilesFromList(c, dstDirName, objectList) // Create a local traversal - localTraverser := newLocalTraverser(dstDirName, true, true, func(common.EntityType) {}) + localTraverser := newLocalTraverser(context.TODO(), dstDirName, true, true, func(common.EntityType) {}, nil) // Invoke the traversal with an indexer so the results are indexed for easy validation localIndexer := newObjectIndexer() @@ -172,7 +174,7 @@ func (s *genericTraverserSuite) TestServiceTraverserWithManyObjects(c *chk.C) { 
scenarioHelper{}.generateLocalFilesFromList(c, dstDirName, objectList) // Create a local traversal - localTraverser := newLocalTraverser(dstDirName, true, true, func(common.EntityType) {}) + localTraverser := newLocalTraverser(context.TODO(), dstDirName, true, true, func(common.EntityType) {}, nil) // Invoke the traversal with an indexer so the results are indexed for easy validation localIndexer := newObjectIndexer() @@ -356,7 +358,7 @@ func (s *genericTraverserSuite) TestServiceTraverserWithWildcards(c *chk.C) { scenarioHelper{}.generateLocalFilesFromList(c, dstDirName, objectList) // Create a local traversal - localTraverser := newLocalTraverser(dstDirName, true, true, func(common.EntityType) {}) + localTraverser := newLocalTraverser(context.TODO(), dstDirName, true, true, func(common.EntityType) {}, nil) // Invoke the traversal with an indexer so the results are indexed for easy validation localIndexer := newObjectIndexer() diff --git a/cmd/zt_generic_traverser_test.go b/cmd/zt_generic_traverser_test.go index a92c20c5c..d677f98a3 100644 --- a/cmd/zt_generic_traverser_test.go +++ b/cmd/zt_generic_traverser_test.go @@ -261,7 +261,7 @@ func (s *genericTraverserSuite) TestWalkWithSymlinks_ToFolder(c *chk.C) { fileCount := 0 sawLinkTargetDir := false - c.Assert(WalkWithSymlinks(tmpDir, func(path string, fi os.FileInfo, err error) error { + c.Assert(WalkWithSymlinks(context.TODO(), tmpDir, func(path string, fi os.FileInfo, err error) error { c.Assert(err, chk.IsNil) if fi.IsDir() { @@ -276,7 +276,7 @@ func (s *genericTraverserSuite) TestWalkWithSymlinks_ToFolder(c *chk.C) { fileCount++ return nil }, - true), chk.IsNil) + true, nil), chk.IsNil) // 3 files live in base, 3 files live in symlink c.Assert(fileCount, chk.Equals, 6) @@ -331,7 +331,7 @@ func (s *genericTraverserSuite) TestWalkWithSymlinksBreakLoop(c *chk.C) { // Only 3 files should ever be found. // This is because the symlink links back to the root dir fileCount := 0 - c.Assert(WalkWithSymlinks(tmpDir, func(path string, fi os.FileInfo, err error) error { + c.Assert(WalkWithSymlinks(context.TODO(), tmpDir, func(path string, fi os.FileInfo, err error) error { c.Assert(err, chk.IsNil) if fi.IsDir() { @@ -341,7 +341,7 @@ func (s *genericTraverserSuite) TestWalkWithSymlinksBreakLoop(c *chk.C) { fileCount++ return nil }, - true), chk.IsNil) + true, nil), chk.IsNil) c.Assert(fileCount, chk.Equals, 3) } @@ -361,7 +361,7 @@ func (s *genericTraverserSuite) TestWalkWithSymlinksDedupe(c *chk.C) { // Only 6 files should ever be found. // 3 in the root dir, 3 in subdir, then symlinkdir should be ignored because it's been seen. 
fileCount := 0 - c.Assert(WalkWithSymlinks(tmpDir, func(path string, fi os.FileInfo, err error) error { + c.Assert(WalkWithSymlinks(context.TODO(), tmpDir, func(path string, fi os.FileInfo, err error) error { c.Assert(err, chk.IsNil) if fi.IsDir() { @@ -371,7 +371,7 @@ func (s *genericTraverserSuite) TestWalkWithSymlinksDedupe(c *chk.C) { fileCount++ return nil }, - true), chk.IsNil) + true, nil), chk.IsNil) c.Assert(fileCount, chk.Equals, 6) } @@ -392,7 +392,7 @@ func (s *genericTraverserSuite) TestWalkWithSymlinksMultitarget(c *chk.C) { trySymlink(filepath.Join(tmpDir, "extradir"), filepath.Join(tmpDir, "linktolink"), c) fileCount := 0 - c.Assert(WalkWithSymlinks(tmpDir, func(path string, fi os.FileInfo, err error) error { + c.Assert(WalkWithSymlinks(context.TODO(), tmpDir, func(path string, fi os.FileInfo, err error) error { c.Assert(err, chk.IsNil) if fi.IsDir() { @@ -402,7 +402,7 @@ func (s *genericTraverserSuite) TestWalkWithSymlinksMultitarget(c *chk.C) { fileCount++ return nil }, - true), chk.IsNil) + true, nil), chk.IsNil) // 3 files live in base, 3 files live in first symlink, second & third symlink is ignored. c.Assert(fileCount, chk.Equals, 6) @@ -425,7 +425,7 @@ func (s *genericTraverserSuite) TestWalkWithSymlinksToParentAndChild(c *chk.C) { trySymlink(child, filepath.Join(root1, "tochild"), c) fileCount := 0 - c.Assert(WalkWithSymlinks(root1, func(path string, fi os.FileInfo, err error) error { + c.Assert(WalkWithSymlinks(context.TODO(), root1, func(path string, fi os.FileInfo, err error) error { c.Assert(err, chk.IsNil) if fi.IsDir() { @@ -435,7 +435,7 @@ func (s *genericTraverserSuite) TestWalkWithSymlinksToParentAndChild(c *chk.C) { fileCount++ return nil }, - true), chk.IsNil) + true, nil), chk.IsNil) // 6 files total live under toroot. tochild should be ignored (or if tochild was traversed first, child will be ignored on toroot). 
c.Assert(fileCount, chk.Equals, 6) @@ -484,7 +484,7 @@ func (s *genericTraverserSuite) TestTraverserWithSingleObject(c *chk.C) { scenarioHelper{}.generateLocalFilesFromList(c, dstDirName, blobList) // construct a local traverser - localTraverser := newLocalTraverser(filepath.Join(dstDirName, dstFileName), false, false, func(common.EntityType) {}) + localTraverser := newLocalTraverser(context.TODO(), filepath.Join(dstDirName, dstFileName), false, false, func(common.EntityType) {}, nil) // invoke the local traversal with a dummy processor localDummyProcessor := dummyProcessor{} @@ -644,7 +644,7 @@ func (s *genericTraverserSuite) TestTraverserContainerAndLocalDirectory(c *chk.C // test two scenarios, either recursive or not for _, isRecursiveOn := range []bool{true, false} { // construct a local traverser - localTraverser := newLocalTraverser(dstDirName, isRecursiveOn, false, func(common.EntityType) {}) + localTraverser := newLocalTraverser(context.TODO(), dstDirName, isRecursiveOn, false, func(common.EntityType) {}, nil) // invoke the local traversal with an indexer // so that the results are indexed for easy validation @@ -805,7 +805,7 @@ func (s *genericTraverserSuite) TestTraverserWithVirtualAndLocalDirectory(c *chk // test two scenarios, either recursive or not for _, isRecursiveOn := range []bool{true, false} { // construct a local traverser - localTraverser := newLocalTraverser(filepath.Join(dstDirName, virDirName), isRecursiveOn, false, func(common.EntityType) {}) + localTraverser := newLocalTraverser(context.TODO(), filepath.Join(dstDirName, virDirName), isRecursiveOn, false, func(common.EntityType) {}, nil) // invoke the local traversal with an indexer // so that the results are indexed for easy validation diff --git a/common/parallel/FileSystemCrawler.go b/common/parallel/FileSystemCrawler.go index aaf41a282..f8cf4a4a3 100644 --- a/common/parallel/FileSystemCrawler.go +++ b/common/parallel/FileSystemCrawler.go @@ -63,7 +63,9 @@ func CrawlLocalDirectory(ctx context.Context, root string, parallelism int, read // (whereas with filepath.Walk it will usually (always?) have a value). // 2. If the return value of walkFunc function is not nil, enumeration will always stop, not matter what the type of the error. // (Unlike filepath.WalkFunc, where returning filePath.SkipDir is handled as a special case). 
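A minimal sketch of a walk callback written against the contract described above (illustrative only, shown with the standard-library walker for brevity; the parallel walker takes the same filepath.WalkFunc shape): the callback tolerates a nil FileInfo when an error is passed in and returns nil so that enumeration continues.

package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// exampleWalkFn follows the contract described above: fileInfo may be nil when an
// error is passed in, and returning a non-nil error always stops the enumeration.
func exampleWalkFn(path string, fileInfo os.FileInfo, fileErr error) error {
	if fileErr != nil {
		// Log and continue; returning fileErr here would stop the whole walk.
		fmt.Printf("skipping %q: %v\n", path, fileErr)
		return nil
	}
	if fileInfo.IsDir() {
		fmt.Println("dir :", path)
		return nil
	}
	fmt.Println("file:", path)
	return nil
}

func main() {
	_ = filepath.Walk(".", exampleWalkFn)
}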
-func Walk(root string, parallelism int, parallelStat bool, walkFn filepath.WalkFunc) { +func Walk(appCtx context.Context, root string, parallelism int, parallelStat bool, walkFn filepath.WalkFunc) { + var ctx context.Context + var cancel context.CancelFunc signalRootError := func(e error) { _ = walkFn(root, nil, e) } @@ -96,7 +98,8 @@ func Walk(root string, parallelism int, parallelStat bool, walkFn filepath.WalkF // walk the stuff inside the root reader, remainingParallelism := NewDirReader(parallelism, parallelStat) defer reader.Close() - ctx, cancel := context.WithCancel(context.Background()) + + ctx, cancel = context.WithCancel(appCtx) ch := CrawlLocalDirectory(ctx, root, remainingParallelism, reader) for crawlResult := range ch { entry, err := crawlResult.Item() @@ -104,7 +107,13 @@ func Walk(root string, parallelism int, parallelStat bool, walkFn filepath.WalkF fsEntry := entry.(FileSystemEntry) err = walkFn(fsEntry.fullPath, fsEntry.info, nil) } else { - err = walkFn("", nil, err) // cannot supply path here, because crawlResult probably doesn't have one, due to the error + // Our directory scanners can enqueue FileSystemEntry items with potentially full path and fileInfo for failures encountered during enumeration. + // If the entry is valid we pass those to caller. + if fsEntry, ok := entry.(FileSystemEntry); ok { + err = walkFn(fsEntry.fullPath, fsEntry.info, err) + } else { + err = walkFn("", nil, err) // cannot supply path here, because crawlResult probably doesn't have one, due to the error + } } if err != nil { cancel() @@ -119,7 +128,11 @@ func enumerateOneFileSystemDirectory(dir Directory, enqueueDir func(Directory), d, err := os.Open(dirString) // for directories, we don't need a special open with FILE_FLAG_BACKUP_SEMANTICS, because directory opening uses FindFirst which doesn't need that flag. https://blog.differentpla.net/blog/2007/05/25/findfirstfile-and-se_backup_name if err != nil { - return err + // FileInfo value being nil should mean that the FileSystemEntry refers to a directory. + enqueueOutput(FileSystemEntry{dirString, nil}, err) + + // Since we have already enqueued the failed enumeration entry, return nil error to avoid duplicate queueing by workerLoop(). + return nil } defer d.Close() @@ -132,18 +145,23 @@ func enumerateOneFileSystemDirectory(dir Directory, enqueueDir func(Directory), } return nil } else if err != nil { - return err + // FileInfo value being nil should mean that the FileSystemEntry refers to a directory. + enqueueOutput(FileSystemEntry{dirString, nil}, err) + + // Since we have already enqueued the failed enumeration entry, return nil error to avoid duplicate queueing by workerLoop(). 
+ return nil } for _, childInfo := range list { - if failable, ok := childInfo.(failableFileInfo); ok && failable.Error() != nil { - // while Readdir as a whole did not fail, this particular file info did - enqueueOutput(FileSystemEntry{}, failable.Error()) - continue - } childEntry := FileSystemEntry{ fullPath: filepath.Join(dirString, childInfo.Name()), info: childInfo, } + + if failable, ok := childInfo.(failableFileInfo); ok && failable.Error() != nil { + // while Readdir as a whole did not fail, this particular file info did + enqueueOutput(childEntry, failable.Error()) + continue + } isSymlink := childInfo.Mode()&os.ModeSymlink != 0 // for compatibility with filepath.Walk, we do not follow symlinks, but we do enqueue them as output if childInfo.IsDir() && !isSymlink { enqueueDir(childEntry.fullPath) diff --git a/common/parallel/zt_FileSystemCrawlerTest_test.go b/common/parallel/zt_FileSystemCrawlerTest_test.go index 21afcca27..af184c4b3 100644 --- a/common/parallel/zt_FileSystemCrawlerTest_test.go +++ b/common/parallel/zt_FileSystemCrawlerTest_test.go @@ -22,12 +22,13 @@ package parallel import ( "context" - chk "gopkg.in/check.v1" "os" "path/filepath" "runtime" "strings" "testing" + + chk "gopkg.in/check.v1" ) // Hookup to the testing framework @@ -61,7 +62,7 @@ func (s *fileSystemCrawlerSuite) TestParallelEnumerationFindsTheRightFiles(c *ch // our parallel walk parallelResults := make(map[string]struct{}) - Walk(dir, 16, false, func(path string, _ os.FileInfo, fileErr error) error { + Walk(context.TODO(), dir, 16, false, func(path string, _ os.FileInfo, fileErr error) error { if fileErr == nil { parallelResults[path] = struct{}{} } @@ -122,7 +123,7 @@ func (s *fileSystemCrawlerSuite) doTestParallelEnumerationGetsTheRightFileInfo(p // our parallel walk parallelResults := make(map[string]os.FileInfo) - Walk(dir, 64, parallelStat, func(path string, fi os.FileInfo, fileErr error) error { + Walk(context.TODO(), dir, 64, parallelStat, func(path string, fi os.FileInfo, fileErr error) error { if fileErr == nil { parallelResults[path] = fi } @@ -174,7 +175,7 @@ func (s *fileSystemCrawlerSuite) doTestParallelEnumerationGetsTheRightFileInfo(p func (s *fileSystemCrawlerSuite) TestRootErrorsAreSignalled(c *chk.C) { receivedError := false nonExistentDir := filepath.Join(os.TempDir(), "Big random-named directory that almost certainly doesn't exist 85784362628398473732827384") - Walk(nonExistentDir, 16, false, func(path string, _ os.FileInfo, fileErr error) error { + Walk(context.TODO(), nonExistentDir, 16, false, func(path string, _ os.FileInfo, fileErr error) error { if fileErr != nil && path == nonExistentDir { receivedError = true } From 42f9d06f342955fff4d73bcdeeea55d45f2dbef9 Mon Sep 17 00:00:00 2001 From: Narasimha Kulkarni Date: Mon, 27 Jun 2022 02:00:47 -0700 Subject: [PATCH 17/26] Fix race between Updating JobPart and xferDoneMsg (#1814) * Fix race between Updating JobPart and xferDoneMsg * Rename variable to more appropriate one * Close xferDone channel * Add missing continue * Fix rebase issue --- ste/jobStatusManager.go | 81 +++++++++++++++++++++++++++++++---------- ste/mgr-JobMgr.go | 15 +++++--- 2 files changed, 70 insertions(+), 26 deletions(-) diff --git a/ste/jobStatusManager.go b/ste/jobStatusManager.go index edd5f556f..ca0ad84b2 100755 --- a/ste/jobStatusManager.go +++ b/ste/jobStatusManager.go @@ -23,8 +23,6 @@ package ste import ( "time" - "github.com/Azure/azure-pipeline-go/pipeline" - "github.com/Azure/azure-storage-azcopy/v10/common" ) @@ -38,17 +36,35 @@ type 
JobPartCreatedMsg struct { type xferDoneMsg = common.TransferDetail type jobStatusManager struct { - js common.ListJobSummaryResponse - respChan chan common.ListJobSummaryResponse - listReq chan bool - partCreated chan JobPartCreatedMsg - xferDone chan xferDoneMsg - done chan struct{} + js common.ListJobSummaryResponse + respChan chan common.ListJobSummaryResponse + listReq chan struct{} + partCreated chan JobPartCreatedMsg + xferDone chan xferDoneMsg + xferDoneDrained chan struct{} // To signal that all xferDone have been processed + statusMgrDone chan struct{} // To signal statusManager has closed +} + +func (jm *jobMgr) waitToDrainXferDone() { + <-jm.jstm.xferDoneDrained +} + +func (jm *jobMgr) statusMgrClosed() bool { + select { + case <-jm.jstm.statusMgrDone: + return true + default: + return false + } } /* These functions should not fail */ func (jm *jobMgr) SendJobPartCreatedMsg(msg JobPartCreatedMsg) { jm.jstm.partCreated <- msg + if msg.IsFinalPart { + //Inform statusManager that this is all parts we've + close(jm.jstm.partCreated) + } } func (jm *jobMgr) SendXferDoneMsg(msg xferDoneMsg) { @@ -56,29 +72,40 @@ func (jm *jobMgr) SendXferDoneMsg(msg xferDoneMsg) { } func (jm *jobMgr) ListJobSummary() common.ListJobSummaryResponse { - jm.jstm.listReq <- true - return <-jm.jstm.respChan + if jm.statusMgrClosed() { + return jm.jstm.js + } + + select { + case jm.jstm.listReq <- struct{}{}: + return <-jm.jstm.respChan + case <-jm.jstm.statusMgrDone: + // StatusManager closed while we requested for an update. + // Return the last update. This is okay because there will + // be no further updates. + return jm.jstm.js + } } func (jm *jobMgr) ResurrectSummary(js common.ListJobSummaryResponse) { jm.jstm.js = js } -func (jm *jobMgr) CleanupJobStatusMgr() { - jm.Log(pipeline.LogInfo, "CleanJobStatusMgr called.") - jm.jstm.done <- struct{}{} -} - func (jm *jobMgr) handleStatusUpdateMessage() { jstm := jm.jstm js := &jstm.js js.JobID = jm.jobID js.CompleteJobOrdered = false js.ErrorMsg = "" + allXferDoneHandled := false for { select { - case msg := <-jstm.partCreated: + case msg, ok := <-jstm.partCreated: + if !ok { + jstm.partCreated = nil + continue + } js.CompleteJobOrdered = js.CompleteJobOrdered || msg.IsFinalPart js.TotalTransfers += msg.TotalTransfers js.FileTransfers += msg.FileTransfers @@ -86,7 +113,16 @@ func (jm *jobMgr) handleStatusUpdateMessage() { js.TotalBytesEnumerated += msg.TotalBytesEnumerated js.TotalBytesExpected += msg.TotalBytesEnumerated - case msg := <-jstm.xferDone: + case msg, ok := <-jstm.xferDone: + if !ok { //Channel is closed, all transfers have been attended. 
+ jstm.xferDone = nil + + //close drainXferDone so that other components can know no further updates happen + allXferDoneHandled = true + close(jstm.xferDoneDrained) + continue + } + msg.Src = common.URLStringExtension(msg.Src).RedactSecretQueryParamForLogging() msg.Dst = common.URLStringExtension(msg.Dst).RedactSecretQueryParamForLogging() @@ -115,9 +151,14 @@ func (jm *jobMgr) handleStatusUpdateMessage() { js.FailedTransfers = []common.TransferDetail{} js.SkippedTransfers = []common.TransferDetail{} - case <-jstm.done: - jm.Log(pipeline.LogInfo, "Cleanup JobStatusmgr.") - return + if allXferDoneHandled { + close(jstm.statusMgrDone) + close(jstm.respChan) + close(jstm.listReq) + jstm.listReq = nil + jstm.respChan = nil + return + } } } } diff --git a/ste/mgr-JobMgr.go b/ste/mgr-JobMgr.go index 2920b83ae..92aebe3db 100755 --- a/ste/mgr-JobMgr.go +++ b/ste/mgr-JobMgr.go @@ -104,7 +104,6 @@ type IJobMgr interface { // Cleanup Functions DeferredCleanupJobMgr() - CleanupJobStatusMgr() } // ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -138,10 +137,11 @@ func NewJobMgr(concurrency ConcurrencySettings, jobID common.JobID, appCtx conte jobPartProgressCh := make(chan jobPartProgressInfo) var jstm jobStatusManager jstm.respChan = make(chan common.ListJobSummaryResponse) - jstm.listReq = make(chan bool) + jstm.listReq = make(chan struct{}) jstm.partCreated = make(chan JobPartCreatedMsg, 100) jstm.xferDone = make(chan xferDoneMsg, 1000) - jstm.done = make(chan struct{}, 1) + jstm.xferDoneDrained = make(chan struct{}) + jstm.statusMgrDone = make(chan struct{}) // Different logger for each job. if jobLogger == nil { jobLogger = common.NewJobLogger(jobID, common.ELogLevel.Debug(), logFileFolder, "" /* logFileNameSuffix */) @@ -643,6 +643,12 @@ func (jm *jobMgr) reportJobPartDoneHandler() { isCancelling := jobStatus == common.EJobStatus.Cancelling() shouldComplete := allKnownPartsDone && (haveFinalPart || isCancelling) if shouldComplete { + // Inform StatusManager that all parts are done. + close(jm.jstm.xferDone) + // Wait for all XferDone messages to be processed by statusManager. Front end + // depends on JobStatus to determine if we've to quit job. Setting it here without + // draining XferDone will make it report incorrect statistics. + jm.waitToDrainXferDone() partDescription := "all parts of entire Job" if !haveFinalPart { partDescription = "known parts of incomplete Job" @@ -724,9 +730,6 @@ func (jm *jobMgr) DeferredCleanupJobMgr() { // This will take care of any jobPartMgr release. jm.Cancel() - // Cleanup the JobStatusMgr go routine. - jm.CleanupJobStatusMgr() - // Transfer Thread Cleanup. jm.cleanupTransferRoutine() From 265ac44957aa396328e151c772f9c19f2f5d5f8b Mon Sep 17 00:00:00 2001 From: Ze Qian Zhang Date: Mon, 27 Jun 2022 11:29:12 -0700 Subject: [PATCH 18/26] Minor doc update for copy using oauth (#1828) --- cmd/helpMessages.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/helpMessages.go b/cmd/helpMessages.go index 59c788d7e..af360def9 100644 --- a/cmd/helpMessages.go +++ b/cmd/helpMessages.go @@ -134,7 +134,7 @@ Copy a single blob to another blob by using a SAS token. - azcopy cp "https://[srcaccount].blob.core.windows.net/[container]/[path/to/blob]?[SAS]" "https://[destaccount].blob.core.windows.net/[container]/[path/to/blob]?[SAS]" -Copy a single blob to another blob by using a SAS token and an OAuth token. 
You have to use a SAS token at the end of the source account URL, but the destination account doesn't need one if you log into AzCopy by using the azcopy login command. +Copy a single blob to another blob by using a SAS token and an OAuth token. You have to use a SAS token at the end of the source account URL if you do not have the right permissions to read it with the identity used for login. - azcopy cp "https://[srcaccount].blob.core.windows.net/[container]/[path/to/blob]?[SAS]" "https://[destaccount].blob.core.windows.net/[container]/[path/to/blob]" From 134d2b662a7875a3fc7eece26cf6115310f419b4 Mon Sep 17 00:00:00 2001 From: Ze Qian Zhang Date: Wed, 29 Jun 2022 23:14:54 -0700 Subject: [PATCH 19/26] Edit help message for set-properties (#1836) --- cmd/helpMessages.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cmd/helpMessages.go b/cmd/helpMessages.go index af360def9..c475614bf 100644 --- a/cmd/helpMessages.go +++ b/cmd/helpMessages.go @@ -540,28 +540,28 @@ Sets properties of Blob, BlobFS, and File storage. The properties currently supp const setPropertiesCmdExample = ` Change tier of blob to hot: - - azcopy setprops "https://[account].blob.core.windows.net/[container]/[path/to/blob]" --block-blob-tier=hot + - azcopy set-properties "https://[account].blob.core.windows.net/[container]/[path/to/blob]" --block-blob-tier=hot Change tier of blob from archive to cool with rehydrate priority set to high: - - azcopy setprops "https://[account].blob.core.windows.net/[container]/[path/to/blob]" --block-blob-tier=cool --rehydrate-priority=high + - azcopy set-properties "https://[account].blob.core.windows.net/[container]/[path/to/blob]" --block-blob-tier=cool --rehydrate-priority=high Change tier of all files in a directory to archive: - - azcopy setprops "https://[account].blob.core.windows.net/[container]/[path/to/virtual/dir]" --block-blob-tier=archive --recursive=true + - azcopy set-properties "https://[account].blob.core.windows.net/[container]/[path/to/virtual/dir]" --block-blob-tier=archive --recursive=true Change metadata of blob to {key = "abc", val = "def"} and {key = "ghi", val = "jkl"}: - - azcopy setprops "https://[account].blob.core.windows.net/[container]/[path/to/blob]" --metadata=abc=def;ghi=jkl + - azcopy set-properties "https://[account].blob.core.windows.net/[container]/[path/to/blob]" --metadata=abc=def;ghi=jkl Change metadata of all files in a directory to {key = "abc", val = "def"} and {key = "ghi", val = "jkl"}: - - azcopy setprops "https://[account].blob.core.windows.net/[container]/[path/to/virtual/dir]" --metadata=abc=def;ghi=jkl --recursive=true + - azcopy set-properties "https://[account].blob.core.windows.net/[container]/[path/to/virtual/dir]" --metadata=abc=def;ghi=jkl --recursive=true Clear all existing metadata of blob: - - azcopy setprops "https://[account].blob.core.windows.net/[container]/[path/to/blob]" --metadata=clear + - azcopy set-properties "https://[account].blob.core.windows.net/[container]/[path/to/blob]" --metadata=clear Change blob-tags of blob to {key = "abc", val = "def"} and {key = "ghi", val = "jkl"}: - - azcopy setprops "https://[account].blob.core.windows.net/[container]/[path/to/blob]" --blob-tags=abc=def&ghi=jkl + - azcopy set-properties "https://[account].blob.core.windows.net/[container]/[path/to/blob]" --blob-tags=abc=def&ghi=jkl - While setting tags on the blobs, there are additional permissions('t' for tags) in SAS without which the service will give authorization error back. 
Clear all existing blob-tags of blob: - - azcopy setprops "https://[account].blob.core.windows.net/[container]/[path/to/blob]" --blob-tags=clear + - azcopy set-properties "https://[account].blob.core.windows.net/[container]/[path/to/blob]" --blob-tags=clear - While setting tags on the blobs, there are additional permissions('t' for tags) in SAS without which the service will give authorization error back. ` From 05f98cf9ff0dde9b6f423be5dd161107e76d4800 Mon Sep 17 00:00:00 2001 From: Narasimha Kulkarni Date: Wed, 29 Jun 2022 23:18:33 -0700 Subject: [PATCH 20/26] Improve error if we cannot determine if source is a directory (#1412) * Improve error if we cannot determine if source is a directory * Replace ste logger with scanning logger * Undo white space changes * Fix CI --- cmd/zc_traverser_blob.go | 26 ++++++++++++++++++++++++-- cmd/zt_sync_blob_blob_test.go | 2 +- testSuite/scripts/test_blob_sync.py | 4 ++-- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/cmd/zc_traverser_blob.go b/cmd/zc_traverser_blob.go index ca98db05c..92332fdf7 100644 --- a/cmd/zc_traverser_blob.go +++ b/cmd/zc_traverser_blob.go @@ -72,7 +72,7 @@ func (t *blobTraverser) IsDirectory(isSource bool) bool { return isDirDirect } - _, isSingleBlob, _, err := t.getPropertiesIfSingleBlob() + _, _, isDirStub, err := t.getPropertiesIfSingleBlob() if stgErr, ok := err.(azblob.StorageError); ok { // We know for sure this is a single blob still, let it walk on through to the traverser. @@ -81,7 +81,29 @@ func (t *blobTraverser) IsDirectory(isSource bool) bool { } } - return !isSingleBlob + if err == nil { + return isDirStub + } + + blobURLParts := azblob.NewBlobURLParts(*t.rawURL) + containerRawURL := copyHandlerUtil{}.getContainerUrl(blobURLParts) + containerURL := azblob.NewContainerURL(containerRawURL, t.p) + searchPrefix := strings.TrimSuffix(blobURLParts.BlobName, common.AZCOPY_PATH_SEPARATOR_STRING) + common.AZCOPY_PATH_SEPARATOR_STRING + resp, err := containerURL.ListBlobsFlatSegment(t.ctx, azblob.Marker{}, azblob.ListBlobsSegmentOptions{Prefix: searchPrefix, MaxResults: 1}) + if err != nil { + if azcopyScanningLogger != nil { + msg := fmt.Sprintf("Failed to check if the destination is a folder or a file (Azure Files). 
Assuming the destination is a file: %s", err) + azcopyScanningLogger.Log(pipeline.LogError, msg) + } + return false + } + + if len(resp.Segment.BlobItems) == 0 { + //Not a directory + return false + } + + return true } func (t *blobTraverser) getPropertiesIfSingleBlob() (props *azblob.BlobGetPropertiesResponse, isBlob bool, isDirStub bool, err error) { diff --git a/cmd/zt_sync_blob_blob_test.go b/cmd/zt_sync_blob_blob_test.go index be3f9d442..372403466 100644 --- a/cmd/zt_sync_blob_blob_test.go +++ b/cmd/zt_sync_blob_blob_test.go @@ -482,7 +482,7 @@ func (s *cmdIntegrationSuite) TestSyncS2SContainerAndEmptyVirtualDir(c *chk.C) { // construct the raw input to simulate user input srcContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, srcContainerName) - dstVirtualDirURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, dstContainerName, "emptydir") + dstVirtualDirURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, dstContainerName, "emptydir/") raw := getDefaultSyncRawInput(srcContainerURLWithSAS.String(), dstVirtualDirURLWithSAS.String()) // verify that targeting a virtual directory works fine diff --git a/testSuite/scripts/test_blob_sync.py b/testSuite/scripts/test_blob_sync.py index 98538c41f..231dc6ac9 100644 --- a/testSuite/scripts/test_blob_sync.py +++ b/testSuite/scripts/test_blob_sync.py @@ -71,7 +71,7 @@ def test_sync_entire_directory_with_local(self): # sync to local src = vdir_sas - dst = dir_path + dst = dir_path + "/" result = util.Command("sync").add_arguments(src).add_arguments(dst).add_flags("log-level", "info")\ .execute_azcopy_copy_command() self.assertTrue(result) @@ -116,7 +116,7 @@ def test_sync_entire_vdir_to_vdir(self): content_dir_name = "dir_sync_test" content_dir_path = util.create_test_n_files(1024, 10, content_dir_name) src_vdir_path = util.get_resource_sas("srcdir") - dst_vdir_path = util.get_resource_sas("dstdir") + dst_vdir_path = util.get_resource_sas("dstdir/") # create sub-directory inside directory sub_dir_name = os.path.join(content_dir_name, "sub_dir_sync_test") From b990b7bd897b160506a943c1eeb431b6b4ded7ea Mon Sep 17 00:00:00 2001 From: Mohit Sharma <65536214+mohsha-msft@users.noreply.github.com> Date: Mon, 4 Jul 2022 09:05:27 +0530 Subject: [PATCH 21/26] "dfs" to "blob" endpoint conversion (#1839) * DFS to Blob endpoint conversion. * DFS to Blob endpoint conversion. * DFS to Blob endpoint conversion. Co-authored-by: Mohit Sharma --- cmd/helpMessages.go | 1 + cmd/setProperties.go | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/cmd/helpMessages.go b/cmd/helpMessages.go index c475614bf..277596697 100644 --- a/cmd/helpMessages.go +++ b/cmd/helpMessages.go @@ -536,6 +536,7 @@ Sets properties of Blob, BlobFS, and File storage. The properties currently supp Blobs -> Tier, Metadata, Tags BlobFS -> Tier, Metadata, Tags Files -> Metadata +Note: dfs endpoints will be replaced by blob endpoints. ` const setPropertiesCmdExample = ` diff --git a/cmd/setProperties.go b/cmd/setProperties.go index 44032d4c0..e1845b513 100644 --- a/cmd/setProperties.go +++ b/cmd/setProperties.go @@ -100,6 +100,11 @@ func init() { //the resource to set properties of is set as src raw.src = args[0] + // We support DFS by using blob end-point of the account. 
We replace dfs by blob in src and dst + if src := InferArgumentLocation(raw.src); src == common.ELocation.BlobFS() { + raw.src = strings.Replace(raw.src, ".dfs", ".blob", 1) + glcm.Info("Switching to use blob endpoint on source account.") + } srcLocationType := InferArgumentLocation(raw.src) if raw.fromTo == "" { From eb334976b0beb6abb8546907409e5e1f2f633437 Mon Sep 17 00:00:00 2001 From: Narasimha Kulkarni Date: Tue, 5 Jul 2022 04:01:43 -0700 Subject: [PATCH 22/26] Fix jobs list (#1795) --- jobsAdmin/JobsAdmin.go | 37 +++++++++++++++++++++++++++++++++++++ jobsAdmin/init.go | 32 +------------------------------- 2 files changed, 38 insertions(+), 31 deletions(-) diff --git a/jobsAdmin/JobsAdmin.go b/jobsAdmin/JobsAdmin.go index f71a48e68..6a9143956 100755 --- a/jobsAdmin/JobsAdmin.go +++ b/jobsAdmin/JobsAdmin.go @@ -103,6 +103,7 @@ var JobsAdmin interface { // JobMgrCleanUp do the JobMgr cleanup. JobMgrCleanUp(jobId common.JobID) + ListJobs(givenStatus common.JobStatus) common.ListJobsResponse } func initJobsAdmin(appCtx context.Context, concurrency ste.ConcurrencySettings, targetRateInMegaBitsPerSec float64, azcopyJobPlanFolder string, azcopyLogPathFolder string, providePerfAdvice bool) { @@ -425,6 +426,42 @@ func (ja *jobsAdmin) ResurrectJobParts() { } } +func (ja *jobsAdmin) ListJobs(givenStatus common.JobStatus) common.ListJobsResponse { + ret := common.ListJobsResponse{JobIDDetails: []common.JobIDDetails{}} + files := func(ext string) []os.FileInfo { + var files []os.FileInfo + filepath.Walk(ja.planDir, func(path string, fileInfo os.FileInfo, _ error) error { + if !fileInfo.IsDir() && strings.HasSuffix(fileInfo.Name(), ext) { + files = append(files, fileInfo) + } + return nil + }) + return files + }(fmt.Sprintf(".steV%d", ste.DataSchemaVersion)) + + // TODO : sort the file. + for f := 0; f < len(files); f++ { + planFile := ste.JobPartPlanFileName(files[f].Name()) + jobID, partNum, err := planFile.Parse() + if err != nil || partNum != 0 { // Summary is in 0th JobPart + continue + } + + mmf := planFile.Map() + jpph := mmf.Plan() + + if givenStatus == common.EJobStatus.All() || givenStatus == jpph.JobStatus() { + ret.JobIDDetails = append(ret.JobIDDetails, + common.JobIDDetails{JobId: jobID, CommandString: jpph.CommandString(), + StartTime: jpph.StartTime, JobStatus: jpph.JobStatus()}) + } + + mmf.Unmap() + } + + return ret +} + func (ja *jobsAdmin) SetConcurrencySettingsToAuto() { // Setting initial pool size to 4 and max pool size to 3,000 ja.concurrency.InitialMainPoolSize = 4 diff --git a/jobsAdmin/init.go b/jobsAdmin/init.go index c1d3a1f48..0c40bcaef 100755 --- a/jobsAdmin/init.go +++ b/jobsAdmin/init.go @@ -736,37 +736,7 @@ func GetJobLCMWrapper(jobID common.JobID) common.LifecycleMgr { // ListJobs returns the jobId of all the jobs existing in the current instance of azcopy func ListJobs(givenStatus common.JobStatus) common.ListJobsResponse { - // Resurrect all the Jobs from the existing JobPart Plan files - JobsAdmin.ResurrectJobParts() - // building the ListJobsResponse for sending response back to front-end - jobIds := JobsAdmin.JobIDs() - // Silently ignore if no JobIDs are present. 
- if len(jobIds) == 0 { - return common.ListJobsResponse{} - } - listJobResponse := common.ListJobsResponse{JobIDDetails: []common.JobIDDetails{}} - for _, jobId := range jobIds { - jm, found := JobsAdmin.JobMgr(jobId) - if !found { - continue - } - jpm, found := jm.JobPartMgr(0) - if !found { - continue - } - if givenStatus == common.EJobStatus.All() || givenStatus == jpm.Plan().JobStatus() { - listJobResponse.JobIDDetails = append(listJobResponse.JobIDDetails, - common.JobIDDetails{JobId: jobId, CommandString: jpm.Plan().CommandString(), - StartTime: jpm.Plan().StartTime, JobStatus: jpm.Plan().JobStatus()}) - } - - // Close the job part managers and the log. - jm.IterateJobParts(false, func(k common.PartNumber, v ste.IJobPartMgr) { - v.Close() - }) - jm.CloseLog() - } - return listJobResponse + return JobsAdmin.ListJobs(givenStatus) } // GetJobFromTo api returns the job FromTo info. From 4ad3e88556dd9b1e76400f354676d3bd2de47aa0 Mon Sep 17 00:00:00 2001 From: adreed-msft <49764384+adreed-msft@users.noreply.github.com> Date: Thu, 7 Jul 2022 15:02:43 -0700 Subject: [PATCH 23/26] Fix chunk writer alignment (StgExp) (#1819) --- common/chunkedFileWriter.go | 42 ++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/common/chunkedFileWriter.go b/common/chunkedFileWriter.go index 3a515c33c..63ca0ce4c 100644 --- a/common/chunkedFileWriter.go +++ b/common/chunkedFileWriter.go @@ -58,10 +58,17 @@ type chunkedFileWriter struct { // NOTE: for the 64 bit atomic functions to work on a 32 bit system, we have to guarantee the right 64-bit alignment // so the 64 bit integers are placed first in the struct to avoid future breaks // refer to: https://golang.org/pkg/sync/atomic/#pkg-note-BUG + currentReservedCapacity int64 + // all time received count for this instance totalChunkReceiveMilliseconds int64 totalReceivedChunkCount int32 + // used to control scheduling of new chunks against this file, + // to make sure we don't get too many sitting in RAM all waiting to be + // saved at the same time + activeChunkCount int32 + // the file we are writing to (type as interface to somewhat abstract away io.File - e.g. for unit testing) file io.WriteCloser @@ -77,13 +84,8 @@ type chunkedFileWriter struct { // file chunks that have arrived and not been sorted yet newUnorderedChunks chan fileChunk - // used to control scheduling of new chunks against this file, - // to make sure we don't get too many sitting in RAM all waiting to be - // saved at the same time - activeChunkCount int32 - // used for completion - successMd5 chan []byte + successMd5 chan []byte chunkWriterDone chan bool // controls body-read retries. 
Public so value can be shared with retryReader @@ -94,9 +96,7 @@ type chunkedFileWriter struct { sourceMd5Exists bool - currentReservedCapacity int64 - - err error //This field should be set only by workerRoutine + err error // This field should be set only by workerRoutine } type fileChunk struct { @@ -146,7 +146,7 @@ func (w *chunkedFileWriter) WaitToScheduleChunk(ctx context.Context, id ChunkID, atomic.AddInt32(&w.activeChunkCount, 1) } return err - //At this point, the book-keeping of this memory is chunkedFileWriter's responsibility + // At this point, the book-keeping of this memory is chunkedFileWriter's responsibility } // Threadsafe method to enqueue a new chunk for processing @@ -164,11 +164,11 @@ func (w *chunkedFileWriter) EnqueueChunk(ctx context.Context, id ChunkID, chunkS // read into a buffer buffer := w.slicePool.RentSlice(chunkSize) - + defer func() { - //cleanup stuff if we abruptly quit + // cleanup stuff if we abruptly quit if err == nil { - return //We've successfully queued, the worker will now takeover + return // We've successfully queued, the worker will now takeover } w.cacheLimiter.Remove(chunkSize) // remove this from the tally of scheduled-but-unsaved bytes atomic.AddInt64(&w.currentReservedCapacity, -chunkSize) @@ -176,7 +176,7 @@ func (w *chunkedFileWriter) EnqueueChunk(ctx context.Context, id ChunkID, chunkS atomic.AddInt32(&w.activeChunkCount, -1) w.chunkLogger.LogChunkStatus(id, EWaitReason.ChunkDone()) // this chunk is all finished }() - + readStart := time.Now() _, err = io.ReadFull(chunkContents, buffer) close(readDone) @@ -214,13 +214,13 @@ func (w *chunkedFileWriter) Flush(ctx context.Context) ([]byte, error) { * * Why should we do this? * Ideally, the capacity should be zero here, because workerRoutine() would return - * the slice after saving the chunk. However, transferProcessor() is designed such that + * the slice after saving the chunk. However, transferProcessor() is designed such that * it has to schedule all chunks of jptm even if it has detected a failure in between. * In such a case, we'd have added to the capacity of the fileWriter, while the * workerRoutine() has already exited. We release that capacity here. When Flush() finds - * active chunks here, it is only those which have not rented a slice. + * active chunks here, it is only those which have not rented a slice. */ - defer func() { + defer func() { w.cacheLimiter.Remove(atomic.LoadInt64(&w.currentReservedCapacity)) }() @@ -255,7 +255,7 @@ func (w *chunkedFileWriter) workerRoutine(ctx context.Context) { } defer func() { - //cleanup stuff if we abruptly quit + // cleanup stuff if we abruptly quit for _, chunk := range unsavedChunksByFileOffset { w.cacheLimiter.Remove(int64(chunk.id.length)) // remove this from the tally of scheduled-but-unsaved bytes atomic.AddInt64(&w.currentReservedCapacity, -chunk.id.length) @@ -295,7 +295,7 @@ func (w *chunkedFileWriter) workerRoutine(ctx context.Context) { err := w.sequentiallyProcessAvailableChunks(unsavedChunksByFileOffset, &nextOffsetToSave, md5Hasher, ctx) if err != nil { w.err = err - return // no point in processing any more after a failure + return // no point in processing any more after a failure } } } @@ -313,7 +313,7 @@ func (w *chunkedFileWriter) sequentiallyProcessAvailableChunks(unsavedChunksByFi // Look for next chunk in sequence nextChunkInSequence, exists := unsavedChunksByFileOffset[*nextOffsetToSave] if !exists { - return nil //its not there yet. That's OK. + return nil // its not there yet. That's OK. 
} delete(unsavedChunksByFileOffset, *nextOffsetToSave) // remove it *nextOffsetToSave += int64(len(nextChunkInSequence.data)) // update immediately so we won't forget! @@ -339,7 +339,7 @@ func (w *chunkedFileWriter) setStatusForContiguousAvailableChunks(unsavedChunksB nextChunkInSequence, exists := unsavedChunksByFileOffset[nextOffsetToSave] if !exists { - return //its not there yet, so no need to touch anything AFTER it. THEY are still waiting for prior chunk + return // its not there yet, so no need to touch anything AFTER it. THEY are still waiting for prior chunk } nextOffsetToSave += int64(len(nextChunkInSequence.data)) w.chunkLogger.LogChunkStatus(nextChunkInSequence.id, EWaitReason.QueueToWrite()) // we WILL write this. Just may have to write others before it From 36aff75c2ab7c555a5d22ceebd7b0e5213698b80 Mon Sep 17 00:00:00 2001 From: Ze Qian Zhang Date: Wed, 13 Jul 2022 09:03:39 -0700 Subject: [PATCH 24/26] Minor doc update for set-properties to mark it as preview (#1847) --- cmd/helpMessages.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/helpMessages.go b/cmd/helpMessages.go index 277596697..52d208dbe 100644 --- a/cmd/helpMessages.go +++ b/cmd/helpMessages.go @@ -528,13 +528,13 @@ Run an upload that does not delete the transferred files. (These files can then // ===================================== SET-PROPERTIES COMMAND ===================================== // -const setPropertiesCmdShortDescription = "Given a location, change all the valid system properties of that storage (blob or file)" +const setPropertiesCmdShortDescription = "(Preview) Given a location, change all the valid system properties of that storage (blob or file)" const setPropertiesCmdLongDescription = ` -Sets properties of Blob, BlobFS, and File storage. The properties currently supported by this command are: +(Preview) Sets properties of Blob, ADLS Gen2, and File storage. The properties currently supported by this command are: Blobs -> Tier, Metadata, Tags - BlobFS -> Tier, Metadata, Tags + ADLS Gen2 -> Tier, Metadata, Tags Files -> Metadata Note: dfs endpoints will be replaced by blob endpoints. ` From 8c5bd4fc94a4bba507c11a54a2cccab825fd8bc6 Mon Sep 17 00:00:00 2001 From: adreed-msft <49764384+adreed-msft@users.noreply.github.com> Date: Tue, 19 Jul 2022 00:28:05 -0700 Subject: [PATCH 25/26] Implement multi-auth for managed disks (SAS+OAuth) when the managed disk export account requests it (#1844) * MD OAuth (mostly) * Reimplement MD-OAuth as a credential type signifying resource --- cmd/copy.go | 2 +- cmd/copyEnumeratorInit.go | 18 ++++----- cmd/credentialUtil.go | 72 ++++++++++++++++++++++++++++++++--- cmd/jobsResume.go | 2 +- cmd/list.go | 2 +- cmd/zt_credentialUtil_test.go | 24 +++++++++++- common/credentialFactory.go | 9 ++++- common/fe-ste-models.go | 5 +++ common/oauthTokenManager.go | 45 ++++++++++++++++------ ste/mgr-JobPartTransferMgr.go | 5 ++- 10 files changed, 150 insertions(+), 34 deletions(-) diff --git a/cmd/copy.go b/cmd/copy.go index 83f8951ec..9e5c27a8a 100644 --- a/cmd/copy.go +++ b/cmd/copy.go @@ -1402,7 +1402,7 @@ func (cca *CookedCopyCmdArgs) processCopyJobPartOrders() (err error) { // For OAuthToken credential, assign OAuthTokenInfo to CopyJobPartOrderRequest properly, // the info will be transferred to STE. - if cca.credentialInfo.CredentialType == common.ECredentialType.OAuthToken() { + if cca.credentialInfo.CredentialType.IsAzureOAuth() { uotm := GetUserOAuthTokenManagerInstance() // Get token from env var or cache. 
if tokenInfo, err := uotm.GetTokenInfo(ctx); err != nil { diff --git a/cmd/copyEnumeratorInit.go b/cmd/copyEnumeratorInit.go index 4ed629090..5a5a688f1 100755 --- a/cmd/copyEnumeratorInit.go +++ b/cmd/copyEnumeratorInit.go @@ -50,16 +50,16 @@ func (cca *CookedCopyCmdArgs) initEnumerator(jobPartOrder common.CopyJobPartOrde if cca.FromTo.IsS2S() { jobPartOrder.S2SSourceCredentialType = srcCredInfo.CredentialType - } - if jobPartOrder.S2SSourceCredentialType == common.ECredentialType.OAuthToken() { - uotm := GetUserOAuthTokenManagerInstance() - // get token from env var or cache - if tokenInfo, err := uotm.GetTokenInfo(ctx); err != nil { - return nil, err - } else { - cca.credentialInfo.OAuthTokenInfo = *tokenInfo - jobPartOrder.CredentialInfo.OAuthTokenInfo = *tokenInfo + if jobPartOrder.S2SSourceCredentialType.IsAzureOAuth() { + uotm := GetUserOAuthTokenManagerInstance() + // get token from env var or cache + if tokenInfo, err := uotm.GetTokenInfo(ctx); err != nil { + return nil, err + } else { + cca.credentialInfo.OAuthTokenInfo = *tokenInfo + jobPartOrder.CredentialInfo.OAuthTokenInfo = *tokenInfo + } } } diff --git a/cmd/credentialUtil.go b/cmd/credentialUtil.go index b727b6c2f..e1fefabb8 100644 --- a/cmd/credentialUtil.go +++ b/cmd/credentialUtil.go @@ -135,17 +135,62 @@ func GetOAuthTokenManagerInstance() (*common.UserOAuthTokenManager, error) { // 4. If there is OAuth token info passed from env var, indicating using token credential. (Note: this is only for testing) // 5. Otherwise use anonymous credential. // The implementation logic follows above rule, and adjusts sequence to save web request(for verifying public resource). -func getBlobCredentialType(ctx context.Context, blobResourceURL string, canBePublic bool, standaloneSAS bool, cpkOptions common.CpkOptions) (common.CredentialType, bool, error) { +func getBlobCredentialType(ctx context.Context, blobResourceURL string, canBePublic bool, standaloneSAS string, cpkOptions common.CpkOptions) (common.CredentialType, bool, error) { resourceURL, err := url.Parse(blobResourceURL) if err != nil { return common.ECredentialType.Unknown(), false, errors.New("provided blob resource string is not in URL format") } + if standaloneSAS != "" { + resourceURL.RawQuery = standaloneSAS + } + sas := azblob.NewBlobURLParts(*resourceURL).SAS + isMDAccount := strings.HasPrefix(resourceURL.Host, "md-") + canBePublic = canBePublic && !isMDAccount // MD accounts cannot be public. // If SAS existed, return anonymous credential type. - if isSASExisted := sas.Signature() != ""; isSASExisted || standaloneSAS { + if isSASExisted := sas.Signature() != ""; isSASExisted { + if isMDAccount { + // Ping the account anyway, and discern if we need OAuth. 
+ p := azblob.NewPipeline( + azblob.NewAnonymousCredential(), + azblob.PipelineOptions{ + Retry: azblob.RetryOptions{ + Policy: azblob.RetryPolicyExponential, + MaxTries: ste.UploadMaxTries, + TryTimeout: ste.UploadTryTimeout, + RetryDelay: ste.UploadRetryDelay, + MaxRetryDelay: ste.UploadMaxRetryDelay, + }, + RequestLog: azblob.RequestLogOptions{ + SyslogDisabled: common.IsForceLoggingDisabled(), + }, + }) + + clientProvidedKey := azblob.ClientProvidedKeyOptions{} + if cpkOptions.IsSourceEncrypted { + clientProvidedKey = common.GetClientProvidedKey(cpkOptions) + } + + bURL := azblob.NewBlobURL(*resourceURL, p) + _, err := bURL.GetProperties(ctx, azblob.BlobAccessConditions{}, clientProvidedKey) + + if err != nil { + if stgErr, ok := err.(azblob.StorageError); ok { + if httpResp := stgErr.Response(); httpResp.StatusCode == 401 { + challenge := httpResp.Header.Get("WWW-Authenticate") + if strings.Contains(challenge, common.MDResource) { + return common.ECredentialType.MDOAuthToken(), false, nil + } + } + } + + return common.ECredentialType.Unknown(), false, err + } + } + return common.ECredentialType.Anonymous(), false, nil } @@ -205,6 +250,7 @@ func getBlobCredentialType(ctx context.Context, blobResourceURL string, canBePub // If SAS token doesn't exist, it could be using OAuth token or the resource is public. if !oAuthTokenExists() { // no oauth token found, then directly return anonymous credential + // MD accounts will auto-fail without a request due to the update of the "canBePublic" flag earlier isPublicResource := checkPublic() // No forms of auth are present.no SAS token or OAuth token is present and the resource is not public @@ -373,6 +419,7 @@ func checkAuthSafeForTarget(ct common.CredentialType, resource, extraSuffixesAAD // these auth types don't pick up anything from environment vars, so they are not the focus of this routine return nil case common.ECredentialType.OAuthToken(), + common.ECredentialType.MDOAuthToken(), common.ECredentialType.SharedKey(): // Files doesn't currently support OAuth, but it's a valid azure endpoint anyway, so it'll pass the check. 
if resourceType != common.ELocation.Blob() && resourceType != common.ELocation.BlobFS() && resourceType != common.ELocation.File() { @@ -462,6 +509,8 @@ func logAuthType(ct common.CredentialType, location common.Location, isSource bo name := ct.String() if ct == common.ECredentialType.OAuthToken() { name = "Azure AD" // clarify the name to something users will recognize + } else if ct == common.ECredentialType.MDOAuthToken() { + name = "Azure AD (Managed Disk)" } message := fmt.Sprintf("Authenticating to %s using %s", resource, name) if _, exists := authMessagesAlreadyLogged.Load(message); !exists { @@ -480,14 +529,22 @@ func getCredentialTypeForLocation(ctx context.Context, location common.Location, } func doGetCredentialTypeForLocation(ctx context.Context, location common.Location, resource, resourceSAS string, isSource bool, getForcedCredType func() common.CredentialType, cpkOptions common.CpkOptions) (credType common.CredentialType, isPublic bool, err error) { - if resourceSAS != "" { + mdAccount := false + if location == common.ELocation.Blob() { + uri, _ := url.Parse(resource) + if strings.HasPrefix(uri.Host, "md-") { + mdAccount = true + } + } + + if resourceSAS != "" && !mdAccount { credType = common.ECredentialType.Anonymous() } else if credType = getForcedCredType(); credType == common.ECredentialType.Unknown() || location == common.ELocation.S3() || location == common.ELocation.GCP() { switch location { case common.ELocation.Local(), common.ELocation.Benchmark(): credType = common.ECredentialType.Anonymous() case common.ELocation.Blob(): - credType, isPublic, err = getBlobCredentialType(ctx, resource, isSource, resourceSAS != "", cpkOptions) + credType, isPublic, err = getBlobCredentialType(ctx, resource, isSource, resourceSAS, cpkOptions) if azErr, ok := err.(common.AzError); ok && azErr.Equals(common.EAzError.LoginCredMissing()) { _, autoLoginErr := GetOAuthTokenManagerInstance() if autoLoginErr == nil { @@ -531,6 +588,11 @@ func doGetCredentialTypeForLocation(ctx context.Context, location common.Locatio } } + // We may not always use the OAuth token on Managed Disks. As such, we should change to the type indicating the potential for use. + // if mdAccount && credType == common.ECredentialType.OAuthToken() { + // credType = common.ECredentialType.MDOAuthToken() + // } + if err = checkAuthSafeForTarget(credType, resource, cmdLineExtraSuffixesAAD, location); err != nil { return common.ECredentialType.Unknown(), false, err } @@ -545,7 +607,7 @@ func GetCredentialInfoForLocation(ctx context.Context, location common.Location, credInfo.CredentialType, isPublic, err = getCredentialTypeForLocation(ctx, location, resource, resourceSAS, isSource, cpkOptions) // flesh out the rest of the fields, for those types that require it - if credInfo.CredentialType == common.ECredentialType.OAuthToken() { + if credInfo.CredentialType.IsAzureOAuth() { uotm := GetUserOAuthTokenManagerInstance() if tokenInfo, err := uotm.GetTokenInfo(ctx); err != nil { diff --git a/cmd/jobsResume.go b/cmd/jobsResume.go index 165aa84aa..2ef1d5fa5 100644 --- a/cmd/jobsResume.go +++ b/cmd/jobsResume.go @@ -293,7 +293,7 @@ func (rca resumeCmdArgs) process() error { destinationSAS: rca.DestinationSAS, }, common.CpkOptions{}); err != nil { return err - } else if credentialInfo.CredentialType == common.ECredentialType.OAuthToken() { + } else if credentialInfo.CredentialType.IsAzureOAuth() { uotm := GetUserOAuthTokenManagerInstance() // Get token from env var or cache. 
if tokenInfo, err := uotm.GetTokenInfo(ctx); err != nil { diff --git a/cmd/list.go b/cmd/list.go index 8d82034d4..571d45865 100755 --- a/cmd/list.go +++ b/cmd/list.go @@ -213,7 +213,7 @@ func (cooked cookedListCmdArgs) HandleListContainerCommand() (err error) { return fmt.Errorf("failed to obtain credential info: %s", err.Error()) } else if cooked.location == cooked.location.File() && source.SAS == "" { return errors.New("azure files requires a SAS token for authentication") - } else if credentialInfo.CredentialType == common.ECredentialType.OAuthToken() { + } else if credentialInfo.CredentialType.IsAzureOAuth() { uotm := GetUserOAuthTokenManagerInstance() if tokenInfo, err := uotm.GetTokenInfo(ctx); err != nil { return err diff --git a/cmd/zt_credentialUtil_test.go b/cmd/zt_credentialUtil_test.go index 10a83784c..83aedf95e 100644 --- a/cmd/zt_credentialUtil_test.go +++ b/cmd/zt_credentialUtil_test.go @@ -22,9 +22,10 @@ package cmd import ( "context" + "strings" + "github.com/Azure/azure-storage-azcopy/v10/common" chk "gopkg.in/check.v1" - "strings" ) type credentialUtilSuite struct{} @@ -49,6 +50,10 @@ func (s *credentialUtilSuite) TestCheckAuthSafeForTarget(c *chk.C) { {common.ECredentialType.OAuthToken(), common.ELocation.Blob(), "http://myaccount.blob.core.chinacloudapi.cn", "", true}, {common.ECredentialType.OAuthToken(), common.ELocation.Blob(), "http://myaccount.blob.core.cloudapi.de", "", true}, {common.ECredentialType.OAuthToken(), common.ELocation.Blob(), "http://myaccount.blob.core.core.usgovcloudapi.net", "", true}, + {common.ECredentialType.MDOAuthToken(), common.ELocation.Blob(), "http://myaccount.blob.core.windows.net", "", true}, + {common.ECredentialType.MDOAuthToken(), common.ELocation.Blob(), "http://myaccount.blob.core.chinacloudapi.cn", "", true}, + {common.ECredentialType.MDOAuthToken(), common.ELocation.Blob(), "http://myaccount.blob.core.cloudapi.de", "", true}, + {common.ECredentialType.MDOAuthToken(), common.ELocation.Blob(), "http://myaccount.blob.core.core.usgovcloudapi.net", "", true}, {common.ECredentialType.SharedKey(), common.ELocation.BlobFS(), "http://myaccount.dfs.core.windows.net", "", true}, {common.ECredentialType.S3AccessKey(), common.ELocation.S3(), "http://something.s3.eu-central-1.amazonaws.com", "", true}, {common.ECredentialType.S3AccessKey(), common.ELocation.S3(), "http://something.s3.cn-north-1.amazonaws.com.cn", "", true}, @@ -59,11 +64,13 @@ func (s *credentialUtilSuite) TestCheckAuthSafeForTarget(c *chk.C) { // These should fail (they are not storage) {common.ECredentialType.OAuthToken(), common.ELocation.Blob(), "http://somethingelseinazure.windows.net", "", false}, + {common.ECredentialType.MDOAuthToken(), common.ELocation.Blob(), "http://somethingelseinazure.windows.net", "", false}, {common.ECredentialType.S3AccessKey(), common.ELocation.S3(), "http://somethingelseinaws.amazonaws.com", "", false}, {common.ECredentialType.GoogleAppCredentials(), common.ELocation.GCP(), "http://appengine.google.com", "", false}, // As should these (they are nothing to do with the expected URLs) {common.ECredentialType.OAuthToken(), common.ELocation.Blob(), "http://abc.example.com", "", false}, + {common.ECredentialType.MDOAuthToken(), common.ELocation.Blob(), "http://abc.example.com", "", false}, {common.ECredentialType.S3AccessKey(), common.ELocation.S3(), "http://abc.example.com", "", false}, {common.ECredentialType.GoogleAppCredentials(), common.ELocation.GCP(), "http://abc.example.com", "", false}, // Test that we don't want to send an S3 access key 
to a blob resource type. @@ -72,6 +79,7 @@ func (s *credentialUtilSuite) TestCheckAuthSafeForTarget(c *chk.C) { // But the same Azure one should pass if the user opts in to them (we don't support any similar override for S3) {common.ECredentialType.OAuthToken(), common.ELocation.Blob(), "http://abc.example.com", "*.foo.com;*.example.com", true}, + {common.ECredentialType.MDOAuthToken(), common.ELocation.Blob(), "http://abc.example.com", "*.foo.com;*.example.com", true}, } for i, t := range tests { @@ -93,3 +101,17 @@ func (s *credentialUtilSuite) TestCheckAuthSafeForTargetIsCalledWhenGettingAuthT c.Assert(strings.Contains(err.Error(), "If this URL is in fact an Azure service, you can enable Azure authentication to notblob.example.com."), chk.Equals, true) } + +func (s *credentialUtilSuite) TestCheckAuthSafeForTargetIsCalledWhenGettingAuthTypeMDOAuth(c *chk.C) { + mockGetCredTypeFromEnvVar := func() common.CredentialType { + return common.ECredentialType.MDOAuthToken() // force it to OAuth, which is the case we want to test + } + + // Call our core cred type getter function, in a way that will fail the safety check, and assert + // that it really does fail. + // This checks that our safety check is hooked into the main logic + _, _, err := doGetCredentialTypeForLocation(context.Background(), common.ELocation.Blob(), "http://notblob.example.com", "", true, mockGetCredTypeFromEnvVar, common.CpkOptions{}) + c.Assert(err, chk.NotNil) + c.Assert(strings.Contains(err.Error(), "If this URL is in fact an Azure service, you can enable Azure authentication to notblob.example.com."), + chk.Equals, true) +} diff --git a/common/credentialFactory.go b/common/credentialFactory.go index d5fe1407b..47a16ea66 100644 --- a/common/credentialFactory.go +++ b/common/credentialFactory.go @@ -21,7 +21,6 @@ package common import ( - gcpUtils "cloud.google.com/go/storage" "context" "errors" "fmt" @@ -31,6 +30,8 @@ import ( "sync" "time" + gcpUtils "cloud.google.com/go/storage" + "github.com/Azure/azure-storage-azcopy/v10/azbfs" "github.com/Azure/azure-storage-blob-go/azblob" "github.com/Azure/go-autorest/autorest/adal" @@ -94,11 +95,15 @@ func (o CredentialOpOptions) cancel() { func CreateBlobCredential(ctx context.Context, credInfo CredentialInfo, options CredentialOpOptions) azblob.Credential { credential := azblob.NewAnonymousCredential() - if credInfo.CredentialType == ECredentialType.OAuthToken() { + if credInfo.CredentialType.IsAzureOAuth() { if credInfo.OAuthTokenInfo.IsEmpty() { options.panicError(errors.New("invalid state, cannot get valid OAuth token information")) } + if credInfo.CredentialType == ECredentialType.MDOAuthToken() { + credInfo.OAuthTokenInfo.Resource = MDResource // token will instantly refresh with this + } + // Create TokenCredential with refresher. return azblob.NewTokenCredential( credInfo.OAuthTokenInfo.AccessToken, diff --git a/common/fe-ste-models.go b/common/fe-ste-models.go index d7b827d69..a0a32f4e0 100644 --- a/common/fe-ste-models.go +++ b/common/fe-ste-models.go @@ -832,12 +832,17 @@ type CredentialType uint8 func (CredentialType) Unknown() CredentialType { return CredentialType(0) } func (CredentialType) OAuthToken() CredentialType { return CredentialType(1) } // For Azure, OAuth +func (CredentialType) MDOAuthToken() CredentialType { return CredentialType(7) } // For Azure MD impexp func (CredentialType) Anonymous() CredentialType { return CredentialType(2) } // For Azure, SAS or public. 
func (CredentialType) SharedKey() CredentialType { return CredentialType(3) } // For Azure, SharedKey func (CredentialType) S3AccessKey() CredentialType { return CredentialType(4) } // For S3, AccessKeyID and SecretAccessKey func (CredentialType) GoogleAppCredentials() CredentialType { return CredentialType(5) } func (CredentialType) S3PublicBucket() CredentialType { return CredentialType(6) } // For S3, Anon Credentials & public bucket +func (ct CredentialType) IsAzureOAuth() bool { + return ct == ct.OAuthToken() || ct == ct.MDOAuthToken() +} + func (ct CredentialType) String() string { return enum.StringInt(ct, reflect.TypeOf(ct)) } diff --git a/common/oauthTokenManager.go b/common/oauthTokenManager.go index ba5cd9ab2..fbb022c8a 100644 --- a/common/oauthTokenManager.go +++ b/common/oauthTokenManager.go @@ -53,6 +53,7 @@ const ApplicationID = "579a7132-0e58-4d80-b1e1-7a1e2d337859" // Resource used in azure storage OAuth authentication const Resource = "https://storage.azure.com" +const MDResource = "https://disk.azure.com/" // There must be a trailing slash-- The service checks explicitly for "https://disk.azure.com/" const DefaultTenantID = "common" const DefaultActiveDirectoryEndpoint = "https://login.microsoftonline.com" const IMDSAPIVersionArcVM = "2019-11-01" @@ -167,7 +168,7 @@ func (uotm *UserOAuthTokenManager) MSILogin(ctx context.Context, identityInfo Id } // secretLoginNoUOTM non-interactively logs in with a client secret. -func secretLoginNoUOTM(tenantID, activeDirectoryEndpoint, secret, applicationID string) (*OAuthTokenInfo, error) { +func secretLoginNoUOTM(tenantID, activeDirectoryEndpoint, secret, applicationID, resource string) (*OAuthTokenInfo, error) { if tenantID == "" { tenantID = DefaultTenantID } @@ -194,7 +195,7 @@ func secretLoginNoUOTM(tenantID, activeDirectoryEndpoint, secret, applicationID *oauthConfig, applicationID, secret, - Resource, + resource, ) if err != nil { return nil, err @@ -220,7 +221,7 @@ func secretLoginNoUOTM(tenantID, activeDirectoryEndpoint, secret, applicationID // SecretLogin is a UOTM shell for secretLoginNoUOTM. 
func (uotm *UserOAuthTokenManager) SecretLogin(tenantID, activeDirectoryEndpoint, secret, applicationID string, persist bool) (*OAuthTokenInfo, error) { - oAuthTokenInfo, err := secretLoginNoUOTM(tenantID, activeDirectoryEndpoint, secret, applicationID) + oAuthTokenInfo, err := secretLoginNoUOTM(tenantID, activeDirectoryEndpoint, secret, applicationID, Resource) if err != nil { return nil, err @@ -239,7 +240,12 @@ func (uotm *UserOAuthTokenManager) SecretLogin(tenantID, activeDirectoryEndpoint // GetNewTokenFromSecret is a refresh shell for secretLoginNoUOTM func (credInfo *OAuthTokenInfo) GetNewTokenFromSecret(ctx context.Context) (*adal.Token, error) { - tokeninfo, err := secretLoginNoUOTM(credInfo.Tenant, credInfo.ActiveDirectoryEndpoint, credInfo.SPNInfo.Secret, credInfo.ApplicationID) + targetResource := Resource + if credInfo.Token.Resource != "" && credInfo.Token.Resource != targetResource { + targetResource = credInfo.Token.Resource + } + + tokeninfo, err := secretLoginNoUOTM(credInfo.Tenant, credInfo.ActiveDirectoryEndpoint, credInfo.SPNInfo.Secret, credInfo.ApplicationID, targetResource) if err != nil { return nil, err @@ -273,7 +279,7 @@ func readPKCSBlock(block *pem.Block, secret []byte, parseFunc func([]byte) (inte return pk, err } -func certLoginNoUOTM(tenantID, activeDirectoryEndpoint, certPath, certPass, applicationID string) (*OAuthTokenInfo, error) { +func certLoginNoUOTM(tenantID, activeDirectoryEndpoint, certPath, certPass, applicationID, resource string) (*OAuthTokenInfo, error) { if tenantID == "" { tenantID = DefaultTenantID } @@ -376,7 +382,7 @@ func certLoginNoUOTM(tenantID, activeDirectoryEndpoint, certPath, certPass, appl applicationID, cert, p, - Resource, + resource, ) if err != nil { return nil, err @@ -405,7 +411,7 @@ func certLoginNoUOTM(tenantID, activeDirectoryEndpoint, certPath, certPass, appl func (uotm *UserOAuthTokenManager) CertLogin(tenantID, activeDirectoryEndpoint, certPath, certPass, applicationID string, persist bool) (*OAuthTokenInfo, error) { // TODO: Global default cert flag for true non interactive login? // (Also could be useful if the user has multiple certificates they want to switch between in the same file.) - oAuthTokenInfo, err := certLoginNoUOTM(tenantID, activeDirectoryEndpoint, certPath, certPass, applicationID) + oAuthTokenInfo, err := certLoginNoUOTM(tenantID, activeDirectoryEndpoint, certPath, certPass, applicationID, Resource) uotm.stashedInfo = oAuthTokenInfo if persist && err == nil { @@ -420,7 +426,12 @@ func (uotm *UserOAuthTokenManager) CertLogin(tenantID, activeDirectoryEndpoint, // GetNewTokenFromCert refreshes a token manually from a certificate. func (credInfo *OAuthTokenInfo) GetNewTokenFromCert(ctx context.Context) (*adal.Token, error) { - tokeninfo, err := certLoginNoUOTM(credInfo.Tenant, credInfo.ActiveDirectoryEndpoint, credInfo.SPNInfo.CertPath, credInfo.SPNInfo.Secret, credInfo.ApplicationID) + targetResource := Resource + if credInfo.Token.Resource != "" && credInfo.Token.Resource != targetResource { + targetResource = credInfo.Token.Resource + } + + tokeninfo, err := certLoginNoUOTM(credInfo.Tenant, credInfo.ActiveDirectoryEndpoint, credInfo.SPNInfo.CertPath, credInfo.SPNInfo.Secret, credInfo.ApplicationID, targetResource) if err != nil { return nil, err @@ -779,11 +790,16 @@ func fixupTokenJson(bytes []byte) []byte { // Without this change, if some router is configured to not return "ICMP unreachable" then it will take 30 secs to timeout and increase the response time. 
// We are additionally checking Arc first, and then Azure VM because Arc endpoint is local so as to further reduce the response time of the Azure VM IMDS endpoint. func (credInfo *OAuthTokenInfo) GetNewTokenFromMSI(ctx context.Context) (*adal.Token, error) { + targetResource := Resource + if credInfo.Token.Resource != "" && credInfo.Token.Resource != targetResource { + targetResource = credInfo.Token.Resource + } + // Try Arc VM - req, resp, errArcVM := credInfo.queryIMDS(ctx, MSIEndpointArcVM, Resource, IMDSAPIVersionArcVM) + req, resp, errArcVM := credInfo.queryIMDS(ctx, MSIEndpointArcVM, targetResource, IMDSAPIVersionArcVM) if errArcVM != nil { // Try Azure VM since there was an error in trying Arc VM - reqAzureVM, respAzureVM, errAzureVM := credInfo.queryIMDS(ctx, MSIEndpointAzureVM, Resource, IMDSAPIVersionAzureVM) + reqAzureVM, respAzureVM, errAzureVM := credInfo.queryIMDS(ctx, MSIEndpointAzureVM, targetResource, IMDSAPIVersionAzureVM) if errAzureVM != nil { var serr syscall.Errno if errors.As(errArcVM, &serr) { @@ -814,7 +830,7 @@ func (credInfo *OAuthTokenInfo) GetNewTokenFromMSI(ctx context.Context) (*adal.T req, resp = reqAzureVM, respAzureVM } else if !isValidArcResponse(resp) { // Not valid response from ARC IMDS endpoint. Perhaps some other process listening on it. Try Azure IMDS endpoint as fallback option. - reqAzureVM, respAzureVM, errAzureVM := credInfo.queryIMDS(ctx, MSIEndpointAzureVM, Resource, IMDSAPIVersionAzureVM) + reqAzureVM, respAzureVM, errAzureVM := credInfo.queryIMDS(ctx, MSIEndpointAzureVM, targetResource, IMDSAPIVersionAzureVM) if errAzureVM != nil { // Neither Arc nor Azure VM IMDS endpoint available. Can't use MSI. return nil, fmt.Errorf("invalid response received from Arc IMDS endpoint (%s), probably some unknown process listening. If this an Azure VM, please check whether MSI is enabled, to enable MSI please refer to https://docs.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/qs-configure-portal-windows-vm#enable-system-assigned-identity-on-an-existing-vm: %v", MSIEndpointArcVM, errAzureVM) @@ -891,6 +907,11 @@ func (credInfo *OAuthTokenInfo) GetNewTokenFromMSI(ctx context.Context) (*adal.T // RefreshTokenWithUserCredential gets new token with user credential through refresh. 
func (credInfo *OAuthTokenInfo) RefreshTokenWithUserCredential(ctx context.Context) (*adal.Token, error) { + targetResource := Resource + if credInfo.Token.Resource != "" && credInfo.Token.Resource != targetResource { + targetResource = credInfo.Token.Resource + } + oauthConfig, err := adal.NewOAuthConfig(credInfo.ActiveDirectoryEndpoint, credInfo.Tenant) if err != nil { return nil, err @@ -901,7 +922,7 @@ func (credInfo *OAuthTokenInfo) RefreshTokenWithUserCredential(ctx context.Conte spt, err := adal.NewServicePrincipalTokenFromManualToken( *oauthConfig, IffString(credInfo.ClientID != "", credInfo.ClientID, ApplicationID), - Resource, + targetResource, credInfo.Token) if err != nil { return nil, err diff --git a/ste/mgr-JobPartTransferMgr.go b/ste/mgr-JobPartTransferMgr.go index fa9f8ba8f..b249d3d51 100644 --- a/ste/mgr-JobPartTransferMgr.go +++ b/ste/mgr-JobPartTransferMgr.go @@ -215,8 +215,9 @@ func (jptm *jobPartTransferMgr) GetS2SSourceBlobTokenCredential() azblob.TokenCr Cancel: jpm.jobMgr.Cancel, } - if jpm.jobMgr.getInMemoryTransitJobState().S2SSourceCredentialType == common.ECredentialType.OAuthToken() { - return common.CreateBlobCredential(jptm.Context(), jptm.jobPartMgr.(*jobPartMgr).jobMgr.getInMemoryTransitJobState().CredentialInfo.WithType(common.ECredentialType.OAuthToken()), credOption).(azblob.TokenCredential) + cType := jpm.jobMgr.getInMemoryTransitJobState().S2SSourceCredentialType + if cType.IsAzureOAuth() { + return common.CreateBlobCredential(jptm.Context(), jptm.jobPartMgr.(*jobPartMgr).jobMgr.getInMemoryTransitJobState().CredentialInfo.WithType(cType), credOption).(azblob.TokenCredential) } else { return nil } From a1595ed2e46276f3e69ecc4e776bcedd813c8ef8 Mon Sep 17 00:00:00 2001 From: siminsavani-msft <77068571+siminsavani-msft@users.noreply.github.com> Date: Tue, 19 Jul 2022 03:49:09 -0400 Subject: [PATCH 26/26] Update changelog and version for 10.16.0 (#1849) * Update changelog and version for 10.16.0 * Update ChangeLog.md Co-authored-by: Narasimha Kulkarni --- ChangeLog.md | 18 ++++++++++++++++++ common/version.go | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/ChangeLog.md b/ChangeLog.md index 14693ca30..57d9512ef 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,6 +1,24 @@ # Change Log +## Version 10.16.0 + +### New features + +1. Added time-based flag for remove to include files modified before/after certain date/time. +2. Added --output-level flag which allows users to set output verbosity. +3. Added --preserve-posix-properties flag that allows user to persist the results of statx(2)/stat(2) syscall on upload. +4. Implemented setprops command that allows users to set specific properties of Blobs, BlobFS, and Files. +5. Implemented multi-auth for managed disks (SAS+OAuth) when the managed disk export account requests it. + +### Bug fixes +1. Fixed [issue 1506](https://github.com/Azure/azure-storage-azcopy/issues/1506): Added input watcher to resolve issue since job could not be resumed. +2. Fixed [issue 1794](https://github.com/Azure/azure-storage-azcopy/issues/1794): Moved log-level to root.go so log-level arguments do not get ignored. +3. Fixed [issue 1824](https://github.com/Azure/azure-storage-azcopy/issues/1824): Avoid creating .azcopy under HOME if plan/log location is specified elsewhere. +4. 
Fixed [issue 1830](https://github.com/Azure/azure-storage-azcopy/issues/1830), [issue 1412](https://github.com/Azure/azure-storage-azcopy/issues/1418), and [issue 873](https://github.com/Azure/azure-storage-azcopy/issues/873): Improved error message for when AzCopy cannot determine if the source is a directory.
+5. Fixed [issue 1777](https://github.com/Azure/azure-storage-azcopy/issues/1777): Fixed job list to handle the respective output type correctly.
+6. Fixed win64 alignment issue.
+
 ## Version 10.15.0
 
 ### New features
 
diff --git a/common/version.go b/common/version.go
index 30c569689..facd4d2cb 100644
--- a/common/version.go
+++ b/common/version.go
@@ -1,6 +1,6 @@
 package common
 
-const AzcopyVersion = "10.15.0"
+const AzcopyVersion = "10.16.0"
 const UserAgent = "AzCopy/" + AzcopyVersion
 const S3ImportUserAgent = "S3Import " + UserAgent
 const GCPImportUserAgent = "GCPImport " + UserAgent
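
The core of the managed-disk multi-auth change (PATCH 25/26) is challenge sniffing: when the target host starts with "md-", a SAS alone may not be enough, so AzCopy probes the blob and, on a 401, looks for the disk resource ("https://disk.azure.com/", trailing slash included) in the WWW-Authenticate header before upgrading the credential type to MDOAuthToken. Below is a minimal, self-contained sketch of that check; needsMDOAuth, mdResource, and the fabricated http.Response are illustrative stand-ins, not the actual AzCopy helpers, which run this probe through the azblob pipeline inside getBlobCredentialType.

package main

import (
	"fmt"
	"net/http"
	"strings"
)

// mdResource mirrors common.MDResource from the patch: the service includes a
// trailing slash in its challenge, so the constant keeps it too.
const mdResource = "https://disk.azure.com/"

// needsMDOAuth is an illustrative helper (not part of AzCopy): given the raw
// response from a probe GetProperties call against an "md-" blob endpoint, it
// reports whether the 401 challenge asks for a token scoped to the managed-disk
// resource, i.e. whether the caller should switch to MDOAuthToken instead of
// relying on the SAS alone.
func needsMDOAuth(resp *http.Response) bool {
	if resp == nil || resp.StatusCode != http.StatusUnauthorized {
		return false
	}
	challenge := resp.Header.Get("WWW-Authenticate")
	return strings.Contains(challenge, mdResource)
}

func main() {
	// Fabricated response purely to exercise the helper.
	resp := &http.Response{
		StatusCode: http.StatusUnauthorized,
		Header: http.Header{
			"Www-Authenticate": []string{`Bearer authorization_uri=https://login.microsoftonline.com/..., resource_id=https://disk.azure.com/`},
		},
	}
	fmt.Println(needsMDOAuth(resp)) // prints true -> upgrade to MDOAuthToken
}

Once the credential type is MDOAuthToken, the token managers touched by the same patch refresh against the disk resource recorded on the token (set in CreateBlobCredential) rather than the default storage resource, which is what makes SAS+OAuth work together for managed-disk export accounts.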