diff --git a/Core/Cosmos.DataTransfer.Core/Cosmos.DataTransfer.Core.csproj b/Core/Cosmos.DataTransfer.Core/Cosmos.DataTransfer.Core.csproj index 1a2edcb..ac6fb46 100644 --- a/Core/Cosmos.DataTransfer.Core/Cosmos.DataTransfer.Core.csproj +++ b/Core/Cosmos.DataTransfer.Core/Cosmos.DataTransfer.Core.csproj @@ -19,7 +19,8 @@ - + + diff --git a/CosmosDbDataMigrationTool.sln b/CosmosDbDataMigrationTool.sln index 10126ef..66d32cc 100644 --- a/CosmosDbDataMigrationTool.sln +++ b/CosmosDbDataMigrationTool.sln @@ -64,6 +64,9 @@ EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Cosmos.DataTransfer.Common", "Interfaces\Cosmos.DataTransfer.Common\Cosmos.DataTransfer.Common.csproj", "{0FAD9D89-2E41-4D65-8440-5C01885D9292}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "AzureBlob", "AzureBlob", "{9627A42A-BEB0-4A39-B49C-C3C6D54E705A}" + ProjectSection(SolutionItems) = preProject + Extensions\AzureBlob\README.md = Extensions\AzureBlob\README.md + EndProjectSection EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "AwsS3", "AwsS3", "{502197E4-F554-4B5B-9235-FBFE7E49EBEF}" EndProject diff --git a/ExampleConfigs.md b/ExampleConfigs.md index ee32932..d80c830 100644 --- a/ExampleConfigs.md +++ b/ExampleConfigs.md @@ -1,123 +1,156 @@ # Example `migrationsettings.json` Files ## JSON to Cosmos-NoSQL + ```json { "Source": "json", "Sink": "cosmos-nosql", - "SourceSettings": { - "FilePath": "https://mytestfiles.local/sales-data.json" - }, - "SinkSettings": { - "ConnectionString": "AccountEndpoint=https://...", - "Database": "myDb", - "Container": "myContainer", - "PartitionKeyPath": "/id", - "RecreateContainer": true, - "WriteMode": "Insert", - "CreatedContainerMaxThroughput": 5000, - "IsServerlessAccount": false - } + "SourceSettings": { + "FilePath": "https://mytestfiles.local/sales-data.json" + }, + "SinkSettings": { + "ConnectionString": "AccountEndpoint=https://...", + "Database": "myDb", + "Container": "myContainer", + "PartitionKeyPath": "/id", + "RecreateContainer": true, + "WriteMode": "Insert", + "CreatedContainerMaxThroughput": 5000, + "IsServerlessAccount": false + } } ``` ## Cosmos-NoSQL to JSON + ```json { "Source": "Cosmos-NoSql", "Sink": "JSON", - "SourceSettings": - { - "ConnectionString": "AccountEndpoint=https://...", - "Database":"cosmicworks", - "Container":"customers", - "IncludeMetadataFields": true - }, - "SinkSettings": - { - "FilePath": "c:\\data\\cosmicworks\\customers.json", - "Indented": true - } + "SourceSettings": + { + "ConnectionString": "AccountEndpoint=https://...", + "Database":"cosmicworks", + "Container":"customers", + "IncludeMetadataFields": true + }, + "SinkSettings": + { + "FilePath": "c:\\data\\cosmicworks\\customers.json", + "Indented": true + } } ``` ## MongoDB to Cosmos-NoSQL + ```json { "Source": "mongodb", "Sink": "cosmos-nosql", - "SourceSettings": { - "ConnectionString": "mongodb://...", - "DatabaseName": "sales", - "Collection": "person" - }, - "SinkSettings": { - "ConnectionString": "AccountEndpoint=https://...", - "Database": "users", - "Container": "migrated", - "PartitionKeyPath": "/id", + "SourceSettings": { + "ConnectionString": "mongodb://...", + "DatabaseName": "sales", + "Collection": "person" + }, + "SinkSettings": { + "ConnectionString": "AccountEndpoint=https://...", + "Database": "users", + "Container": "migrated", + "PartitionKeyPath": "/id", "ConnectionMode": "Direct", - "WriteMode": "UpsertStream", - "CreatedContainerMaxThroughput": 8000, - "UseAutoscaleForCreatedContainer": false - } + "WriteMode": "UpsertStream", + "CreatedContainerMaxThroughput": 8000, + "UseAutoscaleForCreatedContainer": false + } } ``` ## SqlServer to AzureTableAPI + ```json { "Source": "SqlServer", "Sink": "AzureTableApi", - "SourceSettings": { - "ConnectionString": "Server=...", - "QueryText": "SELECT Id, Date, Amount FROM dbo.Payments WHERE Status = 'open'" - }, - "SinkSettings": { - "ConnectionString": "DefaultEndpointsProtocol=https;AccountName=...", - "Table": "payments", - "RowKeyFieldName": "Id" - } + "SourceSettings": { + "ConnectionString": "Server=...", + "QueryText": "SELECT Id, Date, Amount FROM dbo.Payments WHERE Status = 'open'" + }, + "SinkSettings": { + "ConnectionString": "DefaultEndpointsProtocol=https;AccountName=...", + "Table": "payments", + "RowKeyFieldName": "Id" + } } ``` ## Cosmos-NoSQL to SqlServer + ```json { "Source": "cosmos-nosql", "Sink": "sqlserver", - "SourceSettings": - { - "ConnectionString": "AccountEndpoint=https://...", - "Database":"operations", - "Container":"alerts", - "PartitionKeyValue": "jan", + "SourceSettings": + { + "ConnectionString": "AccountEndpoint=https://...", + "Database":"operations", + "Container":"alerts", + "PartitionKeyValue": "jan", "Query": "SELECT a.name, a.description, a.count, a.id, a.isSet FROM a" - }, - "SinkSettings": - { - "ConnectionString": "Server=...", - "TableName": "Import", - "ColumnMappings": [ - { - "ColumnName": "Name" - }, - { - "ColumnName": "Description" - }, - { - "ColumnName": "Count", - "SourceFieldName": "number" - }, - { - "ColumnName": "Id" - }, - { - "ColumnName": "IsSet", - "AllowNull": false, - "DefaultValue": false - } - ] - } + }, + "SinkSettings": + { + "ConnectionString": "Server=...", + "TableName": "Import", + "ColumnMappings": [ + { + "ColumnName": "Name" + }, + { + "ColumnName": "Description" + }, + { + "ColumnName": "Count", + "SourceFieldName": "number" + }, + { + "ColumnName": "Id" + }, + { + "ColumnName": "IsSet", + "AllowNull": false, + "DefaultValue": false + } + ] + } +} +``` + +## Cosmos-NoSQL to Json-AzureBlob (Using RBAC) + +```json +{ + "Source": "Cosmos-nosql", + "Sink": "Json-AzureBlob", + "SourceSettings": { + "UseRbacAuth": true, + "Database": "operations", + "Container": "alerts", + "PartitionKeyValue": "jan", + "AccountEndpoint": "https://.documents.azure.com", + "EnableInteractiveCredentials": true, + "IncludeMetadataFields": false, + "Query": "SELECT a.name, a.description, a.count, a.id, a.isSet FROM a" + }, + "SinkSettings": { + "UseRbacAuth": true, + "ContainerName": "operations-archive", + "AccountEndpoint": "https://.blob.core.windows.net", + "EnableInteractiveCredentials": true, + "BlobName": "jan-alerts" + }, + "Operations": [ + ] } ``` diff --git a/Extensions/AzureBlob/Cosmos.DataTransfer.AzureBlobStorage/AzureBlobDataSink.cs b/Extensions/AzureBlob/Cosmos.DataTransfer.AzureBlobStorage/AzureBlobDataSink.cs index 752ef42..2747b15 100644 --- a/Extensions/AzureBlob/Cosmos.DataTransfer.AzureBlobStorage/AzureBlobDataSink.cs +++ b/Extensions/AzureBlob/Cosmos.DataTransfer.AzureBlobStorage/AzureBlobDataSink.cs @@ -1,9 +1,10 @@ -using Azure.Storage.Blobs; +using Azure.Identity; +using Azure.Storage.Blobs; +using Azure.Storage.Blobs.Models; +using Azure.Storage.Blobs.Specialized; using Cosmos.DataTransfer.Interfaces; using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Logging; -using Azure.Storage.Blobs.Specialized; -using Azure.Storage.Blobs.Models; namespace Cosmos.DataTransfer.AzureBlobStorage { @@ -14,12 +15,31 @@ public async Task WriteToTargetAsync(Func writeToStream, IConfigur var settings = config.Get(); settings.Validate(); - logger.LogInformation("Saving file '{File}' to Azure Blob Container '{ContainerName}'", settings.BlobName, settings.ContainerName); + BlobContainerClient account; + if (settings.UseRbacAuth) + { + logger.LogInformation("Connecting to Storage account {AccountEndpoint} using {UseRbacAuth} with {EnableInteractiveCredentials}'", settings.AccountEndpoint, nameof(AzureBlobSourceSettings.UseRbacAuth), nameof(AzureBlobSourceSettings.EnableInteractiveCredentials)); + + var credential = new DefaultAzureCredential(includeInteractiveCredentials: settings.EnableInteractiveCredentials); +#pragma warning disable CS8604 // Validate above ensures AccountEndpoint is not null + var baseUri = new Uri(settings.AccountEndpoint); + var blobContainerUri = new Uri(baseUri, settings.ContainerName); +#pragma warning restore CS8604 // Restore warning + + account = new BlobContainerClient(blobContainerUri, credential); + } + else + { + logger.LogInformation("Connecting to Storage account using {ConnectionString}'", nameof(AzureBlobSourceSettings.ConnectionString)); + + account = new BlobContainerClient(settings.ConnectionString, settings.ContainerName); + } - var account = new BlobContainerClient(settings.ConnectionString, settings.ContainerName); await account.CreateIfNotExistsAsync(cancellationToken: cancellationToken); var blob = account.GetBlockBlobClient(settings.BlobName); + logger.LogInformation("Saving file '{File}' to Azure Blob Container '{ContainerName}'", settings.BlobName, settings.ContainerName); + await using var blobStream = await blob.OpenWriteAsync(true, new BlockBlobOpenWriteOptions { BufferSize = settings.MaxBlockSizeinKB * 1024L, diff --git a/Extensions/AzureBlob/Cosmos.DataTransfer.AzureBlobStorage/AzureBlobDataSource.cs b/Extensions/AzureBlob/Cosmos.DataTransfer.AzureBlobStorage/AzureBlobDataSource.cs index 896a62f..ee36691 100644 --- a/Extensions/AzureBlob/Cosmos.DataTransfer.AzureBlobStorage/AzureBlobDataSource.cs +++ b/Extensions/AzureBlob/Cosmos.DataTransfer.AzureBlobStorage/AzureBlobDataSource.cs @@ -1,10 +1,11 @@ -using System.Runtime.CompilerServices; +using Azure.Identity; using Azure.Storage.Blobs; +using Azure.Storage.Blobs.Models; +using Azure.Storage.Blobs.Specialized; using Cosmos.DataTransfer.Interfaces; using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Logging; -using Azure.Storage.Blobs.Specialized; -using Azure.Storage.Blobs.Models; +using System.Runtime.CompilerServices; namespace Cosmos.DataTransfer.AzureBlobStorage; @@ -15,14 +16,33 @@ public class AzureBlobDataSource : IComposableDataSource var settings = config.Get(); settings.Validate(); - logger.LogInformation("Reading file '{File}' from Azure Blob Container '{ContainerName}'", settings.BlobName, settings.ContainerName); + BlobContainerClient account; + if (settings.UseRbacAuth) + { + logger.LogInformation("Connecting to Storage account {AccountEndpoint} using {UseRbacAuth} with {EnableInteractiveCredentials}'", settings.AccountEndpoint, nameof(AzureBlobSourceSettings.UseRbacAuth), nameof(AzureBlobSourceSettings.EnableInteractiveCredentials)); - var account = new BlobContainerClient(settings.ConnectionString, settings.ContainerName); + var credential = new DefaultAzureCredential(includeInteractiveCredentials: settings.EnableInteractiveCredentials); +#pragma warning disable CS8604 // Validate above ensures AccountEndpoint is not null + var baseUri = new Uri(settings.AccountEndpoint); + var blobContainerUri = new Uri(baseUri, settings.ContainerName); +#pragma warning restore CS8604 // Restore warning + + account = new BlobContainerClient(blobContainerUri, credential); + } + else + { + logger.LogInformation("Connecting to Storage account using {ConnectionString}'", nameof(AzureBlobSourceSettings.ConnectionString)); + + account = new BlobContainerClient(settings.ConnectionString, settings.ContainerName); + } + var blob = account.GetBlockBlobClient(settings.BlobName); var existsResponse = await blob.ExistsAsync(cancellationToken: cancellationToken); if (!existsResponse) yield break; + logger.LogInformation("Reading file '{File}' from Azure Blob Container '{ContainerName}'", settings.BlobName, settings.ContainerName); + var readStream = await blob.OpenReadAsync(new BlobOpenReadOptions(false) { BufferSize = settings.ReadBufferSizeInKB, diff --git a/Extensions/AzureBlob/Cosmos.DataTransfer.AzureBlobStorage/AzureBlobSinkSettings.cs b/Extensions/AzureBlob/Cosmos.DataTransfer.AzureBlobStorage/AzureBlobSinkSettings.cs index 5d3a5c8..477d6fc 100644 --- a/Extensions/AzureBlob/Cosmos.DataTransfer.AzureBlobStorage/AzureBlobSinkSettings.cs +++ b/Extensions/AzureBlob/Cosmos.DataTransfer.AzureBlobStorage/AzureBlobSinkSettings.cs @@ -1,14 +1,15 @@ using Cosmos.DataTransfer.Interfaces; -using System.ComponentModel.DataAnnotations; using Cosmos.DataTransfer.Interfaces.Manifest; +using System.ComponentModel.DataAnnotations; namespace Cosmos.DataTransfer.AzureBlobStorage { - public class AzureBlobSinkSettings : IDataExtensionSettings + public class AzureBlobSinkSettings : IDataExtensionSettings, IValidatableObject { - [Required] [SensitiveValue] - public string ConnectionString { get; set; } = null!; + public string? ConnectionString { get; set; } = null!; + + public string? AccountEndpoint { get; set; } = null!; [Required] public string ContainerName { get; set; } = null!; @@ -17,5 +18,22 @@ public class AzureBlobSinkSettings : IDataExtensionSettings public string BlobName { get; set; } = null!; public int? MaxBlockSizeinKB { get; set; } + + public bool UseRbacAuth { get; set; } + + public bool EnableInteractiveCredentials { get; set; } + + public virtual IEnumerable Validate(ValidationContext validationContext) + { + if (!UseRbacAuth && string.IsNullOrEmpty(ConnectionString)) + { + yield return new ValidationResult($"{nameof(ConnectionString)} must be specified unless {nameof(UseRbacAuth)} is true", new[] { nameof(ConnectionString) }); + } + + if (UseRbacAuth && string.IsNullOrEmpty(AccountEndpoint)) + { + yield return new ValidationResult($"{nameof(AccountEndpoint)} must be specified unless {nameof(UseRbacAuth)} is false", new[] { nameof(AccountEndpoint) }); + } + } } } \ No newline at end of file diff --git a/Extensions/AzureBlob/Cosmos.DataTransfer.AzureBlobStorage/AzureBlobSourceSettings.cs b/Extensions/AzureBlob/Cosmos.DataTransfer.AzureBlobStorage/AzureBlobSourceSettings.cs index af10068..9fab3e9 100644 --- a/Extensions/AzureBlob/Cosmos.DataTransfer.AzureBlobStorage/AzureBlobSourceSettings.cs +++ b/Extensions/AzureBlob/Cosmos.DataTransfer.AzureBlobStorage/AzureBlobSourceSettings.cs @@ -1,14 +1,15 @@ using Cosmos.DataTransfer.Interfaces; -using System.ComponentModel.DataAnnotations; using Cosmos.DataTransfer.Interfaces.Manifest; +using System.ComponentModel.DataAnnotations; namespace Cosmos.DataTransfer.AzureBlobStorage; -public class AzureBlobSourceSettings : IDataExtensionSettings +public class AzureBlobSourceSettings : IDataExtensionSettings, IValidatableObject { - [Required] [SensitiveValue] - public string ConnectionString { get; set; } = null!; + public string? ConnectionString { get; set; } = null!; + + public string? AccountEndpoint { get; set; } = null!; [Required] public string ContainerName { get; set; } = null!; @@ -17,4 +18,21 @@ public class AzureBlobSourceSettings : IDataExtensionSettings public string BlobName { get; set; } = null!; public int? ReadBufferSizeInKB { get; set; } + + public bool UseRbacAuth { get; set; } + + public bool EnableInteractiveCredentials { get; set; } + + public virtual IEnumerable Validate(ValidationContext validationContext) + { + if (!UseRbacAuth && string.IsNullOrEmpty(ConnectionString)) + { + yield return new ValidationResult($"{nameof(ConnectionString)} must be specified unless {nameof(UseRbacAuth)} is true", new[] { nameof(ConnectionString) }); + } + + if (UseRbacAuth && string.IsNullOrEmpty(AccountEndpoint)) + { + yield return new ValidationResult($"{nameof(AccountEndpoint)} must be specified unless {nameof(UseRbacAuth)} is false", new[] { nameof(AccountEndpoint) }); + } + } } \ No newline at end of file diff --git a/Extensions/AzureBlob/Cosmos.DataTransfer.AzureBlobStorage/Cosmos.DataTransfer.AzureBlobStorage.csproj b/Extensions/AzureBlob/Cosmos.DataTransfer.AzureBlobStorage/Cosmos.DataTransfer.AzureBlobStorage.csproj index c0fe756..e51831c 100644 --- a/Extensions/AzureBlob/Cosmos.DataTransfer.AzureBlobStorage/Cosmos.DataTransfer.AzureBlobStorage.csproj +++ b/Extensions/AzureBlob/Cosmos.DataTransfer.AzureBlobStorage/Cosmos.DataTransfer.AzureBlobStorage.csproj @@ -7,6 +7,7 @@ + diff --git a/Extensions/AzureBlob/README.md b/Extensions/AzureBlob/README.md index c7046a1..c7e60f6 100644 --- a/Extensions/AzureBlob/README.md +++ b/Extensions/AzureBlob/README.md @@ -6,7 +6,10 @@ The Azure Blob Storage extension provides reading and writing of formatted files ## Settings -Source and Sink settings require the parameters shown below. +Source and sink require settings used to locate and access the Azure Blob Storage account. This can be done in one of two ways: + +- Using a `ConnectionString` that includes an AccountEndpoint and AccountKey +- Using RBAC (Role Based Access Control) by setting `UseRbacAuth` to true and specifying `AccountEndpoint` and optionally `EnableInteractiveCredentials` to prompt the user to log in to Azure if default credentials are not available. ### Source @@ -20,6 +23,18 @@ An optional `ReadBufferSizeInKB` parameter can be used to control stream bufferi } ``` +Or with RBAC: + +```json +{ + "AccountEndpoint": "https://.blob.core.windows.net", + "ContainerName": "", + "BlobName": "", + "UseRbacAuth": true, + "EnableInteractiveCredentials": true +} +``` + ### Sink An optional `MaxBlockSizeInKB` parameter can also be specified to control the transfer. diff --git a/Extensions/Cosmos/Cosmos.DataTransfer.CosmosExtension/Cosmos.DataTransfer.CosmosExtension.csproj b/Extensions/Cosmos/Cosmos.DataTransfer.CosmosExtension/Cosmos.DataTransfer.CosmosExtension.csproj index 647e5f0..803ebb0 100644 --- a/Extensions/Cosmos/Cosmos.DataTransfer.CosmosExtension/Cosmos.DataTransfer.CosmosExtension.csproj +++ b/Extensions/Cosmos/Cosmos.DataTransfer.CosmosExtension/Cosmos.DataTransfer.CosmosExtension.csproj @@ -8,7 +8,7 @@ - + diff --git a/Extensions/Cosmos/Cosmos.DataTransfer.CosmosExtension/CosmosSettingsBase.cs b/Extensions/Cosmos/Cosmos.DataTransfer.CosmosExtension/CosmosSettingsBase.cs index 7a03794..dcbfd28 100644 --- a/Extensions/Cosmos/Cosmos.DataTransfer.CosmosExtension/CosmosSettingsBase.cs +++ b/Extensions/Cosmos/Cosmos.DataTransfer.CosmosExtension/CosmosSettingsBase.cs @@ -14,7 +14,7 @@ public abstract class CosmosSettingsBase : IValidatableObject public string? WebProxy { get; set; } public bool UseRbacAuth { get; set; } public string? AccountEndpoint { get; set; } - public bool EnableInteractiveCredentials { get; set; } = true; + public bool EnableInteractiveCredentials { get; set; } public bool InitClientEncryption { get; set; } = false; public virtual IEnumerable Validate(ValidationContext validationContext) diff --git a/Extensions/Cosmos/README.md b/Extensions/Cosmos/README.md index a460fb9..02a50b2 100644 --- a/Extensions/Cosmos/README.md +++ b/Extensions/Cosmos/README.md @@ -7,10 +7,12 @@ The Cosmos data transfer extension provides source and sink capabilities for rea ## Settings Source and sink require settings used to locate and access the Cosmos DB account. This can be done in one of two ways: + - Using a `ConnectionString` that includes an AccountEndpoint and AccountKey -- Using RBAC (Role Based Access Control) by setting `UseRbacAuth` to true and specifying `AccountEndpoint` and optionally `EnableInteractiveCredentials` to prompt the user to log in to Azure if default credentials are not available. +- Using RBAC (Role Based Access Control) by setting `UseRbacAuth` to true and specifying `AccountEndpoint` and optionally `EnableInteractiveCredentials` to prompt the user to log in to Azure if default credentials are not available. See ([migrate-passwordless](https://learn.microsoft.com/azure/cosmos-db/nosql/migrate-passwordless?tabs=sign-in-azure-cli%2Cdotnet%2Cazure-portal-create%2Cazure-portal-associate%2Capp-service-identity) for how to configure Cosmos DB for passwordless access. + +Source and sink settings also both require parameters to specify the data location within a Cosmos DB account: -Source and sink settings also both require parameters to specify the data location within a Cosmos DB account: - `Database` - `Container`