From 427d396573c0f5e408ea9423f3db4cf637af51db Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Wed, 16 Mar 2022 19:44:50 +0100 Subject: [PATCH] Extracting vmId form Azure instance metadata (#27692) * Extracting vmId form Azure instance metadata (which can be mapped to containerId/nodeId) easily * Update CHANGELOG.md * Responded to code review feedback * Update ClientTelemetryInfo.java * Update CHANGELOG.md * Added more tests * Update pom.xml * Update module-info.java * Dummy * Update pom.xml * Fixes static analysis findings * Reverting chaning package name * Addressed code review comments * Update RxDocumentClientImpl.java * Reacted to code review comments * Update spotbugs-exclude.xml * Revert spotbug changes --- sdk/cosmos/azure-cosmos/CHANGELOG.md | 3 + .../DiagnosticsClientContext.java | 9 +++ .../implementation/RxDocumentClientImpl.java | 8 ++- .../clienttelemetry/AzureVMMetadata.java | 13 ++++ .../clienttelemetry/ClientTelemetry.java | 72 +++++++++++++++---- .../clienttelemetry/ClientTelemetryInfo.java | 13 +++- .../ClientTelemetrySerializer.java | 1 + .../StoreClientFactory.java | 2 - .../RntbdChannelAcquisitionTimeline.java | 1 - .../com/azure/cosmos/ClientTelemetryTest.java | 5 +- .../azure/cosmos/CosmosDiagnosticsTest.java | 35 +++++++++ .../CilentConfigDiagnosticsTest.java | 13 ++++ 12 files changed, 154 insertions(+), 21 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md index 97229036077b9..f5adc91366cc0 100644 --- a/sdk/cosmos/azure-cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md @@ -1,6 +1,9 @@ ## Release History ### 4.28.0-beta.1 (Unreleased) +#### Features Added +* Added the "VM Unique ID" - see [Accessing and Using Azure VM Unique ID](https://azure.microsoft.com/blog/accessing-and-using-azure-vm-unique-id/) - to the request diagnostics. This information helps to simplify investigating any network issues between an application hosted in Azure and the corresponding Cosmos DB service endpoint. - See [PR 27692](https://github.com/Azure/azure-sdk-for-java/pull/27692) + #### Key Bugs Fixes * Added `decodeTime` in `CosmosDiagnostics` - See [PR 22808](https://github.com/Azure/azure-sdk-for-java/pull/22808) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DiagnosticsClientContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DiagnosticsClientContext.java index 26d1339a2c270..3c77ebac31140 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DiagnosticsClientContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DiagnosticsClientContext.java @@ -6,6 +6,7 @@ import com.azure.cosmos.ConnectionMode; import com.azure.cosmos.ConsistencyLevel; import com.azure.cosmos.CosmosDiagnostics; +import com.azure.cosmos.implementation.clienttelemetry.ClientTelemetry; import com.azure.cosmos.implementation.directconnectivity.RntbdTransportClient; import com.azure.cosmos.implementation.guava27.Strings; import com.azure.cosmos.implementation.http.HttpClientConfig; @@ -50,6 +51,7 @@ public void serialize(DiagnosticsClientContext clientContext, JsonGenerator gene generator.writeStartObject(); try { generator.writeNumberField("id", clientContext.getConfig().getClientId()); + generator.writeStringField("machineId", ClientTelemetry.getMachineId(clientContext)); generator.writeStringField("connectionMode", clientContext.getConfig().getConnectionMode().toString()); generator.writeNumberField("numberOfClients", clientContext.getConfig().getActiveClientsCount()); generator.writeObjectFieldStart("connCfg"); @@ -87,6 +89,11 @@ class DiagnosticsClientConfig { private RntbdTransportClient.Options options; private String rntbdConfigAsString; private ConnectionMode connectionMode; + private String machineId; + + public void withMachineId(String machineId) { + this.machineId = machineId; + } public void withActiveClientCounter(AtomicInteger activeClientsCnt) { this.activeClientsCnt = activeClientsCnt; @@ -178,6 +185,8 @@ public int getClientId() { return this.clientId; } + public String getMachineId() { return this.machineId; } + public int getActiveClientsCount() { return this.activeClientsCnt != null ? this.activeClientsCnt.get() : -1; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 0610c594dce39..07ccb682af3ec 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -12,7 +12,6 @@ import com.azure.cosmos.CosmosDiagnostics; import com.azure.cosmos.DirectConnectionConfig; import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; -import com.azure.cosmos.implementation.ApiType; import com.azure.cosmos.implementation.batch.BatchResponseParser; import com.azure.cosmos.implementation.batch.PartitionKeyRangeServerBatchRequest; import com.azure.cosmos.implementation.batch.ServerBatchRequest; @@ -110,6 +109,7 @@ */ public class RxDocumentClientImpl implements AsyncDocumentClient, IAuthorizationTokenProvider, CpuMemoryListener, DiagnosticsClientContext { + private static final String tempMachineId = "uuid:" + UUID.randomUUID(); private static final AtomicInteger activeClientsCnt = new AtomicInteger(0); private static final AtomicInteger clientIdGenerator = new AtomicInteger(0); private static final Range RANGE_INCLUDING_ALL_PARTITION_KEY_RANGES = new Range<>( @@ -289,7 +289,7 @@ private RxDocumentClientImpl(URI serviceEndpoint, ApiType apiType) { activeClientsCnt.incrementAndGet(); - this.clientId = clientIdGenerator.getAndDecrement(); + this.clientId = clientIdGenerator.incrementAndGet(); this.diagnosticsClientConfig = new DiagnosticsClientConfig(); this.diagnosticsClientConfig.withClientId(this.clientId); this.diagnosticsClientConfig.withActiveClientCounter(activeClientsCnt); @@ -350,6 +350,7 @@ private RxDocumentClientImpl(URI serviceEndpoint, this.diagnosticsClientConfig.withMultipleWriteRegionsEnabled(this.connectionPolicy.isMultipleWriteRegionsEnabled()); this.diagnosticsClientConfig.withEndpointDiscoveryEnabled(this.connectionPolicy.isEndpointDiscoveryEnabled()); this.diagnosticsClientConfig.withPreferredRegions(this.connectionPolicy.getPreferredRegions()); + this.diagnosticsClientConfig.withMachineId(tempMachineId); boolean disableSessionCapturing = (ConsistencyLevel.SESSION != consistencyLevel && !sessionCapturingOverrideEnabled); @@ -457,7 +458,7 @@ public void init(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot, Func collectionCache); updateGatewayProxy(); - clientTelemetry = new ClientTelemetry(null, UUID.randomUUID().toString(), + clientTelemetry = new ClientTelemetry(this, null, UUID.randomUUID().toString(), ManagementFactory.getRuntimeMXBean().getName(), userAgentContainer.getUserAgent(), connectionPolicy.getConnectionMode(), globalEndpointManager.getLatestDatabaseAccount().getId(), null, null, this.reactorHttpClient, connectionPolicy.isClientTelemetryEnabled(), this, this.connectionPolicy.getPreferredRegions()); @@ -4058,6 +4059,7 @@ private RxStoreModel getStoreProxy(RxDocumentServiceRequest request) { public void close() { logger.info("Attempting to close client {}", this.clientId); if (!closed.getAndSet(true)) { + activeClientsCnt.decrementAndGet(); logger.info("Shutting down ..."); logger.info("Closing Global Endpoint Manager ..."); LifeCycleUtils.closeQuietly(this.globalEndpointManager); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/clienttelemetry/AzureVMMetadata.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/clienttelemetry/AzureVMMetadata.java index a94ea77720c23..6ace1a7438f6c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/clienttelemetry/AzureVMMetadata.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/clienttelemetry/AzureVMMetadata.java @@ -29,6 +29,10 @@ public String getVmSize() { return compute != null ? compute.getVmSize() : StringUtils.EMPTY; } + public String getVmId() { + return compute != null ? compute.getVmId() : StringUtils.EMPTY; + } + public Compute getCompute() { return compute; } @@ -44,6 +48,7 @@ public static class Compute { private String azEnvironment; private String osType; private String vmSize; + private String vmId; public String getSku() { return sku; @@ -84,5 +89,13 @@ public String getVmSize() { public void setVmSize(String vmSize) { this.vmSize = vmSize; } + + public String getVmId() { + return vmId; + } + + public void setVmId(String vmId) { + this.vmId = vmId; + } } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/clienttelemetry/ClientTelemetry.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/clienttelemetry/ClientTelemetry.java index f570393466c4b..8e160adfd9142 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/clienttelemetry/ClientTelemetry.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/clienttelemetry/ClientTelemetry.java @@ -9,6 +9,7 @@ import com.azure.cosmos.implementation.Constants; import com.azure.cosmos.implementation.CosmosDaemonThreadFactory; import com.azure.cosmos.implementation.CosmosSchedulers; +import com.azure.cosmos.implementation.DiagnosticsClientContext; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.IAuthorizationTokenProvider; import com.azure.cosmos.implementation.RequestVerb; @@ -47,6 +48,7 @@ import java.util.Map; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; public class ClientTelemetry { public final static int ONE_KB_TO_BYTES = 1024; @@ -78,6 +80,8 @@ public class ClientTelemetry { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final static AtomicLong instanceCount = new AtomicLong(0); + private final static AtomicReference azureVmMetaDataSingleton = + new AtomicReference<>(null); private ClientTelemetryInfo clientTelemetryInfo; private final HttpClient httpClient; private final ScheduledThreadPoolExecutor scheduledExecutorService = new ScheduledThreadPoolExecutor(1, @@ -98,7 +102,8 @@ public class ClientTelemetry { private final IAuthorizationTokenProvider tokenProvider; private final String globalDatabaseAccountName; - public ClientTelemetry(Boolean acceleratedNetworking, + public ClientTelemetry(DiagnosticsClientContext diagnosticsClientContext, + Boolean acceleratedNetworking, String clientId, String processId, String userAgent, @@ -111,8 +116,17 @@ public ClientTelemetry(Boolean acceleratedNetworking, IAuthorizationTokenProvider tokenProvider, List preferredRegions ) { - clientTelemetryInfo = new ClientTelemetryInfo(clientId, processId, userAgent, connectionMode, - globalDatabaseAccountName, applicationRegion, hostEnvInfo, acceleratedNetworking, preferredRegions); + clientTelemetryInfo = new ClientTelemetryInfo( + getMachineId(diagnosticsClientContext), + clientId, + processId, + userAgent, + connectionMode, + globalDatabaseAccountName, + applicationRegion, + hostEnvInfo, + acceleratedNetworking, + preferredRegions); this.isClosed = false; this.httpClient = httpClient; this.isClientTelemetryEnabled = isClientTelemetryEnabled; @@ -125,6 +139,24 @@ public ClientTelemetryInfo getClientTelemetryInfo() { return clientTelemetryInfo; } + public static String getMachineId(DiagnosticsClientContext diagnosticsClientContext) { + AzureVMMetadata metadataSnapshot = azureVmMetaDataSingleton.get(); + + if (metadataSnapshot != null && metadataSnapshot.getVmId() != null) { + String machineId = "vmId:" + metadataSnapshot.getVmId(); + if (diagnosticsClientContext != null) { + diagnosticsClientContext.getConfig().withMachineId(machineId); + } + return machineId; + } + + if (diagnosticsClientContext == null) { + return ""; + } + + return diagnosticsClientContext.getConfig().getMachineId(); + } + public static void recordValue(ConcurrentDoubleHistogram doubleHistogram, long value) { try { doubleHistogram.recordValue(value); @@ -239,7 +271,21 @@ private Mono sendClientTelemetry() { }).subscribeOn(scheduler); } + private void populateAzureVmMetaData(AzureVMMetadata azureVMMetadata) { + this.clientTelemetryInfo.setApplicationRegion(azureVMMetadata.getLocation()); + this.clientTelemetryInfo.setMachineId("vmId:" + azureVMMetadata.getVmId()); + this.clientTelemetryInfo.setHostEnvInfo(azureVMMetadata.getOsType() + "|" + azureVMMetadata.getSku() + + "|" + azureVMMetadata.getVmSize() + "|" + azureVMMetadata.getAzEnvironment()); + } + private void loadAzureVmMetaData() { + AzureVMMetadata metadataSnapshot = azureVmMetaDataSingleton.get(); + + if (metadataSnapshot != null) { + this.populateAzureVmMetaData(metadataSnapshot); + return; + } + URI targetEndpoint = null; try { targetEndpoint = new URI(AZURE_VM_METADATA); @@ -253,16 +299,16 @@ private void loadAzureVmMetaData() { HttpRequest httpRequest = new HttpRequest(HttpMethod.GET, targetEndpoint, targetEndpoint.getPort(), httpHeaders); Mono httpResponseMono = this.httpClient.send(httpRequest); - httpResponseMono.flatMap(response -> response.bodyAsString()).map(metadataJson -> parse(metadataJson, - AzureVMMetadata.class)).doOnSuccess(azureVMMetadata -> { - this.clientTelemetryInfo.setApplicationRegion(azureVMMetadata.getLocation()); - this.clientTelemetryInfo.setHostEnvInfo(azureVMMetadata.getOsType() + "|" + azureVMMetadata.getSku() + - "|" + azureVMMetadata.getVmSize() + "|" + azureVMMetadata.getAzEnvironment()); - }).onErrorResume(throwable -> { - logger.info("Client is not on azure vm"); - logger.debug("Unable to get azure vm metadata", throwable); - return Mono.empty(); - }).subscribe(); + httpResponseMono + .flatMap(response -> response.bodyAsString()).map(metadataJson -> parse(metadataJson, + AzureVMMetadata.class)).doOnSuccess(metadata -> { + azureVmMetaDataSingleton.compareAndSet(null, metadata); + this.populateAzureVmMetaData(metadata); + }).onErrorResume(throwable -> { + logger.info("Client is not on azure vm"); + logger.debug("Unable to get azure vm metadata", throwable); + return Mono.empty(); + }).subscribe(); } private static T parse(String itemResponseBodyAsString, Class itemClassType) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/clienttelemetry/ClientTelemetryInfo.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/clienttelemetry/ClientTelemetryInfo.java index 214c226124dd6..43e3727e08fdc 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/clienttelemetry/ClientTelemetryInfo.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/clienttelemetry/ClientTelemetryInfo.java @@ -14,6 +14,7 @@ @JsonSerialize(using = ClientTelemetrySerializer.class) public class ClientTelemetryInfo { private String timeStamp; + private String machineId; private String clientId; private String processId; private String userAgent; @@ -28,7 +29,8 @@ public class ClientTelemetryInfo { private Map cacheRefreshInfoMap; private Map operationInfoMap; - public ClientTelemetryInfo(String clientId, + public ClientTelemetryInfo(String machineId, + String clientId, String processId, String userAgent, ConnectionMode connectionMode, @@ -37,6 +39,7 @@ public ClientTelemetryInfo(String clientId, String hostEnvInfo, Boolean acceleratedNetworking, List preferredRegions) { + this.machineId = machineId; this.clientId = clientId; this.processId = processId; this.userAgent = userAgent; @@ -108,6 +111,14 @@ public void setApplicationRegion(String applicationRegion) { this.applicationRegion = applicationRegion; } + public String getMachineId() { + return machineId; + } + + public void setMachineId(String machineId) { + this.machineId = machineId; + } + public String getHostEnvInfo() { return hostEnvInfo; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/clienttelemetry/ClientTelemetrySerializer.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/clienttelemetry/ClientTelemetrySerializer.java index 7fd4a0f6feb5d..ca6e855175acb 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/clienttelemetry/ClientTelemetrySerializer.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/clienttelemetry/ClientTelemetrySerializer.java @@ -20,6 +20,7 @@ public class ClientTelemetrySerializer extends StdSerializer