Skip to content

Commit

Permalink
Add telemetry for repository usage (#112133)
Browse files Browse the repository at this point in the history
Adds to the `GET _cluster/stats` endpoint information about the snapshot
repositories in use, including their types, whether they are read-only
or read-write, and for Azure repositories the kind of credentials in
use.
  • Loading branch information
DaveCTurner authored Aug 27, 2024
1 parent fb32adc commit f150e2c
Show file tree
Hide file tree
Showing 27 changed files with 400 additions and 37 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/112133.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 112133
summary: Add telemetry for repository usage
area: Snapshot/Restore
type: enhancement
issues: []
31 changes: 30 additions & 1 deletion docs/reference/cluster/stats.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -1282,6 +1282,31 @@ They are included here for expert users, but should otherwise be ignored.
=====

====
`repositories`::
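(object) Contains statistics about the <<snapshot-restore,snapshot>> repositories defined in the cluster, broken down
by repository type. Each key of this object is a repository type (for example `s3` or `azure`), and its value is an
object containing the properties listed below.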
+
.Properties of `repositories`
[%collapsible%open]
=====

`count`:::
(integer) The number of repositories of this type in the cluster.

`read_only`:::
(integer) The number of repositories of this type in the cluster which are registered as read-only.

`read_write`:::
(integer) The number of repositories of this type in the cluster which are not registered as read-only.

Each repository type may also include other statistics about the repositories of that type here.

=====
====

[[cluster-stats-api-example]]
==== {api-examples-title}

Expand Down Expand Up @@ -1579,6 +1604,9 @@ The API returns the following response:
},
"snapshots": {
...
},
"repositories": {
...
}
}
--------------------------------------------------
Expand All @@ -1589,6 +1617,7 @@ The API returns the following response:
// TESTRESPONSE[s/"count": \{[^\}]*\}/"count": $body.$_path/]
// TESTRESPONSE[s/"packaging_types": \[[^\]]*\]/"packaging_types": $body.$_path/]
// TESTRESPONSE[s/"snapshots": \{[^\}]*\}/"snapshots": $body.$_path/]
// TESTRESPONSE[s/"repositories": \{[^\}]*\}/"repositories": $body.$_path/]
// TESTRESPONSE[s/"field_types": \[[^\]]*\]/"field_types": $body.$_path/]
// TESTRESPONSE[s/"runtime_field_types": \[[^\]]*\]/"runtime_field_types": $body.$_path/]
// TESTRESPONSE[s/"search": \{[^\}]*\}/"search": $body.$_path/]
Expand All @@ -1600,7 +1629,7 @@ The API returns the following response:
// the plugins that will be in it. And because we figure folks don't need to
// see an exhaustive list anyway.
// 2. Similarly, ignore the contents of `network_types`, `discovery_types`,
// `packaging_types` and `snapshots`.
// `packaging_types`, `snapshots` and `repositories`.
// 3. Ignore the contents of the (nodes) count object, as what's shown here
// depends on the license. Voting-only nodes are e.g. only shown when this
// test runs with a basic license.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;

import static org.elasticsearch.core.Strings.format;
Expand Down Expand Up @@ -175,4 +176,9 @@ protected ByteSizeValue chunkSize() {
public boolean isReadOnly() {
    // Simply exposes the value held in this repository's readonly field.
    return this.readonly;
}

/**
 * Returns the extra usage features for this repository.
 * <p>
 * The client name is read from this repository's metadata settings and the lookup itself is delegated to the
 * storage service.
 *
 * @return the set of extra usage feature names reported by the storage service for this repository's client
 */
@Override
protected Set<String> getExtraUsageFeatures() {
    final String clientName = Repository.CLIENT_NAME.get(getMetadata().settings());
    return storageService.getExtraUsageFeatures(clientName);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.net.Proxy;
import java.net.URL;
import java.util.Map;
import java.util.Set;
import java.util.function.BiConsumer;

import static java.util.Collections.emptyMap;
Expand Down Expand Up @@ -165,4 +166,15 @@ public void refreshSettings(Map<String, AzureStorageSettings> clientsSettings) {
this.storageSettings = Map.copyOf(clientsSettings);
// clients are built lazily by {@link client(String, LocationMode)}
}

/**
 * For Azure repositories, we report the different kinds of credentials in use in the telemetry.
 *
 * @param clientName the name of the client whose settings are consulted
 * @return the credentials usage features of the named client's settings, or an empty set if they could not be
 *         obtained for any reason
 */
public Set<String> getExtraUsageFeatures(String clientName) {
    Set<String> usageFeatures;
    try {
        usageFeatures = getClientSettings(clientName).credentialsUsageFeatures();
    } catch (Exception ignored) {
        // Any failure while resolving the client settings results in an empty feature set rather than an
        // exception reaching the caller.
        usageFeatures = Set.of();
    }
    return usageFeatures;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

final class AzureStorageSettings {

Expand Down Expand Up @@ -130,6 +131,7 @@ final class AzureStorageSettings {
private final int maxRetries;
private final Proxy proxy;
private final boolean hasCredentials;
private final Set<String> credentialsUsageFeatures;

private AzureStorageSettings(
String account,
Expand All @@ -150,6 +152,12 @@ private AzureStorageSettings(
this.endpointSuffix = endpointSuffix;
this.timeout = timeout;
this.maxRetries = maxRetries;
this.credentialsUsageFeatures = Strings.hasText(key) ? Set.of("uses_key_credentials")
: Strings.hasText(sasToken) ? Set.of("uses_sas_token")
: SocketAccess.doPrivilegedException(() -> System.getenv("AZURE_FEDERATED_TOKEN_FILE")) == null
? Set.of("uses_default_credentials", "uses_managed_identity")
: Set.of("uses_default_credentials", "uses_workload_identity");

// Register the proxy if we have any
// Validate proxy settings
if (proxyType.equals(Proxy.Type.DIRECT) && ((proxyPort != 0) || Strings.hasText(proxyHost))) {
Expand Down Expand Up @@ -366,4 +374,8 @@ private String deriveURIFromSettings(boolean isPrimary) {
throw new IllegalArgumentException(e);
}
}

/**
 * Returns the usage feature names describing the kind of credentials these settings use.
 * <p>
 * The set is computed once in the constructor: {@code uses_key_credentials} when a key is present, otherwise
 * {@code uses_sas_token} when a SAS token is present, otherwise {@code uses_default_credentials} together with
 * either {@code uses_managed_identity} or {@code uses_workload_identity}, depending on whether the
 * {@code AZURE_FEDERATED_TOKEN_FILE} environment variable is set.
 *
 * @return the set of credential usage feature names
 */
public Set<String> credentialsUsageFeatures() {
    return this.credentialsUsageFeatures;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,19 @@ setup:
snapshot: missing
wait_for_completion: true

---
"Usage stats":
- requires:
cluster_features:
- repositories.supports_usage_stats
reason: requires this feature

- do:
cluster.stats: {}

- gte: { repositories.azure.count: 1 }
- gte: { repositories.azure.read_write: 1 }

---
teardown:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,19 @@ setup:
snapshot: missing
wait_for_completion: true

---
"Usage stats":
- requires:
cluster_features:
- repositories.supports_usage_stats
reason: requires this feature

- do:
cluster.stats: {}

- gte: { repositories.gcs.count: 1 }
- gte: { repositories.gcs.read_write: 1 }

---
teardown:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,19 @@ setup:
snapshot: missing
wait_for_completion: true

---
"Usage stats":
- requires:
cluster_features:
- repositories.supports_usage_stats
reason: requires this feature

- do:
cluster.stats: {}

- gte: { repositories.s3.count: 1 }
- gte: { repositories.s3.read_write: 1 }

---
teardown:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,19 @@ setup:
snapshot: missing
wait_for_completion: true

---
"Usage stats":
- requires:
cluster_features:
- repositories.supports_usage_stats
reason: requires this feature

- do:
cluster.stats: {}

- gte: { repositories.s3.count: 1 }
- gte: { repositories.s3.read_write: 1 }

---
teardown:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,19 @@ setup:
snapshot: missing
wait_for_completion: true

---
"Usage stats":
- requires:
cluster_features:
- repositories.supports_usage_stats
reason: requires this feature

- do:
cluster.stats: {}

- gte: { repositories.s3.count: 1 }
- gte: { repositories.s3.read_write: 1 }

---
teardown:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,19 @@ setup:
snapshot: missing
wait_for_completion: true

---
"Usage stats":
- requires:
cluster_features:
- repositories.supports_usage_stats
reason: requires this feature

- do:
cluster.stats: {}

- gte: { repositories.s3.count: 1 }
- gte: { repositories.s3.read_write: 1 }

---
teardown:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,19 @@ setup:
snapshot: missing
wait_for_completion: true

---
"Usage stats":
- requires:
cluster_features:
- repositories.supports_usage_stats
reason: requires this feature

- do:
cluster.stats: {}

- gte: { repositories.s3.count: 1 }
- gte: { repositories.s3.read_write: 1 }

---
teardown:

Expand Down
1 change: 1 addition & 0 deletions server/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@
org.elasticsearch.cluster.metadata.MetadataFeatures,
org.elasticsearch.rest.RestFeatures,
org.elasticsearch.indices.IndicesFeatures,
org.elasticsearch.repositories.RepositoriesFeatures,
org.elasticsearch.action.admin.cluster.allocation.AllocationStatsFeatures,
org.elasticsearch.index.mapper.MapperFeatures,
org.elasticsearch.ingest.IngestGeoIpFeatures,
Expand Down
2 changes: 2 additions & 0 deletions server/src/main/java/org/elasticsearch/TransportVersions.java
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ static TransportVersion def(int id) {
public static final TransportVersion RANK_DOCS_RETRIEVER = def(8_729_00_0);
public static final TransportVersion ESQL_ES_FIELD_CACHED_SERIALIZATION = def(8_730_00_0);
public static final TransportVersion ADD_MANAGE_ROLES_PRIVILEGE = def(8_731_00_0);
public static final TransportVersion REPOSITORIES_TELEMETRY = def(8_732_00_0);

/*
* STOP! READ THIS FIRST! No, really,
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,29 +20,33 @@
import org.elasticsearch.core.Nullable;

import java.io.IOException;
import java.util.Objects;

public class ClusterStatsNodeResponse extends BaseNodeResponse {

private final NodeInfo nodeInfo;
private final NodeStats nodeStats;
private final ShardStats[] shardsStats;
private ClusterHealthStatus clusterStatus;
private final ClusterHealthStatus clusterStatus;
private final SearchUsageStats searchUsageStats;
private final RepositoryUsageStats repositoryUsageStats;

public ClusterStatsNodeResponse(StreamInput in) throws IOException {
super(in);
clusterStatus = null;
if (in.readBoolean()) {
clusterStatus = ClusterHealthStatus.readFrom(in);
}
this.clusterStatus = in.readOptionalWriteable(ClusterHealthStatus::readFrom);
this.nodeInfo = new NodeInfo(in);
this.nodeStats = new NodeStats(in);
shardsStats = in.readArray(ShardStats::new, ShardStats[]::new);
this.shardsStats = in.readArray(ShardStats::new, ShardStats[]::new);
if (in.getTransportVersion().onOrAfter(TransportVersions.V_8_6_0)) {
searchUsageStats = new SearchUsageStats(in);
} else {
searchUsageStats = new SearchUsageStats();
}
if (in.getTransportVersion().onOrAfter(TransportVersions.REPOSITORIES_TELEMETRY)) {
repositoryUsageStats = RepositoryUsageStats.readFrom(in);
} else {
repositoryUsageStats = RepositoryUsageStats.EMPTY;
}
}

public ClusterStatsNodeResponse(
Expand All @@ -51,14 +55,16 @@ public ClusterStatsNodeResponse(
NodeInfo nodeInfo,
NodeStats nodeStats,
ShardStats[] shardsStats,
SearchUsageStats searchUsageStats
SearchUsageStats searchUsageStats,
RepositoryUsageStats repositoryUsageStats
) {
super(node);
this.nodeInfo = nodeInfo;
this.nodeStats = nodeStats;
this.shardsStats = shardsStats;
this.clusterStatus = clusterStatus;
this.searchUsageStats = searchUsageStats;
this.searchUsageStats = Objects.requireNonNull(searchUsageStats);
this.repositoryUsageStats = Objects.requireNonNull(repositoryUsageStats);
}

public NodeInfo nodeInfo() {
Expand All @@ -85,20 +91,22 @@ public SearchUsageStats searchUsageStats() {
return searchUsageStats;
}

/**
 * Returns the repository usage stats carried by this node response.
 * <p>
 * When the response was read from a stream older than {@code TransportVersions.REPOSITORIES_TELEMETRY} this is
 * {@code RepositoryUsageStats.EMPTY}; the non-stream constructor rejects {@code null}.
 *
 * @return the repository usage stats for this node
 */
public RepositoryUsageStats repositoryUsageStats() {
    return this.repositoryUsageStats;
}

@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
if (clusterStatus == null) {
out.writeBoolean(false);
} else {
out.writeBoolean(true);
out.writeByte(clusterStatus.value());
}
out.writeOptionalWriteable(clusterStatus);
nodeInfo.writeTo(out);
nodeStats.writeTo(out);
out.writeArray(shardsStats);
if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_6_0)) {
searchUsageStats.writeTo(out);
}
if (out.getTransportVersion().onOrAfter(TransportVersions.REPOSITORIES_TELEMETRY)) {
repositoryUsageStats.writeTo(out);
} // else just drop these stats, ok for bwc
}
}
Loading

0 comments on commit f150e2c

Please sign in to comment.