Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Profiling] Switch to OTEL cloud.provider, cloud.region, host.type #106656

Merged
merged 2 commits into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,28 @@
"type": "date",
"format": "epoch_second"
},
"host.id": {
"type": "keyword"
"host": {
"properties": {
"arch": {
"type": "keyword"
},
"id": {
"type": "keyword"
},
"type": {
"type": "keyword"
}
}
},
"host.arch": {
"type": "keyword"
"cloud": {
"properties": {
"provider": {
"type": "keyword"
},
"region": {
"type": "keyword"
}
}
},
"profiling": {
"properties": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{"create": {"_index": "profiling-hosts","_id":"eLH27YsBj2lLi3tJYlvr"}}
{"profiling.project.id":100,"host.id":"8457605156473051743","@timestamp":1700504426,"ecs.version":"1.12.0","profiling.agent.build_timestamp":1688111067,"profiling.instance.private_ipv4s":["192.168.1.2"],"ec2.instance_life_cycle":"on-demand","profiling.agent.config.map_scale_factor":0,"ec2.instance_type":"i3.2xlarge","profiling.host.ip":"192.168.1.2","profiling.agent.config.bpf_log_level":0,"profiling.host.sysctl.net.core.bpf_jit_enable":1,"profiling.agent.config.file":"/etc/prodfiler/prodfiler.conf","ec2.local_ipv4":"192.168.1.2","profiling.agent.config.no_kernel_version_check":false,"host.arch":"amd64","profiling.host.tags":["cloud_provider:aws","cloud_environment:qa","cloud_region:eu-west-1"],"profiling.agent.config.probabilistic_threshold":100,"profiling.agent.config.disable_tls":false,"profiling.agent.config.tracers":"all","profiling.agent.start_time":1700090045589,"profiling.agent.config.max_elements_per_interval":800,"ec2.placement.region":"eu-west-1","profiling.agent.config.present_cpu_cores":8,"profiling.host.kernel_version":"9.9.9-0-aws","profiling.agent.config.bpf_log_size":65536,"profiling.agent.config.known_traces_entries":65536,"profiling.host.sysctl.kernel.unprivileged_bpf_disabled":1,"profiling.agent.config.verbose":false,"profiling.agent.config.probabilistic_interval":"1m0s","ec2.placement.availability_zone_id":"euw1-az1","ec2.security_groups":"","ec2.local_hostname":"ip-192-168-1-2.eu-west-1.compute.internal","ec2.placement.availability_zone":"eu-west-1c","profiling.agent.config.upload_symbols":false,"profiling.host.sysctl.kernel.bpf_stats_enabled":0,"profiling.host.name":"ip-192-168-1-2","ec2.mac":"00:11:22:33:44:55","profiling.host.kernel_proc_version":"Linux version 
9.9.9-0-aws","profiling.agent.config.cache_directory":"/var/cache/optimyze/","profiling.agent.version":"v8.12.0","ec2.hostname":"ip-192-168-1-2.eu-west-1.compute.internal","profiling.agent.config.elastic_mode":false,"ec2.ami_id":"ami-aaaaaaaaaaa","ec2.instance_id":"i-0b999999999999999"}
{"profiling.project.id":100,"host.id":"8457605156473051743","@timestamp":1700504426,"ecs.version":"1.12.0","profiling.agent.build_timestamp":1688111067,"profiling.instance.private_ipv4s":["192.168.1.2"],"ec2.instance_life_cycle":"on-demand","profiling.agent.config.map_scale_factor":0,"host.type":"i3.2xlarge","profiling.host.ip":"192.168.1.2","profiling.agent.config.bpf_log_level":0,"profiling.host.sysctl.net.core.bpf_jit_enable":1,"profiling.agent.config.file":"/etc/prodfiler/prodfiler.conf","ec2.local_ipv4":"192.168.1.2","profiling.agent.config.no_kernel_version_check":false,"host.arch":"amd64","profiling.host.tags":["cloud_provider:aws","cloud_environment:qa","cloud_region:eu-west-1"],"profiling.agent.config.probabilistic_threshold":100,"profiling.agent.config.disable_tls":false,"profiling.agent.config.tracers":"all","profiling.agent.start_time":1700090045589,"profiling.agent.config.max_elements_per_interval":800,"cloud.provider":"aws","cloud.region":"eu-west-1","profiling.agent.config.present_cpu_cores":8,"profiling.host.kernel_version":"9.9.9-0-aws","profiling.agent.config.bpf_log_size":65536,"profiling.agent.config.known_traces_entries":65536,"profiling.host.sysctl.kernel.unprivileged_bpf_disabled":1,"profiling.agent.config.verbose":false,"profiling.agent.config.probabilistic_interval":"1m0s","ec2.placement.availability_zone_id":"euw1-az1","ec2.security_groups":"","ec2.local_hostname":"ip-192-168-1-2.eu-west-1.compute.internal","ec2.placement.availability_zone":"eu-west-1c","profiling.agent.config.upload_symbols":false,"profiling.host.sysctl.kernel.bpf_stats_enabled":0,"profiling.host.name":"ip-192-168-1-2","ec2.mac":"00:11:22:33:44:55","profiling.host.kernel_proc_version":"Linux version 
9.9.9-0-aws","profiling.agent.config.cache_directory":"/var/cache/optimyze/","profiling.agent.version":"v8.12.0","ec2.hostname":"ip-192-168-1-2.eu-west-1.compute.internal","profiling.agent.config.elastic_mode":false,"ec2.ami_id":"ami-aaaaaaaaaaa","ec2.instance_id":"i-0b999999999999999"}
{"create": {"_index": "profiling-hosts", "_id": "u_fHlYwBkmZvQ6tVo1Lr"}}
{"profiling.project.id":100,"host.id":"7416508186220657211","@timestamp":1703319912,"ecs.version":"1.12.0","profiling.agent.version":"8.11.0","profiling.agent.config.map_scale_factor":0,"profiling.agent.config.probabilistic_threshold":100,"profiling.host.name":"ip-192-186-1-3","profiling.agent.config.no_kernel_version_check":false,"profiling.host.sysctl.net.core.bpf_jit_enable":1,"profiling.agent.config.elastic_mode":false,"azure.compute.vmsize":"Standard_D4s_v3","azure.compute.environment":"AzurePublicCloud","profiling.agent.config.bpf_log_level":0,"profiling.agent.config.known_traces_entries":65536,"profiling.agent.config.ca_address":"example.com:443","profiling.agent.config.tags":"cloud_provider:azure;cloud_environment:qa;cloud_region:eastus2","profiling.host.tags":["cloud_provider:azure","cloud_environment:qa","cloud_region:eastus2"],"profiling.host.kernel_version":"9.9.9-0-azure","profiling.agent.revision":"head-52cc2030","azure.compute.subscriptionid":"1-2-3-4-5","profiling.host.sysctl.kernel.bpf_stats_enabled":0,"host.arch":"amd64","azure.compute.zone":"3","profiling.agent.config.cache_directory":"/var/cache/Elastic/universal-profiling","azure.compute.name":"example-qa-eastus2-001-v1-zone3_6","profiling.agent.config.probabilistic_interval":"1m0s","azure.compute.location":"eastus2","azure.compute.version":"1234.20230510.233254","profiling.instance.private_ipv4s":["192.168.1.3"],"profiling.agent.build_timestamp":1699000836,"profiling.agent.config.file":"/etc/Elastic/universal-profiling/pf-host-agent.conf","profiling.agent.config.bpf_log_size":65536,"profiling.host.sysctl.kernel.unprivileged_bpf_disabled":1,"profiling.agent.config.tracers":"all","profiling.agent.config.present_cpu_cores":4,"profiling.agent.start_time":1702306987358,"profiling.agent.config.disable_tls":false,"azure.compute.ostype":"Linux","profiling.host.ip":"192.168.1.3","profiling.agent.config.max_elements_per_interval":400,"profiling.agent.config.upload_symbols":false,"azure.compute.tags":"boo
tstrap-version:v1;ece-id:001;environment:qa;identifier:v1;initial-config:;managed-by:terraform;monitored-by:core-infrastructure;owner:core-infrastructure;region_type:ess;role:blueprint;secondary_role:;vars-identifier:eastus2-001-v1","profiling.host.kernel_proc_version":"Linux version 9.9.9-0-azure","profiling.agent.config.verbose":false,"azure.compute.vmid":"1-2-3-4-5"}
{"profiling.project.id":100,"host.id":"7416508186220657211","@timestamp":1703319912,"ecs.version":"1.12.0","profiling.agent.version":"8.11.0","profiling.agent.config.map_scale_factor":0,"profiling.agent.config.probabilistic_threshold":100,"profiling.host.name":"ip-192-186-1-3","profiling.agent.config.no_kernel_version_check":false,"profiling.host.sysctl.net.core.bpf_jit_enable":1,"profiling.agent.config.elastic_mode":false,"host.type":"Standard_D4s_v3","azure.compute.environment":"AzurePublicCloud","profiling.agent.config.bpf_log_level":0,"profiling.agent.config.known_traces_entries":65536,"profiling.agent.config.ca_address":"example.com:443","profiling.agent.config.tags":"cloud_provider:azure;cloud_environment:qa;cloud_region:eastus2","profiling.host.tags":["cloud_provider:azure","cloud_environment:qa","cloud_region:eastus2"],"profiling.host.kernel_version":"9.9.9-0-azure","profiling.agent.revision":"head-52cc2030","azure.compute.subscriptionid":"1-2-3-4-5","profiling.host.sysctl.kernel.bpf_stats_enabled":0,"host.arch":"amd64","azure.compute.zone":"3","profiling.agent.config.cache_directory":"/var/cache/Elastic/universal-profiling","azure.compute.name":"example-qa-eastus2-001-v1-zone3_6","profiling.agent.config.probabilistic_interval":"1m0s","cloud.provider":"azure","cloud.region":"eastus2","azure.compute.version":"1234.20230510.233254","profiling.instance.private_ipv4s":["192.168.1.3"],"profiling.agent.build_timestamp":1699000836,"profiling.agent.config.file":"/etc/Elastic/universal-profiling/pf-host-agent.conf","profiling.agent.config.bpf_log_size":65536,"profiling.host.sysctl.kernel.unprivileged_bpf_disabled":1,"profiling.agent.config.tracers":"all","profiling.agent.config.present_cpu_cores":4,"profiling.agent.start_time":1702306987358,"profiling.agent.config.disable_tls":false,"azure.compute.ostype":"Linux","profiling.host.ip":"192.168.1.3","profiling.agent.config.max_elements_per_interval":400,"profiling.agent.config.upload_symbols":false,"azure.compute.tags":
"bootstrap-version:v1;ece-id:001;environment:qa;identifier:v1;initial-config:;managed-by:terraform;monitored-by:core-infrastructure;owner:core-infrastructure;region_type:ess;role:blueprint;secondary_role:;vars-identifier:eastus2-001-v1","profiling.host.kernel_proc_version":"Linux version 9.9.9-0-azure","profiling.agent.config.verbose":false,"azure.compute.vmid":"1-2-3-4-5"}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,45 @@ final class InstanceType implements ToXContentObject {
* @return the {@link InstanceType}
*/
public static InstanceType fromHostSource(Map<String, Object> source) {
    // Preferred path: the host document carries "cloud.provider" directly, so the
    // region and instance type are read from their sibling keys without further parsing.
    final String cloudProvider = (String) source.get("cloud.provider");
    if (cloudProvider != null) {
        return new InstanceType(cloudProvider, (String) source.get("cloud.region"), (String) source.get("host.type"));
    }

    // Check and handle pre-8.14.0 host sources for backwards-compatibility.
    final InstanceType legacy = fromObsoleteHostSource(source);
    if (legacy != null) {
        return legacy;
    }

    // Last resort: support for configured tags (ECS).
    // Example of tags:
    // "profiling.host.tags": [
    // "cloud_provider:aws",
    // "cloud_environment:qa",
    // "cloud_region:eu-west-1",
    // ],
    String tagProvider = null;
    String tagRegion = null;
    List<String> hostTags = listOf(source.get("profiling.host.tags"));
    for (String rawTag : hostTags) {
        final String tag = rawTag.toLowerCase(Locale.ROOT);
        // Split at the first ':' only; tags without a separator carry no key/value pair.
        final int separator = tag.indexOf(':');
        if (separator < 0) {
            continue;
        }
        final String key = tag.substring(0, separator);
        final String value = tag.substring(separator + 1);
        if ("cloud_provider".equals(key)) {
            tagProvider = value;
        } else if ("cloud_region".equals(key)) {
            tagRegion = value;
        }
    }

    // The tags never carry an instance type, hence the null name.
    return new InstanceType(tagProvider, tagRegion, null);
}

private static InstanceType fromObsoleteHostSource(Map<String, Object> source) {
// Check and handle AWS.
String region = (String) source.get("ec2.placement.region");
if (region != null) {
Expand Down Expand Up @@ -67,30 +106,7 @@ public static InstanceType fromHostSource(Map<String, Object> source) {
return new InstanceType("azure", region, instanceType);
}

// Support for configured tags (ECS).
// Example of tags:
// "profiling.host.tags": [
// "cloud_provider:aws",
// "cloud_environment:qa",
// "cloud_region:eu-west-1",
// ],
String provider = null;
region = null;
List<String> tags = listOf(source.get("profiling.host.tags"));
for (String tag : tags) {
String[] kv = tag.toLowerCase(Locale.ROOT).split(":", 2);
if (kv.length != 2) {
continue;
}
if ("cloud_provider".equals(kv[0])) {
provider = kv[1];
}
if ("cloud_region".equals(kv[0])) {
region = kv[1];
}
}

return new InstanceType(provider, region, null);
return null;
}

@SuppressWarnings("unchecked")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,14 @@ public class ProfilingIndexTemplateRegistry extends IndexTemplateRegistry {
// version 4: Added 'service.name' keyword mapping to profiling-events
// version 5: Add optional component template '<idx-name>@custom' to all index templates that reference component templates
// version 6: Added 'host.arch' keyword mapping to profiling-hosts
public static final int INDEX_TEMPLATE_VERSION = 6;
// version 7: Added 'host.type', 'cloud.provider', 'cloud.region' keyword mappings to profiling-hosts
public static final int INDEX_TEMPLATE_VERSION = 7;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I missed this in the last review but it is actually not required that we bump the index template version on every change. Instead, it is sufficient to bump it once per release because no cluster can observe the intermediate index template versions.

Long story short: Can you please change this value to 5 (which applies to 8.14.0) and merge the comments accordingly to include all changes that went into version 5?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is a good point, and one I had already considered. My conclusion was that I do not know whether version 5 or 6 has already been deployed (locally) and used by developers. And if you don't know that, why risk breaking things, even if it affects just one or two people?

What is the downside of increasing the version several times per release cycle?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed offline, it's less risky to bump the version every time, even if this means version jumps between releases (which are harmless), so let's stick to this convention in the future.


// history for individual indices / index templates. Only bump these for breaking changes that require creating a new index
public static final int PROFILING_EVENTS_VERSION = 2;
public static final int PROFILING_EXECUTABLES_VERSION = 1;
public static final int PROFILING_METRICS_VERSION = 1;
public static final int PROFILING_HOSTS_VERSION = 1;
public static final int PROFILING_HOSTS_VERSION = 2;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I decided on a roll-over since that makes it easier to reason about the changes that need to be made in the collector.

public static final int PROFILING_STACKFRAMES_VERSION = 1;
public static final int PROFILING_STACKTRACES_VERSION = 1;
public static final int PROFILING_SYMBOLS_VERSION = 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,41 @@

public class HostMetadataTests extends ESTestCase {
public void testCreateFromSourceAWS() {
    // Values fed in through the "host.*" / "cloud.*" keys and expected back unchanged.
    final String expectedHostId = "1440256254710195396";
    final String expectedArch = "amd64";
    final String expectedProvider = "aws";
    final String expectedRegion = "eu-west-1";
    final String expectedInstanceType = "md5x.large";

    // tag::noformat
    HostMetadata metadata = HostMetadata.fromSource(
        Map.of(
            "host.id", expectedHostId,
            "host.arch", expectedArch,
            "host.type", expectedInstanceType,
            "cloud.provider", expectedProvider,
            "cloud.region", expectedRegion
        )
    );
    // end::noformat

    // Host-level fields.
    assertEquals(expectedHostId, metadata.hostID);
    assertEquals(expectedArch, metadata.hostArchitecture);
    // Instance-type fields derived from the cloud/host keys.
    assertEquals(expectedProvider, metadata.instanceType.provider);
    assertEquals(expectedRegion, metadata.instanceType.region);
    assertEquals(expectedInstanceType, metadata.instanceType.name);
}

public void testCreateFromSourceAWSCompat() {
final String hostID = "1440256254710195396";
final String arch = "x86_64";
final String provider = "aws";
final String region = "eu-west-1";
final String instanceType = "md5x.large";

// tag::noformat
HostMetadata host = HostMetadata.fromSource(
Map.of(
HostMetadata host = HostMetadata.fromSource (
Map.of (
"host.id", hostID,
"host.arch", arch,
"ec2.instance_type", instanceType,
Expand All @@ -39,6 +65,32 @@ public void testCreateFromSourceAWS() {
}

public void testCreateFromSourceGCP() {
    final String expectedHostId = "1440256254710195396";
    final String expectedArch = "amd64";
    final String expectedProvider = "gcp";
    // Region values to round-trip, including empty strings and repeated entries.
    final String[] candidateRegions = { "", "", "europe-west1", "europewest", "europe-west1" };

    for (String candidateRegion : candidateRegions) {
        // tag::noformat
        HostMetadata metadata = HostMetadata.fromSource(
            Map.of(
                "host.id", expectedHostId,
                "host.arch", expectedArch,
                "cloud.provider", expectedProvider,
                "cloud.region", candidateRegion
            )
        );
        // end::noformat

        assertEquals(expectedHostId, metadata.hostID);
        assertEquals(expectedArch, metadata.hostArchitecture);
        assertEquals(expectedProvider, metadata.instanceType.provider);
        assertEquals(candidateRegion, metadata.instanceType.region);
        // No "host.type" key is supplied, and the instance type name is expected to be empty.
        assertEquals("", metadata.instanceType.name);
    }
}

public void testCreateFromSourceGCPCompat() {
final String hostID = "1440256254710195396";
final String arch = "x86_64";
final String provider = "gcp";
Expand Down Expand Up @@ -142,8 +194,8 @@ public void testCreateFromSourceECS() {
Map.of(
"host.id", hostID,
"host.arch", arch,
"profiling.host.tags", Arrays.asList(
"cloud_provider:"+provider, "cloud_environment:qa", "cloud_region:"+region)
"profiling.host.tags", Arrays.asList (
"cloud_provider:" + provider, "cloud_environment:qa", "cloud_region:" + region)
)
);
// end::noformat
Expand Down
Loading