From d7abb542d46ed189a0135617c99ce356f1b692de Mon Sep 17 00:00:00 2001 From: Patrice Chalin Date: Fri, 7 Jul 2023 08:41:31 -0400 Subject: [PATCH] Update semantic-conventions (#2982) --- .htmltest.yml | 4 + content-modules/semantic-conventions | 2 +- content/en/docs/specs/semconv/_index.md | 6 - hugo.yaml | 10 +- scripts/content-modules/adjust-pages.pl | 106 +++++++++++- scripts/content-modules/cp-pages.sh | 4 +- scripts/content-modules/normalize-titles.pl | 180 ++++++++++++++++++++ static/refcache.json | 52 ++++++ 8 files changed, 345 insertions(+), 19 deletions(-) delete mode 100644 content/en/docs/specs/semconv/_index.md create mode 100755 scripts/content-modules/normalize-titles.pl diff --git a/.htmltest.yml b/.htmltest.yml index dee278b00fe0..d29cf03b3de6 100644 --- a/.htmltest.yml +++ b/.htmltest.yml @@ -33,6 +33,10 @@ IgnoreURLs: # list of regexs of paths or URLs to be ignored # Ignore links to GH repo content for now. - ^https?://github\.com/.*?/.*?/(blob|tree)/ + # Too many redirects as the server tries to figure out the country and language, + # e.g.: https://www.microsoft.com/en-ca/sql-server. + - ^https://www.microsoft.com/sql-server$ + # TODO: drop after fix to https://github.com/rust-lang/crates.io/issues/788 - ^https://crates\.io/crates # TODO: drop after fix to https://github.com/micrometer-metrics/micrometer-docs/issues/239 diff --git a/content-modules/semantic-conventions b/content-modules/semantic-conventions index 2246279243d7..4142dd35ddbe 160000 --- a/content-modules/semantic-conventions +++ b/content-modules/semantic-conventions @@ -1 +1 @@ -Subproject commit 2246279243d743c6e073470f4e3551826f3115bc +Subproject commit 4142dd35ddbe984fa685abcfd4da7ca0ed1a2ac4 diff --git a/content/en/docs/specs/semconv/_index.md b/content/en/docs/specs/semconv/_index.md deleted file mode 100644 index a4f31fd464aa..000000000000 --- a/content/en/docs/specs/semconv/_index.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -title: OpenTelemetry Semantic Conventions -linkTitle: Semantic Conventions -cascade: - draft: true ---- diff --git a/hugo.yaml b/hugo.yaml index 2151f5273937..be846c108599 100644 --- a/hugo.yaml +++ b/hugo.yaml @@ -187,14 +187,8 @@ module: target: content/docs/specs/otlp/_index.md - source: tmp/otlp/docs/img target: content/docs/specs/otlp/img - - source: tmp/semconv/specification/logs - target: content/docs/specs/semconv/logs - - source: tmp/semconv/specification/metrics - target: content/docs/specs/semconv/metrics - - source: tmp/semconv/specification/resource - target: content/docs/specs/semconv/resource - - source: tmp/semconv/specification/trace - target: content/docs/specs/semconv/trace + - source: tmp/semconv/docs + target: content/docs/specs/semconv - source: tmp/community/mission-vision-values.md target: content/community/mission.md - source: tmp/community/roadmap.md diff --git a/scripts/content-modules/adjust-pages.pl b/scripts/content-modules/adjust-pages.pl index a8cb70cf13fe..aafe59fdd1d8 100755 --- a/scripts/content-modules/adjust-pages.pl +++ b/scripts/content-modules/adjust-pages.pl @@ -14,6 +14,7 @@ my $otelSpecRepoUrl = 'https://github.com/open-telemetry/opentelemetry-specification'; my $otlpSpecRepoUrl = 'https://github.com/open-telemetry/opentelemetry-proto'; my $opAmpSpecRepoUrl = 'https://github.com/open-telemetry/opamp-spec'; +my $semconvSpecRepoUrl = 'https://github.com/open-telemetry/semantic-conventions'; my $semConvRef = "$otelSpecRepoUrl/blob/main/semantic_conventions/README.md"; my $specBasePath = '/docs/specs'; my $path_base_for_github_subdir = "content/en$specBasePath"; @@ -23,7 +24,7 @@ ); my $otelSpecVers = $versions{'spec:'}; my $otlpSpecVers = $versions{'otlp:'}; -my $unused; + # TODO: remove once OpAMP spec has been updated my $opampFrontMatter = << "EOS"; title: Open Agent Management Protocol @@ -36,6 +37,47 @@ to: specification.md EOS +# TODO: remove once Semconv spec has been updated +my $semconvFrontMatter = << "EOS"; +linkTitle: Semantic Conventions +no_list: true +cascade: + body_class: otel-docs-spec + github_repo: &repo $semconvSpecRepoUrl + github_subdir: docs + path_base_for_github_subdir: content/en/docs/specs/semconv/ + github_project_repo: *repo + draft: true +EOS + +# Adjust semconv title capitalization +sub toTitleCase($) { + my $str = shift; + my @specialCaseWords = qw( + CloudEvents + CouchDB + DynamoDB + FaaS + GraphQL + gRPC + HBase + MongoDB + OpenTelemetry + RabbitMQ + RocketMQ + ); + my %specialCases = map { lc($_) => $_ } @specialCaseWords; + while ($str =~ /(\b[A-Z]+\b)/g) { + $specialCases{lc $1} = $1; + } + $str =~ s/(\w+)/\u\L$1/g; + while (my ($key, $value) = each %specialCases) { + $str =~ s/\b\u\L$key\b/$value/g; + } + $str =~ s/\b(A|And|As|For|In|On)\b/\L$1/g; + return $str; +} + sub printTitleAndFrontMatter() { print "---\n"; if ($title eq 'OpenTelemetry Specification') { @@ -49,10 +91,52 @@ () $frontMatterFromFile .= "weight: 20\n" if $frontMatterFromFile !~ /^\s*weight/; } elsif ($title eq 'OpAMP: Open Agent Management Protocol') { $frontMatterFromFile = $opampFrontMatter unless $frontMatterFromFile; + } elsif ($title eq 'OpenTelemetry Semantic Conventions') { + $frontMatterFromFile = $semconvFrontMatter unless $frontMatterFromFile; + } elsif ($ARGV =~ /tmp\/semconv\/docs/) { + $title = toTitleCase($title); + $linkTitle = 'Database' if $title =~ /Database Calls and Systems$/i; + if ($linkTitle =~ /^Database (.*)$/i) { + $linkTitle = "$1"; + } elsif ($linkTitle =~ /^FaaS (.*)$/i) { + $linkTitle = "$1"; + } elsif ($linkTitle =~ /^HTTP (.*)$/i) { + $linkTitle = "$1"; + } elsif ($linkTitle =~ /^Microsoft (.*)$/i) { + $linkTitle = "$1"; + } elsif ($linkTitle =~ /^RPC (.*)$/i) { + $linkTitle = "$1"; + } elsif ($linkTitle =~ /^(Exceptions|Feature Flags) .. (.*)$/i) { + $linkTitle = "$2"; + } + if ($linkTitle =~ /^(.*) Attributes$/i && $title ne 'General Attributes') { + $linkTitle = "$1"; + } + $linkTitle = 'Attributes' if $title eq 'General Attributes'; + $linkTitle = 'Events' if $linkTitle eq 'Event'; + $linkTitle = 'Logs' if $title =~ /Logs Attributes$/; + $linkTitle = 'Connect' if $title =~ /Connect RPC$/; + $linkTitle = 'SQL' if $title =~ /SQL Databases$/; + $title = 'Semantic Conventions for Function-as-a-Service' if $title eq 'Semantic Conventions for FaaS'; + $linkTitle = 'Tracing Compatibility' if $linkTitle eq 'Tracing Compatibility Components'; + if ($title =~ /Semantic Convention\b/) { + $title =~ s/Semantic Convention\b/$&s/g; + } } my $titleMaybeQuoted = ($title =~ ':') ? "\"$title\"" : $title; print "title: $titleMaybeQuoted\n" if $frontMatterFromFile !~ /title: /; - ($unused, $linkTitle) = $title =~ /^OpenTelemetry (Protocol )?(.*)/; + printf STDOUT ">1 $title -> $linkTitle\n" if $title =~ /Function/; + if ($title =~ /^OpenTelemetry (Protocol )?(.*)/) { + $linkTitle = $2; + } elsif ($title =~ /^(.*?) Semantic Conventions?$/i && !$linkTitle) { + $linkTitle = $1; + } elsif ($title =~ /^Semantic Conventions? for (.*)$/i && !$linkTitle) { + $linkTitle = $1; + } + if ($linkTitle =~ /^Function.as.a.Service$/i) { + $linkTitle = 'FaaS'; + } + printf STDOUT ">2 $title -> $linkTitle\n" if $title =~ /Function/; # TODO: add to front matter of OTel spec file and drop next line: $linkTitle = 'Design Goals' if $title eq 'Design Goals for OpenTelemetry Wire Protocol'; print "linkTitle: $linkTitle\n" if $linkTitle and $frontMatterFromFile !~ /linkTitle: /; @@ -61,6 +145,13 @@ () print "path_base_for_github_subdir:\n"; print " from: $path_base_for_github_subdir/otel/$1_index.md\n"; print " to: $1README.md\n"; + } elsif ($ARGV =~ /tmp\/semconv\/docs\/(.*?)_index.md$/) { + print "path_base_for_github_subdir:\n"; + print " from: $path_base_for_github_subdir/semconv/$1_index.md\n"; + print " to: $1README.md\n"; + if ($linkTitle eq 'General') { + print "weight: -1\n"; + } } print "---\n"; } @@ -84,6 +175,7 @@ () } if(! $title) { ($title) = /^#\s+(.*)/; + $linkTitle = ''; printTitleAndFrontMatter() if $title; next; } @@ -97,6 +189,16 @@ () next; } + ## Semconv + + if ($ARGV =~ /\/semconv/) { + s|(\]\()/docs/|$1$specBasePath/semconv/|g; + s|(\]:\s*)/docs/|$1$specBasePath/semconv/|; + + # TODO: drop once semconv pages are fixed: + s|(/resource/faas\.md)#function-as-a-service|$1|; + } + # SPECIFICATION custom processing s|\(https://github.com/open-telemetry/opentelemetry-specification\)|($specBasePath/otel/)|; diff --git a/scripts/content-modules/cp-pages.sh b/scripts/content-modules/cp-pages.sh index f9e3d2d970c3..a0f7486b961b 100755 --- a/scripts/content-modules/cp-pages.sh +++ b/scripts/content-modules/cp-pages.sh @@ -68,8 +68,8 @@ echo "COMMUNITY pages: copied and processed" ## Semantic Conventions -SRC=content-modules/semantic-conventions/specification -DEST=$DEST_BASE/semconv/specification +SRC=content-modules/semantic-conventions/docs +DEST=$DEST_BASE/semconv/docs rm -Rf $DEST mkdir -p $DEST diff --git a/scripts/content-modules/normalize-titles.pl b/scripts/content-modules/normalize-titles.pl new file mode 100755 index 000000000000..3229eaea5101 --- /dev/null +++ b/scripts/content-modules/normalize-titles.pl @@ -0,0 +1,180 @@ +#!/usr/bin/perl -w -i +# +# DRAFT script used to normalize semconv doc-page tiles and add Hugo front matter +# + +$^W = 1; + +use strict; +use warnings; +use diagnostics; + +my $file = ''; +my $frontMatterFromFile = ''; +my $title = ''; +my $linkTitle = ''; +my $gD = 0; +my $otelSpecRepoUrl = 'https://github.com/open-telemetry/opentelemetry-specification'; +my $otlpSpecRepoUrl = 'https://github.com/open-telemetry/opentelemetry-proto'; +my $opAmpSpecRepoUrl = 'https://github.com/open-telemetry/opamp-spec'; +my $semconvSpecRepoUrl = 'https://github.com/open-telemetry/semantic-conventions'; +my $semConvRef = "$otelSpecRepoUrl/blob/main/semantic_conventions/README.md"; +my $specBasePath = '/docs/specs'; +my $path_base_for_github_subdir = "content/en$specBasePath"; +my %versions = qw( + spec: 1.22.0 + otlp: 1.0.0 +); +my $otelSpecVers = $versions{'spec:'}; +my $otlpSpecVers = $versions{'otlp:'}; + +# TODO: remove once OpAMP spec has been updated +my $opampFrontMatter = << "EOS"; +title: Open Agent Management Protocol +linkTitle: OpAMP +body_class: otel-docs-spec +github_repo: &repo $opAmpSpecRepoUrl +github_project_repo: *repo +path_base_for_github_subdir: + from: content/en/docs/specs/opamp/index.md + to: specification.md +EOS + +# TODO: remove once Semconv spec has been updated +my $semconvFrontMatter = << "EOS"; +linkTitle: Semantic Conventions +# no_list: true +cascade: + body_class: otel-docs-spec + github_repo: &repo $semconvSpecRepoUrl + github_subdir: docs + path_base_for_github_subdir: content/en/docs/specs/semconv/ + github_project_repo: *repo +EOS + +# Adjust semconv title capitalization +sub toTitleCase($) { + my $str = shift; + my @specialCaseWords = qw( + CloudEvents + CouchDB + DynamoDB + FaaS + GraphQL + gRPC + HBase + MongoDB + OpenTelemetry + RabbitMQ + RocketMQ + ); + my %specialCases = map { lc($_) => $_ } @specialCaseWords; + while ($str =~ /(\b[A-Z]+\b)/g) { + $specialCases{lc $1} = $1; + } + $str =~ s/(\w+)/\u\L$1/g; + while (my ($key, $value) = each %specialCases) { + $str =~ s/\b\u\L$key\b/$value/g; + } + $str =~ s/\b(A|And|As|For|In|On)\b/\L$1/g; + return $str; +} + +sub printTitleAndFrontMatter() { + my $frontMatter = ''; + my $originalTitle = $title; + if ($frontMatterFromFile) { + # printf STDOUT "> $file has front matter:\n$frontMatterFromFile\n"; # if $gD; + $frontMatterFromFile = '' unless $ARGV =~ /\/system\/[^R]/; + # printf STDOUT "> $file\n" if $ARGV =~ /\/system\b/; + } + if ($title eq 'OpenTelemetry Semantic Conventions') { + $frontMatterFromFile = $semconvFrontMatter unless $frontMatterFromFile; + } elsif ($ARGV =~ /json-rpc/) { + $title = 'Semantic Conventions for JSON-RPC'; + } + $title = toTitleCase($title); + my $titleMaybeQuoted = ($title =~ ':') ? "\"$title\"" : $title; + # $frontMatter .= "title: $titleMaybeQuoted\n" if $frontMatterFromFile !~ /title: /; + if ($title =~ /^OpenTelemetry (Protocol )?(.*)/) { + $linkTitle = $2; + } elsif ($title =~ /^(.*?) Semantic Conventions?$/i) { + $linkTitle = toTitleCase($1); + } elsif ($title =~ /^Semantic Conventions? for (.*)$/i) { + $linkTitle = toTitleCase($1); + } + if ($linkTitle =~ /^Function.as.a.Service$/i) { + $linkTitle = 'FaaS'; + } + $linkTitle = 'Database' if $title =~ /Database Calls and Systems$/i; + if ($linkTitle =~ /^Database (.*)$/i) { + $linkTitle = "$1"; + } elsif ($linkTitle =~ /^FaaS (.*)$/i) { + $linkTitle = "$1"; + } elsif ($linkTitle =~ /^HTTP (.*)$/i) { + $linkTitle = "$1"; + } elsif ($linkTitle =~ /^Microsoft (.*)$/i) { + $linkTitle = "$1"; + } elsif ($linkTitle =~ /^RPC (.*)$/i) { + $linkTitle = "$1"; + } elsif ($linkTitle =~ /^(Exceptions|Feature Flags) .. (.*)$/i) { + $linkTitle = "$2"; + } + if ($linkTitle =~ /^(.*) Attributes$/i && $title ne 'General Attributes') { + $linkTitle = "$1"; + } + $linkTitle = 'Attributes' if $title eq 'General Attributes'; + $linkTitle = 'Events' if $linkTitle eq 'Event'; + $linkTitle = 'Logs' if $title =~ /Logs Attributes$/; + $linkTitle = 'Connect' if $title =~ /Connect RPC$/; + $linkTitle = 'SQL' if $title =~ /SQL Databases$/; + $title = 'Semantic Conventions for Function-as-a-Service' if $title eq 'Semantic Conventions for FaaS'; + $linkTitle = 'Tracing Compatibility' if $linkTitle eq 'Tracing Compatibility Components'; + if ($title =~ /Semantic Convention\b/) { + $title =~ s/Semantic Convention\b/$&s/g; + printf STDOUT "> $title -> $linkTitle\n"; + } + + $frontMatter .= "linkTitle: $linkTitle\n" if $linkTitle and $frontMatterFromFile !~ /linkTitle: /; + $frontMatter .= $frontMatterFromFile if $frontMatterFromFile; + if ($ARGV =~ /docs\/(.*?)README.md$/) { + $frontMatter .= "path_base_for_github_subdir:\n"; + $frontMatter .= " from: $path_base_for_github_subdir/semconv/$1_index.md\n"; + $frontMatter .= " to: $1README.md\n"; + } + $frontMatter .= "weight: -1\n" if $title eq 'General Semantic Conventions'; + if ($frontMatter) { + $frontMatter = "\n"; + print "$frontMatter\n"; + } + print "# $title\n" +} + +# main + +while(<>) { + # printf STDOUT "$ARGV Got: $_" if $gD; + + if ($file ne $ARGV) { + $file = $ARGV; + # printf STDOUT "> $file\n"; # if $gD; + $frontMatterFromFile = ''; + $title = ''; + if (/^/; + $frontMatterFromFile .= $_; + } + next; + } + } + if(! $title) { + ($title) = /^#\s+(.*)/; + $linkTitle = ''; + printTitleAndFrontMatter() if $title; + next; + } + + print; +} diff --git a/static/refcache.json b/static/refcache.json index 0ac7a574ffc9..2424d677c7be 100644 --- a/static/refcache.json +++ b/static/refcache.json @@ -123,6 +123,10 @@ "StatusCode": 200, "LastSeen": "2023-06-29T12:28:24.723237-04:00" }, + "https://aws.amazon.com/dynamodb/": { + "StatusCode": 200, + "LastSeen": "2023-07-06T17:57:47.141869-04:00" + }, "https://aws.amazon.com/ecs/": { "StatusCode": 200, "LastSeen": "2023-06-29T13:39:56.688276-04:00" @@ -147,6 +151,10 @@ "StatusCode": 206, "LastSeen": "2023-06-29T12:28:09.411462-04:00" }, + "https://azure.microsoft.com/products/cosmos-db/": { + "StatusCode": 200, + "LastSeen": "2023-07-06T17:58:02.342209-04:00" + }, "https://bazel.build/": { "StatusCode": 200, "LastSeen": "2023-06-29T13:39:27.033525-04:00" @@ -163,6 +171,10 @@ "StatusCode": 200, "LastSeen": "2023-06-29T15:55:02.67164-04:00" }, + "https://cassandra.apache.org/": { + "StatusCode": 206, + "LastSeen": "2023-07-06T17:58:07.715816-04:00" + }, "https://cdn.jsdelivr.net/npm/mermaid@9.3.0/dist/mermaid.min.js": { "StatusCode": 206, "LastSeen": "2023-06-30T09:16:02.918744-04:00" @@ -411,6 +423,10 @@ "StatusCode": 206, "LastSeen": "2023-06-29T13:38:26.570786-04:00" }, + "https://couchdb.apache.org/": { + "StatusCode": 206, + "LastSeen": "2023-07-06T17:57:36.620937-04:00" + }, "https://danielabaron.me": { "StatusCode": 206, "LastSeen": "2023-06-29T16:16:03.628698-04:00" @@ -751,6 +767,10 @@ "StatusCode": 206, "LastSeen": "2023-06-29T16:16:24.371007-04:00" }, + "https://docs.docker.com/engine/api/v1.43/#tag/Container/operation/ContainerInspect": { + "StatusCode": 206, + "LastSeen": "2023-07-06T17:58:46.994822-04:00" + }, "https://docs.docker.com/engine/reference/run/#container-identification": { "StatusCode": 206, "LastSeen": "2023-06-29T16:07:35.082785-04:00" @@ -2815,6 +2835,10 @@ "StatusCode": 200, "LastSeen": "2023-06-30T11:43:23.38558-04:00" }, + "https://github.com/open-telemetry/semantic-conventions/issues/new": { + "StatusCode": 200, + "LastSeen": "2023-07-06T18:29:34.673526-04:00" + }, "https://github.com/opentracing": { "StatusCode": 200, "LastSeen": "2023-06-30T08:40:39.909083-04:00" @@ -3087,6 +3111,10 @@ "StatusCode": 206, "LastSeen": "2023-06-29T16:15:53.26896-04:00" }, + "https://hbase.apache.org/": { + "StatusCode": 206, + "LastSeen": "2023-07-06T17:58:13.148749-04:00" + }, "https://helm.sh": { "StatusCode": 206, "LastSeen": "2023-06-29T16:04:48.516571-04:00" @@ -3911,6 +3939,10 @@ "StatusCode": 206, "LastSeen": "2023-06-29T18:38:09.870827-04:00" }, + "https://raw.githubusercontent.com/elastic/elasticsearch-specification/main/output/schema/schema.json": { + "StatusCode": 206, + "LastSeen": "2023-07-06T17:58:33.547228-04:00" + }, "https://reactivex.io/RxJava/2.x/javadoc/index.html": { "StatusCode": 206, "LastSeen": "2023-06-29T18:48:07.197849-04:00" @@ -3919,6 +3951,10 @@ "StatusCode": 200, "LastSeen": "2023-06-30T16:26:43.920023-04:00" }, + "https://redis.com/": { + "StatusCode": 200, + "LastSeen": "2023-07-06T17:57:41.927739-04:00" + }, "https://redis.io/commands/hmset": { "StatusCode": 206, "LastSeen": "2023-06-29T18:39:33.193535-04:00" @@ -3935,6 +3971,10 @@ "StatusCode": 206, "LastSeen": "2023-06-30T09:40:22.564248-04:00" }, + "https://rocketmq.apache.org/": { + "StatusCode": 206, + "LastSeen": "2023-07-06T17:58:59.087632-04:00" + }, "https://rubiksqube.com/#/": { "StatusCode": 206, "LastSeen": "2023-06-29T18:50:46.242437-04:00" @@ -4367,6 +4407,10 @@ "StatusCode": 206, "LastSeen": "2023-06-30T09:33:47.364453-04:00" }, + "https://www.elastic.co/guide/en/elasticsearch/reference/current/search.html": { + "StatusCode": 206, + "LastSeen": "2023-07-06T17:58:38.92195-04:00" + }, "https://www.envoyproxy.io": { "StatusCode": 206, "LastSeen": "2023-06-30T08:51:02.248507-04:00" @@ -4603,6 +4647,10 @@ "StatusCode": 206, "LastSeen": "2023-06-30T16:26:05.762975-04:00" }, + "https://www.mongodb.com/": { + "StatusCode": 206, + "LastSeen": "2023-07-06T17:57:52.359895-04:00" + }, "https://www.nomadproject.io": { "StatusCode": 206, "LastSeen": "2023-06-30T08:50:51.721695-04:00" @@ -4755,6 +4803,10 @@ "StatusCode": 206, "LastSeen": "2023-06-30T08:38:26.348608-04:00" }, + "https://www.rabbitmq.com/": { + "StatusCode": 206, + "LastSeen": "2023-07-06T17:58:53.682551-04:00" + }, "https://www.reactive-streams.org/reactive-streams-1.0.1-javadoc/org/reactivestreams/Publisher.html": { "StatusCode": 206, "LastSeen": "2023-06-29T18:47:51.093028-04:00"