Skip to content

Commit

Permalink
[infra] Move en words into separate word list, set up lists for es and pt (#5011)
Browse files Browse the repository at this point in the history
  • Loading branch information
chalin authored Aug 9, 2024
1 parent 3c3342e commit be54d6b
Show file tree
Hide file tree
Showing 7 changed files with 179 additions and 150 deletions.
159 changes: 21 additions & 138 deletions .cspell.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@ ignorePaths:
- vendors.yaml
- content/ja
- content/zh
# words here are only listed for their spelling, if there is a certain way
# to write a word (e.g. OpenTelemetry vs Opentelemetry or cloud native vs
# cloud-native), edit the text-lint rules in .textlintrc.yml
patterns:
- name: CodeBlock
pattern: |
Expand All @@ -28,140 +25,26 @@ languageSettings:
- languageId: markdown
ignoreRegExpList:
- CodeBlock
dictionaryDefinitions:
- name: en-words
path: .cspell/en-words.txt
- name: es-palabras
path: .cspell/es-palabras.txt
- name: pt-palavras
path: .cspell/pt-palavras.txt
dictionaries:
[pt-br, es-es, companies, cpp, dotnet, golang, node, softwareTerms]
words:
- accountingservice
- actix
- adservice
- alibaba
- Alolita
- APAC
- appdynamics
- appender
- appenders
- aspecto
- autoconfiguration
- autoinstrumentation
- autoloaded
- autoloader
- autoloading
- backoff
- caml
- cartservice
- cassandra
- checkoutservice
- Chronosphere
- classpath
- cncf
- currencyservice
- daemonset
- datadog
- discoverability
# Natural languages
- es-es
- pt-br
# Local word lists
- en-words
- es-palabras
- pt-palavras
# Programming languages and software terms
- cpp
- dotnet
- Dyla
- dynatrace
- emailservice
- EMEA
- erlang
- errorf
- featureflagservice
- frauddetectionservice
- frontendproxy
- github
- gitpod
- grafana
- Hausenblas
- hugo
- initializers
- instana
- istio
- jaeger
- jaegertracing
- javaagent
- javadoc
- jboss
- jdbc
- julia
- Juraci
- knative
- kotlin
- Kröhling
- kubecon
- kubernetes
- laravel
- lifecycles
- lightstep
- Loffay
- Mancuso
- microservices
- mongodb
- ndjson
- Neumann
- nginx
- ocaml
- opamp
- opencensus
- opensearch
- opentelemetry
- opentracing
- openzipkin
- OSTIF
- otel
- otel-comms
- otel-endusers
- otelcol
- otep
- otlp
- packagist
- pageinfo
- Paixão
- parentbased
- Pavol
- paymentservice
- postgresql
- Pranay
- Prateek
- prepper
- productcatalogservice
- prometheus
- proto
- protobuf
- quantile
- quantiles
- quarkus
- quoteservice
- recommendationservice
- redis
- relref
- Rexed
- Rynn
- semconv
- servlet
- Severin
- Sharma
- shippingservice
- Socha
- Stalnaker
- stdoutmetric
- Strimzi
- symfony
- tabpane
- textlint
- thanos
- tocstop
- tracecontext
- traceidratio
- traceloop
- traceparent
- traefik
- Trask
- uids
- unsampled
- unshallow
- upstreamed
- Villela
- wordpress
- WSGI
- zend
- zipkin
- golang
- node
- softwareTerms
# Other
- companies
138 changes: 138 additions & 0 deletions .cspell/en-words.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# Words are listed here only for their spelling. If a word must be capitalized
# in a certain way, add a capitalization rule to the textlint rules in
# .textlintrc.yml

accountingservice
actix
adservice
alibaba
Alolita
APAC
appdynamics
appender
appenders
aspecto
autoconfiguration
autoinstrumentation
autoloaded
autoloader
autoloading
backoff
caml
cartservice
cassandra
checkoutservice
Chronosphere
classpath
cncf
currencyservice
daemonset
datadog
discoverability
dotnet
Dyla
dynatrace
emailservice
EMEA
erlang
errorf
featureflagservice
frauddetectionservice
frontendproxy
github
gitpod
grafana
Hausenblas
hugo
initializers
instana
istio
jaeger
jaegertracing
javaagent
javadoc
jboss
jdbc
julia
Juraci
knative
kotlin
Kröhling
kubecon
kubernetes
laravel
lifecycles
lightstep
Loffay
Mancuso
microservices
mongodb
ndjson
Neumann
nginx
ocaml
opamp
opencensus
opensearch
opentelemetry
opentracing
openzipkin
OSTIF
otel
otel-comms
otel-endusers
otelcol
otep
otlp
packagist
pageinfo
Paixão
parentbased
Pavol
paymentservice
postgresql
Pranay
Prateek
prepper
productcatalogservice
prometheus
proto
protobuf
quantile
quantiles
quarkus
quoteservice
recommendationservice
redis
relref
Rexed
Rynn
semconv
servlet
Severin
Sharma
shippingservice
Socha
Stalnaker
stdoutmetric
Strimzi
symfony
tabpane
textlint
thanos
tocstop
tracecontext
traceidratio
traceloop
traceparent
traefik
Trask
uids
unsampled
unshallow
upstreamed
Villela
wordpress
WSGI
zend
zipkin
2 changes: 2 additions & 0 deletions .cspell/es-palabras.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
observabilidad
telemetría
Empty file added .cspell/pt-palavras.txt
Empty file.
1 change: 0 additions & 1 deletion content/es/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ developer_note:
de imagen que contenga la palabra "background" en su nombre.
show_banner: true
default_lang_commit: 7ac35d6b429165bbe6c28bdd91feeae83fd35142
cSpell:ignore: observabilidad telemetría
---

<div class="d-none"><a rel="me" href="https://fosstodon.org/@opentelemetry"></a></div>
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
"diff:check": "npm run _diff:check || (echo; echo 'WARNING: the files above have not been committed'; echo)",
"diff:fail": "npm run _diff:check || (echo; echo 'ERROR: the files above have changed. Locally rerun `npm run test-and-fix` and commit changes'; echo; exit 1)",
"fix:all": "npm run seq -- $(npm -s run _list:fix:*)",
"fix:dict": "find content layouts -name \"*.md\" -print0 | xargs -0 scripts/normalize-cspell-front-matter.pl",
"fix:dict": "find content/en layouts -name \"*.md\" -print0 | xargs -0 scripts/normalize-cspell-front-matter.pl",
"fix:filenames": "npm run _rename-to-kebab-case",
"fix:format": "npm run format",
"fix:i18n:all": "scripts/check-i18n.sh -a -c HEAD",
Expand Down
27 changes: 17 additions & 10 deletions scripts/normalize-cspell-front-matter.pl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
my $lineLenLimit = 79;
my $last_file = '';
my $last_line = '';
my %dictionary = getSiteWideDictWords('.cspell.yml', '.textlintrc.yml');
my %dictionary = getSiteWideDictWords('.cspell/en-words.txt', '.textlintrc.yml');

while (<>) {
if (/^\s*(spelling: |-\s*)?cSpell:ignore:?\s*(.*)$/
Expand Down Expand Up @@ -43,15 +43,16 @@ sub getSiteWideDictWords {
my $dictionary_file = shift;
my $textlintrc_file = shift;

my %dictionary = readYmlListOfWords('words', $dictionary_file);
my %textlintDictionary = readYmlListOfWords('terms', $textlintrc_file);
my %dictionary = readYmOrPlainlListOfWords('', $dictionary_file);
my %textlintDictionary = readYmOrPlainlListOfWords('terms', $textlintrc_file);
# Merge dictionaries
@dictionary{keys %textlintDictionary} = values %textlintDictionary;

return %dictionary;
}

sub readYmlListOfWords {
sub readYmOrPlainlListOfWords {
# Read plain list of words if $wordsFieldName is empty
my $wordsFieldName = shift;
my $file_path = shift;
my $fh = FileHandle->new($file_path, "r") or die "Could not open file '$file_path': $!";
Expand All @@ -60,18 +61,24 @@ sub readYmlListOfWords {

my %dictionary;
my $indentation = '';
my $in_terms = 0;
my $in_terms = $wordsFieldName eq '' ? 1 : 0;
foreach my $line (@lines) {
chomp $line;
if ($line =~ /^(\s*)$wordsFieldName:/) {
$indentation = $1 || '';
next if $line =~ /^\s*#|^\s*$/;
# print "> $line\n" if $wordsFieldName;

if ($wordsFieldName && $line =~ /^(\s*)$wordsFieldName:/) {
$indentation = "$1 - " || '';
$in_terms = 1;
# print STDOUT "Found terms!";
} elsif ($line =~ /^$indentation - (\w[^\s]*)$/ && $in_terms) {
# print "> FOUND $wordsFieldName keyword\n"
} elsif ($line =~ /^$indentation(\w[^\s]*)$/ && $in_terms) {
my $term = $1;
$dictionary{$term} = 1 if $term;
} elsif ($line !~ /^ / && $in_terms) {
} elsif ($wordsFieldName && $line !~ /^ / && $in_terms) {
$in_terms = 0;
# print "FINISHE word list\n" if $in_terms;
} else {
# print "OOPS LINE DID NOT MATCH\n" if $in_terms;
}
}

Expand Down

0 comments on commit be54d6b

Please sign in to comment.