diff --git a/.github/workflows/check-i18n.yml b/.github/workflows/check-i18n.yml index 88d8f3033ab0..6595288da458 100644 --- a/.github/workflows/check-i18n.yml +++ b/.github/workflows/check-i18n.yml @@ -4,12 +4,21 @@ on: pull_request: jobs: - i18n-check: + check-i18n: name: I18N check runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: + ref: main fetch-depth: 0 # all - - run: scripts/i18n-check.sh -u - - run: .github/workflows/scripts/i18n-check-helper.sh + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 # all + # Only check for missing hashes in new files + - name: Any files missing hash key? + run: scripts/check-i18n.sh -n -x + - name: Any files with invalid hash keys? + run: scripts/check-i18n.sh + - run: .github/workflows/scripts/check-i18n-helper.sh diff --git a/.github/workflows/scripts/i18n-check-helper.sh b/.github/workflows/scripts/check-i18n-helper.sh similarity index 57% rename from .github/workflows/scripts/i18n-check-helper.sh rename to .github/workflows/scripts/check-i18n-helper.sh index 8887a0dcf45e..f83f3825279b 100755 --- a/.github/workflows/scripts/i18n-check-helper.sh +++ b/.github/workflows/scripts/check-i18n-helper.sh @@ -10,12 +10,12 @@ if [[ -z $CHANGES ]]; then fi cat </page.md`, this translation is based on a specific [`main` branch commit][main] of the corresponding English language version of the page at -`content/en//page.md`. Every localized page identifies this commit in -the page's front matter as follows: +`content/en//page.md`. In this repository, every localized page +identifies the English page commit in the localized page's front matter as +follows: ```markdown --- title: Your localized page title ... -## default_lang_commit: +default_lang_commit: ``` The front matter above would be in `content/zh//page.md`. The commit -corresponds to the latest commit of `content/en//page.md` in `main`. +would correspond to the latest commit of `content/en//page.md` in +`main`. 
### Tracking changes to English pages @@ -49,25 +51,26 @@ corresponding localized pages that need updating by running the following command: ```console -$ scripts/i18n-check.sh +$ npm run check:i18n 1 1 content/en/docs/kubernetes/_index.md - content/zh/docs/kubernetes/_index.md ... ``` -Specify the path to your localization to restrict the output, for example: +You can restrict the target pages to one or more localizations by providing +path(s) like this: ```sh -scripts/i18n-check.sh content/zh +npm run check:i18n -- content/zh ``` ### Viewing change details For any given localized pages that need updating, you can see the diff details of the corresponding English language pages by using the `-d` flag and providing -the paths to your localized pages. For example: +the paths to your localized pages, or omit the paths to see all. For example: ```console -$ scripts/i18n-check.sh -d content/zh/docs/kubernetes +$ npm run check:i18n -- -d content/zh/docs/kubernetes diff --git a/content/en/docs/kubernetes/_index.md b/content/en/docs/kubernetes/_index.md index 3592df5d..c7980653 100644 --- a/content/en/docs/kubernetes/_index.md @@ -87,12 +90,20 @@ index 3592df5d..c7980653 100644 As you create pages for your localization, remember to add `default_lang_commit` to the page front matter along with an appropriate commit hash from `main`. -If your translation is based on an English page in `main` at `HEAD`, then run -the following command to automatically add `default_lang_commit` to your page -file's front matter using the commit hash at `HEAD`: +If your page translation is based on an English page in `main` at `<hash>`, then +run the following command to automatically add `default_lang_commit` to your +page file's front matter using the commit `<hash>`. You can specify `HEAD` as an +argument if your pages are now synced with `main` at `HEAD`. 
For example: ```sh -scripts/i18n-check.sh -u +npm run check:i18n -- -n -c 1ca30b4d content/ja +npm run check:i18n -- -n -c HEAD content/zh/docs/concepts +``` + +To list localization page files with missing hash keys, run: + +```sh +npm run check:i18n -- -n ``` ### Updating `default_lang_commit` for existing pages @@ -105,11 +116,32 @@ commit hash. If your localized page now corresponds to the English language version in `main` at `HEAD`, then erase the commit hash value in the front matter, and run the -update command given in the previous section to automatically refresh the +**add** command given in the previous section to automatically refresh the `default_lang_commit` field value. {{% /alert %}} +If you have batch updated all of your localization pages that had drifted, you +can update the commit hash of these files using the `-c` flag followed by a +commit hash or 'HEAD' to use `main@HEAD`. + +```sh +npm run check:i18n -- -c <hash> +npm run check:i18n -- -c HEAD +``` + +{{% alert title="Important" %}} + +When you use `HEAD` as a hash specifier, the script will use the hash of `main` +at HEAD in your **local environment**. Make sure that you fetch and pull `main`, +if you want HEAD to correspond to `main` in GitHub. + +{{% /alert %}} + +### Script help + +For more details about the script, run `npm run check:i18n -- -h`. 
#######################################
# Parses the command-line flags and target paths into the script's global
# settings.
#
# Globals written:
#   LIST_KIND, COMMIT_HASH_ARG, FLAG_DIFF_DETAILS, EXTRA_DIFF_ARGS, FLAG_INFO,
#   FLAG_QUIET, FLAG_VERBOSE, FLAG_FAIL_ON_LIST_OR_MISSING, TARGET_PATHS
# Globals read:
#   DEFAULT_TARGET
# Arguments:
#   The script's command-line arguments ("$@").
# Outputs:
#   "Processing paths: ..." on stdout unless -q is given; errors on stderr.
# Exits:
#   through `usage` for -h and for invalid or conflicting flags; with status 1
#   when the "-a -c" confirmation is declined; with status 2 when a path given
#   on the command line does not exist.
#######################################
function process_CLI_args() {
  # Keep the getopts cursor local so parsing always starts at the first
  # argument passed to this function.
  local OPTIND=1
  local opt response path

  while getopts ":ac:dhinqvx" opt; do
    case $opt in
      a) LIST_KIND="ALL" ;;
      c) COMMIT_HASH_ARG="$OPTARG" ;;
      d)
        FLAG_DIFF_DETAILS=1
        # -d asks for the full diff, so drop the default --numstat summary.
        EXTRA_DIFF_ARGS=""
        ;;
      h) usage ;;
      i) FLAG_INFO=1 ;;
      n) LIST_KIND="NEW" ;;
      q) FLAG_QUIET=1 ;;
      v) FLAG_VERBOSE=1 ;;
      x) FLAG_FAIL_ON_LIST_OR_MISSING=1 ;;
      \?)
        echo -e "ERROR: invalid option: -$OPTARG\n" >&2
        usage 1
        ;;
      :)
        echo -e "ERROR: option -$OPTARG requires an argument.\n" >&2
        usage 1
        ;;
    esac
  done
  shift $((OPTIND - 1))

  if [[ -n $COMMIT_HASH_ARG ]]; then
    # Normalize case so that "HEAD" and "head" are treated alike.
    COMMIT_HASH_ARG=$(printf '%s' "$COMMIT_HASH_ARG" | tr '[:upper:]' '[:lower:]')
    validate_hash "$COMMIT_HASH_ARG"

    if [[ -n $FLAG_DIFF_DETAILS ]]; then
      echo -e "ERROR: you can't use -c and -d at the same time, specify one or the other.\n" >&2
      usage 1
    fi
  fi

  if [[ -n $FLAG_QUIET && -n $FLAG_DIFF_DETAILS ]]; then
    echo -e "ERROR: you can't use -d and -q at the same time, specify one or the other.\n" >&2
    usage 1
  fi

  # Overwriting the hash of every localized page is destructive: ask first.
  if [[ $LIST_KIND == "ALL" && -n $COMMIT_HASH_ARG ]]; then
    read -r -p "CAUTION! Set hash for all targets? (y/n): " response
    if [[ ! $response =~ ^[Yy] ]]; then
      echo "Aborting"
      exit 1
    fi
  fi

  # TARGET_PATHS stays a single space-separated string because the rest of
  # the script consumes it that way.
  TARGET_PATHS="$*"
  if [[ -z "$TARGET_PATHS" ]]; then
    TARGET_PATHS="$DEFAULT_TARGET"
  fi
  if [[ -z $FLAG_QUIET ]]; then
    echo "Processing paths: $TARGET_PATHS"
  fi

  # Reject paths that do not exist. Each path given on the command line is
  # checked individually; the default target is left for the caller to handle.
  for path in "$@"; do
    if [[ ! -e $path ]]; then
      echo -e "ERROR: path not found: '$path'\n" >&2
      exit 2
    fi
  done

  return 0
}
#######################################
# Records a commit hash in a localized page's front matter, after checking
# that the hash is a commit reachable from `main`.
#
# Arguments:
#   $1 - path of the localized page file
#   $2 - commit hash to record; must be non-empty and on `main`
#   $3 - (optional) message passed on as the text printed before the file name
#   $4 - (optional) message passed on as the text printed after the hash;
#        defaults to " UPDATED key"
# Outputs:
#   Errors on stderr; any other output comes from set_file_i18n_hash.
# Exits:
#   with status 1 when the hash is empty or is not on `main`.
#######################################
function update_file_i18n_hash() {
  local f="$1"
  local HASH="$2"
  local pre_msg="$3"
  local post_msg="${4:- UPDATED key}"

  if [[ -z $HASH ]]; then
    echo "INTERNAL ERROR: update_file_i18n_hash: hash should not be empty - $f $pre_msg" >&2
    exit 1
  fi

  # Ask git directly whether the hash is an ancestor of (or equal to) `main`.
  # Matching the output of `git branch --contains` misses the "* main" line
  # that git prints when `main` is the checked-out branch.
  if ! git merge-base --is-ancestor "$HASH" main 2>/dev/null; then
    echo "ERROR: hash is empty or isn't on 'main', aborting: $HASH - $f" >&2
    exit 1
  fi

  # Quote both messages so that a multi-word message (such as a numstat line)
  # reaches set_file_i18n_hash as one argument each.
  set_file_i18n_hash "$f" "$HASH" "$pre_msg" "$post_msg"
}
+ TARGETS=$(find $TARGET_PATHS -name "*.md" -not -path "*/$DEFAULT_LANG/*") + if [[ -z "$TARGETS" ]]; then + echo "ERROR: target directory contains no markdown files: '$TARGET_PATHS'" >&2 + exit 1 + fi + # if [[ -n $FLAG_VERBOSE ]]; then echo -e "All targets: $TARGETS"; fi + fi + + # set -x + # git branch -vv + + local LASTCOMMIT_FF="" # commit From File (FF), i.e., $f in the loop below + local LASTCOMMIT_GIT="" # last commit of `en` version of $f from git + local FILE_COUNT=0 # Number of TLP + local FILE_PROCESSED_COUNT=0 # Number of TLP actually listed + + if [[ $COMMIT_HASH_ARG == "head" ]]; then + COMMIT_HASH_ARG=$MAIN_HEAD_HASH + fi + + for f in $TARGETS; do + ((FILE_COUNT++)) + local LIST=0 + + LASTCOMMIT_FF=$(perl -ne "print \"\$1\" if /^$I18N_DLC_KEY:\\s*(.*)/" "$f") + LASTCOMMIT="$LASTCOMMIT_FF" + + if [[ $LIST_KIND == "ALL" && -n $COMMIT_HASH_ARG ]]; then + ((FILE_PROCESSED_COUNT++)) + set_file_i18n_hash "$f" "$COMMIT_HASH_ARG" + continue + fi + + if [[ $LIST_KIND == "NEW" ]]; then + if [[ -n $LASTCOMMIT_FF ]]; then continue; fi + ((FILE_PROCESSED_COUNT++)) + if [[ -n $COMMIT_HASH_ARG ]]; then + set_file_i18n_hash "$f" "$COMMIT_HASH_ARG" "" "key ADDED" + elif [[ -z $FLAG_QUIET ]]; then + echo "$f - has no $I18N_DLC_KEY front-matter key" + fi + continue + fi + + ## Processing $LIST_KIND DRIFTED + + # Does $f have an default-language version? + EN_VERSION=$(echo "$f" | sed "s/content\/.\{2,5\}\//content\/en\//g") + if [[ ! -e "$EN_VERSION" ]]; then + ((FILE_PROCESSED_COUNT++)) + echo -e "File not found\t$EN_VERSION - $f - $DEFAULT_LANG was removed or renamed" + continue + fi + + # Check default-language version for changes + DIFF=$(git diff --exit-code $EXTRA_DIFF_ARGS $LASTCOMMIT...HEAD "$EN_VERSION" 2>&1) + DIFF_STATUS=$? + if [ $DIFF_STATUS -gt 1 ]; then + ((FILE_PROCESSED_COUNT++)) + EXIT_STATUS=$DIFF_STATUS + # if [[ -z $FLAG_QUIET ]]; then + echo -e "HASH\tERROR\t$f: invalid hash or 'git diff' error (status: $DIFF_STATUS). For details, use -v." 
+ # fi + if [[ -n $FLAG_VERBOSE ]]; then echo "$DIFF"; fi + continue + elif [[ -n "$DIFF" ]]; then + ((FILE_PROCESSED_COUNT++)) + if [[ -n $FLAG_DIFF_DETAILS ]]; then + echo -n "$DIFF" + elif [[ -n $COMMIT_HASH_ARG ]]; then + update_file_i18n_hash "$f" "$COMMIT_HASH_ARG" "$DIFF" + elif [[ -z $FLAG_QUIET ]]; then + echo "$DIFF - $f" + fi + elif [[ -z $LASTCOMMIT ]]; then + ((FILE_PROCESSED_COUNT++)) + local msg="New i18n file" + if [[ -n $COMMIT_HASH_ARG ]]; then + set_file_i18n_hash "$f" "$COMMIT_HASH_ARG" "$msg" "key ADDED" + elif [[ -z $FLAG_QUIET ]]; then + echo "$msg - $f" + fi + elif [[ $LIST_KIND == "ALL" || -n $FLAG_VERBOSE ]]; then + ((FILE_PROCESSED_COUNT++)) + echo -e "File is in sync\t$f - $LASTCOMMIT" + fi + done + + if [[ -z $FLAG_QUIET ]]; then + echo "$LIST_KIND files: $FILE_PROCESSED_COUNT out of $FILE_COUNT" + fi + + if [[ $FILE_PROCESSED_COUNT -gt 0 && -z $COMMIT_HASH_ARG ]]; then + EXIT_STATUS=$((EXIT_STATUS || FLAG_FAIL_ON_LIST_OR_MISSING)) + fi + exit $EXIT_STATUS +} + +main "$@" diff --git a/scripts/i18n-check.sh b/scripts/i18n-check.sh deleted file mode 100755 index 6cf0d3f2ff10..000000000000 --- a/scripts/i18n-check.sh +++ /dev/null @@ -1,165 +0,0 @@ -#!/bin/bash -# -# Inspired by https://github.com/kubernetes/website/blob/main/scripts/lsync.sh - -DEFAULT_LANG="en" -DEFAULT_TARGET="content" -EXTRA_DIFF_ARGS="--numstat" -FLAG_DIFF_DETAILS="" -FLAG_UPDATE="" -FLAG_VERBOSE="" -I18N_DLC_KEY="default_lang_commit" -TARGET_PATHS="" - -function _usage() { - cat <&2 - exit $status -} - -function process_CLI_args() { - while getopts ":hduv" opt; do - case $opt in - h) - usage - ;; - d) - FLAG_DIFF_DETAILS=1 - EXTRA_DIFF_ARGS="" - ;; - u) - FLAG_UPDATE=1 - ;; - v) - FLAG_VERBOSE=1 - ;; - \?) 
- echo "ERROR: unrecognized flag: -$OPTARG" - usage 1 - ;; - esac - done - - shift $((OPTIND-1)) - TARGET_PATHS="$@" - - if [[ -z "$TARGET_PATHS" ]]; then - TARGET_PATHS="$DEFAULT_TARGET" - if [[ -n $FLAG_VERBOSE ]]; then echo "INFO: using default target path: $TARGET_PATHS"; fi - fi - - if [[ -f "TARGET_PATHS" && ! -e "$TARGET_PATHS" ]] ; then - echo "Path not found: '$TARGET_PATHS'" >&2 - exit 2 - fi -} - -function update_i18n_hash() { - # Usage: update_i18n_hash - # - # Adds to or updates the file's front matter's field - # $I18N_DLC_KEY with value . - - local LASTCOMMIT="$1" - local f="$2" - - if grep -q "^$I18N_DLC_KEY:" "$f"; then - perl -i -pe "s/(^$I18N_DLC_KEY):.*/\$1: $LASTCOMMIT/" "$f" - else - perl -i -0777 -pe "s/^(---.*?)(\n---\n)/\$1\n$I18N_DLC_KEY: $LASTCOMMIT\$2/sm" "$f" - fi - if [[ -n $FLAG_VERBOSE ]]; then - echo -e "i18n commit ID\t$f $LASTCOMMIT - updated" - fi -} - -function main() { - process_CLI_args "$@" - - if [ -f "$TARGET_PATHS" ] ; then - TARGETS="$TARGET_PATHS" - else - TARGETS=$(find $TARGET_PATHS -name "*.md" -not -path "*/$DEFAULT_LANG/*") - if [[ -z "$TARGETS" ]]; then - echo "ERROR: target directory contains no markdown files: '$TARGET_PATHS'" >&2 - exit 1 - fi - # if [[ -n $FLAG_VERBOSE ]]; then echo -e "All targets: $TARGETS"; fi - fi - - # set -x - # git branch -vv - - SYNCED=1 - for f in $TARGETS; do - # if [[ -n $FLAG_VERBOSE ]]; then echo -e "Checking\t$f"; fi - EN_VERSION=$(echo "$f" | sed "s/content\/.\{2,5\}\//content\/en\//g") - - # Try to get commit ref from file front matter - LASTCOMMIT=$(perl -ne "print \"\$1\" if /^$I18N_DLC_KEY:\\s*(.*)/" "$f") - if [[ -z $LASTCOMMIT ]]; then - # Get commit hash from git commit info - LASTCOMMIT=$(git log -n 1 --pretty=format:%h -- "$f") - fi - if [[ -z $LASTCOMMIT ]]; then - # Get last commit of `main` that this branch is rooted from. - LASTCOMMIT=$(git merge-base main HEAD) - # elif ! 
git branch --contains $LASTCOMMIT | grep -q "^\s*main\b"; then # HERE - # # Get last commit of `main` that this branch is rooted from. - # LASTCOMMIT=$(git merge-base main HEAD) - # fi - - # if ! (git branch --contains $LASTCOMMIT | grep -q "^\s*main\b"); then - # echo "Something is wrong, the hash is empty or isn't on 'main', aborting: $LASTCOMMIT - $f" - # exit 2 - fi - - if [[ -n $FLAG_UPDATE ]]; then - update_i18n_hash "$LASTCOMMIT" "$f" - fi - - if [[ ! -e "$EN_VERSION" ]]; then - echo -e "File not found\t$EN_VERSION - $f - $DEFAULT_LANG was removed or renamed" - SYNCED=0 - continue - fi - - DIFF=$(git diff --exit-code $EXTRA_DIFF_ARGS $LASTCOMMIT...HEAD "$EN_VERSION") - if [[ -n "$DIFF" ]]; then # [[ $? -ne 0 ]] - SYNCED=0 - if [[ -n "$FLAG_DIFF_DETAILS" ]]; then - echo -n "$DIFF" - else - echo "$DIFF - $f" - fi - elif [[ -n $FLAG_VERBOSE ]]; then - echo -e "File is in sync\t$f" - fi - done - if [ $SYNCED -ne 1 ]; then - exit - fi - - echo "$TARGET_PATHS is still in sync" -} - -main "$@"