Add the new moodle.FilesLangFilesOrdering sniff

This sniff examines exclusively lang files, only for Moodle 4.4 and up, and has the following features: - Detect "groups" of lang strings, (normal, deprecated) within the lang file, checking for correct order within each group. - If other comments, different from the deprecated ones, are found, for safety the sniff will stop fixing strings after that point, although it will continue reporting them as out of order. - Detect wrong variable names (different from `$string`). - Detect duplicate strings within each lang file group. - Detect strings with incorrect trailing contents after it (php code, invalid comment...). - Detect strings out of order within each lang file group and fix them. Notes about the fixing: - Due to CodeSniffer fixing limits, this has been the really complex thing to implement and to guarantee that the fixer works also for "big" lang files (say error.php and similar ones). - To do so, instead of fixing every case 1 by 1, while we are checking the file... we have to accumulate all the changes in custom structure and then, apply everything together as a big changeset. - As commented above, for safety, if an unexpected comment is found, the fixer stops applying any change after that line. In order to allow it to continue fixing, the comment must be removed. Only comments allowed are the "// Deprecated since " ones.
moodlehq · Mar 4, 2024 · f33a5ac · f33a5ac
1 parent cc7a974
commit f33a5ac
Show file tree

Hide file tree

Showing 8 changed files with 664 additions and 0 deletions.
diff --git a/moodle/Sniffs/Files/LangFilesOrderingSniff.php b/moodle/Sniffs/Files/LangFilesOrderingSniff.php
@@ -0,0 +1,333 @@
+<?php
+
+// This file is part of Moodle - https://moodle.org/
+//
+// Moodle is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// Moodle is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Moodle.  If not, see <https://www.gnu.org/licenses/>.
+
+/**
+ * This sniff verifies that lang files are sorted alphabetically by string key.
+ *
+ * @copyright 2024 onwards Eloy Lafuente (stronk7) {@link https://stronk7.com}
+ * @license   https://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
+ */
+
+namespace MoodleHQ\MoodleCS\moodle\Sniffs\Files;
+
+use MoodleHQ\MoodleCS\moodle\Util\MoodleUtil;
+use PHP_CodeSniffer\Files\File;
+use PHP_CodeSniffer\Sniffs\Sniff;
+
+class LangFilesOrderingSniff implements Sniff
+{
+    /**
+     * @var string|null The previous string that has been processed.
+     *
+     * We use this variable to compare the current string with the previous one. And decide
+     * if the current string is a duplicate or if it's out of order.
+     */
+    protected ?string $previousString = null;
+
+    /**
+     * @var int pointer to the token where we should stop fixing the file (defaults to last token).
+     *
+     * When we find a comment that is not a "Deprecated since Moodle" one, we will stop fixing the file.
+     */
+    protected int $stopFixingPtr = 999999999;
+
+    /**
+     * @var array An array which keys are all the known strings, grouped, and the values are the start and end pointers to them.
+     *
+     * We use this array to, accurately, know where to move every string on each fixing iteration.
+     */
+    protected array $strings = [];
+
+    public function register(): array {
+        return [T_OPEN_TAG]; // We are going to process the whole file, finding all the strings and comments within it.
+    }
+
+    public function process(File $phpcsFile, $stackPtr): void {
+        // If the file is not a lang file, return.
+        if (!MoodleUtil::isLangFile($phpcsFile)) {
+            return;
+        }
+
+        // Only for Moodle 4.4dev (404) and up.
+        // Make and exception for unit tests, so they are run always.
+        if (!MoodleUtil::meetsMinimumMoodleVersion($phpcsFile, 404) && !MoodleUtil::isUnitTestRunning()) {
+            return; // @codeCoverageIgnore
+        }
+
+        // Get the file tokens, for ease of use.
+        $tokens = $phpcsFile->getTokens();
+
+        // Let's find the first variable and start the process.
+        $currentPtr = $phpcsFile->findNext(T_VARIABLE, $stackPtr + 1);
+        if ($currentPtr === false) {
+            return; // No strings found, nothing to do.
+        }
+
+        // It's time to iterate over all the strings and comments till the end of the file.
+        // We'll go accumulating all the strings by group, with their start and end pointers as values.
+        $currentGroup = 'main'; // The default group to start with, we'll change it each time we find a new section.
+        do {
+            // Let's manage comments first (so we know if we are changing the current group).
+
+            // Correct comments are in new line and begin with "// Deprecated since ".
+            if (
+                $tokens[$currentPtr]['code'] === T_COMMENT &&
+                strpos($tokens[$currentPtr]['content'], '// Deprecated since ') === 0 &&
+                $tokens[$currentPtr - 1]['content'] === "\n"
+            ) {
+                $currentGroup = trim($tokens[$currentPtr]['content']);
+            }
+
+            // If we find a comment that is not the standard one, we will stop fixing the file here. And error.
+            if (
+                $tokens[$currentPtr]['code'] === T_COMMENT &&
+                strpos($tokens[$currentPtr]['content'], '// Deprecated since ') === false
+            ) {
+                $phpcsFile->addWarning(
+                    'Unexpected comment found. Auto-fixing will not work after this comment',
+                    $currentPtr,
+                    'UnexpectedComment'
+                );
+                if ($this->stopFixingPtr > $currentPtr) {
+                    $this->stopFixingPtr = $currentPtr; // Update the stop fixing pointer.
+                }
+            }
+
+            if ($tokens[$currentPtr]['code'] === T_COMMENT) {
+                continue; // We are done for comment tokens.
+            }
+
+            // Arrived here, all the tokens are variables, so we don't need to check for that.
+
+            // If the name of the variable is not "$string", error.
+            if ($tokens[$currentPtr]['content'] !== '$string') {
+                $phpcsFile->addError(
+                    'Variable "%s" is not expected in a lang file',
+                    $currentPtr,
+                    'UnexpectedVariable',
+                    [$tokens[$currentPtr]['content']]
+                );
+                continue; // We are done for this token.
+            }
+
+            // Get the string key, if any.
+            if (!$stringKey = $this->getStringKey($phpcsFile, $currentPtr)) {
+                continue; // Problems with this string key, skip it (has been already reported).
+            }
+
+            // Have found a valid $string[KEY], let's calculate the end and store it.
+            if ($currentEnd = $this->getStringEnd($phpcsFile, $currentPtr)) {
+                if (!isset($this->strings[$currentGroup])) {
+                    $this->strings[$currentGroup] = [];
+                    $this->previousString = null; // Reset the previous string on new group.
+                }
+                // Check if the string already has been found earlier.
+                if (isset($this->strings[$currentGroup][$stringKey])) {
+                    $phpcsFile->addError('The string key "%s" is duplicated', $currentPtr, 'DuplicatedKey', [$stringKey]);
+                    continue; // We are done for this string, won't report anything about it till fixed.
+                } else {
+                    // We can safely add the string to the group.
+                    $this->strings[$currentGroup][$stringKey] = [$currentPtr, $currentEnd];
+                }
+            }
+
+            if (null === $currentEnd) {
+                // The string end is not as expected, report as error unless the next token
+                // after the semicolon is a comment. In that case, we won't report it because
+                // UnexpectedComment will take on it.
+                $delegateToUnexpectedDuplicated = false;
+                $semicolonPtr = $phpcsFile->findNext(T_SEMICOLON, $currentPtr + 1);
+                if (
+                    $tokens[$semicolonPtr + 1]['code'] === T_COMMENT || // Very next in a comment.
+                    $tokens[$semicolonPtr + 1]['code'] === T_WHITESPACE && // Or a whitespace and then a comment.
+                    $tokens[$semicolonPtr + 2]['code'] === T_COMMENT
+                ) {
+                    $delegateToUnexpectedDuplicated = true;
+                }
+                if (!$delegateToUnexpectedDuplicated) {
+                    $phpcsFile->addError(
+                        'Unexpected string end, it should be a line feed after a semicolon',
+                        $currentPtr,
+                        'UnexpectedEnd'
+                    );
+                    continue; // We are done for this string, won't report anything about it till fixed.
+                }
+            }
+
+            // Note: We only issue these warnings if there are previous strings to compare with,
+            // and, obviously, if the string is out of order.
+            if ($this->previousString && strcmp($this->previousString, $stringKey) > 0) {
+                // We are going to add this as fixable warning only if we are
+                // before the last pointer to fix. This is an unordered string.
+                $phpcsFile->addWarning(
+                    'The string key "%s" is not in the correct order, it should be before "%s"',
+                    $currentPtr,
+                    'IncorrectOrder',
+                    [$stringKey, $this->previousString],
+                    0,
+                    $currentPtr < $this->stopFixingPtr
+                );
+            }
+
+            // Feed $previousString with the current string key.
+            $this->previousString = $stringKey;
+        } while ($currentPtr = $phpcsFile->findNext([T_VARIABLE, T_COMMENT], $currentPtr + 1)); // Move to next.
+
+        // If we are fixing the file, we need to sort the strings and move them to the correct position.
+        if ($phpcsFile->fixer->enabled) {
+            $this->sortStringsAndFix($phpcsFile);
+        }
+    }
+
+    /**
+     * Given a lang file, fix all the sorting issues found.
+     *
+     * This is really similar to the insertion-sort algorithm, but with
+     * a few optimisations to avoid unnecessary iterations. Should be
+     * efficient enough against lists that are expected to be not
+     * too long and already mostly sorted.
+     *
+     * @param File $phpcsFile The lang file being processed.
+     */
+    protected function sortStringsAndFix(File $phpcsFile): void {
+        // Because of hard restrictions in CodeSniffer fixer (we cannot apply more than one change
+        // to the same token in the same pass), we need to accumulate all the changes and apply them
+        // at the end of the process. So we are going to build a big changeset to be applied all together.
+        // Keys will be the token index and values an array, with operation (index, DELETE, INSERT) and content.
+        $changeSet = [];
+
+        // Get the file tokens, for ease of use.
+        $tokens = $phpcsFile->getTokens();
+
+        // We are going to perform the sorting within each detected group/section.
+        foreach ($this->strings as $group => $strings) {
+            $alreadyOrdered = 0; // The number of strings that are already in place.
+
+            do {
+                $changes = false; // We are going to track if we have made any change in this pass.
+
+                $strings = $this->strings[$group];
+                // Let's compare the keys in the array of strings with the sorted version of it.
+                $sorted = $unSorted = array_keys($strings);
+                sort($sorted, SORT_STRING);
+                $count = count($sorted);
+                for ($i = $alreadyOrdered; $i < $count; $i++) {
+                    $sortedKey = $sorted[$i];
+                    // If the key is after the last pointer to fix, we are done with the loop.
+                    if ($strings[$sortedKey][1] > $this->stopFixingPtr) {
+                        break;
+                    }
+                    $stringsKey = $unSorted[$i];
+                    if ($sortedKey !== $stringsKey) {
+                        // Apply the changes to the strings array.
+                        // Move the key to the correct position.
+                        $keyValue = $strings[$sortedKey];
+                        // Remove the element to move.
+                        unset($strings[$sortedKey]);
+                        // Rebuild the array, with the element in new position.
+                        $strings = array_slice($strings, 0, $i, true) +
+                            [$sortedKey => $keyValue] +
+                            array_slice($strings, $i, null, true);
+                        $this->strings[$group] = $strings; // Update the group array with the rebuilt version.
+                        $unSorted = array_keys($strings); // Update the unsorted keys array.
+
+                        // For every token in the string being moved, delete it and add it in the correct position.
+                        foreach (range($keyValue[0], $keyValue[1]) as $tokenIndex) {
+                            $tempToken = $tokens[$tokenIndex]; // Store the token.
+                            $changeSet[$tokenIndex]['DELETE'] = ''; // Delete the current string token.
+                            // Insert the token before the previous string.
+                            if (!isset($changeSet[$strings[$stringsKey][0] - 1]['INSERT'])) {
+                                $changeSet[$strings[$stringsKey][0] - 1]['INSERT'] = '';
+                            }
+                            $changeSet[$strings[$stringsKey][0] - 1]['INSERT'] .= $tempToken['content'];
+                        }
+                        $changes = true;
+                    }
+                    // The string was already in place, we don't need it anymore
+                    // in the array for future comparisons.
+                    $alreadyOrdered = $i + 1;
+                }
+                // Let's apply the accumulated changes to the file.
+                if ($changes) {
+                    $phpcsFile->fixer->beginChangeset();
+                    foreach ($changeSet as $tokenIndex => $operations) {
+                        if (isset($operations['DELETE'])) {
+                            $phpcsFile->fixer->replaceToken($tokenIndex, '');
+                        }
+                        if (isset($operations['INSERT'])) {
+                            $phpcsFile->fixer->addContent($tokenIndex, $operations['INSERT']);
+                        }
+                    }
+                    $phpcsFile->fixer->endChangeset();
+                }
+            } while ($changes);
+        }
+    }
+
+    /**
+     * Return the string key corresponding to the string at the pointer.
+     * Note that the key has got any quote (single or double) trimmed.
+     *
+     * @param File $phpcsFile
+     * @param int $stackPtr
+     * @return string|null
+     */
+    protected function getStringKey(File $phpcsFile, int $stackPtr): ?string {
+        $tokens = $phpcsFile->getTokens();
+
+        // If the structure is not exactly: $string[KEY], add error and return null.
+        if (
+            $tokens[$stackPtr + 1]['code'] !== T_OPEN_SQUARE_BRACKET ||
+            $tokens[$stackPtr + 2]['code'] !== T_CONSTANT_ENCAPSED_STRING ||
+            $tokens[$stackPtr + 3]['code'] !== T_CLOSE_SQUARE_BRACKET
+        ) {
+            $phpcsFile->addError(
+                "Unexpected string syntax, it should be `\$string['key']`",
+                $stackPtr,
+                'UnexpectedSyntax'
+            );
+            return null;
+        }
+
+        // Now we can safely extract the string key and return it.
+        return trim($tokens[$stackPtr + 2]['content'], "'\"");
+    }
+
+    /**
+     * Return the string final pointer, it should be always a \n after a T_SEMICOLON.
+     *
+     * @param File $phpcsFile
+     * @param int $stackPtr
+     * @return int|null The pointer to the end of the string, or null if it's not an expected string end.
+     */
+    protected function getStringEnd(File $phpcsFile, int $stackPtr): ?int {
+        $tokens = $phpcsFile->getTokens();
+
+        $currentEndToken = $phpcsFile->findNext(T_SEMICOLON, $stackPtr + 1) + 1;
+
+        // Verify that the current end token is a line feed, if not, we won't be able to fix (swap).
+        if (
+            !isset($tokens[$currentEndToken]) ||
+            $tokens[$currentEndToken]['code'] !== T_WHITESPACE ||
+            $tokens[$currentEndToken]['content'] !== "\n"
+        ) {
+            return null; // This is not an expected string end.
+        }
+
+        return $currentEndToken;
+    }
+}