fix bad text lint rules by testing with code tokens and strings, phet…

…sims/chipper#791
phetsims · Oct 22, 2024 · 8c67e0f · 8c67e0f
1 parent 5ed7798
commit 8c67e0f
Show file tree

Hide file tree

Showing 3 changed files with 105 additions and 26 deletions.
diff --git a/eslint/rules/bad-sim-text.js b/eslint/rules/bad-sim-text.js
@@ -16,22 +16,22 @@ module.exports = function( context ) {
   var badTextsForSimCode = [
 
     // babel doesn't support compiling static getters, see https://github.com/phetsims/tasks/issues/983
-    ' static get ',
+    { name: ' static get ', codeTokens: [ 'static', 'get' ] },
 
     // should be using dot.Util.roundSymmetric, Math.round does not treat positive and negative numbers
     // symmetrically see https://github.com/phetsims/dot/issues/35#issuecomment-113587879
-    'Math.round',
+    { name: 'Math.round', codeTokens: [ 'Math', '.', 'round' ] },
 
     // should be using `phet.joist.random`
-    'Math.random()',
-    '_.shuffle(',
-    '_.sample(',
-    '_.random(',
-    'new Random()',
+    { name: 'Math.random()', codeTokens: [ 'Math', '.', 'random', '(', ')' ] },
+    { name: '_.shuffle(', codeTokens: [ '_', '.', 'shuffle', '(' ] },
+    { name: '_.sample(', codeTokens: [ '_', '.', 'sample', '(' ] },
+    { name: '_.random(', codeTokens: [ '_', '.', 'random', '(' ] },
+    { name: 'new Random()', codeTokens: [ 'new', 'Random', '(', ')' ] },
 
     // IE doesn't support:
-    'Number.parseInt(',
-    'Array.prototype.find'
+    { name: 'Number.parseInt(', codeTokens: [ 'Number', '.', 'parseInt', '(' ] },
+    { name: 'Array.prototype.find', codeTokens: [ 'Array', '.', 'prototype', '.', 'find' ] }
 
     // DOT/Util.toFixed or DOT/Util.toFixedNumber should be used instead of toFixed.
     // JavaScript's toFixed is notoriously buggy. Behavior differs depending on browser,

diff --git a/eslint/rules/bad-text.js b/eslint/rules/bad-text.js
@@ -32,7 +32,7 @@ module.exports = function( context ) {
     'COMBOBOX', // prefer COMBO_BOX
 
     // In ES6, extending object causes methods to be dropped
-    'extends Object ',
+    { name: 'extends Object ', codeTokens: [ 'extends', 'Object' ] },
 
     // Forbid common duplicate words
     ' the the ',
@@ -47,6 +47,8 @@ module.exports = function( context ) {
     'Phet-iO',
 
     '@return ',
+
+    // TODO: this isn't yet supported with current getBadTextTester.js
     ' => { return ', // if on a one line arrow function returning something, prefer instead `() => theReturn`, see https://github.com/phetsims/chipper/issues/790
   ];
 

diff --git a/eslint/rules/getBadTextTester.js b/eslint/rules/getBadTextTester.js
@@ -2,10 +2,20 @@
 /* eslint-disable */
 
 /**
- * Bad text testing function for testing bad text in the project. Supports bad text as string or regex
+ * Bad text testing function for testing bad text in the project. Supports bad text as string ForbiddenTextObject.
+ * ForbiddenTextObject's schema was designed to support getting accurate line numbers from bad text in comments and
+ * in code. To support code, use `codeTokens` to specify how the bad text will be tokenized into Esprima nodes.
  *
- * @param {Array.<string|Object>} badTexts - if object, should look like {name: {string}, regex: {Regexp}}
+ * @author Michael Kauzmann (PhET Interactive Simulations)
+ * @author Jesse Greenberg (PhET Interactive Simulations)
+ */
+
+const assert = require( 'assert' );
+
+/**
+ * @param {Array.<string|ForbiddenTextObject>} badTexts - if a string, will be converted into a ForbiddenTextObject
  * @param {Object} context - eslinting context given from the engine
+ * @returns {function(node:Object)} - function that reports any bad text lint errors to the context
  */
 module.exports = ( badTexts, context ) => {
   'use strict';
@@ -14,25 +24,92 @@ module.exports = ( badTexts, context ) => {
     const sourceCode = context.getSourceCode();
     const codeTokens = sourceCode.getTokens( node );
     const commentTokens = sourceCode.getAllComments();
-    const allTokens = codeTokens.concat( commentTokens );
+    const text = sourceCode.text;
 
-    badTexts.forEach( badText => {
-      allTokens.forEach( token => {
-        const tokenValue = token.value;
+    // test an array of code tokens
+    const testCodeTokens = forbiddenTextObject => {
+      const codeTokensArray = forbiddenTextObject.codeTokens;
 
-        let failedText = null;
-        if ( badText.regex instanceof RegExp && badText.regex.test( token.value ) ) {
-          failedText = badText.name;
-        }
-        if ( token.value.indexOf( badText ) >= 0 ) {
-          failedText = badText;
+      // iterate through each code token in the node
+      for ( let i = 0; i < codeTokens.length; i++ ) {
+        const token = codeTokens[ i ]; // {Token}
+        const failures = []; // a list of the tokens that match the forbidden code tokens,
+
+        // loop through, looking ahead at each subsequent code token, breaking if they don't match the  forbidden tokens
+        for ( let j = 0; j < codeTokensArray.length; j++ ) {
+          const forbiddenCodePart = codeTokensArray[ j ];
+          const combinedIndex = i + j;
+
+          const tokenValue = codeTokens[ combinedIndex ].value;
+
+          // multiple code tokens must match perfectly to conglomerate
+          if ( ( combinedIndex < codeTokens.length && tokenValue === forbiddenCodePart ) ||
+
+               // if there is only one, then check as a substring too
+               ( codeTokensArray.length === 1 && tokenValue.indexOf( forbiddenCodePart ) >= 0 ) ) {
+
+            // if at the end of the sequence, then we have successfully found bad code text, add it to a list of failures.
+            if ( j === codeTokensArray.length - 1 ) {
+              failures.push( forbiddenTextObject );
+            }
+          }
+          else {
+            break; // quit early because it was a non-match
+          }
         }
 
-        failedText && context.report( {
-          loc: token.loc.start,
-          message: 'File contains bad text: \'' + failedText + '\''
+        failures.forEach( failedTextObject => {
+          context.report( {
+            loc: token.loc.start,
+            message: `bad code text: "${failedTextObject.name}"`,
+          } );
+        } );
+      }
+    };
+    /**
+     *
+     * @param forbiddenText
+     */
+    const testBadText = forbiddenText => {
+
+      // no need to iterate through tokens if it isn't anywhere in the source code
+      if ( text.indexOf( forbiddenText.name ) >= 0 ) {
+
+        // search through the tokenized code for the forbidden code tokens, like [ 'Math', '.', 'round' ],
+        testCodeTokens( forbiddenText );
+
+        // look through comments
+        commentTokens.forEach( token => {
+          if ( token.value.indexOf( forbiddenText.name ) >= 0 ) {
+            context.report( {
+              loc: token.loc.start,
+              message: `bad comment text: "${forbiddenText.name}"`,
+            } );
+          }
         } );
-      } );
+
+        // TODO: support REGEX
+        // if ( forbiddenText.regex instanceof RegExp && forbiddenText.regex.test( token.value ) ) {
+        //   failedText = forbiddenText.name;
+        // }
+      }
+    };
+
+    badTexts.forEach( badText => {
+      if ( typeof badText === 'string' ) {
+        badText = { name: badText, codeTokens: [ badText ] }
+      }
+      assert( typeof badText.name === 'string', 'name required' );
+      assert( Array.isArray( badText.codeTokens ), 'code tokens expected' );
+      testBadText( badText );
     } );
   };
+
+  /**
+   * @typedef {Object} ForbiddenTextObject
+   * @property {string} name - the "string-form" name of the bad text. should occur in the source code. Also what is
+   *                            displayed on error. Used when checking for bad text in comments.
+   * @property {Array.<string>} codeTokens - a list of the tokenized, ordered code sections that make up the bad text
+   *                                          within the javascript code (not used for checking comments)
+   */
 };