From 15ed6a45937fbc4e8cb45403e1afe6fbaf2f73ac Mon Sep 17 00:00:00 2001 From: Konrad Rudolph Date: Wed, 19 May 2021 18:07:38 +0100 Subject: [PATCH] enh(r) Add operators and punctuation (#3195) This change adds highlighting for operators and punctuation, and fixes the issues described in #3194. * Give R a relevance boost from arrow-assign * Make `<-` less of a signal boost for R * Rebalance relevance of common syntactic constructs * Fix Vala having too much relevance for `^#` (meta/comment) --- CHANGES.md | 2 + src/languages/r.js | 113 +++++++++++++++++++++++-------- src/languages/vala.js | 1 - test/markup/r/names.expect.txt | 12 ++-- test/markup/r/numbers.expect.txt | 10 +-- test/markup/r/numbers.txt | 2 + test/markup/r/ops.expect.r | 94 +++++++++++++------------ test/markup/r/ops.r | 1 + test/markup/r/roxygen.expect.txt | 8 +-- 9 files changed, 157 insertions(+), 86 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index caf43acc9b..619e37ca79 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -46,6 +46,8 @@ Language Grammars: Parser: +- enh(vala) improve language detection for Vala (#3195) [Konrad Rudolph][] +- enh(r) add support for operators, fix number highlighting bug (#3194, #3195) [Konrad Rudolph][] - enh(parser) add `beginScope` and `endScope` to allow separate scoping begin and end (#3159) [Josh Goebel][] - enh(parsed) `endScope` now supports multi-class matchers as well (#3159) [Josh Goebel][] - enh(parser) `highlightElement` now always tags blocks with a consistent `language-[name]` class [Josh Goebel][] diff --git a/src/languages/r.js b/src/languages/r.js index a4348678fa..b88d76c6f1 100644 --- a/src/languages/r.js +++ b/src/languages/r.js @@ -18,13 +18,27 @@ export default function(hljs) { // handled in a separate mode. See `test/markup/r/names.txt` for examples. // FIXME: Support Unicode identifiers. const IDENT_RE = /(?:(?:[a-zA-Z]|\.[._a-zA-Z])[._a-zA-Z0-9]*)|\.(?!\d)/; - const SIMPLE_IDENT = /[a-zA-Z][a-zA-Z_0-9]*/; + const NUMBER_TYPES_RE = regex.either( + // Special case: only hexadecimal binary powers can contain fractions + /0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/, + // Hexadecimal numbers without fraction and optional binary power + /0[xX][0-9a-fA-F]+(?:[pP][+-]?\d+)?[Li]?/, + // Decimal numbers + /(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?[Li]?/ + ); + const OPERATORS_RE = /[=!<>:]=|\|\||&&|:::?|<-|<<-|->>|->|\|>|[-+*\/?!$&|:<=>@^~]|\*\*/; + const PUNCTUATION_RE = regex.either( + /[()]/, + /[{}]/, + /\[\[/, + /[[\]]/, + /\\/, + /,/ + ); return { name: 'R', - // only in Haskell, not R - illegal: /->/, keywords: { $pattern: IDENT_RE, keyword: @@ -56,6 +70,7 @@ export default function(hljs) { 'standardGeneric substitute sum switch tan tanh tanpi tracemem ' + 'trigamma trunc unclass untracemem UseMethod xtfrm', }, + contains: [ // Roxygen comments hljs.COMMENT( @@ -69,7 +84,7 @@ export default function(hljs) { // preventing highlighting. This code is example R code, so nested // doctags shouldn’t be treated as such. See // `test/markup/r/roxygen.txt` for an example. - className: 'doctag', + scope: 'doctag', begin: '@examples', starts: { contains: [ @@ -89,12 +104,12 @@ export default function(hljs) { { // Handle `@param` to highlight the parameter name following // after. - className: 'doctag', + scope: 'doctag', begin: '@param', end: /$/, contains: [ { - className: 'variable', + scope: 'variable', variants: [ { begin: IDENT_RE }, { begin: /`(?:\\.|[^`\\])+`/ } @@ -104,11 +119,11 @@ export default function(hljs) { ] }, { - className: 'doctag', + scope: 'doctag', begin: /@[a-zA-Z]+/ }, { - className: 'keyword', + scope: 'keyword', begin: /\\[a-zA-Z]+/, } ] @@ -118,7 +133,7 @@ export default function(hljs) { hljs.HASH_COMMENT_MODE, { - className: 'string', + scope: 'string', contains: [hljs.BACKSLASH_ESCAPE], variants: [ hljs.END_SAME_AS_BEGIN({ begin: /[rR]"(-*)\(/, end: /\)(-*)"/ }), @@ -131,48 +146,88 @@ export default function(hljs) { {begin: "'", end: "'", relevance: 0} ], }, + + // Matching numbers immediately following punctuation and operators is + // tricky since we need to look at the character ahead of a number to + // ensure the number is not part of an identifier, and we cannot use + // negative look-behind assertions. So instead we explicitly handle all + // possible combinations of (operator|punctuation), number. + // TODO: replace with negative look-behind when available + // { begin: /(?# Valid names -a1_foo, A1_FOO, .foo_, ._foo, Bar.42, foo..1, ., ._, .., ..., ..1, c, T, F, ._1 +a1_foo, A1_FOO, .foo_, ._foo, Bar.42, foo..1, ., ._, .., ..., ..1, c, T, F, ._1 # Reserved Words -NA, NA_integer_, NA_real_, NA_character_, NA_complex_, NULL, NaN, Inf +NA, NA_integer_, NA_real_, NA_character_, NA_complex_, NULL, NaN, Inf # Keywords -function, while, repeat, for, if, in, else, next, break +function, while, repeat, for, if, in, else, next, break # Not reserved -NULLa, NULL1, NULL., `NULL`, 'NULL', NA_foo_, na_real_, Function, for. +NULLa, NULL1, NULL., `NULL`, 'NULL', NA_foo_, na_real_, Function, for. # Primitive built-ins -return, switch, sum +return, switch, sum # Non-primitive base functions -stop, try +stop, try # Quoted identifiers diff --git a/test/markup/r/numbers.expect.txt b/test/markup/r/numbers.expect.txt index 5ab3296803..e2fec64097 100644 --- a/test/markup/r/numbers.expect.txt +++ b/test/markup/r/numbers.expect.txt @@ -1,18 +1,20 @@ +1 # Regression caused numbers at beginning not to be highlighted. + # Numbers -0, 01, 08, 123456, 1256.701, 123e3, 123E+3, 1.23e-3, 1.23E3, .25, 2. +0, 01, 08, 123456, 1256.701, 123e3, 123E+3, 1.23e-3, 1.23E3, .25, 2. # Integers -123L, -50L +123L, -50L # Imaginary numbers -123i, -123i, 1.2e-3i, 1.i, .0i +123i, -123i, 1.2e-3i, 1.i, .0i # Hex numbers -0x0, 0xabcdefABCDEF01234, 0xabcp123, 0xabcP-123, 0x1.2p2, 0xa.bp-3i +0x0, 0xabcdefABCDEF01234, 0xabcp123, 0xabcP-123, 0x1.2p2, 0xa.bp-3i # Invalid/not literals (for reference) diff --git a/test/markup/r/numbers.txt b/test/markup/r/numbers.txt index 423cbaf9d7..45e288b54a 100644 --- a/test/markup/r/numbers.txt +++ b/test/markup/r/numbers.txt @@ -1,3 +1,5 @@ +1 # Regression caused numbers at beginning not to be highlighted. + # Numbers 0, 01, 08, 123456, 1256.701, 123e3, 123E+3, 1.23e-3, 1.23E3, .25, 2. diff --git a/test/markup/r/ops.expect.r b/test/markup/r/ops.expect.r index aca8f5e926..001c695e10 100644 --- a/test/markup/r/ops.expect.r +++ b/test/markup/r/ops.expect.r @@ -1,53 +1,63 @@ # General operators (from R documentation `?Syntax`) -:: ::: -$ @ -[ [[ -^ -- + -: -%any% -* / -+ - -< > <= >= == != -! -& && -| || -~ --> ->> -<- <<- -= -? +:: ::: +$ @ +[ [[ +^ +- + +: +%any% +* / ++ - +< > <= >= == != +! +& && +| || +~ +-> ->> +<- <<- += +? # Subset extraction -x[3] -x[["a"]] -x$y -x$`a a` -x$"a b" +x[3] +x[["a"]] +x$y +x$`a a` +x$"a b" # Operators -2-2, 2+2, 2~2, 2*2, 2/2, 2^2, 2<2, 2>2, 2==2, 2>=2, 2<=2, 2!=2, a<-2, a=2, a<<-2, a:=2, 2->a, 2->>a, 1:2 -~a+b -!TRUE -?help, ?`?`, methods?show, ??topic -TRUE&FALSE, T|F -TRUE&&FALSE, T||F -base::sum, base:::sum +2-2, 2+2, 2~2, 2*2, 2/2, 2^2, 2<2, 2>2, 2==2, 2>=2, 2<=2, 2!=2, a<-2, a=2, a<<-2, a:=2, 2->a, 2->>a, 1:2 +a <- 10 +~a+b +!TRUE +?help, ?`?`, methods?show, ??topic +TRUE&FALSE, T|F +TRUE&&FALSE, T||F +base::sum, base:::sum # Custom operators -2%*%3 -a%<>%b -2%in%y -a %`tick`% b -a %'quot'% b -a %"quot"% b -a %for% b -a %\% b -a %`% b - -`% %` = paste -"foo"`% %`"bar" +2%*%3 +a%<>%b +2%in%y +a %`tick`% b +a %'quot'% b +a %"quot"% b +a %for% b +a %\% b +a %`% b + +# R 4.1 lambda + +f = \(x) x * 2 + +# R 4.1 pipe + +1 : 10 |> f() +10 |> x => rnorm(1, mean = x) + +`% %` = paste +"foo"`% %`"bar" \ No newline at end of file diff --git a/test/markup/r/ops.r b/test/markup/r/ops.r index 11a7a1e889..3a8682dbca 100644 --- a/test/markup/r/ops.r +++ b/test/markup/r/ops.r @@ -30,6 +30,7 @@ x$"a b" # Operators 2-2, 2+2, 2~2, 2*2, 2/2, 2^2, 2<2, 2>2, 2==2, 2>=2, 2<=2, 2!=2, a<-2, a=2, a<<-2, a:=2, 2->a, 2->>a, 1:2 +a <- 10 ~a+b !TRUE ?help, ?`?`, methods?show, ??topic diff --git a/test/markup/r/roxygen.expect.txt b/test/markup/r/roxygen.expect.txt index aba284971a..9133dc9e25 100644 --- a/test/markup/r/roxygen.expect.txt +++ b/test/markup/r/roxygen.expect.txt @@ -4,7 +4,7 @@ #' @param x the object of type \code{numeric} #' @return A logical indicating whether \code{x == 0} #' @export -is_zero = function (x) x == 0 +is_zero = function (x) x == 0 #' Sum of numbers #' @@ -15,9 +15,9 @@ is_zero = function (x) x == @seealso \link[base]{sum} # this SHOULD be highlighted again. # comment -sum_all <- function (...) { - sum(..., na.rm = TRUE) -} +sum_all <- function (...) { + sum(..., na.rm = TRUE) +} # Weird identifier in @param string