Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enh(r) Add operators and punctuation #3195

Merged
merged 12 commits into from
May 19, 2021
98 changes: 69 additions & 29 deletions src/languages/r.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,20 @@ export default function(hljs) {
// handled in a separate mode. See `test/markup/r/names.txt` for examples.
// FIXME: Support Unicode identifiers.
// Matches a syntactic name: a letter, or a dot followed by a dot, underscore
// or letter, then any run of dots, underscores, letters and digits; or a lone
// dot that is not followed by a digit.
const IDENT_RE = /(?:(?:[a-zA-Z]|\.[._a-zA-Z])[._a-zA-Z0-9]*)|\.(?!\d)/;
// Plain ASCII identifier: a letter followed by letters, digits or underscores.
const SIMPLE_IDENT = /[a-zA-Z][a-zA-Z_0-9]*/;
// The numeric literal forms recognised by this grammar, handed to
// `regex.either` (presumably combined into one alternation — confirm against
// the `regex` helper). The hexadecimal-with-fraction form requires a binary
// exponent and accepts only an `i` suffix; the other two accept `L` or `i`.
const NUMBER_TYPES = regex.either(
// Special case: only hexadecimal binary powers can contain fractions
/0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/,
// Hexadecimal numbers without fraction and optional binary power
/0[xX][0-9a-fA-F]+(?:[pP][+-]?\d+)?[Li]?/,
// Decimal numbers
/(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?[Li]?/
);
// Operators, tried left to right. Multi-character operators are listed ahead
// of the single-character class so the longest operator wins. `**` must come
// before the class: the class contains `*`, so when this pattern is used on
// its own a trailing `\*\*` alternative could never match and `**` would be
// split into two separate `*` tokens.
const OPERATORS = /[=!<>:]=|\|\||&&|:::?|<-|<<-|->>|->|\|>|\*\*|[-+*\/?!$&|:<=>@^~]/;
// Parentheses, braces, brackets, backslash and comma. `[[` is matched as a
// single token before the one-character class is tried.
const PUNCTUATION = /\[\[|[(){}[\]\\,]/;
joshgoebel marked this conversation as resolved.
Show resolved Hide resolved

return {
name: 'R',

// only in Haskell, not R
illegal: /->/,
keywords: {
$pattern: IDENT_RE,
keyword:
Expand Down Expand Up @@ -56,6 +63,7 @@ export default function(hljs) {
'standardGeneric substitute sum switch tan tanh tanpi tracemem ' +
'trigamma trunc unclass untracemem UseMethod xtfrm',
},

contains: [
// Roxygen comments
hljs.COMMENT(
Expand All @@ -69,7 +77,7 @@ export default function(hljs) {
// preventing highlighting. This code is example R code, so nested
// doctags shouldn’t be treated as such. See
// `test/markup/r/roxygen.txt` for an example.
className: 'doctag',
scope: 'doctag',
begin: '@examples',
starts: {
contains: [
Expand All @@ -89,12 +97,12 @@ export default function(hljs) {
{
// Handle `@param` to highlight the parameter name following
// after.
className: 'doctag',
scope: 'doctag',
begin: '@param',
end: /$/,
contains: [
{
className: 'variable',
scope: 'variable',
variants: [
{ begin: IDENT_RE },
{ begin: /`(?:\\.|[^`\\])+`/ }
Expand All @@ -104,11 +112,11 @@ export default function(hljs) {
]
},
{
className: 'doctag',
scope: 'doctag',
begin: /@[a-zA-Z]+/
},
{
className: 'keyword',
scope: 'keyword',
begin: /\\[a-zA-Z]+/,
}
]
Expand All @@ -118,7 +126,7 @@ export default function(hljs) {
hljs.HASH_COMMENT_MODE,

{
className: 'string',
scope: 'string',
klmr marked this conversation as resolved.
Show resolved Hide resolved
contains: [hljs.BACKSLASH_ESCAPE],
variants: [
hljs.END_SAME_AS_BEGIN({ begin: /[rR]"(-*)\(/, end: /\)(-*)"/ }),
Expand All @@ -131,48 +139,80 @@ export default function(hljs) {
{begin: "'", end: "'", relevance: 0}
],
},

// Matching numbers immediately following punctuation and operators is
// tricky since we need to look at the character ahead of a number to
// ensure the number is not part of an identifier, and we cannot use
// negative look-behind assertions. So instead we explicitly handle all
// possible combinations of (operator|punctuation), number.
// TODO: replace with negative look-behind when available
// { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/ },
// { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+([pP][+-]?\d+)?[Li]?/ },
// { begin: /(?<![a-zA-Z0-9._])(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?[Li]?/ }
{
relevance: 0,
className: {
2: "number"
},
variants: [
// TODO: replace with negative look-behind when available
// { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/ },
// { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+([pP][+-]?\d+)?[Li]?/ },
// { begin: /(?<![a-zA-Z0-9._])(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?[Li]?/ }
{
// Special case: only hexadecimal binary powers can contain fractions.
scope: {
1: 'operator',
2: 'number'
},
match: [
/[^a-zA-Z0-9._]/, // not part of an identifier
/0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/
OPERATORS,
NUMBER_TYPES
]
},
{
scope: {
1: 'operator',
2: 'number'
},
match: [
/%[^%]*%/,
joshgoebel marked this conversation as resolved.
Show resolved Hide resolved
NUMBER_TYPES
]
},
{
scope: {
1: 'punctuation',
2: 'number'
},
match: [
/[^a-zA-Z0-9._]/, // not part of an identifier
/0[xX][0-9a-fA-F]+(?:[pP][+-]?\d+)?[Li]?/
PUNCTUATION_RE,
NUMBER_TYPES_RE
klmr marked this conversation as resolved.
Show resolved Hide resolved
]
},
{
scope: { 2: 'number' },
relevance: 0,
match: [
/[^a-zA-Z0-9._]/, // not part of an identifier
/(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?[Li]?/
/[^a-zA-Z0-9._]|^/, // not part of an identifier
klmr marked this conversation as resolved.
Show resolved Hide resolved
NUMBER_TYPES
]
}
]
},

// Operators/punctuation when they're not directly followed by numbers
{
// infix operator
begin: '%',
end: '%'
scope: 'operator',
variants: [
{
relevance: 0,
match: OPERATORS
},
{ match: /%[^%]*%/ }
joshgoebel marked this conversation as resolved.
Show resolved Hide resolved
]
},
// relevance boost for assignment

{
begin: regex.concat(SIMPLE_IDENT, "\\s+<-\\s+")
scope: 'punctuation',
relevance: 0,
match: PUNCTUATION
},

{
// escaped identifier
// Escaped identifier
begin: '`',
end: '`',
contains: [
Expand Down
12 changes: 6 additions & 6 deletions test/markup/r/names.expect.txt
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
<span class="hljs-comment"># Valid names</span>

a1_foo, A1_FOO, .foo_, ._foo, Bar.42, foo..1, ., ._, .., ..., ..1, <span class="hljs-built_in">c</span>, <span class="hljs-built_in">T</span>, <span class="hljs-built_in">F</span>, ._1
a1_foo<span class="hljs-punctuation">,</span> A1_FOO<span class="hljs-punctuation">,</span> .foo_<span class="hljs-punctuation">,</span> ._foo<span class="hljs-punctuation">,</span> Bar.42<span class="hljs-punctuation">,</span> foo..1<span class="hljs-punctuation">,</span> .<span class="hljs-punctuation">,</span> ._<span class="hljs-punctuation">,</span> ..<span class="hljs-punctuation">,</span> ...<span class="hljs-punctuation">,</span> ..1<span class="hljs-punctuation">,</span> <span class="hljs-built_in">c</span><span class="hljs-punctuation">,</span> <span class="hljs-built_in">T</span><span class="hljs-punctuation">,</span> <span class="hljs-built_in">F</span><span class="hljs-punctuation">,</span> ._1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Long-term I do worry this makes the tests much harder to read, but I don't have a quick solution.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, it hasn’t been great during debugging. :-(

Maybe I should just replace the commas with spaces?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe. That wouldn't help in actual code examples though... :-)


<span class="hljs-comment"># Reserved Words</span>

<span class="hljs-literal">NA</span>, <span class="hljs-literal">NA_integer_</span>, <span class="hljs-literal">NA_real_</span>, <span class="hljs-literal">NA_character_</span>, <span class="hljs-literal">NA_complex_</span>, <span class="hljs-literal">NULL</span>, <span class="hljs-literal">NaN</span>, <span class="hljs-literal">Inf</span>
<span class="hljs-literal">NA</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">NA_integer_</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">NA_real_</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">NA_character_</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">NA_complex_</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">NULL</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">NaN</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">Inf</span>

<span class="hljs-comment"># Keywords</span>

<span class="hljs-keyword">function</span>, <span class="hljs-keyword">while</span>, <span class="hljs-keyword">repeat</span>, <span class="hljs-keyword">for</span>, <span class="hljs-keyword">if</span>, <span class="hljs-keyword">in</span>, <span class="hljs-keyword">else</span>, <span class="hljs-keyword">next</span>, <span class="hljs-keyword">break</span>
<span class="hljs-keyword">function</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">while</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">repeat</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">for</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">if</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">in</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">else</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">next</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">break</span>

<span class="hljs-comment"># Not reserved</span>

NULLa, NULL1, NULL., `NULL`, <span class="hljs-string">&#x27;NULL&#x27;</span>, NA_foo_, na_real_, Function, for.
NULLa<span class="hljs-punctuation">,</span> NULL1<span class="hljs-punctuation">,</span> NULL.<span class="hljs-punctuation">,</span> `NULL`<span class="hljs-punctuation">,</span> <span class="hljs-string">&#x27;NULL&#x27;</span><span class="hljs-punctuation">,</span> NA_foo_<span class="hljs-punctuation">,</span> na_real_<span class="hljs-punctuation">,</span> Function<span class="hljs-punctuation">,</span> for.

<span class="hljs-comment"># Primitive built-ins</span>

<span class="hljs-built_in">return</span>, <span class="hljs-built_in">switch</span>, <span class="hljs-built_in">sum</span>
<span class="hljs-built_in">return</span><span class="hljs-punctuation">,</span> <span class="hljs-built_in">switch</span><span class="hljs-punctuation">,</span> <span class="hljs-built_in">sum</span>

<span class="hljs-comment"># Non-primitive base functions</span>

stop, try
stop<span class="hljs-punctuation">,</span> try

<span class="hljs-comment"># Quoted identifiers</span>

Expand Down
10 changes: 6 additions & 4 deletions test/markup/r/numbers.expect.txt
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
<span class="hljs-number">1</span> <span class="hljs-comment"># Regression caused numbers at beginning not to be highlighted.</span>

<span class="hljs-comment"># Numbers</span>

<span class="hljs-number">0</span>, <span class="hljs-number">01</span>, <span class="hljs-number">08</span>, <span class="hljs-number">123456</span>, <span class="hljs-number">1256.701</span>, <span class="hljs-number">123e3</span>, <span class="hljs-number">123E+3</span>, <span class="hljs-number">1.23e-3</span>, <span class="hljs-number">1.23E3</span>, <span class="hljs-number">.25</span>, <span class="hljs-number">2.</span>
<span class="hljs-number">0</span><span class="hljs-punctuation">,</span> <span class="hljs-number">01</span><span class="hljs-punctuation">,</span> <span class="hljs-number">08</span><span class="hljs-punctuation">,</span> <span class="hljs-number">123456</span><span class="hljs-punctuation">,</span> <span class="hljs-number">1256.701</span><span class="hljs-punctuation">,</span> <span class="hljs-number">123e3</span><span class="hljs-punctuation">,</span> <span class="hljs-number">123E+3</span><span class="hljs-punctuation">,</span> <span class="hljs-number">1.23e-3</span><span class="hljs-punctuation">,</span> <span class="hljs-number">1.23E3</span><span class="hljs-punctuation">,</span> <span class="hljs-number">.25</span><span class="hljs-punctuation">,</span> <span class="hljs-number">2.</span>

<span class="hljs-comment"># Integers</span>

<span class="hljs-number">123L</span>, -<span class="hljs-number">50L</span>
<span class="hljs-number">123L</span><span class="hljs-punctuation">,</span> <span class="hljs-operator">-</span><span class="hljs-number">50L</span>

<span class="hljs-comment"># Imaginary numbers</span>

<span class="hljs-number">123i</span>, -<span class="hljs-number">123i</span>, <span class="hljs-number">1.2e-3i</span>, <span class="hljs-number">1.i</span>, <span class="hljs-number">.0i</span>
<span class="hljs-number">123i</span><span class="hljs-punctuation">,</span> <span class="hljs-operator">-</span><span class="hljs-number">123i</span><span class="hljs-punctuation">,</span> <span class="hljs-number">1.2e-3i</span><span class="hljs-punctuation">,</span> <span class="hljs-number">1.i</span><span class="hljs-punctuation">,</span> <span class="hljs-number">.0i</span>

<span class="hljs-comment"># Hex numbers</span>

<span class="hljs-number">0x0</span>, <span class="hljs-number">0xabcdefABCDEF01234</span>, <span class="hljs-number">0xabcp123</span>, <span class="hljs-number">0xabcP-123</span>, <span class="hljs-number">0x1.2p2</span>, <span class="hljs-number">0xa.bp-3i</span>
<span class="hljs-number">0x0</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0xabcdefABCDEF01234</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0xabcp123</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0xabcP-123</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0x1.2p2</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0xa.bp-3i</span>

<span class="hljs-comment"># Invalid/not literals (for reference)</span>

Expand Down
2 changes: 2 additions & 0 deletions test/markup/r/numbers.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
1 # Regression caused numbers at beginning not to be highlighted.

# Numbers

0, 01, 08, 123456, 1256.701, 123e3, 123E+3, 1.23e-3, 1.23E3, .25, 2.
Expand Down
Loading