Skip to content

Commit

Permalink
FIX: allow various variants of arrow-like words
Browse files Browse the repository at this point in the history
  • Loading branch information
Oldes committed Apr 2, 2021
1 parent c2833d9 commit cefbc96
Show file tree
Hide file tree
Showing 2 changed files with 238 additions and 28 deletions.
138 changes: 110 additions & 28 deletions src/core/l-scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,55 @@
return 0;
}

/***********************************************************************
**
*/ static const REBYTE* Skip_Left_Arrow(const REBYTE* cp)
/*
** Skip the entire contents of a `left arrow` like words.
** Zero is returned on errors.
**
***********************************************************************/
{
while (*cp == '<') cp++;
while (*cp) {
if (*cp == '-' || *cp == '=' || *cp == '>' || *cp == '~') {
cp++;
continue;
}
if (IS_LEX_DELIMIT(*cp)) break;
if (*cp == ':') {
cp++;
break;
}
return 0;
}
return cp;
}

/***********************************************************************
**
*/ static const REBYTE* Skip_Right_Arrow(const REBYTE* cp)
/*
** Skip the entire contents of a `right arrow` like words.
** Zero is returned on errors.
**
***********************************************************************/
{
while (*cp) {
if (*cp == '-' || *cp == '=' || *cp == '>' || *cp == '~') {
cp++;
continue;
}
if (IS_LEX_DELIMIT(*cp)) break;
if (*cp == ':') {
cp++;
break;
}
return 0;
}
return cp;
}


/***********************************************************************
**
Expand Down Expand Up @@ -777,6 +826,7 @@
REBCNT flags;
const REBYTE *cp;
REBINT type;
REBYTE *np = NULL;

flags = Prescan(scan_state);
cp = scan_state->begin;
Expand Down Expand Up @@ -847,9 +897,9 @@
if (ONLY_LEX_FLAG(flags, LEX_SPECIAL_WORD)) return type;
goto scanword;
}
if (cp[0] == '<' || cp[0] == '>') {
scan_state->end = cp+1;
return -TOKEN_REFINE;
if (*cp == '<' || *cp == '>') {
type = TOKEN_REFINE;
goto scan_arrow_word;
}
scan_state->end = cp;
return TOKEN_WORD;
Expand Down Expand Up @@ -902,10 +952,8 @@
// Various special cases of < << <> >> > >= <=
if (cp[1] == '<' || cp[1] == '>') {
cp++;
if (cp[1] == '<' || cp[1] == '>' || cp[1] == '=') cp++;
if (!IS_LEX_DELIMIT(cp[1])) return -TOKEN_GET;
scan_state->end = cp+1;
return TOKEN_GET;
type = TOKEN_GET;
goto scan_arrow_word;
}
if (cp[1] == '%' && IS_LEX_DELIMIT(cp[2])) {
if (cp[2] == '"' || cp[2] == '/') { // no :%"" or :%/
Expand All @@ -922,25 +970,22 @@
if (IS_LEX_NUMBER(cp[1])) return -TOKEN_LIT; // no '2nd
if (cp[1] == ':') return -TOKEN_LIT; // no ':X
if (ONLY_LEX_FLAG(flags, LEX_SPECIAL_WORD)) return TOKEN_LIT; /* common case */
cp++;
if (*cp == '<' || *cp == '>') {
type = TOKEN_LIT;
goto scan_arrow_word;
}
if (!IS_LEX_WORD(cp[1])) {
// Various special cases of < << <> >> > >= <=
if ((cp[1] == '-' || cp[1] == '+') && IS_LEX_NUMBER(cp[2])) return -TOKEN_WORD;
if (cp[1] == '<' || cp[1] == '>') {
cp++;
if (cp[1] == '<' || cp[1] == '>' || cp[1] == '=') cp++;
if (!IS_LEX_DELIMIT(cp[1])) return -TOKEN_LIT;
scan_state->end = cp+1;
return TOKEN_LIT;
}
if (cp[1] == '%' && IS_LEX_DELIMIT(cp[2])) {
if (cp[2] == '"' || cp[2] == '/') { // no '%"" or '%/
scan_state->end = cp + 3;
if ((*cp == '-' || *cp == '+') && IS_LEX_NUMBER(cp[1])) return -TOKEN_WORD;
if (*cp == '%' && IS_LEX_DELIMIT(cp[1])) {
if (cp[1] == '"' || cp[1] == '/') { // no '%"" or '%/
scan_state->end = cp + 2;
return -TOKEN_LIT;
}
return TOKEN_LIT; // allowed '%
}
}
if (cp[1] == '\'') return -TOKEN_LIT; // no ''foo
if (*cp == '\'') return -TOKEN_LIT; // no ''foo
type = TOKEN_LIT;
goto scanword;

Expand All @@ -954,15 +999,28 @@

case LEX_SPECIAL_GREATER:
if (IS_LEX_DELIMIT(cp[1])) return TOKEN_WORD; // RAMBO 3903
if (cp[1] == '>') {
if (IS_LEX_DELIMIT(cp[2])) return TOKEN_WORD;
return -TOKEN_WORD;
if (cp[1] == '>' || cp[1] == '=' || cp[1] == '-' || cp[1] == '~') {
np = Skip_Right_Arrow(cp);
if (np != NULL) {
scan_state->end = np;
return (np[-1] == ':' ? TOKEN_SET : TOKEN_WORD);
}
}
return -TOKEN_WORD;

case LEX_SPECIAL_LESSER:
if (IS_LEX_ANY_SPACE(cp[1]) || cp[1] == ']' || cp[1] == 0) return TOKEN_WORD; // CES.9121 Was LEX_DELIMIT - changed for </tag>
if ((cp[0] == '<' && cp[1] == '<') || cp[1] == '=' || cp[1] == '>') {
if (IS_LEX_DELIMIT(cp[2])) return TOKEN_WORD;
return -TOKEN_WORD;

if (IS_LEX_DELIMIT(cp[2]) && (cp[1] == '>' || cp[1] == '=' || cp[1] == '<')) {
return TOKEN_WORD; // common cases: <> <= <<
}

if (cp[1] == '<' || cp[1] == '>' || cp[1] == '=' || cp[1] == '-' || cp[1] == '~') {
np = Skip_Left_Arrow(cp);
if (np != NULL) {
scan_state->end = np;
return (np[-1] == ':' ? TOKEN_SET : TOKEN_WORD);
}
}
if (GET_LEX_VALUE(*cp) == LEX_SPECIAL_GREATER) return -TOKEN_WORD;
cp = Skip_Tag(cp);
Expand Down Expand Up @@ -1169,8 +1227,33 @@
/*bogus: if (HAS_LEX_FLAG(flags, LEX_SPECIAL_GREATER) &&
Skip_To_Char(scan_state->begin, cp, '>')) return -TOKEN_WORD; */
scan_state->end = cp;
} else if (HAS_LEX_FLAG(flags, LEX_SPECIAL_GREATER)) return -type;
}
else if (HAS_LEX_FLAG(flags, LEX_SPECIAL_GREATER)) {
if (*cp == '=' || *cp == '-' || *cp == '~') {
np = Skip_Right_Arrow(cp);
if (np != NULL) {
scan_state->end = np;
return (np[-1] == ':' ? TOKEN_SET : type);
}
}
return -type;
}
return type;

scan_arrow_word:
// Various special cases of < << <> >> > >= <= <--- >--->
if (cp[0] == '<') {
np = Skip_Left_Arrow(cp);
if (!np) return -type;
scan_state->end = np;
return type;
}
else {
np = Skip_Right_Arrow(cp);
if (!np) return -type;
scan_state->end = np;
return type;
}
}


Expand Down Expand Up @@ -1822,7 +1905,6 @@ extern REBSER *Scan_Full_Block(SCAN_STATE *scan_state, REBYTE mode_char);
// (space and tab chars at tail are truncated and so accepted)
&& scan_state.end == scan_state.limit)
return Make_Word(cp, len);

return 0;
}

Expand Down
128 changes: 128 additions & 0 deletions src/tests/units/lexer-test.r3
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,134 @@ Rebol [

===end-group===

===start-group=== "Special arrow-like words"
;@@ https://github.com/Oldes/Rebol-issues/issues/1302
;@@ https://github.com/Oldes/Rebol-issues/issues/1318
;@@ https://github.com/Oldes/Rebol-issues/issues/1342
;@@ https://github.com/Oldes/Rebol-issues/issues/1478

--test-- "valid arrow-like words"
--assert word? try [load {<-->}]
--assert word? try [load {<==>}]
--assert word? try [load {<-==->}]
--assert word? try [load {<~~~>}]

--test-- "valid left-arrow-like words"
--assert word? try [load {<<}]
--assert word? try [load {<<<}]
--assert word? try [load {<<<<}]
--assert word? try [load {<<==}]
--assert word? try [load {<===}]
--assert word? try [load {<---}]
--assert word? try [load {<~~~}]
--assert all [block? b: try [load {<<<""}] parse b [word! string!]]

--test-- "valid right-arrow-like words"
--assert word? try [load {>>}]
--assert word? try [load {>>>}]
--assert word? try [load {>>>>}]
--assert word? try [load {==>>}]
--assert word? try [load {===>}]
--assert word? try [load {--->}]
--assert word? try [load {~~~>}]
--assert all [block? b: try [load {>>>""}] parse b [word! string!]]

--test-- "invalid cases"
--assert error? try [load {a<}]
--assert error? try [load {a>}]
--assert error? try [load {a<--}]
--assert error? try [load {a-->}]

--test-- "valid arrow-like lit-words"
--assert lit-word? try [load {'<-->}]
--assert lit-word? try [load {'<==>}]
--assert lit-word? try [load {'<-==->}]
--assert lit-word? try [load {'<~~~>}]

--test-- "valid left-arrow-like lit-words"
--assert lit-word? try [load {'<<}]
--assert lit-word? try [load {'<<<}]
--assert lit-word? try [load {'<<<<}]
--assert all [block? b: try [load {'<<<""}] parse b [lit-word! string!]]

--test-- "valid right-arrow-like lit-words"
--assert lit-word? try [load {'>>}]
--assert lit-word? try [load {'>>>}]
--assert lit-word? try [load {'>>>>}]
--assert lit-word? try [load {'==>>}]
--assert lit-word? try [load {'===>}]
--assert lit-word? try [load {'--->}]
--assert lit-word? try [load {'~~~>}]
--assert all [block? b: try [load {'>>>""}] parse b [lit-word! string!]]

--test-- "valid arrow-like get-words"
--assert get-word? try [load {:<-->}]
--assert get-word? try [load {:<==>}]
--assert get-word? try [load {:<-==->}]
--assert get-word? try [load {:<~~~>}]

--test-- "valid left-arrow-like get-words"
--assert get-word? try [load {:<<}]
--assert get-word? try [load {:<<<}]
--assert get-word? try [load {:<<<<}]
--assert all [block? b: try [load {:<<<""}] parse b [get-word! string!]]

--test-- "valid right-arrow-like get-words"
--assert get-word? try [load {:>>}]
--assert get-word? try [load {:>>>}]
--assert get-word? try [load {:>>>>}]
--assert get-word? try [load {:==>>}]
--assert get-word? try [load {:===>}]
--assert get-word? try [load {:--->}]
--assert get-word? try [load {:~~~>}]
--assert all [block? b: try [load {:>>>""}] parse b [get-word! string!]]

--test-- "valid arrow-like set-words"
--assert set-word? try [load {<-->:}]
--assert set-word? try [load {<==>:}]
--assert set-word? try [load {<-==->:}]
--assert set-word? try [load {<~~~>:}]

--test-- "valid left-arrow-like set-words"
--assert set-word? try [load {<<:}]
--assert set-word? try [load {<<<:}]
--assert set-word? try [load {<<<<:}]
--assert all [block? b: try [load {<<<:""}] parse b [set-word! string!]]

--test-- "valid right-arrow-like set-words"
--assert set-word? try [load {>>:}]
--assert set-word? try [load {>>>:}]
--assert set-word? try [load {>>>>:}]
--assert set-word? try [load {==>>:}]
--assert set-word? try [load {===>:}]
--assert set-word? try [load {--->:}]
--assert set-word? try [load {~~~>:}]
--assert all [block? b: try [load {>>>:""}] parse b [set-word! string!]]

--test-- "valid arrow-like refinements"
--assert refinement? try [load {/<-->}]
--assert refinement? try [load {/<==>}]
--assert refinement? try [load {/<-==->}]
--assert refinement? try [load {/<~~~>}]

--test-- "valid left-arrow-like refinements"
--assert refinement? try [load {/<<}]
--assert refinement? try [load {/<<<}]
--assert refinement? try [load {/<<<<}]
--assert all [block? b: try [load {/<<<""}] parse b [refinement! string!]]

--test-- "valid right-arrow-like refinements"
--assert refinement? try [load {/>>}]
--assert refinement? try [load {/>>>}]
--assert refinement? try [load {/>>>>}]
--assert refinement? try [load {/==>>}]
--assert refinement? try [load {/===>}]
--assert refinement? try [load {/--->}]
--assert refinement? try [load {/~~~>}]
--assert all [block? b: try [load {/>>>""}] parse b [refinement! string!]]

===end-group===

===start-group=== "Email"
--test-- "valid `emails`"
--assert email? load {name@where}
Expand Down

0 comments on commit cefbc96

Please sign in to comment.