Generalize "Arrow Words" and fix quoting/setting/etc.
Words containing `<` and `>` have presented historical problems, such
as not being able to assign them via `>:` or quote them with `'`.
There was also an open question about exactly which forms of such
words should be allowed, given the potential for conflict with TAG!.

This commit patches in a self-contained bit of arrow-word code that
lets you build any WORD! you like out of `>`, `<`, `|`, `+`, `-`, `=`.
The only rule is that such words cannot both begin with a `<` and
end with a `>`.

A special exemption is currently made for `<>`, which has a concept
of being both a tag and executable that has not yet been implemented.

Includes tests of transcode for the various forms.
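
A rough illustration of the rule, written in the style of the test files
(not part of the commit itself, and assuming the scanner behaves as the
message above describes):

    (
        ; made only of arrow characters, so these scan as WORD!s...
        all [
            word? first [->]
            word? first [>=]
            word? first [|->]
            word? first [-<>-]

            ; ...but both starting with `<` and ending with `>` is TAG! territory
            tag? first [<->]
            tag? first [<foo>]
        ]
    )
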
hostilefork committed Sep 23, 2020
1 parent fbc1107 commit 5a617b8
Showing 3 changed files with 112 additions and 57 deletions.
124 changes: 68 additions & 56 deletions src/core/l-scan.c
@@ -859,6 +859,16 @@ static REBLEN Prescan_Token(SCAN_STATE *ss)
DEAD_END;
}

// We'd like to test the lex-flag fingerprint for characters that could be
// in an arrow word, but all 16 bits are used.  Here's a set of everything
// *but* `=`.  It might be that the backslash flag for invalid words is
// wasted and could be retaken if backslash were checked for in another way.
//
#define LEX_FLAGS_ARROW_EXCEPT_EQUAL \
(LEX_FLAG(LEX_SPECIAL_GREATER) | LEX_FLAG(LEX_SPECIAL_LESSER) | \
LEX_FLAG(LEX_SPECIAL_PLUS) | LEX_FLAG(LEX_SPECIAL_MINUS) | \
LEX_FLAG(LEX_SPECIAL_BAR))


//
// Locate_Token_May_Push_Mold: C
@@ -1021,6 +1031,64 @@ static enum Reb_Token Locate_Token_May_Push_Mold(

enum Reb_Token token; // only set if falling through to `scan_word`

// Up-front, do a check for "arrow words". This test bails out if any
// non-arrow word characters are seen.
//
if (*cp == '<' and *ss->end == '>') { // "tag-shaped" <...> so not a word
if (cp + 1 == ss->end) // `<>`
return TOKEN_WORD; // !!! TBD: TOKEN_TAG with executable "magic"

// Fall through to old validation in switch() for tag validation
}
else if (
0 == (flags & ~( // check flags for any obvious non-arrow characters
LEX_FLAGS_ARROW_EXCEPT_EQUAL
// don't count LEX_SPECIAL_AT; only valid at head, so not in flags
| LEX_FLAG(LEX_SPECIAL_COLON) // may be last char if SET-WORD!
| LEX_FLAG(LEX_SPECIAL_WORD) // `=` is WORD!-character, sets this
))
){
const REBYTE *temp = cp;
if (*temp == ':' or *temp == '@')
++temp;

while (
*temp == '<' or *temp == '>'
or *temp == '+' or *temp == '-'
or *temp == '=' or *temp == '|'
){
++temp;
if (temp != ss->end)
continue;
if (*cp == '<' and *temp == '/') {
//
// The prescan for </foo> thinks that it might be a PATH! like
// `</foo` so it stops at the slash. To solve this, we only
// support the `</foo>` and `<foo />` cases of slashes in TAG!.
// We know this is not the latter, because we did not hit a
// space while we were processing. For the former case, we
// look to see if we get to a `>` before we hit a delimiter.
//
const REBYTE *seek = temp + 1;
for (; not IS_LEX_DELIMIT(*seek); ++seek) {
if (*seek == '>') { // hit close of tag first
ss->end = seek + 1;
return TOKEN_TAG;
}
}
// Hit a delimiter first, so go ahead with our arrow and let
// the scan of a PATH! proceed after that.
}
if (*cp == ':')
return TOKEN_GET;
if (*cp == '@')
return TOKEN_SYM;
return TOKEN_WORD;
}
if (*temp == ':' and temp + 1 == ss->end)
return TOKEN_SET;
}

switch (GET_LEX_CLASS(*cp)) {
case LEX_CLASS_DELIMIT:
switch (GET_LEX_VALUE(*cp)) {
@@ -1190,16 +1258,6 @@ static enum Reb_Token Locate_Token_May_Push_Mold(
if (cp[1] == '\'')
fail (Error_Syntax(ss, TOKEN_WORD));

// Various special cases of < << <> >> > >= <=
if (cp[1] == '<' or cp[1] == '>') {
cp++;
if (cp[1] == '<' or cp[1] == '>' or cp[1] == '=')
++cp;
if (not IS_LEX_DELIMIT(cp[1]))
fail (Error_Syntax(ss, TOKEN_GET));
ss->end = cp + 1;
return TOKEN_GET;
}
token = TOKEN_GET;
++cp; // skip ':'
goto scanword;
@@ -1221,41 +1279,10 @@ static enum Reb_Token Locate_Token_May_Push_Mold(
goto scanword;

case LEX_SPECIAL_GREATER:
if (IS_LEX_DELIMIT(cp[1]))
return TOKEN_WORD;
if (cp[1] == '>') {
if (IS_LEX_DELIMIT(cp[2]))
return TOKEN_WORD;
fail (Error_Syntax(ss, TOKEN_WORD));
}
goto special_lesser;

case LEX_SPECIAL_LESSER:
special_lesser:;
if (
IS_LEX_ANY_SPACE(cp[1])
or cp[1] == ']' or cp[1] == ')' or cp[1] == 0
){
return TOKEN_WORD; // changed for </tag>
}
if (
(cp[0] == '<' and cp[1] == '<')
or cp[1] == '='
or cp[1] == '>'
){
if (IS_LEX_DELIMIT(cp[2]))
return TOKEN_WORD;
fail (Error_Syntax(ss, TOKEN_WORD));
}
if (
cp[0] == '<' and (cp[1] == '-' or cp[1] == '|')
and (IS_LEX_DELIMIT(cp[2]) or IS_LEX_ANY_SPACE(cp[2]))
){
return TOKEN_WORD; // "<|" and "<-"
}
if (GET_LEX_VALUE(*cp) == LEX_SPECIAL_GREATER)
fail (Error_Syntax(ss, TOKEN_WORD));

cp = Skip_Tag(cp);
if (not cp)
fail (Error_Syntax(ss, TOKEN_TAG));
@@ -1288,24 +1315,12 @@ static enum Reb_Token Locate_Token_May_Push_Mold(
token = TOKEN_WORD;
goto scanword;
}
if (
*cp == '>'
and (IS_LEX_DELIMIT(cp[1]) or IS_LEX_ANY_SPACE(cp[1]))
){
return TOKEN_WORD; // Special exemption for ->
}
fail (Error_Syntax(ss, TOKEN_WORD));
}
token = TOKEN_WORD;
goto scanword;

case LEX_SPECIAL_BAR:
if (
cp[1] == '>'
and (IS_LEX_DELIMIT(cp[2]) or IS_LEX_ANY_SPACE(cp[2]))
){
return TOKEN_WORD; // for `|>`
}
token = TOKEN_WORD;
goto scanword;

@@ -1520,9 +1535,6 @@ static enum Reb_Token Locate_Token_May_Push_Mold(
if (HAS_LEX_FLAG(flags, LEX_SPECIAL_LESSER)) {
// Allow word<tag> and word</tag> but not word< word<= word<> etc.

if (*cp == '=' and cp[1] == '<' and IS_LEX_DELIMIT(cp[2]))
return TOKEN_WORD; // enable `=<`

cp = Skip_To_Byte(cp, ss->end, '<');
if (
cp[1] == '<' or cp[1] == '>' or cp[1] == '='
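The slash-handling comment in the hunk above suggests behavior along these
lines; a sketch in the test-file style (not part of the commit, and an
inference from the scanner comments rather than from its test suite):

    (
        ; closing-tag and empty-element shapes keep loading as single TAG!s,
        ; and `<>` gets the special WORD! exemption noted in the commit message
        all [
            tag? first [</foo>]
            tag? first [<foo />]
            word? first [<>]
        ]
    )
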
2 changes: 1 addition & 1 deletion tests/convert/load.test.reb
@@ -41,6 +41,6 @@
error? trap [load "[a>]"]
error? trap [load "[a+<]"]
error? trap [load "[1<]"]
error? trap [load "[+<]"]
error? trap [load "[+a<]"]
]
)]
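
The intent of the swapped test case, sketched in the same trap style (an
illustration only, assuming the behavior the commit message describes):

    (
        ; `+<` is made only of arrow characters, so it should now load,
        ; while `+a<` mixes in an ordinary word character and stays an error
        all [
            block? load "[+<]"
            error? trap [load "[+a<]"]
        ]
    )
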
43 changes: 43 additions & 0 deletions tests/datatypes/word.test.reb
@@ -157,3 +157,46 @@
a-value: 'a
:a-value == a-value
)

[#1461 #1478 (
for-each [str] [
{<} {+} {|} {=} {-} {>}

{>=} {=|<} {<><} {-=>} {<-<=}

{<<} {>>} {>>=} {<<=} {>>=<->}

{|->} {-<=>-} {-<>-} {>=<}
][
[word pos]: transcode str
assert [pos = ""]

assert [word = to word! str]
assert [str = as text! word]

[path pos]: transcode unspaced ["a/" str "/b"]
assert [pos = ""]
assert [path = compose 'a/(word)/b]

[block pos]: transcode unspaced ["[" str "]"]
assert [pos = ""]
assert [block = reduce [word]]

[q pos]: transcode unspaced ["'" str]
assert [pos = ""]
assert [q = quote word]

[s pos]: transcode unspaced [str ":"]
assert [pos = ""]
assert [s = as set-word! word]

[g pos]: transcode unspaced [":" str]
assert [pos = ""]
assert [g = as get-word! word]

[l pos]: transcode unspaced ["@" str]
assert [pos = ""]
assert [l = as sym-word! word]
]
true)
]
