Skip to content

Commit

Permalink
FEAT: Allow line-comments inside binary value specification.
Browse files Browse the repository at this point in the history
  • Loading branch information
Oldes committed Dec 7, 2020
1 parent e0bc23b commit 52a67db
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 8 deletions.
88 changes: 86 additions & 2 deletions src/core/l-scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,84 @@
return src;
}

/***********************************************************************
**
*/ const REBYTE *Scan_Quote_Binary(const REBYTE *src, SCAN_STATE *scan_state)
/*
**		Scan the braced body of a binary literal, removing spaces
**		and line comments.
**
**		src must point at the opening '{'. Scanning stops at the
**		first '}' found outside of a comment.
**
**		The collected bytes are put into the temporary BUF_MOLD
**		series (one byte per character), which is reset on entry
**		and terminated on success.
**
**		Skipped: space, TAB, CR, LF and everything from ';' up to
**		the end of that line. Each line break adds one to
**		scan_state->line_count (scan_state may be null); a CR LF
**		pair counts as a single line break.
**
**		Returns a pointer just past the closing '}', or 0 when:
**			- src does not start with '{'
**			- the input ends (NUL) before the closing '}'
**			- Scan_Char reports an invalid escape (-1)
**			- a character >= 0x80 is found, raw or escaped
**
***********************************************************************/
{
	REBINT chr;
	REBCNT lines = 0;
	REBSER *buf = BUF_MOLD;

	RESET_TAIL(buf);

	if (*src++ != '{') return 0;

	while (*src != '}') {
		chr = *src;

		switch (chr) {

		case 0:
			return 0; // Scan_state shows error location.

		case '^':
			chr = Scan_Char(&src);
			// The result is stored as a single byte below, so an escaped
			// code point >= 0x80 must be rejected here, same as a raw one;
			// otherwise it would be silently truncated.
			if (chr == -1 || chr >= 0x80) return 0;
			src--; // compensate for the src++ after the switch
			break;

		case ';':
			// Line comment: skip up to the end of the line.
			while (chr != 0) {
				chr = *++src;
				if (chr == '^') {
					chr = Scan_Char(&src);
					if (chr == -1) return 0;
					src--; // leave src on the last char of the escape
					if (chr == LF || chr == CR) goto new_line;
				}
				else if (chr == CR) {
					// Treat CR LF as one line break, as outside a comment.
					if (src[1] == LF) src++;
					goto new_line;
				}
				else if (chr == LF) {
					goto new_line;
				}
			}
			return 0; // end of input reached

		case CR:
			if (src[1] == LF) src++;
			// fall thru
		case LF:
		new_line:
			lines++;
			// fall thru
		case ' ':
		case TAB:
			src++;
			continue; // whitespace is not collected

		default:
			if (chr >= 0x80) return 0; // only 7-bit input is valid here
		}

		src++;

		if (SERIES_FULL(buf))
			Extend_Series(buf, 1);

		*BIN_SKIP(buf, buf->tail) = (REBYTE)chr;
		buf->tail++;
	}

	src++; // Skip ending brace.

	if (scan_state) scan_state->line_count += lines;

	STR_TERM(buf);

	return src;
}


/***********************************************************************
**
Expand Down Expand Up @@ -929,7 +1007,11 @@
if (*cp == '{') { /* BINARY #{12343132023902902302938290382} */
scan_state->end = scan_state->begin; /* save start */
scan_state->begin = cp;
cp = Scan_Quote(cp, scan_state); // stores result string in BUF_MOLD !!??
// Originally Scan_Quote was used here, collecting into BUF_MOLD, but that result was not used later.
// That wasted resources, because Scan_Quote collects Unicode (2 bytes per char).
// Scan_Quote_Binary collects ANSI and reports invalid input (like a Unicode char) much sooner.
// It also skips spaces and line comments, so these no longer have to be tested by Decode_Binary later.
cp = Scan_Quote_Binary(cp, scan_state); // stores result string in BUF_MOLD !!??
scan_state->begin = scan_state->end; /* restore start */
if (cp) {
scan_state->end = cp;
Expand Down Expand Up @@ -1436,7 +1518,9 @@ extern REBSER *Scan_Full_Block(SCAN_STATE *scan_state, REBYTE mode_char);
break;

case TOKEN_BINARY:
Scan_Binary(bp, len, value);
// BUF_MOLD holds the preprocessed ANSI result with comments and spaces removed;
// we still need to resolve the binary base (like `64#{`) from the input.
Scan_Binary(Scan_Binary_Base(bp, len), BIN_DATA(BUF_MOLD), BIN_LEN(BUF_MOLD), value);
LABEL_SERIES(VAL_SERIES(value), "scan binary");
break;

Expand Down
23 changes: 17 additions & 6 deletions src/core/l-types.c
Original file line number Diff line number Diff line change
Expand Up @@ -910,9 +910,9 @@ bad_hex: Trap0(RE_INVALID_CHARS);

/***********************************************************************
**
*/ const REBYTE *Scan_Binary(const REBYTE *cp, REBCNT len, REBVAL *value)
*/ REBINT *Scan_Binary_Base(const REBYTE *cp, REBCNT len)
/*
** Scan and convert binary strings.
** Scan for binary base
**
***********************************************************************/
{
Expand All @@ -926,14 +926,25 @@ bad_hex: Trap0(RE_INVALID_CHARS);
cp = ep;
}
cp++; // skip #
if (*cp++ != '{') return 0;
len -= 2;
if (*cp++ != '{' || (len - 2) < 1) return 0;
return base;
}

/***********************************************************************
**
*/ const REBYTE *Scan_Binary(REBINT base, const REBYTE *cp, REBCNT len, REBVAL *value)
/*
**		Scan and convert binary strings according to the given base
**		(like 2, 16, 64, 85).
**
**		cp/len describe the text to decode and value receives the
**		result. Returns whatever Decode_Binary returns, which is 0
**		when decoding fails.
**
***********************************************************************/
{
	// No need to check the base here; Decode_Binary handles any case.
	//
	// There is also no search for the closing '}' afterwards: the scanner
	// has already validated the input, and the preprocessed text it hands
	// over is collected up to (not including) the closing brace.
	return Decode_Binary(value, cp, len, base, '}', FALSE);
}
Expand Down
24 changes: 24 additions & 0 deletions src/tests/units/lexer-test.r3
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,30 @@ Rebol [

===end-group===

===start-group=== "BINARY"
; Lexer tests for binary! literals: ignored whitespace, line comments
; inside the braces, escaped characters and rejection of non-ASCII input.
--test-- {binary! with spaces}
; Spaces, newlines (^/), carriage returns (^M) and tabs (^-) between the
; digits must not change the decoded value, for bases 2, 16 and 64.
--assert #{00} = first transcode/only to binary! " #{0 0}"
--assert #{00} = first transcode/only to binary! "2#{0000 00 00}"
--assert #{00} = first transcode/only to binary! "2#{0000^/0000}"
--assert #{00} = first transcode/only to binary! "2#{0000^M0000}"
--assert #{01} = first transcode/only to binary! "2#{0000^-0001}"
--assert #{02} = first transcode/only to binary! "2#{0000^ 0010}"
--assert #{0001} = first transcode/only to binary! "16#{00 01}"
--assert #{0001} = first transcode/only to binary! "64#{AA E=}"

--test-- {binary! with comments inside}
;@@ https://github.com/Oldes/Rebol-wishes/issues/23
; A ';' comment runs to the end of its line; it may come before, after
; or in the middle of the data.
--assert #{00} = first transcode/only/error to binary! "#{;XXX^/00}"
--assert #{00} = first transcode/only/error to binary! "#{00;XXX^/}"
--assert #{0002} = first transcode/only/error to binary! "#{00;XXX^/02}"
--assert #{0002} = first transcode/only/error to binary! "#{00;XXX^M02}" ;CR is also comment stopper
--test-- {binary! with other valid escapes}
; ^(30) is the escape for the character "0", so the content reads as 0003.
--assert #{0003} = first transcode/only/error to binary! "#{^(30)^(30)03}"
--test-- {binary! with unicode char} ; is handled early
; A non-ASCII character inside the braces must produce an error value.
--assert error? first transcode/only/error to binary! "#{0č}"

===end-group===


===start-group=== "Special tests"
;if "true" <> get-env "CONTINUOUS_INTEGRATION" [
Expand Down

0 comments on commit 52a67db

Please sign in to comment.