Skip to content

Commit

Permalink
support malformed characters, the second
Browse files Browse the repository at this point in the history
Alternative to #44765. This disallows character literals that cannot be
created by iterating a UTF-8 string.

fixes #25072
  • Loading branch information
simeonschaub committed Apr 15, 2022
1 parent 487d0e6 commit 71683df
Show file tree
Hide file tree
Showing 7 changed files with 103 additions and 26 deletions.
9 changes: 9 additions & 0 deletions src/ast.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ JL_DLLEXPORT jl_sym_t *jl_acquire_sym;
JL_DLLEXPORT jl_sym_t *jl_release_sym;
JL_DLLEXPORT jl_sym_t *jl_acquire_release_sym;
JL_DLLEXPORT jl_sym_t *jl_sequentially_consistent_sym;
JL_DLLEXPORT jl_sym_t *jl_julia_char_sym;


static const uint8_t flisp_system_image[] = {
Expand Down Expand Up @@ -375,6 +376,7 @@ void jl_init_common_symbols(void)
jl_release_sym = jl_symbol("release");
jl_acquire_release_sym = jl_symbol("acquire_release");
jl_sequentially_consistent_sym = jl_symbol("sequentially_consistent");
jl_julia_char_sym = jl_symbol("julia_char");
}

JL_DLLEXPORT void jl_lisp_prompt(void)
Expand Down Expand Up @@ -584,6 +586,13 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m
ex = scm_to_julia_(fl_ctx, car_(e), mod);
temp = jl_new_struct(jl_quotenode_type, ex);
}
else if (sym == jl_julia_char_sym) {
value_t v = car_(e);
if (!(iscprim(v) && cp_class((cprim_t*)ptr(v)) == fl_ctx->uint32type))
jl_error("malformed julia char");
uint32_t c = *(uint32_t*)cp_data((cprim_t*)ptr(v));
temp = jl_box_char(c);
}
if (temp) {
JL_GC_POP();
return temp;
Expand Down
1 change: 1 addition & 0 deletions src/flisp/flisp.c
Original file line number Diff line number Diff line change
Expand Up @@ -2396,6 +2396,7 @@ static void lisp_init(fl_context_t *fl_ctx, size_t initial_heapsize)
#endif

fl_ctx->jl_sym = symbol(fl_ctx, "julia_value");
fl_ctx->jl_char_sym = symbol(fl_ctx, "julia_char");

fl_ctx->the_empty_vector = tagptr(alloc_words(fl_ctx, 1), TAG_VECTOR);
vector_setsize(fl_ctx->the_empty_vector, 0);
Expand Down
1 change: 1 addition & 0 deletions src/flisp/flisp.h
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,7 @@ struct _fl_context_t {
value_t apply_func, apply_v, apply_e;

value_t jl_sym;
value_t jl_char_sym;
// persistent buffer (avoid repeated malloc/free)
// for julia_extensions.c: normalize
size_t jlbuflen;
Expand Down
50 changes: 50 additions & 0 deletions src/flisp/julia_extensions.c
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,55 @@ value_t fl_string2normsymbol(fl_context_t *fl_ctx, value_t *args, uint32_t nargs
return symbol(fl_ctx, normalize(fl_ctx, (char*)cvalue_data(args[0])));
}

/*
 * Fold the continuation bytes that follow a lead byte into `u`.
 *
 *   s, n  byte buffer and its length
 *   i     in: index of the lead byte; out: index of the first byte that was
 *         NOT folded into the result (always at least one past the lead byte)
 *   u     the lead byte already shifted into bits 31..24
 *
 * Up to three bytes of the form 10xxxxxx are OR-ed in at bits 23..16, 15..8
 * and 7..0. Decoding stops early at the end of the buffer, at the first byte
 * that is not a continuation byte, or once the lead byte's range says no
 * further continuation is expected. The (possibly partial) value is returned.
 */
static uint32_t _iterate_continued(uint8_t *s, size_t n, size_t *i, uint32_t u) {
    /* smallest `u` that still expects a 1st / 2nd / 3rd continuation byte */
    static const uint32_t expects_more[3] = { 0xc0000000, 0xe0000000, 0xf0000000 };
    size_t next = *i + 1; /* the lead byte itself is always consumed */

    if (u >= expects_more[0]) {
        for (int k = 0; k < 3; k++) {
            if (next >= n || u < expects_more[k])
                break;
            uint8_t cont = s[next];
            if ((cont & 0xc0) != 0x80)
                break; /* not a 10xxxxxx continuation byte */
            u |= (uint32_t)cont << (16 - 8 * k);
            next++;
        }
    }

    *i = next;
    return u;
}

/*
 * Decode the byte string `s` of length `n` as exactly one character.
 *
 * Returns the character's raw bits (first byte in bits 31..24, any accepted
 * continuation bytes below it), or (uint32_t)-1 when the string is empty,
 * longer than 4 bytes, or has bytes left over after the first character.
 */
static uint32_t _string_only_julia_char(uint8_t *s, size_t n) {
    const uint32_t not_a_char = (uint32_t)-1;

    if (n == 0 || n > 4)
        return not_a_char;

    uint8_t lead = s[0];
    uint32_t bits = (uint32_t)lead << 24;
    size_t consumed = 0;

    if (lead < 0x80 || lead > 0xf7)
        consumed = 1; /* ASCII, or a byte that never takes continuations */
    else
        bits = _iterate_continued(s, n, &consumed, bits);

    /* anything left over means the string holds more than one character */
    return consumed < n ? not_a_char : bits;
}

/*
 * flisp builtin `string.only-julia-char`.
 *
 * Takes exactly one argument, which must be a string. If the helper
 * `_string_only_julia_char` accepts its bytes, the result is the two-element
 * list `(julia_char <uint32>)`; otherwise the result is `fl_ctx->F`.
 * NOTE(review): argcount/type_error are presumably non-returning on failure;
 * confirm against flisp.
 */
value_t fl_string_only_julia_char(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) {
    argcount(fl_ctx, "string.only-julia-char", nargs, 1);
    if (!fl_isstring(fl_ctx, args[0]))
        type_error(fl_ctx, "string.only-julia-char", "string", args[0]);

    size_t nbytes = cv_len((cvalue_t*)ptr(args[0]));
    uint8_t *bytes = (uint8_t*)cvalue_data(args[0]);
    uint32_t bits = _string_only_julia_char(bytes, nbytes);

    if (bits != (uint32_t)-1) {
        /* wrap the raw bits so later stages can recognise them by the julia_char tag */
        value_t boxed = mk_uint32(fl_ctx, bits);
        return fl_list2(fl_ctx, fl_ctx->jl_char_sym, boxed);
    }
    return fl_ctx->F;
}

static const builtinspec_t julia_flisp_func_info[] = {
{ "skip-ws", fl_skipws },
{ "accum-julia-symbol", fl_accum_julia_symbol },
Expand All @@ -371,6 +420,7 @@ static const builtinspec_t julia_flisp_func_info[] = {
{ "strip-op-suffix", fl_julia_strip_op_suffix },
{ "underscore-symbol?", fl_julia_underscore_symbolp },
{ "string->normsymbol", fl_string2normsymbol },
{ "string.only-julia-char", fl_string_only_julia_char },
{ NULL, NULL }
};

Expand Down
13 changes: 6 additions & 7 deletions src/julia-parser.scm
Original file line number Diff line number Diff line change
Expand Up @@ -2481,13 +2481,12 @@
(write-char (not-eof-1 (read-char (ts:port s)))
b))
(loop (read-char (ts:port s))))))
(let ((str (unescape-string (io.tostring! b))))
(let ((len (string-length str)))
(if (= len 1)
(string.char str 0)
(if (= len 0)
(error "invalid empty character literal")
(error "character literal contains multiple characters")))))))))
(let* ((str (unescape-string (io.tostring! b)))
(c (string.only-julia-char str)))
(or c
(if (= (string-length str) 0)
(error "invalid empty character literal")
(error "character literal contains multiple characters"))))))))

;; symbol/expression quote
((eq? t ':)
Expand Down
1 change: 1 addition & 0 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1472,6 +1472,7 @@ extern JL_DLLEXPORT jl_sym_t *jl_acquire_sym;
extern JL_DLLEXPORT jl_sym_t *jl_release_sym;
extern JL_DLLEXPORT jl_sym_t *jl_acquire_release_sym;
extern JL_DLLEXPORT jl_sym_t *jl_sequentially_consistent_sym;
extern JL_DLLEXPORT jl_sym_t *jl_julia_char_sym;

JL_DLLEXPORT enum jl_memory_order jl_get_atomic_order(jl_sym_t *order, char loading, char storing);
JL_DLLEXPORT enum jl_memory_order jl_get_atomic_order_checked(jl_sym_t *order, char loading, char storing);
Expand Down
54 changes: 35 additions & 19 deletions test/syntax.jl
Original file line number Diff line number Diff line change
Expand Up @@ -276,9 +276,6 @@ end
@test Meta.parse("'\"'") == Meta.parse("'\\\"'") == '"' == "\""[1] == '\42'

# issue #24558
@test_throws ParseError Meta.parse("'\\xff'")
@test_throws ParseError Meta.parse("'\\x80'")
@test_throws ParseError Meta.parse("'ab'")
@test '\u2200' == "\u2200"[1]

@test_throws ParseError Meta.parse("f(2x for x=1:10, y")
Expand Down Expand Up @@ -317,19 +314,16 @@ let p = 15
@test 2p+1 == 31 # not a hex float literal
end

function test_parseerror(str, msg)
try
Meta.parse(str)
@test false
catch e
@test isa(e,ParseError) && e.msg == msg
end
# Expands to `@test_throws ParseError(msg) Meta.parse(str)`, i.e. asserts that
# parsing `str` throws a ParseError built from `msg`.
macro test_parseerror(str, msg)
ex = :(@test_throws ParseError($(esc(msg))) Meta.parse($(esc(str))))
# args[2] of a :macrocall expression is its source-location node; overwrite it
# with the caller's `__source__` (presumably so failures are reported at the
# line that invoked @test_parseerror rather than at this definition).
ex.args[2] = __source__
return ex
end
test_parseerror("0x", "invalid numeric constant \"0x\"")
test_parseerror("0b", "invalid numeric constant \"0b\"")
test_parseerror("0o", "invalid numeric constant \"0o\"")
test_parseerror("0x0.1", "hex float literal must contain \"p\" or \"P\"")
test_parseerror("0x1.0p", "invalid numeric constant \"0x1.0\"")
@test_parseerror("0x", "invalid numeric constant \"0x\"")
@test_parseerror("0b", "invalid numeric constant \"0b\"")
@test_parseerror("0o", "invalid numeric constant \"0o\"")
@test_parseerror("0x0.1", "hex float literal must contain \"p\" or \"P\"")
@test_parseerror("0x1.0p", "invalid numeric constant \"0x1.0\"")

# issue #15798
@test Meta.lower(Main, Base.parse_input_line("""
Expand All @@ -345,8 +339,8 @@ test_parseerror("0x1.0p", "invalid numeric constant \"0x1.0\"")
""")::Expr) == 23341

# issue #15763
test_parseerror("if\nfalse\nend", "missing condition in \"if\" at none:1")
test_parseerror("if false\nelseif\nend", "missing condition in \"elseif\" at none:2")
@test_parseerror("if\nfalse\nend", "missing condition in \"if\" at none:1")
@test_parseerror("if false\nelseif\nend", "missing condition in \"elseif\" at none:2")

# issue #15828
@test Meta.lower(Main, Meta.parse("x...")) == Expr(:error, "\"...\" expression outside call")
Expand Down Expand Up @@ -2059,8 +2053,8 @@ end == 1
# issue #29982
@test Meta.parse("'a'") == 'a'
@test Meta.parse("'\U0061'") == 'a'
test_parseerror("''", "invalid empty character literal")
test_parseerror("'abc'", "character literal contains multiple characters")
@test_parseerror("''", "invalid empty character literal")
@test_parseerror("'abc'", "character literal contains multiple characters")

# optional soft scope: #28789, #33864

Expand Down Expand Up @@ -3357,3 +3351,25 @@ demo44723()::Any = Base.Experimental.@opaque () -> true ? 1 : 2
@test y == Core.svec(2, 3)
@test z == 4
end

@testset "issue 25072" begin
# A character literal is accepted when its (unescaped) contents form exactly
# one Char, including malformed ones such as a lone continuation byte;
# contents that would make up more than one Char are a parse error.
@test '\xc0\x80' == reinterpret(Char, 0xc0800000)
@test '\x80' == reinterpret(Char, 0x80000000)
@test '\xff' == reinterpret(Char, 0xff000000)
@test_parseerror "'\\xff\\xff\\xff\\xff'" "character literal contains multiple characters" # == reinterpret(Char, 0xffffffff)
@test '\uffff' == Char(0xffff)
@test '\U00002014' == Char(0x2014)
@test '\100' == reinterpret(Char, UInt32(0o100) << 24)
@test_parseerror "'\\100\\42'" "character literal contains multiple characters" # == reinterpret(Char, (UInt32(0o100) << 24) | (UInt32(0o42) << 16))
@test_parseerror "''" "invalid empty character literal"
@test_parseerror "'\\xff\\xff\\xff\\xff\\xff'" "character literal contains multiple characters"
@test_parseerror "'abcd'" "character literal contains multiple characters"
@test_parseerror "'\\uff\\xff'" "character literal contains multiple characters"
@test_parseerror "'\\xff\\uff'" "character literal contains multiple characters"
@test_parseerror "'\\xffa'" "character literal contains multiple characters"
@test_parseerror "'\\uffffa'" "character literal contains multiple characters"
@test_parseerror "'\\U00002014a'" "character literal contains multiple characters"
@test_parseerror "'\\1000'" "character literal contains multiple characters"
# an unterminated literal is reported as incomplete input, not as an error
@test Meta.isexpr(Meta.parse("'a"), :incomplete)
@test ''' == "'"[1]
end

0 comments on commit 71683df

Please sign in to comment.