Skip to content

Commit

Permalink
Allow quoted identifiers (#23)
Browse files Browse the repository at this point in the history
  • Loading branch information
rossberg authored May 21, 2024
1 parent f227556 commit ff86dc2
Show file tree
Hide file tree
Showing 15 changed files with 195 additions and 75 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-interpreter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
- name: Setup OCaml
uses: ocaml/setup-ocaml@v2
with:
ocaml-compiler: 4.12.x
ocaml-compiler: 4.14.x
- name: Setup OCaml tools
run: opam install --yes ocamlfind.1.9.5 js_of_ocaml.4.0.0 js_of_ocaml-ppx.4.0.0
- name: Setup Node.js
Expand Down
21 changes: 11 additions & 10 deletions document/core/text/conventions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -120,20 +120,21 @@ It is convenient to define identifier contexts as :ref:`records <notation-record
\begin{array}{llll}
\production{identifier context} & I &::=&
\begin{array}[t]{l@{~}ll}
\{ & \ITYPES & (\Tid^?)^\ast, \\
& \IFUNCS & (\Tid^?)^\ast, \\
& \ITABLES & (\Tid^?)^\ast, \\
& \IMEMS & (\Tid^?)^\ast, \\
& \IGLOBALS & (\Tid^?)^\ast, \\
& \IELEM & (\Tid^?)^\ast, \\
& \IDATA & (\Tid^?)^\ast, \\
& \ILOCALS & (\Tid^?)^\ast, \\
& \ILABELS & (\Tid^?)^\ast, \\
\{ & \ITYPES & (\name^?)^\ast, \\
& \IFUNCS & (\name^?)^\ast, \\
& \ITABLES & (\name^?)^\ast, \\
& \IMEMS & (\name^?)^\ast, \\
& \IGLOBALS & (\name^?)^\ast, \\
& \IELEM & (\name^?)^\ast, \\
& \IDATA & (\name^?)^\ast, \\
& \ILOCALS & (\name^?)^\ast, \\
& \ILABELS & (\name^?)^\ast, \\
& \ITYPEDEFS & \functype^\ast ~\} \\
\end{array}
\end{array}
For each index space, such a context contains the list of :ref:`identifiers <text-id>` assigned to the defined indices.
For each index space, such a context contains the list of :ref:`names <syntax-name>` assigned to the defined indices,
which were denoted by the corresponding :ref:`identifiers <text-id>`.
Unnamed indices are associated with empty (:math:`\epsilon`) entries in these lists.

An identifier context is *well-formed* if no index space contains duplicate identifiers.
Expand Down
20 changes: 10 additions & 10 deletions document/core/text/instructions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ The following grammar handles the corresponding update to the :ref:`identifier c
.. math::
\begin{array}{llcllll}
\production{label} & \Tlabel_I &::=&
v{:}\Tid &\Rightarrow& \{\ILABELS~v\} \compose I
v{:}\Tid &\Rightarrow& v, \{\ILABELS~v\} \compose I
& (\iff v \notin I.\ILABELS) \\ &&|&
v{:}\Tid &\Rightarrow& \{\ILABELS~v\} \compose (I \with \ILABELS[i] = \epsilon)
v{:}\Tid &\Rightarrow& v, \{\ILABELS~v\} \compose (I \with \ILABELS[i] = \epsilon)
& (\iff I.\ILABELS[i] = v) \\ &&|&
\epsilon &\Rightarrow& \{\ILABELS~(\epsilon)\} \compose I \\
\epsilon &\Rightarrow& \epsilon, \{\ILABELS~(\epsilon)\} \compose I \\
\end{array}
.. note::
Expand Down Expand Up @@ -79,16 +79,16 @@ However, the special case of a type use that is syntactically empty or consists
x,I'{:}\Ttypeuse_I &\Rightarrow& x & (\iff I' = \{\ILOCALS~(\epsilon)^\ast\}) \\
\end{array} \\
\production{block instruction} & \Tblockinstr_I &::=&
\text{block}~~I'{:}\Tlabel_I~~\X{bt}{:}\Tblocktype_I~~(\X{in}{:}\Tinstr_{I'})^\ast~~\text{end}~~\Tid^?
\text{block}~~(v^?,I'){:}\Tlabel_I~~\X{bt}{:}\Tblocktype_I~~(\X{in}{:}\Tinstr_{I'})^\ast~~\text{end}~~{v'}^?{:}\Tid^?
\\ &&&\qquad \Rightarrow\quad \BLOCK~\X{bt}~\X{in}^\ast~\END
\qquad\quad~~ (\iff \Tid^? = \epsilon \vee \Tid^? = \Tlabel) \\ &&|&
\text{loop}~~I'{:}\Tlabel_I~~\X{bt}{:}\Tblocktype_I~~(\X{in}{:}\Tinstr_{I'})^\ast~~\text{end}~~\Tid^?
\qquad\quad~~ (\iff {v'}^? = \epsilon \vee {v'}^? = v^?) \\ &&|&
\text{loop}~~(v^?,I'){:}\Tlabel_I~~\X{bt}{:}\Tblocktype_I~~(\X{in}{:}\Tinstr_{I'})^\ast~~\text{end}~~{v'}^?{:}\Tid^?
\\ &&&\qquad \Rightarrow\quad \LOOP~\X{bt}~\X{in}^\ast~\END
\qquad\qquad (\iff \Tid^? = \epsilon \vee \Tid^? = \Tlabel) \\ &&|&
\text{if}~~I'{:}\Tlabel_I~~\X{bt}{:}\Tblocktype_I~~(\X{in}_1{:}\Tinstr_{I'})^\ast~~
\text{else}~~\Tid_1^?~~(\X{in}_2{:}\Tinstr_{I'})^\ast~~\text{end}~~\Tid_2^?
\qquad\qquad (\iff {v'}^? = \epsilon \vee {v'}^? = v^?) \\ &&|&
\text{if}~~(v^?,I'){:}\Tlabel_I~~\X{bt}{:}\Tblocktype_I~~(\X{in}_1{:}\Tinstr_{I'})^\ast~~
\text{else}~~v_1^?{:}\Tid_1^?~~(\X{in}_2{:}\Tinstr_{I'})^\ast~~\text{end}~~v_2^?{:}\Tid_2^?
\\ &&&\qquad \Rightarrow\quad \IF~\X{bt}~\X{in}_1^\ast~\ELSE~\X{in}_2^\ast~\END
\qquad (\iff \Tid_1^? = \epsilon \vee \Tid_1^? = \Tlabel, \Tid_2^? = \epsilon \vee \Tid_2^? = \Tlabel) \\
\qquad (\iff v_1^? = \epsilon \vee v_1^? = v^?, v_2^? = \epsilon \vee v_2^? = v^?) \\
\end{array}
.. note::
Expand Down
8 changes: 5 additions & 3 deletions document/core/text/lexical.rst
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ The *look-ahead* restrictions on the productions for |Tblockchar| disambiguate t
Annotations
~~~~~~~~~~~

An *annotation* is a bracketed token sequence headed by an *annotation id* of the form :math:`\T{@id}`.
An *annotation* is a bracketed token sequence headed by an *annotation id* of the form :math:`\text{@id}` or :math:`\text{@"..."}`.
No :ref:`space <text-space>` is allowed between the opening parenthesis and this id.
Annotations are intended to be used for third-party extensions;
they can appear anywhere in a program but are ignored by the WebAssembly semantics itself, which treats them as :ref:`white space <text-space>`.
Expand All @@ -145,8 +145,10 @@ Annotations can contain other parenthesized token sequences (including nested an

.. math::
\begin{array}{llclll@{\qquad\qquad}l}
\production{annot} & \Tannot &::=&
\text{(@}~\Tidchar^+ ~(\Tspace ~|~ \Ttoken)^\ast~\text{)} \\
\production{annotation} & \Tannot &::=&
\text{(@}~\Tannotid ~(\Tspace ~|~ \Ttoken)^\ast~\text{)} \\
\production{annotation identifier} & \Tannotid &::=&
\Tidchar^+ ~|~ \Tname \\
\end{array}
.. note::
Expand Down
44 changes: 22 additions & 22 deletions document/core/text/modules.rst
Original file line number Diff line number Diff line change
Expand Up @@ -679,28 +679,28 @@ The definition of the initial :ref:`identifier context <text-context>` :math:`I`

.. math::
\begin{array}{@{}lcl@{\qquad\qquad}l}
\F{idc}(\text{(}~\text{type}~\Tid^?~\X{ft}{:}\Tfunctype~\text{)}) &=&
\{\ITYPES~(\Tid^?), \ITYPEDEFS~\X{ft}\} \\
\F{idc}(\text{(}~\text{func}~\Tid^?~\dots~\text{)}) &=&
\{\IFUNCS~(\Tid^?)\} \\
\F{idc}(\text{(}~\text{table}~\Tid^?~\dots~\text{)}) &=&
\{\ITABLES~(\Tid^?)\} \\
\F{idc}(\text{(}~\text{memory}~\Tid^?~\dots~\text{)}) &=&
\{\IMEMS~(\Tid^?)\} \\
\F{idc}(\text{(}~\text{global}~\Tid^?~\dots~\text{)}) &=&
\{\IGLOBALS~(\Tid^?)\} \\
\F{idc}(\text{(}~\text{elem}~\Tid^?~\dots~\text{)}) &=&
\{\IELEM~(\Tid^?)\} \\
\F{idc}(\text{(}~\text{data}~\Tid^?~\dots~\text{)}) &=&
\{\IDATA~(\Tid^?)\} \\
\F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{func}~\Tid^?~\dots~\text{)}~\text{)}) &=&
\{\IFUNCS~(\Tid^?)\} \\
\F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{table}~\Tid^?~\dots~\text{)}~\text{)}) &=&
\{\ITABLES~(\Tid^?)\} \\
\F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{memory}~\Tid^?~\dots~\text{)}~\text{)}) &=&
\{\IMEMS~(\Tid^?)\} \\
\F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{global}~\Tid^?~\dots~\text{)}~\text{)}) &=&
\{\IGLOBALS~(\Tid^?)\} \\
\F{idc}(\text{(}~\text{type}~v^?{:}\Tid^?~\X{ft}{:}\Tfunctype~\text{)}) &=&
\{\ITYPES~(v^?), \ITYPEDEFS~\X{ft}\} \\
\F{idc}(\text{(}~\text{func}~v^?{:}\Tid^?~\dots~\text{)}) &=&
\{\IFUNCS~(v^?)\} \\
\F{idc}(\text{(}~\text{table}~v^?{:}\Tid^?~\dots~\text{)}) &=&
\{\ITABLES~(v^?)\} \\
\F{idc}(\text{(}~\text{memory}~v^?{:}\Tid^?~\dots~\text{)}) &=&
\{\IMEMS~(v^?)\} \\
\F{idc}(\text{(}~\text{global}~v^?{:}\Tid^?~\dots~\text{)}) &=&
\{\IGLOBALS~(v^?)\} \\
\F{idc}(\text{(}~\text{elem}~v^?{:}\Tid^?~\dots~\text{)}) &=&
\{\IELEM~(v^?)\} \\
\F{idc}(\text{(}~\text{data}~v^?{:}\Tid^?~\dots~\text{)}) &=&
\{\IDATA~(v^?)\} \\
\F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{func}~v^?{:}\Tid^?~\dots~\text{)}~\text{)}) &=&
\{\IFUNCS~(v^?)\} \\
\F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{table}~v^?{:}\Tid^?~\dots~\text{)}~\text{)}) &=&
\{\ITABLES~(v^?)\} \\
\F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{memory}~v^?{:}\Tid^?~\dots~\text{)}~\text{)}) &=&
\{\IMEMS~(v^?)\} \\
\F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{global}~v^?{:}\Tid^?~\dots~\text{)}~\text{)}) &=&
\{\IGLOBALS~(v^?)\} \\
\F{idc}(\text{(}~\dots~\text{)}) &=&
\{\} \\
\end{array}
Expand Down
8 changes: 6 additions & 2 deletions document/core/text/values.rst
Original file line number Diff line number Diff line change
Expand Up @@ -211,12 +211,13 @@ Identifiers
~~~~~~~~~~~

:ref:`Indices <syntax-index>` can be given in both numeric and symbolic form.
Symbolic *identifiers* that stand in lieu of indices start with :math:`\text{\$}`, followed by any sequence of printable |ASCII|_ characters that does not contain a space, quotation mark, comma, semicolon, or bracket.
Symbolic *identifiers* that stand in lieu of indices start with :math:`\text{\$}`, followed by either a sequence of printable |ASCII|_ characters that does not contain a space, quotation mark, comma, semicolon, or bracket, or by a quoted :ref:`name <text-name>`.

.. math::
\begin{array}{llclll@{\qquad}l}
\production{identifier} & \Tid &::=&
\text{\$}~\Tidchar^+ \\
\text{\$}~c^\ast{:}\Tidchar^+ &\Rightarrow& c^\ast \\ &&|&
\text{\$}~c^\ast{:}\Tname &\Rightarrow& c^\ast & (\iff |c^\ast| > 0) \\
\production{identifier character} & \Tidchar &::=&
\text{0} ~~|~~ \dots ~~|~~ \text{9} \\ &&|&
\text{A} ~~|~~ \dots ~~|~~ \text{Z} \\ &&|&
Expand Down Expand Up @@ -246,6 +247,9 @@ Symbolic *identifiers* that stand in lieu of indices start with :math:`\text{\$}
\text{\tilde{~~}} \\
\end{array}
.. note::
The value of an identifier character is the Unicode codepoint denoting it.

.. _text-id-fresh:

Conventions
Expand Down
1 change: 1 addition & 0 deletions document/core/util/macros.def
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,7 @@
.. |Tblockchar| mathdef:: \xref{text/lexical}{text-comment}{\T{blockchar}}

.. |Tannot| mathdef:: \xref{text/lexical}{text-annot}{\T{annot}}
.. |Tannotid| mathdef:: \xref{text/lexical}{text-annot}{\T{annotid}}
.. |Tannottoken| mathdef:: \xref{text/lexical}{text-annot}{\T{annottoken}}


Expand Down
5 changes: 4 additions & 1 deletion interpreter/text/arrange.ml
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,10 @@ let custom m mnode (module S : Custom.Section) =

let var_opt = function
| None -> ""
| Some x -> " " ^ x.it
| Some x when
String.for_all (fun c -> Lib.Char.is_alphanum_ascii c || c = '_') x.it ->
" $" ^ x.it
| Some x -> " $" ^ name (Utf8.decode x.it)

let module_with_var_opt x_opt (m, cs) =
let fx = ref 0 in
Expand Down
37 changes: 30 additions & 7 deletions interpreter/text/lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ let string s =
done;
Buffer.contents b

(* Turn the quoted annotation id literal [s] into a decoded name.
   The literal is first converted with [string]; an empty result is
   reported as "empty annotation id", and a result that [Utf8.decode]
   rejects is reported as "malformed UTF-8 encoding", both through [error]
   at [lexbuf]. *)
let annot_id lexbuf s =
  match string s with
  | "" -> error lexbuf "empty annotation id"
  | id ->
    try Utf8.decode id
    with Utf8.Utf8 -> error lexbuf "malformed UTF-8 encoding"

let opt = Lib.Option.get
}

Expand Down Expand Up @@ -102,8 +107,7 @@ let float =
let string = '"' character* '"'

let idchar = letter | digit | '_' | symbol
let name = idchar+
let id = '$' name
let id = idchar+

let keyword = ['a'-'z'] (letter | digit | '_' | '.' | ':')+
let reserved = (idchar | string)+ | ',' | ';' | '[' | ']' | '{' | '}'
Expand Down Expand Up @@ -699,13 +703,21 @@ rule token = parse
| "offset="(nat as s) { OFFSET_EQ_NAT s }
| "align="(nat as s) { ALIGN_EQ_NAT s }

| id as s { VAR s }
| '$'(id as s) { VAR s }
| '$'(string as s)
{ let s' = string s in
if s' = "" then error lexbuf "empty identifier"; VAR s' }
| '$' { error lexbuf "empty identifier" }

| "(@"(name as n)
| "(@"(id as n)
{ let r = region lexbuf in
let items = annot (Lexing.lexeme_start_p lexbuf) lexbuf in
Annot.record (Annot.{name = Utf8.decode n; items} @@ r); token lexbuf }
| "(@" { error lexbuf "malformed annotation id" }
| "(@"(string as s)
{ let r = region lexbuf in
let items = annot (Lexing.lexeme_start_p lexbuf) lexbuf in
Annot.record (Annot.{name = annot_id lexbuf s; items} @@ r); token lexbuf }
| "(@" { error lexbuf "empty annotation id" }

| ";;"utf8_no_nl*eof { EOF }
| ";;"utf8_no_nl*newline { Lexing.new_line lexbuf; token lexbuf }
Expand All @@ -726,11 +738,16 @@ and annot start = parse
{ let r = region lexbuf in
let items = annot (Lexing.lexeme_start_p lexbuf) lexbuf in
(Annot.Parens items @@ r) :: annot start lexbuf }
| "(@"(name as n)
| "(@"(id as n)
{ let r = region lexbuf in
let items = annot (Lexing.lexeme_start_p lexbuf) lexbuf in
let ann = Annot.{name = Utf8.decode n; items} @@ r in
(Annot.Annot ann @@ r) :: annot start lexbuf }
| "(@"(string as s)
{ let r = region lexbuf in
let items = annot (Lexing.lexeme_start_p lexbuf) lexbuf in
let ann = Annot.{name = annot_id lexbuf s; items} @@ r in
(Annot.Annot ann @@ r) :: annot start lexbuf }

| nat as s
{ let r = region lexbuf in
Expand All @@ -741,9 +758,15 @@ and annot start = parse
| float as s
{ let r = region lexbuf in
(Annot.Float s @@ r) :: annot start lexbuf }
| id as s
| '$'(id as s)
{ let r = region lexbuf in
(Annot.Var s @@ r) :: annot start lexbuf }
| '$'(string as s)
{ let r = region lexbuf in
let s' = string s in
if s' = "" then error lexbuf "empty identifier";
(Annot.Var s' @@ r) :: annot start lexbuf }
| '$' { error lexbuf "empty identifier" }
| string as s
{ let r = region lexbuf in
(Annot.String (string s) @@ r) :: annot start lexbuf }
Expand Down
36 changes: 28 additions & 8 deletions interpreter/text/parser.mly
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ let nat32 s loc =
let name s loc =
try Utf8.decode s with Utf8.Utf8 -> error (at loc) "malformed UTF-8 encoding"

(* Build a located variable phrase from the raw identifier text [s] found
   at parser location [loc].  The text is only validated with [Utf8.decode]
   (the decoded value is discarded and [s] itself is kept); if decoding
   fails, "malformed UTF-8 encoding" is reported at that location. *)
let var s loc =
  let region = at loc in
  match Utf8.decode s with
  | _ -> Source.(s @@ region)
  | exception Utf8.Utf8 -> error region "malformed UTF-8 encoding"


(* Symbolic variables *)

Expand Down Expand Up @@ -120,9 +125,24 @@ let force_locals (c : context) =
let enter_func (c : context) =
{c with labels = VarMap.empty; locals = empty ()}

(* Render one code point [c] (given as an int) for use inside a quoted
   identifier in messages: tab, newline, double quote and backslash get
   their backslash escapes, other values in 0x20..0x7e are emitted as the
   character itself, and everything else becomes a \u{...} hex escape
   padded to at least two digits. *)
let print_char c =
  if c = 0x09 then "\\t"
  else if c = 0x0a then "\\n"
  else if c = 0x22 then "\\\""
  else if c = 0x5c then "\\\\"
  else if c >= 0x20 && c < 0x7f then String.make 1 (Char.chr c)
  else Printf.sprintf "\\u{%02x}" c

(* Format the variable phrase [x] for messages, always prefixed with "$".
   When every byte of [x.it] is an ASCII letter, digit or underscore the
   text is shown bare; otherwise it is UTF-8 decoded and shown as a
   double-quoted string with each code point rendered by [print_char]. *)
let print x =
  let is_plain c = Lib.Char.is_alphanum_ascii c || c = '_' in
  let body =
    if String.for_all is_plain x.it then x.it
    else
      let escaped = List.map print_char (Utf8.decode x.it) in
      "\"" ^ String.concat "" escaped ^ "\""
  in
  "$" ^ body


let lookup category space x =
try VarMap.find x.it space.map
with Not_found -> error x.at ("unknown " ^ category ^ " " ^ x.it)
with Not_found -> error x.at ("unknown " ^ category ^ " " ^ print x)

let type_ (c : context) x = lookup "type" c.types.space x
let func (c : context) x = lookup "function" c.funcs x
Expand All @@ -134,7 +154,7 @@ let elem (c : context) x = lookup "elem segment" c.elems x
let data (c : context) x = lookup "data segment" c.datas x
let label (c : context) x =
try VarMap.find x.it c.labels
with Not_found -> error x.at ("unknown label " ^ x.it)
with Not_found -> error x.at ("unknown label " ^ print x)

let func_type (c : context) x =
try (Lib.List32.nth c.types.list x.it).it
Expand All @@ -151,7 +171,7 @@ let anon category space n =
let bind category space x =
let i = anon category space 1l in
if VarMap.mem x.it space.map then
error x.at ("duplicate " ^ category ^ " " ^ x.it);
error x.at ("duplicate " ^ category ^ " " ^ print x);
space.map <- VarMap.add x.it i space.map;
i

Expand Down Expand Up @@ -336,8 +356,8 @@ num :
| FLOAT { $1 @@ $sloc }

var :
| NAT { let at = $sloc in fun c lookup -> nat32 $1 at @@ at }
| VAR { let at = $sloc in fun c lookup -> lookup c ($1 @@ at) @@ at }
| NAT { fun c lookup -> nat32 $1 $sloc @@ $sloc }
| VAR { fun c lookup -> lookup c (var $1 $sloc) @@ $sloc }

var_list :
| /* empty */ { fun c lookup -> [] }
Expand All @@ -348,7 +368,7 @@ bind_var_opt :
| bind_var { fun c anon bind -> bind c $1 } /* Sugar */

bind_var :
| VAR { $1 @@ $sloc }
| VAR { var $1 $sloc }

labeling_opt :
| /* empty */
Expand Down Expand Up @@ -1001,7 +1021,7 @@ module_fields1 :
{m with exports = $1 c :: m.exports} }

module_var :
| VAR { $1 @@ $sloc } /* Sugar */
| VAR { var $1 $sloc } /* Sugar */

module_ :
| LPAR MODULE option(module_var) module_fields RPAR
Expand All @@ -1026,7 +1046,7 @@ inline_module1 : /* Sugar */
/* Scripts */

script_var :
| VAR { $1 @@ $sloc } /* Sugar */
| VAR { var $1 $sloc } /* Sugar */

script_module :
| module_ { $1 }
Expand Down
9 changes: 9 additions & 0 deletions interpreter/util/lib.ml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@ struct
n <> 0 && n land (n - 1) = 0
end

(* ASCII-only character classification predicates. *)
module Char =
struct
  (* '0'..'9' *)
  let is_digit_ascii = function '0'..'9' -> true | _ -> false
  (* 'A'..'Z' *)
  let is_uppercase_ascii = function 'A'..'Z' -> true | _ -> false
  (* 'a'..'z' *)
  let is_lowercase_ascii = function 'a'..'z' -> true | _ -> false
  (* Any ASCII letter, either case. *)
  let is_letter_ascii c = is_lowercase_ascii c || is_uppercase_ascii c
  (* Any ASCII letter or digit. *)
  let is_alphanum_ascii c = is_letter_ascii c || is_digit_ascii c
end

module String =
struct
let implode cs =
Expand Down
Loading

0 comments on commit ff86dc2

Please sign in to comment.