From ab6b2215ceec630442d07b0ab10a4e18b649acb8 Mon Sep 17 00:00:00 2001 From: Andreas Rossberg Date: Sat, 27 Apr 2019 23:37:42 +0200 Subject: [PATCH] [spec/interpreter/test] Spec and implement proposal --- document/core/appendix/custom.rst | 206 +++++++++++++++++++++++++++++- document/core/text/lexical.rst | 34 ++++- document/core/util/macros.def | 22 ++++ interpreter/text/lexer.mll | 36 +++++- test/core/annotations.wast | 146 +++++++++++++++++++++ 5 files changed, 434 insertions(+), 10 deletions(-) create mode 100644 test/core/annotations.wast diff --git a/document/core/appendix/custom.rst b/document/core/appendix/custom.rst index afd2b1a0..ed645933 100644 --- a/document/core/appendix/custom.rst +++ b/document/core/appendix/custom.rst @@ -1,13 +1,12 @@ -.. index:: custom section, section, binary format +.. index:: custom section, section, binary format, annotation, text format -Custom Sections ---------------- +Custom Sections and Annotations +------------------------------- -This appendix defines dedicated :ref:`custom sections ` for WebAssembly's :ref:`binary format `. -Such sections do not contribute to, or otherwise affect, the WebAssembly semantics, and like any custom section they may be ignored by an implementation. +This appendix defines dedicated :ref:`custom sections ` for WebAssembly's :ref:`binary format ` and :ref:`annotations ` for the text format. +Such sections or annotations do not contribute to, or otherwise affect, the WebAssembly semantics, and may be ignored by an implementation. However, they provide useful meta data that implementations can make use of to improve user experience or take compilation hints. -Currently, only one dedicated custom section is defined, the :ref:`name section`. .. index:: ! name section, name, Unicode UTF-8 @@ -138,3 +137,198 @@ It consists of an :ref:`indirect name map ` assigning lo \production{local name subsection} & \Blocalnamesubsec &::=& \Bnamesubsection_2(\Bindirectnamemap) \\ \end{array} + + +.. index:: ! name annotation, name, Unicode UTF-8 +.. _text-nameannot: + +Name Annotations +~~~~~~~~~~~~~~~~ + +*Name annotations* are the textual analogue to the :ref:`name section ` and provide a textual representation for it. +Consequently, their id is :math:`\T{@name}`. + +Analogous to the name section, name annotations are allowed on :ref:`modules `, :ref:`functions `, and :ref:`locals ` (including :ref:`parameters `). +They can be placed where the text format allows binding occurrences of respective :ref:`identifiers `. +If both an identifier and a name annotation are given, the annotation is expected *after* the identifier. +In that case, the annotation takes precedence over the identifier as a textual representation of the binding's name. +At most one name annotation may be given per binding. + +All name annotations have the following format: + +.. math:: + \begin{array}{llclll} + \production{name annotation} & \Tnameannot &::=& + \text{(@name}~\Tstring~\text{)} \\ + \end{array} + + +.. note:: + All name annotations can be arbitrary UTF-8 :ref:`strings `. + Names need not be unique. + + +.. index:: module +.. _text-modulenameannot: + +Module Names +............ + +A *module name annotation* must be placed on a :ref:`module ` definition, +directly after the :math:`\text{module}` keyword, or if present, after the following module :ref:`identifier `. + +.. math:: + \begin{array}{llclll} + \production{module name annotation} & \Tmodulenameannot &::=& + \Tnameannot \\ + \end{array} + + +.. index:: function +.. _binary-funcnameannot: + +Function Names +.............. + +A *function name annotation* must be placed on a :ref:`function ` definition or function :ref:`import `, +directly after the :math:`\text{func}` keyword, or if present, after the following function :ref:`identifier ` or. + +.. math:: + \begin{array}{llclll} + \production{function name annotation} & \Tfuncnameannot &::=& + \Tnameannot \\ + \end{array} + + +.. index:: function, parameter +.. _binary-paramnameannot: + +Parameter Names +............... + +A *parameter name annotation* must be placed on a :ref:`parameter ` declaration, +directly after the :math:`\text{param}` keyword, or if present, after the following parameter :ref:`identifier `. +It may only be placed on a declaration that declares exactly one parameter. + +.. math:: + \begin{array}{llclll} + \production{parameter name annotation} & \Tparamnameannot &::=& + \Tnameannot \\ + \end{array} + + +.. index:: function, local +.. _binary-localnameannot: + +Local Names +........... + +A *local name annotation* must be placed on a :ref:`local ` declaration, +directly after the :math:`\text{local}` keyword, or if present, after the following local :ref:`identifier `. +It may only be placed on a declaration that declares exactly one local. + +.. math:: + \begin{array}{llclll} + \production{local name annotation} & \Tlocalnameannot &::=& + \Tnameannot \\ + \end{array} + + +.. index:: ! custom annotation, custom section +.. _text-customannot: + +Custom Annotations +~~~~~~~~~~~~~~~~~~ + +*Custom annotations* are a generic textual representation for any :ref:`custom section `. +Their id is :math:`\T{@custom}`. +By generating custom annotations, tools converting between :ref:`binary format ` and :ref:`text format ` can maintain and round-trip the content of custom sections even when they do not recognize them. + +Custom annotations must be placed inside a :ref:`module ` definition. +They must occur anywhere after the :math:`\text{module}` keyword, or if present, after the following module :ref:`identifier `. +They must not be nested into other constructs. + +.. math:: + \begin{array}{llclll} + \production{custom annotation} & \Tcustomannot &::=& + \text{(@custom}~~\Tstring~~\Tcustomplace^?~~\Tdatastring~~\text{)} \\ + \production{custom placement} & \Tcustomplace &::=& + \text{(}~\text{before}~~\text{first}~\text{)} \\ &&|& + \text{(}~\text{before}~~\Tsec~\text{)} \\ &&|& + \text{(}~\text{after}~~\Tsec~\text{)} \\ &&|& + \text{(}~\text{after}~~\text{last}~\text{)} \\ + \production{section} & \Tsec &::=& + \text{type} \\ &&|& + \text{import} \\ &&|& + \text{func} \\ &&|& + \text{table} \\ &&|& + \text{memory} \\ &&|& + \text{global} \\ &&|& + \text{export} \\ &&|& + \text{start} \\ &&|& + \text{elem} \\ &&|& + \text{code} \\ &&|& + \text{data} \\ + \end{array} + +The first :ref:`string ` in a custom annotation denotes the name of the custom section it represents. +The remaining strings collectively represent the section's payload data, written as a :ref:`data string `, which can be split up into a possibly empty sequence of individual string literals (similar to :ref:`data segments `). + +An arbitrary number of custom annotations (even of the same name) may occur in a module, +each defining a separate custom section when converting to :ref:`binary format `. +Placement of the sections in the binary can be customized via explicit *placement* directives, that position them either directly before or directly after a known section. +The placements :math:`\T{(before~first)}` and :math:`\T{(after~last)}` denote virtual sections before the first and after the last known section, respectively. +When the placement directive is omitted, it defaults to :math:`\T{(after~last)}`. + +If multiple placement directives appear for the same position, then the sections are all placed there, in order of their appearance in the text. +For this purpose, the position :math:`\T{after}` a section is considered different from the position :math:`\T{before}` the consecutive section, and the former occurs before the latter. + +.. note:: + Future versions of WebAssembly may introduce additional sections between others or at the beginning or end of a module. + Using :math:`\T{first}` and :math:`\T{last}` guarantees that placement will still go before or after any future section, respectively. + +If a custom section with a specific section id is given as well as annotations representing the same custom section (e.g., :math:`\T{@name}` :ref:`annotations ` as well as a :math:`\T{@custom}` annotation for a :math:`\T{name}` :ref:`section `), then two sections are assumed to be created. +Their relative placement will depend on the placement directive given for the :math:`\T{@custom}` annotation as well as the implicit placement requirements of the custom section, which are applied to the other annotation. + +.. note:: + + For example, the following module, + + .. code-block:: none + + (module + (@custom "A" "aaa") + (type $t (func)) + (@custom "B" (after func) "bbb") + (@custom "C" (before func) "ccc") + (@custom "D" (after last) "ddd") + (table 10 funcref) + (func (type $t)) + (@custom "E" (after import) "eee") + (@custom "F" (before type) "fff") + (@custom "G" (after data) "ggg") + (@custom "H" (after code) "hhh") + (@custom "I" (after func) "iii") + (@custom "J" (before func) "jjj") + (@custom "K" (before first) "kkk") + ) + + will result in the following section ordering: + + .. code-block:: none + + custom section "K" + custom section "F" + type section + custom section "E" + custom section "C" + custom section "J" + function section + custom section "B" + custom section "I" + table section + code section + custom section "H" + custom section "G" + custom section "A" + custom section "D" diff --git a/document/core/text/lexical.rst b/document/core/text/lexical.rst index a3b4529d..4c3ee4d3 100644 --- a/document/core/text/lexical.rst +++ b/document/core/text/lexical.rst @@ -50,7 +50,7 @@ The character stream in the source text is divided, from left to right, into a s (\text{a} ~|~ \dots ~|~ \text{z})~\Tidchar^\ast \qquad (\mbox{if occurring as a literal terminal in the grammar}) \\ \production{reserved} & \Treserved &::=& - \Tidchar^+ \\ + \Tidchar^+ ~|~ \text{;} ~|~ \text{[} ~|~ \text{]} ~|~ \text{\{} ~|~ \text{\}} \\ \end{array} Tokens are formed from the input character stream according to the *longest match* rule. @@ -77,7 +77,7 @@ Any token that does not fall into any of the other categories is considered *res White Space ~~~~~~~~~~~ -*White space* is any sequence of literal space characters, formatting characters, or :ref:`comments `. +*White space* is any sequence of literal space characters, formatting characters, :ref:`comments `, or :ref:`annotations `. The allowed formatting characters correspond to a subset of the |ASCII|_ *format effectors*, namely, *horizontal tabulation* (:math:`\unicode{09}`), *line feed* (:math:`\unicode{0A}`), and *carriage return* (:math:`\unicode{0D}`). .. math:: @@ -124,3 +124,33 @@ The *look-ahead* restrictions on the productions for |Tblockchar| disambiguate t .. note:: Any formatting and control characters are allowed inside comments. + + +.. index:: ! annotation + single: text format; annotation +.. _text-annot: + +Annotations +~~~~~~~~~~~ + +An *annotation* is a bracketed token sequence headed by an *annotation id* of the form :math:`\T{@id}`. +No :ref:`space ` is allowed between the opening parenthesis and this id. +Annotations are intended to be used for third-party extensions; +they can appear anywhere in a program but are ignored by the WebAssembly semantics itself, which treats them as :ref:`white space `. + +Annotations can contain other parenthesized token sequences (including nested annotations), as long as they are well-nested. +:ref:`String literals ` and :ref:`comments ` occurring in an annotation must also be properly nested and closed. + +.. math:: + \begin{array}{llclll@{\qquad\qquad}l} + \production{annot} & \Tannot &::=& + \text{(}~\text{@}~\Tidchar^+ ~\Tspace^+ ~(\Tspace ~|~ \Ttoken)^\ast~\text{)} \\ + \end{array} + +.. note:: + The annotation id is meant to be an identifier categorising the extension, and plays a role similar to the name of a :ref:`custom section `. + By convention, annotations corresponding to a custom section should use the custom section's name as an id. + + Implementations are expected to ignore annotations with ids that they do not recognize. + On the other hand, they may impose restrictions on annotations that they do recognize, e.g., requiring a specific structure by superimposing a more concrete grammar. + It is up to an implementation how it deals with errors in such annotations. diff --git a/document/core/util/macros.def b/document/core/util/macros.def index 081e9a20..529d6b24 100644 --- a/document/core/util/macros.def +++ b/document/core/util/macros.def @@ -550,6 +550,9 @@ .. |Tlinechar| mathdef:: \xref{text/lexical}{text-comment}{\T{linechar}} .. |Tblockchar| mathdef:: \xref{text/lexical}{text-comment}{\T{blockchar}} +.. |Tannot| mathdef:: \xref{text/lexical}{text-annot}{\T{annot}} +.. |Tannottoken| mathdef:: \xref{text/lexical}{text-annot}{\T{annottoken}} + .. Values, non-terminals @@ -1028,6 +1031,25 @@ .. |Blocalnamesubsec| mathdef:: \xref{appendix/custom}{binary-localnamesec}{\B{localnamesubsec}} +.. Annotations +.. ----------- + +.. Custom annotations, non-terminals + +.. |Tcustomannot| mathdef:: \xref{appendix/custom}{text-customannot}{\T{customannot}} +.. |Tcustomplace| mathdef:: \xref{appendix/custom}{text-customannot}{\T{customplace}} +.. |Tsec| mathdef:: \xref{appendix/custom}{text-customannot}{\T{sec}} + + +.. Name annotations, non-terminals + +.. |Tnameannot| mathdef:: \xref{appendix/custom}{text-nameannot}{\T{nameannot}} +.. |Tmodulenameannot| mathdef:: \xref{appendix/custom}{text-modulenameannot}{\T{modulenameannot}} +.. |Tfuncnameannot| mathdef:: \xref{appendix/custom}{text-funcnameannot}{\T{funcnameannot}} +.. |Tparamnameannot| mathdef:: \xref{appendix/custom}{text-paramnameannot}{\T{paramnameannot}} +.. |Tlocalnameannot| mathdef:: \xref{appendix/custom}{text-localnameannot}{\T{localnameannot}} + + .. Embedding .. --------- diff --git a/interpreter/text/lexer.mll b/interpreter/text/lexer.mll index dab48e04..6aa5abb6 100644 --- a/interpreter/text/lexer.mll +++ b/interpreter/text/lexer.mll @@ -134,8 +134,11 @@ let float = | sign? "nan" | sign? "nan:" "0x" hexnum let string = '"' character* '"' -let name = '$' (letter | digit | '_' | symbol)+ -let reserved = ([^'\"''('')'';'] # space)+ (* hack for table size *) + +let id = (letter | digit | '_' | symbol)+ +let name = '$' id + +let reserved = ';' | ([^'\"''('')'';'] # space)+ (* hack for table size *) let ixx = "i" ("32" | "64") let fxx = "f" ("32" | "64") @@ -353,6 +356,9 @@ rule token = parse | name as s { VAR s } + | "(@"id { annot (Lexing.lexeme_start_p lexbuf) lexbuf; token lexbuf } + | "(@" { error lexbuf "malformed annotation id" } + | ";;"utf8_no_nl*eof { EOF } | ";;"utf8_no_nl*'\n' { Lexing.new_line lexbuf; token lexbuf } | ";;"utf8_no_nl* { token lexbuf (* causes error on following position *) } @@ -365,6 +371,32 @@ rule token = parse | utf8 { error lexbuf "malformed operator" } | _ { error lexbuf "malformed UTF-8 encoding" } +and annot start = parse + | ")" { () } + | "(" { annot (Lexing.lexeme_start_p lexbuf) lexbuf; annot start lexbuf } + + | reserved { annot start lexbuf } + | nat { annot start lexbuf } + | int { annot start lexbuf } + | float { annot start lexbuf } + | id { annot start lexbuf } + | name { annot start lexbuf } + | string { annot start lexbuf } + | '"'character*('\n'|eof) { error lexbuf "unclosed string literal" } + | '"'character*['\x00'-'\x09''\x0b'-'\x1f''\x7f'] + { error lexbuf "illegal control character in string literal" } + | '"'character*'\\'_ + { error_nest (Lexing.lexeme_end_p lexbuf) lexbuf "illegal escape" } + + | (";;"utf8_no_nl*)? eof { error_nest start lexbuf "unclosed annotation" } + | ";;"utf8_no_nl*'\n' { Lexing.new_line lexbuf; annot start lexbuf } + | ";;"utf8_no_nl* { annot start lexbuf (* error on following position *) } + | "(;" { comment (Lexing.lexeme_start_p lexbuf) lexbuf; annot start lexbuf } + | space#'\n' { annot start lexbuf } + | '\n' { Lexing.new_line lexbuf; annot start lexbuf } + | eof { error_nest start lexbuf "unclosed annotation" } + | _ { error lexbuf "malformed UTF-8 encoding" } + and comment start = parse | ";)" { () } | "(;" { comment (Lexing.lexeme_start_p lexbuf) lexbuf; comment start lexbuf } diff --git a/test/core/annotations.wast b/test/core/annotations.wast new file mode 100644 index 00000000..5ede9004 --- /dev/null +++ b/test/core/annotations.wast @@ -0,0 +1,146 @@ +(@a) +(@aas-3!@$d-@#4) +(@@) +(@a x y z) +(@a x-y $yz "aa" -2 0.3 0x3) +(@a x-y$yz"aa"-2) +(@a block func module i32.add) +(@a 0x 8q 0xfa #4g0-.@f#^&@#$*0sf -- @#) +(@a ; ] [ }} }x{ ({) {{};}] ;) +(@a (bla) () (5-g) ("aa" a) ($x) (bla bla) (x (y)) ")" "(" x")"y) +(@a @ @x (@x) (@x y) (@) (@ x) (@(@(@(@))))) +(@a Heiße Würstchen   + +) +(@a (;bla;) (; ) ;) + ;; bla) + ;; bla (@x +) + +(assert_malformed (module quote "(@)") "malformed annotation id") +(assert_malformed (module quote "(@ )") "malformed annotation id") +(assert_malformed (module quote "(@ x)") "malformed annotation id") + +(assert_malformed (module quote "(@x ") "unclosed annotation") +(assert_malformed (module quote "(@x ()") "unclosed annotation") +(assert_malformed (module quote "(@x (y (z))") "unclosed annotation") +(assert_malformed (module quote "(@x (@y )") "unclosed annotation") + +(assert_malformed (module quote "(@x))") "unexpected token") +(assert_malformed (module quote "(@x ()))") "unexpected token") +(assert_malformed (module quote "(@x (y (z))))") "unexpected token") +(assert_malformed (module quote "(@x (@y )))") "unexpected token") + +(assert_malformed (module quote "(@x \"") "unclosed string") + +(assert_malformed (module quote "((@a)@b)") "unknown operator") +(assert_malformed (module quote "(func $(@a))") "unknown operator") +(assert_malformed (module quote "(func $(@a)f)") "unknown operator") + +((@a) module (@a) $m (@a) (@a) + ((@a) import (@a) "spectest" (@a) "global_i32" (@a) + ((@a) global (@a) $g (@a) i32 (@a)) (@a) + ) (@a) + ((@a) import (@a) "spectest" (@a) "table" (@a) + ((@a) table (@a) $t (@a) 10 (@a) 20 (@a) funcref (@a)) (@a) + ) (@a) + ((@a) import (@a) "spectest" (@a) "memory" (@a) + ((@a) memory (@a) $m (@a) 1 (@a) 2 (@a)) (@a) + ) (@a) + ((@a) import (@a) "spectest" (@a) "print_i32_f32" (@a) + ((@a) func (@a) $f (@a) + ((@a) param (@a) i32 (@a) f32 (@a)) (@a) + ((@a) result (@a)) (@a) + ) (@a) + ) (@a) + + ((@a) export (@a) "g" (@a) + ((@a) global (@a) $g (@a)) (@a) + ) (@a) + ((@a) export (@a) "t" (@a) + ((@a) table (@a) $t (@a)) (@a) + ) (@a) + ((@a) export (@a) "m" (@a) + ((@a) memory (@a) $m (@a)) (@a) + ) (@a) + ((@a) export (@a) "f" (@a) + ((@a) func (@a) $f (@a)) (@a) + ) (@a) +) (@a) + +((@a) module (@a) $m (@a) (@a) + ((@a) global (@a) $g (@a) + ((@a) export (@a) "g" (@a)) (@a) + ((@a) import (@a) "spectest" (@a) "global_i32" (@a)) (@a) + i32 (@a) + ) (@a) + ((@a) table (@a) $t (@a) + ((@a) export (@a) "t" (@a)) (@a) + ((@a) import (@a) "spectest" (@a) "table" (@a)) (@a) + 10 (@a) 20 (@a) + funcref (@a) + ) (@a) + ((@a) memory (@a) $m (@a) + ((@a) export (@a) "m" (@a)) (@a) + ((@a) import (@a) "spectest" (@a) "memory" (@a)) (@a) + 1 (@a) 2 (@a) + ) (@a) + ((@a) func (@a) $f (@a) + ((@a) export (@a) "f" (@a)) (@a) + ((@a) import (@a) "spectest" (@a) "print_i32_f32" (@a)) (@a) + ((@a) param (@a) i32 (@a) f32 (@a)) (@a) + ((@a) result (@a)) (@a) + ) (@a) +) (@a) + +((@a) module (@a) $m (@a) (@a) + ((@a) type (@a) $T (@a) + ((@a) func (@a) + ((@a) param (@a) i32 (@a) i64 (@a)) (@a) + ((@a) param (@a) $x (@a) i32 (@a)) (@a) + ((@a) result (@a) i32 (@a)) (@a) + ) (@a) + ) (@a) + + ((@a) global (@a) $g (@a) + ((@a) export (@a) "g" (@a)) (@a) + i32 (@a) + ((@a) i32.const (@a) 42 (@a)) (@a) + ) (@a) + ((@a) table (@a) $t (@a) + ((@a) export (@a) "t" (@a)) (@a) + 10 (@a) 20 (@a) + funcref (@a) + ) (@a) + ((@a) memory (@a) $m (@a) + ((@a) export (@a) "m" (@a)) (@a) + 1 (@a) 2 (@a) + ) (@a) + ((@a) func (@a) $f (@a) + ((@a) export (@a) "f" (@a)) (@a) + ((@a) param (@a) i32 (@a) i64 (@a)) (@a) + ((@a) param (@a) $x (@a) i32 (@a)) (@a) + ((@a) result (@a) i32 (@a)) (@a) + ((@a) local (@a) i32 (@a) i32 (@a)) (@a) + ((@a) local (@a) $y (@a) i32 (@a)) (@a) + ((@a) block (@a) + ((@a) result (@a) i32 (@a)) (@a) + ((@a) i32.add (@a) + ((@a) local.get (@a) $x (@a)) (@a) + ((@a) local.get (@a) 0 (@a)) (@a) + ) + ) + ) (@a) + + ((@a) elem (@a) + ((@a) offset (@a) ((@a) i32.const (@a) 0 (@a)) (@a)) (@a) + $f (@a) $f (@a) (@a) $f (@a) + ) (@a) + ((@a) data (@a) + ((@a) offset (@a) ((@a) i32.const (@a) 0 (@a)) (@a)) (@a) + "bla" (@a) "\43" (@a) (@a) "" (@a) + ) (@a) + + (func $s) + ((@a) start (@a) $s (@a)) (@a) +) (@a)