From c6dec9044f1ac68b853f19828df65625846a828b Mon Sep 17 00:00:00 2001 From: Tony Arcieri Date: Thu, 3 Nov 2016 17:36:03 -0700 Subject: [PATCH] Convert spec to use postfix syntax on object member names (closes #30) --- draft-tjson-spec.md | 164 +++++++++++++++++++++++++++----------------- 1 file changed, 100 insertions(+), 64 deletions(-) diff --git a/draft-tjson-spec.md b/draft-tjson-spec.md index 01d145b..30a2114 100644 --- a/draft-tjson-spec.md +++ b/draft-tjson-spec.md @@ -5,7 +5,7 @@ category = "info" docName = "draft-tjson-spec" - date = 2016-10-02T20:00:00Z + date = 2016-11-03T20:00:00Z [[author]] initials = "T. " @@ -31,22 +31,36 @@ set of types within JSON documents. # Introduction Tagged JavaScript Object Notation (TJSON) is a set of backwards-compatible -extensions to JavaScript Object Notation (JSON) [@!RFC7159] which enrich -the set of types the format is able to express. +extensions to JavaScript Object Notation (JSON) [@!RFC7159] which enrich +the format with additional types beyond those originally specified. -TJSON can represent six primitive types (strings, binary data, integers, -floating points, datetimes, and null) and two structured types (objects and -arrays). +TJSON supports six scalar types: + +* Strings +* Binary Data +* Integers (signed/unsigned) +* Floating points +* Timestamps +* JSON values (true/false/null) + +It supports two non-scalar types: + +* Objects +* Arrays + +TJSON provides backwards-compatible self-describing type annotations to JSON +in the form of postfix tags on object member names. To extend JSON with additional types in a backwards-compatible manner, -TJSON adds a special mandatory "tag" to each JSON string which identifies -the data type and, optionally, encoding format. A tag consists of one -or more alphanumeric characters, followed by the colon ":" character. -All strings in TJSON MUST have a valid tag prefix.
+TJSON adds a special mandatory "tag" to each member name which identifies +the encoding format of the data and implies the type it represents. A tag +consists of the colon ":" character followed by one or more alphanumeric +characters. All member names in TJSON MUST have a valid tag postfix. TJSON is intended to simplify transcoding documents from other interchange -formats which disambiguate strings from binary data, and also improve the -ability to both canonicalize and authenticate JSON documents. +formats which have a type system rich enough to include a binary data format +in addition to strings, and also improve the ability to both canonicalize and +authenticate JSON documents. ## Conventions Used in This Document @@ -63,13 +77,16 @@ backwards-compatible way. ## String Grammar -The main grammatical addition of TJSON is a tag prefix on string literals. Every -string literal MUST have a tag prefix in TJSON. Strings literals in TJSON are -described by the following grammar: +The main grammatical addition of TJSON is the addition of a postfix type +annotation, or "tag", on the member names of all objects, which are string +literals. Every member name MUST have a tag postfix in TJSON. Member names +in TJSON are described by the following grammar: - <tagged-string> ::= quotation-mark tag *char quotation-mark + <member> = <tagged-string> name-separator <value> - <tag> ::= <alphanumeric>* ':' + <tagged-string> ::= quotation-mark *char tag quotation-mark + + <tag> ::= ':' <alphanumeric>* <alpha-lower> ::= 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | @@ -79,39 +96,40 @@ described by the following grammar: <alphanumeric> ::= <alpha-lower> | <digit> -The tagged-string pushdown replaces the string pushdown in JSON as described in -[@!RFC7159]. +The "tagged-string" pushdown replaces the string pushdown in JSON member names +as described in [@!RFC7159]. The "value" pushdown remains the same. The quotation-mark and char pushdowns are described in section 7 of [@!RFC7159]. -TJSON places a maximum length of 4 bytes on tag, including the ':' character.
- ## Root Symbol The root grammatical symbol of all TJSON documents is constrained to the -following nonterminals as described in [@!RFC7159]: +"object" nonterminal as described in [@!RFC7159]: + + <root-symbol> ::= <object> - <root-symbol> ::= <object> | <array> +TJSON uses objects to describe all further type information, so they MUST +be the top-level expression. -Documents which do not contain an object or array as the toplevel element -MUST be rejected by parsers. +Documents which do not contain an object as the top-level element MUST be +rejected by parsers. # Extended Types The following section describes the extended types added to TJSON by embedding them in string literals as described in section 2.1 of this document. -## UTF-8 Strings ("s:") +## Unicode Strings (":s") The syntax for TJSON strings is grammatically identical to JSON, except per section 2.1 of this document the string type MUST carry a mandatory tag -character, "s:" indicating a UTF-8 String. Unlike JSON, all Unicode Strings -in TJSON MUST be valid UTF-8 [@!RFC3629]. Other Unicode encodings are -expressly not supported. +character, ":s" indicating a Unicode string. Unlike JSON, all Unicode Strings +in TJSON MUST be valid UTF-8 [@!RFC3629]. No other Unicode encodings are +valid for TJSON strings. The following is an example of a UTF-8 String literal in TJSON: - "s:Hello, world!" + {"example:s":"Hello, world!"} ## Binary Data @@ -120,28 +138,33 @@ different encodings within a tagged string. Tags for binary data begin with the "b" character followed by an alphanumeric identifier for a specific format. -The preferred encoding is base64url ("b64:"), which SHOULD be used by +The preferred encoding is base64url (":b64"), which SHOULD be used by default unless another encoding is explicitly specified at serialization time. -The base16 and base64url formats are mandatory to implement for all TJSON -parsers. +The base16, base32, and base64url formats are mandatory to implement for all +TJSON parsers.
-### base16 ("b16:") +### base16 ("b16") + +Base16 literals are identified by the "b16" tag, with an associated +JSON string literal value containing base16-serialized binary data. -A base16 literal starts with the "b16:" tag, followed by a valid base16 string. The base16 format (a.k.a. hexadecimal) is described in [@!RFC4648]. All base16 strings in TJSON MUST be lower case. The following is an example of a base16 string literal in TJSON: - "b16:48656c6c6f2c20776f726c6421" + {"example:b16":"48656c6c6f2c20776f726c6421"} + +This decodes to an object with an "example" key whose value is the equivalent +of the ASCII string: "Hello, world!" -This decodes to the equivalent of the ASCII string: "Hello, world!" +### base32 ("b32") -### base32 ("b32:") +Base32 literals are identified by the "b32" tag, with an associated +JSON string literal value containing base32-serialized binary data. -A base32 literal starts with the "b16:" tag, followed by a valid base32 string. The base32 format is described in [@!RFC4648]. All base32 strings in TJSON MUST be lower case, and MUST NOT include any padding with the '=' character. TJSON parsers MUST reject any documents containing upper case base32 characters @@ -149,22 +172,34 @@ or padding. The following is an example of a base32 string literal in TJSON: - "b32:jbswy3dpfqqho33snrscc" + {"example:b32":"jbswy3dpfqqho33snrscc"} + +This decodes to an object with an "example" key whose value is the equivalent +of the ASCII string: "Hello, world!" + +### base64url ("b64") -This decodes to the equivalent of the ASCII string: "Hello, world!" +Base64url literals are identified by the "b" or "b64" tags, with an +associated JSON string literal value containing base64url-serialized binary +data. -### base64url ("b64:") +The base64url format is described in [@!RFC4648]. All base64url strings in +TJSON MUST NOT include any padding with the '=' character. TJSON parsers MUST +reject any documents containing padded base64url strings.
-A base64url literal starts with the "b64:" tag, followed by a valid base64url -string. The base64url format is described in [@!RFC4648]. All base64url strings -in TJSON MUST NOT include any padding with the '=' character. TJSON parsers -MUST reject any documents containing padded base64url strings. +When serializing binary data as TJSON, encoders SHOULD use the "b" tag to +indicate binary data unless another format has been explicitly specified. The following is an example of a base64url string literal in TJSON: - "b64:SGVsbG8sIHdvcmxkIQ" + {"example:b64":"SGVsbG8sIHdvcmxkIQ"} -This decodes to the equivalent of the ASCII string: "Hello, world!" +The following is the same document using the shorter "b" tag: + + {"example:b":"SGVsbG8sIHdvcmxkIQ"} + +This decodes to an object with an "example" key whose value is the equivalent +of the ASCII string: "Hello, world!" Only the base64url format is supported. The non-URL safe form of base64 is not supported and MUST be rejected by parsers. @@ -178,35 +213,38 @@ range defined as interoperable in [@!RFC7159]. Both signed and unsigned integers are supported and provide the same ranges as 64-bit integers. -### Signed Integers ("i:") +### Signed Integers ("i") -A signed integer literal is represented as string with an "i:" tag, followed -by a valid JSON integer literal, with an optional minus ("-") character. +Signed integer literals are identified by the "i" tag, with an associated +JSON string literal value containing the string representation of a valid +JSON integer literal, with an optional minus ("-") character. Conforming TJSON parsers MUST be capable of supporting the full 64-bit signed integer range `[-(2**63), (2**63)-1]` for this type. Integers outside this range MUST be rejected. -### Unsigned Integers ("u:") +### Unsigned Integers ("u") -An unsigned integer literal is represented as a string with a "u:" tag, -followed by a valid JSON integer literal. 
The minus ("-") character is -expressly disallowed and parsers MUST fail if it's present. +Unsigned integer literals are identified by the "u" tag, with an associated +JSON string literal value containing the string representation of a valid +JSON integer literal. The minus ("-") character is expressly disallowed and +parsers MUST reject documents containing it in an unsigned integer expression. Conforming TJSON parsers MUST be capable of supporting the full 64-bit unsigned integer range `[0, (2**64)−1]` for this type. -## Timestamps ("t:") +## Timestamps ("t") TJSON natively supports a timestamp type whose syntax is a subset of that provided by [@!RFC3339]. Specifically, TJSON timestamps MUST use only the -upper-case UTC time zone identifier "Z". No other time zone identifiers are -allowed except "Z" and parsers MUST NOT allow them. +upper-case UTC time zone identifier "Z" (i.e. times MUST be Z-normalized). +No other time zone identifiers are allowed except "Z" and parsers MUST NOT +allow them. The following is an example of a TJSON timestamp: - "t:2016-10-02T07:31:51Z" + {"example:t":"2016-10-02T07:31:51Z"} TJSON libraries SHOULD convert these timestamps to a native datetime type. @@ -217,12 +255,10 @@ handled under TJSON. ## Objects -TJSON constrains the allowable types for the names of object members to either -Unicode Strings or Binary Data. - -All other types, such as integers, are expressly disallowed. +Type information MUST be present in all object member names (i.e. all member +names must be tagged). Parsers MUST reject objects with untagged members. -The names of object members MUST be unique in TJSON. Repeated use of the same +Object member names MUST be unique in TJSON. Repeated use of the same name for more than one member MUST be rejected by TJSON parsers. ## Arrays