From b96582ba35aa64170bc05e26895fa0a8a12e6d7c Mon Sep 17 00:00:00 2001 From: natasha41575 Date: Fri, 13 May 2022 15:38:45 -0700 Subject: [PATCH] fork go-yaml v2 and v3 --- goyaml.v2/LICENSE | 201 ++ goyaml.v2/LICENSE.libyaml | 31 + goyaml.v2/NOTICE | 13 + goyaml.v2/README.md | 133 ++ goyaml.v2/apic.go | 744 +++++++ goyaml.v2/decode.go | 815 ++++++++ goyaml.v2/decode_test.go | 1367 +++++++++++++ goyaml.v2/emitterc.go | 1685 +++++++++++++++ goyaml.v2/encode.go | 390 ++++ goyaml.v2/encode_test.go | 646 ++++++ goyaml.v2/example_embedded_test.go | 41 + goyaml.v2/go.mod | 5 + goyaml.v2/limit_test.go | 128 ++ goyaml.v2/parserc.go | 1095 ++++++++++ goyaml.v2/readerc.go | 412 ++++ goyaml.v2/resolve.go | 258 +++ goyaml.v2/scannerc.go | 2711 +++++++++++++++++++++++++ goyaml.v2/sorter.go | 113 ++ goyaml.v2/suite_test.go | 12 + goyaml.v2/writerc.go | 26 + goyaml.v2/yaml.go | 478 +++++ goyaml.v2/yamlh.go | 739 +++++++ goyaml.v2/yamlprivateh.go | 173 ++ goyaml.v3/LICENSE | 50 + goyaml.v3/NOTICE | 13 + goyaml.v3/README.md | 150 ++ goyaml.v3/apic.go | 747 +++++++ goyaml.v3/decode.go | 1000 +++++++++ goyaml.v3/decode_test.go | 1771 ++++++++++++++++ goyaml.v3/emitterc.go | 2020 ++++++++++++++++++ goyaml.v3/encode.go | 577 ++++++ goyaml.v3/encode_test.go | 736 +++++++ goyaml.v3/example_embedded_test.go | 56 + goyaml.v3/go.mod | 5 + goyaml.v3/limit_test.go | 128 ++ goyaml.v3/node_test.go | 2886 ++++++++++++++++++++++++++ goyaml.v3/parserc.go | 1258 ++++++++++++ goyaml.v3/readerc.go | 434 ++++ goyaml.v3/resolve.go | 326 +++ goyaml.v3/scannerc.go | 3038 ++++++++++++++++++++++++++++ goyaml.v3/sorter.go | 134 ++ goyaml.v3/suite_test.go | 27 + goyaml.v3/writerc.go | 48 + goyaml.v3/yaml.go | 698 +++++++ goyaml.v3/yamlh.go | 807 ++++++++ goyaml.v3/yamlprivateh.go | 198 ++ 46 files changed, 29323 insertions(+) create mode 100644 goyaml.v2/LICENSE create mode 100644 goyaml.v2/LICENSE.libyaml create mode 100644 goyaml.v2/NOTICE create mode 100644 goyaml.v2/README.md create mode 100644 goyaml.v2/apic.go create mode 100644 goyaml.v2/decode.go create mode 100644 goyaml.v2/decode_test.go create mode 100644 goyaml.v2/emitterc.go create mode 100644 goyaml.v2/encode.go create mode 100644 goyaml.v2/encode_test.go create mode 100644 goyaml.v2/example_embedded_test.go create mode 100644 goyaml.v2/go.mod create mode 100644 goyaml.v2/limit_test.go create mode 100644 goyaml.v2/parserc.go create mode 100644 goyaml.v2/readerc.go create mode 100644 goyaml.v2/resolve.go create mode 100644 goyaml.v2/scannerc.go create mode 100644 goyaml.v2/sorter.go create mode 100644 goyaml.v2/suite_test.go create mode 100644 goyaml.v2/writerc.go create mode 100644 goyaml.v2/yaml.go create mode 100644 goyaml.v2/yamlh.go create mode 100644 goyaml.v2/yamlprivateh.go create mode 100644 goyaml.v3/LICENSE create mode 100644 goyaml.v3/NOTICE create mode 100644 goyaml.v3/README.md create mode 100644 goyaml.v3/apic.go create mode 100644 goyaml.v3/decode.go create mode 100644 goyaml.v3/decode_test.go create mode 100644 goyaml.v3/emitterc.go create mode 100644 goyaml.v3/encode.go create mode 100644 goyaml.v3/encode_test.go create mode 100644 goyaml.v3/example_embedded_test.go create mode 100644 goyaml.v3/go.mod create mode 100644 goyaml.v3/limit_test.go create mode 100644 goyaml.v3/node_test.go create mode 100644 goyaml.v3/parserc.go create mode 100644 goyaml.v3/readerc.go create mode 100644 goyaml.v3/resolve.go create mode 100644 goyaml.v3/scannerc.go create mode 100644 goyaml.v3/sorter.go create mode 100644 goyaml.v3/suite_test.go create mode 100644 goyaml.v3/writerc.go create mode 100644 goyaml.v3/yaml.go create mode 100644 goyaml.v3/yamlh.go create mode 100644 goyaml.v3/yamlprivateh.go diff --git a/goyaml.v2/LICENSE b/goyaml.v2/LICENSE new file mode 100644 index 0000000..8dada3e --- /dev/null +++ b/goyaml.v2/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/goyaml.v2/LICENSE.libyaml b/goyaml.v2/LICENSE.libyaml new file mode 100644 index 0000000..8da58fb --- /dev/null +++ b/goyaml.v2/LICENSE.libyaml @@ -0,0 +1,31 @@ +The following files were ported to Go from C files of libyaml, and thus +are still covered by their original copyright and license: + + apic.go + emitterc.go + parserc.go + readerc.go + scannerc.go + writerc.go + yamlh.go + yamlprivateh.go + +Copyright (c) 2006 Kirill Simonov + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/goyaml.v2/NOTICE b/goyaml.v2/NOTICE new file mode 100644 index 0000000..866d74a --- /dev/null +++ b/goyaml.v2/NOTICE @@ -0,0 +1,13 @@ +Copyright 2011-2016 Canonical Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/goyaml.v2/README.md b/goyaml.v2/README.md new file mode 100644 index 0000000..b50c6e8 --- /dev/null +++ b/goyaml.v2/README.md @@ -0,0 +1,133 @@ +# YAML support for the Go language + +Introduction +------------ + +The yaml package enables Go programs to comfortably encode and decode YAML +values. It was developed within [Canonical](https://www.canonical.com) as +part of the [juju](https://juju.ubuntu.com) project, and is based on a +pure Go port of the well-known [libyaml](http://pyyaml.org/wiki/LibYAML) +C library to parse and generate YAML data quickly and reliably. + +Compatibility +------------- + +The yaml package supports most of YAML 1.1 and 1.2, including support for +anchors, tags, map merging, etc. Multi-document unmarshalling is not yet +implemented, and base-60 floats from YAML 1.1 are purposefully not +supported since they're a poor design and are gone in YAML 1.2. + +Installation and usage +---------------------- + +The import path for the package is *gopkg.in/yaml.v2*. + +To install it, run: + + go get gopkg.in/yaml.v2 + +API documentation +----------------- + +If opened in a browser, the import path itself leads to the API documentation: + + * [https://gopkg.in/yaml.v2](https://gopkg.in/yaml.v2) + +API stability +------------- + +The package API for yaml v2 will remain stable as described in [gopkg.in](https://gopkg.in). + + +License +------- + +The yaml package is licensed under the Apache License 2.0. Please see the LICENSE file for details. + + +Example +------- + +```Go +package main + +import ( + "fmt" + "log" + + "gopkg.in/yaml.v2" +) + +var data = ` +a: Easy! +b: + c: 2 + d: [3, 4] +` + +// Note: struct fields must be public in order for unmarshal to +// correctly populate the data. +type T struct { + A string + B struct { + RenamedC int `yaml:"c"` + D []int `yaml:",flow"` + } +} + +func main() { + t := T{} + + err := yaml.Unmarshal([]byte(data), &t) + if err != nil { + log.Fatalf("error: %v", err) + } + fmt.Printf("--- t:\n%v\n\n", t) + + d, err := yaml.Marshal(&t) + if err != nil { + log.Fatalf("error: %v", err) + } + fmt.Printf("--- t dump:\n%s\n\n", string(d)) + + m := make(map[interface{}]interface{}) + + err = yaml.Unmarshal([]byte(data), &m) + if err != nil { + log.Fatalf("error: %v", err) + } + fmt.Printf("--- m:\n%v\n\n", m) + + d, err = yaml.Marshal(&m) + if err != nil { + log.Fatalf("error: %v", err) + } + fmt.Printf("--- m dump:\n%s\n\n", string(d)) +} +``` + +This example will generate the following output: + +``` +--- t: +{Easy! {2 [3 4]}} + +--- t dump: +a: Easy! +b: + c: 2 + d: [3, 4] + + +--- m: +map[a:Easy! b:map[c:2 d:[3 4]]] + +--- m dump: +a: Easy! +b: + c: 2 + d: + - 3 + - 4 +``` + diff --git a/goyaml.v2/apic.go b/goyaml.v2/apic.go new file mode 100644 index 0000000..acf7140 --- /dev/null +++ b/goyaml.v2/apic.go @@ -0,0 +1,744 @@ +package yaml + +import ( + "io" +) + +func yaml_insert_token(parser *yaml_parser_t, pos int, token *yaml_token_t) { + //fmt.Println("yaml_insert_token", "pos:", pos, "typ:", token.typ, "head:", parser.tokens_head, "len:", len(parser.tokens)) + + // Check if we can move the queue at the beginning of the buffer. + if parser.tokens_head > 0 && len(parser.tokens) == cap(parser.tokens) { + if parser.tokens_head != len(parser.tokens) { + copy(parser.tokens, parser.tokens[parser.tokens_head:]) + } + parser.tokens = parser.tokens[:len(parser.tokens)-parser.tokens_head] + parser.tokens_head = 0 + } + parser.tokens = append(parser.tokens, *token) + if pos < 0 { + return + } + copy(parser.tokens[parser.tokens_head+pos+1:], parser.tokens[parser.tokens_head+pos:]) + parser.tokens[parser.tokens_head+pos] = *token +} + +// Create a new parser object. +func yaml_parser_initialize(parser *yaml_parser_t) bool { + *parser = yaml_parser_t{ + raw_buffer: make([]byte, 0, input_raw_buffer_size), + buffer: make([]byte, 0, input_buffer_size), + } + return true +} + +// Destroy a parser object. +func yaml_parser_delete(parser *yaml_parser_t) { + *parser = yaml_parser_t{} +} + +// String read handler. +func yaml_string_read_handler(parser *yaml_parser_t, buffer []byte) (n int, err error) { + if parser.input_pos == len(parser.input) { + return 0, io.EOF + } + n = copy(buffer, parser.input[parser.input_pos:]) + parser.input_pos += n + return n, nil +} + +// Reader read handler. +func yaml_reader_read_handler(parser *yaml_parser_t, buffer []byte) (n int, err error) { + return parser.input_reader.Read(buffer) +} + +// Set a string input. +func yaml_parser_set_input_string(parser *yaml_parser_t, input []byte) { + if parser.read_handler != nil { + panic("must set the input source only once") + } + parser.read_handler = yaml_string_read_handler + parser.input = input + parser.input_pos = 0 +} + +// Set a file input. +func yaml_parser_set_input_reader(parser *yaml_parser_t, r io.Reader) { + if parser.read_handler != nil { + panic("must set the input source only once") + } + parser.read_handler = yaml_reader_read_handler + parser.input_reader = r +} + +// Set the source encoding. +func yaml_parser_set_encoding(parser *yaml_parser_t, encoding yaml_encoding_t) { + if parser.encoding != yaml_ANY_ENCODING { + panic("must set the encoding only once") + } + parser.encoding = encoding +} + +var disableLineWrapping = false + +// Create a new emitter object. +func yaml_emitter_initialize(emitter *yaml_emitter_t) { + *emitter = yaml_emitter_t{ + buffer: make([]byte, output_buffer_size), + raw_buffer: make([]byte, 0, output_raw_buffer_size), + states: make([]yaml_emitter_state_t, 0, initial_stack_size), + events: make([]yaml_event_t, 0, initial_queue_size), + } + if disableLineWrapping { + emitter.best_width = -1 + } +} + +// Destroy an emitter object. +func yaml_emitter_delete(emitter *yaml_emitter_t) { + *emitter = yaml_emitter_t{} +} + +// String write handler. +func yaml_string_write_handler(emitter *yaml_emitter_t, buffer []byte) error { + *emitter.output_buffer = append(*emitter.output_buffer, buffer...) + return nil +} + +// yaml_writer_write_handler uses emitter.output_writer to write the +// emitted text. +func yaml_writer_write_handler(emitter *yaml_emitter_t, buffer []byte) error { + _, err := emitter.output_writer.Write(buffer) + return err +} + +// Set a string output. +func yaml_emitter_set_output_string(emitter *yaml_emitter_t, output_buffer *[]byte) { + if emitter.write_handler != nil { + panic("must set the output target only once") + } + emitter.write_handler = yaml_string_write_handler + emitter.output_buffer = output_buffer +} + +// Set a file output. +func yaml_emitter_set_output_writer(emitter *yaml_emitter_t, w io.Writer) { + if emitter.write_handler != nil { + panic("must set the output target only once") + } + emitter.write_handler = yaml_writer_write_handler + emitter.output_writer = w +} + +// Set the output encoding. +func yaml_emitter_set_encoding(emitter *yaml_emitter_t, encoding yaml_encoding_t) { + if emitter.encoding != yaml_ANY_ENCODING { + panic("must set the output encoding only once") + } + emitter.encoding = encoding +} + +// Set the canonical output style. +func yaml_emitter_set_canonical(emitter *yaml_emitter_t, canonical bool) { + emitter.canonical = canonical +} + +//// Set the indentation increment. +func yaml_emitter_set_indent(emitter *yaml_emitter_t, indent int) { + if indent < 2 || indent > 9 { + indent = 2 + } + emitter.best_indent = indent +} + +// Set the preferred line width. +func yaml_emitter_set_width(emitter *yaml_emitter_t, width int) { + if width < 0 { + width = -1 + } + emitter.best_width = width +} + +// Set if unescaped non-ASCII characters are allowed. +func yaml_emitter_set_unicode(emitter *yaml_emitter_t, unicode bool) { + emitter.unicode = unicode +} + +// Set the preferred line break character. +func yaml_emitter_set_break(emitter *yaml_emitter_t, line_break yaml_break_t) { + emitter.line_break = line_break +} + +///* +// * Destroy a token object. +// */ +// +//YAML_DECLARE(void) +//yaml_token_delete(yaml_token_t *token) +//{ +// assert(token); // Non-NULL token object expected. +// +// switch (token.type) +// { +// case YAML_TAG_DIRECTIVE_TOKEN: +// yaml_free(token.data.tag_directive.handle); +// yaml_free(token.data.tag_directive.prefix); +// break; +// +// case YAML_ALIAS_TOKEN: +// yaml_free(token.data.alias.value); +// break; +// +// case YAML_ANCHOR_TOKEN: +// yaml_free(token.data.anchor.value); +// break; +// +// case YAML_TAG_TOKEN: +// yaml_free(token.data.tag.handle); +// yaml_free(token.data.tag.suffix); +// break; +// +// case YAML_SCALAR_TOKEN: +// yaml_free(token.data.scalar.value); +// break; +// +// default: +// break; +// } +// +// memset(token, 0, sizeof(yaml_token_t)); +//} +// +///* +// * Check if a string is a valid UTF-8 sequence. +// * +// * Check 'reader.c' for more details on UTF-8 encoding. +// */ +// +//static int +//yaml_check_utf8(yaml_char_t *start, size_t length) +//{ +// yaml_char_t *end = start+length; +// yaml_char_t *pointer = start; +// +// while (pointer < end) { +// unsigned char octet; +// unsigned int width; +// unsigned int value; +// size_t k; +// +// octet = pointer[0]; +// width = (octet & 0x80) == 0x00 ? 1 : +// (octet & 0xE0) == 0xC0 ? 2 : +// (octet & 0xF0) == 0xE0 ? 3 : +// (octet & 0xF8) == 0xF0 ? 4 : 0; +// value = (octet & 0x80) == 0x00 ? octet & 0x7F : +// (octet & 0xE0) == 0xC0 ? octet & 0x1F : +// (octet & 0xF0) == 0xE0 ? octet & 0x0F : +// (octet & 0xF8) == 0xF0 ? octet & 0x07 : 0; +// if (!width) return 0; +// if (pointer+width > end) return 0; +// for (k = 1; k < width; k ++) { +// octet = pointer[k]; +// if ((octet & 0xC0) != 0x80) return 0; +// value = (value << 6) + (octet & 0x3F); +// } +// if (!((width == 1) || +// (width == 2 && value >= 0x80) || +// (width == 3 && value >= 0x800) || +// (width == 4 && value >= 0x10000))) return 0; +// +// pointer += width; +// } +// +// return 1; +//} +// + +// Create STREAM-START. +func yaml_stream_start_event_initialize(event *yaml_event_t, encoding yaml_encoding_t) { + *event = yaml_event_t{ + typ: yaml_STREAM_START_EVENT, + encoding: encoding, + } +} + +// Create STREAM-END. +func yaml_stream_end_event_initialize(event *yaml_event_t) { + *event = yaml_event_t{ + typ: yaml_STREAM_END_EVENT, + } +} + +// Create DOCUMENT-START. +func yaml_document_start_event_initialize( + event *yaml_event_t, + version_directive *yaml_version_directive_t, + tag_directives []yaml_tag_directive_t, + implicit bool, +) { + *event = yaml_event_t{ + typ: yaml_DOCUMENT_START_EVENT, + version_directive: version_directive, + tag_directives: tag_directives, + implicit: implicit, + } +} + +// Create DOCUMENT-END. +func yaml_document_end_event_initialize(event *yaml_event_t, implicit bool) { + *event = yaml_event_t{ + typ: yaml_DOCUMENT_END_EVENT, + implicit: implicit, + } +} + +///* +// * Create ALIAS. +// */ +// +//YAML_DECLARE(int) +//yaml_alias_event_initialize(event *yaml_event_t, anchor *yaml_char_t) +//{ +// mark yaml_mark_t = { 0, 0, 0 } +// anchor_copy *yaml_char_t = NULL +// +// assert(event) // Non-NULL event object is expected. +// assert(anchor) // Non-NULL anchor is expected. +// +// if (!yaml_check_utf8(anchor, strlen((char *)anchor))) return 0 +// +// anchor_copy = yaml_strdup(anchor) +// if (!anchor_copy) +// return 0 +// +// ALIAS_EVENT_INIT(*event, anchor_copy, mark, mark) +// +// return 1 +//} + +// Create SCALAR. +func yaml_scalar_event_initialize(event *yaml_event_t, anchor, tag, value []byte, plain_implicit, quoted_implicit bool, style yaml_scalar_style_t) bool { + *event = yaml_event_t{ + typ: yaml_SCALAR_EVENT, + anchor: anchor, + tag: tag, + value: value, + implicit: plain_implicit, + quoted_implicit: quoted_implicit, + style: yaml_style_t(style), + } + return true +} + +// Create SEQUENCE-START. +func yaml_sequence_start_event_initialize(event *yaml_event_t, anchor, tag []byte, implicit bool, style yaml_sequence_style_t) bool { + *event = yaml_event_t{ + typ: yaml_SEQUENCE_START_EVENT, + anchor: anchor, + tag: tag, + implicit: implicit, + style: yaml_style_t(style), + } + return true +} + +// Create SEQUENCE-END. +func yaml_sequence_end_event_initialize(event *yaml_event_t) bool { + *event = yaml_event_t{ + typ: yaml_SEQUENCE_END_EVENT, + } + return true +} + +// Create MAPPING-START. +func yaml_mapping_start_event_initialize(event *yaml_event_t, anchor, tag []byte, implicit bool, style yaml_mapping_style_t) { + *event = yaml_event_t{ + typ: yaml_MAPPING_START_EVENT, + anchor: anchor, + tag: tag, + implicit: implicit, + style: yaml_style_t(style), + } +} + +// Create MAPPING-END. +func yaml_mapping_end_event_initialize(event *yaml_event_t) { + *event = yaml_event_t{ + typ: yaml_MAPPING_END_EVENT, + } +} + +// Destroy an event object. +func yaml_event_delete(event *yaml_event_t) { + *event = yaml_event_t{} +} + +///* +// * Create a document object. +// */ +// +//YAML_DECLARE(int) +//yaml_document_initialize(document *yaml_document_t, +// version_directive *yaml_version_directive_t, +// tag_directives_start *yaml_tag_directive_t, +// tag_directives_end *yaml_tag_directive_t, +// start_implicit int, end_implicit int) +//{ +// struct { +// error yaml_error_type_t +// } context +// struct { +// start *yaml_node_t +// end *yaml_node_t +// top *yaml_node_t +// } nodes = { NULL, NULL, NULL } +// version_directive_copy *yaml_version_directive_t = NULL +// struct { +// start *yaml_tag_directive_t +// end *yaml_tag_directive_t +// top *yaml_tag_directive_t +// } tag_directives_copy = { NULL, NULL, NULL } +// value yaml_tag_directive_t = { NULL, NULL } +// mark yaml_mark_t = { 0, 0, 0 } +// +// assert(document) // Non-NULL document object is expected. +// assert((tag_directives_start && tag_directives_end) || +// (tag_directives_start == tag_directives_end)) +// // Valid tag directives are expected. +// +// if (!STACK_INIT(&context, nodes, INITIAL_STACK_SIZE)) goto error +// +// if (version_directive) { +// version_directive_copy = yaml_malloc(sizeof(yaml_version_directive_t)) +// if (!version_directive_copy) goto error +// version_directive_copy.major = version_directive.major +// version_directive_copy.minor = version_directive.minor +// } +// +// if (tag_directives_start != tag_directives_end) { +// tag_directive *yaml_tag_directive_t +// if (!STACK_INIT(&context, tag_directives_copy, INITIAL_STACK_SIZE)) +// goto error +// for (tag_directive = tag_directives_start +// tag_directive != tag_directives_end; tag_directive ++) { +// assert(tag_directive.handle) +// assert(tag_directive.prefix) +// if (!yaml_check_utf8(tag_directive.handle, +// strlen((char *)tag_directive.handle))) +// goto error +// if (!yaml_check_utf8(tag_directive.prefix, +// strlen((char *)tag_directive.prefix))) +// goto error +// value.handle = yaml_strdup(tag_directive.handle) +// value.prefix = yaml_strdup(tag_directive.prefix) +// if (!value.handle || !value.prefix) goto error +// if (!PUSH(&context, tag_directives_copy, value)) +// goto error +// value.handle = NULL +// value.prefix = NULL +// } +// } +// +// DOCUMENT_INIT(*document, nodes.start, nodes.end, version_directive_copy, +// tag_directives_copy.start, tag_directives_copy.top, +// start_implicit, end_implicit, mark, mark) +// +// return 1 +// +//error: +// STACK_DEL(&context, nodes) +// yaml_free(version_directive_copy) +// while (!STACK_EMPTY(&context, tag_directives_copy)) { +// value yaml_tag_directive_t = POP(&context, tag_directives_copy) +// yaml_free(value.handle) +// yaml_free(value.prefix) +// } +// STACK_DEL(&context, tag_directives_copy) +// yaml_free(value.handle) +// yaml_free(value.prefix) +// +// return 0 +//} +// +///* +// * Destroy a document object. +// */ +// +//YAML_DECLARE(void) +//yaml_document_delete(document *yaml_document_t) +//{ +// struct { +// error yaml_error_type_t +// } context +// tag_directive *yaml_tag_directive_t +// +// context.error = YAML_NO_ERROR // Eliminate a compiler warning. +// +// assert(document) // Non-NULL document object is expected. +// +// while (!STACK_EMPTY(&context, document.nodes)) { +// node yaml_node_t = POP(&context, document.nodes) +// yaml_free(node.tag) +// switch (node.type) { +// case YAML_SCALAR_NODE: +// yaml_free(node.data.scalar.value) +// break +// case YAML_SEQUENCE_NODE: +// STACK_DEL(&context, node.data.sequence.items) +// break +// case YAML_MAPPING_NODE: +// STACK_DEL(&context, node.data.mapping.pairs) +// break +// default: +// assert(0) // Should not happen. +// } +// } +// STACK_DEL(&context, document.nodes) +// +// yaml_free(document.version_directive) +// for (tag_directive = document.tag_directives.start +// tag_directive != document.tag_directives.end +// tag_directive++) { +// yaml_free(tag_directive.handle) +// yaml_free(tag_directive.prefix) +// } +// yaml_free(document.tag_directives.start) +// +// memset(document, 0, sizeof(yaml_document_t)) +//} +// +///** +// * Get a document node. +// */ +// +//YAML_DECLARE(yaml_node_t *) +//yaml_document_get_node(document *yaml_document_t, index int) +//{ +// assert(document) // Non-NULL document object is expected. +// +// if (index > 0 && document.nodes.start + index <= document.nodes.top) { +// return document.nodes.start + index - 1 +// } +// return NULL +//} +// +///** +// * Get the root object. +// */ +// +//YAML_DECLARE(yaml_node_t *) +//yaml_document_get_root_node(document *yaml_document_t) +//{ +// assert(document) // Non-NULL document object is expected. +// +// if (document.nodes.top != document.nodes.start) { +// return document.nodes.start +// } +// return NULL +//} +// +///* +// * Add a scalar node to a document. +// */ +// +//YAML_DECLARE(int) +//yaml_document_add_scalar(document *yaml_document_t, +// tag *yaml_char_t, value *yaml_char_t, length int, +// style yaml_scalar_style_t) +//{ +// struct { +// error yaml_error_type_t +// } context +// mark yaml_mark_t = { 0, 0, 0 } +// tag_copy *yaml_char_t = NULL +// value_copy *yaml_char_t = NULL +// node yaml_node_t +// +// assert(document) // Non-NULL document object is expected. +// assert(value) // Non-NULL value is expected. +// +// if (!tag) { +// tag = (yaml_char_t *)YAML_DEFAULT_SCALAR_TAG +// } +// +// if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error +// tag_copy = yaml_strdup(tag) +// if (!tag_copy) goto error +// +// if (length < 0) { +// length = strlen((char *)value) +// } +// +// if (!yaml_check_utf8(value, length)) goto error +// value_copy = yaml_malloc(length+1) +// if (!value_copy) goto error +// memcpy(value_copy, value, length) +// value_copy[length] = '\0' +// +// SCALAR_NODE_INIT(node, tag_copy, value_copy, length, style, mark, mark) +// if (!PUSH(&context, document.nodes, node)) goto error +// +// return document.nodes.top - document.nodes.start +// +//error: +// yaml_free(tag_copy) +// yaml_free(value_copy) +// +// return 0 +//} +// +///* +// * Add a sequence node to a document. +// */ +// +//YAML_DECLARE(int) +//yaml_document_add_sequence(document *yaml_document_t, +// tag *yaml_char_t, style yaml_sequence_style_t) +//{ +// struct { +// error yaml_error_type_t +// } context +// mark yaml_mark_t = { 0, 0, 0 } +// tag_copy *yaml_char_t = NULL +// struct { +// start *yaml_node_item_t +// end *yaml_node_item_t +// top *yaml_node_item_t +// } items = { NULL, NULL, NULL } +// node yaml_node_t +// +// assert(document) // Non-NULL document object is expected. +// +// if (!tag) { +// tag = (yaml_char_t *)YAML_DEFAULT_SEQUENCE_TAG +// } +// +// if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error +// tag_copy = yaml_strdup(tag) +// if (!tag_copy) goto error +// +// if (!STACK_INIT(&context, items, INITIAL_STACK_SIZE)) goto error +// +// SEQUENCE_NODE_INIT(node, tag_copy, items.start, items.end, +// style, mark, mark) +// if (!PUSH(&context, document.nodes, node)) goto error +// +// return document.nodes.top - document.nodes.start +// +//error: +// STACK_DEL(&context, items) +// yaml_free(tag_copy) +// +// return 0 +//} +// +///* +// * Add a mapping node to a document. +// */ +// +//YAML_DECLARE(int) +//yaml_document_add_mapping(document *yaml_document_t, +// tag *yaml_char_t, style yaml_mapping_style_t) +//{ +// struct { +// error yaml_error_type_t +// } context +// mark yaml_mark_t = { 0, 0, 0 } +// tag_copy *yaml_char_t = NULL +// struct { +// start *yaml_node_pair_t +// end *yaml_node_pair_t +// top *yaml_node_pair_t +// } pairs = { NULL, NULL, NULL } +// node yaml_node_t +// +// assert(document) // Non-NULL document object is expected. +// +// if (!tag) { +// tag = (yaml_char_t *)YAML_DEFAULT_MAPPING_TAG +// } +// +// if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error +// tag_copy = yaml_strdup(tag) +// if (!tag_copy) goto error +// +// if (!STACK_INIT(&context, pairs, INITIAL_STACK_SIZE)) goto error +// +// MAPPING_NODE_INIT(node, tag_copy, pairs.start, pairs.end, +// style, mark, mark) +// if (!PUSH(&context, document.nodes, node)) goto error +// +// return document.nodes.top - document.nodes.start +// +//error: +// STACK_DEL(&context, pairs) +// yaml_free(tag_copy) +// +// return 0 +//} +// +///* +// * Append an item to a sequence node. +// */ +// +//YAML_DECLARE(int) +//yaml_document_append_sequence_item(document *yaml_document_t, +// sequence int, item int) +//{ +// struct { +// error yaml_error_type_t +// } context +// +// assert(document) // Non-NULL document is required. +// assert(sequence > 0 +// && document.nodes.start + sequence <= document.nodes.top) +// // Valid sequence id is required. +// assert(document.nodes.start[sequence-1].type == YAML_SEQUENCE_NODE) +// // A sequence node is required. +// assert(item > 0 && document.nodes.start + item <= document.nodes.top) +// // Valid item id is required. +// +// if (!PUSH(&context, +// document.nodes.start[sequence-1].data.sequence.items, item)) +// return 0 +// +// return 1 +//} +// +///* +// * Append a pair of a key and a value to a mapping node. +// */ +// +//YAML_DECLARE(int) +//yaml_document_append_mapping_pair(document *yaml_document_t, +// mapping int, key int, value int) +//{ +// struct { +// error yaml_error_type_t +// } context +// +// pair yaml_node_pair_t +// +// assert(document) // Non-NULL document is required. +// assert(mapping > 0 +// && document.nodes.start + mapping <= document.nodes.top) +// // Valid mapping id is required. +// assert(document.nodes.start[mapping-1].type == YAML_MAPPING_NODE) +// // A mapping node is required. +// assert(key > 0 && document.nodes.start + key <= document.nodes.top) +// // Valid key id is required. +// assert(value > 0 && document.nodes.start + value <= document.nodes.top) +// // Valid value id is required. +// +// pair.key = key +// pair.value = value +// +// if (!PUSH(&context, +// document.nodes.start[mapping-1].data.mapping.pairs, pair)) +// return 0 +// +// return 1 +//} +// +// diff --git a/goyaml.v2/decode.go b/goyaml.v2/decode.go new file mode 100644 index 0000000..129bc2a --- /dev/null +++ b/goyaml.v2/decode.go @@ -0,0 +1,815 @@ +package yaml + +import ( + "encoding" + "encoding/base64" + "fmt" + "io" + "math" + "reflect" + "strconv" + "time" +) + +const ( + documentNode = 1 << iota + mappingNode + sequenceNode + scalarNode + aliasNode +) + +type node struct { + kind int + line, column int + tag string + // For an alias node, alias holds the resolved alias. + alias *node + value string + implicit bool + children []*node + anchors map[string]*node +} + +// ---------------------------------------------------------------------------- +// Parser, produces a node tree out of a libyaml event stream. + +type parser struct { + parser yaml_parser_t + event yaml_event_t + doc *node + doneInit bool +} + +func newParser(b []byte) *parser { + p := parser{} + if !yaml_parser_initialize(&p.parser) { + panic("failed to initialize YAML emitter") + } + if len(b) == 0 { + b = []byte{'\n'} + } + yaml_parser_set_input_string(&p.parser, b) + return &p +} + +func newParserFromReader(r io.Reader) *parser { + p := parser{} + if !yaml_parser_initialize(&p.parser) { + panic("failed to initialize YAML emitter") + } + yaml_parser_set_input_reader(&p.parser, r) + return &p +} + +func (p *parser) init() { + if p.doneInit { + return + } + p.expect(yaml_STREAM_START_EVENT) + p.doneInit = true +} + +func (p *parser) destroy() { + if p.event.typ != yaml_NO_EVENT { + yaml_event_delete(&p.event) + } + yaml_parser_delete(&p.parser) +} + +// expect consumes an event from the event stream and +// checks that it's of the expected type. +func (p *parser) expect(e yaml_event_type_t) { + if p.event.typ == yaml_NO_EVENT { + if !yaml_parser_parse(&p.parser, &p.event) { + p.fail() + } + } + if p.event.typ == yaml_STREAM_END_EVENT { + failf("attempted to go past the end of stream; corrupted value?") + } + if p.event.typ != e { + p.parser.problem = fmt.Sprintf("expected %s event but got %s", e, p.event.typ) + p.fail() + } + yaml_event_delete(&p.event) + p.event.typ = yaml_NO_EVENT +} + +// peek peeks at the next event in the event stream, +// puts the results into p.event and returns the event type. +func (p *parser) peek() yaml_event_type_t { + if p.event.typ != yaml_NO_EVENT { + return p.event.typ + } + if !yaml_parser_parse(&p.parser, &p.event) { + p.fail() + } + return p.event.typ +} + +func (p *parser) fail() { + var where string + var line int + if p.parser.problem_mark.line != 0 { + line = p.parser.problem_mark.line + // Scanner errors don't iterate line before returning error + if p.parser.error == yaml_SCANNER_ERROR { + line++ + } + } else if p.parser.context_mark.line != 0 { + line = p.parser.context_mark.line + } + if line != 0 { + where = "line " + strconv.Itoa(line) + ": " + } + var msg string + if len(p.parser.problem) > 0 { + msg = p.parser.problem + } else { + msg = "unknown problem parsing YAML content" + } + failf("%s%s", where, msg) +} + +func (p *parser) anchor(n *node, anchor []byte) { + if anchor != nil { + p.doc.anchors[string(anchor)] = n + } +} + +func (p *parser) parse() *node { + p.init() + switch p.peek() { + case yaml_SCALAR_EVENT: + return p.scalar() + case yaml_ALIAS_EVENT: + return p.alias() + case yaml_MAPPING_START_EVENT: + return p.mapping() + case yaml_SEQUENCE_START_EVENT: + return p.sequence() + case yaml_DOCUMENT_START_EVENT: + return p.document() + case yaml_STREAM_END_EVENT: + // Happens when attempting to decode an empty buffer. + return nil + default: + panic("attempted to parse unknown event: " + p.event.typ.String()) + } +} + +func (p *parser) node(kind int) *node { + return &node{ + kind: kind, + line: p.event.start_mark.line, + column: p.event.start_mark.column, + } +} + +func (p *parser) document() *node { + n := p.node(documentNode) + n.anchors = make(map[string]*node) + p.doc = n + p.expect(yaml_DOCUMENT_START_EVENT) + n.children = append(n.children, p.parse()) + p.expect(yaml_DOCUMENT_END_EVENT) + return n +} + +func (p *parser) alias() *node { + n := p.node(aliasNode) + n.value = string(p.event.anchor) + n.alias = p.doc.anchors[n.value] + if n.alias == nil { + failf("unknown anchor '%s' referenced", n.value) + } + p.expect(yaml_ALIAS_EVENT) + return n +} + +func (p *parser) scalar() *node { + n := p.node(scalarNode) + n.value = string(p.event.value) + n.tag = string(p.event.tag) + n.implicit = p.event.implicit + p.anchor(n, p.event.anchor) + p.expect(yaml_SCALAR_EVENT) + return n +} + +func (p *parser) sequence() *node { + n := p.node(sequenceNode) + p.anchor(n, p.event.anchor) + p.expect(yaml_SEQUENCE_START_EVENT) + for p.peek() != yaml_SEQUENCE_END_EVENT { + n.children = append(n.children, p.parse()) + } + p.expect(yaml_SEQUENCE_END_EVENT) + return n +} + +func (p *parser) mapping() *node { + n := p.node(mappingNode) + p.anchor(n, p.event.anchor) + p.expect(yaml_MAPPING_START_EVENT) + for p.peek() != yaml_MAPPING_END_EVENT { + n.children = append(n.children, p.parse(), p.parse()) + } + p.expect(yaml_MAPPING_END_EVENT) + return n +} + +// ---------------------------------------------------------------------------- +// Decoder, unmarshals a node into a provided value. + +type decoder struct { + doc *node + aliases map[*node]bool + mapType reflect.Type + terrors []string + strict bool + + decodeCount int + aliasCount int + aliasDepth int +} + +var ( + mapItemType = reflect.TypeOf(MapItem{}) + durationType = reflect.TypeOf(time.Duration(0)) + defaultMapType = reflect.TypeOf(map[interface{}]interface{}{}) + ifaceType = defaultMapType.Elem() + timeType = reflect.TypeOf(time.Time{}) + ptrTimeType = reflect.TypeOf(&time.Time{}) +) + +func newDecoder(strict bool) *decoder { + d := &decoder{mapType: defaultMapType, strict: strict} + d.aliases = make(map[*node]bool) + return d +} + +func (d *decoder) terror(n *node, tag string, out reflect.Value) { + if n.tag != "" { + tag = n.tag + } + value := n.value + if tag != yaml_SEQ_TAG && tag != yaml_MAP_TAG { + if len(value) > 10 { + value = " `" + value[:7] + "...`" + } else { + value = " `" + value + "`" + } + } + d.terrors = append(d.terrors, fmt.Sprintf("line %d: cannot unmarshal %s%s into %s", n.line+1, shortTag(tag), value, out.Type())) +} + +func (d *decoder) callUnmarshaler(n *node, u Unmarshaler) (good bool) { + terrlen := len(d.terrors) + err := u.UnmarshalYAML(func(v interface{}) (err error) { + defer handleErr(&err) + d.unmarshal(n, reflect.ValueOf(v)) + if len(d.terrors) > terrlen { + issues := d.terrors[terrlen:] + d.terrors = d.terrors[:terrlen] + return &TypeError{issues} + } + return nil + }) + if e, ok := err.(*TypeError); ok { + d.terrors = append(d.terrors, e.Errors...) + return false + } + if err != nil { + fail(err) + } + return true +} + +// d.prepare initializes and dereferences pointers and calls UnmarshalYAML +// if a value is found to implement it. +// It returns the initialized and dereferenced out value, whether +// unmarshalling was already done by UnmarshalYAML, and if so whether +// its types unmarshalled appropriately. +// +// If n holds a null value, prepare returns before doing anything. +func (d *decoder) prepare(n *node, out reflect.Value) (newout reflect.Value, unmarshaled, good bool) { + if n.tag == yaml_NULL_TAG || n.kind == scalarNode && n.tag == "" && (n.value == "null" || n.value == "~" || n.value == "" && n.implicit) { + return out, false, false + } + again := true + for again { + again = false + if out.Kind() == reflect.Ptr { + if out.IsNil() { + out.Set(reflect.New(out.Type().Elem())) + } + out = out.Elem() + again = true + } + if out.CanAddr() { + if u, ok := out.Addr().Interface().(Unmarshaler); ok { + good = d.callUnmarshaler(n, u) + return out, true, good + } + } + } + return out, false, false +} + +const ( + // 400,000 decode operations is ~500kb of dense object declarations, or + // ~5kb of dense object declarations with 10000% alias expansion + alias_ratio_range_low = 400000 + + // 4,000,000 decode operations is ~5MB of dense object declarations, or + // ~4.5MB of dense object declarations with 10% alias expansion + alias_ratio_range_high = 4000000 + + // alias_ratio_range is the range over which we scale allowed alias ratios + alias_ratio_range = float64(alias_ratio_range_high - alias_ratio_range_low) +) + +func allowedAliasRatio(decodeCount int) float64 { + switch { + case decodeCount <= alias_ratio_range_low: + // allow 99% to come from alias expansion for small-to-medium documents + return 0.99 + case decodeCount >= alias_ratio_range_high: + // allow 10% to come from alias expansion for very large documents + return 0.10 + default: + // scale smoothly from 99% down to 10% over the range. + // this maps to 396,000 - 400,000 allowed alias-driven decodes over the range. + // 400,000 decode operations is ~100MB of allocations in worst-case scenarios (single-item maps). + return 0.99 - 0.89*(float64(decodeCount-alias_ratio_range_low)/alias_ratio_range) + } +} + +func (d *decoder) unmarshal(n *node, out reflect.Value) (good bool) { + d.decodeCount++ + if d.aliasDepth > 0 { + d.aliasCount++ + } + if d.aliasCount > 100 && d.decodeCount > 1000 && float64(d.aliasCount)/float64(d.decodeCount) > allowedAliasRatio(d.decodeCount) { + failf("document contains excessive aliasing") + } + switch n.kind { + case documentNode: + return d.document(n, out) + case aliasNode: + return d.alias(n, out) + } + out, unmarshaled, good := d.prepare(n, out) + if unmarshaled { + return good + } + switch n.kind { + case scalarNode: + good = d.scalar(n, out) + case mappingNode: + good = d.mapping(n, out) + case sequenceNode: + good = d.sequence(n, out) + default: + panic("internal error: unknown node kind: " + strconv.Itoa(n.kind)) + } + return good +} + +func (d *decoder) document(n *node, out reflect.Value) (good bool) { + if len(n.children) == 1 { + d.doc = n + d.unmarshal(n.children[0], out) + return true + } + return false +} + +func (d *decoder) alias(n *node, out reflect.Value) (good bool) { + if d.aliases[n] { + // TODO this could actually be allowed in some circumstances. + failf("anchor '%s' value contains itself", n.value) + } + d.aliases[n] = true + d.aliasDepth++ + good = d.unmarshal(n.alias, out) + d.aliasDepth-- + delete(d.aliases, n) + return good +} + +var zeroValue reflect.Value + +func resetMap(out reflect.Value) { + for _, k := range out.MapKeys() { + out.SetMapIndex(k, zeroValue) + } +} + +func (d *decoder) scalar(n *node, out reflect.Value) bool { + var tag string + var resolved interface{} + if n.tag == "" && !n.implicit { + tag = yaml_STR_TAG + resolved = n.value + } else { + tag, resolved = resolve(n.tag, n.value) + if tag == yaml_BINARY_TAG { + data, err := base64.StdEncoding.DecodeString(resolved.(string)) + if err != nil { + failf("!!binary value contains invalid base64 data") + } + resolved = string(data) + } + } + if resolved == nil { + if out.Kind() == reflect.Map && !out.CanAddr() { + resetMap(out) + } else { + out.Set(reflect.Zero(out.Type())) + } + return true + } + if resolvedv := reflect.ValueOf(resolved); out.Type() == resolvedv.Type() { + // We've resolved to exactly the type we want, so use that. + out.Set(resolvedv) + return true + } + // Perhaps we can use the value as a TextUnmarshaler to + // set its value. + if out.CanAddr() { + u, ok := out.Addr().Interface().(encoding.TextUnmarshaler) + if ok { + var text []byte + if tag == yaml_BINARY_TAG { + text = []byte(resolved.(string)) + } else { + // We let any value be unmarshaled into TextUnmarshaler. + // That might be more lax than we'd like, but the + // TextUnmarshaler itself should bowl out any dubious values. + text = []byte(n.value) + } + err := u.UnmarshalText(text) + if err != nil { + fail(err) + } + return true + } + } + switch out.Kind() { + case reflect.String: + if tag == yaml_BINARY_TAG { + out.SetString(resolved.(string)) + return true + } + if resolved != nil { + out.SetString(n.value) + return true + } + case reflect.Interface: + if resolved == nil { + out.Set(reflect.Zero(out.Type())) + } else if tag == yaml_TIMESTAMP_TAG { + // It looks like a timestamp but for backward compatibility + // reasons we set it as a string, so that code that unmarshals + // timestamp-like values into interface{} will continue to + // see a string and not a time.Time. + // TODO(v3) Drop this. + out.Set(reflect.ValueOf(n.value)) + } else { + out.Set(reflect.ValueOf(resolved)) + } + return true + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + switch resolved := resolved.(type) { + case int: + if !out.OverflowInt(int64(resolved)) { + out.SetInt(int64(resolved)) + return true + } + case int64: + if !out.OverflowInt(resolved) { + out.SetInt(resolved) + return true + } + case uint64: + if resolved <= math.MaxInt64 && !out.OverflowInt(int64(resolved)) { + out.SetInt(int64(resolved)) + return true + } + case float64: + if resolved <= math.MaxInt64 && !out.OverflowInt(int64(resolved)) { + out.SetInt(int64(resolved)) + return true + } + case string: + if out.Type() == durationType { + d, err := time.ParseDuration(resolved) + if err == nil { + out.SetInt(int64(d)) + return true + } + } + } + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + switch resolved := resolved.(type) { + case int: + if resolved >= 0 && !out.OverflowUint(uint64(resolved)) { + out.SetUint(uint64(resolved)) + return true + } + case int64: + if resolved >= 0 && !out.OverflowUint(uint64(resolved)) { + out.SetUint(uint64(resolved)) + return true + } + case uint64: + if !out.OverflowUint(uint64(resolved)) { + out.SetUint(uint64(resolved)) + return true + } + case float64: + if resolved <= math.MaxUint64 && !out.OverflowUint(uint64(resolved)) { + out.SetUint(uint64(resolved)) + return true + } + } + case reflect.Bool: + switch resolved := resolved.(type) { + case bool: + out.SetBool(resolved) + return true + } + case reflect.Float32, reflect.Float64: + switch resolved := resolved.(type) { + case int: + out.SetFloat(float64(resolved)) + return true + case int64: + out.SetFloat(float64(resolved)) + return true + case uint64: + out.SetFloat(float64(resolved)) + return true + case float64: + out.SetFloat(resolved) + return true + } + case reflect.Struct: + if resolvedv := reflect.ValueOf(resolved); out.Type() == resolvedv.Type() { + out.Set(resolvedv) + return true + } + case reflect.Ptr: + if out.Type().Elem() == reflect.TypeOf(resolved) { + // TODO DOes this make sense? When is out a Ptr except when decoding a nil value? + elem := reflect.New(out.Type().Elem()) + elem.Elem().Set(reflect.ValueOf(resolved)) + out.Set(elem) + return true + } + } + d.terror(n, tag, out) + return false +} + +func settableValueOf(i interface{}) reflect.Value { + v := reflect.ValueOf(i) + sv := reflect.New(v.Type()).Elem() + sv.Set(v) + return sv +} + +func (d *decoder) sequence(n *node, out reflect.Value) (good bool) { + l := len(n.children) + + var iface reflect.Value + switch out.Kind() { + case reflect.Slice: + out.Set(reflect.MakeSlice(out.Type(), l, l)) + case reflect.Array: + if l != out.Len() { + failf("invalid array: want %d elements but got %d", out.Len(), l) + } + case reflect.Interface: + // No type hints. Will have to use a generic sequence. + iface = out + out = settableValueOf(make([]interface{}, l)) + default: + d.terror(n, yaml_SEQ_TAG, out) + return false + } + et := out.Type().Elem() + + j := 0 + for i := 0; i < l; i++ { + e := reflect.New(et).Elem() + if ok := d.unmarshal(n.children[i], e); ok { + out.Index(j).Set(e) + j++ + } + } + if out.Kind() != reflect.Array { + out.Set(out.Slice(0, j)) + } + if iface.IsValid() { + iface.Set(out) + } + return true +} + +func (d *decoder) mapping(n *node, out reflect.Value) (good bool) { + switch out.Kind() { + case reflect.Struct: + return d.mappingStruct(n, out) + case reflect.Slice: + return d.mappingSlice(n, out) + case reflect.Map: + // okay + case reflect.Interface: + if d.mapType.Kind() == reflect.Map { + iface := out + out = reflect.MakeMap(d.mapType) + iface.Set(out) + } else { + slicev := reflect.New(d.mapType).Elem() + if !d.mappingSlice(n, slicev) { + return false + } + out.Set(slicev) + return true + } + default: + d.terror(n, yaml_MAP_TAG, out) + return false + } + outt := out.Type() + kt := outt.Key() + et := outt.Elem() + + mapType := d.mapType + if outt.Key() == ifaceType && outt.Elem() == ifaceType { + d.mapType = outt + } + + if out.IsNil() { + out.Set(reflect.MakeMap(outt)) + } + l := len(n.children) + for i := 0; i < l; i += 2 { + if isMerge(n.children[i]) { + d.merge(n.children[i+1], out) + continue + } + k := reflect.New(kt).Elem() + if d.unmarshal(n.children[i], k) { + kkind := k.Kind() + if kkind == reflect.Interface { + kkind = k.Elem().Kind() + } + if kkind == reflect.Map || kkind == reflect.Slice { + failf("invalid map key: %#v", k.Interface()) + } + e := reflect.New(et).Elem() + if d.unmarshal(n.children[i+1], e) { + d.setMapIndex(n.children[i+1], out, k, e) + } + } + } + d.mapType = mapType + return true +} + +func (d *decoder) setMapIndex(n *node, out, k, v reflect.Value) { + if d.strict && out.MapIndex(k) != zeroValue { + d.terrors = append(d.terrors, fmt.Sprintf("line %d: key %#v already set in map", n.line+1, k.Interface())) + return + } + out.SetMapIndex(k, v) +} + +func (d *decoder) mappingSlice(n *node, out reflect.Value) (good bool) { + outt := out.Type() + if outt.Elem() != mapItemType { + d.terror(n, yaml_MAP_TAG, out) + return false + } + + mapType := d.mapType + d.mapType = outt + + var slice []MapItem + var l = len(n.children) + for i := 0; i < l; i += 2 { + if isMerge(n.children[i]) { + d.merge(n.children[i+1], out) + continue + } + item := MapItem{} + k := reflect.ValueOf(&item.Key).Elem() + if d.unmarshal(n.children[i], k) { + v := reflect.ValueOf(&item.Value).Elem() + if d.unmarshal(n.children[i+1], v) { + slice = append(slice, item) + } + } + } + out.Set(reflect.ValueOf(slice)) + d.mapType = mapType + return true +} + +func (d *decoder) mappingStruct(n *node, out reflect.Value) (good bool) { + sinfo, err := getStructInfo(out.Type()) + if err != nil { + panic(err) + } + name := settableValueOf("") + l := len(n.children) + + var inlineMap reflect.Value + var elemType reflect.Type + if sinfo.InlineMap != -1 { + inlineMap = out.Field(sinfo.InlineMap) + inlineMap.Set(reflect.New(inlineMap.Type()).Elem()) + elemType = inlineMap.Type().Elem() + } + + var doneFields []bool + if d.strict { + doneFields = make([]bool, len(sinfo.FieldsList)) + } + for i := 0; i < l; i += 2 { + ni := n.children[i] + if isMerge(ni) { + d.merge(n.children[i+1], out) + continue + } + if !d.unmarshal(ni, name) { + continue + } + if info, ok := sinfo.FieldsMap[name.String()]; ok { + if d.strict { + if doneFields[info.Id] { + d.terrors = append(d.terrors, fmt.Sprintf("line %d: field %s already set in type %s", ni.line+1, name.String(), out.Type())) + continue + } + doneFields[info.Id] = true + } + var field reflect.Value + if info.Inline == nil { + field = out.Field(info.Num) + } else { + field = out.FieldByIndex(info.Inline) + } + d.unmarshal(n.children[i+1], field) + } else if sinfo.InlineMap != -1 { + if inlineMap.IsNil() { + inlineMap.Set(reflect.MakeMap(inlineMap.Type())) + } + value := reflect.New(elemType).Elem() + d.unmarshal(n.children[i+1], value) + d.setMapIndex(n.children[i+1], inlineMap, name, value) + } else if d.strict { + d.terrors = append(d.terrors, fmt.Sprintf("line %d: field %s not found in type %s", ni.line+1, name.String(), out.Type())) + } + } + return true +} + +func failWantMap() { + failf("map merge requires map or sequence of maps as the value") +} + +func (d *decoder) merge(n *node, out reflect.Value) { + switch n.kind { + case mappingNode: + d.unmarshal(n, out) + case aliasNode: + if n.alias != nil && n.alias.kind != mappingNode { + failWantMap() + } + d.unmarshal(n, out) + case sequenceNode: + // Step backwards as earlier nodes take precedence. + for i := len(n.children) - 1; i >= 0; i-- { + ni := n.children[i] + if ni.kind == aliasNode { + if ni.alias != nil && ni.alias.kind != mappingNode { + failWantMap() + } + } else if ni.kind != mappingNode { + failWantMap() + } + d.unmarshal(ni, out) + } + default: + failWantMap() + } +} + +func isMerge(n *node) bool { + return n.kind == scalarNode && n.value == "<<" && (n.implicit == true || n.tag == yaml_MERGE_TAG) +} diff --git a/goyaml.v2/decode_test.go b/goyaml.v2/decode_test.go new file mode 100644 index 0000000..c7d104e --- /dev/null +++ b/goyaml.v2/decode_test.go @@ -0,0 +1,1367 @@ +package yaml_test + +import ( + "errors" + "io" + "math" + "reflect" + "strings" + "time" + + . "gopkg.in/check.v1" + "gopkg.in/yaml.v2" +) + +var unmarshalIntTest = 123 + +var unmarshalTests = []struct { + data string + value interface{} +}{ + { + "", + (*struct{})(nil), + }, + { + "{}", &struct{}{}, + }, { + "v: hi", + map[string]string{"v": "hi"}, + }, { + "v: hi", map[string]interface{}{"v": "hi"}, + }, { + "v: true", + map[string]string{"v": "true"}, + }, { + "v: true", + map[string]interface{}{"v": true}, + }, { + "v: 10", + map[string]interface{}{"v": 10}, + }, { + "v: 0b10", + map[string]interface{}{"v": 2}, + }, { + "v: 0xA", + map[string]interface{}{"v": 10}, + }, { + "v: 4294967296", + map[string]int64{"v": 4294967296}, + }, { + "v: 0.1", + map[string]interface{}{"v": 0.1}, + }, { + "v: .1", + map[string]interface{}{"v": 0.1}, + }, { + "v: .Inf", + map[string]interface{}{"v": math.Inf(+1)}, + }, { + "v: -.Inf", + map[string]interface{}{"v": math.Inf(-1)}, + }, { + "v: -10", + map[string]interface{}{"v": -10}, + }, { + "v: -.1", + map[string]interface{}{"v": -0.1}, + }, + + // Simple values. + { + "123", + &unmarshalIntTest, + }, + + // Floats from spec + { + "canonical: 6.8523e+5", + map[string]interface{}{"canonical": 6.8523e+5}, + }, { + "expo: 685.230_15e+03", + map[string]interface{}{"expo": 685.23015e+03}, + }, { + "fixed: 685_230.15", + map[string]interface{}{"fixed": 685230.15}, + }, { + "neginf: -.inf", + map[string]interface{}{"neginf": math.Inf(-1)}, + }, { + "fixed: 685_230.15", + map[string]float64{"fixed": 685230.15}, + }, + //{"sexa: 190:20:30.15", map[string]interface{}{"sexa": 0}}, // Unsupported + //{"notanum: .NaN", map[string]interface{}{"notanum": math.NaN()}}, // Equality of NaN fails. + + // Bools from spec + { + "canonical: y", + map[string]interface{}{"canonical": true}, + }, { + "answer: NO", + map[string]interface{}{"answer": false}, + }, { + "logical: True", + map[string]interface{}{"logical": true}, + }, { + "option: on", + map[string]interface{}{"option": true}, + }, { + "option: on", + map[string]bool{"option": true}, + }, + // Ints from spec + { + "canonical: 685230", + map[string]interface{}{"canonical": 685230}, + }, { + "decimal: +685_230", + map[string]interface{}{"decimal": 685230}, + }, { + "octal: 02472256", + map[string]interface{}{"octal": 685230}, + }, { + "hexa: 0x_0A_74_AE", + map[string]interface{}{"hexa": 685230}, + }, { + "bin: 0b1010_0111_0100_1010_1110", + map[string]interface{}{"bin": 685230}, + }, { + "bin: -0b101010", + map[string]interface{}{"bin": -42}, + }, { + "bin: -0b1000000000000000000000000000000000000000000000000000000000000000", + map[string]interface{}{"bin": -9223372036854775808}, + }, { + "decimal: +685_230", + map[string]int{"decimal": 685230}, + }, + + //{"sexa: 190:20:30", map[string]interface{}{"sexa": 0}}, // Unsupported + + // Nulls from spec + { + "empty:", + map[string]interface{}{"empty": nil}, + }, { + "canonical: ~", + map[string]interface{}{"canonical": nil}, + }, { + "english: null", + map[string]interface{}{"english": nil}, + }, { + "~: null key", + map[interface{}]string{nil: "null key"}, + }, { + "empty:", + map[string]*bool{"empty": nil}, + }, + + // Flow sequence + { + "seq: [A,B]", + map[string]interface{}{"seq": []interface{}{"A", "B"}}, + }, { + "seq: [A,B,C,]", + map[string][]string{"seq": []string{"A", "B", "C"}}, + }, { + "seq: [A,1,C]", + map[string][]string{"seq": []string{"A", "1", "C"}}, + }, { + "seq: [A,1,C]", + map[string][]int{"seq": []int{1}}, + }, { + "seq: [A,1,C]", + map[string]interface{}{"seq": []interface{}{"A", 1, "C"}}, + }, + // Block sequence + { + "seq:\n - A\n - B", + map[string]interface{}{"seq": []interface{}{"A", "B"}}, + }, { + "seq:\n - A\n - B\n - C", + map[string][]string{"seq": []string{"A", "B", "C"}}, + }, { + "seq:\n - A\n - 1\n - C", + map[string][]string{"seq": []string{"A", "1", "C"}}, + }, { + "seq:\n - A\n - 1\n - C", + map[string][]int{"seq": []int{1}}, + }, { + "seq:\n - A\n - 1\n - C", + map[string]interface{}{"seq": []interface{}{"A", 1, "C"}}, + }, + + // Literal block scalar + { + "scalar: | # Comment\n\n literal\n\n \ttext\n\n", + map[string]string{"scalar": "\nliteral\n\n\ttext\n"}, + }, + + // Folded block scalar + { + "scalar: > # Comment\n\n folded\n line\n \n next\n line\n * one\n * two\n\n last\n line\n\n", + map[string]string{"scalar": "\nfolded line\nnext line\n * one\n * two\n\nlast line\n"}, + }, + + // Map inside interface with no type hints. + { + "a: {b: c}", + map[interface{}]interface{}{"a": map[interface{}]interface{}{"b": "c"}}, + }, + + // Structs and type conversions. + { + "hello: world", + &struct{ Hello string }{"world"}, + }, { + "a: {b: c}", + &struct{ A struct{ B string } }{struct{ B string }{"c"}}, + }, { + "a: {b: c}", + &struct{ A *struct{ B string } }{&struct{ B string }{"c"}}, + }, { + "a: {b: c}", + &struct{ A map[string]string }{map[string]string{"b": "c"}}, + }, { + "a: {b: c}", + &struct{ A *map[string]string }{&map[string]string{"b": "c"}}, + }, { + "a:", + &struct{ A map[string]string }{}, + }, { + "a: 1", + &struct{ A int }{1}, + }, { + "a: 1", + &struct{ A float64 }{1}, + }, { + "a: 1.0", + &struct{ A int }{1}, + }, { + "a: 1.0", + &struct{ A uint }{1}, + }, { + "a: [1, 2]", + &struct{ A []int }{[]int{1, 2}}, + }, { + "a: [1, 2]", + &struct{ A [2]int }{[2]int{1, 2}}, + }, { + "a: 1", + &struct{ B int }{0}, + }, { + "a: 1", + &struct { + B int "a" + }{1}, + }, { + "a: y", + &struct{ A bool }{true}, + }, + + // Some cross type conversions + { + "v: 42", + map[string]uint{"v": 42}, + }, { + "v: -42", + map[string]uint{}, + }, { + "v: 4294967296", + map[string]uint64{"v": 4294967296}, + }, { + "v: -4294967296", + map[string]uint64{}, + }, + + // int + { + "int_max: 2147483647", + map[string]int{"int_max": math.MaxInt32}, + }, + { + "int_min: -2147483648", + map[string]int{"int_min": math.MinInt32}, + }, + { + "int_overflow: 9223372036854775808", // math.MaxInt64 + 1 + map[string]int{}, + }, + + // int64 + { + "int64_max: 9223372036854775807", + map[string]int64{"int64_max": math.MaxInt64}, + }, + { + "int64_max_base2: 0b111111111111111111111111111111111111111111111111111111111111111", + map[string]int64{"int64_max_base2": math.MaxInt64}, + }, + { + "int64_min: -9223372036854775808", + map[string]int64{"int64_min": math.MinInt64}, + }, + { + "int64_neg_base2: -0b111111111111111111111111111111111111111111111111111111111111111", + map[string]int64{"int64_neg_base2": -math.MaxInt64}, + }, + { + "int64_overflow: 9223372036854775808", // math.MaxInt64 + 1 + map[string]int64{}, + }, + + // uint + { + "uint_min: 0", + map[string]uint{"uint_min": 0}, + }, + { + "uint_max: 4294967295", + map[string]uint{"uint_max": math.MaxUint32}, + }, + { + "uint_underflow: -1", + map[string]uint{}, + }, + + // uint64 + { + "uint64_min: 0", + map[string]uint{"uint64_min": 0}, + }, + { + "uint64_max: 18446744073709551615", + map[string]uint64{"uint64_max": math.MaxUint64}, + }, + { + "uint64_max_base2: 0b1111111111111111111111111111111111111111111111111111111111111111", + map[string]uint64{"uint64_max_base2": math.MaxUint64}, + }, + { + "uint64_maxint64: 9223372036854775807", + map[string]uint64{"uint64_maxint64": math.MaxInt64}, + }, + { + "uint64_underflow: -1", + map[string]uint64{}, + }, + + // float32 + { + "float32_max: 3.40282346638528859811704183484516925440e+38", + map[string]float32{"float32_max": math.MaxFloat32}, + }, + { + "float32_nonzero: 1.401298464324817070923729583289916131280e-45", + map[string]float32{"float32_nonzero": math.SmallestNonzeroFloat32}, + }, + { + "float32_maxuint64: 18446744073709551615", + map[string]float32{"float32_maxuint64": float32(math.MaxUint64)}, + }, + { + "float32_maxuint64+1: 18446744073709551616", + map[string]float32{"float32_maxuint64+1": float32(math.MaxUint64 + 1)}, + }, + + // float64 + { + "float64_max: 1.797693134862315708145274237317043567981e+308", + map[string]float64{"float64_max": math.MaxFloat64}, + }, + { + "float64_nonzero: 4.940656458412465441765687928682213723651e-324", + map[string]float64{"float64_nonzero": math.SmallestNonzeroFloat64}, + }, + { + "float64_maxuint64: 18446744073709551615", + map[string]float64{"float64_maxuint64": float64(math.MaxUint64)}, + }, + { + "float64_maxuint64+1: 18446744073709551616", + map[string]float64{"float64_maxuint64+1": float64(math.MaxUint64 + 1)}, + }, + + // Overflow cases. + { + "v: 4294967297", + map[string]int32{}, + }, { + "v: 128", + map[string]int8{}, + }, + + // Quoted values. + { + "'1': '\"2\"'", + map[interface{}]interface{}{"1": "\"2\""}, + }, { + "v:\n- A\n- 'B\n\n C'\n", + map[string][]string{"v": []string{"A", "B\nC"}}, + }, + + // Explicit tags. + { + "v: !!float '1.1'", + map[string]interface{}{"v": 1.1}, + }, { + "v: !!float 0", + map[string]interface{}{"v": float64(0)}, + }, { + "v: !!float -1", + map[string]interface{}{"v": float64(-1)}, + }, { + "v: !!null ''", + map[string]interface{}{"v": nil}, + }, { + "%TAG !y! tag:yaml.org,2002:\n---\nv: !y!int '1'", + map[string]interface{}{"v": 1}, + }, + + // Non-specific tag (Issue #75) + { + "v: ! test", + map[string]interface{}{"v": "test"}, + }, + + // Anchors and aliases. + { + "a: &x 1\nb: &y 2\nc: *x\nd: *y\n", + &struct{ A, B, C, D int }{1, 2, 1, 2}, + }, { + "a: &a {c: 1}\nb: *a", + &struct { + A, B struct { + C int + } + }{struct{ C int }{1}, struct{ C int }{1}}, + }, { + "a: &a [1, 2]\nb: *a", + &struct{ B []int }{[]int{1, 2}}, + }, + + // Bug #1133337 + { + "foo: ''", + map[string]*string{"foo": new(string)}, + }, { + "foo: null", + map[string]*string{"foo": nil}, + }, { + "foo: null", + map[string]string{"foo": ""}, + }, { + "foo: null", + map[string]interface{}{"foo": nil}, + }, + + // Support for ~ + { + "foo: ~", + map[string]*string{"foo": nil}, + }, { + "foo: ~", + map[string]string{"foo": ""}, + }, { + "foo: ~", + map[string]interface{}{"foo": nil}, + }, + + // Ignored field + { + "a: 1\nb: 2\n", + &struct { + A int + B int "-" + }{1, 0}, + }, + + // Bug #1191981 + { + "" + + "%YAML 1.1\n" + + "--- !!str\n" + + `"Generic line break (no glyph)\n\` + "\n" + + ` Generic line break (glyphed)\n\` + "\n" + + ` Line separator\u2028\` + "\n" + + ` Paragraph separator\u2029"` + "\n", + "" + + "Generic line break (no glyph)\n" + + "Generic line break (glyphed)\n" + + "Line separator\u2028Paragraph separator\u2029", + }, + + // Struct inlining + { + "a: 1\nb: 2\nc: 3\n", + &struct { + A int + C inlineB `yaml:",inline"` + }{1, inlineB{2, inlineC{3}}}, + }, + + // Map inlining + { + "a: 1\nb: 2\nc: 3\n", + &struct { + A int + C map[string]int `yaml:",inline"` + }{1, map[string]int{"b": 2, "c": 3}}, + }, + + // bug 1243827 + { + "a: -b_c", + map[string]interface{}{"a": "-b_c"}, + }, + { + "a: +b_c", + map[string]interface{}{"a": "+b_c"}, + }, + { + "a: 50cent_of_dollar", + map[string]interface{}{"a": "50cent_of_dollar"}, + }, + + // issue #295 (allow scalars with colons in flow mappings and sequences) + { + "a: {b: https://github.com/go-yaml/yaml}", + map[string]interface{}{"a": map[interface{}]interface{}{ + "b": "https://github.com/go-yaml/yaml", + }}, + }, + { + "a: [https://github.com/go-yaml/yaml]", + map[string]interface{}{"a": []interface{}{"https://github.com/go-yaml/yaml"}}, + }, + + // Duration + { + "a: 3s", + map[string]time.Duration{"a": 3 * time.Second}, + }, + + // Issue #24. + { + "a: ", + map[string]string{"a": ""}, + }, + + // Base 60 floats are obsolete and unsupported. + { + "a: 1:1\n", + map[string]string{"a": "1:1"}, + }, + + // Binary data. + { + "a: !!binary gIGC\n", + map[string]string{"a": "\x80\x81\x82"}, + }, { + "a: !!binary |\n " + strings.Repeat("kJCQ", 17) + "kJ\n CQ\n", + map[string]string{"a": strings.Repeat("\x90", 54)}, + }, { + "a: !!binary |\n " + strings.Repeat("A", 70) + "\n ==\n", + map[string]string{"a": strings.Repeat("\x00", 52)}, + }, + + // Ordered maps. + { + "{b: 2, a: 1, d: 4, c: 3, sub: {e: 5}}", + &yaml.MapSlice{{"b", 2}, {"a", 1}, {"d", 4}, {"c", 3}, {"sub", yaml.MapSlice{{"e", 5}}}}, + }, + + // Issue #39. + { + "a:\n b:\n c: d\n", + map[string]struct{ B interface{} }{"a": {map[interface{}]interface{}{"c": "d"}}}, + }, + + // Custom map type. + { + "a: {b: c}", + M{"a": M{"b": "c"}}, + }, + + // Support encoding.TextUnmarshaler. + { + "a: 1.2.3.4\n", + map[string]textUnmarshaler{"a": textUnmarshaler{S: "1.2.3.4"}}, + }, + { + "a: 2015-02-24T18:19:39Z\n", + map[string]textUnmarshaler{"a": textUnmarshaler{"2015-02-24T18:19:39Z"}}, + }, + + // Timestamps + { + // Date only. + "a: 2015-01-01\n", + map[string]time.Time{"a": time.Date(2015, 1, 1, 0, 0, 0, 0, time.UTC)}, + }, + { + // RFC3339 + "a: 2015-02-24T18:19:39.12Z\n", + map[string]time.Time{"a": time.Date(2015, 2, 24, 18, 19, 39, .12e9, time.UTC)}, + }, + { + // RFC3339 with short dates. + "a: 2015-2-3T3:4:5Z", + map[string]time.Time{"a": time.Date(2015, 2, 3, 3, 4, 5, 0, time.UTC)}, + }, + { + // ISO8601 lower case t + "a: 2015-02-24t18:19:39Z\n", + map[string]time.Time{"a": time.Date(2015, 2, 24, 18, 19, 39, 0, time.UTC)}, + }, + { + // space separate, no time zone + "a: 2015-02-24 18:19:39\n", + map[string]time.Time{"a": time.Date(2015, 2, 24, 18, 19, 39, 0, time.UTC)}, + }, + // Some cases not currently handled. Uncomment these when + // the code is fixed. + // { + // // space separated with time zone + // "a: 2001-12-14 21:59:43.10 -5", + // map[string]interface{}{"a": time.Date(2001, 12, 14, 21, 59, 43, .1e9, time.UTC)}, + // }, + // { + // // arbitrary whitespace between fields + // "a: 2001-12-14 \t\t \t21:59:43.10 \t Z", + // map[string]interface{}{"a": time.Date(2001, 12, 14, 21, 59, 43, .1e9, time.UTC)}, + // }, + { + // explicit string tag + "a: !!str 2015-01-01", + map[string]interface{}{"a": "2015-01-01"}, + }, + { + // explicit timestamp tag on quoted string + "a: !!timestamp \"2015-01-01\"", + map[string]time.Time{"a": time.Date(2015, 1, 1, 0, 0, 0, 0, time.UTC)}, + }, + { + // explicit timestamp tag on unquoted string + "a: !!timestamp 2015-01-01", + map[string]time.Time{"a": time.Date(2015, 1, 1, 0, 0, 0, 0, time.UTC)}, + }, + { + // quoted string that's a valid timestamp + "a: \"2015-01-01\"", + map[string]interface{}{"a": "2015-01-01"}, + }, + { + // explicit timestamp tag into interface. + "a: !!timestamp \"2015-01-01\"", + map[string]interface{}{"a": "2015-01-01"}, + }, + { + // implicit timestamp tag into interface. + "a: 2015-01-01", + map[string]interface{}{"a": "2015-01-01"}, + }, + + // Encode empty lists as zero-length slices. + { + "a: []", + &struct{ A []int }{[]int{}}, + }, + + // UTF-16-LE + { + "\xff\xfe\xf1\x00o\x00\xf1\x00o\x00:\x00 \x00v\x00e\x00r\x00y\x00 \x00y\x00e\x00s\x00\n\x00", + M{"ñoño": "very yes"}, + }, + // UTF-16-LE with surrogate. + { + "\xff\xfe\xf1\x00o\x00\xf1\x00o\x00:\x00 \x00v\x00e\x00r\x00y\x00 \x00y\x00e\x00s\x00 \x00=\xd8\xd4\xdf\n\x00", + M{"ñoño": "very yes 🟔"}, + }, + + // UTF-16-BE + { + "\xfe\xff\x00\xf1\x00o\x00\xf1\x00o\x00:\x00 \x00v\x00e\x00r\x00y\x00 \x00y\x00e\x00s\x00\n", + M{"ñoño": "very yes"}, + }, + // UTF-16-BE with surrogate. + { + "\xfe\xff\x00\xf1\x00o\x00\xf1\x00o\x00:\x00 \x00v\x00e\x00r\x00y\x00 \x00y\x00e\x00s\x00 \xd8=\xdf\xd4\x00\n", + M{"ñoño": "very yes 🟔"}, + }, + + // This *is* in fact a float number, per the spec. #171 was a mistake. + { + "a: 123456e1\n", + M{"a": 123456e1}, + }, { + "a: 123456E1\n", + M{"a": 123456E1}, + }, + // yaml-test-suite 3GZX: Spec Example 7.1. Alias Nodes + { + "First occurrence: &anchor Foo\nSecond occurrence: *anchor\nOverride anchor: &anchor Bar\nReuse anchor: *anchor\n", + map[interface{}]interface{}{ + "Reuse anchor": "Bar", + "First occurrence": "Foo", + "Second occurrence": "Foo", + "Override anchor": "Bar", + }, + }, + // Single document with garbage following it. + { + "---\nhello\n...\n}not yaml", + "hello", + }, + { + "a: 5\n", + &struct{ A jsonNumberT }{"5"}, + }, + { + "a: 5.5\n", + &struct{ A jsonNumberT }{"5.5"}, + }, + { + ` +a: + b +b: + ? a + : a`, + &M{"a": "b", + "b": M{ + "a": "a", + }}, + }, +} + +type M map[interface{}]interface{} + +type inlineB struct { + B int + inlineC `yaml:",inline"` +} + +type inlineC struct { + C int +} + +func (s *S) TestUnmarshal(c *C) { + for i, item := range unmarshalTests { + c.Logf("test %d: %q", i, item.data) + t := reflect.ValueOf(item.value).Type() + value := reflect.New(t) + err := yaml.Unmarshal([]byte(item.data), value.Interface()) + if _, ok := err.(*yaml.TypeError); !ok { + c.Assert(err, IsNil) + } + c.Assert(value.Elem().Interface(), DeepEquals, item.value, Commentf("error: %v", err)) + } +} + +// TODO(v3): This test should also work when unmarshaling onto an interface{}. +func (s *S) TestUnmarshalFullTimestamp(c *C) { + // Full timestamp in same format as encoded. This is confirmed to be + // properly decoded by Python as a timestamp as well. + var str = "2015-02-24T18:19:39.123456789-03:00" + var t time.Time + err := yaml.Unmarshal([]byte(str), &t) + c.Assert(err, IsNil) + c.Assert(t, Equals, time.Date(2015, 2, 24, 18, 19, 39, 123456789, t.Location())) + c.Assert(t.In(time.UTC), Equals, time.Date(2015, 2, 24, 21, 19, 39, 123456789, time.UTC)) +} + +func (s *S) TestDecoderSingleDocument(c *C) { + // Test that Decoder.Decode works as expected on + // all the unmarshal tests. + for i, item := range unmarshalTests { + c.Logf("test %d: %q", i, item.data) + if item.data == "" { + // Behaviour differs when there's no YAML. + continue + } + t := reflect.ValueOf(item.value).Type() + value := reflect.New(t) + err := yaml.NewDecoder(strings.NewReader(item.data)).Decode(value.Interface()) + if _, ok := err.(*yaml.TypeError); !ok { + c.Assert(err, IsNil) + } + c.Assert(value.Elem().Interface(), DeepEquals, item.value) + } +} + +var decoderTests = []struct { + data string + values []interface{} +}{{ + "", + nil, +}, { + "a: b", + []interface{}{ + map[interface{}]interface{}{"a": "b"}, + }, +}, { + "---\na: b\n...\n", + []interface{}{ + map[interface{}]interface{}{"a": "b"}, + }, +}, { + "---\n'hello'\n...\n---\ngoodbye\n...\n", + []interface{}{ + "hello", + "goodbye", + }, +}} + +func (s *S) TestDecoder(c *C) { + for i, item := range decoderTests { + c.Logf("test %d: %q", i, item.data) + var values []interface{} + dec := yaml.NewDecoder(strings.NewReader(item.data)) + for { + var value interface{} + err := dec.Decode(&value) + if err == io.EOF { + break + } + c.Assert(err, IsNil) + values = append(values, value) + } + c.Assert(values, DeepEquals, item.values) + } +} + +type errReader struct{} + +func (errReader) Read([]byte) (int, error) { + return 0, errors.New("some read error") +} + +func (s *S) TestDecoderReadError(c *C) { + err := yaml.NewDecoder(errReader{}).Decode(&struct{}{}) + c.Assert(err, ErrorMatches, `yaml: input error: some read error`) +} + +func (s *S) TestUnmarshalNaN(c *C) { + value := map[string]interface{}{} + err := yaml.Unmarshal([]byte("notanum: .NaN"), &value) + c.Assert(err, IsNil) + c.Assert(math.IsNaN(value["notanum"].(float64)), Equals, true) +} + +var unmarshalErrorTests = []struct { + data, error string +}{ + {"v: !!float 'error'", "yaml: cannot decode !!str `error` as a !!float"}, + {"v: [A,", "yaml: line 1: did not find expected node content"}, + {"v:\n- [A,", "yaml: line 2: did not find expected node content"}, + {"a:\n- b: *,", "yaml: line 2: did not find expected alphabetic or numeric character"}, + {"a: *b\n", "yaml: unknown anchor 'b' referenced"}, + {"a: &a\n b: *a\n", "yaml: anchor 'a' value contains itself"}, + {"a: &x null\n<<:\n- *x\nb: &x {}\n", `yaml: map merge requires map or sequence of maps as the value`}, // Issue #529. + {"value: -", "yaml: block sequence entries are not allowed in this context"}, + {"a: !!binary ==", "yaml: !!binary value contains invalid base64 data"}, + {"{[.]}", `yaml: invalid map key: \[\]interface \{\}\{"\."\}`}, + {"{{.}}", `yaml: invalid map key: map\[interface\ \{\}\]interface \{\}\{".":interface \{\}\(nil\)\}`}, + {"b: *a\na: &a {c: 1}", `yaml: unknown anchor 'a' referenced`}, + {"%TAG !%79! tag:yaml.org,2002:\n---\nv: !%79!int '1'", "yaml: did not find expected whitespace"}, + {"a:\n 1:\nb\n 2:", ".*could not find expected ':'"}, + { + "a: &a [00,00,00,00,00,00,00,00,00]\n" + + "b: &b [*a,*a,*a,*a,*a,*a,*a,*a,*a]\n" + + "c: &c [*b,*b,*b,*b,*b,*b,*b,*b,*b]\n" + + "d: &d [*c,*c,*c,*c,*c,*c,*c,*c,*c]\n" + + "e: &e [*d,*d,*d,*d,*d,*d,*d,*d,*d]\n" + + "f: &f [*e,*e,*e,*e,*e,*e,*e,*e,*e]\n" + + "g: &g [*f,*f,*f,*f,*f,*f,*f,*f,*f]\n" + + "h: &h [*g,*g,*g,*g,*g,*g,*g,*g,*g]\n" + + "i: &i [*h,*h,*h,*h,*h,*h,*h,*h,*h]\n", + "yaml: document contains excessive aliasing", + }, +} + +func (s *S) TestUnmarshalErrors(c *C) { + for i, item := range unmarshalErrorTests { + c.Logf("test %d: %q", i, item.data) + var value interface{} + err := yaml.Unmarshal([]byte(item.data), &value) + c.Assert(err, ErrorMatches, item.error, Commentf("Partial unmarshal: %#v", value)) + + if strings.Contains(item.data, ":") { + // Repeat test with typed value. + var value map[string]interface{} + err := yaml.Unmarshal([]byte(item.data), &value) + c.Assert(err, ErrorMatches, item.error, Commentf("Partial unmarshal: %#v", value)) + } + } +} + +func (s *S) TestDecoderErrors(c *C) { + for _, item := range unmarshalErrorTests { + var value interface{} + err := yaml.NewDecoder(strings.NewReader(item.data)).Decode(&value) + c.Assert(err, ErrorMatches, item.error, Commentf("Partial unmarshal: %#v", value)) + } +} + +var unmarshalerTests = []struct { + data, tag string + value interface{} +}{ + {"_: {hi: there}", "!!map", map[interface{}]interface{}{"hi": "there"}}, + {"_: [1,A]", "!!seq", []interface{}{1, "A"}}, + {"_: 10", "!!int", 10}, + {"_: null", "!!null", nil}, + {`_: BAR!`, "!!str", "BAR!"}, + {`_: "BAR!"`, "!!str", "BAR!"}, + {"_: !!foo 'BAR!'", "!!foo", "BAR!"}, + {`_: ""`, "!!str", ""}, +} + +var unmarshalerResult = map[int]error{} + +type unmarshalerType struct { + value interface{} +} + +func (o *unmarshalerType) UnmarshalYAML(unmarshal func(v interface{}) error) error { + if err := unmarshal(&o.value); err != nil { + return err + } + if i, ok := o.value.(int); ok { + if result, ok := unmarshalerResult[i]; ok { + return result + } + } + return nil +} + +type unmarshalerPointer struct { + Field *unmarshalerType "_" +} + +type unmarshalerValue struct { + Field unmarshalerType "_" +} + +func (s *S) TestUnmarshalerPointerField(c *C) { + for _, item := range unmarshalerTests { + obj := &unmarshalerPointer{} + err := yaml.Unmarshal([]byte(item.data), obj) + c.Assert(err, IsNil) + if item.value == nil { + c.Assert(obj.Field, IsNil) + } else { + c.Assert(obj.Field, NotNil, Commentf("Pointer not initialized (%#v)", item.value)) + c.Assert(obj.Field.value, DeepEquals, item.value) + } + } +} + +func (s *S) TestUnmarshalerValueField(c *C) { + for _, item := range unmarshalerTests { + obj := &unmarshalerValue{} + err := yaml.Unmarshal([]byte(item.data), obj) + c.Assert(err, IsNil) + c.Assert(obj.Field, NotNil, Commentf("Pointer not initialized (%#v)", item.value)) + c.Assert(obj.Field.value, DeepEquals, item.value) + } +} + +func (s *S) TestUnmarshalerWholeDocument(c *C) { + obj := &unmarshalerType{} + err := yaml.Unmarshal([]byte(unmarshalerTests[0].data), obj) + c.Assert(err, IsNil) + value, ok := obj.value.(map[interface{}]interface{}) + c.Assert(ok, Equals, true, Commentf("value: %#v", obj.value)) + c.Assert(value["_"], DeepEquals, unmarshalerTests[0].value) +} + +func (s *S) TestUnmarshalerTypeError(c *C) { + unmarshalerResult[2] = &yaml.TypeError{[]string{"foo"}} + unmarshalerResult[4] = &yaml.TypeError{[]string{"bar"}} + defer func() { + delete(unmarshalerResult, 2) + delete(unmarshalerResult, 4) + }() + + type T struct { + Before int + After int + M map[string]*unmarshalerType + } + var v T + data := `{before: A, m: {abc: 1, def: 2, ghi: 3, jkl: 4}, after: B}` + err := yaml.Unmarshal([]byte(data), &v) + c.Assert(err, ErrorMatches, ""+ + "yaml: unmarshal errors:\n"+ + " line 1: cannot unmarshal !!str `A` into int\n"+ + " foo\n"+ + " bar\n"+ + " line 1: cannot unmarshal !!str `B` into int") + c.Assert(v.M["abc"], NotNil) + c.Assert(v.M["def"], IsNil) + c.Assert(v.M["ghi"], NotNil) + c.Assert(v.M["jkl"], IsNil) + + c.Assert(v.M["abc"].value, Equals, 1) + c.Assert(v.M["ghi"].value, Equals, 3) +} + +type proxyTypeError struct{} + +func (v *proxyTypeError) UnmarshalYAML(unmarshal func(interface{}) error) error { + var s string + var a int32 + var b int64 + if err := unmarshal(&s); err != nil { + panic(err) + } + if s == "a" { + if err := unmarshal(&b); err == nil { + panic("should have failed") + } + return unmarshal(&a) + } + if err := unmarshal(&a); err == nil { + panic("should have failed") + } + return unmarshal(&b) +} + +func (s *S) TestUnmarshalerTypeErrorProxying(c *C) { + type T struct { + Before int + After int + M map[string]*proxyTypeError + } + var v T + data := `{before: A, m: {abc: a, def: b}, after: B}` + err := yaml.Unmarshal([]byte(data), &v) + c.Assert(err, ErrorMatches, ""+ + "yaml: unmarshal errors:\n"+ + " line 1: cannot unmarshal !!str `A` into int\n"+ + " line 1: cannot unmarshal !!str `a` into int32\n"+ + " line 1: cannot unmarshal !!str `b` into int64\n"+ + " line 1: cannot unmarshal !!str `B` into int") +} + +type failingUnmarshaler struct{} + +var failingErr = errors.New("failingErr") + +func (ft *failingUnmarshaler) UnmarshalYAML(unmarshal func(interface{}) error) error { + return failingErr +} + +func (s *S) TestUnmarshalerError(c *C) { + err := yaml.Unmarshal([]byte("a: b"), &failingUnmarshaler{}) + c.Assert(err, Equals, failingErr) +} + +type sliceUnmarshaler []int + +func (su *sliceUnmarshaler) UnmarshalYAML(unmarshal func(interface{}) error) error { + var slice []int + err := unmarshal(&slice) + if err == nil { + *su = slice + return nil + } + + var intVal int + err = unmarshal(&intVal) + if err == nil { + *su = []int{intVal} + return nil + } + + return err +} + +func (s *S) TestUnmarshalerRetry(c *C) { + var su sliceUnmarshaler + err := yaml.Unmarshal([]byte("[1, 2, 3]"), &su) + c.Assert(err, IsNil) + c.Assert(su, DeepEquals, sliceUnmarshaler([]int{1, 2, 3})) + + err = yaml.Unmarshal([]byte("1"), &su) + c.Assert(err, IsNil) + c.Assert(su, DeepEquals, sliceUnmarshaler([]int{1})) +} + +// From http://yaml.org/type/merge.html +var mergeTests = ` +anchors: + list: + - &CENTER { "x": 1, "y": 2 } + - &LEFT { "x": 0, "y": 2 } + - &BIG { "r": 10 } + - &SMALL { "r": 1 } + +# All the following maps are equal: + +plain: + # Explicit keys + "x": 1 + "y": 2 + "r": 10 + label: center/big + +mergeOne: + # Merge one map + << : *CENTER + "r": 10 + label: center/big + +mergeMultiple: + # Merge multiple maps + << : [ *CENTER, *BIG ] + label: center/big + +override: + # Override + << : [ *BIG, *LEFT, *SMALL ] + "x": 1 + label: center/big + +shortTag: + # Explicit short merge tag + !!merge "<<" : [ *CENTER, *BIG ] + label: center/big + +longTag: + # Explicit merge long tag + ! "<<" : [ *CENTER, *BIG ] + label: center/big + +inlineMap: + # Inlined map + << : {"x": 1, "y": 2, "r": 10} + label: center/big + +inlineSequenceMap: + # Inlined map in sequence + << : [ *CENTER, {"r": 10} ] + label: center/big +` + +func (s *S) TestMerge(c *C) { + var want = map[interface{}]interface{}{ + "x": 1, + "y": 2, + "r": 10, + "label": "center/big", + } + + var m map[interface{}]interface{} + err := yaml.Unmarshal([]byte(mergeTests), &m) + c.Assert(err, IsNil) + for name, test := range m { + if name == "anchors" { + continue + } + c.Assert(test, DeepEquals, want, Commentf("test %q failed", name)) + } +} + +func (s *S) TestMergeStruct(c *C) { + type Data struct { + X, Y, R int + Label string + } + want := Data{1, 2, 10, "center/big"} + + var m map[string]Data + err := yaml.Unmarshal([]byte(mergeTests), &m) + c.Assert(err, IsNil) + for name, test := range m { + if name == "anchors" { + continue + } + c.Assert(test, Equals, want, Commentf("test %q failed", name)) + } +} + +var unmarshalNullTests = []func() interface{}{ + func() interface{} { var v interface{}; v = "v"; return &v }, + func() interface{} { var s = "s"; return &s }, + func() interface{} { var s = "s"; sptr := &s; return &sptr }, + func() interface{} { var i = 1; return &i }, + func() interface{} { var i = 1; iptr := &i; return &iptr }, + func() interface{} { m := map[string]int{"s": 1}; return &m }, + func() interface{} { m := map[string]int{"s": 1}; return m }, +} + +func (s *S) TestUnmarshalNull(c *C) { + for _, test := range unmarshalNullTests { + item := test() + zero := reflect.Zero(reflect.TypeOf(item).Elem()).Interface() + err := yaml.Unmarshal([]byte("null"), item) + c.Assert(err, IsNil) + if reflect.TypeOf(item).Kind() == reflect.Map { + c.Assert(reflect.ValueOf(item).Interface(), DeepEquals, reflect.MakeMap(reflect.TypeOf(item)).Interface()) + } else { + c.Assert(reflect.ValueOf(item).Elem().Interface(), DeepEquals, zero) + } + } +} + +func (s *S) TestUnmarshalSliceOnPreset(c *C) { + // Issue #48. + v := struct{ A []int }{[]int{1}} + yaml.Unmarshal([]byte("a: [2]"), &v) + c.Assert(v.A, DeepEquals, []int{2}) +} + +var unmarshalStrictTests = []struct { + data string + value interface{} + error string +}{{ + data: "a: 1\nc: 2\n", + value: struct{ A, B int }{A: 1}, + error: `yaml: unmarshal errors:\n line 2: field c not found in type struct { A int; B int }`, +}, { + data: "a: 1\nb: 2\na: 3\n", + value: struct{ A, B int }{A: 3, B: 2}, + error: `yaml: unmarshal errors:\n line 3: field a already set in type struct { A int; B int }`, +}, { + data: "c: 3\na: 1\nb: 2\nc: 4\n", + value: struct { + A int + inlineB `yaml:",inline"` + }{ + A: 1, + inlineB: inlineB{ + B: 2, + inlineC: inlineC{ + C: 4, + }, + }, + }, + error: `yaml: unmarshal errors:\n line 4: field c already set in type struct { A int; yaml_test.inlineB "yaml:\\",inline\\"" }`, +}, { + data: "c: 0\na: 1\nb: 2\nc: 1\n", + value: struct { + A int + inlineB `yaml:",inline"` + }{ + A: 1, + inlineB: inlineB{ + B: 2, + inlineC: inlineC{ + C: 1, + }, + }, + }, + error: `yaml: unmarshal errors:\n line 4: field c already set in type struct { A int; yaml_test.inlineB "yaml:\\",inline\\"" }`, +}, { + data: "c: 1\na: 1\nb: 2\nc: 3\n", + value: struct { + A int + M map[string]interface{} `yaml:",inline"` + }{ + A: 1, + M: map[string]interface{}{ + "b": 2, + "c": 3, + }, + }, + error: `yaml: unmarshal errors:\n line 4: key "c" already set in map`, +}, { + data: "a: 1\n9: 2\nnull: 3\n9: 4", + value: map[interface{}]interface{}{ + "a": 1, + nil: 3, + 9: 4, + }, + error: `yaml: unmarshal errors:\n line 4: key 9 already set in map`, +}} + +func (s *S) TestUnmarshalStrict(c *C) { + for i, item := range unmarshalStrictTests { + c.Logf("test %d: %q", i, item.data) + // First test that normal Unmarshal unmarshals to the expected value. + t := reflect.ValueOf(item.value).Type() + value := reflect.New(t) + err := yaml.Unmarshal([]byte(item.data), value.Interface()) + c.Assert(err, Equals, nil) + c.Assert(value.Elem().Interface(), DeepEquals, item.value) + + // Then test that UnmarshalStrict fails on the same thing. + t = reflect.ValueOf(item.value).Type() + value = reflect.New(t) + err = yaml.UnmarshalStrict([]byte(item.data), value.Interface()) + c.Assert(err, ErrorMatches, item.error) + } +} + +type textUnmarshaler struct { + S string +} + +func (t *textUnmarshaler) UnmarshalText(s []byte) error { + t.S = string(s) + return nil +} + +func (s *S) TestFuzzCrashers(c *C) { + cases := []string{ + // runtime error: index out of range + "\"\\0\\\r\n", + + // should not happen + " 0: [\n] 0", + "? ? \"\n\" 0", + " - {\n000}0", + "0:\n 0: [0\n] 0", + " - \"\n000\"0", + " - \"\n000\"\"", + "0:\n - {\n000}0", + "0:\n - \"\n000\"0", + "0:\n - \"\n000\"\"", + + // runtime error: index out of range + " \ufeff\n", + "? \ufeff\n", + "? \ufeff:\n", + "0: \ufeff\n", + "? \ufeff: \ufeff\n", + } + for _, data := range cases { + var v interface{} + _ = yaml.Unmarshal([]byte(data), &v) + } +} + +//var data []byte +//func init() { +// var err error +// data, err = ioutil.ReadFile("/tmp/file.yaml") +// if err != nil { +// panic(err) +// } +//} +// +//func (s *S) BenchmarkUnmarshal(c *C) { +// var err error +// for i := 0; i < c.N; i++ { +// var v map[string]interface{} +// err = yaml.Unmarshal(data, &v) +// } +// if err != nil { +// panic(err) +// } +//} +// +//func (s *S) BenchmarkMarshal(c *C) { +// var v map[string]interface{} +// yaml.Unmarshal(data, &v) +// c.ResetTimer() +// for i := 0; i < c.N; i++ { +// yaml.Marshal(&v) +// } +//} diff --git a/goyaml.v2/emitterc.go b/goyaml.v2/emitterc.go new file mode 100644 index 0000000..a1c2cc5 --- /dev/null +++ b/goyaml.v2/emitterc.go @@ -0,0 +1,1685 @@ +package yaml + +import ( + "bytes" + "fmt" +) + +// Flush the buffer if needed. +func flush(emitter *yaml_emitter_t) bool { + if emitter.buffer_pos+5 >= len(emitter.buffer) { + return yaml_emitter_flush(emitter) + } + return true +} + +// Put a character to the output buffer. +func put(emitter *yaml_emitter_t, value byte) bool { + if emitter.buffer_pos+5 >= len(emitter.buffer) && !yaml_emitter_flush(emitter) { + return false + } + emitter.buffer[emitter.buffer_pos] = value + emitter.buffer_pos++ + emitter.column++ + return true +} + +// Put a line break to the output buffer. +func put_break(emitter *yaml_emitter_t) bool { + if emitter.buffer_pos+5 >= len(emitter.buffer) && !yaml_emitter_flush(emitter) { + return false + } + switch emitter.line_break { + case yaml_CR_BREAK: + emitter.buffer[emitter.buffer_pos] = '\r' + emitter.buffer_pos += 1 + case yaml_LN_BREAK: + emitter.buffer[emitter.buffer_pos] = '\n' + emitter.buffer_pos += 1 + case yaml_CRLN_BREAK: + emitter.buffer[emitter.buffer_pos+0] = '\r' + emitter.buffer[emitter.buffer_pos+1] = '\n' + emitter.buffer_pos += 2 + default: + panic("unknown line break setting") + } + emitter.column = 0 + emitter.line++ + return true +} + +// Copy a character from a string into buffer. +func write(emitter *yaml_emitter_t, s []byte, i *int) bool { + if emitter.buffer_pos+5 >= len(emitter.buffer) && !yaml_emitter_flush(emitter) { + return false + } + p := emitter.buffer_pos + w := width(s[*i]) + switch w { + case 4: + emitter.buffer[p+3] = s[*i+3] + fallthrough + case 3: + emitter.buffer[p+2] = s[*i+2] + fallthrough + case 2: + emitter.buffer[p+1] = s[*i+1] + fallthrough + case 1: + emitter.buffer[p+0] = s[*i+0] + default: + panic("unknown character width") + } + emitter.column++ + emitter.buffer_pos += w + *i += w + return true +} + +// Write a whole string into buffer. +func write_all(emitter *yaml_emitter_t, s []byte) bool { + for i := 0; i < len(s); { + if !write(emitter, s, &i) { + return false + } + } + return true +} + +// Copy a line break character from a string into buffer. +func write_break(emitter *yaml_emitter_t, s []byte, i *int) bool { + if s[*i] == '\n' { + if !put_break(emitter) { + return false + } + *i++ + } else { + if !write(emitter, s, i) { + return false + } + emitter.column = 0 + emitter.line++ + } + return true +} + +// Set an emitter error and return false. +func yaml_emitter_set_emitter_error(emitter *yaml_emitter_t, problem string) bool { + emitter.error = yaml_EMITTER_ERROR + emitter.problem = problem + return false +} + +// Emit an event. +func yaml_emitter_emit(emitter *yaml_emitter_t, event *yaml_event_t) bool { + emitter.events = append(emitter.events, *event) + for !yaml_emitter_need_more_events(emitter) { + event := &emitter.events[emitter.events_head] + if !yaml_emitter_analyze_event(emitter, event) { + return false + } + if !yaml_emitter_state_machine(emitter, event) { + return false + } + yaml_event_delete(event) + emitter.events_head++ + } + return true +} + +// Check if we need to accumulate more events before emitting. +// +// We accumulate extra +// - 1 event for DOCUMENT-START +// - 2 events for SEQUENCE-START +// - 3 events for MAPPING-START +// +func yaml_emitter_need_more_events(emitter *yaml_emitter_t) bool { + if emitter.events_head == len(emitter.events) { + return true + } + var accumulate int + switch emitter.events[emitter.events_head].typ { + case yaml_DOCUMENT_START_EVENT: + accumulate = 1 + break + case yaml_SEQUENCE_START_EVENT: + accumulate = 2 + break + case yaml_MAPPING_START_EVENT: + accumulate = 3 + break + default: + return false + } + if len(emitter.events)-emitter.events_head > accumulate { + return false + } + var level int + for i := emitter.events_head; i < len(emitter.events); i++ { + switch emitter.events[i].typ { + case yaml_STREAM_START_EVENT, yaml_DOCUMENT_START_EVENT, yaml_SEQUENCE_START_EVENT, yaml_MAPPING_START_EVENT: + level++ + case yaml_STREAM_END_EVENT, yaml_DOCUMENT_END_EVENT, yaml_SEQUENCE_END_EVENT, yaml_MAPPING_END_EVENT: + level-- + } + if level == 0 { + return false + } + } + return true +} + +// Append a directive to the directives stack. +func yaml_emitter_append_tag_directive(emitter *yaml_emitter_t, value *yaml_tag_directive_t, allow_duplicates bool) bool { + for i := 0; i < len(emitter.tag_directives); i++ { + if bytes.Equal(value.handle, emitter.tag_directives[i].handle) { + if allow_duplicates { + return true + } + return yaml_emitter_set_emitter_error(emitter, "duplicate %TAG directive") + } + } + + // [Go] Do we actually need to copy this given garbage collection + // and the lack of deallocating destructors? + tag_copy := yaml_tag_directive_t{ + handle: make([]byte, len(value.handle)), + prefix: make([]byte, len(value.prefix)), + } + copy(tag_copy.handle, value.handle) + copy(tag_copy.prefix, value.prefix) + emitter.tag_directives = append(emitter.tag_directives, tag_copy) + return true +} + +// Increase the indentation level. +func yaml_emitter_increase_indent(emitter *yaml_emitter_t, flow, indentless bool) bool { + emitter.indents = append(emitter.indents, emitter.indent) + if emitter.indent < 0 { + if flow { + emitter.indent = emitter.best_indent + } else { + emitter.indent = 0 + } + } else if !indentless { + emitter.indent += emitter.best_indent + } + return true +} + +// State dispatcher. +func yaml_emitter_state_machine(emitter *yaml_emitter_t, event *yaml_event_t) bool { + switch emitter.state { + default: + case yaml_EMIT_STREAM_START_STATE: + return yaml_emitter_emit_stream_start(emitter, event) + + case yaml_EMIT_FIRST_DOCUMENT_START_STATE: + return yaml_emitter_emit_document_start(emitter, event, true) + + case yaml_EMIT_DOCUMENT_START_STATE: + return yaml_emitter_emit_document_start(emitter, event, false) + + case yaml_EMIT_DOCUMENT_CONTENT_STATE: + return yaml_emitter_emit_document_content(emitter, event) + + case yaml_EMIT_DOCUMENT_END_STATE: + return yaml_emitter_emit_document_end(emitter, event) + + case yaml_EMIT_FLOW_SEQUENCE_FIRST_ITEM_STATE: + return yaml_emitter_emit_flow_sequence_item(emitter, event, true) + + case yaml_EMIT_FLOW_SEQUENCE_ITEM_STATE: + return yaml_emitter_emit_flow_sequence_item(emitter, event, false) + + case yaml_EMIT_FLOW_MAPPING_FIRST_KEY_STATE: + return yaml_emitter_emit_flow_mapping_key(emitter, event, true) + + case yaml_EMIT_FLOW_MAPPING_KEY_STATE: + return yaml_emitter_emit_flow_mapping_key(emitter, event, false) + + case yaml_EMIT_FLOW_MAPPING_SIMPLE_VALUE_STATE: + return yaml_emitter_emit_flow_mapping_value(emitter, event, true) + + case yaml_EMIT_FLOW_MAPPING_VALUE_STATE: + return yaml_emitter_emit_flow_mapping_value(emitter, event, false) + + case yaml_EMIT_BLOCK_SEQUENCE_FIRST_ITEM_STATE: + return yaml_emitter_emit_block_sequence_item(emitter, event, true) + + case yaml_EMIT_BLOCK_SEQUENCE_ITEM_STATE: + return yaml_emitter_emit_block_sequence_item(emitter, event, false) + + case yaml_EMIT_BLOCK_MAPPING_FIRST_KEY_STATE: + return yaml_emitter_emit_block_mapping_key(emitter, event, true) + + case yaml_EMIT_BLOCK_MAPPING_KEY_STATE: + return yaml_emitter_emit_block_mapping_key(emitter, event, false) + + case yaml_EMIT_BLOCK_MAPPING_SIMPLE_VALUE_STATE: + return yaml_emitter_emit_block_mapping_value(emitter, event, true) + + case yaml_EMIT_BLOCK_MAPPING_VALUE_STATE: + return yaml_emitter_emit_block_mapping_value(emitter, event, false) + + case yaml_EMIT_END_STATE: + return yaml_emitter_set_emitter_error(emitter, "expected nothing after STREAM-END") + } + panic("invalid emitter state") +} + +// Expect STREAM-START. +func yaml_emitter_emit_stream_start(emitter *yaml_emitter_t, event *yaml_event_t) bool { + if event.typ != yaml_STREAM_START_EVENT { + return yaml_emitter_set_emitter_error(emitter, "expected STREAM-START") + } + if emitter.encoding == yaml_ANY_ENCODING { + emitter.encoding = event.encoding + if emitter.encoding == yaml_ANY_ENCODING { + emitter.encoding = yaml_UTF8_ENCODING + } + } + if emitter.best_indent < 2 || emitter.best_indent > 9 { + emitter.best_indent = 2 + } + if emitter.best_width >= 0 && emitter.best_width <= emitter.best_indent*2 { + emitter.best_width = 80 + } + if emitter.best_width < 0 { + emitter.best_width = 1<<31 - 1 + } + if emitter.line_break == yaml_ANY_BREAK { + emitter.line_break = yaml_LN_BREAK + } + + emitter.indent = -1 + emitter.line = 0 + emitter.column = 0 + emitter.whitespace = true + emitter.indention = true + + if emitter.encoding != yaml_UTF8_ENCODING { + if !yaml_emitter_write_bom(emitter) { + return false + } + } + emitter.state = yaml_EMIT_FIRST_DOCUMENT_START_STATE + return true +} + +// Expect DOCUMENT-START or STREAM-END. +func yaml_emitter_emit_document_start(emitter *yaml_emitter_t, event *yaml_event_t, first bool) bool { + + if event.typ == yaml_DOCUMENT_START_EVENT { + + if event.version_directive != nil { + if !yaml_emitter_analyze_version_directive(emitter, event.version_directive) { + return false + } + } + + for i := 0; i < len(event.tag_directives); i++ { + tag_directive := &event.tag_directives[i] + if !yaml_emitter_analyze_tag_directive(emitter, tag_directive) { + return false + } + if !yaml_emitter_append_tag_directive(emitter, tag_directive, false) { + return false + } + } + + for i := 0; i < len(default_tag_directives); i++ { + tag_directive := &default_tag_directives[i] + if !yaml_emitter_append_tag_directive(emitter, tag_directive, true) { + return false + } + } + + implicit := event.implicit + if !first || emitter.canonical { + implicit = false + } + + if emitter.open_ended && (event.version_directive != nil || len(event.tag_directives) > 0) { + if !yaml_emitter_write_indicator(emitter, []byte("..."), true, false, false) { + return false + } + if !yaml_emitter_write_indent(emitter) { + return false + } + } + + if event.version_directive != nil { + implicit = false + if !yaml_emitter_write_indicator(emitter, []byte("%YAML"), true, false, false) { + return false + } + if !yaml_emitter_write_indicator(emitter, []byte("1.1"), true, false, false) { + return false + } + if !yaml_emitter_write_indent(emitter) { + return false + } + } + + if len(event.tag_directives) > 0 { + implicit = false + for i := 0; i < len(event.tag_directives); i++ { + tag_directive := &event.tag_directives[i] + if !yaml_emitter_write_indicator(emitter, []byte("%TAG"), true, false, false) { + return false + } + if !yaml_emitter_write_tag_handle(emitter, tag_directive.handle) { + return false + } + if !yaml_emitter_write_tag_content(emitter, tag_directive.prefix, true) { + return false + } + if !yaml_emitter_write_indent(emitter) { + return false + } + } + } + + if yaml_emitter_check_empty_document(emitter) { + implicit = false + } + if !implicit { + if !yaml_emitter_write_indent(emitter) { + return false + } + if !yaml_emitter_write_indicator(emitter, []byte("---"), true, false, false) { + return false + } + if emitter.canonical { + if !yaml_emitter_write_indent(emitter) { + return false + } + } + } + + emitter.state = yaml_EMIT_DOCUMENT_CONTENT_STATE + return true + } + + if event.typ == yaml_STREAM_END_EVENT { + if emitter.open_ended { + if !yaml_emitter_write_indicator(emitter, []byte("..."), true, false, false) { + return false + } + if !yaml_emitter_write_indent(emitter) { + return false + } + } + if !yaml_emitter_flush(emitter) { + return false + } + emitter.state = yaml_EMIT_END_STATE + return true + } + + return yaml_emitter_set_emitter_error(emitter, "expected DOCUMENT-START or STREAM-END") +} + +// Expect the root node. +func yaml_emitter_emit_document_content(emitter *yaml_emitter_t, event *yaml_event_t) bool { + emitter.states = append(emitter.states, yaml_EMIT_DOCUMENT_END_STATE) + return yaml_emitter_emit_node(emitter, event, true, false, false, false) +} + +// Expect DOCUMENT-END. +func yaml_emitter_emit_document_end(emitter *yaml_emitter_t, event *yaml_event_t) bool { + if event.typ != yaml_DOCUMENT_END_EVENT { + return yaml_emitter_set_emitter_error(emitter, "expected DOCUMENT-END") + } + if !yaml_emitter_write_indent(emitter) { + return false + } + if !event.implicit { + // [Go] Allocate the slice elsewhere. + if !yaml_emitter_write_indicator(emitter, []byte("..."), true, false, false) { + return false + } + if !yaml_emitter_write_indent(emitter) { + return false + } + } + if !yaml_emitter_flush(emitter) { + return false + } + emitter.state = yaml_EMIT_DOCUMENT_START_STATE + emitter.tag_directives = emitter.tag_directives[:0] + return true +} + +// Expect a flow item node. +func yaml_emitter_emit_flow_sequence_item(emitter *yaml_emitter_t, event *yaml_event_t, first bool) bool { + if first { + if !yaml_emitter_write_indicator(emitter, []byte{'['}, true, true, false) { + return false + } + if !yaml_emitter_increase_indent(emitter, true, false) { + return false + } + emitter.flow_level++ + } + + if event.typ == yaml_SEQUENCE_END_EVENT { + emitter.flow_level-- + emitter.indent = emitter.indents[len(emitter.indents)-1] + emitter.indents = emitter.indents[:len(emitter.indents)-1] + if emitter.canonical && !first { + if !yaml_emitter_write_indicator(emitter, []byte{','}, false, false, false) { + return false + } + if !yaml_emitter_write_indent(emitter) { + return false + } + } + if !yaml_emitter_write_indicator(emitter, []byte{']'}, false, false, false) { + return false + } + emitter.state = emitter.states[len(emitter.states)-1] + emitter.states = emitter.states[:len(emitter.states)-1] + + return true + } + + if !first { + if !yaml_emitter_write_indicator(emitter, []byte{','}, false, false, false) { + return false + } + } + + if emitter.canonical || emitter.column > emitter.best_width { + if !yaml_emitter_write_indent(emitter) { + return false + } + } + emitter.states = append(emitter.states, yaml_EMIT_FLOW_SEQUENCE_ITEM_STATE) + return yaml_emitter_emit_node(emitter, event, false, true, false, false) +} + +// Expect a flow key node. +func yaml_emitter_emit_flow_mapping_key(emitter *yaml_emitter_t, event *yaml_event_t, first bool) bool { + if first { + if !yaml_emitter_write_indicator(emitter, []byte{'{'}, true, true, false) { + return false + } + if !yaml_emitter_increase_indent(emitter, true, false) { + return false + } + emitter.flow_level++ + } + + if event.typ == yaml_MAPPING_END_EVENT { + emitter.flow_level-- + emitter.indent = emitter.indents[len(emitter.indents)-1] + emitter.indents = emitter.indents[:len(emitter.indents)-1] + if emitter.canonical && !first { + if !yaml_emitter_write_indicator(emitter, []byte{','}, false, false, false) { + return false + } + if !yaml_emitter_write_indent(emitter) { + return false + } + } + if !yaml_emitter_write_indicator(emitter, []byte{'}'}, false, false, false) { + return false + } + emitter.state = emitter.states[len(emitter.states)-1] + emitter.states = emitter.states[:len(emitter.states)-1] + return true + } + + if !first { + if !yaml_emitter_write_indicator(emitter, []byte{','}, false, false, false) { + return false + } + } + if emitter.canonical || emitter.column > emitter.best_width { + if !yaml_emitter_write_indent(emitter) { + return false + } + } + + if !emitter.canonical && yaml_emitter_check_simple_key(emitter) { + emitter.states = append(emitter.states, yaml_EMIT_FLOW_MAPPING_SIMPLE_VALUE_STATE) + return yaml_emitter_emit_node(emitter, event, false, false, true, true) + } + if !yaml_emitter_write_indicator(emitter, []byte{'?'}, true, false, false) { + return false + } + emitter.states = append(emitter.states, yaml_EMIT_FLOW_MAPPING_VALUE_STATE) + return yaml_emitter_emit_node(emitter, event, false, false, true, false) +} + +// Expect a flow value node. +func yaml_emitter_emit_flow_mapping_value(emitter *yaml_emitter_t, event *yaml_event_t, simple bool) bool { + if simple { + if !yaml_emitter_write_indicator(emitter, []byte{':'}, false, false, false) { + return false + } + } else { + if emitter.canonical || emitter.column > emitter.best_width { + if !yaml_emitter_write_indent(emitter) { + return false + } + } + if !yaml_emitter_write_indicator(emitter, []byte{':'}, true, false, false) { + return false + } + } + emitter.states = append(emitter.states, yaml_EMIT_FLOW_MAPPING_KEY_STATE) + return yaml_emitter_emit_node(emitter, event, false, false, true, false) +} + +// Expect a block item node. +func yaml_emitter_emit_block_sequence_item(emitter *yaml_emitter_t, event *yaml_event_t, first bool) bool { + if first { + if !yaml_emitter_increase_indent(emitter, false, emitter.mapping_context && !emitter.indention) { + return false + } + } + if event.typ == yaml_SEQUENCE_END_EVENT { + emitter.indent = emitter.indents[len(emitter.indents)-1] + emitter.indents = emitter.indents[:len(emitter.indents)-1] + emitter.state = emitter.states[len(emitter.states)-1] + emitter.states = emitter.states[:len(emitter.states)-1] + return true + } + if !yaml_emitter_write_indent(emitter) { + return false + } + if !yaml_emitter_write_indicator(emitter, []byte{'-'}, true, false, true) { + return false + } + emitter.states = append(emitter.states, yaml_EMIT_BLOCK_SEQUENCE_ITEM_STATE) + return yaml_emitter_emit_node(emitter, event, false, true, false, false) +} + +// Expect a block key node. +func yaml_emitter_emit_block_mapping_key(emitter *yaml_emitter_t, event *yaml_event_t, first bool) bool { + if first { + if !yaml_emitter_increase_indent(emitter, false, false) { + return false + } + } + if event.typ == yaml_MAPPING_END_EVENT { + emitter.indent = emitter.indents[len(emitter.indents)-1] + emitter.indents = emitter.indents[:len(emitter.indents)-1] + emitter.state = emitter.states[len(emitter.states)-1] + emitter.states = emitter.states[:len(emitter.states)-1] + return true + } + if !yaml_emitter_write_indent(emitter) { + return false + } + if yaml_emitter_check_simple_key(emitter) { + emitter.states = append(emitter.states, yaml_EMIT_BLOCK_MAPPING_SIMPLE_VALUE_STATE) + return yaml_emitter_emit_node(emitter, event, false, false, true, true) + } + if !yaml_emitter_write_indicator(emitter, []byte{'?'}, true, false, true) { + return false + } + emitter.states = append(emitter.states, yaml_EMIT_BLOCK_MAPPING_VALUE_STATE) + return yaml_emitter_emit_node(emitter, event, false, false, true, false) +} + +// Expect a block value node. +func yaml_emitter_emit_block_mapping_value(emitter *yaml_emitter_t, event *yaml_event_t, simple bool) bool { + if simple { + if !yaml_emitter_write_indicator(emitter, []byte{':'}, false, false, false) { + return false + } + } else { + if !yaml_emitter_write_indent(emitter) { + return false + } + if !yaml_emitter_write_indicator(emitter, []byte{':'}, true, false, true) { + return false + } + } + emitter.states = append(emitter.states, yaml_EMIT_BLOCK_MAPPING_KEY_STATE) + return yaml_emitter_emit_node(emitter, event, false, false, true, false) +} + +// Expect a node. +func yaml_emitter_emit_node(emitter *yaml_emitter_t, event *yaml_event_t, + root bool, sequence bool, mapping bool, simple_key bool) bool { + + emitter.root_context = root + emitter.sequence_context = sequence + emitter.mapping_context = mapping + emitter.simple_key_context = simple_key + + switch event.typ { + case yaml_ALIAS_EVENT: + return yaml_emitter_emit_alias(emitter, event) + case yaml_SCALAR_EVENT: + return yaml_emitter_emit_scalar(emitter, event) + case yaml_SEQUENCE_START_EVENT: + return yaml_emitter_emit_sequence_start(emitter, event) + case yaml_MAPPING_START_EVENT: + return yaml_emitter_emit_mapping_start(emitter, event) + default: + return yaml_emitter_set_emitter_error(emitter, + fmt.Sprintf("expected SCALAR, SEQUENCE-START, MAPPING-START, or ALIAS, but got %v", event.typ)) + } +} + +// Expect ALIAS. +func yaml_emitter_emit_alias(emitter *yaml_emitter_t, event *yaml_event_t) bool { + if !yaml_emitter_process_anchor(emitter) { + return false + } + emitter.state = emitter.states[len(emitter.states)-1] + emitter.states = emitter.states[:len(emitter.states)-1] + return true +} + +// Expect SCALAR. +func yaml_emitter_emit_scalar(emitter *yaml_emitter_t, event *yaml_event_t) bool { + if !yaml_emitter_select_scalar_style(emitter, event) { + return false + } + if !yaml_emitter_process_anchor(emitter) { + return false + } + if !yaml_emitter_process_tag(emitter) { + return false + } + if !yaml_emitter_increase_indent(emitter, true, false) { + return false + } + if !yaml_emitter_process_scalar(emitter) { + return false + } + emitter.indent = emitter.indents[len(emitter.indents)-1] + emitter.indents = emitter.indents[:len(emitter.indents)-1] + emitter.state = emitter.states[len(emitter.states)-1] + emitter.states = emitter.states[:len(emitter.states)-1] + return true +} + +// Expect SEQUENCE-START. +func yaml_emitter_emit_sequence_start(emitter *yaml_emitter_t, event *yaml_event_t) bool { + if !yaml_emitter_process_anchor(emitter) { + return false + } + if !yaml_emitter_process_tag(emitter) { + return false + } + if emitter.flow_level > 0 || emitter.canonical || event.sequence_style() == yaml_FLOW_SEQUENCE_STYLE || + yaml_emitter_check_empty_sequence(emitter) { + emitter.state = yaml_EMIT_FLOW_SEQUENCE_FIRST_ITEM_STATE + } else { + emitter.state = yaml_EMIT_BLOCK_SEQUENCE_FIRST_ITEM_STATE + } + return true +} + +// Expect MAPPING-START. +func yaml_emitter_emit_mapping_start(emitter *yaml_emitter_t, event *yaml_event_t) bool { + if !yaml_emitter_process_anchor(emitter) { + return false + } + if !yaml_emitter_process_tag(emitter) { + return false + } + if emitter.flow_level > 0 || emitter.canonical || event.mapping_style() == yaml_FLOW_MAPPING_STYLE || + yaml_emitter_check_empty_mapping(emitter) { + emitter.state = yaml_EMIT_FLOW_MAPPING_FIRST_KEY_STATE + } else { + emitter.state = yaml_EMIT_BLOCK_MAPPING_FIRST_KEY_STATE + } + return true +} + +// Check if the document content is an empty scalar. +func yaml_emitter_check_empty_document(emitter *yaml_emitter_t) bool { + return false // [Go] Huh? +} + +// Check if the next events represent an empty sequence. +func yaml_emitter_check_empty_sequence(emitter *yaml_emitter_t) bool { + if len(emitter.events)-emitter.events_head < 2 { + return false + } + return emitter.events[emitter.events_head].typ == yaml_SEQUENCE_START_EVENT && + emitter.events[emitter.events_head+1].typ == yaml_SEQUENCE_END_EVENT +} + +// Check if the next events represent an empty mapping. +func yaml_emitter_check_empty_mapping(emitter *yaml_emitter_t) bool { + if len(emitter.events)-emitter.events_head < 2 { + return false + } + return emitter.events[emitter.events_head].typ == yaml_MAPPING_START_EVENT && + emitter.events[emitter.events_head+1].typ == yaml_MAPPING_END_EVENT +} + +// Check if the next node can be expressed as a simple key. +func yaml_emitter_check_simple_key(emitter *yaml_emitter_t) bool { + length := 0 + switch emitter.events[emitter.events_head].typ { + case yaml_ALIAS_EVENT: + length += len(emitter.anchor_data.anchor) + case yaml_SCALAR_EVENT: + if emitter.scalar_data.multiline { + return false + } + length += len(emitter.anchor_data.anchor) + + len(emitter.tag_data.handle) + + len(emitter.tag_data.suffix) + + len(emitter.scalar_data.value) + case yaml_SEQUENCE_START_EVENT: + if !yaml_emitter_check_empty_sequence(emitter) { + return false + } + length += len(emitter.anchor_data.anchor) + + len(emitter.tag_data.handle) + + len(emitter.tag_data.suffix) + case yaml_MAPPING_START_EVENT: + if !yaml_emitter_check_empty_mapping(emitter) { + return false + } + length += len(emitter.anchor_data.anchor) + + len(emitter.tag_data.handle) + + len(emitter.tag_data.suffix) + default: + return false + } + return length <= 128 +} + +// Determine an acceptable scalar style. +func yaml_emitter_select_scalar_style(emitter *yaml_emitter_t, event *yaml_event_t) bool { + + no_tag := len(emitter.tag_data.handle) == 0 && len(emitter.tag_data.suffix) == 0 + if no_tag && !event.implicit && !event.quoted_implicit { + return yaml_emitter_set_emitter_error(emitter, "neither tag nor implicit flags are specified") + } + + style := event.scalar_style() + if style == yaml_ANY_SCALAR_STYLE { + style = yaml_PLAIN_SCALAR_STYLE + } + if emitter.canonical { + style = yaml_DOUBLE_QUOTED_SCALAR_STYLE + } + if emitter.simple_key_context && emitter.scalar_data.multiline { + style = yaml_DOUBLE_QUOTED_SCALAR_STYLE + } + + if style == yaml_PLAIN_SCALAR_STYLE { + if emitter.flow_level > 0 && !emitter.scalar_data.flow_plain_allowed || + emitter.flow_level == 0 && !emitter.scalar_data.block_plain_allowed { + style = yaml_SINGLE_QUOTED_SCALAR_STYLE + } + if len(emitter.scalar_data.value) == 0 && (emitter.flow_level > 0 || emitter.simple_key_context) { + style = yaml_SINGLE_QUOTED_SCALAR_STYLE + } + if no_tag && !event.implicit { + style = yaml_SINGLE_QUOTED_SCALAR_STYLE + } + } + if style == yaml_SINGLE_QUOTED_SCALAR_STYLE { + if !emitter.scalar_data.single_quoted_allowed { + style = yaml_DOUBLE_QUOTED_SCALAR_STYLE + } + } + if style == yaml_LITERAL_SCALAR_STYLE || style == yaml_FOLDED_SCALAR_STYLE { + if !emitter.scalar_data.block_allowed || emitter.flow_level > 0 || emitter.simple_key_context { + style = yaml_DOUBLE_QUOTED_SCALAR_STYLE + } + } + + if no_tag && !event.quoted_implicit && style != yaml_PLAIN_SCALAR_STYLE { + emitter.tag_data.handle = []byte{'!'} + } + emitter.scalar_data.style = style + return true +} + +// Write an anchor. +func yaml_emitter_process_anchor(emitter *yaml_emitter_t) bool { + if emitter.anchor_data.anchor == nil { + return true + } + c := []byte{'&'} + if emitter.anchor_data.alias { + c[0] = '*' + } + if !yaml_emitter_write_indicator(emitter, c, true, false, false) { + return false + } + return yaml_emitter_write_anchor(emitter, emitter.anchor_data.anchor) +} + +// Write a tag. +func yaml_emitter_process_tag(emitter *yaml_emitter_t) bool { + if len(emitter.tag_data.handle) == 0 && len(emitter.tag_data.suffix) == 0 { + return true + } + if len(emitter.tag_data.handle) > 0 { + if !yaml_emitter_write_tag_handle(emitter, emitter.tag_data.handle) { + return false + } + if len(emitter.tag_data.suffix) > 0 { + if !yaml_emitter_write_tag_content(emitter, emitter.tag_data.suffix, false) { + return false + } + } + } else { + // [Go] Allocate these slices elsewhere. + if !yaml_emitter_write_indicator(emitter, []byte("!<"), true, false, false) { + return false + } + if !yaml_emitter_write_tag_content(emitter, emitter.tag_data.suffix, false) { + return false + } + if !yaml_emitter_write_indicator(emitter, []byte{'>'}, false, false, false) { + return false + } + } + return true +} + +// Write a scalar. +func yaml_emitter_process_scalar(emitter *yaml_emitter_t) bool { + switch emitter.scalar_data.style { + case yaml_PLAIN_SCALAR_STYLE: + return yaml_emitter_write_plain_scalar(emitter, emitter.scalar_data.value, !emitter.simple_key_context) + + case yaml_SINGLE_QUOTED_SCALAR_STYLE: + return yaml_emitter_write_single_quoted_scalar(emitter, emitter.scalar_data.value, !emitter.simple_key_context) + + case yaml_DOUBLE_QUOTED_SCALAR_STYLE: + return yaml_emitter_write_double_quoted_scalar(emitter, emitter.scalar_data.value, !emitter.simple_key_context) + + case yaml_LITERAL_SCALAR_STYLE: + return yaml_emitter_write_literal_scalar(emitter, emitter.scalar_data.value) + + case yaml_FOLDED_SCALAR_STYLE: + return yaml_emitter_write_folded_scalar(emitter, emitter.scalar_data.value) + } + panic("unknown scalar style") +} + +// Check if a %YAML directive is valid. +func yaml_emitter_analyze_version_directive(emitter *yaml_emitter_t, version_directive *yaml_version_directive_t) bool { + if version_directive.major != 1 || version_directive.minor != 1 { + return yaml_emitter_set_emitter_error(emitter, "incompatible %YAML directive") + } + return true +} + +// Check if a %TAG directive is valid. +func yaml_emitter_analyze_tag_directive(emitter *yaml_emitter_t, tag_directive *yaml_tag_directive_t) bool { + handle := tag_directive.handle + prefix := tag_directive.prefix + if len(handle) == 0 { + return yaml_emitter_set_emitter_error(emitter, "tag handle must not be empty") + } + if handle[0] != '!' { + return yaml_emitter_set_emitter_error(emitter, "tag handle must start with '!'") + } + if handle[len(handle)-1] != '!' { + return yaml_emitter_set_emitter_error(emitter, "tag handle must end with '!'") + } + for i := 1; i < len(handle)-1; i += width(handle[i]) { + if !is_alpha(handle, i) { + return yaml_emitter_set_emitter_error(emitter, "tag handle must contain alphanumerical characters only") + } + } + if len(prefix) == 0 { + return yaml_emitter_set_emitter_error(emitter, "tag prefix must not be empty") + } + return true +} + +// Check if an anchor is valid. +func yaml_emitter_analyze_anchor(emitter *yaml_emitter_t, anchor []byte, alias bool) bool { + if len(anchor) == 0 { + problem := "anchor value must not be empty" + if alias { + problem = "alias value must not be empty" + } + return yaml_emitter_set_emitter_error(emitter, problem) + } + for i := 0; i < len(anchor); i += width(anchor[i]) { + if !is_alpha(anchor, i) { + problem := "anchor value must contain alphanumerical characters only" + if alias { + problem = "alias value must contain alphanumerical characters only" + } + return yaml_emitter_set_emitter_error(emitter, problem) + } + } + emitter.anchor_data.anchor = anchor + emitter.anchor_data.alias = alias + return true +} + +// Check if a tag is valid. +func yaml_emitter_analyze_tag(emitter *yaml_emitter_t, tag []byte) bool { + if len(tag) == 0 { + return yaml_emitter_set_emitter_error(emitter, "tag value must not be empty") + } + for i := 0; i < len(emitter.tag_directives); i++ { + tag_directive := &emitter.tag_directives[i] + if bytes.HasPrefix(tag, tag_directive.prefix) { + emitter.tag_data.handle = tag_directive.handle + emitter.tag_data.suffix = tag[len(tag_directive.prefix):] + return true + } + } + emitter.tag_data.suffix = tag + return true +} + +// Check if a scalar is valid. +func yaml_emitter_analyze_scalar(emitter *yaml_emitter_t, value []byte) bool { + var ( + block_indicators = false + flow_indicators = false + line_breaks = false + special_characters = false + + leading_space = false + leading_break = false + trailing_space = false + trailing_break = false + break_space = false + space_break = false + + preceded_by_whitespace = false + followed_by_whitespace = false + previous_space = false + previous_break = false + ) + + emitter.scalar_data.value = value + + if len(value) == 0 { + emitter.scalar_data.multiline = false + emitter.scalar_data.flow_plain_allowed = false + emitter.scalar_data.block_plain_allowed = true + emitter.scalar_data.single_quoted_allowed = true + emitter.scalar_data.block_allowed = false + return true + } + + if len(value) >= 3 && ((value[0] == '-' && value[1] == '-' && value[2] == '-') || (value[0] == '.' && value[1] == '.' && value[2] == '.')) { + block_indicators = true + flow_indicators = true + } + + preceded_by_whitespace = true + for i, w := 0, 0; i < len(value); i += w { + w = width(value[i]) + followed_by_whitespace = i+w >= len(value) || is_blank(value, i+w) + + if i == 0 { + switch value[i] { + case '#', ',', '[', ']', '{', '}', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`': + flow_indicators = true + block_indicators = true + case '?', ':': + flow_indicators = true + if followed_by_whitespace { + block_indicators = true + } + case '-': + if followed_by_whitespace { + flow_indicators = true + block_indicators = true + } + } + } else { + switch value[i] { + case ',', '?', '[', ']', '{', '}': + flow_indicators = true + case ':': + flow_indicators = true + if followed_by_whitespace { + block_indicators = true + } + case '#': + if preceded_by_whitespace { + flow_indicators = true + block_indicators = true + } + } + } + + if !is_printable(value, i) || !is_ascii(value, i) && !emitter.unicode { + special_characters = true + } + if is_space(value, i) { + if i == 0 { + leading_space = true + } + if i+width(value[i]) == len(value) { + trailing_space = true + } + if previous_break { + break_space = true + } + previous_space = true + previous_break = false + } else if is_break(value, i) { + line_breaks = true + if i == 0 { + leading_break = true + } + if i+width(value[i]) == len(value) { + trailing_break = true + } + if previous_space { + space_break = true + } + previous_space = false + previous_break = true + } else { + previous_space = false + previous_break = false + } + + // [Go]: Why 'z'? Couldn't be the end of the string as that's the loop condition. + preceded_by_whitespace = is_blankz(value, i) + } + + emitter.scalar_data.multiline = line_breaks + emitter.scalar_data.flow_plain_allowed = true + emitter.scalar_data.block_plain_allowed = true + emitter.scalar_data.single_quoted_allowed = true + emitter.scalar_data.block_allowed = true + + if leading_space || leading_break || trailing_space || trailing_break { + emitter.scalar_data.flow_plain_allowed = false + emitter.scalar_data.block_plain_allowed = false + } + if trailing_space { + emitter.scalar_data.block_allowed = false + } + if break_space { + emitter.scalar_data.flow_plain_allowed = false + emitter.scalar_data.block_plain_allowed = false + emitter.scalar_data.single_quoted_allowed = false + } + if space_break || special_characters { + emitter.scalar_data.flow_plain_allowed = false + emitter.scalar_data.block_plain_allowed = false + emitter.scalar_data.single_quoted_allowed = false + emitter.scalar_data.block_allowed = false + } + if line_breaks { + emitter.scalar_data.flow_plain_allowed = false + emitter.scalar_data.block_plain_allowed = false + } + if flow_indicators { + emitter.scalar_data.flow_plain_allowed = false + } + if block_indicators { + emitter.scalar_data.block_plain_allowed = false + } + return true +} + +// Check if the event data is valid. +func yaml_emitter_analyze_event(emitter *yaml_emitter_t, event *yaml_event_t) bool { + + emitter.anchor_data.anchor = nil + emitter.tag_data.handle = nil + emitter.tag_data.suffix = nil + emitter.scalar_data.value = nil + + switch event.typ { + case yaml_ALIAS_EVENT: + if !yaml_emitter_analyze_anchor(emitter, event.anchor, true) { + return false + } + + case yaml_SCALAR_EVENT: + if len(event.anchor) > 0 { + if !yaml_emitter_analyze_anchor(emitter, event.anchor, false) { + return false + } + } + if len(event.tag) > 0 && (emitter.canonical || (!event.implicit && !event.quoted_implicit)) { + if !yaml_emitter_analyze_tag(emitter, event.tag) { + return false + } + } + if !yaml_emitter_analyze_scalar(emitter, event.value) { + return false + } + + case yaml_SEQUENCE_START_EVENT: + if len(event.anchor) > 0 { + if !yaml_emitter_analyze_anchor(emitter, event.anchor, false) { + return false + } + } + if len(event.tag) > 0 && (emitter.canonical || !event.implicit) { + if !yaml_emitter_analyze_tag(emitter, event.tag) { + return false + } + } + + case yaml_MAPPING_START_EVENT: + if len(event.anchor) > 0 { + if !yaml_emitter_analyze_anchor(emitter, event.anchor, false) { + return false + } + } + if len(event.tag) > 0 && (emitter.canonical || !event.implicit) { + if !yaml_emitter_analyze_tag(emitter, event.tag) { + return false + } + } + } + return true +} + +// Write the BOM character. +func yaml_emitter_write_bom(emitter *yaml_emitter_t) bool { + if !flush(emitter) { + return false + } + pos := emitter.buffer_pos + emitter.buffer[pos+0] = '\xEF' + emitter.buffer[pos+1] = '\xBB' + emitter.buffer[pos+2] = '\xBF' + emitter.buffer_pos += 3 + return true +} + +func yaml_emitter_write_indent(emitter *yaml_emitter_t) bool { + indent := emitter.indent + if indent < 0 { + indent = 0 + } + if !emitter.indention || emitter.column > indent || (emitter.column == indent && !emitter.whitespace) { + if !put_break(emitter) { + return false + } + } + for emitter.column < indent { + if !put(emitter, ' ') { + return false + } + } + emitter.whitespace = true + emitter.indention = true + return true +} + +func yaml_emitter_write_indicator(emitter *yaml_emitter_t, indicator []byte, need_whitespace, is_whitespace, is_indention bool) bool { + if need_whitespace && !emitter.whitespace { + if !put(emitter, ' ') { + return false + } + } + if !write_all(emitter, indicator) { + return false + } + emitter.whitespace = is_whitespace + emitter.indention = (emitter.indention && is_indention) + emitter.open_ended = false + return true +} + +func yaml_emitter_write_anchor(emitter *yaml_emitter_t, value []byte) bool { + if !write_all(emitter, value) { + return false + } + emitter.whitespace = false + emitter.indention = false + return true +} + +func yaml_emitter_write_tag_handle(emitter *yaml_emitter_t, value []byte) bool { + if !emitter.whitespace { + if !put(emitter, ' ') { + return false + } + } + if !write_all(emitter, value) { + return false + } + emitter.whitespace = false + emitter.indention = false + return true +} + +func yaml_emitter_write_tag_content(emitter *yaml_emitter_t, value []byte, need_whitespace bool) bool { + if need_whitespace && !emitter.whitespace { + if !put(emitter, ' ') { + return false + } + } + for i := 0; i < len(value); { + var must_write bool + switch value[i] { + case ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '_', '.', '~', '*', '\'', '(', ')', '[', ']': + must_write = true + default: + must_write = is_alpha(value, i) + } + if must_write { + if !write(emitter, value, &i) { + return false + } + } else { + w := width(value[i]) + for k := 0; k < w; k++ { + octet := value[i] + i++ + if !put(emitter, '%') { + return false + } + + c := octet >> 4 + if c < 10 { + c += '0' + } else { + c += 'A' - 10 + } + if !put(emitter, c) { + return false + } + + c = octet & 0x0f + if c < 10 { + c += '0' + } else { + c += 'A' - 10 + } + if !put(emitter, c) { + return false + } + } + } + } + emitter.whitespace = false + emitter.indention = false + return true +} + +func yaml_emitter_write_plain_scalar(emitter *yaml_emitter_t, value []byte, allow_breaks bool) bool { + if !emitter.whitespace { + if !put(emitter, ' ') { + return false + } + } + + spaces := false + breaks := false + for i := 0; i < len(value); { + if is_space(value, i) { + if allow_breaks && !spaces && emitter.column > emitter.best_width && !is_space(value, i+1) { + if !yaml_emitter_write_indent(emitter) { + return false + } + i += width(value[i]) + } else { + if !write(emitter, value, &i) { + return false + } + } + spaces = true + } else if is_break(value, i) { + if !breaks && value[i] == '\n' { + if !put_break(emitter) { + return false + } + } + if !write_break(emitter, value, &i) { + return false + } + emitter.indention = true + breaks = true + } else { + if breaks { + if !yaml_emitter_write_indent(emitter) { + return false + } + } + if !write(emitter, value, &i) { + return false + } + emitter.indention = false + spaces = false + breaks = false + } + } + + emitter.whitespace = false + emitter.indention = false + if emitter.root_context { + emitter.open_ended = true + } + + return true +} + +func yaml_emitter_write_single_quoted_scalar(emitter *yaml_emitter_t, value []byte, allow_breaks bool) bool { + + if !yaml_emitter_write_indicator(emitter, []byte{'\''}, true, false, false) { + return false + } + + spaces := false + breaks := false + for i := 0; i < len(value); { + if is_space(value, i) { + if allow_breaks && !spaces && emitter.column > emitter.best_width && i > 0 && i < len(value)-1 && !is_space(value, i+1) { + if !yaml_emitter_write_indent(emitter) { + return false + } + i += width(value[i]) + } else { + if !write(emitter, value, &i) { + return false + } + } + spaces = true + } else if is_break(value, i) { + if !breaks && value[i] == '\n' { + if !put_break(emitter) { + return false + } + } + if !write_break(emitter, value, &i) { + return false + } + emitter.indention = true + breaks = true + } else { + if breaks { + if !yaml_emitter_write_indent(emitter) { + return false + } + } + if value[i] == '\'' { + if !put(emitter, '\'') { + return false + } + } + if !write(emitter, value, &i) { + return false + } + emitter.indention = false + spaces = false + breaks = false + } + } + if !yaml_emitter_write_indicator(emitter, []byte{'\''}, false, false, false) { + return false + } + emitter.whitespace = false + emitter.indention = false + return true +} + +func yaml_emitter_write_double_quoted_scalar(emitter *yaml_emitter_t, value []byte, allow_breaks bool) bool { + spaces := false + if !yaml_emitter_write_indicator(emitter, []byte{'"'}, true, false, false) { + return false + } + + for i := 0; i < len(value); { + if !is_printable(value, i) || (!emitter.unicode && !is_ascii(value, i)) || + is_bom(value, i) || is_break(value, i) || + value[i] == '"' || value[i] == '\\' { + + octet := value[i] + + var w int + var v rune + switch { + case octet&0x80 == 0x00: + w, v = 1, rune(octet&0x7F) + case octet&0xE0 == 0xC0: + w, v = 2, rune(octet&0x1F) + case octet&0xF0 == 0xE0: + w, v = 3, rune(octet&0x0F) + case octet&0xF8 == 0xF0: + w, v = 4, rune(octet&0x07) + } + for k := 1; k < w; k++ { + octet = value[i+k] + v = (v << 6) + (rune(octet) & 0x3F) + } + i += w + + if !put(emitter, '\\') { + return false + } + + var ok bool + switch v { + case 0x00: + ok = put(emitter, '0') + case 0x07: + ok = put(emitter, 'a') + case 0x08: + ok = put(emitter, 'b') + case 0x09: + ok = put(emitter, 't') + case 0x0A: + ok = put(emitter, 'n') + case 0x0b: + ok = put(emitter, 'v') + case 0x0c: + ok = put(emitter, 'f') + case 0x0d: + ok = put(emitter, 'r') + case 0x1b: + ok = put(emitter, 'e') + case 0x22: + ok = put(emitter, '"') + case 0x5c: + ok = put(emitter, '\\') + case 0x85: + ok = put(emitter, 'N') + case 0xA0: + ok = put(emitter, '_') + case 0x2028: + ok = put(emitter, 'L') + case 0x2029: + ok = put(emitter, 'P') + default: + if v <= 0xFF { + ok = put(emitter, 'x') + w = 2 + } else if v <= 0xFFFF { + ok = put(emitter, 'u') + w = 4 + } else { + ok = put(emitter, 'U') + w = 8 + } + for k := (w - 1) * 4; ok && k >= 0; k -= 4 { + digit := byte((v >> uint(k)) & 0x0F) + if digit < 10 { + ok = put(emitter, digit+'0') + } else { + ok = put(emitter, digit+'A'-10) + } + } + } + if !ok { + return false + } + spaces = false + } else if is_space(value, i) { + if allow_breaks && !spaces && emitter.column > emitter.best_width && i > 0 && i < len(value)-1 { + if !yaml_emitter_write_indent(emitter) { + return false + } + if is_space(value, i+1) { + if !put(emitter, '\\') { + return false + } + } + i += width(value[i]) + } else if !write(emitter, value, &i) { + return false + } + spaces = true + } else { + if !write(emitter, value, &i) { + return false + } + spaces = false + } + } + if !yaml_emitter_write_indicator(emitter, []byte{'"'}, false, false, false) { + return false + } + emitter.whitespace = false + emitter.indention = false + return true +} + +func yaml_emitter_write_block_scalar_hints(emitter *yaml_emitter_t, value []byte) bool { + if is_space(value, 0) || is_break(value, 0) { + indent_hint := []byte{'0' + byte(emitter.best_indent)} + if !yaml_emitter_write_indicator(emitter, indent_hint, false, false, false) { + return false + } + } + + emitter.open_ended = false + + var chomp_hint [1]byte + if len(value) == 0 { + chomp_hint[0] = '-' + } else { + i := len(value) - 1 + for value[i]&0xC0 == 0x80 { + i-- + } + if !is_break(value, i) { + chomp_hint[0] = '-' + } else if i == 0 { + chomp_hint[0] = '+' + emitter.open_ended = true + } else { + i-- + for value[i]&0xC0 == 0x80 { + i-- + } + if is_break(value, i) { + chomp_hint[0] = '+' + emitter.open_ended = true + } + } + } + if chomp_hint[0] != 0 { + if !yaml_emitter_write_indicator(emitter, chomp_hint[:], false, false, false) { + return false + } + } + return true +} + +func yaml_emitter_write_literal_scalar(emitter *yaml_emitter_t, value []byte) bool { + if !yaml_emitter_write_indicator(emitter, []byte{'|'}, true, false, false) { + return false + } + if !yaml_emitter_write_block_scalar_hints(emitter, value) { + return false + } + if !put_break(emitter) { + return false + } + emitter.indention = true + emitter.whitespace = true + breaks := true + for i := 0; i < len(value); { + if is_break(value, i) { + if !write_break(emitter, value, &i) { + return false + } + emitter.indention = true + breaks = true + } else { + if breaks { + if !yaml_emitter_write_indent(emitter) { + return false + } + } + if !write(emitter, value, &i) { + return false + } + emitter.indention = false + breaks = false + } + } + + return true +} + +func yaml_emitter_write_folded_scalar(emitter *yaml_emitter_t, value []byte) bool { + if !yaml_emitter_write_indicator(emitter, []byte{'>'}, true, false, false) { + return false + } + if !yaml_emitter_write_block_scalar_hints(emitter, value) { + return false + } + + if !put_break(emitter) { + return false + } + emitter.indention = true + emitter.whitespace = true + + breaks := true + leading_spaces := true + for i := 0; i < len(value); { + if is_break(value, i) { + if !breaks && !leading_spaces && value[i] == '\n' { + k := 0 + for is_break(value, k) { + k += width(value[k]) + } + if !is_blankz(value, k) { + if !put_break(emitter) { + return false + } + } + } + if !write_break(emitter, value, &i) { + return false + } + emitter.indention = true + breaks = true + } else { + if breaks { + if !yaml_emitter_write_indent(emitter) { + return false + } + leading_spaces = is_blank(value, i) + } + if !breaks && is_space(value, i) && !is_space(value, i+1) && emitter.column > emitter.best_width { + if !yaml_emitter_write_indent(emitter) { + return false + } + i += width(value[i]) + } else { + if !write(emitter, value, &i) { + return false + } + } + emitter.indention = false + breaks = false + } + } + return true +} diff --git a/goyaml.v2/encode.go b/goyaml.v2/encode.go new file mode 100644 index 0000000..0ee738e --- /dev/null +++ b/goyaml.v2/encode.go @@ -0,0 +1,390 @@ +package yaml + +import ( + "encoding" + "fmt" + "io" + "reflect" + "regexp" + "sort" + "strconv" + "strings" + "time" + "unicode/utf8" +) + +// jsonNumber is the interface of the encoding/json.Number datatype. +// Repeating the interface here avoids a dependency on encoding/json, and also +// supports other libraries like jsoniter, which use a similar datatype with +// the same interface. Detecting this interface is useful when dealing with +// structures containing json.Number, which is a string under the hood. The +// encoder should prefer the use of Int64(), Float64() and string(), in that +// order, when encoding this type. +type jsonNumber interface { + Float64() (float64, error) + Int64() (int64, error) + String() string +} + +type encoder struct { + emitter yaml_emitter_t + event yaml_event_t + out []byte + flow bool + // doneInit holds whether the initial stream_start_event has been + // emitted. + doneInit bool +} + +func newEncoder() *encoder { + e := &encoder{} + yaml_emitter_initialize(&e.emitter) + yaml_emitter_set_output_string(&e.emitter, &e.out) + yaml_emitter_set_unicode(&e.emitter, true) + return e +} + +func newEncoderWithWriter(w io.Writer) *encoder { + e := &encoder{} + yaml_emitter_initialize(&e.emitter) + yaml_emitter_set_output_writer(&e.emitter, w) + yaml_emitter_set_unicode(&e.emitter, true) + return e +} + +func (e *encoder) init() { + if e.doneInit { + return + } + yaml_stream_start_event_initialize(&e.event, yaml_UTF8_ENCODING) + e.emit() + e.doneInit = true +} + +func (e *encoder) finish() { + e.emitter.open_ended = false + yaml_stream_end_event_initialize(&e.event) + e.emit() +} + +func (e *encoder) destroy() { + yaml_emitter_delete(&e.emitter) +} + +func (e *encoder) emit() { + // This will internally delete the e.event value. + e.must(yaml_emitter_emit(&e.emitter, &e.event)) +} + +func (e *encoder) must(ok bool) { + if !ok { + msg := e.emitter.problem + if msg == "" { + msg = "unknown problem generating YAML content" + } + failf("%s", msg) + } +} + +func (e *encoder) marshalDoc(tag string, in reflect.Value) { + e.init() + yaml_document_start_event_initialize(&e.event, nil, nil, true) + e.emit() + e.marshal(tag, in) + yaml_document_end_event_initialize(&e.event, true) + e.emit() +} + +func (e *encoder) marshal(tag string, in reflect.Value) { + if !in.IsValid() || in.Kind() == reflect.Ptr && in.IsNil() { + e.nilv() + return + } + iface := in.Interface() + switch m := iface.(type) { + case jsonNumber: + integer, err := m.Int64() + if err == nil { + // In this case the json.Number is a valid int64 + in = reflect.ValueOf(integer) + break + } + float, err := m.Float64() + if err == nil { + // In this case the json.Number is a valid float64 + in = reflect.ValueOf(float) + break + } + // fallback case - no number could be obtained + in = reflect.ValueOf(m.String()) + case time.Time, *time.Time: + // Although time.Time implements TextMarshaler, + // we don't want to treat it as a string for YAML + // purposes because YAML has special support for + // timestamps. + case Marshaler: + v, err := m.MarshalYAML() + if err != nil { + fail(err) + } + if v == nil { + e.nilv() + return + } + in = reflect.ValueOf(v) + case encoding.TextMarshaler: + text, err := m.MarshalText() + if err != nil { + fail(err) + } + in = reflect.ValueOf(string(text)) + case nil: + e.nilv() + return + } + switch in.Kind() { + case reflect.Interface: + e.marshal(tag, in.Elem()) + case reflect.Map: + e.mapv(tag, in) + case reflect.Ptr: + if in.Type() == ptrTimeType { + e.timev(tag, in.Elem()) + } else { + e.marshal(tag, in.Elem()) + } + case reflect.Struct: + if in.Type() == timeType { + e.timev(tag, in) + } else { + e.structv(tag, in) + } + case reflect.Slice, reflect.Array: + if in.Type().Elem() == mapItemType { + e.itemsv(tag, in) + } else { + e.slicev(tag, in) + } + case reflect.String: + e.stringv(tag, in) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + if in.Type() == durationType { + e.stringv(tag, reflect.ValueOf(iface.(time.Duration).String())) + } else { + e.intv(tag, in) + } + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + e.uintv(tag, in) + case reflect.Float32, reflect.Float64: + e.floatv(tag, in) + case reflect.Bool: + e.boolv(tag, in) + default: + panic("cannot marshal type: " + in.Type().String()) + } +} + +func (e *encoder) mapv(tag string, in reflect.Value) { + e.mappingv(tag, func() { + keys := keyList(in.MapKeys()) + sort.Sort(keys) + for _, k := range keys { + e.marshal("", k) + e.marshal("", in.MapIndex(k)) + } + }) +} + +func (e *encoder) itemsv(tag string, in reflect.Value) { + e.mappingv(tag, func() { + slice := in.Convert(reflect.TypeOf([]MapItem{})).Interface().([]MapItem) + for _, item := range slice { + e.marshal("", reflect.ValueOf(item.Key)) + e.marshal("", reflect.ValueOf(item.Value)) + } + }) +} + +func (e *encoder) structv(tag string, in reflect.Value) { + sinfo, err := getStructInfo(in.Type()) + if err != nil { + panic(err) + } + e.mappingv(tag, func() { + for _, info := range sinfo.FieldsList { + var value reflect.Value + if info.Inline == nil { + value = in.Field(info.Num) + } else { + value = in.FieldByIndex(info.Inline) + } + if info.OmitEmpty && isZero(value) { + continue + } + e.marshal("", reflect.ValueOf(info.Key)) + e.flow = info.Flow + e.marshal("", value) + } + if sinfo.InlineMap >= 0 { + m := in.Field(sinfo.InlineMap) + if m.Len() > 0 { + e.flow = false + keys := keyList(m.MapKeys()) + sort.Sort(keys) + for _, k := range keys { + if _, found := sinfo.FieldsMap[k.String()]; found { + panic(fmt.Sprintf("Can't have key %q in inlined map; conflicts with struct field", k.String())) + } + e.marshal("", k) + e.flow = false + e.marshal("", m.MapIndex(k)) + } + } + } + }) +} + +func (e *encoder) mappingv(tag string, f func()) { + implicit := tag == "" + style := yaml_BLOCK_MAPPING_STYLE + if e.flow { + e.flow = false + style = yaml_FLOW_MAPPING_STYLE + } + yaml_mapping_start_event_initialize(&e.event, nil, []byte(tag), implicit, style) + e.emit() + f() + yaml_mapping_end_event_initialize(&e.event) + e.emit() +} + +func (e *encoder) slicev(tag string, in reflect.Value) { + implicit := tag == "" + style := yaml_BLOCK_SEQUENCE_STYLE + if e.flow { + e.flow = false + style = yaml_FLOW_SEQUENCE_STYLE + } + e.must(yaml_sequence_start_event_initialize(&e.event, nil, []byte(tag), implicit, style)) + e.emit() + n := in.Len() + for i := 0; i < n; i++ { + e.marshal("", in.Index(i)) + } + e.must(yaml_sequence_end_event_initialize(&e.event)) + e.emit() +} + +// isBase60 returns whether s is in base 60 notation as defined in YAML 1.1. +// +// The base 60 float notation in YAML 1.1 is a terrible idea and is unsupported +// in YAML 1.2 and by this package, but these should be marshalled quoted for +// the time being for compatibility with other parsers. +func isBase60Float(s string) (result bool) { + // Fast path. + if s == "" { + return false + } + c := s[0] + if !(c == '+' || c == '-' || c >= '0' && c <= '9') || strings.IndexByte(s, ':') < 0 { + return false + } + // Do the full match. + return base60float.MatchString(s) +} + +// From http://yaml.org/type/float.html, except the regular expression there +// is bogus. In practice parsers do not enforce the "\.[0-9_]*" suffix. +var base60float = regexp.MustCompile(`^[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+(?:\.[0-9_]*)?$`) + +func (e *encoder) stringv(tag string, in reflect.Value) { + var style yaml_scalar_style_t + s := in.String() + canUsePlain := true + switch { + case !utf8.ValidString(s): + if tag == yaml_BINARY_TAG { + failf("explicitly tagged !!binary data must be base64-encoded") + } + if tag != "" { + failf("cannot marshal invalid UTF-8 data as %s", shortTag(tag)) + } + // It can't be encoded directly as YAML so use a binary tag + // and encode it as base64. + tag = yaml_BINARY_TAG + s = encodeBase64(s) + case tag == "": + // Check to see if it would resolve to a specific + // tag when encoded unquoted. If it doesn't, + // there's no need to quote it. + rtag, _ := resolve("", s) + canUsePlain = rtag == yaml_STR_TAG && !isBase60Float(s) + } + // Note: it's possible for user code to emit invalid YAML + // if they explicitly specify a tag and a string containing + // text that's incompatible with that tag. + switch { + case strings.Contains(s, "\n"): + style = yaml_LITERAL_SCALAR_STYLE + case canUsePlain: + style = yaml_PLAIN_SCALAR_STYLE + default: + style = yaml_DOUBLE_QUOTED_SCALAR_STYLE + } + e.emitScalar(s, "", tag, style) +} + +func (e *encoder) boolv(tag string, in reflect.Value) { + var s string + if in.Bool() { + s = "true" + } else { + s = "false" + } + e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE) +} + +func (e *encoder) intv(tag string, in reflect.Value) { + s := strconv.FormatInt(in.Int(), 10) + e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE) +} + +func (e *encoder) uintv(tag string, in reflect.Value) { + s := strconv.FormatUint(in.Uint(), 10) + e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE) +} + +func (e *encoder) timev(tag string, in reflect.Value) { + t := in.Interface().(time.Time) + s := t.Format(time.RFC3339Nano) + e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE) +} + +func (e *encoder) floatv(tag string, in reflect.Value) { + // Issue #352: When formatting, use the precision of the underlying value + precision := 64 + if in.Kind() == reflect.Float32 { + precision = 32 + } + + s := strconv.FormatFloat(in.Float(), 'g', -1, precision) + switch s { + case "+Inf": + s = ".inf" + case "-Inf": + s = "-.inf" + case "NaN": + s = ".nan" + } + e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE) +} + +func (e *encoder) nilv() { + e.emitScalar("null", "", "", yaml_PLAIN_SCALAR_STYLE) +} + +func (e *encoder) emitScalar(value, anchor, tag string, style yaml_scalar_style_t) { + implicit := tag == "" + e.must(yaml_scalar_event_initialize(&e.event, []byte(anchor), []byte(tag), []byte(value), implicit, implicit, style)) + e.emit() +} diff --git a/goyaml.v2/encode_test.go b/goyaml.v2/encode_test.go new file mode 100644 index 0000000..c8caedc --- /dev/null +++ b/goyaml.v2/encode_test.go @@ -0,0 +1,646 @@ +package yaml_test + +import ( + "bytes" + "fmt" + "math" + "strconv" + "strings" + "time" + + "net" + "os" + + . "gopkg.in/check.v1" + "gopkg.in/yaml.v2" +) + +type jsonNumberT string + +func (j jsonNumberT) Int64() (int64, error) { + val, err := strconv.Atoi(string(j)) + if err != nil { + return 0, err + } + return int64(val), nil +} + +func (j jsonNumberT) Float64() (float64, error) { + return strconv.ParseFloat(string(j), 64) +} + +func (j jsonNumberT) String() string { + return string(j) +} + +var marshalIntTest = 123 + +var marshalTests = []struct { + value interface{} + data string +}{ + { + nil, + "null\n", + }, { + (*marshalerType)(nil), + "null\n", + }, { + &struct{}{}, + "{}\n", + }, { + map[string]string{"v": "hi"}, + "v: hi\n", + }, { + map[string]interface{}{"v": "hi"}, + "v: hi\n", + }, { + map[string]string{"v": "true"}, + "v: \"true\"\n", + }, { + map[string]string{"v": "false"}, + "v: \"false\"\n", + }, { + map[string]interface{}{"v": true}, + "v: true\n", + }, { + map[string]interface{}{"v": false}, + "v: false\n", + }, { + map[string]interface{}{"v": 10}, + "v: 10\n", + }, { + map[string]interface{}{"v": -10}, + "v: -10\n", + }, { + map[string]uint{"v": 42}, + "v: 42\n", + }, { + map[string]interface{}{"v": int64(4294967296)}, + "v: 4294967296\n", + }, { + map[string]int64{"v": int64(4294967296)}, + "v: 4294967296\n", + }, { + map[string]uint64{"v": 4294967296}, + "v: 4294967296\n", + }, { + map[string]interface{}{"v": "10"}, + "v: \"10\"\n", + }, { + map[string]interface{}{"v": 0.1}, + "v: 0.1\n", + }, { + map[string]interface{}{"v": float64(0.1)}, + "v: 0.1\n", + }, { + map[string]interface{}{"v": float32(0.99)}, + "v: 0.99\n", + }, { + map[string]interface{}{"v": -0.1}, + "v: -0.1\n", + }, { + map[string]interface{}{"v": math.Inf(+1)}, + "v: .inf\n", + }, { + map[string]interface{}{"v": math.Inf(-1)}, + "v: -.inf\n", + }, { + map[string]interface{}{"v": math.NaN()}, + "v: .nan\n", + }, { + map[string]interface{}{"v": nil}, + "v: null\n", + }, { + map[string]interface{}{"v": ""}, + "v: \"\"\n", + }, { + map[string][]string{"v": []string{"A", "B"}}, + "v:\n- A\n- B\n", + }, { + map[string][]string{"v": []string{"A", "B\nC"}}, + "v:\n- A\n- |-\n B\n C\n", + }, { + map[string][]interface{}{"v": []interface{}{"A", 1, map[string][]int{"B": []int{2, 3}}}}, + "v:\n- A\n- 1\n- B:\n - 2\n - 3\n", + }, { + map[string]interface{}{"a": map[interface{}]interface{}{"b": "c"}}, + "a:\n b: c\n", + }, { + map[string]interface{}{"a": "-"}, + "a: '-'\n", + }, + + // Simple values. + { + &marshalIntTest, + "123\n", + }, + + // Structures + { + &struct{ Hello string }{"world"}, + "hello: world\n", + }, { + &struct { + A struct { + B string + } + }{struct{ B string }{"c"}}, + "a:\n b: c\n", + }, { + &struct { + A *struct { + B string + } + }{&struct{ B string }{"c"}}, + "a:\n b: c\n", + }, { + &struct { + A *struct { + B string + } + }{}, + "a: null\n", + }, { + &struct{ A int }{1}, + "a: 1\n", + }, { + &struct{ A []int }{[]int{1, 2}}, + "a:\n- 1\n- 2\n", + }, { + &struct{ A [2]int }{[2]int{1, 2}}, + "a:\n- 1\n- 2\n", + }, { + &struct { + B int "a" + }{1}, + "a: 1\n", + }, { + &struct{ A bool }{true}, + "a: true\n", + }, + + // Conditional flag + { + &struct { + A int "a,omitempty" + B int "b,omitempty" + }{1, 0}, + "a: 1\n", + }, { + &struct { + A int "a,omitempty" + B int "b,omitempty" + }{0, 0}, + "{}\n", + }, { + &struct { + A *struct{ X, y int } "a,omitempty,flow" + }{&struct{ X, y int }{1, 2}}, + "a: {x: 1}\n", + }, { + &struct { + A *struct{ X, y int } "a,omitempty,flow" + }{nil}, + "{}\n", + }, { + &struct { + A *struct{ X, y int } "a,omitempty,flow" + }{&struct{ X, y int }{}}, + "a: {x: 0}\n", + }, { + &struct { + A struct{ X, y int } "a,omitempty,flow" + }{struct{ X, y int }{1, 2}}, + "a: {x: 1}\n", + }, { + &struct { + A struct{ X, y int } "a,omitempty,flow" + }{struct{ X, y int }{0, 1}}, + "{}\n", + }, { + &struct { + A float64 "a,omitempty" + B float64 "b,omitempty" + }{1, 0}, + "a: 1\n", + }, + { + &struct { + T1 time.Time "t1,omitempty" + T2 time.Time "t2,omitempty" + T3 *time.Time "t3,omitempty" + T4 *time.Time "t4,omitempty" + }{ + T2: time.Date(2018, 1, 9, 10, 40, 47, 0, time.UTC), + T4: newTime(time.Date(2098, 1, 9, 10, 40, 47, 0, time.UTC)), + }, + "t2: 2018-01-09T10:40:47Z\nt4: 2098-01-09T10:40:47Z\n", + }, + // Nil interface that implements Marshaler. + { + map[string]yaml.Marshaler{ + "a": nil, + }, + "a: null\n", + }, + + // Flow flag + { + &struct { + A []int "a,flow" + }{[]int{1, 2}}, + "a: [1, 2]\n", + }, { + &struct { + A map[string]string "a,flow" + }{map[string]string{"b": "c", "d": "e"}}, + "a: {b: c, d: e}\n", + }, { + &struct { + A struct { + B, D string + } "a,flow" + }{struct{ B, D string }{"c", "e"}}, + "a: {b: c, d: e}\n", + }, + + // Unexported field + { + &struct { + u int + A int + }{0, 1}, + "a: 1\n", + }, + + // Ignored field + { + &struct { + A int + B int "-" + }{1, 2}, + "a: 1\n", + }, + + // Struct inlining + { + &struct { + A int + C inlineB `yaml:",inline"` + }{1, inlineB{2, inlineC{3}}}, + "a: 1\nb: 2\nc: 3\n", + }, + + // Map inlining + { + &struct { + A int + C map[string]int `yaml:",inline"` + }{1, map[string]int{"b": 2, "c": 3}}, + "a: 1\nb: 2\nc: 3\n", + }, + + // Duration + { + map[string]time.Duration{"a": 3 * time.Second}, + "a: 3s\n", + }, + + // Issue #24: bug in map merging logic. + { + map[string]string{"a": ""}, + "a: \n", + }, + + // Issue #34: marshal unsupported base 60 floats quoted for compatibility + // with old YAML 1.1 parsers. + { + map[string]string{"a": "1:1"}, + "a: \"1:1\"\n", + }, + + // Binary data. + { + map[string]string{"a": "\x00"}, + "a: \"\\0\"\n", + }, { + map[string]string{"a": "\x80\x81\x82"}, + "a: !!binary gIGC\n", + }, { + map[string]string{"a": strings.Repeat("\x90", 54)}, + "a: !!binary |\n " + strings.Repeat("kJCQ", 17) + "kJ\n CQ\n", + }, + + // Ordered maps. + { + &yaml.MapSlice{{"b", 2}, {"a", 1}, {"d", 4}, {"c", 3}, {"sub", yaml.MapSlice{{"e", 5}}}}, + "b: 2\na: 1\nd: 4\nc: 3\nsub:\n e: 5\n", + }, + + // Encode unicode as utf-8 rather than in escaped form. + { + map[string]string{"a": "你好"}, + "a: 你好\n", + }, + + // Support encoding.TextMarshaler. + { + map[string]net.IP{"a": net.IPv4(1, 2, 3, 4)}, + "a: 1.2.3.4\n", + }, + // time.Time gets a timestamp tag. + { + map[string]time.Time{"a": time.Date(2015, 2, 24, 18, 19, 39, 0, time.UTC)}, + "a: 2015-02-24T18:19:39Z\n", + }, + { + map[string]*time.Time{"a": newTime(time.Date(2015, 2, 24, 18, 19, 39, 0, time.UTC))}, + "a: 2015-02-24T18:19:39Z\n", + }, + { + // This is confirmed to be properly decoded in Python (libyaml) without a timestamp tag. + map[string]time.Time{"a": time.Date(2015, 2, 24, 18, 19, 39, 123456789, time.FixedZone("FOO", -3*60*60))}, + "a: 2015-02-24T18:19:39.123456789-03:00\n", + }, + // Ensure timestamp-like strings are quoted. + { + map[string]string{"a": "2015-02-24T18:19:39Z"}, + "a: \"2015-02-24T18:19:39Z\"\n", + }, + + // Ensure strings containing ": " are quoted (reported as PR #43, but not reproducible). + { + map[string]string{"a": "b: c"}, + "a: 'b: c'\n", + }, + + // Containing hash mark ('#') in string should be quoted + { + map[string]string{"a": "Hello #comment"}, + "a: 'Hello #comment'\n", + }, + { + map[string]string{"a": "你好 #comment"}, + "a: '你好 #comment'\n", + }, + { + map[string]interface{}{"a": jsonNumberT("5")}, + "a: 5\n", + }, + { + map[string]interface{}{"a": jsonNumberT("100.5")}, + "a: 100.5\n", + }, + { + map[string]interface{}{"a": jsonNumberT("bogus")}, + "a: bogus\n", + }, +} + +func (s *S) TestLineWrapping(c *C) { + var v = map[string]string{ + "a": "abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890 abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890 ", + } + data, err := yaml.Marshal(v) + c.Assert(err, IsNil) + c.Assert(string(data), Equals, + "a: 'abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890 abcdefghijklmnopqrstuvwxyz\n" + + " ABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890 '\n") + + // The API does not allow this process to be reversed as it's intended + // for migration only. v3 drops this method and instead offers more + // control on a per encoding basis. + yaml.FutureLineWrap() + + data, err = yaml.Marshal(v) + c.Assert(err, IsNil) + c.Assert(string(data), Equals, + "a: 'abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890 abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890 '\n") +} + +func (s *S) TestMarshal(c *C) { + defer os.Setenv("TZ", os.Getenv("TZ")) + os.Setenv("TZ", "UTC") + for i, item := range marshalTests { + c.Logf("test %d: %q", i, item.data) + data, err := yaml.Marshal(item.value) + c.Assert(err, IsNil) + c.Assert(string(data), Equals, item.data) + } +} + +func (s *S) TestEncoderSingleDocument(c *C) { + for i, item := range marshalTests { + c.Logf("test %d. %q", i, item.data) + var buf bytes.Buffer + enc := yaml.NewEncoder(&buf) + err := enc.Encode(item.value) + c.Assert(err, Equals, nil) + err = enc.Close() + c.Assert(err, Equals, nil) + c.Assert(buf.String(), Equals, item.data) + } +} + +func (s *S) TestEncoderMultipleDocuments(c *C) { + var buf bytes.Buffer + enc := yaml.NewEncoder(&buf) + err := enc.Encode(map[string]string{"a": "b"}) + c.Assert(err, Equals, nil) + err = enc.Encode(map[string]string{"c": "d"}) + c.Assert(err, Equals, nil) + err = enc.Close() + c.Assert(err, Equals, nil) + c.Assert(buf.String(), Equals, "a: b\n---\nc: d\n") +} + +func (s *S) TestEncoderWriteError(c *C) { + enc := yaml.NewEncoder(errorWriter{}) + err := enc.Encode(map[string]string{"a": "b"}) + c.Assert(err, ErrorMatches, `yaml: write error: some write error`) // Data not flushed yet +} + +type errorWriter struct{} + +func (errorWriter) Write([]byte) (int, error) { + return 0, fmt.Errorf("some write error") +} + +var marshalErrorTests = []struct { + value interface{} + error string + panic string +}{{ + value: &struct { + B int + inlineB ",inline" + }{1, inlineB{2, inlineC{3}}}, + panic: `Duplicated key 'b' in struct struct \{ B int; .*`, +}, { + value: &struct { + A int + B map[string]int ",inline" + }{1, map[string]int{"a": 2}}, + panic: `Can't have key "a" in inlined map; conflicts with struct field`, +}} + +func (s *S) TestMarshalErrors(c *C) { + for _, item := range marshalErrorTests { + if item.panic != "" { + c.Assert(func() { yaml.Marshal(item.value) }, PanicMatches, item.panic) + } else { + _, err := yaml.Marshal(item.value) + c.Assert(err, ErrorMatches, item.error) + } + } +} + +func (s *S) TestMarshalTypeCache(c *C) { + var data []byte + var err error + func() { + type T struct{ A int } + data, err = yaml.Marshal(&T{}) + c.Assert(err, IsNil) + }() + func() { + type T struct{ B int } + data, err = yaml.Marshal(&T{}) + c.Assert(err, IsNil) + }() + c.Assert(string(data), Equals, "b: 0\n") +} + +var marshalerTests = []struct { + data string + value interface{} +}{ + {"_:\n hi: there\n", map[interface{}]interface{}{"hi": "there"}}, + {"_:\n- 1\n- A\n", []interface{}{1, "A"}}, + {"_: 10\n", 10}, + {"_: null\n", nil}, + {"_: BAR!\n", "BAR!"}, +} + +type marshalerType struct { + value interface{} +} + +func (o marshalerType) MarshalText() ([]byte, error) { + panic("MarshalText called on type with MarshalYAML") +} + +func (o marshalerType) MarshalYAML() (interface{}, error) { + return o.value, nil +} + +type marshalerValue struct { + Field marshalerType "_" +} + +func (s *S) TestMarshaler(c *C) { + for _, item := range marshalerTests { + obj := &marshalerValue{} + obj.Field.value = item.value + data, err := yaml.Marshal(obj) + c.Assert(err, IsNil) + c.Assert(string(data), Equals, string(item.data)) + } +} + +func (s *S) TestMarshalerWholeDocument(c *C) { + obj := &marshalerType{} + obj.value = map[string]string{"hello": "world!"} + data, err := yaml.Marshal(obj) + c.Assert(err, IsNil) + c.Assert(string(data), Equals, "hello: world!\n") +} + +type failingMarshaler struct{} + +func (ft *failingMarshaler) MarshalYAML() (interface{}, error) { + return nil, failingErr +} + +func (s *S) TestMarshalerError(c *C) { + _, err := yaml.Marshal(&failingMarshaler{}) + c.Assert(err, Equals, failingErr) +} + +func (s *S) TestSortedOutput(c *C) { + order := []interface{}{ + false, + true, + 1, + uint(1), + 1.0, + 1.1, + 1.2, + 2, + uint(2), + 2.0, + 2.1, + "", + ".1", + ".2", + ".a", + "1", + "2", + "a!10", + "a/0001", + "a/002", + "a/3", + "a/10", + "a/11", + "a/0012", + "a/100", + "a~10", + "ab/1", + "b/1", + "b/01", + "b/2", + "b/02", + "b/3", + "b/03", + "b1", + "b01", + "b3", + "c2.10", + "c10.2", + "d1", + "d7", + "d7abc", + "d12", + "d12a", + } + m := make(map[interface{}]int) + for _, k := range order { + m[k] = 1 + } + data, err := yaml.Marshal(m) + c.Assert(err, IsNil) + out := "\n" + string(data) + last := 0 + for i, k := range order { + repr := fmt.Sprint(k) + if s, ok := k.(string); ok { + if _, err = strconv.ParseFloat(repr, 32); s == "" || err == nil { + repr = `"` + repr + `"` + } + } + index := strings.Index(out, "\n"+repr+":") + if index == -1 { + c.Fatalf("%#v is not in the output: %#v", k, out) + } + if index < last { + c.Fatalf("%#v was generated before %#v: %q", k, order[i-1], out) + } + last = index + } +} + +func newTime(t time.Time) *time.Time { + return &t +} diff --git a/goyaml.v2/example_embedded_test.go b/goyaml.v2/example_embedded_test.go new file mode 100644 index 0000000..171c093 --- /dev/null +++ b/goyaml.v2/example_embedded_test.go @@ -0,0 +1,41 @@ +package yaml_test + +import ( + "fmt" + "log" + + "gopkg.in/yaml.v2" +) + +// An example showing how to unmarshal embedded +// structs from YAML. + +type StructA struct { + A string `yaml:"a"` +} + +type StructB struct { + // Embedded structs are not treated as embedded in YAML by default. To do that, + // add the ",inline" annotation below + StructA `yaml:",inline"` + B string `yaml:"b"` +} + +var data = ` +a: a string from struct A +b: a string from struct B +` + +func ExampleUnmarshal_embedded() { + var b StructB + + err := yaml.Unmarshal([]byte(data), &b) + if err != nil { + log.Fatalf("cannot unmarshal data: %v", err) + } + fmt.Println(b.A) + fmt.Println(b.B) + // Output: + // a string from struct A + // a string from struct B +} diff --git a/goyaml.v2/go.mod b/goyaml.v2/go.mod new file mode 100644 index 0000000..2cbb85a --- /dev/null +++ b/goyaml.v2/go.mod @@ -0,0 +1,5 @@ +module gopkg.in/yaml.v2 + +go 1.15 + +require gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 diff --git a/goyaml.v2/limit_test.go b/goyaml.v2/limit_test.go new file mode 100644 index 0000000..8d8ec2d --- /dev/null +++ b/goyaml.v2/limit_test.go @@ -0,0 +1,128 @@ +package yaml_test + +import ( + "strings" + "testing" + + . "gopkg.in/check.v1" + "gopkg.in/yaml.v2" +) + +var limitTests = []struct { + name string + data []byte + error string +}{ + { + name: "1000kb of maps with 100 aliases", + data: []byte(`{a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-100) + `], b: &b [*a` + strings.Repeat(`,*a`, 99) + `]}`), + error: "yaml: document contains excessive aliasing", + }, { + name: "1000kb of deeply nested slices", + data: []byte(strings.Repeat(`[`, 1000*1024)), + error: "yaml: exceeded max depth of 10000", + }, { + name: "1000kb of deeply nested maps", + data: []byte("x: " + strings.Repeat(`{`, 1000*1024)), + error: "yaml: exceeded max depth of 10000", + }, { + name: "1000kb of deeply nested indents", + data: []byte(strings.Repeat(`- `, 1000*1024)), + error: "yaml: exceeded max depth of 10000", + }, { + name: "1000kb of 1000-indent lines", + data: []byte(strings.Repeat(strings.Repeat(`- `, 1000)+"\n", 1024/2)), + }, + {name: "1kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1*1024/4-1) + `]`)}, + {name: "10kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 10*1024/4-1) + `]`)}, + {name: "100kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 100*1024/4-1) + `]`)}, + {name: "1000kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-1) + `]`)}, + {name: "1000kb slice nested at max-depth", data: []byte(strings.Repeat(`[`, 10000) + `1` + strings.Repeat(`,1`, 1000*1024/2-20000-1) + strings.Repeat(`]`, 10000))}, + {name: "1000kb slice nested in maps at max-depth", data: []byte("{a,b:\n" + strings.Repeat(" {a,b:", 10000-2) + ` [1` + strings.Repeat(",1", 1000*1024/2-6*10000-1) + `]` + strings.Repeat(`}`, 10000-1))}, + {name: "1000kb of 10000-nested lines", data: []byte(strings.Repeat(`- `+strings.Repeat(`[`, 10000)+strings.Repeat(`]`, 10000)+"\n", 1000*1024/20000))}, +} + +func (s *S) TestLimits(c *C) { + if testing.Short() { + return + } + for _, tc := range limitTests { + var v interface{} + err := yaml.Unmarshal(tc.data, &v) + if len(tc.error) > 0 { + c.Assert(err, ErrorMatches, tc.error, Commentf("testcase: %s", tc.name)) + } else { + c.Assert(err, IsNil, Commentf("testcase: %s", tc.name)) + } + } +} + +func Benchmark1000KB100Aliases(b *testing.B) { + benchmark(b, "1000kb of maps with 100 aliases") +} +func Benchmark1000KBDeeplyNestedSlices(b *testing.B) { + benchmark(b, "1000kb of deeply nested slices") +} +func Benchmark1000KBDeeplyNestedMaps(b *testing.B) { + benchmark(b, "1000kb of deeply nested maps") +} +func Benchmark1000KBDeeplyNestedIndents(b *testing.B) { + benchmark(b, "1000kb of deeply nested indents") +} +func Benchmark1000KB1000IndentLines(b *testing.B) { + benchmark(b, "1000kb of 1000-indent lines") +} +func Benchmark1KBMaps(b *testing.B) { + benchmark(b, "1kb of maps") +} +func Benchmark10KBMaps(b *testing.B) { + benchmark(b, "10kb of maps") +} +func Benchmark100KBMaps(b *testing.B) { + benchmark(b, "100kb of maps") +} +func Benchmark1000KBMaps(b *testing.B) { + benchmark(b, "1000kb of maps") +} + +func BenchmarkDeepSlice(b *testing.B) { + benchmark(b, "1000kb slice nested at max-depth") +} + +func BenchmarkDeepFlow(b *testing.B) { + benchmark(b, "1000kb slice nested in maps at max-depth") +} + +func Benchmark1000KBMaxDepthNested(b *testing.B) { + benchmark(b, "1000kb of 10000-nested lines") +} + +func benchmark(b *testing.B, name string) { + for _, t := range limitTests { + if t.name != name { + continue + } + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + var v interface{} + err := yaml.Unmarshal(t.data, &v) + if len(t.error) > 0 { + if err == nil { + b.Errorf("expected error, got none") + } else if err.Error() != t.error { + b.Errorf("expected error '%s', got '%s'", t.error, err.Error()) + } + } else { + if err != nil { + b.Errorf("unexpected error: %v", err) + } + } + } + + return + } + + b.Errorf("testcase %q not found", name) +} diff --git a/goyaml.v2/parserc.go b/goyaml.v2/parserc.go new file mode 100644 index 0000000..81d05df --- /dev/null +++ b/goyaml.v2/parserc.go @@ -0,0 +1,1095 @@ +package yaml + +import ( + "bytes" +) + +// The parser implements the following grammar: +// +// stream ::= STREAM-START implicit_document? explicit_document* STREAM-END +// implicit_document ::= block_node DOCUMENT-END* +// explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* +// block_node_or_indentless_sequence ::= +// ALIAS +// | properties (block_content | indentless_block_sequence)? +// | block_content +// | indentless_block_sequence +// block_node ::= ALIAS +// | properties block_content? +// | block_content +// flow_node ::= ALIAS +// | properties flow_content? +// | flow_content +// properties ::= TAG ANCHOR? | ANCHOR TAG? +// block_content ::= block_collection | flow_collection | SCALAR +// flow_content ::= flow_collection | SCALAR +// block_collection ::= block_sequence | block_mapping +// flow_collection ::= flow_sequence | flow_mapping +// block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END +// indentless_sequence ::= (BLOCK-ENTRY block_node?)+ +// block_mapping ::= BLOCK-MAPPING_START +// ((KEY block_node_or_indentless_sequence?)? +// (VALUE block_node_or_indentless_sequence?)?)* +// BLOCK-END +// flow_sequence ::= FLOW-SEQUENCE-START +// (flow_sequence_entry FLOW-ENTRY)* +// flow_sequence_entry? +// FLOW-SEQUENCE-END +// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +// flow_mapping ::= FLOW-MAPPING-START +// (flow_mapping_entry FLOW-ENTRY)* +// flow_mapping_entry? +// FLOW-MAPPING-END +// flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + +// Peek the next token in the token queue. +func peek_token(parser *yaml_parser_t) *yaml_token_t { + if parser.token_available || yaml_parser_fetch_more_tokens(parser) { + return &parser.tokens[parser.tokens_head] + } + return nil +} + +// Remove the next token from the queue (must be called after peek_token). +func skip_token(parser *yaml_parser_t) { + parser.token_available = false + parser.tokens_parsed++ + parser.stream_end_produced = parser.tokens[parser.tokens_head].typ == yaml_STREAM_END_TOKEN + parser.tokens_head++ +} + +// Get the next event. +func yaml_parser_parse(parser *yaml_parser_t, event *yaml_event_t) bool { + // Erase the event object. + *event = yaml_event_t{} + + // No events after the end of the stream or error. + if parser.stream_end_produced || parser.error != yaml_NO_ERROR || parser.state == yaml_PARSE_END_STATE { + return true + } + + // Generate the next event. + return yaml_parser_state_machine(parser, event) +} + +// Set parser error. +func yaml_parser_set_parser_error(parser *yaml_parser_t, problem string, problem_mark yaml_mark_t) bool { + parser.error = yaml_PARSER_ERROR + parser.problem = problem + parser.problem_mark = problem_mark + return false +} + +func yaml_parser_set_parser_error_context(parser *yaml_parser_t, context string, context_mark yaml_mark_t, problem string, problem_mark yaml_mark_t) bool { + parser.error = yaml_PARSER_ERROR + parser.context = context + parser.context_mark = context_mark + parser.problem = problem + parser.problem_mark = problem_mark + return false +} + +// State dispatcher. +func yaml_parser_state_machine(parser *yaml_parser_t, event *yaml_event_t) bool { + //trace("yaml_parser_state_machine", "state:", parser.state.String()) + + switch parser.state { + case yaml_PARSE_STREAM_START_STATE: + return yaml_parser_parse_stream_start(parser, event) + + case yaml_PARSE_IMPLICIT_DOCUMENT_START_STATE: + return yaml_parser_parse_document_start(parser, event, true) + + case yaml_PARSE_DOCUMENT_START_STATE: + return yaml_parser_parse_document_start(parser, event, false) + + case yaml_PARSE_DOCUMENT_CONTENT_STATE: + return yaml_parser_parse_document_content(parser, event) + + case yaml_PARSE_DOCUMENT_END_STATE: + return yaml_parser_parse_document_end(parser, event) + + case yaml_PARSE_BLOCK_NODE_STATE: + return yaml_parser_parse_node(parser, event, true, false) + + case yaml_PARSE_BLOCK_NODE_OR_INDENTLESS_SEQUENCE_STATE: + return yaml_parser_parse_node(parser, event, true, true) + + case yaml_PARSE_FLOW_NODE_STATE: + return yaml_parser_parse_node(parser, event, false, false) + + case yaml_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE: + return yaml_parser_parse_block_sequence_entry(parser, event, true) + + case yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE: + return yaml_parser_parse_block_sequence_entry(parser, event, false) + + case yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE: + return yaml_parser_parse_indentless_sequence_entry(parser, event) + + case yaml_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE: + return yaml_parser_parse_block_mapping_key(parser, event, true) + + case yaml_PARSE_BLOCK_MAPPING_KEY_STATE: + return yaml_parser_parse_block_mapping_key(parser, event, false) + + case yaml_PARSE_BLOCK_MAPPING_VALUE_STATE: + return yaml_parser_parse_block_mapping_value(parser, event) + + case yaml_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE: + return yaml_parser_parse_flow_sequence_entry(parser, event, true) + + case yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE: + return yaml_parser_parse_flow_sequence_entry(parser, event, false) + + case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE: + return yaml_parser_parse_flow_sequence_entry_mapping_key(parser, event) + + case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE: + return yaml_parser_parse_flow_sequence_entry_mapping_value(parser, event) + + case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE: + return yaml_parser_parse_flow_sequence_entry_mapping_end(parser, event) + + case yaml_PARSE_FLOW_MAPPING_FIRST_KEY_STATE: + return yaml_parser_parse_flow_mapping_key(parser, event, true) + + case yaml_PARSE_FLOW_MAPPING_KEY_STATE: + return yaml_parser_parse_flow_mapping_key(parser, event, false) + + case yaml_PARSE_FLOW_MAPPING_VALUE_STATE: + return yaml_parser_parse_flow_mapping_value(parser, event, false) + + case yaml_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE: + return yaml_parser_parse_flow_mapping_value(parser, event, true) + + default: + panic("invalid parser state") + } +} + +// Parse the production: +// stream ::= STREAM-START implicit_document? explicit_document* STREAM-END +// ************ +func yaml_parser_parse_stream_start(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_STREAM_START_TOKEN { + return yaml_parser_set_parser_error(parser, "did not find expected ", token.start_mark) + } + parser.state = yaml_PARSE_IMPLICIT_DOCUMENT_START_STATE + *event = yaml_event_t{ + typ: yaml_STREAM_START_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + encoding: token.encoding, + } + skip_token(parser) + return true +} + +// Parse the productions: +// implicit_document ::= block_node DOCUMENT-END* +// * +// explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* +// ************************* +func yaml_parser_parse_document_start(parser *yaml_parser_t, event *yaml_event_t, implicit bool) bool { + + token := peek_token(parser) + if token == nil { + return false + } + + // Parse extra document end indicators. + if !implicit { + for token.typ == yaml_DOCUMENT_END_TOKEN { + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + } + } + + if implicit && token.typ != yaml_VERSION_DIRECTIVE_TOKEN && + token.typ != yaml_TAG_DIRECTIVE_TOKEN && + token.typ != yaml_DOCUMENT_START_TOKEN && + token.typ != yaml_STREAM_END_TOKEN { + // Parse an implicit document. + if !yaml_parser_process_directives(parser, nil, nil) { + return false + } + parser.states = append(parser.states, yaml_PARSE_DOCUMENT_END_STATE) + parser.state = yaml_PARSE_BLOCK_NODE_STATE + + *event = yaml_event_t{ + typ: yaml_DOCUMENT_START_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + } + + } else if token.typ != yaml_STREAM_END_TOKEN { + // Parse an explicit document. + var version_directive *yaml_version_directive_t + var tag_directives []yaml_tag_directive_t + start_mark := token.start_mark + if !yaml_parser_process_directives(parser, &version_directive, &tag_directives) { + return false + } + token = peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_DOCUMENT_START_TOKEN { + yaml_parser_set_parser_error(parser, + "did not find expected ", token.start_mark) + return false + } + parser.states = append(parser.states, yaml_PARSE_DOCUMENT_END_STATE) + parser.state = yaml_PARSE_DOCUMENT_CONTENT_STATE + end_mark := token.end_mark + + *event = yaml_event_t{ + typ: yaml_DOCUMENT_START_EVENT, + start_mark: start_mark, + end_mark: end_mark, + version_directive: version_directive, + tag_directives: tag_directives, + implicit: false, + } + skip_token(parser) + + } else { + // Parse the stream end. + parser.state = yaml_PARSE_END_STATE + *event = yaml_event_t{ + typ: yaml_STREAM_END_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + } + skip_token(parser) + } + + return true +} + +// Parse the productions: +// explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* +// *********** +// +func yaml_parser_parse_document_content(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + if token.typ == yaml_VERSION_DIRECTIVE_TOKEN || + token.typ == yaml_TAG_DIRECTIVE_TOKEN || + token.typ == yaml_DOCUMENT_START_TOKEN || + token.typ == yaml_DOCUMENT_END_TOKEN || + token.typ == yaml_STREAM_END_TOKEN { + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + return yaml_parser_process_empty_scalar(parser, event, + token.start_mark) + } + return yaml_parser_parse_node(parser, event, true, false) +} + +// Parse the productions: +// implicit_document ::= block_node DOCUMENT-END* +// ************* +// explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* +// +func yaml_parser_parse_document_end(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + + start_mark := token.start_mark + end_mark := token.start_mark + + implicit := true + if token.typ == yaml_DOCUMENT_END_TOKEN { + end_mark = token.end_mark + skip_token(parser) + implicit = false + } + + parser.tag_directives = parser.tag_directives[:0] + + parser.state = yaml_PARSE_DOCUMENT_START_STATE + *event = yaml_event_t{ + typ: yaml_DOCUMENT_END_EVENT, + start_mark: start_mark, + end_mark: end_mark, + implicit: implicit, + } + return true +} + +// Parse the productions: +// block_node_or_indentless_sequence ::= +// ALIAS +// ***** +// | properties (block_content | indentless_block_sequence)? +// ********** * +// | block_content | indentless_block_sequence +// * +// block_node ::= ALIAS +// ***** +// | properties block_content? +// ********** * +// | block_content +// * +// flow_node ::= ALIAS +// ***** +// | properties flow_content? +// ********** * +// | flow_content +// * +// properties ::= TAG ANCHOR? | ANCHOR TAG? +// ************************* +// block_content ::= block_collection | flow_collection | SCALAR +// ****** +// flow_content ::= flow_collection | SCALAR +// ****** +func yaml_parser_parse_node(parser *yaml_parser_t, event *yaml_event_t, block, indentless_sequence bool) bool { + //defer trace("yaml_parser_parse_node", "block:", block, "indentless_sequence:", indentless_sequence)() + + token := peek_token(parser) + if token == nil { + return false + } + + if token.typ == yaml_ALIAS_TOKEN { + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + *event = yaml_event_t{ + typ: yaml_ALIAS_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + anchor: token.value, + } + skip_token(parser) + return true + } + + start_mark := token.start_mark + end_mark := token.start_mark + + var tag_token bool + var tag_handle, tag_suffix, anchor []byte + var tag_mark yaml_mark_t + if token.typ == yaml_ANCHOR_TOKEN { + anchor = token.value + start_mark = token.start_mark + end_mark = token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ == yaml_TAG_TOKEN { + tag_token = true + tag_handle = token.value + tag_suffix = token.suffix + tag_mark = token.start_mark + end_mark = token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + } + } else if token.typ == yaml_TAG_TOKEN { + tag_token = true + tag_handle = token.value + tag_suffix = token.suffix + start_mark = token.start_mark + tag_mark = token.start_mark + end_mark = token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ == yaml_ANCHOR_TOKEN { + anchor = token.value + end_mark = token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + } + } + + var tag []byte + if tag_token { + if len(tag_handle) == 0 { + tag = tag_suffix + tag_suffix = nil + } else { + for i := range parser.tag_directives { + if bytes.Equal(parser.tag_directives[i].handle, tag_handle) { + tag = append([]byte(nil), parser.tag_directives[i].prefix...) + tag = append(tag, tag_suffix...) + break + } + } + if len(tag) == 0 { + yaml_parser_set_parser_error_context(parser, + "while parsing a node", start_mark, + "found undefined tag handle", tag_mark) + return false + } + } + } + + implicit := len(tag) == 0 + if indentless_sequence && token.typ == yaml_BLOCK_ENTRY_TOKEN { + end_mark = token.end_mark + parser.state = yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE + *event = yaml_event_t{ + typ: yaml_SEQUENCE_START_EVENT, + start_mark: start_mark, + end_mark: end_mark, + anchor: anchor, + tag: tag, + implicit: implicit, + style: yaml_style_t(yaml_BLOCK_SEQUENCE_STYLE), + } + return true + } + if token.typ == yaml_SCALAR_TOKEN { + var plain_implicit, quoted_implicit bool + end_mark = token.end_mark + if (len(tag) == 0 && token.style == yaml_PLAIN_SCALAR_STYLE) || (len(tag) == 1 && tag[0] == '!') { + plain_implicit = true + } else if len(tag) == 0 { + quoted_implicit = true + } + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + + *event = yaml_event_t{ + typ: yaml_SCALAR_EVENT, + start_mark: start_mark, + end_mark: end_mark, + anchor: anchor, + tag: tag, + value: token.value, + implicit: plain_implicit, + quoted_implicit: quoted_implicit, + style: yaml_style_t(token.style), + } + skip_token(parser) + return true + } + if token.typ == yaml_FLOW_SEQUENCE_START_TOKEN { + // [Go] Some of the events below can be merged as they differ only on style. + end_mark = token.end_mark + parser.state = yaml_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE + *event = yaml_event_t{ + typ: yaml_SEQUENCE_START_EVENT, + start_mark: start_mark, + end_mark: end_mark, + anchor: anchor, + tag: tag, + implicit: implicit, + style: yaml_style_t(yaml_FLOW_SEQUENCE_STYLE), + } + return true + } + if token.typ == yaml_FLOW_MAPPING_START_TOKEN { + end_mark = token.end_mark + parser.state = yaml_PARSE_FLOW_MAPPING_FIRST_KEY_STATE + *event = yaml_event_t{ + typ: yaml_MAPPING_START_EVENT, + start_mark: start_mark, + end_mark: end_mark, + anchor: anchor, + tag: tag, + implicit: implicit, + style: yaml_style_t(yaml_FLOW_MAPPING_STYLE), + } + return true + } + if block && token.typ == yaml_BLOCK_SEQUENCE_START_TOKEN { + end_mark = token.end_mark + parser.state = yaml_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE + *event = yaml_event_t{ + typ: yaml_SEQUENCE_START_EVENT, + start_mark: start_mark, + end_mark: end_mark, + anchor: anchor, + tag: tag, + implicit: implicit, + style: yaml_style_t(yaml_BLOCK_SEQUENCE_STYLE), + } + return true + } + if block && token.typ == yaml_BLOCK_MAPPING_START_TOKEN { + end_mark = token.end_mark + parser.state = yaml_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE + *event = yaml_event_t{ + typ: yaml_MAPPING_START_EVENT, + start_mark: start_mark, + end_mark: end_mark, + anchor: anchor, + tag: tag, + implicit: implicit, + style: yaml_style_t(yaml_BLOCK_MAPPING_STYLE), + } + return true + } + if len(anchor) > 0 || len(tag) > 0 { + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + + *event = yaml_event_t{ + typ: yaml_SCALAR_EVENT, + start_mark: start_mark, + end_mark: end_mark, + anchor: anchor, + tag: tag, + implicit: implicit, + quoted_implicit: false, + style: yaml_style_t(yaml_PLAIN_SCALAR_STYLE), + } + return true + } + + context := "while parsing a flow node" + if block { + context = "while parsing a block node" + } + yaml_parser_set_parser_error_context(parser, context, start_mark, + "did not find expected node content", token.start_mark) + return false +} + +// Parse the productions: +// block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END +// ******************** *********** * ********* +// +func yaml_parser_parse_block_sequence_entry(parser *yaml_parser_t, event *yaml_event_t, first bool) bool { + if first { + token := peek_token(parser) + parser.marks = append(parser.marks, token.start_mark) + skip_token(parser) + } + + token := peek_token(parser) + if token == nil { + return false + } + + if token.typ == yaml_BLOCK_ENTRY_TOKEN { + mark := token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_BLOCK_ENTRY_TOKEN && token.typ != yaml_BLOCK_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE) + return yaml_parser_parse_node(parser, event, true, false) + } else { + parser.state = yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE + return yaml_parser_process_empty_scalar(parser, event, mark) + } + } + if token.typ == yaml_BLOCK_END_TOKEN { + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + + *event = yaml_event_t{ + typ: yaml_SEQUENCE_END_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + } + + skip_token(parser) + return true + } + + context_mark := parser.marks[len(parser.marks)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + return yaml_parser_set_parser_error_context(parser, + "while parsing a block collection", context_mark, + "did not find expected '-' indicator", token.start_mark) +} + +// Parse the productions: +// indentless_sequence ::= (BLOCK-ENTRY block_node?)+ +// *********** * +func yaml_parser_parse_indentless_sequence_entry(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + + if token.typ == yaml_BLOCK_ENTRY_TOKEN { + mark := token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_BLOCK_ENTRY_TOKEN && + token.typ != yaml_KEY_TOKEN && + token.typ != yaml_VALUE_TOKEN && + token.typ != yaml_BLOCK_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE) + return yaml_parser_parse_node(parser, event, true, false) + } + parser.state = yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE + return yaml_parser_process_empty_scalar(parser, event, mark) + } + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + + *event = yaml_event_t{ + typ: yaml_SEQUENCE_END_EVENT, + start_mark: token.start_mark, + end_mark: token.start_mark, // [Go] Shouldn't this be token.end_mark? + } + return true +} + +// Parse the productions: +// block_mapping ::= BLOCK-MAPPING_START +// ******************* +// ((KEY block_node_or_indentless_sequence?)? +// *** * +// (VALUE block_node_or_indentless_sequence?)?)* +// +// BLOCK-END +// ********* +// +func yaml_parser_parse_block_mapping_key(parser *yaml_parser_t, event *yaml_event_t, first bool) bool { + if first { + token := peek_token(parser) + parser.marks = append(parser.marks, token.start_mark) + skip_token(parser) + } + + token := peek_token(parser) + if token == nil { + return false + } + + if token.typ == yaml_KEY_TOKEN { + mark := token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_KEY_TOKEN && + token.typ != yaml_VALUE_TOKEN && + token.typ != yaml_BLOCK_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_BLOCK_MAPPING_VALUE_STATE) + return yaml_parser_parse_node(parser, event, true, true) + } else { + parser.state = yaml_PARSE_BLOCK_MAPPING_VALUE_STATE + return yaml_parser_process_empty_scalar(parser, event, mark) + } + } else if token.typ == yaml_BLOCK_END_TOKEN { + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + *event = yaml_event_t{ + typ: yaml_MAPPING_END_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + } + skip_token(parser) + return true + } + + context_mark := parser.marks[len(parser.marks)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + return yaml_parser_set_parser_error_context(parser, + "while parsing a block mapping", context_mark, + "did not find expected key", token.start_mark) +} + +// Parse the productions: +// block_mapping ::= BLOCK-MAPPING_START +// +// ((KEY block_node_or_indentless_sequence?)? +// +// (VALUE block_node_or_indentless_sequence?)?)* +// ***** * +// BLOCK-END +// +// +func yaml_parser_parse_block_mapping_value(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + if token.typ == yaml_VALUE_TOKEN { + mark := token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_KEY_TOKEN && + token.typ != yaml_VALUE_TOKEN && + token.typ != yaml_BLOCK_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_BLOCK_MAPPING_KEY_STATE) + return yaml_parser_parse_node(parser, event, true, true) + } + parser.state = yaml_PARSE_BLOCK_MAPPING_KEY_STATE + return yaml_parser_process_empty_scalar(parser, event, mark) + } + parser.state = yaml_PARSE_BLOCK_MAPPING_KEY_STATE + return yaml_parser_process_empty_scalar(parser, event, token.start_mark) +} + +// Parse the productions: +// flow_sequence ::= FLOW-SEQUENCE-START +// ******************* +// (flow_sequence_entry FLOW-ENTRY)* +// * ********** +// flow_sequence_entry? +// * +// FLOW-SEQUENCE-END +// ***************** +// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +// * +// +func yaml_parser_parse_flow_sequence_entry(parser *yaml_parser_t, event *yaml_event_t, first bool) bool { + if first { + token := peek_token(parser) + parser.marks = append(parser.marks, token.start_mark) + skip_token(parser) + } + token := peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_FLOW_SEQUENCE_END_TOKEN { + if !first { + if token.typ == yaml_FLOW_ENTRY_TOKEN { + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + } else { + context_mark := parser.marks[len(parser.marks)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + return yaml_parser_set_parser_error_context(parser, + "while parsing a flow sequence", context_mark, + "did not find expected ',' or ']'", token.start_mark) + } + } + + if token.typ == yaml_KEY_TOKEN { + parser.state = yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE + *event = yaml_event_t{ + typ: yaml_MAPPING_START_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + implicit: true, + style: yaml_style_t(yaml_FLOW_MAPPING_STYLE), + } + skip_token(parser) + return true + } else if token.typ != yaml_FLOW_SEQUENCE_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE) + return yaml_parser_parse_node(parser, event, false, false) + } + } + + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + + *event = yaml_event_t{ + typ: yaml_SEQUENCE_END_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + } + + skip_token(parser) + return true +} + +// +// Parse the productions: +// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +// *** * +// +func yaml_parser_parse_flow_sequence_entry_mapping_key(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_VALUE_TOKEN && + token.typ != yaml_FLOW_ENTRY_TOKEN && + token.typ != yaml_FLOW_SEQUENCE_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE) + return yaml_parser_parse_node(parser, event, false, false) + } + mark := token.end_mark + skip_token(parser) + parser.state = yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE + return yaml_parser_process_empty_scalar(parser, event, mark) +} + +// Parse the productions: +// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +// ***** * +// +func yaml_parser_parse_flow_sequence_entry_mapping_value(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + if token.typ == yaml_VALUE_TOKEN { + skip_token(parser) + token := peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_FLOW_ENTRY_TOKEN && token.typ != yaml_FLOW_SEQUENCE_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE) + return yaml_parser_parse_node(parser, event, false, false) + } + } + parser.state = yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE + return yaml_parser_process_empty_scalar(parser, event, token.start_mark) +} + +// Parse the productions: +// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +// * +// +func yaml_parser_parse_flow_sequence_entry_mapping_end(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + parser.state = yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE + *event = yaml_event_t{ + typ: yaml_MAPPING_END_EVENT, + start_mark: token.start_mark, + end_mark: token.start_mark, // [Go] Shouldn't this be end_mark? + } + return true +} + +// Parse the productions: +// flow_mapping ::= FLOW-MAPPING-START +// ****************** +// (flow_mapping_entry FLOW-ENTRY)* +// * ********** +// flow_mapping_entry? +// ****************** +// FLOW-MAPPING-END +// **************** +// flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +// * *** * +// +func yaml_parser_parse_flow_mapping_key(parser *yaml_parser_t, event *yaml_event_t, first bool) bool { + if first { + token := peek_token(parser) + parser.marks = append(parser.marks, token.start_mark) + skip_token(parser) + } + + token := peek_token(parser) + if token == nil { + return false + } + + if token.typ != yaml_FLOW_MAPPING_END_TOKEN { + if !first { + if token.typ == yaml_FLOW_ENTRY_TOKEN { + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + } else { + context_mark := parser.marks[len(parser.marks)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + return yaml_parser_set_parser_error_context(parser, + "while parsing a flow mapping", context_mark, + "did not find expected ',' or '}'", token.start_mark) + } + } + + if token.typ == yaml_KEY_TOKEN { + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_VALUE_TOKEN && + token.typ != yaml_FLOW_ENTRY_TOKEN && + token.typ != yaml_FLOW_MAPPING_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_FLOW_MAPPING_VALUE_STATE) + return yaml_parser_parse_node(parser, event, false, false) + } else { + parser.state = yaml_PARSE_FLOW_MAPPING_VALUE_STATE + return yaml_parser_process_empty_scalar(parser, event, token.start_mark) + } + } else if token.typ != yaml_FLOW_MAPPING_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE) + return yaml_parser_parse_node(parser, event, false, false) + } + } + + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + *event = yaml_event_t{ + typ: yaml_MAPPING_END_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + } + skip_token(parser) + return true +} + +// Parse the productions: +// flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +// * ***** * +// +func yaml_parser_parse_flow_mapping_value(parser *yaml_parser_t, event *yaml_event_t, empty bool) bool { + token := peek_token(parser) + if token == nil { + return false + } + if empty { + parser.state = yaml_PARSE_FLOW_MAPPING_KEY_STATE + return yaml_parser_process_empty_scalar(parser, event, token.start_mark) + } + if token.typ == yaml_VALUE_TOKEN { + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_FLOW_ENTRY_TOKEN && token.typ != yaml_FLOW_MAPPING_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_FLOW_MAPPING_KEY_STATE) + return yaml_parser_parse_node(parser, event, false, false) + } + } + parser.state = yaml_PARSE_FLOW_MAPPING_KEY_STATE + return yaml_parser_process_empty_scalar(parser, event, token.start_mark) +} + +// Generate an empty scalar event. +func yaml_parser_process_empty_scalar(parser *yaml_parser_t, event *yaml_event_t, mark yaml_mark_t) bool { + *event = yaml_event_t{ + typ: yaml_SCALAR_EVENT, + start_mark: mark, + end_mark: mark, + value: nil, // Empty + implicit: true, + style: yaml_style_t(yaml_PLAIN_SCALAR_STYLE), + } + return true +} + +var default_tag_directives = []yaml_tag_directive_t{ + {[]byte("!"), []byte("!")}, + {[]byte("!!"), []byte("tag:yaml.org,2002:")}, +} + +// Parse directives. +func yaml_parser_process_directives(parser *yaml_parser_t, + version_directive_ref **yaml_version_directive_t, + tag_directives_ref *[]yaml_tag_directive_t) bool { + + var version_directive *yaml_version_directive_t + var tag_directives []yaml_tag_directive_t + + token := peek_token(parser) + if token == nil { + return false + } + + for token.typ == yaml_VERSION_DIRECTIVE_TOKEN || token.typ == yaml_TAG_DIRECTIVE_TOKEN { + if token.typ == yaml_VERSION_DIRECTIVE_TOKEN { + if version_directive != nil { + yaml_parser_set_parser_error(parser, + "found duplicate %YAML directive", token.start_mark) + return false + } + if token.major != 1 || token.minor != 1 { + yaml_parser_set_parser_error(parser, + "found incompatible YAML document", token.start_mark) + return false + } + version_directive = &yaml_version_directive_t{ + major: token.major, + minor: token.minor, + } + } else if token.typ == yaml_TAG_DIRECTIVE_TOKEN { + value := yaml_tag_directive_t{ + handle: token.value, + prefix: token.prefix, + } + if !yaml_parser_append_tag_directive(parser, value, false, token.start_mark) { + return false + } + tag_directives = append(tag_directives, value) + } + + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + } + + for i := range default_tag_directives { + if !yaml_parser_append_tag_directive(parser, default_tag_directives[i], true, token.start_mark) { + return false + } + } + + if version_directive_ref != nil { + *version_directive_ref = version_directive + } + if tag_directives_ref != nil { + *tag_directives_ref = tag_directives + } + return true +} + +// Append a tag directive to the directives stack. +func yaml_parser_append_tag_directive(parser *yaml_parser_t, value yaml_tag_directive_t, allow_duplicates bool, mark yaml_mark_t) bool { + for i := range parser.tag_directives { + if bytes.Equal(value.handle, parser.tag_directives[i].handle) { + if allow_duplicates { + return true + } + return yaml_parser_set_parser_error(parser, "found duplicate %TAG directive", mark) + } + } + + // [Go] I suspect the copy is unnecessary. This was likely done + // because there was no way to track ownership of the data. + value_copy := yaml_tag_directive_t{ + handle: make([]byte, len(value.handle)), + prefix: make([]byte, len(value.prefix)), + } + copy(value_copy.handle, value.handle) + copy(value_copy.prefix, value.prefix) + parser.tag_directives = append(parser.tag_directives, value_copy) + return true +} diff --git a/goyaml.v2/readerc.go b/goyaml.v2/readerc.go new file mode 100644 index 0000000..7c1f5fa --- /dev/null +++ b/goyaml.v2/readerc.go @@ -0,0 +1,412 @@ +package yaml + +import ( + "io" +) + +// Set the reader error and return 0. +func yaml_parser_set_reader_error(parser *yaml_parser_t, problem string, offset int, value int) bool { + parser.error = yaml_READER_ERROR + parser.problem = problem + parser.problem_offset = offset + parser.problem_value = value + return false +} + +// Byte order marks. +const ( + bom_UTF8 = "\xef\xbb\xbf" + bom_UTF16LE = "\xff\xfe" + bom_UTF16BE = "\xfe\xff" +) + +// Determine the input stream encoding by checking the BOM symbol. If no BOM is +// found, the UTF-8 encoding is assumed. Return 1 on success, 0 on failure. +func yaml_parser_determine_encoding(parser *yaml_parser_t) bool { + // Ensure that we had enough bytes in the raw buffer. + for !parser.eof && len(parser.raw_buffer)-parser.raw_buffer_pos < 3 { + if !yaml_parser_update_raw_buffer(parser) { + return false + } + } + + // Determine the encoding. + buf := parser.raw_buffer + pos := parser.raw_buffer_pos + avail := len(buf) - pos + if avail >= 2 && buf[pos] == bom_UTF16LE[0] && buf[pos+1] == bom_UTF16LE[1] { + parser.encoding = yaml_UTF16LE_ENCODING + parser.raw_buffer_pos += 2 + parser.offset += 2 + } else if avail >= 2 && buf[pos] == bom_UTF16BE[0] && buf[pos+1] == bom_UTF16BE[1] { + parser.encoding = yaml_UTF16BE_ENCODING + parser.raw_buffer_pos += 2 + parser.offset += 2 + } else if avail >= 3 && buf[pos] == bom_UTF8[0] && buf[pos+1] == bom_UTF8[1] && buf[pos+2] == bom_UTF8[2] { + parser.encoding = yaml_UTF8_ENCODING + parser.raw_buffer_pos += 3 + parser.offset += 3 + } else { + parser.encoding = yaml_UTF8_ENCODING + } + return true +} + +// Update the raw buffer. +func yaml_parser_update_raw_buffer(parser *yaml_parser_t) bool { + size_read := 0 + + // Return if the raw buffer is full. + if parser.raw_buffer_pos == 0 && len(parser.raw_buffer) == cap(parser.raw_buffer) { + return true + } + + // Return on EOF. + if parser.eof { + return true + } + + // Move the remaining bytes in the raw buffer to the beginning. + if parser.raw_buffer_pos > 0 && parser.raw_buffer_pos < len(parser.raw_buffer) { + copy(parser.raw_buffer, parser.raw_buffer[parser.raw_buffer_pos:]) + } + parser.raw_buffer = parser.raw_buffer[:len(parser.raw_buffer)-parser.raw_buffer_pos] + parser.raw_buffer_pos = 0 + + // Call the read handler to fill the buffer. + size_read, err := parser.read_handler(parser, parser.raw_buffer[len(parser.raw_buffer):cap(parser.raw_buffer)]) + parser.raw_buffer = parser.raw_buffer[:len(parser.raw_buffer)+size_read] + if err == io.EOF { + parser.eof = true + } else if err != nil { + return yaml_parser_set_reader_error(parser, "input error: "+err.Error(), parser.offset, -1) + } + return true +} + +// Ensure that the buffer contains at least `length` characters. +// Return true on success, false on failure. +// +// The length is supposed to be significantly less that the buffer size. +func yaml_parser_update_buffer(parser *yaml_parser_t, length int) bool { + if parser.read_handler == nil { + panic("read handler must be set") + } + + // [Go] This function was changed to guarantee the requested length size at EOF. + // The fact we need to do this is pretty awful, but the description above implies + // for that to be the case, and there are tests + + // If the EOF flag is set and the raw buffer is empty, do nothing. + if parser.eof && parser.raw_buffer_pos == len(parser.raw_buffer) { + // [Go] ACTUALLY! Read the documentation of this function above. + // This is just broken. To return true, we need to have the + // given length in the buffer. Not doing that means every single + // check that calls this function to make sure the buffer has a + // given length is Go) panicking; or C) accessing invalid memory. + //return true + } + + // Return if the buffer contains enough characters. + if parser.unread >= length { + return true + } + + // Determine the input encoding if it is not known yet. + if parser.encoding == yaml_ANY_ENCODING { + if !yaml_parser_determine_encoding(parser) { + return false + } + } + + // Move the unread characters to the beginning of the buffer. + buffer_len := len(parser.buffer) + if parser.buffer_pos > 0 && parser.buffer_pos < buffer_len { + copy(parser.buffer, parser.buffer[parser.buffer_pos:]) + buffer_len -= parser.buffer_pos + parser.buffer_pos = 0 + } else if parser.buffer_pos == buffer_len { + buffer_len = 0 + parser.buffer_pos = 0 + } + + // Open the whole buffer for writing, and cut it before returning. + parser.buffer = parser.buffer[:cap(parser.buffer)] + + // Fill the buffer until it has enough characters. + first := true + for parser.unread < length { + + // Fill the raw buffer if necessary. + if !first || parser.raw_buffer_pos == len(parser.raw_buffer) { + if !yaml_parser_update_raw_buffer(parser) { + parser.buffer = parser.buffer[:buffer_len] + return false + } + } + first = false + + // Decode the raw buffer. + inner: + for parser.raw_buffer_pos != len(parser.raw_buffer) { + var value rune + var width int + + raw_unread := len(parser.raw_buffer) - parser.raw_buffer_pos + + // Decode the next character. + switch parser.encoding { + case yaml_UTF8_ENCODING: + // Decode a UTF-8 character. Check RFC 3629 + // (http://www.ietf.org/rfc/rfc3629.txt) for more details. + // + // The following table (taken from the RFC) is used for + // decoding. + // + // Char. number range | UTF-8 octet sequence + // (hexadecimal) | (binary) + // --------------------+------------------------------------ + // 0000 0000-0000 007F | 0xxxxxxx + // 0000 0080-0000 07FF | 110xxxxx 10xxxxxx + // 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx + // 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + // + // Additionally, the characters in the range 0xD800-0xDFFF + // are prohibited as they are reserved for use with UTF-16 + // surrogate pairs. + + // Determine the length of the UTF-8 sequence. + octet := parser.raw_buffer[parser.raw_buffer_pos] + switch { + case octet&0x80 == 0x00: + width = 1 + case octet&0xE0 == 0xC0: + width = 2 + case octet&0xF0 == 0xE0: + width = 3 + case octet&0xF8 == 0xF0: + width = 4 + default: + // The leading octet is invalid. + return yaml_parser_set_reader_error(parser, + "invalid leading UTF-8 octet", + parser.offset, int(octet)) + } + + // Check if the raw buffer contains an incomplete character. + if width > raw_unread { + if parser.eof { + return yaml_parser_set_reader_error(parser, + "incomplete UTF-8 octet sequence", + parser.offset, -1) + } + break inner + } + + // Decode the leading octet. + switch { + case octet&0x80 == 0x00: + value = rune(octet & 0x7F) + case octet&0xE0 == 0xC0: + value = rune(octet & 0x1F) + case octet&0xF0 == 0xE0: + value = rune(octet & 0x0F) + case octet&0xF8 == 0xF0: + value = rune(octet & 0x07) + default: + value = 0 + } + + // Check and decode the trailing octets. + for k := 1; k < width; k++ { + octet = parser.raw_buffer[parser.raw_buffer_pos+k] + + // Check if the octet is valid. + if (octet & 0xC0) != 0x80 { + return yaml_parser_set_reader_error(parser, + "invalid trailing UTF-8 octet", + parser.offset+k, int(octet)) + } + + // Decode the octet. + value = (value << 6) + rune(octet&0x3F) + } + + // Check the length of the sequence against the value. + switch { + case width == 1: + case width == 2 && value >= 0x80: + case width == 3 && value >= 0x800: + case width == 4 && value >= 0x10000: + default: + return yaml_parser_set_reader_error(parser, + "invalid length of a UTF-8 sequence", + parser.offset, -1) + } + + // Check the range of the value. + if value >= 0xD800 && value <= 0xDFFF || value > 0x10FFFF { + return yaml_parser_set_reader_error(parser, + "invalid Unicode character", + parser.offset, int(value)) + } + + case yaml_UTF16LE_ENCODING, yaml_UTF16BE_ENCODING: + var low, high int + if parser.encoding == yaml_UTF16LE_ENCODING { + low, high = 0, 1 + } else { + low, high = 1, 0 + } + + // The UTF-16 encoding is not as simple as one might + // naively think. Check RFC 2781 + // (http://www.ietf.org/rfc/rfc2781.txt). + // + // Normally, two subsequent bytes describe a Unicode + // character. However a special technique (called a + // surrogate pair) is used for specifying character + // values larger than 0xFFFF. + // + // A surrogate pair consists of two pseudo-characters: + // high surrogate area (0xD800-0xDBFF) + // low surrogate area (0xDC00-0xDFFF) + // + // The following formulas are used for decoding + // and encoding characters using surrogate pairs: + // + // U = U' + 0x10000 (0x01 00 00 <= U <= 0x10 FF FF) + // U' = yyyyyyyyyyxxxxxxxxxx (0 <= U' <= 0x0F FF FF) + // W1 = 110110yyyyyyyyyy + // W2 = 110111xxxxxxxxxx + // + // where U is the character value, W1 is the high surrogate + // area, W2 is the low surrogate area. + + // Check for incomplete UTF-16 character. + if raw_unread < 2 { + if parser.eof { + return yaml_parser_set_reader_error(parser, + "incomplete UTF-16 character", + parser.offset, -1) + } + break inner + } + + // Get the character. + value = rune(parser.raw_buffer[parser.raw_buffer_pos+low]) + + (rune(parser.raw_buffer[parser.raw_buffer_pos+high]) << 8) + + // Check for unexpected low surrogate area. + if value&0xFC00 == 0xDC00 { + return yaml_parser_set_reader_error(parser, + "unexpected low surrogate area", + parser.offset, int(value)) + } + + // Check for a high surrogate area. + if value&0xFC00 == 0xD800 { + width = 4 + + // Check for incomplete surrogate pair. + if raw_unread < 4 { + if parser.eof { + return yaml_parser_set_reader_error(parser, + "incomplete UTF-16 surrogate pair", + parser.offset, -1) + } + break inner + } + + // Get the next character. + value2 := rune(parser.raw_buffer[parser.raw_buffer_pos+low+2]) + + (rune(parser.raw_buffer[parser.raw_buffer_pos+high+2]) << 8) + + // Check for a low surrogate area. + if value2&0xFC00 != 0xDC00 { + return yaml_parser_set_reader_error(parser, + "expected low surrogate area", + parser.offset+2, int(value2)) + } + + // Generate the value of the surrogate pair. + value = 0x10000 + ((value & 0x3FF) << 10) + (value2 & 0x3FF) + } else { + width = 2 + } + + default: + panic("impossible") + } + + // Check if the character is in the allowed range: + // #x9 | #xA | #xD | [#x20-#x7E] (8 bit) + // | #x85 | [#xA0-#xD7FF] | [#xE000-#xFFFD] (16 bit) + // | [#x10000-#x10FFFF] (32 bit) + switch { + case value == 0x09: + case value == 0x0A: + case value == 0x0D: + case value >= 0x20 && value <= 0x7E: + case value == 0x85: + case value >= 0xA0 && value <= 0xD7FF: + case value >= 0xE000 && value <= 0xFFFD: + case value >= 0x10000 && value <= 0x10FFFF: + default: + return yaml_parser_set_reader_error(parser, + "control characters are not allowed", + parser.offset, int(value)) + } + + // Move the raw pointers. + parser.raw_buffer_pos += width + parser.offset += width + + // Finally put the character into the buffer. + if value <= 0x7F { + // 0000 0000-0000 007F . 0xxxxxxx + parser.buffer[buffer_len+0] = byte(value) + buffer_len += 1 + } else if value <= 0x7FF { + // 0000 0080-0000 07FF . 110xxxxx 10xxxxxx + parser.buffer[buffer_len+0] = byte(0xC0 + (value >> 6)) + parser.buffer[buffer_len+1] = byte(0x80 + (value & 0x3F)) + buffer_len += 2 + } else if value <= 0xFFFF { + // 0000 0800-0000 FFFF . 1110xxxx 10xxxxxx 10xxxxxx + parser.buffer[buffer_len+0] = byte(0xE0 + (value >> 12)) + parser.buffer[buffer_len+1] = byte(0x80 + ((value >> 6) & 0x3F)) + parser.buffer[buffer_len+2] = byte(0x80 + (value & 0x3F)) + buffer_len += 3 + } else { + // 0001 0000-0010 FFFF . 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + parser.buffer[buffer_len+0] = byte(0xF0 + (value >> 18)) + parser.buffer[buffer_len+1] = byte(0x80 + ((value >> 12) & 0x3F)) + parser.buffer[buffer_len+2] = byte(0x80 + ((value >> 6) & 0x3F)) + parser.buffer[buffer_len+3] = byte(0x80 + (value & 0x3F)) + buffer_len += 4 + } + + parser.unread++ + } + + // On EOF, put NUL into the buffer and return. + if parser.eof { + parser.buffer[buffer_len] = 0 + buffer_len++ + parser.unread++ + break + } + } + // [Go] Read the documentation of this function above. To return true, + // we need to have the given length in the buffer. Not doing that means + // every single check that calls this function to make sure the buffer + // has a given length is Go) panicking; or C) accessing invalid memory. + // This happens here due to the EOF above breaking early. + for buffer_len < length { + parser.buffer[buffer_len] = 0 + buffer_len++ + } + parser.buffer = parser.buffer[:buffer_len] + return true +} diff --git a/goyaml.v2/resolve.go b/goyaml.v2/resolve.go new file mode 100644 index 0000000..4120e0c --- /dev/null +++ b/goyaml.v2/resolve.go @@ -0,0 +1,258 @@ +package yaml + +import ( + "encoding/base64" + "math" + "regexp" + "strconv" + "strings" + "time" +) + +type resolveMapItem struct { + value interface{} + tag string +} + +var resolveTable = make([]byte, 256) +var resolveMap = make(map[string]resolveMapItem) + +func init() { + t := resolveTable + t[int('+')] = 'S' // Sign + t[int('-')] = 'S' + for _, c := range "0123456789" { + t[int(c)] = 'D' // Digit + } + for _, c := range "yYnNtTfFoO~" { + t[int(c)] = 'M' // In map + } + t[int('.')] = '.' // Float (potentially in map) + + var resolveMapList = []struct { + v interface{} + tag string + l []string + }{ + {true, yaml_BOOL_TAG, []string{"y", "Y", "yes", "Yes", "YES"}}, + {true, yaml_BOOL_TAG, []string{"true", "True", "TRUE"}}, + {true, yaml_BOOL_TAG, []string{"on", "On", "ON"}}, + {false, yaml_BOOL_TAG, []string{"n", "N", "no", "No", "NO"}}, + {false, yaml_BOOL_TAG, []string{"false", "False", "FALSE"}}, + {false, yaml_BOOL_TAG, []string{"off", "Off", "OFF"}}, + {nil, yaml_NULL_TAG, []string{"", "~", "null", "Null", "NULL"}}, + {math.NaN(), yaml_FLOAT_TAG, []string{".nan", ".NaN", ".NAN"}}, + {math.Inf(+1), yaml_FLOAT_TAG, []string{".inf", ".Inf", ".INF"}}, + {math.Inf(+1), yaml_FLOAT_TAG, []string{"+.inf", "+.Inf", "+.INF"}}, + {math.Inf(-1), yaml_FLOAT_TAG, []string{"-.inf", "-.Inf", "-.INF"}}, + {"<<", yaml_MERGE_TAG, []string{"<<"}}, + } + + m := resolveMap + for _, item := range resolveMapList { + for _, s := range item.l { + m[s] = resolveMapItem{item.v, item.tag} + } + } +} + +const longTagPrefix = "tag:yaml.org,2002:" + +func shortTag(tag string) string { + // TODO This can easily be made faster and produce less garbage. + if strings.HasPrefix(tag, longTagPrefix) { + return "!!" + tag[len(longTagPrefix):] + } + return tag +} + +func longTag(tag string) string { + if strings.HasPrefix(tag, "!!") { + return longTagPrefix + tag[2:] + } + return tag +} + +func resolvableTag(tag string) bool { + switch tag { + case "", yaml_STR_TAG, yaml_BOOL_TAG, yaml_INT_TAG, yaml_FLOAT_TAG, yaml_NULL_TAG, yaml_TIMESTAMP_TAG: + return true + } + return false +} + +var yamlStyleFloat = regexp.MustCompile(`^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$`) + +func resolve(tag string, in string) (rtag string, out interface{}) { + if !resolvableTag(tag) { + return tag, in + } + + defer func() { + switch tag { + case "", rtag, yaml_STR_TAG, yaml_BINARY_TAG: + return + case yaml_FLOAT_TAG: + if rtag == yaml_INT_TAG { + switch v := out.(type) { + case int64: + rtag = yaml_FLOAT_TAG + out = float64(v) + return + case int: + rtag = yaml_FLOAT_TAG + out = float64(v) + return + } + } + } + failf("cannot decode %s `%s` as a %s", shortTag(rtag), in, shortTag(tag)) + }() + + // Any data is accepted as a !!str or !!binary. + // Otherwise, the prefix is enough of a hint about what it might be. + hint := byte('N') + if in != "" { + hint = resolveTable[in[0]] + } + if hint != 0 && tag != yaml_STR_TAG && tag != yaml_BINARY_TAG { + // Handle things we can lookup in a map. + if item, ok := resolveMap[in]; ok { + return item.tag, item.value + } + + // Base 60 floats are a bad idea, were dropped in YAML 1.2, and + // are purposefully unsupported here. They're still quoted on + // the way out for compatibility with other parser, though. + + switch hint { + case 'M': + // We've already checked the map above. + + case '.': + // Not in the map, so maybe a normal float. + floatv, err := strconv.ParseFloat(in, 64) + if err == nil { + return yaml_FLOAT_TAG, floatv + } + + case 'D', 'S': + // Int, float, or timestamp. + // Only try values as a timestamp if the value is unquoted or there's an explicit + // !!timestamp tag. + if tag == "" || tag == yaml_TIMESTAMP_TAG { + t, ok := parseTimestamp(in) + if ok { + return yaml_TIMESTAMP_TAG, t + } + } + + plain := strings.Replace(in, "_", "", -1) + intv, err := strconv.ParseInt(plain, 0, 64) + if err == nil { + if intv == int64(int(intv)) { + return yaml_INT_TAG, int(intv) + } else { + return yaml_INT_TAG, intv + } + } + uintv, err := strconv.ParseUint(plain, 0, 64) + if err == nil { + return yaml_INT_TAG, uintv + } + if yamlStyleFloat.MatchString(plain) { + floatv, err := strconv.ParseFloat(plain, 64) + if err == nil { + return yaml_FLOAT_TAG, floatv + } + } + if strings.HasPrefix(plain, "0b") { + intv, err := strconv.ParseInt(plain[2:], 2, 64) + if err == nil { + if intv == int64(int(intv)) { + return yaml_INT_TAG, int(intv) + } else { + return yaml_INT_TAG, intv + } + } + uintv, err := strconv.ParseUint(plain[2:], 2, 64) + if err == nil { + return yaml_INT_TAG, uintv + } + } else if strings.HasPrefix(plain, "-0b") { + intv, err := strconv.ParseInt("-" + plain[3:], 2, 64) + if err == nil { + if true || intv == int64(int(intv)) { + return yaml_INT_TAG, int(intv) + } else { + return yaml_INT_TAG, intv + } + } + } + default: + panic("resolveTable item not yet handled: " + string(rune(hint)) + " (with " + in + ")") + } + } + return yaml_STR_TAG, in +} + +// encodeBase64 encodes s as base64 that is broken up into multiple lines +// as appropriate for the resulting length. +func encodeBase64(s string) string { + const lineLen = 70 + encLen := base64.StdEncoding.EncodedLen(len(s)) + lines := encLen/lineLen + 1 + buf := make([]byte, encLen*2+lines) + in := buf[0:encLen] + out := buf[encLen:] + base64.StdEncoding.Encode(in, []byte(s)) + k := 0 + for i := 0; i < len(in); i += lineLen { + j := i + lineLen + if j > len(in) { + j = len(in) + } + k += copy(out[k:], in[i:j]) + if lines > 1 { + out[k] = '\n' + k++ + } + } + return string(out[:k]) +} + +// This is a subset of the formats allowed by the regular expression +// defined at http://yaml.org/type/timestamp.html. +var allowedTimestampFormats = []string{ + "2006-1-2T15:4:5.999999999Z07:00", // RCF3339Nano with short date fields. + "2006-1-2t15:4:5.999999999Z07:00", // RFC3339Nano with short date fields and lower-case "t". + "2006-1-2 15:4:5.999999999", // space separated with no time zone + "2006-1-2", // date only + // Notable exception: time.Parse cannot handle: "2001-12-14 21:59:43.10 -5" + // from the set of examples. +} + +// parseTimestamp parses s as a timestamp string and +// returns the timestamp and reports whether it succeeded. +// Timestamp formats are defined at http://yaml.org/type/timestamp.html +func parseTimestamp(s string) (time.Time, bool) { + // TODO write code to check all the formats supported by + // http://yaml.org/type/timestamp.html instead of using time.Parse. + + // Quick check: all date formats start with YYYY-. + i := 0 + for ; i < len(s); i++ { + if c := s[i]; c < '0' || c > '9' { + break + } + } + if i != 4 || i == len(s) || s[i] != '-' { + return time.Time{}, false + } + for _, format := range allowedTimestampFormats { + if t, err := time.Parse(format, s); err == nil { + return t, true + } + } + return time.Time{}, false +} diff --git a/goyaml.v2/scannerc.go b/goyaml.v2/scannerc.go new file mode 100644 index 0000000..0b9bb60 --- /dev/null +++ b/goyaml.v2/scannerc.go @@ -0,0 +1,2711 @@ +package yaml + +import ( + "bytes" + "fmt" +) + +// Introduction +// ************ +// +// The following notes assume that you are familiar with the YAML specification +// (http://yaml.org/spec/1.2/spec.html). We mostly follow it, although in +// some cases we are less restrictive that it requires. +// +// The process of transforming a YAML stream into a sequence of events is +// divided on two steps: Scanning and Parsing. +// +// The Scanner transforms the input stream into a sequence of tokens, while the +// parser transform the sequence of tokens produced by the Scanner into a +// sequence of parsing events. +// +// The Scanner is rather clever and complicated. The Parser, on the contrary, +// is a straightforward implementation of a recursive-descendant parser (or, +// LL(1) parser, as it is usually called). +// +// Actually there are two issues of Scanning that might be called "clever", the +// rest is quite straightforward. The issues are "block collection start" and +// "simple keys". Both issues are explained below in details. +// +// Here the Scanning step is explained and implemented. We start with the list +// of all the tokens produced by the Scanner together with short descriptions. +// +// Now, tokens: +// +// STREAM-START(encoding) # The stream start. +// STREAM-END # The stream end. +// VERSION-DIRECTIVE(major,minor) # The '%YAML' directive. +// TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive. +// DOCUMENT-START # '---' +// DOCUMENT-END # '...' +// BLOCK-SEQUENCE-START # Indentation increase denoting a block +// BLOCK-MAPPING-START # sequence or a block mapping. +// BLOCK-END # Indentation decrease. +// FLOW-SEQUENCE-START # '[' +// FLOW-SEQUENCE-END # ']' +// BLOCK-SEQUENCE-START # '{' +// BLOCK-SEQUENCE-END # '}' +// BLOCK-ENTRY # '-' +// FLOW-ENTRY # ',' +// KEY # '?' or nothing (simple keys). +// VALUE # ':' +// ALIAS(anchor) # '*anchor' +// ANCHOR(anchor) # '&anchor' +// TAG(handle,suffix) # '!handle!suffix' +// SCALAR(value,style) # A scalar. +// +// The following two tokens are "virtual" tokens denoting the beginning and the +// end of the stream: +// +// STREAM-START(encoding) +// STREAM-END +// +// We pass the information about the input stream encoding with the +// STREAM-START token. +// +// The next two tokens are responsible for tags: +// +// VERSION-DIRECTIVE(major,minor) +// TAG-DIRECTIVE(handle,prefix) +// +// Example: +// +// %YAML 1.1 +// %TAG ! !foo +// %TAG !yaml! tag:yaml.org,2002: +// --- +// +// The correspoding sequence of tokens: +// +// STREAM-START(utf-8) +// VERSION-DIRECTIVE(1,1) +// TAG-DIRECTIVE("!","!foo") +// TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:") +// DOCUMENT-START +// STREAM-END +// +// Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole +// line. +// +// The document start and end indicators are represented by: +// +// DOCUMENT-START +// DOCUMENT-END +// +// Note that if a YAML stream contains an implicit document (without '---' +// and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be +// produced. +// +// In the following examples, we present whole documents together with the +// produced tokens. +// +// 1. An implicit document: +// +// 'a scalar' +// +// Tokens: +// +// STREAM-START(utf-8) +// SCALAR("a scalar",single-quoted) +// STREAM-END +// +// 2. An explicit document: +// +// --- +// 'a scalar' +// ... +// +// Tokens: +// +// STREAM-START(utf-8) +// DOCUMENT-START +// SCALAR("a scalar",single-quoted) +// DOCUMENT-END +// STREAM-END +// +// 3. Several documents in a stream: +// +// 'a scalar' +// --- +// 'another scalar' +// --- +// 'yet another scalar' +// +// Tokens: +// +// STREAM-START(utf-8) +// SCALAR("a scalar",single-quoted) +// DOCUMENT-START +// SCALAR("another scalar",single-quoted) +// DOCUMENT-START +// SCALAR("yet another scalar",single-quoted) +// STREAM-END +// +// We have already introduced the SCALAR token above. The following tokens are +// used to describe aliases, anchors, tag, and scalars: +// +// ALIAS(anchor) +// ANCHOR(anchor) +// TAG(handle,suffix) +// SCALAR(value,style) +// +// The following series of examples illustrate the usage of these tokens: +// +// 1. A recursive sequence: +// +// &A [ *A ] +// +// Tokens: +// +// STREAM-START(utf-8) +// ANCHOR("A") +// FLOW-SEQUENCE-START +// ALIAS("A") +// FLOW-SEQUENCE-END +// STREAM-END +// +// 2. A tagged scalar: +// +// !!float "3.14" # A good approximation. +// +// Tokens: +// +// STREAM-START(utf-8) +// TAG("!!","float") +// SCALAR("3.14",double-quoted) +// STREAM-END +// +// 3. Various scalar styles: +// +// --- # Implicit empty plain scalars do not produce tokens. +// --- a plain scalar +// --- 'a single-quoted scalar' +// --- "a double-quoted scalar" +// --- |- +// a literal scalar +// --- >- +// a folded +// scalar +// +// Tokens: +// +// STREAM-START(utf-8) +// DOCUMENT-START +// DOCUMENT-START +// SCALAR("a plain scalar",plain) +// DOCUMENT-START +// SCALAR("a single-quoted scalar",single-quoted) +// DOCUMENT-START +// SCALAR("a double-quoted scalar",double-quoted) +// DOCUMENT-START +// SCALAR("a literal scalar",literal) +// DOCUMENT-START +// SCALAR("a folded scalar",folded) +// STREAM-END +// +// Now it's time to review collection-related tokens. We will start with +// flow collections: +// +// FLOW-SEQUENCE-START +// FLOW-SEQUENCE-END +// FLOW-MAPPING-START +// FLOW-MAPPING-END +// FLOW-ENTRY +// KEY +// VALUE +// +// The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and +// FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}' +// correspondingly. FLOW-ENTRY represent the ',' indicator. Finally the +// indicators '?' and ':', which are used for denoting mapping keys and values, +// are represented by the KEY and VALUE tokens. +// +// The following examples show flow collections: +// +// 1. A flow sequence: +// +// [item 1, item 2, item 3] +// +// Tokens: +// +// STREAM-START(utf-8) +// FLOW-SEQUENCE-START +// SCALAR("item 1",plain) +// FLOW-ENTRY +// SCALAR("item 2",plain) +// FLOW-ENTRY +// SCALAR("item 3",plain) +// FLOW-SEQUENCE-END +// STREAM-END +// +// 2. A flow mapping: +// +// { +// a simple key: a value, # Note that the KEY token is produced. +// ? a complex key: another value, +// } +// +// Tokens: +// +// STREAM-START(utf-8) +// FLOW-MAPPING-START +// KEY +// SCALAR("a simple key",plain) +// VALUE +// SCALAR("a value",plain) +// FLOW-ENTRY +// KEY +// SCALAR("a complex key",plain) +// VALUE +// SCALAR("another value",plain) +// FLOW-ENTRY +// FLOW-MAPPING-END +// STREAM-END +// +// A simple key is a key which is not denoted by the '?' indicator. Note that +// the Scanner still produce the KEY token whenever it encounters a simple key. +// +// For scanning block collections, the following tokens are used (note that we +// repeat KEY and VALUE here): +// +// BLOCK-SEQUENCE-START +// BLOCK-MAPPING-START +// BLOCK-END +// BLOCK-ENTRY +// KEY +// VALUE +// +// The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation +// increase that precedes a block collection (cf. the INDENT token in Python). +// The token BLOCK-END denote indentation decrease that ends a block collection +// (cf. the DEDENT token in Python). However YAML has some syntax pecularities +// that makes detections of these tokens more complex. +// +// The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators +// '-', '?', and ':' correspondingly. +// +// The following examples show how the tokens BLOCK-SEQUENCE-START, +// BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner: +// +// 1. Block sequences: +// +// - item 1 +// - item 2 +// - +// - item 3.1 +// - item 3.2 +// - +// key 1: value 1 +// key 2: value 2 +// +// Tokens: +// +// STREAM-START(utf-8) +// BLOCK-SEQUENCE-START +// BLOCK-ENTRY +// SCALAR("item 1",plain) +// BLOCK-ENTRY +// SCALAR("item 2",plain) +// BLOCK-ENTRY +// BLOCK-SEQUENCE-START +// BLOCK-ENTRY +// SCALAR("item 3.1",plain) +// BLOCK-ENTRY +// SCALAR("item 3.2",plain) +// BLOCK-END +// BLOCK-ENTRY +// BLOCK-MAPPING-START +// KEY +// SCALAR("key 1",plain) +// VALUE +// SCALAR("value 1",plain) +// KEY +// SCALAR("key 2",plain) +// VALUE +// SCALAR("value 2",plain) +// BLOCK-END +// BLOCK-END +// STREAM-END +// +// 2. Block mappings: +// +// a simple key: a value # The KEY token is produced here. +// ? a complex key +// : another value +// a mapping: +// key 1: value 1 +// key 2: value 2 +// a sequence: +// - item 1 +// - item 2 +// +// Tokens: +// +// STREAM-START(utf-8) +// BLOCK-MAPPING-START +// KEY +// SCALAR("a simple key",plain) +// VALUE +// SCALAR("a value",plain) +// KEY +// SCALAR("a complex key",plain) +// VALUE +// SCALAR("another value",plain) +// KEY +// SCALAR("a mapping",plain) +// BLOCK-MAPPING-START +// KEY +// SCALAR("key 1",plain) +// VALUE +// SCALAR("value 1",plain) +// KEY +// SCALAR("key 2",plain) +// VALUE +// SCALAR("value 2",plain) +// BLOCK-END +// KEY +// SCALAR("a sequence",plain) +// VALUE +// BLOCK-SEQUENCE-START +// BLOCK-ENTRY +// SCALAR("item 1",plain) +// BLOCK-ENTRY +// SCALAR("item 2",plain) +// BLOCK-END +// BLOCK-END +// STREAM-END +// +// YAML does not always require to start a new block collection from a new +// line. If the current line contains only '-', '?', and ':' indicators, a new +// block collection may start at the current line. The following examples +// illustrate this case: +// +// 1. Collections in a sequence: +// +// - - item 1 +// - item 2 +// - key 1: value 1 +// key 2: value 2 +// - ? complex key +// : complex value +// +// Tokens: +// +// STREAM-START(utf-8) +// BLOCK-SEQUENCE-START +// BLOCK-ENTRY +// BLOCK-SEQUENCE-START +// BLOCK-ENTRY +// SCALAR("item 1",plain) +// BLOCK-ENTRY +// SCALAR("item 2",plain) +// BLOCK-END +// BLOCK-ENTRY +// BLOCK-MAPPING-START +// KEY +// SCALAR("key 1",plain) +// VALUE +// SCALAR("value 1",plain) +// KEY +// SCALAR("key 2",plain) +// VALUE +// SCALAR("value 2",plain) +// BLOCK-END +// BLOCK-ENTRY +// BLOCK-MAPPING-START +// KEY +// SCALAR("complex key") +// VALUE +// SCALAR("complex value") +// BLOCK-END +// BLOCK-END +// STREAM-END +// +// 2. Collections in a mapping: +// +// ? a sequence +// : - item 1 +// - item 2 +// ? a mapping +// : key 1: value 1 +// key 2: value 2 +// +// Tokens: +// +// STREAM-START(utf-8) +// BLOCK-MAPPING-START +// KEY +// SCALAR("a sequence",plain) +// VALUE +// BLOCK-SEQUENCE-START +// BLOCK-ENTRY +// SCALAR("item 1",plain) +// BLOCK-ENTRY +// SCALAR("item 2",plain) +// BLOCK-END +// KEY +// SCALAR("a mapping",plain) +// VALUE +// BLOCK-MAPPING-START +// KEY +// SCALAR("key 1",plain) +// VALUE +// SCALAR("value 1",plain) +// KEY +// SCALAR("key 2",plain) +// VALUE +// SCALAR("value 2",plain) +// BLOCK-END +// BLOCK-END +// STREAM-END +// +// YAML also permits non-indented sequences if they are included into a block +// mapping. In this case, the token BLOCK-SEQUENCE-START is not produced: +// +// key: +// - item 1 # BLOCK-SEQUENCE-START is NOT produced here. +// - item 2 +// +// Tokens: +// +// STREAM-START(utf-8) +// BLOCK-MAPPING-START +// KEY +// SCALAR("key",plain) +// VALUE +// BLOCK-ENTRY +// SCALAR("item 1",plain) +// BLOCK-ENTRY +// SCALAR("item 2",plain) +// BLOCK-END +// + +// Ensure that the buffer contains the required number of characters. +// Return true on success, false on failure (reader error or memory error). +func cache(parser *yaml_parser_t, length int) bool { + // [Go] This was inlined: !cache(A, B) -> unread < B && !update(A, B) + return parser.unread >= length || yaml_parser_update_buffer(parser, length) +} + +// Advance the buffer pointer. +func skip(parser *yaml_parser_t) { + parser.mark.index++ + parser.mark.column++ + parser.unread-- + parser.buffer_pos += width(parser.buffer[parser.buffer_pos]) +} + +func skip_line(parser *yaml_parser_t) { + if is_crlf(parser.buffer, parser.buffer_pos) { + parser.mark.index += 2 + parser.mark.column = 0 + parser.mark.line++ + parser.unread -= 2 + parser.buffer_pos += 2 + } else if is_break(parser.buffer, parser.buffer_pos) { + parser.mark.index++ + parser.mark.column = 0 + parser.mark.line++ + parser.unread-- + parser.buffer_pos += width(parser.buffer[parser.buffer_pos]) + } +} + +// Copy a character to a string buffer and advance pointers. +func read(parser *yaml_parser_t, s []byte) []byte { + w := width(parser.buffer[parser.buffer_pos]) + if w == 0 { + panic("invalid character sequence") + } + if len(s) == 0 { + s = make([]byte, 0, 32) + } + if w == 1 && len(s)+w <= cap(s) { + s = s[:len(s)+1] + s[len(s)-1] = parser.buffer[parser.buffer_pos] + parser.buffer_pos++ + } else { + s = append(s, parser.buffer[parser.buffer_pos:parser.buffer_pos+w]...) + parser.buffer_pos += w + } + parser.mark.index++ + parser.mark.column++ + parser.unread-- + return s +} + +// Copy a line break character to a string buffer and advance pointers. +func read_line(parser *yaml_parser_t, s []byte) []byte { + buf := parser.buffer + pos := parser.buffer_pos + switch { + case buf[pos] == '\r' && buf[pos+1] == '\n': + // CR LF . LF + s = append(s, '\n') + parser.buffer_pos += 2 + parser.mark.index++ + parser.unread-- + case buf[pos] == '\r' || buf[pos] == '\n': + // CR|LF . LF + s = append(s, '\n') + parser.buffer_pos += 1 + case buf[pos] == '\xC2' && buf[pos+1] == '\x85': + // NEL . LF + s = append(s, '\n') + parser.buffer_pos += 2 + case buf[pos] == '\xE2' && buf[pos+1] == '\x80' && (buf[pos+2] == '\xA8' || buf[pos+2] == '\xA9'): + // LS|PS . LS|PS + s = append(s, buf[parser.buffer_pos:pos+3]...) + parser.buffer_pos += 3 + default: + return s + } + parser.mark.index++ + parser.mark.column = 0 + parser.mark.line++ + parser.unread-- + return s +} + +// Get the next token. +func yaml_parser_scan(parser *yaml_parser_t, token *yaml_token_t) bool { + // Erase the token object. + *token = yaml_token_t{} // [Go] Is this necessary? + + // No tokens after STREAM-END or error. + if parser.stream_end_produced || parser.error != yaml_NO_ERROR { + return true + } + + // Ensure that the tokens queue contains enough tokens. + if !parser.token_available { + if !yaml_parser_fetch_more_tokens(parser) { + return false + } + } + + // Fetch the next token from the queue. + *token = parser.tokens[parser.tokens_head] + parser.tokens_head++ + parser.tokens_parsed++ + parser.token_available = false + + if token.typ == yaml_STREAM_END_TOKEN { + parser.stream_end_produced = true + } + return true +} + +// Set the scanner error and return false. +func yaml_parser_set_scanner_error(parser *yaml_parser_t, context string, context_mark yaml_mark_t, problem string) bool { + parser.error = yaml_SCANNER_ERROR + parser.context = context + parser.context_mark = context_mark + parser.problem = problem + parser.problem_mark = parser.mark + return false +} + +func yaml_parser_set_scanner_tag_error(parser *yaml_parser_t, directive bool, context_mark yaml_mark_t, problem string) bool { + context := "while parsing a tag" + if directive { + context = "while parsing a %TAG directive" + } + return yaml_parser_set_scanner_error(parser, context, context_mark, problem) +} + +func trace(args ...interface{}) func() { + pargs := append([]interface{}{"+++"}, args...) + fmt.Println(pargs...) + pargs = append([]interface{}{"---"}, args...) + return func() { fmt.Println(pargs...) } +} + +// Ensure that the tokens queue contains at least one token which can be +// returned to the Parser. +func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool { + // While we need more tokens to fetch, do it. + for { + if parser.tokens_head != len(parser.tokens) { + // If queue is non-empty, check if any potential simple key may + // occupy the head position. + head_tok_idx, ok := parser.simple_keys_by_tok[parser.tokens_parsed] + if !ok { + break + } else if valid, ok := yaml_simple_key_is_valid(parser, &parser.simple_keys[head_tok_idx]); !ok { + return false + } else if !valid { + break + } + } + // Fetch the next token. + if !yaml_parser_fetch_next_token(parser) { + return false + } + } + + parser.token_available = true + return true +} + +// The dispatcher for token fetchers. +func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool { + // Ensure that the buffer is initialized. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + + // Check if we just started scanning. Fetch STREAM-START then. + if !parser.stream_start_produced { + return yaml_parser_fetch_stream_start(parser) + } + + // Eat whitespaces and comments until we reach the next token. + if !yaml_parser_scan_to_next_token(parser) { + return false + } + + // Check the indentation level against the current column. + if !yaml_parser_unroll_indent(parser, parser.mark.column) { + return false + } + + // Ensure that the buffer contains at least 4 characters. 4 is the length + // of the longest indicators ('--- ' and '... '). + if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { + return false + } + + // Is it the end of the stream? + if is_z(parser.buffer, parser.buffer_pos) { + return yaml_parser_fetch_stream_end(parser) + } + + // Is it a directive? + if parser.mark.column == 0 && parser.buffer[parser.buffer_pos] == '%' { + return yaml_parser_fetch_directive(parser) + } + + buf := parser.buffer + pos := parser.buffer_pos + + // Is it the document start indicator? + if parser.mark.column == 0 && buf[pos] == '-' && buf[pos+1] == '-' && buf[pos+2] == '-' && is_blankz(buf, pos+3) { + return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_START_TOKEN) + } + + // Is it the document end indicator? + if parser.mark.column == 0 && buf[pos] == '.' && buf[pos+1] == '.' && buf[pos+2] == '.' && is_blankz(buf, pos+3) { + return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_END_TOKEN) + } + + // Is it the flow sequence start indicator? + if buf[pos] == '[' { + return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_SEQUENCE_START_TOKEN) + } + + // Is it the flow mapping start indicator? + if parser.buffer[parser.buffer_pos] == '{' { + return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_MAPPING_START_TOKEN) + } + + // Is it the flow sequence end indicator? + if parser.buffer[parser.buffer_pos] == ']' { + return yaml_parser_fetch_flow_collection_end(parser, + yaml_FLOW_SEQUENCE_END_TOKEN) + } + + // Is it the flow mapping end indicator? + if parser.buffer[parser.buffer_pos] == '}' { + return yaml_parser_fetch_flow_collection_end(parser, + yaml_FLOW_MAPPING_END_TOKEN) + } + + // Is it the flow entry indicator? + if parser.buffer[parser.buffer_pos] == ',' { + return yaml_parser_fetch_flow_entry(parser) + } + + // Is it the block entry indicator? + if parser.buffer[parser.buffer_pos] == '-' && is_blankz(parser.buffer, parser.buffer_pos+1) { + return yaml_parser_fetch_block_entry(parser) + } + + // Is it the key indicator? + if parser.buffer[parser.buffer_pos] == '?' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) { + return yaml_parser_fetch_key(parser) + } + + // Is it the value indicator? + if parser.buffer[parser.buffer_pos] == ':' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) { + return yaml_parser_fetch_value(parser) + } + + // Is it an alias? + if parser.buffer[parser.buffer_pos] == '*' { + return yaml_parser_fetch_anchor(parser, yaml_ALIAS_TOKEN) + } + + // Is it an anchor? + if parser.buffer[parser.buffer_pos] == '&' { + return yaml_parser_fetch_anchor(parser, yaml_ANCHOR_TOKEN) + } + + // Is it a tag? + if parser.buffer[parser.buffer_pos] == '!' { + return yaml_parser_fetch_tag(parser) + } + + // Is it a literal scalar? + if parser.buffer[parser.buffer_pos] == '|' && parser.flow_level == 0 { + return yaml_parser_fetch_block_scalar(parser, true) + } + + // Is it a folded scalar? + if parser.buffer[parser.buffer_pos] == '>' && parser.flow_level == 0 { + return yaml_parser_fetch_block_scalar(parser, false) + } + + // Is it a single-quoted scalar? + if parser.buffer[parser.buffer_pos] == '\'' { + return yaml_parser_fetch_flow_scalar(parser, true) + } + + // Is it a double-quoted scalar? + if parser.buffer[parser.buffer_pos] == '"' { + return yaml_parser_fetch_flow_scalar(parser, false) + } + + // Is it a plain scalar? + // + // A plain scalar may start with any non-blank characters except + // + // '-', '?', ':', ',', '[', ']', '{', '}', + // '#', '&', '*', '!', '|', '>', '\'', '\"', + // '%', '@', '`'. + // + // In the block context (and, for the '-' indicator, in the flow context + // too), it may also start with the characters + // + // '-', '?', ':' + // + // if it is followed by a non-space character. + // + // The last rule is more restrictive than the specification requires. + // [Go] Make this logic more reasonable. + //switch parser.buffer[parser.buffer_pos] { + //case '-', '?', ':', ',', '?', '-', ',', ':', ']', '[', '}', '{', '&', '#', '!', '*', '>', '|', '"', '\'', '@', '%', '-', '`': + //} + if !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '-' || + parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':' || + parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '[' || + parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' || + parser.buffer[parser.buffer_pos] == '}' || parser.buffer[parser.buffer_pos] == '#' || + parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '*' || + parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '|' || + parser.buffer[parser.buffer_pos] == '>' || parser.buffer[parser.buffer_pos] == '\'' || + parser.buffer[parser.buffer_pos] == '"' || parser.buffer[parser.buffer_pos] == '%' || + parser.buffer[parser.buffer_pos] == '@' || parser.buffer[parser.buffer_pos] == '`') || + (parser.buffer[parser.buffer_pos] == '-' && !is_blank(parser.buffer, parser.buffer_pos+1)) || + (parser.flow_level == 0 && + (parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':') && + !is_blankz(parser.buffer, parser.buffer_pos+1)) { + return yaml_parser_fetch_plain_scalar(parser) + } + + // If we don't determine the token type so far, it is an error. + return yaml_parser_set_scanner_error(parser, + "while scanning for the next token", parser.mark, + "found character that cannot start any token") +} + +func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key_t) (valid, ok bool) { + if !simple_key.possible { + return false, true + } + + // The 1.2 specification says: + // + // "If the ? indicator is omitted, parsing needs to see past the + // implicit key to recognize it as such. To limit the amount of + // lookahead required, the “:” indicator must appear at most 1024 + // Unicode characters beyond the start of the key. In addition, the key + // is restricted to a single line." + // + if simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index { + // Check if the potential simple key to be removed is required. + if simple_key.required { + return false, yaml_parser_set_scanner_error(parser, + "while scanning a simple key", simple_key.mark, + "could not find expected ':'") + } + simple_key.possible = false + return false, true + } + return true, true +} + +// Check if a simple key may start at the current position and add it if +// needed. +func yaml_parser_save_simple_key(parser *yaml_parser_t) bool { + // A simple key is required at the current position if the scanner is in + // the block context and the current column coincides with the indentation + // level. + + required := parser.flow_level == 0 && parser.indent == parser.mark.column + + // + // If the current position may start a simple key, save it. + // + if parser.simple_key_allowed { + simple_key := yaml_simple_key_t{ + possible: true, + required: required, + token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), + mark: parser.mark, + } + + if !yaml_parser_remove_simple_key(parser) { + return false + } + parser.simple_keys[len(parser.simple_keys)-1] = simple_key + parser.simple_keys_by_tok[simple_key.token_number] = len(parser.simple_keys) - 1 + } + return true +} + +// Remove a potential simple key at the current flow level. +func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool { + i := len(parser.simple_keys) - 1 + if parser.simple_keys[i].possible { + // If the key is required, it is an error. + if parser.simple_keys[i].required { + return yaml_parser_set_scanner_error(parser, + "while scanning a simple key", parser.simple_keys[i].mark, + "could not find expected ':'") + } + // Remove the key from the stack. + parser.simple_keys[i].possible = false + delete(parser.simple_keys_by_tok, parser.simple_keys[i].token_number) + } + return true +} + +// max_flow_level limits the flow_level +const max_flow_level = 10000 + +// Increase the flow level and resize the simple key list if needed. +func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool { + // Reset the simple key on the next level. + parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{ + possible: false, + required: false, + token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), + mark: parser.mark, + }) + + // Increase the flow level. + parser.flow_level++ + if parser.flow_level > max_flow_level { + return yaml_parser_set_scanner_error(parser, + "while increasing flow level", parser.simple_keys[len(parser.simple_keys)-1].mark, + fmt.Sprintf("exceeded max depth of %d", max_flow_level)) + } + return true +} + +// Decrease the flow level. +func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool { + if parser.flow_level > 0 { + parser.flow_level-- + last := len(parser.simple_keys) - 1 + delete(parser.simple_keys_by_tok, parser.simple_keys[last].token_number) + parser.simple_keys = parser.simple_keys[:last] + } + return true +} + +// max_indents limits the indents stack size +const max_indents = 10000 + +// Push the current indentation level to the stack and set the new level +// the current column is greater than the indentation level. In this case, +// append or insert the specified token into the token queue. +func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml_token_type_t, mark yaml_mark_t) bool { + // In the flow context, do nothing. + if parser.flow_level > 0 { + return true + } + + if parser.indent < column { + // Push the current indentation level to the stack and set the new + // indentation level. + parser.indents = append(parser.indents, parser.indent) + parser.indent = column + if len(parser.indents) > max_indents { + return yaml_parser_set_scanner_error(parser, + "while increasing indent level", parser.simple_keys[len(parser.simple_keys)-1].mark, + fmt.Sprintf("exceeded max depth of %d", max_indents)) + } + + // Create a token and insert it into the queue. + token := yaml_token_t{ + typ: typ, + start_mark: mark, + end_mark: mark, + } + if number > -1 { + number -= parser.tokens_parsed + } + yaml_insert_token(parser, number, &token) + } + return true +} + +// Pop indentation levels from the indents stack until the current level +// becomes less or equal to the column. For each indentation level, append +// the BLOCK-END token. +func yaml_parser_unroll_indent(parser *yaml_parser_t, column int) bool { + // In the flow context, do nothing. + if parser.flow_level > 0 { + return true + } + + // Loop through the indentation levels in the stack. + for parser.indent > column { + // Create a token and append it to the queue. + token := yaml_token_t{ + typ: yaml_BLOCK_END_TOKEN, + start_mark: parser.mark, + end_mark: parser.mark, + } + yaml_insert_token(parser, -1, &token) + + // Pop the indentation level. + parser.indent = parser.indents[len(parser.indents)-1] + parser.indents = parser.indents[:len(parser.indents)-1] + } + return true +} + +// Initialize the scanner and produce the STREAM-START token. +func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool { + + // Set the initial indentation. + parser.indent = -1 + + // Initialize the simple key stack. + parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{}) + + parser.simple_keys_by_tok = make(map[int]int) + + // A simple key is allowed at the beginning of the stream. + parser.simple_key_allowed = true + + // We have started. + parser.stream_start_produced = true + + // Create the STREAM-START token and append it to the queue. + token := yaml_token_t{ + typ: yaml_STREAM_START_TOKEN, + start_mark: parser.mark, + end_mark: parser.mark, + encoding: parser.encoding, + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the STREAM-END token and shut down the scanner. +func yaml_parser_fetch_stream_end(parser *yaml_parser_t) bool { + + // Force new line. + if parser.mark.column != 0 { + parser.mark.column = 0 + parser.mark.line++ + } + + // Reset the indentation level. + if !yaml_parser_unroll_indent(parser, -1) { + return false + } + + // Reset simple keys. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + parser.simple_key_allowed = false + + // Create the STREAM-END token and append it to the queue. + token := yaml_token_t{ + typ: yaml_STREAM_END_TOKEN, + start_mark: parser.mark, + end_mark: parser.mark, + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token. +func yaml_parser_fetch_directive(parser *yaml_parser_t) bool { + // Reset the indentation level. + if !yaml_parser_unroll_indent(parser, -1) { + return false + } + + // Reset simple keys. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + parser.simple_key_allowed = false + + // Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. + token := yaml_token_t{} + if !yaml_parser_scan_directive(parser, &token) { + return false + } + // Append the token to the queue. + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the DOCUMENT-START or DOCUMENT-END token. +func yaml_parser_fetch_document_indicator(parser *yaml_parser_t, typ yaml_token_type_t) bool { + // Reset the indentation level. + if !yaml_parser_unroll_indent(parser, -1) { + return false + } + + // Reset simple keys. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + parser.simple_key_allowed = false + + // Consume the token. + start_mark := parser.mark + + skip(parser) + skip(parser) + skip(parser) + + end_mark := parser.mark + + // Create the DOCUMENT-START or DOCUMENT-END token. + token := yaml_token_t{ + typ: typ, + start_mark: start_mark, + end_mark: end_mark, + } + // Append the token to the queue. + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token. +func yaml_parser_fetch_flow_collection_start(parser *yaml_parser_t, typ yaml_token_type_t) bool { + // The indicators '[' and '{' may start a simple key. + if !yaml_parser_save_simple_key(parser) { + return false + } + + // Increase the flow level. + if !yaml_parser_increase_flow_level(parser) { + return false + } + + // A simple key may follow the indicators '[' and '{'. + parser.simple_key_allowed = true + + // Consume the token. + start_mark := parser.mark + skip(parser) + end_mark := parser.mark + + // Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. + token := yaml_token_t{ + typ: typ, + start_mark: start_mark, + end_mark: end_mark, + } + // Append the token to the queue. + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token. +func yaml_parser_fetch_flow_collection_end(parser *yaml_parser_t, typ yaml_token_type_t) bool { + // Reset any potential simple key on the current flow level. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + // Decrease the flow level. + if !yaml_parser_decrease_flow_level(parser) { + return false + } + + // No simple keys after the indicators ']' and '}'. + parser.simple_key_allowed = false + + // Consume the token. + + start_mark := parser.mark + skip(parser) + end_mark := parser.mark + + // Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. + token := yaml_token_t{ + typ: typ, + start_mark: start_mark, + end_mark: end_mark, + } + // Append the token to the queue. + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the FLOW-ENTRY token. +func yaml_parser_fetch_flow_entry(parser *yaml_parser_t) bool { + // Reset any potential simple keys on the current flow level. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + // Simple keys are allowed after ','. + parser.simple_key_allowed = true + + // Consume the token. + start_mark := parser.mark + skip(parser) + end_mark := parser.mark + + // Create the FLOW-ENTRY token and append it to the queue. + token := yaml_token_t{ + typ: yaml_FLOW_ENTRY_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the BLOCK-ENTRY token. +func yaml_parser_fetch_block_entry(parser *yaml_parser_t) bool { + // Check if the scanner is in the block context. + if parser.flow_level == 0 { + // Check if we are allowed to start a new entry. + if !parser.simple_key_allowed { + return yaml_parser_set_scanner_error(parser, "", parser.mark, + "block sequence entries are not allowed in this context") + } + // Add the BLOCK-SEQUENCE-START token if needed. + if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_SEQUENCE_START_TOKEN, parser.mark) { + return false + } + } else { + // It is an error for the '-' indicator to occur in the flow context, + // but we let the Parser detect and report about it because the Parser + // is able to point to the context. + } + + // Reset any potential simple keys on the current flow level. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + // Simple keys are allowed after '-'. + parser.simple_key_allowed = true + + // Consume the token. + start_mark := parser.mark + skip(parser) + end_mark := parser.mark + + // Create the BLOCK-ENTRY token and append it to the queue. + token := yaml_token_t{ + typ: yaml_BLOCK_ENTRY_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the KEY token. +func yaml_parser_fetch_key(parser *yaml_parser_t) bool { + + // In the block context, additional checks are required. + if parser.flow_level == 0 { + // Check if we are allowed to start a new key (not nessesary simple). + if !parser.simple_key_allowed { + return yaml_parser_set_scanner_error(parser, "", parser.mark, + "mapping keys are not allowed in this context") + } + // Add the BLOCK-MAPPING-START token if needed. + if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) { + return false + } + } + + // Reset any potential simple keys on the current flow level. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + // Simple keys are allowed after '?' in the block context. + parser.simple_key_allowed = parser.flow_level == 0 + + // Consume the token. + start_mark := parser.mark + skip(parser) + end_mark := parser.mark + + // Create the KEY token and append it to the queue. + token := yaml_token_t{ + typ: yaml_KEY_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the VALUE token. +func yaml_parser_fetch_value(parser *yaml_parser_t) bool { + + simple_key := &parser.simple_keys[len(parser.simple_keys)-1] + + // Have we found a simple key? + if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok { + return false + + } else if valid { + + // Create the KEY token and insert it into the queue. + token := yaml_token_t{ + typ: yaml_KEY_TOKEN, + start_mark: simple_key.mark, + end_mark: simple_key.mark, + } + yaml_insert_token(parser, simple_key.token_number-parser.tokens_parsed, &token) + + // In the block context, we may need to add the BLOCK-MAPPING-START token. + if !yaml_parser_roll_indent(parser, simple_key.mark.column, + simple_key.token_number, + yaml_BLOCK_MAPPING_START_TOKEN, simple_key.mark) { + return false + } + + // Remove the simple key. + simple_key.possible = false + delete(parser.simple_keys_by_tok, simple_key.token_number) + + // A simple key cannot follow another simple key. + parser.simple_key_allowed = false + + } else { + // The ':' indicator follows a complex key. + + // In the block context, extra checks are required. + if parser.flow_level == 0 { + + // Check if we are allowed to start a complex value. + if !parser.simple_key_allowed { + return yaml_parser_set_scanner_error(parser, "", parser.mark, + "mapping values are not allowed in this context") + } + + // Add the BLOCK-MAPPING-START token if needed. + if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) { + return false + } + } + + // Simple keys after ':' are allowed in the block context. + parser.simple_key_allowed = parser.flow_level == 0 + } + + // Consume the token. + start_mark := parser.mark + skip(parser) + end_mark := parser.mark + + // Create the VALUE token and append it to the queue. + token := yaml_token_t{ + typ: yaml_VALUE_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the ALIAS or ANCHOR token. +func yaml_parser_fetch_anchor(parser *yaml_parser_t, typ yaml_token_type_t) bool { + // An anchor or an alias could be a simple key. + if !yaml_parser_save_simple_key(parser) { + return false + } + + // A simple key cannot follow an anchor or an alias. + parser.simple_key_allowed = false + + // Create the ALIAS or ANCHOR token and append it to the queue. + var token yaml_token_t + if !yaml_parser_scan_anchor(parser, &token, typ) { + return false + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the TAG token. +func yaml_parser_fetch_tag(parser *yaml_parser_t) bool { + // A tag could be a simple key. + if !yaml_parser_save_simple_key(parser) { + return false + } + + // A simple key cannot follow a tag. + parser.simple_key_allowed = false + + // Create the TAG token and append it to the queue. + var token yaml_token_t + if !yaml_parser_scan_tag(parser, &token) { + return false + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens. +func yaml_parser_fetch_block_scalar(parser *yaml_parser_t, literal bool) bool { + // Remove any potential simple keys. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + // A simple key may follow a block scalar. + parser.simple_key_allowed = true + + // Create the SCALAR token and append it to the queue. + var token yaml_token_t + if !yaml_parser_scan_block_scalar(parser, &token, literal) { + return false + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens. +func yaml_parser_fetch_flow_scalar(parser *yaml_parser_t, single bool) bool { + // A plain scalar could be a simple key. + if !yaml_parser_save_simple_key(parser) { + return false + } + + // A simple key cannot follow a flow scalar. + parser.simple_key_allowed = false + + // Create the SCALAR token and append it to the queue. + var token yaml_token_t + if !yaml_parser_scan_flow_scalar(parser, &token, single) { + return false + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the SCALAR(...,plain) token. +func yaml_parser_fetch_plain_scalar(parser *yaml_parser_t) bool { + // A plain scalar could be a simple key. + if !yaml_parser_save_simple_key(parser) { + return false + } + + // A simple key cannot follow a flow scalar. + parser.simple_key_allowed = false + + // Create the SCALAR token and append it to the queue. + var token yaml_token_t + if !yaml_parser_scan_plain_scalar(parser, &token) { + return false + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Eat whitespaces and comments until the next token is found. +func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool { + + // Until the next token is not found. + for { + // Allow the BOM mark to start a line. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + if parser.mark.column == 0 && is_bom(parser.buffer, parser.buffer_pos) { + skip(parser) + } + + // Eat whitespaces. + // Tabs are allowed: + // - in the flow context + // - in the block context, but not at the beginning of the line or + // after '-', '?', or ':' (complex value). + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + + for parser.buffer[parser.buffer_pos] == ' ' || ((parser.flow_level > 0 || !parser.simple_key_allowed) && parser.buffer[parser.buffer_pos] == '\t') { + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + // Eat a comment until a line break. + if parser.buffer[parser.buffer_pos] == '#' { + for !is_breakz(parser.buffer, parser.buffer_pos) { + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + } + + // If it is a line break, eat it. + if is_break(parser.buffer, parser.buffer_pos) { + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + skip_line(parser) + + // In the block context, a new line may start a simple key. + if parser.flow_level == 0 { + parser.simple_key_allowed = true + } + } else { + break // We have found a token. + } + } + + return true +} + +// Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token. +// +// Scope: +// %YAML 1.1 # a comment \n +// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +// %TAG !yaml! tag:yaml.org,2002: \n +// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +// +func yaml_parser_scan_directive(parser *yaml_parser_t, token *yaml_token_t) bool { + // Eat '%'. + start_mark := parser.mark + skip(parser) + + // Scan the directive name. + var name []byte + if !yaml_parser_scan_directive_name(parser, start_mark, &name) { + return false + } + + // Is it a YAML directive? + if bytes.Equal(name, []byte("YAML")) { + // Scan the VERSION directive value. + var major, minor int8 + if !yaml_parser_scan_version_directive_value(parser, start_mark, &major, &minor) { + return false + } + end_mark := parser.mark + + // Create a VERSION-DIRECTIVE token. + *token = yaml_token_t{ + typ: yaml_VERSION_DIRECTIVE_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + major: major, + minor: minor, + } + + // Is it a TAG directive? + } else if bytes.Equal(name, []byte("TAG")) { + // Scan the TAG directive value. + var handle, prefix []byte + if !yaml_parser_scan_tag_directive_value(parser, start_mark, &handle, &prefix) { + return false + } + end_mark := parser.mark + + // Create a TAG-DIRECTIVE token. + *token = yaml_token_t{ + typ: yaml_TAG_DIRECTIVE_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + value: handle, + prefix: prefix, + } + + // Unknown directive. + } else { + yaml_parser_set_scanner_error(parser, "while scanning a directive", + start_mark, "found unknown directive name") + return false + } + + // Eat the rest of the line including any comments. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + + for is_blank(parser.buffer, parser.buffer_pos) { + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + if parser.buffer[parser.buffer_pos] == '#' { + for !is_breakz(parser.buffer, parser.buffer_pos) { + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + } + + // Check if we are at the end of the line. + if !is_breakz(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a directive", + start_mark, "did not find expected comment or line break") + return false + } + + // Eat a line break. + if is_break(parser.buffer, parser.buffer_pos) { + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + skip_line(parser) + } + + return true +} + +// Scan the directive name. +// +// Scope: +// %YAML 1.1 # a comment \n +// ^^^^ +// %TAG !yaml! tag:yaml.org,2002: \n +// ^^^ +// +func yaml_parser_scan_directive_name(parser *yaml_parser_t, start_mark yaml_mark_t, name *[]byte) bool { + // Consume the directive name. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + + var s []byte + for is_alpha(parser.buffer, parser.buffer_pos) { + s = read(parser, s) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + // Check if the name is empty. + if len(s) == 0 { + yaml_parser_set_scanner_error(parser, "while scanning a directive", + start_mark, "could not find expected directive name") + return false + } + + // Check for an blank character after the name. + if !is_blankz(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a directive", + start_mark, "found unexpected non-alphabetical character") + return false + } + *name = s + return true +} + +// Scan the value of VERSION-DIRECTIVE. +// +// Scope: +// %YAML 1.1 # a comment \n +// ^^^^^^ +func yaml_parser_scan_version_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, major, minor *int8) bool { + // Eat whitespaces. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + for is_blank(parser.buffer, parser.buffer_pos) { + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + // Consume the major version number. + if !yaml_parser_scan_version_directive_number(parser, start_mark, major) { + return false + } + + // Eat '.'. + if parser.buffer[parser.buffer_pos] != '.' { + return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", + start_mark, "did not find expected digit or '.' character") + } + + skip(parser) + + // Consume the minor version number. + if !yaml_parser_scan_version_directive_number(parser, start_mark, minor) { + return false + } + return true +} + +const max_number_length = 2 + +// Scan the version number of VERSION-DIRECTIVE. +// +// Scope: +// %YAML 1.1 # a comment \n +// ^ +// %YAML 1.1 # a comment \n +// ^ +func yaml_parser_scan_version_directive_number(parser *yaml_parser_t, start_mark yaml_mark_t, number *int8) bool { + + // Repeat while the next character is digit. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + var value, length int8 + for is_digit(parser.buffer, parser.buffer_pos) { + // Check if the number is too long. + length++ + if length > max_number_length { + return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", + start_mark, "found extremely long version number") + } + value = value*10 + int8(as_digit(parser.buffer, parser.buffer_pos)) + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + // Check if the number was present. + if length == 0 { + return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", + start_mark, "did not find expected version number") + } + *number = value + return true +} + +// Scan the value of a TAG-DIRECTIVE token. +// +// Scope: +// %TAG !yaml! tag:yaml.org,2002: \n +// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +// +func yaml_parser_scan_tag_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, handle, prefix *[]byte) bool { + var handle_value, prefix_value []byte + + // Eat whitespaces. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + + for is_blank(parser.buffer, parser.buffer_pos) { + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + // Scan a handle. + if !yaml_parser_scan_tag_handle(parser, true, start_mark, &handle_value) { + return false + } + + // Expect a whitespace. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + if !is_blank(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", + start_mark, "did not find expected whitespace") + return false + } + + // Eat whitespaces. + for is_blank(parser.buffer, parser.buffer_pos) { + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + // Scan a prefix. + if !yaml_parser_scan_tag_uri(parser, true, nil, start_mark, &prefix_value) { + return false + } + + // Expect a whitespace or line break. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + if !is_blankz(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", + start_mark, "did not find expected whitespace or line break") + return false + } + + *handle = handle_value + *prefix = prefix_value + return true +} + +func yaml_parser_scan_anchor(parser *yaml_parser_t, token *yaml_token_t, typ yaml_token_type_t) bool { + var s []byte + + // Eat the indicator character. + start_mark := parser.mark + skip(parser) + + // Consume the value. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + + for is_alpha(parser.buffer, parser.buffer_pos) { + s = read(parser, s) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + end_mark := parser.mark + + /* + * Check if length of the anchor is greater than 0 and it is followed by + * a whitespace character or one of the indicators: + * + * '?', ':', ',', ']', '}', '%', '@', '`'. + */ + + if len(s) == 0 || + !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '?' || + parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == ',' || + parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '}' || + parser.buffer[parser.buffer_pos] == '%' || parser.buffer[parser.buffer_pos] == '@' || + parser.buffer[parser.buffer_pos] == '`') { + context := "while scanning an alias" + if typ == yaml_ANCHOR_TOKEN { + context = "while scanning an anchor" + } + yaml_parser_set_scanner_error(parser, context, start_mark, + "did not find expected alphabetic or numeric character") + return false + } + + // Create a token. + *token = yaml_token_t{ + typ: typ, + start_mark: start_mark, + end_mark: end_mark, + value: s, + } + + return true +} + +/* + * Scan a TAG token. + */ + +func yaml_parser_scan_tag(parser *yaml_parser_t, token *yaml_token_t) bool { + var handle, suffix []byte + + start_mark := parser.mark + + // Check if the tag is in the canonical form. + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + + if parser.buffer[parser.buffer_pos+1] == '<' { + // Keep the handle as '' + + // Eat '!<' + skip(parser) + skip(parser) + + // Consume the tag value. + if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) { + return false + } + + // Check for '>' and eat it. + if parser.buffer[parser.buffer_pos] != '>' { + yaml_parser_set_scanner_error(parser, "while scanning a tag", + start_mark, "did not find the expected '>'") + return false + } + + skip(parser) + } else { + // The tag has either the '!suffix' or the '!handle!suffix' form. + + // First, try to scan a handle. + if !yaml_parser_scan_tag_handle(parser, false, start_mark, &handle) { + return false + } + + // Check if it is, indeed, handle. + if handle[0] == '!' && len(handle) > 1 && handle[len(handle)-1] == '!' { + // Scan the suffix now. + if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) { + return false + } + } else { + // It wasn't a handle after all. Scan the rest of the tag. + if !yaml_parser_scan_tag_uri(parser, false, handle, start_mark, &suffix) { + return false + } + + // Set the handle to '!'. + handle = []byte{'!'} + + // A special case: the '!' tag. Set the handle to '' and the + // suffix to '!'. + if len(suffix) == 0 { + handle, suffix = suffix, handle + } + } + } + + // Check the character which ends the tag. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + if !is_blankz(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a tag", + start_mark, "did not find expected whitespace or line break") + return false + } + + end_mark := parser.mark + + // Create a token. + *token = yaml_token_t{ + typ: yaml_TAG_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + value: handle, + suffix: suffix, + } + return true +} + +// Scan a tag handle. +func yaml_parser_scan_tag_handle(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, handle *[]byte) bool { + // Check the initial '!' character. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + if parser.buffer[parser.buffer_pos] != '!' { + yaml_parser_set_scanner_tag_error(parser, directive, + start_mark, "did not find expected '!'") + return false + } + + var s []byte + + // Copy the '!' character. + s = read(parser, s) + + // Copy all subsequent alphabetical and numerical characters. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + for is_alpha(parser.buffer, parser.buffer_pos) { + s = read(parser, s) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + // Check if the trailing character is '!' and copy it. + if parser.buffer[parser.buffer_pos] == '!' { + s = read(parser, s) + } else { + // It's either the '!' tag or not really a tag handle. If it's a %TAG + // directive, it's an error. If it's a tag token, it must be a part of URI. + if directive && string(s) != "!" { + yaml_parser_set_scanner_tag_error(parser, directive, + start_mark, "did not find expected '!'") + return false + } + } + + *handle = s + return true +} + +// Scan a tag. +func yaml_parser_scan_tag_uri(parser *yaml_parser_t, directive bool, head []byte, start_mark yaml_mark_t, uri *[]byte) bool { + //size_t length = head ? strlen((char *)head) : 0 + var s []byte + hasTag := len(head) > 0 + + // Copy the head if needed. + // + // Note that we don't copy the leading '!' character. + if len(head) > 1 { + s = append(s, head[1:]...) + } + + // Scan the tag. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + + // The set of characters that may appear in URI is as follows: + // + // '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&', + // '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', + // '%'. + // [Go] Convert this into more reasonable logic. + for is_alpha(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == ';' || + parser.buffer[parser.buffer_pos] == '/' || parser.buffer[parser.buffer_pos] == '?' || + parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == '@' || + parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '=' || + parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '$' || + parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '.' || + parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '~' || + parser.buffer[parser.buffer_pos] == '*' || parser.buffer[parser.buffer_pos] == '\'' || + parser.buffer[parser.buffer_pos] == '(' || parser.buffer[parser.buffer_pos] == ')' || + parser.buffer[parser.buffer_pos] == '[' || parser.buffer[parser.buffer_pos] == ']' || + parser.buffer[parser.buffer_pos] == '%' { + // Check if it is a URI-escape sequence. + if parser.buffer[parser.buffer_pos] == '%' { + if !yaml_parser_scan_uri_escapes(parser, directive, start_mark, &s) { + return false + } + } else { + s = read(parser, s) + } + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + hasTag = true + } + + if !hasTag { + yaml_parser_set_scanner_tag_error(parser, directive, + start_mark, "did not find expected tag URI") + return false + } + *uri = s + return true +} + +// Decode an URI-escape sequence corresponding to a single UTF-8 character. +func yaml_parser_scan_uri_escapes(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, s *[]byte) bool { + + // Decode the required number of characters. + w := 1024 + for w > 0 { + // Check for a URI-escaped octet. + if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) { + return false + } + + if !(parser.buffer[parser.buffer_pos] == '%' && + is_hex(parser.buffer, parser.buffer_pos+1) && + is_hex(parser.buffer, parser.buffer_pos+2)) { + return yaml_parser_set_scanner_tag_error(parser, directive, + start_mark, "did not find URI escaped octet") + } + + // Get the octet. + octet := byte((as_hex(parser.buffer, parser.buffer_pos+1) << 4) + as_hex(parser.buffer, parser.buffer_pos+2)) + + // If it is the leading octet, determine the length of the UTF-8 sequence. + if w == 1024 { + w = width(octet) + if w == 0 { + return yaml_parser_set_scanner_tag_error(parser, directive, + start_mark, "found an incorrect leading UTF-8 octet") + } + } else { + // Check if the trailing octet is correct. + if octet&0xC0 != 0x80 { + return yaml_parser_set_scanner_tag_error(parser, directive, + start_mark, "found an incorrect trailing UTF-8 octet") + } + } + + // Copy the octet and move the pointers. + *s = append(*s, octet) + skip(parser) + skip(parser) + skip(parser) + w-- + } + return true +} + +// Scan a block scalar. +func yaml_parser_scan_block_scalar(parser *yaml_parser_t, token *yaml_token_t, literal bool) bool { + // Eat the indicator '|' or '>'. + start_mark := parser.mark + skip(parser) + + // Scan the additional block scalar indicators. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + + // Check for a chomping indicator. + var chomping, increment int + if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' { + // Set the chomping method and eat the indicator. + if parser.buffer[parser.buffer_pos] == '+' { + chomping = +1 + } else { + chomping = -1 + } + skip(parser) + + // Check for an indentation indicator. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + if is_digit(parser.buffer, parser.buffer_pos) { + // Check that the indentation is greater than 0. + if parser.buffer[parser.buffer_pos] == '0' { + yaml_parser_set_scanner_error(parser, "while scanning a block scalar", + start_mark, "found an indentation indicator equal to 0") + return false + } + + // Get the indentation level and eat the indicator. + increment = as_digit(parser.buffer, parser.buffer_pos) + skip(parser) + } + + } else if is_digit(parser.buffer, parser.buffer_pos) { + // Do the same as above, but in the opposite order. + + if parser.buffer[parser.buffer_pos] == '0' { + yaml_parser_set_scanner_error(parser, "while scanning a block scalar", + start_mark, "found an indentation indicator equal to 0") + return false + } + increment = as_digit(parser.buffer, parser.buffer_pos) + skip(parser) + + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' { + if parser.buffer[parser.buffer_pos] == '+' { + chomping = +1 + } else { + chomping = -1 + } + skip(parser) + } + } + + // Eat whitespaces and comments to the end of the line. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + for is_blank(parser.buffer, parser.buffer_pos) { + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + if parser.buffer[parser.buffer_pos] == '#' { + for !is_breakz(parser.buffer, parser.buffer_pos) { + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + } + + // Check if we are at the end of the line. + if !is_breakz(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a block scalar", + start_mark, "did not find expected comment or line break") + return false + } + + // Eat a line break. + if is_break(parser.buffer, parser.buffer_pos) { + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + skip_line(parser) + } + + end_mark := parser.mark + + // Set the indentation level if it was specified. + var indent int + if increment > 0 { + if parser.indent >= 0 { + indent = parser.indent + increment + } else { + indent = increment + } + } + + // Scan the leading line breaks and determine the indentation level if needed. + var s, leading_break, trailing_breaks []byte + if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) { + return false + } + + // Scan the block scalar content. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + var leading_blank, trailing_blank bool + for parser.mark.column == indent && !is_z(parser.buffer, parser.buffer_pos) { + // We are at the beginning of a non-empty line. + + // Is it a trailing whitespace? + trailing_blank = is_blank(parser.buffer, parser.buffer_pos) + + // Check if we need to fold the leading line break. + if !literal && !leading_blank && !trailing_blank && len(leading_break) > 0 && leading_break[0] == '\n' { + // Do we need to join the lines by space? + if len(trailing_breaks) == 0 { + s = append(s, ' ') + } + } else { + s = append(s, leading_break...) + } + leading_break = leading_break[:0] + + // Append the remaining line breaks. + s = append(s, trailing_breaks...) + trailing_breaks = trailing_breaks[:0] + + // Is it a leading whitespace? + leading_blank = is_blank(parser.buffer, parser.buffer_pos) + + // Consume the current line. + for !is_breakz(parser.buffer, parser.buffer_pos) { + s = read(parser, s) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + // Consume the line break. + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + + leading_break = read_line(parser, leading_break) + + // Eat the following indentation spaces and line breaks. + if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) { + return false + } + } + + // Chomp the tail. + if chomping != -1 { + s = append(s, leading_break...) + } + if chomping == 1 { + s = append(s, trailing_breaks...) + } + + // Create a token. + *token = yaml_token_t{ + typ: yaml_SCALAR_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + value: s, + style: yaml_LITERAL_SCALAR_STYLE, + } + if !literal { + token.style = yaml_FOLDED_SCALAR_STYLE + } + return true +} + +// Scan indentation spaces and line breaks for a block scalar. Determine the +// indentation level if needed. +func yaml_parser_scan_block_scalar_breaks(parser *yaml_parser_t, indent *int, breaks *[]byte, start_mark yaml_mark_t, end_mark *yaml_mark_t) bool { + *end_mark = parser.mark + + // Eat the indentation spaces and line breaks. + max_indent := 0 + for { + // Eat the indentation spaces. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + for (*indent == 0 || parser.mark.column < *indent) && is_space(parser.buffer, parser.buffer_pos) { + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + if parser.mark.column > max_indent { + max_indent = parser.mark.column + } + + // Check for a tab character messing the indentation. + if (*indent == 0 || parser.mark.column < *indent) && is_tab(parser.buffer, parser.buffer_pos) { + return yaml_parser_set_scanner_error(parser, "while scanning a block scalar", + start_mark, "found a tab character where an indentation space is expected") + } + + // Have we found a non-empty line? + if !is_break(parser.buffer, parser.buffer_pos) { + break + } + + // Consume the line break. + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + // [Go] Should really be returning breaks instead. + *breaks = read_line(parser, *breaks) + *end_mark = parser.mark + } + + // Determine the indentation level if needed. + if *indent == 0 { + *indent = max_indent + if *indent < parser.indent+1 { + *indent = parser.indent + 1 + } + if *indent < 1 { + *indent = 1 + } + } + return true +} + +// Scan a quoted scalar. +func yaml_parser_scan_flow_scalar(parser *yaml_parser_t, token *yaml_token_t, single bool) bool { + // Eat the left quote. + start_mark := parser.mark + skip(parser) + + // Consume the content of the quoted scalar. + var s, leading_break, trailing_breaks, whitespaces []byte + for { + // Check that there are no document indicators at the beginning of the line. + if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { + return false + } + + if parser.mark.column == 0 && + ((parser.buffer[parser.buffer_pos+0] == '-' && + parser.buffer[parser.buffer_pos+1] == '-' && + parser.buffer[parser.buffer_pos+2] == '-') || + (parser.buffer[parser.buffer_pos+0] == '.' && + parser.buffer[parser.buffer_pos+1] == '.' && + parser.buffer[parser.buffer_pos+2] == '.')) && + is_blankz(parser.buffer, parser.buffer_pos+3) { + yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", + start_mark, "found unexpected document indicator") + return false + } + + // Check for EOF. + if is_z(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", + start_mark, "found unexpected end of stream") + return false + } + + // Consume non-blank characters. + leading_blanks := false + for !is_blankz(parser.buffer, parser.buffer_pos) { + if single && parser.buffer[parser.buffer_pos] == '\'' && parser.buffer[parser.buffer_pos+1] == '\'' { + // Is is an escaped single quote. + s = append(s, '\'') + skip(parser) + skip(parser) + + } else if single && parser.buffer[parser.buffer_pos] == '\'' { + // It is a right single quote. + break + } else if !single && parser.buffer[parser.buffer_pos] == '"' { + // It is a right double quote. + break + + } else if !single && parser.buffer[parser.buffer_pos] == '\\' && is_break(parser.buffer, parser.buffer_pos+1) { + // It is an escaped line break. + if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) { + return false + } + skip(parser) + skip_line(parser) + leading_blanks = true + break + + } else if !single && parser.buffer[parser.buffer_pos] == '\\' { + // It is an escape sequence. + code_length := 0 + + // Check the escape character. + switch parser.buffer[parser.buffer_pos+1] { + case '0': + s = append(s, 0) + case 'a': + s = append(s, '\x07') + case 'b': + s = append(s, '\x08') + case 't', '\t': + s = append(s, '\x09') + case 'n': + s = append(s, '\x0A') + case 'v': + s = append(s, '\x0B') + case 'f': + s = append(s, '\x0C') + case 'r': + s = append(s, '\x0D') + case 'e': + s = append(s, '\x1B') + case ' ': + s = append(s, '\x20') + case '"': + s = append(s, '"') + case '\'': + s = append(s, '\'') + case '\\': + s = append(s, '\\') + case 'N': // NEL (#x85) + s = append(s, '\xC2') + s = append(s, '\x85') + case '_': // #xA0 + s = append(s, '\xC2') + s = append(s, '\xA0') + case 'L': // LS (#x2028) + s = append(s, '\xE2') + s = append(s, '\x80') + s = append(s, '\xA8') + case 'P': // PS (#x2029) + s = append(s, '\xE2') + s = append(s, '\x80') + s = append(s, '\xA9') + case 'x': + code_length = 2 + case 'u': + code_length = 4 + case 'U': + code_length = 8 + default: + yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", + start_mark, "found unknown escape character") + return false + } + + skip(parser) + skip(parser) + + // Consume an arbitrary escape code. + if code_length > 0 { + var value int + + // Scan the character value. + if parser.unread < code_length && !yaml_parser_update_buffer(parser, code_length) { + return false + } + for k := 0; k < code_length; k++ { + if !is_hex(parser.buffer, parser.buffer_pos+k) { + yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", + start_mark, "did not find expected hexdecimal number") + return false + } + value = (value << 4) + as_hex(parser.buffer, parser.buffer_pos+k) + } + + // Check the value and write the character. + if (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF { + yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", + start_mark, "found invalid Unicode character escape code") + return false + } + if value <= 0x7F { + s = append(s, byte(value)) + } else if value <= 0x7FF { + s = append(s, byte(0xC0+(value>>6))) + s = append(s, byte(0x80+(value&0x3F))) + } else if value <= 0xFFFF { + s = append(s, byte(0xE0+(value>>12))) + s = append(s, byte(0x80+((value>>6)&0x3F))) + s = append(s, byte(0x80+(value&0x3F))) + } else { + s = append(s, byte(0xF0+(value>>18))) + s = append(s, byte(0x80+((value>>12)&0x3F))) + s = append(s, byte(0x80+((value>>6)&0x3F))) + s = append(s, byte(0x80+(value&0x3F))) + } + + // Advance the pointer. + for k := 0; k < code_length; k++ { + skip(parser) + } + } + } else { + // It is a non-escaped non-blank character. + s = read(parser, s) + } + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + } + + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + + // Check if we are at the end of the scalar. + if single { + if parser.buffer[parser.buffer_pos] == '\'' { + break + } + } else { + if parser.buffer[parser.buffer_pos] == '"' { + break + } + } + + // Consume blank characters. + for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) { + if is_blank(parser.buffer, parser.buffer_pos) { + // Consume a space or a tab character. + if !leading_blanks { + whitespaces = read(parser, whitespaces) + } else { + skip(parser) + } + } else { + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + + // Check if it is a first line break. + if !leading_blanks { + whitespaces = whitespaces[:0] + leading_break = read_line(parser, leading_break) + leading_blanks = true + } else { + trailing_breaks = read_line(parser, trailing_breaks) + } + } + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + // Join the whitespaces or fold line breaks. + if leading_blanks { + // Do we need to fold line breaks? + if len(leading_break) > 0 && leading_break[0] == '\n' { + if len(trailing_breaks) == 0 { + s = append(s, ' ') + } else { + s = append(s, trailing_breaks...) + } + } else { + s = append(s, leading_break...) + s = append(s, trailing_breaks...) + } + trailing_breaks = trailing_breaks[:0] + leading_break = leading_break[:0] + } else { + s = append(s, whitespaces...) + whitespaces = whitespaces[:0] + } + } + + // Eat the right quote. + skip(parser) + end_mark := parser.mark + + // Create a token. + *token = yaml_token_t{ + typ: yaml_SCALAR_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + value: s, + style: yaml_SINGLE_QUOTED_SCALAR_STYLE, + } + if !single { + token.style = yaml_DOUBLE_QUOTED_SCALAR_STYLE + } + return true +} + +// Scan a plain scalar. +func yaml_parser_scan_plain_scalar(parser *yaml_parser_t, token *yaml_token_t) bool { + + var s, leading_break, trailing_breaks, whitespaces []byte + var leading_blanks bool + var indent = parser.indent + 1 + + start_mark := parser.mark + end_mark := parser.mark + + // Consume the content of the plain scalar. + for { + // Check for a document indicator. + if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { + return false + } + if parser.mark.column == 0 && + ((parser.buffer[parser.buffer_pos+0] == '-' && + parser.buffer[parser.buffer_pos+1] == '-' && + parser.buffer[parser.buffer_pos+2] == '-') || + (parser.buffer[parser.buffer_pos+0] == '.' && + parser.buffer[parser.buffer_pos+1] == '.' && + parser.buffer[parser.buffer_pos+2] == '.')) && + is_blankz(parser.buffer, parser.buffer_pos+3) { + break + } + + // Check for a comment. + if parser.buffer[parser.buffer_pos] == '#' { + break + } + + // Consume non-blank characters. + for !is_blankz(parser.buffer, parser.buffer_pos) { + + // Check for indicators that may end a plain scalar. + if (parser.buffer[parser.buffer_pos] == ':' && is_blankz(parser.buffer, parser.buffer_pos+1)) || + (parser.flow_level > 0 && + (parser.buffer[parser.buffer_pos] == ',' || + parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == '[' || + parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' || + parser.buffer[parser.buffer_pos] == '}')) { + break + } + + // Check if we need to join whitespaces and breaks. + if leading_blanks || len(whitespaces) > 0 { + if leading_blanks { + // Do we need to fold line breaks? + if leading_break[0] == '\n' { + if len(trailing_breaks) == 0 { + s = append(s, ' ') + } else { + s = append(s, trailing_breaks...) + } + } else { + s = append(s, leading_break...) + s = append(s, trailing_breaks...) + } + trailing_breaks = trailing_breaks[:0] + leading_break = leading_break[:0] + leading_blanks = false + } else { + s = append(s, whitespaces...) + whitespaces = whitespaces[:0] + } + } + + // Copy the character. + s = read(parser, s) + + end_mark = parser.mark + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + } + + // Is it the end? + if !(is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos)) { + break + } + + // Consume blank characters. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + + for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) { + if is_blank(parser.buffer, parser.buffer_pos) { + + // Check for tab characters that abuse indentation. + if leading_blanks && parser.mark.column < indent && is_tab(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", + start_mark, "found a tab character that violates indentation") + return false + } + + // Consume a space or a tab character. + if !leading_blanks { + whitespaces = read(parser, whitespaces) + } else { + skip(parser) + } + } else { + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + + // Check if it is a first line break. + if !leading_blanks { + whitespaces = whitespaces[:0] + leading_break = read_line(parser, leading_break) + leading_blanks = true + } else { + trailing_breaks = read_line(parser, trailing_breaks) + } + } + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + // Check indentation level. + if parser.flow_level == 0 && parser.mark.column < indent { + break + } + } + + // Create a token. + *token = yaml_token_t{ + typ: yaml_SCALAR_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + value: s, + style: yaml_PLAIN_SCALAR_STYLE, + } + + // Note that we change the 'simple_key_allowed' flag. + if leading_blanks { + parser.simple_key_allowed = true + } + return true +} diff --git a/goyaml.v2/sorter.go b/goyaml.v2/sorter.go new file mode 100644 index 0000000..4c45e66 --- /dev/null +++ b/goyaml.v2/sorter.go @@ -0,0 +1,113 @@ +package yaml + +import ( + "reflect" + "unicode" +) + +type keyList []reflect.Value + +func (l keyList) Len() int { return len(l) } +func (l keyList) Swap(i, j int) { l[i], l[j] = l[j], l[i] } +func (l keyList) Less(i, j int) bool { + a := l[i] + b := l[j] + ak := a.Kind() + bk := b.Kind() + for (ak == reflect.Interface || ak == reflect.Ptr) && !a.IsNil() { + a = a.Elem() + ak = a.Kind() + } + for (bk == reflect.Interface || bk == reflect.Ptr) && !b.IsNil() { + b = b.Elem() + bk = b.Kind() + } + af, aok := keyFloat(a) + bf, bok := keyFloat(b) + if aok && bok { + if af != bf { + return af < bf + } + if ak != bk { + return ak < bk + } + return numLess(a, b) + } + if ak != reflect.String || bk != reflect.String { + return ak < bk + } + ar, br := []rune(a.String()), []rune(b.String()) + for i := 0; i < len(ar) && i < len(br); i++ { + if ar[i] == br[i] { + continue + } + al := unicode.IsLetter(ar[i]) + bl := unicode.IsLetter(br[i]) + if al && bl { + return ar[i] < br[i] + } + if al || bl { + return bl + } + var ai, bi int + var an, bn int64 + if ar[i] == '0' || br[i] == '0' { + for j := i-1; j >= 0 && unicode.IsDigit(ar[j]); j-- { + if ar[j] != '0' { + an = 1 + bn = 1 + break + } + } + } + for ai = i; ai < len(ar) && unicode.IsDigit(ar[ai]); ai++ { + an = an*10 + int64(ar[ai]-'0') + } + for bi = i; bi < len(br) && unicode.IsDigit(br[bi]); bi++ { + bn = bn*10 + int64(br[bi]-'0') + } + if an != bn { + return an < bn + } + if ai != bi { + return ai < bi + } + return ar[i] < br[i] + } + return len(ar) < len(br) +} + +// keyFloat returns a float value for v if it is a number/bool +// and whether it is a number/bool or not. +func keyFloat(v reflect.Value) (f float64, ok bool) { + switch v.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return float64(v.Int()), true + case reflect.Float32, reflect.Float64: + return v.Float(), true + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return float64(v.Uint()), true + case reflect.Bool: + if v.Bool() { + return 1, true + } + return 0, true + } + return 0, false +} + +// numLess returns whether a < b. +// a and b must necessarily have the same kind. +func numLess(a, b reflect.Value) bool { + switch a.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return a.Int() < b.Int() + case reflect.Float32, reflect.Float64: + return a.Float() < b.Float() + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return a.Uint() < b.Uint() + case reflect.Bool: + return !a.Bool() && b.Bool() + } + panic("not a number") +} diff --git a/goyaml.v2/suite_test.go b/goyaml.v2/suite_test.go new file mode 100644 index 0000000..c5cf1ed --- /dev/null +++ b/goyaml.v2/suite_test.go @@ -0,0 +1,12 @@ +package yaml_test + +import ( + . "gopkg.in/check.v1" + "testing" +) + +func Test(t *testing.T) { TestingT(t) } + +type S struct{} + +var _ = Suite(&S{}) diff --git a/goyaml.v2/writerc.go b/goyaml.v2/writerc.go new file mode 100644 index 0000000..a2dde60 --- /dev/null +++ b/goyaml.v2/writerc.go @@ -0,0 +1,26 @@ +package yaml + +// Set the writer error and return false. +func yaml_emitter_set_writer_error(emitter *yaml_emitter_t, problem string) bool { + emitter.error = yaml_WRITER_ERROR + emitter.problem = problem + return false +} + +// Flush the output buffer. +func yaml_emitter_flush(emitter *yaml_emitter_t) bool { + if emitter.write_handler == nil { + panic("write handler not set") + } + + // Check if the buffer is empty. + if emitter.buffer_pos == 0 { + return true + } + + if err := emitter.write_handler(emitter, emitter.buffer[:emitter.buffer_pos]); err != nil { + return yaml_emitter_set_writer_error(emitter, "write error: "+err.Error()) + } + emitter.buffer_pos = 0 + return true +} diff --git a/goyaml.v2/yaml.go b/goyaml.v2/yaml.go new file mode 100644 index 0000000..3081388 --- /dev/null +++ b/goyaml.v2/yaml.go @@ -0,0 +1,478 @@ +// Package yaml implements YAML support for the Go language. +// +// Source code and other details for the project are available at GitHub: +// +// https://github.com/go-yaml/yaml +// +package yaml + +import ( + "errors" + "fmt" + "io" + "reflect" + "strings" + "sync" +) + +// MapSlice encodes and decodes as a YAML map. +// The order of keys is preserved when encoding and decoding. +type MapSlice []MapItem + +// MapItem is an item in a MapSlice. +type MapItem struct { + Key, Value interface{} +} + +// The Unmarshaler interface may be implemented by types to customize their +// behavior when being unmarshaled from a YAML document. The UnmarshalYAML +// method receives a function that may be called to unmarshal the original +// YAML value into a field or variable. It is safe to call the unmarshal +// function parameter more than once if necessary. +type Unmarshaler interface { + UnmarshalYAML(unmarshal func(interface{}) error) error +} + +// The Marshaler interface may be implemented by types to customize their +// behavior when being marshaled into a YAML document. The returned value +// is marshaled in place of the original value implementing Marshaler. +// +// If an error is returned by MarshalYAML, the marshaling procedure stops +// and returns with the provided error. +type Marshaler interface { + MarshalYAML() (interface{}, error) +} + +// Unmarshal decodes the first document found within the in byte slice +// and assigns decoded values into the out value. +// +// Maps and pointers (to a struct, string, int, etc) are accepted as out +// values. If an internal pointer within a struct is not initialized, +// the yaml package will initialize it if necessary for unmarshalling +// the provided data. The out parameter must not be nil. +// +// The type of the decoded values should be compatible with the respective +// values in out. If one or more values cannot be decoded due to a type +// mismatches, decoding continues partially until the end of the YAML +// content, and a *yaml.TypeError is returned with details for all +// missed values. +// +// Struct fields are only unmarshalled if they are exported (have an +// upper case first letter), and are unmarshalled using the field name +// lowercased as the default key. Custom keys may be defined via the +// "yaml" name in the field tag: the content preceding the first comma +// is used as the key, and the following comma-separated options are +// used to tweak the marshalling process (see Marshal). +// Conflicting names result in a runtime error. +// +// For example: +// +// type T struct { +// F int `yaml:"a,omitempty"` +// B int +// } +// var t T +// yaml.Unmarshal([]byte("a: 1\nb: 2"), &t) +// +// See the documentation of Marshal for the format of tags and a list of +// supported tag options. +// +func Unmarshal(in []byte, out interface{}) (err error) { + return unmarshal(in, out, false) +} + +// UnmarshalStrict is like Unmarshal except that any fields that are found +// in the data that do not have corresponding struct members, or mapping +// keys that are duplicates, will result in +// an error. +func UnmarshalStrict(in []byte, out interface{}) (err error) { + return unmarshal(in, out, true) +} + +// A Decoder reads and decodes YAML values from an input stream. +type Decoder struct { + strict bool + parser *parser +} + +// NewDecoder returns a new decoder that reads from r. +// +// The decoder introduces its own buffering and may read +// data from r beyond the YAML values requested. +func NewDecoder(r io.Reader) *Decoder { + return &Decoder{ + parser: newParserFromReader(r), + } +} + +// SetStrict sets whether strict decoding behaviour is enabled when +// decoding items in the data (see UnmarshalStrict). By default, decoding is not strict. +func (dec *Decoder) SetStrict(strict bool) { + dec.strict = strict +} + +// Decode reads the next YAML-encoded value from its input +// and stores it in the value pointed to by v. +// +// See the documentation for Unmarshal for details about the +// conversion of YAML into a Go value. +func (dec *Decoder) Decode(v interface{}) (err error) { + d := newDecoder(dec.strict) + defer handleErr(&err) + node := dec.parser.parse() + if node == nil { + return io.EOF + } + out := reflect.ValueOf(v) + if out.Kind() == reflect.Ptr && !out.IsNil() { + out = out.Elem() + } + d.unmarshal(node, out) + if len(d.terrors) > 0 { + return &TypeError{d.terrors} + } + return nil +} + +func unmarshal(in []byte, out interface{}, strict bool) (err error) { + defer handleErr(&err) + d := newDecoder(strict) + p := newParser(in) + defer p.destroy() + node := p.parse() + if node != nil { + v := reflect.ValueOf(out) + if v.Kind() == reflect.Ptr && !v.IsNil() { + v = v.Elem() + } + d.unmarshal(node, v) + } + if len(d.terrors) > 0 { + return &TypeError{d.terrors} + } + return nil +} + +// Marshal serializes the value provided into a YAML document. The structure +// of the generated document will reflect the structure of the value itself. +// Maps and pointers (to struct, string, int, etc) are accepted as the in value. +// +// Struct fields are only marshalled if they are exported (have an upper case +// first letter), and are marshalled using the field name lowercased as the +// default key. Custom keys may be defined via the "yaml" name in the field +// tag: the content preceding the first comma is used as the key, and the +// following comma-separated options are used to tweak the marshalling process. +// Conflicting names result in a runtime error. +// +// The field tag format accepted is: +// +// `(...) yaml:"[][,[,]]" (...)` +// +// The following flags are currently supported: +// +// omitempty Only include the field if it's not set to the zero +// value for the type or to empty slices or maps. +// Zero valued structs will be omitted if all their public +// fields are zero, unless they implement an IsZero +// method (see the IsZeroer interface type), in which +// case the field will be excluded if IsZero returns true. +// +// flow Marshal using a flow style (useful for structs, +// sequences and maps). +// +// inline Inline the field, which must be a struct or a map, +// causing all of its fields or keys to be processed as if +// they were part of the outer struct. For maps, keys must +// not conflict with the yaml keys of other struct fields. +// +// In addition, if the key is "-", the field is ignored. +// +// For example: +// +// type T struct { +// F int `yaml:"a,omitempty"` +// B int +// } +// yaml.Marshal(&T{B: 2}) // Returns "b: 2\n" +// yaml.Marshal(&T{F: 1}} // Returns "a: 1\nb: 0\n" +// +func Marshal(in interface{}) (out []byte, err error) { + defer handleErr(&err) + e := newEncoder() + defer e.destroy() + e.marshalDoc("", reflect.ValueOf(in)) + e.finish() + out = e.out + return +} + +// An Encoder writes YAML values to an output stream. +type Encoder struct { + encoder *encoder +} + +// NewEncoder returns a new encoder that writes to w. +// The Encoder should be closed after use to flush all data +// to w. +func NewEncoder(w io.Writer) *Encoder { + return &Encoder{ + encoder: newEncoderWithWriter(w), + } +} + +// Encode writes the YAML encoding of v to the stream. +// If multiple items are encoded to the stream, the +// second and subsequent document will be preceded +// with a "---" document separator, but the first will not. +// +// See the documentation for Marshal for details about the conversion of Go +// values to YAML. +func (e *Encoder) Encode(v interface{}) (err error) { + defer handleErr(&err) + e.encoder.marshalDoc("", reflect.ValueOf(v)) + return nil +} + +// Close closes the encoder by writing any remaining data. +// It does not write a stream terminating string "...". +func (e *Encoder) Close() (err error) { + defer handleErr(&err) + e.encoder.finish() + return nil +} + +func handleErr(err *error) { + if v := recover(); v != nil { + if e, ok := v.(yamlError); ok { + *err = e.err + } else { + panic(v) + } + } +} + +type yamlError struct { + err error +} + +func fail(err error) { + panic(yamlError{err}) +} + +func failf(format string, args ...interface{}) { + panic(yamlError{fmt.Errorf("yaml: "+format, args...)}) +} + +// A TypeError is returned by Unmarshal when one or more fields in +// the YAML document cannot be properly decoded into the requested +// types. When this error is returned, the value is still +// unmarshaled partially. +type TypeError struct { + Errors []string +} + +func (e *TypeError) Error() string { + return fmt.Sprintf("yaml: unmarshal errors:\n %s", strings.Join(e.Errors, "\n ")) +} + +// -------------------------------------------------------------------------- +// Maintain a mapping of keys to structure field indexes + +// The code in this section was copied from mgo/bson. + +// structInfo holds details for the serialization of fields of +// a given struct. +type structInfo struct { + FieldsMap map[string]fieldInfo + FieldsList []fieldInfo + + // InlineMap is the number of the field in the struct that + // contains an ,inline map, or -1 if there's none. + InlineMap int +} + +type fieldInfo struct { + Key string + Num int + OmitEmpty bool + Flow bool + // Id holds the unique field identifier, so we can cheaply + // check for field duplicates without maintaining an extra map. + Id int + + // Inline holds the field index if the field is part of an inlined struct. + Inline []int +} + +var structMap = make(map[reflect.Type]*structInfo) +var fieldMapMutex sync.RWMutex + +func getStructInfo(st reflect.Type) (*structInfo, error) { + fieldMapMutex.RLock() + sinfo, found := structMap[st] + fieldMapMutex.RUnlock() + if found { + return sinfo, nil + } + + n := st.NumField() + fieldsMap := make(map[string]fieldInfo) + fieldsList := make([]fieldInfo, 0, n) + inlineMap := -1 + for i := 0; i != n; i++ { + field := st.Field(i) + if field.PkgPath != "" && !field.Anonymous { + continue // Private field + } + + info := fieldInfo{Num: i} + + tag := field.Tag.Get("yaml") + if tag == "" && strings.Index(string(field.Tag), ":") < 0 { + tag = string(field.Tag) + } + if tag == "-" { + continue + } + + inline := false + fields := strings.Split(tag, ",") + if len(fields) > 1 { + for _, flag := range fields[1:] { + switch flag { + case "omitempty": + info.OmitEmpty = true + case "flow": + info.Flow = true + case "inline": + inline = true + default: + return nil, errors.New(fmt.Sprintf("Unsupported flag %q in tag %q of type %s", flag, tag, st)) + } + } + tag = fields[0] + } + + if inline { + switch field.Type.Kind() { + case reflect.Map: + if inlineMap >= 0 { + return nil, errors.New("Multiple ,inline maps in struct " + st.String()) + } + if field.Type.Key() != reflect.TypeOf("") { + return nil, errors.New("Option ,inline needs a map with string keys in struct " + st.String()) + } + inlineMap = info.Num + case reflect.Struct: + sinfo, err := getStructInfo(field.Type) + if err != nil { + return nil, err + } + for _, finfo := range sinfo.FieldsList { + if _, found := fieldsMap[finfo.Key]; found { + msg := "Duplicated key '" + finfo.Key + "' in struct " + st.String() + return nil, errors.New(msg) + } + if finfo.Inline == nil { + finfo.Inline = []int{i, finfo.Num} + } else { + finfo.Inline = append([]int{i}, finfo.Inline...) + } + finfo.Id = len(fieldsList) + fieldsMap[finfo.Key] = finfo + fieldsList = append(fieldsList, finfo) + } + default: + //return nil, errors.New("Option ,inline needs a struct value or map field") + return nil, errors.New("Option ,inline needs a struct value field") + } + continue + } + + if tag != "" { + info.Key = tag + } else { + info.Key = strings.ToLower(field.Name) + } + + if _, found = fieldsMap[info.Key]; found { + msg := "Duplicated key '" + info.Key + "' in struct " + st.String() + return nil, errors.New(msg) + } + + info.Id = len(fieldsList) + fieldsList = append(fieldsList, info) + fieldsMap[info.Key] = info + } + + sinfo = &structInfo{ + FieldsMap: fieldsMap, + FieldsList: fieldsList, + InlineMap: inlineMap, + } + + fieldMapMutex.Lock() + structMap[st] = sinfo + fieldMapMutex.Unlock() + return sinfo, nil +} + +// IsZeroer is used to check whether an object is zero to +// determine whether it should be omitted when marshaling +// with the omitempty flag. One notable implementation +// is time.Time. +type IsZeroer interface { + IsZero() bool +} + +func isZero(v reflect.Value) bool { + kind := v.Kind() + if z, ok := v.Interface().(IsZeroer); ok { + if (kind == reflect.Ptr || kind == reflect.Interface) && v.IsNil() { + return true + } + return z.IsZero() + } + switch kind { + case reflect.String: + return len(v.String()) == 0 + case reflect.Interface, reflect.Ptr: + return v.IsNil() + case reflect.Slice: + return v.Len() == 0 + case reflect.Map: + return v.Len() == 0 + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return v.Int() == 0 + case reflect.Float32, reflect.Float64: + return v.Float() == 0 + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return v.Uint() == 0 + case reflect.Bool: + return !v.Bool() + case reflect.Struct: + vt := v.Type() + for i := v.NumField() - 1; i >= 0; i-- { + if vt.Field(i).PkgPath != "" { + continue // Private field + } + if !isZero(v.Field(i)) { + return false + } + } + return true + } + return false +} + +// FutureLineWrap globally disables line wrapping when encoding long strings. +// This is a temporary and thus deprecated method introduced to faciliate +// migration towards v3, which offers more control of line lengths on +// individual encodings, and has a default matching the behavior introduced +// by this function. +// +// The default formatting of v2 was erroneously changed in v2.3.0 and reverted +// in v2.4.0, at which point this function was introduced to help migration. +func FutureLineWrap() { + disableLineWrapping = true +} diff --git a/goyaml.v2/yamlh.go b/goyaml.v2/yamlh.go new file mode 100644 index 0000000..f6a9c8e --- /dev/null +++ b/goyaml.v2/yamlh.go @@ -0,0 +1,739 @@ +package yaml + +import ( + "fmt" + "io" +) + +// The version directive data. +type yaml_version_directive_t struct { + major int8 // The major version number. + minor int8 // The minor version number. +} + +// The tag directive data. +type yaml_tag_directive_t struct { + handle []byte // The tag handle. + prefix []byte // The tag prefix. +} + +type yaml_encoding_t int + +// The stream encoding. +const ( + // Let the parser choose the encoding. + yaml_ANY_ENCODING yaml_encoding_t = iota + + yaml_UTF8_ENCODING // The default UTF-8 encoding. + yaml_UTF16LE_ENCODING // The UTF-16-LE encoding with BOM. + yaml_UTF16BE_ENCODING // The UTF-16-BE encoding with BOM. +) + +type yaml_break_t int + +// Line break types. +const ( + // Let the parser choose the break type. + yaml_ANY_BREAK yaml_break_t = iota + + yaml_CR_BREAK // Use CR for line breaks (Mac style). + yaml_LN_BREAK // Use LN for line breaks (Unix style). + yaml_CRLN_BREAK // Use CR LN for line breaks (DOS style). +) + +type yaml_error_type_t int + +// Many bad things could happen with the parser and emitter. +const ( + // No error is produced. + yaml_NO_ERROR yaml_error_type_t = iota + + yaml_MEMORY_ERROR // Cannot allocate or reallocate a block of memory. + yaml_READER_ERROR // Cannot read or decode the input stream. + yaml_SCANNER_ERROR // Cannot scan the input stream. + yaml_PARSER_ERROR // Cannot parse the input stream. + yaml_COMPOSER_ERROR // Cannot compose a YAML document. + yaml_WRITER_ERROR // Cannot write to the output stream. + yaml_EMITTER_ERROR // Cannot emit a YAML stream. +) + +// The pointer position. +type yaml_mark_t struct { + index int // The position index. + line int // The position line. + column int // The position column. +} + +// Node Styles + +type yaml_style_t int8 + +type yaml_scalar_style_t yaml_style_t + +// Scalar styles. +const ( + // Let the emitter choose the style. + yaml_ANY_SCALAR_STYLE yaml_scalar_style_t = iota + + yaml_PLAIN_SCALAR_STYLE // The plain scalar style. + yaml_SINGLE_QUOTED_SCALAR_STYLE // The single-quoted scalar style. + yaml_DOUBLE_QUOTED_SCALAR_STYLE // The double-quoted scalar style. + yaml_LITERAL_SCALAR_STYLE // The literal scalar style. + yaml_FOLDED_SCALAR_STYLE // The folded scalar style. +) + +type yaml_sequence_style_t yaml_style_t + +// Sequence styles. +const ( + // Let the emitter choose the style. + yaml_ANY_SEQUENCE_STYLE yaml_sequence_style_t = iota + + yaml_BLOCK_SEQUENCE_STYLE // The block sequence style. + yaml_FLOW_SEQUENCE_STYLE // The flow sequence style. +) + +type yaml_mapping_style_t yaml_style_t + +// Mapping styles. +const ( + // Let the emitter choose the style. + yaml_ANY_MAPPING_STYLE yaml_mapping_style_t = iota + + yaml_BLOCK_MAPPING_STYLE // The block mapping style. + yaml_FLOW_MAPPING_STYLE // The flow mapping style. +) + +// Tokens + +type yaml_token_type_t int + +// Token types. +const ( + // An empty token. + yaml_NO_TOKEN yaml_token_type_t = iota + + yaml_STREAM_START_TOKEN // A STREAM-START token. + yaml_STREAM_END_TOKEN // A STREAM-END token. + + yaml_VERSION_DIRECTIVE_TOKEN // A VERSION-DIRECTIVE token. + yaml_TAG_DIRECTIVE_TOKEN // A TAG-DIRECTIVE token. + yaml_DOCUMENT_START_TOKEN // A DOCUMENT-START token. + yaml_DOCUMENT_END_TOKEN // A DOCUMENT-END token. + + yaml_BLOCK_SEQUENCE_START_TOKEN // A BLOCK-SEQUENCE-START token. + yaml_BLOCK_MAPPING_START_TOKEN // A BLOCK-SEQUENCE-END token. + yaml_BLOCK_END_TOKEN // A BLOCK-END token. + + yaml_FLOW_SEQUENCE_START_TOKEN // A FLOW-SEQUENCE-START token. + yaml_FLOW_SEQUENCE_END_TOKEN // A FLOW-SEQUENCE-END token. + yaml_FLOW_MAPPING_START_TOKEN // A FLOW-MAPPING-START token. + yaml_FLOW_MAPPING_END_TOKEN // A FLOW-MAPPING-END token. + + yaml_BLOCK_ENTRY_TOKEN // A BLOCK-ENTRY token. + yaml_FLOW_ENTRY_TOKEN // A FLOW-ENTRY token. + yaml_KEY_TOKEN // A KEY token. + yaml_VALUE_TOKEN // A VALUE token. + + yaml_ALIAS_TOKEN // An ALIAS token. + yaml_ANCHOR_TOKEN // An ANCHOR token. + yaml_TAG_TOKEN // A TAG token. + yaml_SCALAR_TOKEN // A SCALAR token. +) + +func (tt yaml_token_type_t) String() string { + switch tt { + case yaml_NO_TOKEN: + return "yaml_NO_TOKEN" + case yaml_STREAM_START_TOKEN: + return "yaml_STREAM_START_TOKEN" + case yaml_STREAM_END_TOKEN: + return "yaml_STREAM_END_TOKEN" + case yaml_VERSION_DIRECTIVE_TOKEN: + return "yaml_VERSION_DIRECTIVE_TOKEN" + case yaml_TAG_DIRECTIVE_TOKEN: + return "yaml_TAG_DIRECTIVE_TOKEN" + case yaml_DOCUMENT_START_TOKEN: + return "yaml_DOCUMENT_START_TOKEN" + case yaml_DOCUMENT_END_TOKEN: + return "yaml_DOCUMENT_END_TOKEN" + case yaml_BLOCK_SEQUENCE_START_TOKEN: + return "yaml_BLOCK_SEQUENCE_START_TOKEN" + case yaml_BLOCK_MAPPING_START_TOKEN: + return "yaml_BLOCK_MAPPING_START_TOKEN" + case yaml_BLOCK_END_TOKEN: + return "yaml_BLOCK_END_TOKEN" + case yaml_FLOW_SEQUENCE_START_TOKEN: + return "yaml_FLOW_SEQUENCE_START_TOKEN" + case yaml_FLOW_SEQUENCE_END_TOKEN: + return "yaml_FLOW_SEQUENCE_END_TOKEN" + case yaml_FLOW_MAPPING_START_TOKEN: + return "yaml_FLOW_MAPPING_START_TOKEN" + case yaml_FLOW_MAPPING_END_TOKEN: + return "yaml_FLOW_MAPPING_END_TOKEN" + case yaml_BLOCK_ENTRY_TOKEN: + return "yaml_BLOCK_ENTRY_TOKEN" + case yaml_FLOW_ENTRY_TOKEN: + return "yaml_FLOW_ENTRY_TOKEN" + case yaml_KEY_TOKEN: + return "yaml_KEY_TOKEN" + case yaml_VALUE_TOKEN: + return "yaml_VALUE_TOKEN" + case yaml_ALIAS_TOKEN: + return "yaml_ALIAS_TOKEN" + case yaml_ANCHOR_TOKEN: + return "yaml_ANCHOR_TOKEN" + case yaml_TAG_TOKEN: + return "yaml_TAG_TOKEN" + case yaml_SCALAR_TOKEN: + return "yaml_SCALAR_TOKEN" + } + return "" +} + +// The token structure. +type yaml_token_t struct { + // The token type. + typ yaml_token_type_t + + // The start/end of the token. + start_mark, end_mark yaml_mark_t + + // The stream encoding (for yaml_STREAM_START_TOKEN). + encoding yaml_encoding_t + + // The alias/anchor/scalar value or tag/tag directive handle + // (for yaml_ALIAS_TOKEN, yaml_ANCHOR_TOKEN, yaml_SCALAR_TOKEN, yaml_TAG_TOKEN, yaml_TAG_DIRECTIVE_TOKEN). + value []byte + + // The tag suffix (for yaml_TAG_TOKEN). + suffix []byte + + // The tag directive prefix (for yaml_TAG_DIRECTIVE_TOKEN). + prefix []byte + + // The scalar style (for yaml_SCALAR_TOKEN). + style yaml_scalar_style_t + + // The version directive major/minor (for yaml_VERSION_DIRECTIVE_TOKEN). + major, minor int8 +} + +// Events + +type yaml_event_type_t int8 + +// Event types. +const ( + // An empty event. + yaml_NO_EVENT yaml_event_type_t = iota + + yaml_STREAM_START_EVENT // A STREAM-START event. + yaml_STREAM_END_EVENT // A STREAM-END event. + yaml_DOCUMENT_START_EVENT // A DOCUMENT-START event. + yaml_DOCUMENT_END_EVENT // A DOCUMENT-END event. + yaml_ALIAS_EVENT // An ALIAS event. + yaml_SCALAR_EVENT // A SCALAR event. + yaml_SEQUENCE_START_EVENT // A SEQUENCE-START event. + yaml_SEQUENCE_END_EVENT // A SEQUENCE-END event. + yaml_MAPPING_START_EVENT // A MAPPING-START event. + yaml_MAPPING_END_EVENT // A MAPPING-END event. +) + +var eventStrings = []string{ + yaml_NO_EVENT: "none", + yaml_STREAM_START_EVENT: "stream start", + yaml_STREAM_END_EVENT: "stream end", + yaml_DOCUMENT_START_EVENT: "document start", + yaml_DOCUMENT_END_EVENT: "document end", + yaml_ALIAS_EVENT: "alias", + yaml_SCALAR_EVENT: "scalar", + yaml_SEQUENCE_START_EVENT: "sequence start", + yaml_SEQUENCE_END_EVENT: "sequence end", + yaml_MAPPING_START_EVENT: "mapping start", + yaml_MAPPING_END_EVENT: "mapping end", +} + +func (e yaml_event_type_t) String() string { + if e < 0 || int(e) >= len(eventStrings) { + return fmt.Sprintf("unknown event %d", e) + } + return eventStrings[e] +} + +// The event structure. +type yaml_event_t struct { + + // The event type. + typ yaml_event_type_t + + // The start and end of the event. + start_mark, end_mark yaml_mark_t + + // The document encoding (for yaml_STREAM_START_EVENT). + encoding yaml_encoding_t + + // The version directive (for yaml_DOCUMENT_START_EVENT). + version_directive *yaml_version_directive_t + + // The list of tag directives (for yaml_DOCUMENT_START_EVENT). + tag_directives []yaml_tag_directive_t + + // The anchor (for yaml_SCALAR_EVENT, yaml_SEQUENCE_START_EVENT, yaml_MAPPING_START_EVENT, yaml_ALIAS_EVENT). + anchor []byte + + // The tag (for yaml_SCALAR_EVENT, yaml_SEQUENCE_START_EVENT, yaml_MAPPING_START_EVENT). + tag []byte + + // The scalar value (for yaml_SCALAR_EVENT). + value []byte + + // Is the document start/end indicator implicit, or the tag optional? + // (for yaml_DOCUMENT_START_EVENT, yaml_DOCUMENT_END_EVENT, yaml_SEQUENCE_START_EVENT, yaml_MAPPING_START_EVENT, yaml_SCALAR_EVENT). + implicit bool + + // Is the tag optional for any non-plain style? (for yaml_SCALAR_EVENT). + quoted_implicit bool + + // The style (for yaml_SCALAR_EVENT, yaml_SEQUENCE_START_EVENT, yaml_MAPPING_START_EVENT). + style yaml_style_t +} + +func (e *yaml_event_t) scalar_style() yaml_scalar_style_t { return yaml_scalar_style_t(e.style) } +func (e *yaml_event_t) sequence_style() yaml_sequence_style_t { return yaml_sequence_style_t(e.style) } +func (e *yaml_event_t) mapping_style() yaml_mapping_style_t { return yaml_mapping_style_t(e.style) } + +// Nodes + +const ( + yaml_NULL_TAG = "tag:yaml.org,2002:null" // The tag !!null with the only possible value: null. + yaml_BOOL_TAG = "tag:yaml.org,2002:bool" // The tag !!bool with the values: true and false. + yaml_STR_TAG = "tag:yaml.org,2002:str" // The tag !!str for string values. + yaml_INT_TAG = "tag:yaml.org,2002:int" // The tag !!int for integer values. + yaml_FLOAT_TAG = "tag:yaml.org,2002:float" // The tag !!float for float values. + yaml_TIMESTAMP_TAG = "tag:yaml.org,2002:timestamp" // The tag !!timestamp for date and time values. + + yaml_SEQ_TAG = "tag:yaml.org,2002:seq" // The tag !!seq is used to denote sequences. + yaml_MAP_TAG = "tag:yaml.org,2002:map" // The tag !!map is used to denote mapping. + + // Not in original libyaml. + yaml_BINARY_TAG = "tag:yaml.org,2002:binary" + yaml_MERGE_TAG = "tag:yaml.org,2002:merge" + + yaml_DEFAULT_SCALAR_TAG = yaml_STR_TAG // The default scalar tag is !!str. + yaml_DEFAULT_SEQUENCE_TAG = yaml_SEQ_TAG // The default sequence tag is !!seq. + yaml_DEFAULT_MAPPING_TAG = yaml_MAP_TAG // The default mapping tag is !!map. +) + +type yaml_node_type_t int + +// Node types. +const ( + // An empty node. + yaml_NO_NODE yaml_node_type_t = iota + + yaml_SCALAR_NODE // A scalar node. + yaml_SEQUENCE_NODE // A sequence node. + yaml_MAPPING_NODE // A mapping node. +) + +// An element of a sequence node. +type yaml_node_item_t int + +// An element of a mapping node. +type yaml_node_pair_t struct { + key int // The key of the element. + value int // The value of the element. +} + +// The node structure. +type yaml_node_t struct { + typ yaml_node_type_t // The node type. + tag []byte // The node tag. + + // The node data. + + // The scalar parameters (for yaml_SCALAR_NODE). + scalar struct { + value []byte // The scalar value. + length int // The length of the scalar value. + style yaml_scalar_style_t // The scalar style. + } + + // The sequence parameters (for YAML_SEQUENCE_NODE). + sequence struct { + items_data []yaml_node_item_t // The stack of sequence items. + style yaml_sequence_style_t // The sequence style. + } + + // The mapping parameters (for yaml_MAPPING_NODE). + mapping struct { + pairs_data []yaml_node_pair_t // The stack of mapping pairs (key, value). + pairs_start *yaml_node_pair_t // The beginning of the stack. + pairs_end *yaml_node_pair_t // The end of the stack. + pairs_top *yaml_node_pair_t // The top of the stack. + style yaml_mapping_style_t // The mapping style. + } + + start_mark yaml_mark_t // The beginning of the node. + end_mark yaml_mark_t // The end of the node. + +} + +// The document structure. +type yaml_document_t struct { + + // The document nodes. + nodes []yaml_node_t + + // The version directive. + version_directive *yaml_version_directive_t + + // The list of tag directives. + tag_directives_data []yaml_tag_directive_t + tag_directives_start int // The beginning of the tag directives list. + tag_directives_end int // The end of the tag directives list. + + start_implicit int // Is the document start indicator implicit? + end_implicit int // Is the document end indicator implicit? + + // The start/end of the document. + start_mark, end_mark yaml_mark_t +} + +// The prototype of a read handler. +// +// The read handler is called when the parser needs to read more bytes from the +// source. The handler should write not more than size bytes to the buffer. +// The number of written bytes should be set to the size_read variable. +// +// [in,out] data A pointer to an application data specified by +// yaml_parser_set_input(). +// [out] buffer The buffer to write the data from the source. +// [in] size The size of the buffer. +// [out] size_read The actual number of bytes read from the source. +// +// On success, the handler should return 1. If the handler failed, +// the returned value should be 0. On EOF, the handler should set the +// size_read to 0 and return 1. +type yaml_read_handler_t func(parser *yaml_parser_t, buffer []byte) (n int, err error) + +// This structure holds information about a potential simple key. +type yaml_simple_key_t struct { + possible bool // Is a simple key possible? + required bool // Is a simple key required? + token_number int // The number of the token. + mark yaml_mark_t // The position mark. +} + +// The states of the parser. +type yaml_parser_state_t int + +const ( + yaml_PARSE_STREAM_START_STATE yaml_parser_state_t = iota + + yaml_PARSE_IMPLICIT_DOCUMENT_START_STATE // Expect the beginning of an implicit document. + yaml_PARSE_DOCUMENT_START_STATE // Expect DOCUMENT-START. + yaml_PARSE_DOCUMENT_CONTENT_STATE // Expect the content of a document. + yaml_PARSE_DOCUMENT_END_STATE // Expect DOCUMENT-END. + yaml_PARSE_BLOCK_NODE_STATE // Expect a block node. + yaml_PARSE_BLOCK_NODE_OR_INDENTLESS_SEQUENCE_STATE // Expect a block node or indentless sequence. + yaml_PARSE_FLOW_NODE_STATE // Expect a flow node. + yaml_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE // Expect the first entry of a block sequence. + yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE // Expect an entry of a block sequence. + yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE // Expect an entry of an indentless sequence. + yaml_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE // Expect the first key of a block mapping. + yaml_PARSE_BLOCK_MAPPING_KEY_STATE // Expect a block mapping key. + yaml_PARSE_BLOCK_MAPPING_VALUE_STATE // Expect a block mapping value. + yaml_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE // Expect the first entry of a flow sequence. + yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE // Expect an entry of a flow sequence. + yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE // Expect a key of an ordered mapping. + yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE // Expect a value of an ordered mapping. + yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE // Expect the and of an ordered mapping entry. + yaml_PARSE_FLOW_MAPPING_FIRST_KEY_STATE // Expect the first key of a flow mapping. + yaml_PARSE_FLOW_MAPPING_KEY_STATE // Expect a key of a flow mapping. + yaml_PARSE_FLOW_MAPPING_VALUE_STATE // Expect a value of a flow mapping. + yaml_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE // Expect an empty value of a flow mapping. + yaml_PARSE_END_STATE // Expect nothing. +) + +func (ps yaml_parser_state_t) String() string { + switch ps { + case yaml_PARSE_STREAM_START_STATE: + return "yaml_PARSE_STREAM_START_STATE" + case yaml_PARSE_IMPLICIT_DOCUMENT_START_STATE: + return "yaml_PARSE_IMPLICIT_DOCUMENT_START_STATE" + case yaml_PARSE_DOCUMENT_START_STATE: + return "yaml_PARSE_DOCUMENT_START_STATE" + case yaml_PARSE_DOCUMENT_CONTENT_STATE: + return "yaml_PARSE_DOCUMENT_CONTENT_STATE" + case yaml_PARSE_DOCUMENT_END_STATE: + return "yaml_PARSE_DOCUMENT_END_STATE" + case yaml_PARSE_BLOCK_NODE_STATE: + return "yaml_PARSE_BLOCK_NODE_STATE" + case yaml_PARSE_BLOCK_NODE_OR_INDENTLESS_SEQUENCE_STATE: + return "yaml_PARSE_BLOCK_NODE_OR_INDENTLESS_SEQUENCE_STATE" + case yaml_PARSE_FLOW_NODE_STATE: + return "yaml_PARSE_FLOW_NODE_STATE" + case yaml_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE: + return "yaml_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE" + case yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE: + return "yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE" + case yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE: + return "yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE" + case yaml_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE: + return "yaml_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE" + case yaml_PARSE_BLOCK_MAPPING_KEY_STATE: + return "yaml_PARSE_BLOCK_MAPPING_KEY_STATE" + case yaml_PARSE_BLOCK_MAPPING_VALUE_STATE: + return "yaml_PARSE_BLOCK_MAPPING_VALUE_STATE" + case yaml_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE: + return "yaml_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE" + case yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE: + return "yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE" + case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE: + return "yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE" + case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE: + return "yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE" + case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE: + return "yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE" + case yaml_PARSE_FLOW_MAPPING_FIRST_KEY_STATE: + return "yaml_PARSE_FLOW_MAPPING_FIRST_KEY_STATE" + case yaml_PARSE_FLOW_MAPPING_KEY_STATE: + return "yaml_PARSE_FLOW_MAPPING_KEY_STATE" + case yaml_PARSE_FLOW_MAPPING_VALUE_STATE: + return "yaml_PARSE_FLOW_MAPPING_VALUE_STATE" + case yaml_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE: + return "yaml_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE" + case yaml_PARSE_END_STATE: + return "yaml_PARSE_END_STATE" + } + return "" +} + +// This structure holds aliases data. +type yaml_alias_data_t struct { + anchor []byte // The anchor. + index int // The node id. + mark yaml_mark_t // The anchor mark. +} + +// The parser structure. +// +// All members are internal. Manage the structure using the +// yaml_parser_ family of functions. +type yaml_parser_t struct { + + // Error handling + + error yaml_error_type_t // Error type. + + problem string // Error description. + + // The byte about which the problem occurred. + problem_offset int + problem_value int + problem_mark yaml_mark_t + + // The error context. + context string + context_mark yaml_mark_t + + // Reader stuff + + read_handler yaml_read_handler_t // Read handler. + + input_reader io.Reader // File input data. + input []byte // String input data. + input_pos int + + eof bool // EOF flag + + buffer []byte // The working buffer. + buffer_pos int // The current position of the buffer. + + unread int // The number of unread characters in the buffer. + + raw_buffer []byte // The raw buffer. + raw_buffer_pos int // The current position of the buffer. + + encoding yaml_encoding_t // The input encoding. + + offset int // The offset of the current position (in bytes). + mark yaml_mark_t // The mark of the current position. + + // Scanner stuff + + stream_start_produced bool // Have we started to scan the input stream? + stream_end_produced bool // Have we reached the end of the input stream? + + flow_level int // The number of unclosed '[' and '{' indicators. + + tokens []yaml_token_t // The tokens queue. + tokens_head int // The head of the tokens queue. + tokens_parsed int // The number of tokens fetched from the queue. + token_available bool // Does the tokens queue contain a token ready for dequeueing. + + indent int // The current indentation level. + indents []int // The indentation levels stack. + + simple_key_allowed bool // May a simple key occur at the current position? + simple_keys []yaml_simple_key_t // The stack of simple keys. + simple_keys_by_tok map[int]int // possible simple_key indexes indexed by token_number + + // Parser stuff + + state yaml_parser_state_t // The current parser state. + states []yaml_parser_state_t // The parser states stack. + marks []yaml_mark_t // The stack of marks. + tag_directives []yaml_tag_directive_t // The list of TAG directives. + + // Dumper stuff + + aliases []yaml_alias_data_t // The alias data. + + document *yaml_document_t // The currently parsed document. +} + +// Emitter Definitions + +// The prototype of a write handler. +// +// The write handler is called when the emitter needs to flush the accumulated +// characters to the output. The handler should write @a size bytes of the +// @a buffer to the output. +// +// @param[in,out] data A pointer to an application data specified by +// yaml_emitter_set_output(). +// @param[in] buffer The buffer with bytes to be written. +// @param[in] size The size of the buffer. +// +// @returns On success, the handler should return @c 1. If the handler failed, +// the returned value should be @c 0. +// +type yaml_write_handler_t func(emitter *yaml_emitter_t, buffer []byte) error + +type yaml_emitter_state_t int + +// The emitter states. +const ( + // Expect STREAM-START. + yaml_EMIT_STREAM_START_STATE yaml_emitter_state_t = iota + + yaml_EMIT_FIRST_DOCUMENT_START_STATE // Expect the first DOCUMENT-START or STREAM-END. + yaml_EMIT_DOCUMENT_START_STATE // Expect DOCUMENT-START or STREAM-END. + yaml_EMIT_DOCUMENT_CONTENT_STATE // Expect the content of a document. + yaml_EMIT_DOCUMENT_END_STATE // Expect DOCUMENT-END. + yaml_EMIT_FLOW_SEQUENCE_FIRST_ITEM_STATE // Expect the first item of a flow sequence. + yaml_EMIT_FLOW_SEQUENCE_ITEM_STATE // Expect an item of a flow sequence. + yaml_EMIT_FLOW_MAPPING_FIRST_KEY_STATE // Expect the first key of a flow mapping. + yaml_EMIT_FLOW_MAPPING_KEY_STATE // Expect a key of a flow mapping. + yaml_EMIT_FLOW_MAPPING_SIMPLE_VALUE_STATE // Expect a value for a simple key of a flow mapping. + yaml_EMIT_FLOW_MAPPING_VALUE_STATE // Expect a value of a flow mapping. + yaml_EMIT_BLOCK_SEQUENCE_FIRST_ITEM_STATE // Expect the first item of a block sequence. + yaml_EMIT_BLOCK_SEQUENCE_ITEM_STATE // Expect an item of a block sequence. + yaml_EMIT_BLOCK_MAPPING_FIRST_KEY_STATE // Expect the first key of a block mapping. + yaml_EMIT_BLOCK_MAPPING_KEY_STATE // Expect the key of a block mapping. + yaml_EMIT_BLOCK_MAPPING_SIMPLE_VALUE_STATE // Expect a value for a simple key of a block mapping. + yaml_EMIT_BLOCK_MAPPING_VALUE_STATE // Expect a value of a block mapping. + yaml_EMIT_END_STATE // Expect nothing. +) + +// The emitter structure. +// +// All members are internal. Manage the structure using the @c yaml_emitter_ +// family of functions. +type yaml_emitter_t struct { + + // Error handling + + error yaml_error_type_t // Error type. + problem string // Error description. + + // Writer stuff + + write_handler yaml_write_handler_t // Write handler. + + output_buffer *[]byte // String output data. + output_writer io.Writer // File output data. + + buffer []byte // The working buffer. + buffer_pos int // The current position of the buffer. + + raw_buffer []byte // The raw buffer. + raw_buffer_pos int // The current position of the buffer. + + encoding yaml_encoding_t // The stream encoding. + + // Emitter stuff + + canonical bool // If the output is in the canonical style? + best_indent int // The number of indentation spaces. + best_width int // The preferred width of the output lines. + unicode bool // Allow unescaped non-ASCII characters? + line_break yaml_break_t // The preferred line break. + + state yaml_emitter_state_t // The current emitter state. + states []yaml_emitter_state_t // The stack of states. + + events []yaml_event_t // The event queue. + events_head int // The head of the event queue. + + indents []int // The stack of indentation levels. + + tag_directives []yaml_tag_directive_t // The list of tag directives. + + indent int // The current indentation level. + + flow_level int // The current flow level. + + root_context bool // Is it the document root context? + sequence_context bool // Is it a sequence context? + mapping_context bool // Is it a mapping context? + simple_key_context bool // Is it a simple mapping key context? + + line int // The current line. + column int // The current column. + whitespace bool // If the last character was a whitespace? + indention bool // If the last character was an indentation character (' ', '-', '?', ':')? + open_ended bool // If an explicit document end is required? + + // Anchor analysis. + anchor_data struct { + anchor []byte // The anchor value. + alias bool // Is it an alias? + } + + // Tag analysis. + tag_data struct { + handle []byte // The tag handle. + suffix []byte // The tag suffix. + } + + // Scalar analysis. + scalar_data struct { + value []byte // The scalar value. + multiline bool // Does the scalar contain line breaks? + flow_plain_allowed bool // Can the scalar be expessed in the flow plain style? + block_plain_allowed bool // Can the scalar be expressed in the block plain style? + single_quoted_allowed bool // Can the scalar be expressed in the single quoted style? + block_allowed bool // Can the scalar be expressed in the literal or folded styles? + style yaml_scalar_style_t // The output style. + } + + // Dumper stuff + + opened bool // If the stream was already opened? + closed bool // If the stream was already closed? + + // The information associated with the document nodes. + anchors *struct { + references int // The number of references. + anchor int // The anchor id. + serialized bool // If the node has been emitted? + } + + last_anchor_id int // The last assigned anchor id. + + document *yaml_document_t // The currently emitted document. +} diff --git a/goyaml.v2/yamlprivateh.go b/goyaml.v2/yamlprivateh.go new file mode 100644 index 0000000..8110ce3 --- /dev/null +++ b/goyaml.v2/yamlprivateh.go @@ -0,0 +1,173 @@ +package yaml + +const ( + // The size of the input raw buffer. + input_raw_buffer_size = 512 + + // The size of the input buffer. + // It should be possible to decode the whole raw buffer. + input_buffer_size = input_raw_buffer_size * 3 + + // The size of the output buffer. + output_buffer_size = 128 + + // The size of the output raw buffer. + // It should be possible to encode the whole output buffer. + output_raw_buffer_size = (output_buffer_size*2 + 2) + + // The size of other stacks and queues. + initial_stack_size = 16 + initial_queue_size = 16 + initial_string_size = 16 +) + +// Check if the character at the specified position is an alphabetical +// character, a digit, '_', or '-'. +func is_alpha(b []byte, i int) bool { + return b[i] >= '0' && b[i] <= '9' || b[i] >= 'A' && b[i] <= 'Z' || b[i] >= 'a' && b[i] <= 'z' || b[i] == '_' || b[i] == '-' +} + +// Check if the character at the specified position is a digit. +func is_digit(b []byte, i int) bool { + return b[i] >= '0' && b[i] <= '9' +} + +// Get the value of a digit. +func as_digit(b []byte, i int) int { + return int(b[i]) - '0' +} + +// Check if the character at the specified position is a hex-digit. +func is_hex(b []byte, i int) bool { + return b[i] >= '0' && b[i] <= '9' || b[i] >= 'A' && b[i] <= 'F' || b[i] >= 'a' && b[i] <= 'f' +} + +// Get the value of a hex-digit. +func as_hex(b []byte, i int) int { + bi := b[i] + if bi >= 'A' && bi <= 'F' { + return int(bi) - 'A' + 10 + } + if bi >= 'a' && bi <= 'f' { + return int(bi) - 'a' + 10 + } + return int(bi) - '0' +} + +// Check if the character is ASCII. +func is_ascii(b []byte, i int) bool { + return b[i] <= 0x7F +} + +// Check if the character at the start of the buffer can be printed unescaped. +func is_printable(b []byte, i int) bool { + return ((b[i] == 0x0A) || // . == #x0A + (b[i] >= 0x20 && b[i] <= 0x7E) || // #x20 <= . <= #x7E + (b[i] == 0xC2 && b[i+1] >= 0xA0) || // #0xA0 <= . <= #xD7FF + (b[i] > 0xC2 && b[i] < 0xED) || + (b[i] == 0xED && b[i+1] < 0xA0) || + (b[i] == 0xEE) || + (b[i] == 0xEF && // #xE000 <= . <= #xFFFD + !(b[i+1] == 0xBB && b[i+2] == 0xBF) && // && . != #xFEFF + !(b[i+1] == 0xBF && (b[i+2] == 0xBE || b[i+2] == 0xBF)))) +} + +// Check if the character at the specified position is NUL. +func is_z(b []byte, i int) bool { + return b[i] == 0x00 +} + +// Check if the beginning of the buffer is a BOM. +func is_bom(b []byte, i int) bool { + return b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF +} + +// Check if the character at the specified position is space. +func is_space(b []byte, i int) bool { + return b[i] == ' ' +} + +// Check if the character at the specified position is tab. +func is_tab(b []byte, i int) bool { + return b[i] == '\t' +} + +// Check if the character at the specified position is blank (space or tab). +func is_blank(b []byte, i int) bool { + //return is_space(b, i) || is_tab(b, i) + return b[i] == ' ' || b[i] == '\t' +} + +// Check if the character at the specified position is a line break. +func is_break(b []byte, i int) bool { + return (b[i] == '\r' || // CR (#xD) + b[i] == '\n' || // LF (#xA) + b[i] == 0xC2 && b[i+1] == 0x85 || // NEL (#x85) + b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA8 || // LS (#x2028) + b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA9) // PS (#x2029) +} + +func is_crlf(b []byte, i int) bool { + return b[i] == '\r' && b[i+1] == '\n' +} + +// Check if the character is a line break or NUL. +func is_breakz(b []byte, i int) bool { + //return is_break(b, i) || is_z(b, i) + return ( // is_break: + b[i] == '\r' || // CR (#xD) + b[i] == '\n' || // LF (#xA) + b[i] == 0xC2 && b[i+1] == 0x85 || // NEL (#x85) + b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA8 || // LS (#x2028) + b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA9 || // PS (#x2029) + // is_z: + b[i] == 0) +} + +// Check if the character is a line break, space, or NUL. +func is_spacez(b []byte, i int) bool { + //return is_space(b, i) || is_breakz(b, i) + return ( // is_space: + b[i] == ' ' || + // is_breakz: + b[i] == '\r' || // CR (#xD) + b[i] == '\n' || // LF (#xA) + b[i] == 0xC2 && b[i+1] == 0x85 || // NEL (#x85) + b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA8 || // LS (#x2028) + b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA9 || // PS (#x2029) + b[i] == 0) +} + +// Check if the character is a line break, space, tab, or NUL. +func is_blankz(b []byte, i int) bool { + //return is_blank(b, i) || is_breakz(b, i) + return ( // is_blank: + b[i] == ' ' || b[i] == '\t' || + // is_breakz: + b[i] == '\r' || // CR (#xD) + b[i] == '\n' || // LF (#xA) + b[i] == 0xC2 && b[i+1] == 0x85 || // NEL (#x85) + b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA8 || // LS (#x2028) + b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA9 || // PS (#x2029) + b[i] == 0) +} + +// Determine the width of the character. +func width(b byte) int { + // Don't replace these by a switch without first + // confirming that it is being inlined. + if b&0x80 == 0x00 { + return 1 + } + if b&0xE0 == 0xC0 { + return 2 + } + if b&0xF0 == 0xE0 { + return 3 + } + if b&0xF8 == 0xF0 { + return 4 + } + return 0 + +} diff --git a/goyaml.v3/LICENSE b/goyaml.v3/LICENSE new file mode 100644 index 0000000..2683e4b --- /dev/null +++ b/goyaml.v3/LICENSE @@ -0,0 +1,50 @@ + +This project is covered by two different licenses: MIT and Apache. + +#### MIT License #### + +The following files were ported to Go from C files of libyaml, and thus +are still covered by their original MIT license, with the additional +copyright staring in 2011 when the project was ported over: + + apic.go emitterc.go parserc.go readerc.go scannerc.go + writerc.go yamlh.go yamlprivateh.go + +Copyright (c) 2006-2010 Kirill Simonov +Copyright (c) 2006-2011 Kirill Simonov + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +### Apache License ### + +All the remaining project files are covered by the Apache license: + +Copyright (c) 2011-2019 Canonical Ltd + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/goyaml.v3/NOTICE b/goyaml.v3/NOTICE new file mode 100644 index 0000000..866d74a --- /dev/null +++ b/goyaml.v3/NOTICE @@ -0,0 +1,13 @@ +Copyright 2011-2016 Canonical Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/goyaml.v3/README.md b/goyaml.v3/README.md new file mode 100644 index 0000000..08eb1ba --- /dev/null +++ b/goyaml.v3/README.md @@ -0,0 +1,150 @@ +# YAML support for the Go language + +Introduction +------------ + +The yaml package enables Go programs to comfortably encode and decode YAML +values. It was developed within [Canonical](https://www.canonical.com) as +part of the [juju](https://juju.ubuntu.com) project, and is based on a +pure Go port of the well-known [libyaml](http://pyyaml.org/wiki/LibYAML) +C library to parse and generate YAML data quickly and reliably. + +Compatibility +------------- + +The yaml package supports most of YAML 1.2, but preserves some behavior +from 1.1 for backwards compatibility. + +Specifically, as of v3 of the yaml package: + + - YAML 1.1 bools (_yes/no, on/off_) are supported as long as they are being + decoded into a typed bool value. Otherwise they behave as a string. Booleans + in YAML 1.2 are _true/false_ only. + - Octals encode and decode as _0777_ per YAML 1.1, rather than _0o777_ + as specified in YAML 1.2, because most parsers still use the old format. + Octals in the _0o777_ format are supported though, so new files work. + - Does not support base-60 floats. These are gone from YAML 1.2, and were + actually never supported by this package as it's clearly a poor choice. + +and offers backwards +compatibility with YAML 1.1 in some cases. +1.2, including support for +anchors, tags, map merging, etc. Multi-document unmarshalling is not yet +implemented, and base-60 floats from YAML 1.1 are purposefully not +supported since they're a poor design and are gone in YAML 1.2. + +Installation and usage +---------------------- + +The import path for the package is *gopkg.in/yaml.v3*. + +To install it, run: + + go get gopkg.in/yaml.v3 + +API documentation +----------------- + +If opened in a browser, the import path itself leads to the API documentation: + + - [https://gopkg.in/yaml.v3](https://gopkg.in/yaml.v3) + +API stability +------------- + +The package API for yaml v3 will remain stable as described in [gopkg.in](https://gopkg.in). + + +License +------- + +The yaml package is licensed under the MIT and Apache License 2.0 licenses. +Please see the LICENSE file for details. + + +Example +------- + +```Go +package main + +import ( + "fmt" + "log" + + "gopkg.in/yaml.v3" +) + +var data = ` +a: Easy! +b: + c: 2 + d: [3, 4] +` + +// Note: struct fields must be public in order for unmarshal to +// correctly populate the data. +type T struct { + A string + B struct { + RenamedC int `yaml:"c"` + D []int `yaml:",flow"` + } +} + +func main() { + t := T{} + + err := yaml.Unmarshal([]byte(data), &t) + if err != nil { + log.Fatalf("error: %v", err) + } + fmt.Printf("--- t:\n%v\n\n", t) + + d, err := yaml.Marshal(&t) + if err != nil { + log.Fatalf("error: %v", err) + } + fmt.Printf("--- t dump:\n%s\n\n", string(d)) + + m := make(map[interface{}]interface{}) + + err = yaml.Unmarshal([]byte(data), &m) + if err != nil { + log.Fatalf("error: %v", err) + } + fmt.Printf("--- m:\n%v\n\n", m) + + d, err = yaml.Marshal(&m) + if err != nil { + log.Fatalf("error: %v", err) + } + fmt.Printf("--- m dump:\n%s\n\n", string(d)) +} +``` + +This example will generate the following output: + +``` +--- t: +{Easy! {2 [3 4]}} + +--- t dump: +a: Easy! +b: + c: 2 + d: [3, 4] + + +--- m: +map[a:Easy! b:map[c:2 d:[3 4]]] + +--- m dump: +a: Easy! +b: + c: 2 + d: + - 3 + - 4 +``` + diff --git a/goyaml.v3/apic.go b/goyaml.v3/apic.go new file mode 100644 index 0000000..ae7d049 --- /dev/null +++ b/goyaml.v3/apic.go @@ -0,0 +1,747 @@ +// +// Copyright (c) 2011-2019 Canonical Ltd +// Copyright (c) 2006-2010 Kirill Simonov +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +package yaml + +import ( + "io" +) + +func yaml_insert_token(parser *yaml_parser_t, pos int, token *yaml_token_t) { + //fmt.Println("yaml_insert_token", "pos:", pos, "typ:", token.typ, "head:", parser.tokens_head, "len:", len(parser.tokens)) + + // Check if we can move the queue at the beginning of the buffer. + if parser.tokens_head > 0 && len(parser.tokens) == cap(parser.tokens) { + if parser.tokens_head != len(parser.tokens) { + copy(parser.tokens, parser.tokens[parser.tokens_head:]) + } + parser.tokens = parser.tokens[:len(parser.tokens)-parser.tokens_head] + parser.tokens_head = 0 + } + parser.tokens = append(parser.tokens, *token) + if pos < 0 { + return + } + copy(parser.tokens[parser.tokens_head+pos+1:], parser.tokens[parser.tokens_head+pos:]) + parser.tokens[parser.tokens_head+pos] = *token +} + +// Create a new parser object. +func yaml_parser_initialize(parser *yaml_parser_t) bool { + *parser = yaml_parser_t{ + raw_buffer: make([]byte, 0, input_raw_buffer_size), + buffer: make([]byte, 0, input_buffer_size), + } + return true +} + +// Destroy a parser object. +func yaml_parser_delete(parser *yaml_parser_t) { + *parser = yaml_parser_t{} +} + +// String read handler. +func yaml_string_read_handler(parser *yaml_parser_t, buffer []byte) (n int, err error) { + if parser.input_pos == len(parser.input) { + return 0, io.EOF + } + n = copy(buffer, parser.input[parser.input_pos:]) + parser.input_pos += n + return n, nil +} + +// Reader read handler. +func yaml_reader_read_handler(parser *yaml_parser_t, buffer []byte) (n int, err error) { + return parser.input_reader.Read(buffer) +} + +// Set a string input. +func yaml_parser_set_input_string(parser *yaml_parser_t, input []byte) { + if parser.read_handler != nil { + panic("must set the input source only once") + } + parser.read_handler = yaml_string_read_handler + parser.input = input + parser.input_pos = 0 +} + +// Set a file input. +func yaml_parser_set_input_reader(parser *yaml_parser_t, r io.Reader) { + if parser.read_handler != nil { + panic("must set the input source only once") + } + parser.read_handler = yaml_reader_read_handler + parser.input_reader = r +} + +// Set the source encoding. +func yaml_parser_set_encoding(parser *yaml_parser_t, encoding yaml_encoding_t) { + if parser.encoding != yaml_ANY_ENCODING { + panic("must set the encoding only once") + } + parser.encoding = encoding +} + +// Create a new emitter object. +func yaml_emitter_initialize(emitter *yaml_emitter_t) { + *emitter = yaml_emitter_t{ + buffer: make([]byte, output_buffer_size), + raw_buffer: make([]byte, 0, output_raw_buffer_size), + states: make([]yaml_emitter_state_t, 0, initial_stack_size), + events: make([]yaml_event_t, 0, initial_queue_size), + best_width: -1, + } +} + +// Destroy an emitter object. +func yaml_emitter_delete(emitter *yaml_emitter_t) { + *emitter = yaml_emitter_t{} +} + +// String write handler. +func yaml_string_write_handler(emitter *yaml_emitter_t, buffer []byte) error { + *emitter.output_buffer = append(*emitter.output_buffer, buffer...) + return nil +} + +// yaml_writer_write_handler uses emitter.output_writer to write the +// emitted text. +func yaml_writer_write_handler(emitter *yaml_emitter_t, buffer []byte) error { + _, err := emitter.output_writer.Write(buffer) + return err +} + +// Set a string output. +func yaml_emitter_set_output_string(emitter *yaml_emitter_t, output_buffer *[]byte) { + if emitter.write_handler != nil { + panic("must set the output target only once") + } + emitter.write_handler = yaml_string_write_handler + emitter.output_buffer = output_buffer +} + +// Set a file output. +func yaml_emitter_set_output_writer(emitter *yaml_emitter_t, w io.Writer) { + if emitter.write_handler != nil { + panic("must set the output target only once") + } + emitter.write_handler = yaml_writer_write_handler + emitter.output_writer = w +} + +// Set the output encoding. +func yaml_emitter_set_encoding(emitter *yaml_emitter_t, encoding yaml_encoding_t) { + if emitter.encoding != yaml_ANY_ENCODING { + panic("must set the output encoding only once") + } + emitter.encoding = encoding +} + +// Set the canonical output style. +func yaml_emitter_set_canonical(emitter *yaml_emitter_t, canonical bool) { + emitter.canonical = canonical +} + +// Set the indentation increment. +func yaml_emitter_set_indent(emitter *yaml_emitter_t, indent int) { + if indent < 2 || indent > 9 { + indent = 2 + } + emitter.best_indent = indent +} + +// Set the preferred line width. +func yaml_emitter_set_width(emitter *yaml_emitter_t, width int) { + if width < 0 { + width = -1 + } + emitter.best_width = width +} + +// Set if unescaped non-ASCII characters are allowed. +func yaml_emitter_set_unicode(emitter *yaml_emitter_t, unicode bool) { + emitter.unicode = unicode +} + +// Set the preferred line break character. +func yaml_emitter_set_break(emitter *yaml_emitter_t, line_break yaml_break_t) { + emitter.line_break = line_break +} + +///* +// * Destroy a token object. +// */ +// +//YAML_DECLARE(void) +//yaml_token_delete(yaml_token_t *token) +//{ +// assert(token); // Non-NULL token object expected. +// +// switch (token.type) +// { +// case YAML_TAG_DIRECTIVE_TOKEN: +// yaml_free(token.data.tag_directive.handle); +// yaml_free(token.data.tag_directive.prefix); +// break; +// +// case YAML_ALIAS_TOKEN: +// yaml_free(token.data.alias.value); +// break; +// +// case YAML_ANCHOR_TOKEN: +// yaml_free(token.data.anchor.value); +// break; +// +// case YAML_TAG_TOKEN: +// yaml_free(token.data.tag.handle); +// yaml_free(token.data.tag.suffix); +// break; +// +// case YAML_SCALAR_TOKEN: +// yaml_free(token.data.scalar.value); +// break; +// +// default: +// break; +// } +// +// memset(token, 0, sizeof(yaml_token_t)); +//} +// +///* +// * Check if a string is a valid UTF-8 sequence. +// * +// * Check 'reader.c' for more details on UTF-8 encoding. +// */ +// +//static int +//yaml_check_utf8(yaml_char_t *start, size_t length) +//{ +// yaml_char_t *end = start+length; +// yaml_char_t *pointer = start; +// +// while (pointer < end) { +// unsigned char octet; +// unsigned int width; +// unsigned int value; +// size_t k; +// +// octet = pointer[0]; +// width = (octet & 0x80) == 0x00 ? 1 : +// (octet & 0xE0) == 0xC0 ? 2 : +// (octet & 0xF0) == 0xE0 ? 3 : +// (octet & 0xF8) == 0xF0 ? 4 : 0; +// value = (octet & 0x80) == 0x00 ? octet & 0x7F : +// (octet & 0xE0) == 0xC0 ? octet & 0x1F : +// (octet & 0xF0) == 0xE0 ? octet & 0x0F : +// (octet & 0xF8) == 0xF0 ? octet & 0x07 : 0; +// if (!width) return 0; +// if (pointer+width > end) return 0; +// for (k = 1; k < width; k ++) { +// octet = pointer[k]; +// if ((octet & 0xC0) != 0x80) return 0; +// value = (value << 6) + (octet & 0x3F); +// } +// if (!((width == 1) || +// (width == 2 && value >= 0x80) || +// (width == 3 && value >= 0x800) || +// (width == 4 && value >= 0x10000))) return 0; +// +// pointer += width; +// } +// +// return 1; +//} +// + +// Create STREAM-START. +func yaml_stream_start_event_initialize(event *yaml_event_t, encoding yaml_encoding_t) { + *event = yaml_event_t{ + typ: yaml_STREAM_START_EVENT, + encoding: encoding, + } +} + +// Create STREAM-END. +func yaml_stream_end_event_initialize(event *yaml_event_t) { + *event = yaml_event_t{ + typ: yaml_STREAM_END_EVENT, + } +} + +// Create DOCUMENT-START. +func yaml_document_start_event_initialize( + event *yaml_event_t, + version_directive *yaml_version_directive_t, + tag_directives []yaml_tag_directive_t, + implicit bool, +) { + *event = yaml_event_t{ + typ: yaml_DOCUMENT_START_EVENT, + version_directive: version_directive, + tag_directives: tag_directives, + implicit: implicit, + } +} + +// Create DOCUMENT-END. +func yaml_document_end_event_initialize(event *yaml_event_t, implicit bool) { + *event = yaml_event_t{ + typ: yaml_DOCUMENT_END_EVENT, + implicit: implicit, + } +} + +// Create ALIAS. +func yaml_alias_event_initialize(event *yaml_event_t, anchor []byte) bool { + *event = yaml_event_t{ + typ: yaml_ALIAS_EVENT, + anchor: anchor, + } + return true +} + +// Create SCALAR. +func yaml_scalar_event_initialize(event *yaml_event_t, anchor, tag, value []byte, plain_implicit, quoted_implicit bool, style yaml_scalar_style_t) bool { + *event = yaml_event_t{ + typ: yaml_SCALAR_EVENT, + anchor: anchor, + tag: tag, + value: value, + implicit: plain_implicit, + quoted_implicit: quoted_implicit, + style: yaml_style_t(style), + } + return true +} + +// Create SEQUENCE-START. +func yaml_sequence_start_event_initialize(event *yaml_event_t, anchor, tag []byte, implicit bool, style yaml_sequence_style_t) bool { + *event = yaml_event_t{ + typ: yaml_SEQUENCE_START_EVENT, + anchor: anchor, + tag: tag, + implicit: implicit, + style: yaml_style_t(style), + } + return true +} + +// Create SEQUENCE-END. +func yaml_sequence_end_event_initialize(event *yaml_event_t) bool { + *event = yaml_event_t{ + typ: yaml_SEQUENCE_END_EVENT, + } + return true +} + +// Create MAPPING-START. +func yaml_mapping_start_event_initialize(event *yaml_event_t, anchor, tag []byte, implicit bool, style yaml_mapping_style_t) { + *event = yaml_event_t{ + typ: yaml_MAPPING_START_EVENT, + anchor: anchor, + tag: tag, + implicit: implicit, + style: yaml_style_t(style), + } +} + +// Create MAPPING-END. +func yaml_mapping_end_event_initialize(event *yaml_event_t) { + *event = yaml_event_t{ + typ: yaml_MAPPING_END_EVENT, + } +} + +// Destroy an event object. +func yaml_event_delete(event *yaml_event_t) { + *event = yaml_event_t{} +} + +///* +// * Create a document object. +// */ +// +//YAML_DECLARE(int) +//yaml_document_initialize(document *yaml_document_t, +// version_directive *yaml_version_directive_t, +// tag_directives_start *yaml_tag_directive_t, +// tag_directives_end *yaml_tag_directive_t, +// start_implicit int, end_implicit int) +//{ +// struct { +// error yaml_error_type_t +// } context +// struct { +// start *yaml_node_t +// end *yaml_node_t +// top *yaml_node_t +// } nodes = { NULL, NULL, NULL } +// version_directive_copy *yaml_version_directive_t = NULL +// struct { +// start *yaml_tag_directive_t +// end *yaml_tag_directive_t +// top *yaml_tag_directive_t +// } tag_directives_copy = { NULL, NULL, NULL } +// value yaml_tag_directive_t = { NULL, NULL } +// mark yaml_mark_t = { 0, 0, 0 } +// +// assert(document) // Non-NULL document object is expected. +// assert((tag_directives_start && tag_directives_end) || +// (tag_directives_start == tag_directives_end)) +// // Valid tag directives are expected. +// +// if (!STACK_INIT(&context, nodes, INITIAL_STACK_SIZE)) goto error +// +// if (version_directive) { +// version_directive_copy = yaml_malloc(sizeof(yaml_version_directive_t)) +// if (!version_directive_copy) goto error +// version_directive_copy.major = version_directive.major +// version_directive_copy.minor = version_directive.minor +// } +// +// if (tag_directives_start != tag_directives_end) { +// tag_directive *yaml_tag_directive_t +// if (!STACK_INIT(&context, tag_directives_copy, INITIAL_STACK_SIZE)) +// goto error +// for (tag_directive = tag_directives_start +// tag_directive != tag_directives_end; tag_directive ++) { +// assert(tag_directive.handle) +// assert(tag_directive.prefix) +// if (!yaml_check_utf8(tag_directive.handle, +// strlen((char *)tag_directive.handle))) +// goto error +// if (!yaml_check_utf8(tag_directive.prefix, +// strlen((char *)tag_directive.prefix))) +// goto error +// value.handle = yaml_strdup(tag_directive.handle) +// value.prefix = yaml_strdup(tag_directive.prefix) +// if (!value.handle || !value.prefix) goto error +// if (!PUSH(&context, tag_directives_copy, value)) +// goto error +// value.handle = NULL +// value.prefix = NULL +// } +// } +// +// DOCUMENT_INIT(*document, nodes.start, nodes.end, version_directive_copy, +// tag_directives_copy.start, tag_directives_copy.top, +// start_implicit, end_implicit, mark, mark) +// +// return 1 +// +//error: +// STACK_DEL(&context, nodes) +// yaml_free(version_directive_copy) +// while (!STACK_EMPTY(&context, tag_directives_copy)) { +// value yaml_tag_directive_t = POP(&context, tag_directives_copy) +// yaml_free(value.handle) +// yaml_free(value.prefix) +// } +// STACK_DEL(&context, tag_directives_copy) +// yaml_free(value.handle) +// yaml_free(value.prefix) +// +// return 0 +//} +// +///* +// * Destroy a document object. +// */ +// +//YAML_DECLARE(void) +//yaml_document_delete(document *yaml_document_t) +//{ +// struct { +// error yaml_error_type_t +// } context +// tag_directive *yaml_tag_directive_t +// +// context.error = YAML_NO_ERROR // Eliminate a compiler warning. +// +// assert(document) // Non-NULL document object is expected. +// +// while (!STACK_EMPTY(&context, document.nodes)) { +// node yaml_node_t = POP(&context, document.nodes) +// yaml_free(node.tag) +// switch (node.type) { +// case YAML_SCALAR_NODE: +// yaml_free(node.data.scalar.value) +// break +// case YAML_SEQUENCE_NODE: +// STACK_DEL(&context, node.data.sequence.items) +// break +// case YAML_MAPPING_NODE: +// STACK_DEL(&context, node.data.mapping.pairs) +// break +// default: +// assert(0) // Should not happen. +// } +// } +// STACK_DEL(&context, document.nodes) +// +// yaml_free(document.version_directive) +// for (tag_directive = document.tag_directives.start +// tag_directive != document.tag_directives.end +// tag_directive++) { +// yaml_free(tag_directive.handle) +// yaml_free(tag_directive.prefix) +// } +// yaml_free(document.tag_directives.start) +// +// memset(document, 0, sizeof(yaml_document_t)) +//} +// +///** +// * Get a document node. +// */ +// +//YAML_DECLARE(yaml_node_t *) +//yaml_document_get_node(document *yaml_document_t, index int) +//{ +// assert(document) // Non-NULL document object is expected. +// +// if (index > 0 && document.nodes.start + index <= document.nodes.top) { +// return document.nodes.start + index - 1 +// } +// return NULL +//} +// +///** +// * Get the root object. +// */ +// +//YAML_DECLARE(yaml_node_t *) +//yaml_document_get_root_node(document *yaml_document_t) +//{ +// assert(document) // Non-NULL document object is expected. +// +// if (document.nodes.top != document.nodes.start) { +// return document.nodes.start +// } +// return NULL +//} +// +///* +// * Add a scalar node to a document. +// */ +// +//YAML_DECLARE(int) +//yaml_document_add_scalar(document *yaml_document_t, +// tag *yaml_char_t, value *yaml_char_t, length int, +// style yaml_scalar_style_t) +//{ +// struct { +// error yaml_error_type_t +// } context +// mark yaml_mark_t = { 0, 0, 0 } +// tag_copy *yaml_char_t = NULL +// value_copy *yaml_char_t = NULL +// node yaml_node_t +// +// assert(document) // Non-NULL document object is expected. +// assert(value) // Non-NULL value is expected. +// +// if (!tag) { +// tag = (yaml_char_t *)YAML_DEFAULT_SCALAR_TAG +// } +// +// if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error +// tag_copy = yaml_strdup(tag) +// if (!tag_copy) goto error +// +// if (length < 0) { +// length = strlen((char *)value) +// } +// +// if (!yaml_check_utf8(value, length)) goto error +// value_copy = yaml_malloc(length+1) +// if (!value_copy) goto error +// memcpy(value_copy, value, length) +// value_copy[length] = '\0' +// +// SCALAR_NODE_INIT(node, tag_copy, value_copy, length, style, mark, mark) +// if (!PUSH(&context, document.nodes, node)) goto error +// +// return document.nodes.top - document.nodes.start +// +//error: +// yaml_free(tag_copy) +// yaml_free(value_copy) +// +// return 0 +//} +// +///* +// * Add a sequence node to a document. +// */ +// +//YAML_DECLARE(int) +//yaml_document_add_sequence(document *yaml_document_t, +// tag *yaml_char_t, style yaml_sequence_style_t) +//{ +// struct { +// error yaml_error_type_t +// } context +// mark yaml_mark_t = { 0, 0, 0 } +// tag_copy *yaml_char_t = NULL +// struct { +// start *yaml_node_item_t +// end *yaml_node_item_t +// top *yaml_node_item_t +// } items = { NULL, NULL, NULL } +// node yaml_node_t +// +// assert(document) // Non-NULL document object is expected. +// +// if (!tag) { +// tag = (yaml_char_t *)YAML_DEFAULT_SEQUENCE_TAG +// } +// +// if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error +// tag_copy = yaml_strdup(tag) +// if (!tag_copy) goto error +// +// if (!STACK_INIT(&context, items, INITIAL_STACK_SIZE)) goto error +// +// SEQUENCE_NODE_INIT(node, tag_copy, items.start, items.end, +// style, mark, mark) +// if (!PUSH(&context, document.nodes, node)) goto error +// +// return document.nodes.top - document.nodes.start +// +//error: +// STACK_DEL(&context, items) +// yaml_free(tag_copy) +// +// return 0 +//} +// +///* +// * Add a mapping node to a document. +// */ +// +//YAML_DECLARE(int) +//yaml_document_add_mapping(document *yaml_document_t, +// tag *yaml_char_t, style yaml_mapping_style_t) +//{ +// struct { +// error yaml_error_type_t +// } context +// mark yaml_mark_t = { 0, 0, 0 } +// tag_copy *yaml_char_t = NULL +// struct { +// start *yaml_node_pair_t +// end *yaml_node_pair_t +// top *yaml_node_pair_t +// } pairs = { NULL, NULL, NULL } +// node yaml_node_t +// +// assert(document) // Non-NULL document object is expected. +// +// if (!tag) { +// tag = (yaml_char_t *)YAML_DEFAULT_MAPPING_TAG +// } +// +// if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error +// tag_copy = yaml_strdup(tag) +// if (!tag_copy) goto error +// +// if (!STACK_INIT(&context, pairs, INITIAL_STACK_SIZE)) goto error +// +// MAPPING_NODE_INIT(node, tag_copy, pairs.start, pairs.end, +// style, mark, mark) +// if (!PUSH(&context, document.nodes, node)) goto error +// +// return document.nodes.top - document.nodes.start +// +//error: +// STACK_DEL(&context, pairs) +// yaml_free(tag_copy) +// +// return 0 +//} +// +///* +// * Append an item to a sequence node. +// */ +// +//YAML_DECLARE(int) +//yaml_document_append_sequence_item(document *yaml_document_t, +// sequence int, item int) +//{ +// struct { +// error yaml_error_type_t +// } context +// +// assert(document) // Non-NULL document is required. +// assert(sequence > 0 +// && document.nodes.start + sequence <= document.nodes.top) +// // Valid sequence id is required. +// assert(document.nodes.start[sequence-1].type == YAML_SEQUENCE_NODE) +// // A sequence node is required. +// assert(item > 0 && document.nodes.start + item <= document.nodes.top) +// // Valid item id is required. +// +// if (!PUSH(&context, +// document.nodes.start[sequence-1].data.sequence.items, item)) +// return 0 +// +// return 1 +//} +// +///* +// * Append a pair of a key and a value to a mapping node. +// */ +// +//YAML_DECLARE(int) +//yaml_document_append_mapping_pair(document *yaml_document_t, +// mapping int, key int, value int) +//{ +// struct { +// error yaml_error_type_t +// } context +// +// pair yaml_node_pair_t +// +// assert(document) // Non-NULL document is required. +// assert(mapping > 0 +// && document.nodes.start + mapping <= document.nodes.top) +// // Valid mapping id is required. +// assert(document.nodes.start[mapping-1].type == YAML_MAPPING_NODE) +// // A mapping node is required. +// assert(key > 0 && document.nodes.start + key <= document.nodes.top) +// // Valid key id is required. +// assert(value > 0 && document.nodes.start + value <= document.nodes.top) +// // Valid value id is required. +// +// pair.key = key +// pair.value = value +// +// if (!PUSH(&context, +// document.nodes.start[mapping-1].data.mapping.pairs, pair)) +// return 0 +// +// return 1 +//} +// +// diff --git a/goyaml.v3/decode.go b/goyaml.v3/decode.go new file mode 100644 index 0000000..0173b69 --- /dev/null +++ b/goyaml.v3/decode.go @@ -0,0 +1,1000 @@ +// +// Copyright (c) 2011-2019 Canonical Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package yaml + +import ( + "encoding" + "encoding/base64" + "fmt" + "io" + "math" + "reflect" + "strconv" + "time" +) + +// ---------------------------------------------------------------------------- +// Parser, produces a node tree out of a libyaml event stream. + +type parser struct { + parser yaml_parser_t + event yaml_event_t + doc *Node + anchors map[string]*Node + doneInit bool + textless bool +} + +func newParser(b []byte) *parser { + p := parser{} + if !yaml_parser_initialize(&p.parser) { + panic("failed to initialize YAML emitter") + } + if len(b) == 0 { + b = []byte{'\n'} + } + yaml_parser_set_input_string(&p.parser, b) + return &p +} + +func newParserFromReader(r io.Reader) *parser { + p := parser{} + if !yaml_parser_initialize(&p.parser) { + panic("failed to initialize YAML emitter") + } + yaml_parser_set_input_reader(&p.parser, r) + return &p +} + +func (p *parser) init() { + if p.doneInit { + return + } + p.anchors = make(map[string]*Node) + p.expect(yaml_STREAM_START_EVENT) + p.doneInit = true +} + +func (p *parser) destroy() { + if p.event.typ != yaml_NO_EVENT { + yaml_event_delete(&p.event) + } + yaml_parser_delete(&p.parser) +} + +// expect consumes an event from the event stream and +// checks that it's of the expected type. +func (p *parser) expect(e yaml_event_type_t) { + if p.event.typ == yaml_NO_EVENT { + if !yaml_parser_parse(&p.parser, &p.event) { + p.fail() + } + } + if p.event.typ == yaml_STREAM_END_EVENT { + failf("attempted to go past the end of stream; corrupted value?") + } + if p.event.typ != e { + p.parser.problem = fmt.Sprintf("expected %s event but got %s", e, p.event.typ) + p.fail() + } + yaml_event_delete(&p.event) + p.event.typ = yaml_NO_EVENT +} + +// peek peeks at the next event in the event stream, +// puts the results into p.event and returns the event type. +func (p *parser) peek() yaml_event_type_t { + if p.event.typ != yaml_NO_EVENT { + return p.event.typ + } + // It's curious choice from the underlying API to generally return a + // positive result on success, but on this case return true in an error + // scenario. This was the source of bugs in the past (issue #666). + if !yaml_parser_parse(&p.parser, &p.event) || p.parser.error != yaml_NO_ERROR { + p.fail() + } + return p.event.typ +} + +func (p *parser) fail() { + var where string + var line int + if p.parser.context_mark.line != 0 { + line = p.parser.context_mark.line + // Scanner errors don't iterate line before returning error + if p.parser.error == yaml_SCANNER_ERROR { + line++ + } + } else if p.parser.problem_mark.line != 0 { + line = p.parser.problem_mark.line + // Scanner errors don't iterate line before returning error + if p.parser.error == yaml_SCANNER_ERROR { + line++ + } + } + if line != 0 { + where = "line " + strconv.Itoa(line) + ": " + } + var msg string + if len(p.parser.problem) > 0 { + msg = p.parser.problem + } else { + msg = "unknown problem parsing YAML content" + } + failf("%s%s", where, msg) +} + +func (p *parser) anchor(n *Node, anchor []byte) { + if anchor != nil { + n.Anchor = string(anchor) + p.anchors[n.Anchor] = n + } +} + +func (p *parser) parse() *Node { + p.init() + switch p.peek() { + case yaml_SCALAR_EVENT: + return p.scalar() + case yaml_ALIAS_EVENT: + return p.alias() + case yaml_MAPPING_START_EVENT: + return p.mapping() + case yaml_SEQUENCE_START_EVENT: + return p.sequence() + case yaml_DOCUMENT_START_EVENT: + return p.document() + case yaml_STREAM_END_EVENT: + // Happens when attempting to decode an empty buffer. + return nil + case yaml_TAIL_COMMENT_EVENT: + panic("internal error: unexpected tail comment event (please report)") + default: + panic("internal error: attempted to parse unknown event (please report): " + p.event.typ.String()) + } +} + +func (p *parser) node(kind Kind, defaultTag, tag, value string) *Node { + var style Style + if tag != "" && tag != "!" { + tag = shortTag(tag) + style = TaggedStyle + } else if defaultTag != "" { + tag = defaultTag + } else if kind == ScalarNode { + tag, _ = resolve("", value) + } + n := &Node{ + Kind: kind, + Tag: tag, + Value: value, + Style: style, + } + if !p.textless { + n.Line = p.event.start_mark.line + 1 + n.Column = p.event.start_mark.column + 1 + n.HeadComment = string(p.event.head_comment) + n.LineComment = string(p.event.line_comment) + n.FootComment = string(p.event.foot_comment) + } + return n +} + +func (p *parser) parseChild(parent *Node) *Node { + child := p.parse() + parent.Content = append(parent.Content, child) + return child +} + +func (p *parser) document() *Node { + n := p.node(DocumentNode, "", "", "") + p.doc = n + p.expect(yaml_DOCUMENT_START_EVENT) + p.parseChild(n) + if p.peek() == yaml_DOCUMENT_END_EVENT { + n.FootComment = string(p.event.foot_comment) + } + p.expect(yaml_DOCUMENT_END_EVENT) + return n +} + +func (p *parser) alias() *Node { + n := p.node(AliasNode, "", "", string(p.event.anchor)) + n.Alias = p.anchors[n.Value] + if n.Alias == nil { + failf("unknown anchor '%s' referenced", n.Value) + } + p.expect(yaml_ALIAS_EVENT) + return n +} + +func (p *parser) scalar() *Node { + var parsedStyle = p.event.scalar_style() + var nodeStyle Style + switch { + case parsedStyle&yaml_DOUBLE_QUOTED_SCALAR_STYLE != 0: + nodeStyle = DoubleQuotedStyle + case parsedStyle&yaml_SINGLE_QUOTED_SCALAR_STYLE != 0: + nodeStyle = SingleQuotedStyle + case parsedStyle&yaml_LITERAL_SCALAR_STYLE != 0: + nodeStyle = LiteralStyle + case parsedStyle&yaml_FOLDED_SCALAR_STYLE != 0: + nodeStyle = FoldedStyle + } + var nodeValue = string(p.event.value) + var nodeTag = string(p.event.tag) + var defaultTag string + if nodeStyle == 0 { + if nodeValue == "<<" { + defaultTag = mergeTag + } + } else { + defaultTag = strTag + } + n := p.node(ScalarNode, defaultTag, nodeTag, nodeValue) + n.Style |= nodeStyle + p.anchor(n, p.event.anchor) + p.expect(yaml_SCALAR_EVENT) + return n +} + +func (p *parser) sequence() *Node { + n := p.node(SequenceNode, seqTag, string(p.event.tag), "") + if p.event.sequence_style()&yaml_FLOW_SEQUENCE_STYLE != 0 { + n.Style |= FlowStyle + } + p.anchor(n, p.event.anchor) + p.expect(yaml_SEQUENCE_START_EVENT) + for p.peek() != yaml_SEQUENCE_END_EVENT { + p.parseChild(n) + } + n.LineComment = string(p.event.line_comment) + n.FootComment = string(p.event.foot_comment) + p.expect(yaml_SEQUENCE_END_EVENT) + return n +} + +func (p *parser) mapping() *Node { + n := p.node(MappingNode, mapTag, string(p.event.tag), "") + block := true + if p.event.mapping_style()&yaml_FLOW_MAPPING_STYLE != 0 { + block = false + n.Style |= FlowStyle + } + p.anchor(n, p.event.anchor) + p.expect(yaml_MAPPING_START_EVENT) + for p.peek() != yaml_MAPPING_END_EVENT { + k := p.parseChild(n) + if block && k.FootComment != "" { + // Must be a foot comment for the prior value when being dedented. + if len(n.Content) > 2 { + n.Content[len(n.Content)-3].FootComment = k.FootComment + k.FootComment = "" + } + } + v := p.parseChild(n) + if k.FootComment == "" && v.FootComment != "" { + k.FootComment = v.FootComment + v.FootComment = "" + } + if p.peek() == yaml_TAIL_COMMENT_EVENT { + if k.FootComment == "" { + k.FootComment = string(p.event.foot_comment) + } + p.expect(yaml_TAIL_COMMENT_EVENT) + } + } + n.LineComment = string(p.event.line_comment) + n.FootComment = string(p.event.foot_comment) + if n.Style&FlowStyle == 0 && n.FootComment != "" && len(n.Content) > 1 { + n.Content[len(n.Content)-2].FootComment = n.FootComment + n.FootComment = "" + } + p.expect(yaml_MAPPING_END_EVENT) + return n +} + +// ---------------------------------------------------------------------------- +// Decoder, unmarshals a node into a provided value. + +type decoder struct { + doc *Node + aliases map[*Node]bool + terrors []string + + stringMapType reflect.Type + generalMapType reflect.Type + + knownFields bool + uniqueKeys bool + decodeCount int + aliasCount int + aliasDepth int + + mergedFields map[interface{}]bool +} + +var ( + nodeType = reflect.TypeOf(Node{}) + durationType = reflect.TypeOf(time.Duration(0)) + stringMapType = reflect.TypeOf(map[string]interface{}{}) + generalMapType = reflect.TypeOf(map[interface{}]interface{}{}) + ifaceType = generalMapType.Elem() + timeType = reflect.TypeOf(time.Time{}) + ptrTimeType = reflect.TypeOf(&time.Time{}) +) + +func newDecoder() *decoder { + d := &decoder{ + stringMapType: stringMapType, + generalMapType: generalMapType, + uniqueKeys: true, + } + d.aliases = make(map[*Node]bool) + return d +} + +func (d *decoder) terror(n *Node, tag string, out reflect.Value) { + if n.Tag != "" { + tag = n.Tag + } + value := n.Value + if tag != seqTag && tag != mapTag { + if len(value) > 10 { + value = " `" + value[:7] + "...`" + } else { + value = " `" + value + "`" + } + } + d.terrors = append(d.terrors, fmt.Sprintf("line %d: cannot unmarshal %s%s into %s", n.Line, shortTag(tag), value, out.Type())) +} + +func (d *decoder) callUnmarshaler(n *Node, u Unmarshaler) (good bool) { + err := u.UnmarshalYAML(n) + if e, ok := err.(*TypeError); ok { + d.terrors = append(d.terrors, e.Errors...) + return false + } + if err != nil { + fail(err) + } + return true +} + +func (d *decoder) callObsoleteUnmarshaler(n *Node, u obsoleteUnmarshaler) (good bool) { + terrlen := len(d.terrors) + err := u.UnmarshalYAML(func(v interface{}) (err error) { + defer handleErr(&err) + d.unmarshal(n, reflect.ValueOf(v)) + if len(d.terrors) > terrlen { + issues := d.terrors[terrlen:] + d.terrors = d.terrors[:terrlen] + return &TypeError{issues} + } + return nil + }) + if e, ok := err.(*TypeError); ok { + d.terrors = append(d.terrors, e.Errors...) + return false + } + if err != nil { + fail(err) + } + return true +} + +// d.prepare initializes and dereferences pointers and calls UnmarshalYAML +// if a value is found to implement it. +// It returns the initialized and dereferenced out value, whether +// unmarshalling was already done by UnmarshalYAML, and if so whether +// its types unmarshalled appropriately. +// +// If n holds a null value, prepare returns before doing anything. +func (d *decoder) prepare(n *Node, out reflect.Value) (newout reflect.Value, unmarshaled, good bool) { + if n.ShortTag() == nullTag { + return out, false, false + } + again := true + for again { + again = false + if out.Kind() == reflect.Ptr { + if out.IsNil() { + out.Set(reflect.New(out.Type().Elem())) + } + out = out.Elem() + again = true + } + if out.CanAddr() { + outi := out.Addr().Interface() + if u, ok := outi.(Unmarshaler); ok { + good = d.callUnmarshaler(n, u) + return out, true, good + } + if u, ok := outi.(obsoleteUnmarshaler); ok { + good = d.callObsoleteUnmarshaler(n, u) + return out, true, good + } + } + } + return out, false, false +} + +func (d *decoder) fieldByIndex(n *Node, v reflect.Value, index []int) (field reflect.Value) { + if n.ShortTag() == nullTag { + return reflect.Value{} + } + for _, num := range index { + for { + if v.Kind() == reflect.Ptr { + if v.IsNil() { + v.Set(reflect.New(v.Type().Elem())) + } + v = v.Elem() + continue + } + break + } + v = v.Field(num) + } + return v +} + +const ( + // 400,000 decode operations is ~500kb of dense object declarations, or + // ~5kb of dense object declarations with 10000% alias expansion + alias_ratio_range_low = 400000 + + // 4,000,000 decode operations is ~5MB of dense object declarations, or + // ~4.5MB of dense object declarations with 10% alias expansion + alias_ratio_range_high = 4000000 + + // alias_ratio_range is the range over which we scale allowed alias ratios + alias_ratio_range = float64(alias_ratio_range_high - alias_ratio_range_low) +) + +func allowedAliasRatio(decodeCount int) float64 { + switch { + case decodeCount <= alias_ratio_range_low: + // allow 99% to come from alias expansion for small-to-medium documents + return 0.99 + case decodeCount >= alias_ratio_range_high: + // allow 10% to come from alias expansion for very large documents + return 0.10 + default: + // scale smoothly from 99% down to 10% over the range. + // this maps to 396,000 - 400,000 allowed alias-driven decodes over the range. + // 400,000 decode operations is ~100MB of allocations in worst-case scenarios (single-item maps). + return 0.99 - 0.89*(float64(decodeCount-alias_ratio_range_low)/alias_ratio_range) + } +} + +func (d *decoder) unmarshal(n *Node, out reflect.Value) (good bool) { + d.decodeCount++ + if d.aliasDepth > 0 { + d.aliasCount++ + } + if d.aliasCount > 100 && d.decodeCount > 1000 && float64(d.aliasCount)/float64(d.decodeCount) > allowedAliasRatio(d.decodeCount) { + failf("document contains excessive aliasing") + } + if out.Type() == nodeType { + out.Set(reflect.ValueOf(n).Elem()) + return true + } + switch n.Kind { + case DocumentNode: + return d.document(n, out) + case AliasNode: + return d.alias(n, out) + } + out, unmarshaled, good := d.prepare(n, out) + if unmarshaled { + return good + } + switch n.Kind { + case ScalarNode: + good = d.scalar(n, out) + case MappingNode: + good = d.mapping(n, out) + case SequenceNode: + good = d.sequence(n, out) + case 0: + if n.IsZero() { + return d.null(out) + } + fallthrough + default: + failf("cannot decode node with unknown kind %d", n.Kind) + } + return good +} + +func (d *decoder) document(n *Node, out reflect.Value) (good bool) { + if len(n.Content) == 1 { + d.doc = n + d.unmarshal(n.Content[0], out) + return true + } + return false +} + +func (d *decoder) alias(n *Node, out reflect.Value) (good bool) { + if d.aliases[n] { + // TODO this could actually be allowed in some circumstances. + failf("anchor '%s' value contains itself", n.Value) + } + d.aliases[n] = true + d.aliasDepth++ + good = d.unmarshal(n.Alias, out) + d.aliasDepth-- + delete(d.aliases, n) + return good +} + +var zeroValue reflect.Value + +func resetMap(out reflect.Value) { + for _, k := range out.MapKeys() { + out.SetMapIndex(k, zeroValue) + } +} + +func (d *decoder) null(out reflect.Value) bool { + if out.CanAddr() { + switch out.Kind() { + case reflect.Interface, reflect.Ptr, reflect.Map, reflect.Slice: + out.Set(reflect.Zero(out.Type())) + return true + } + } + return false +} + +func (d *decoder) scalar(n *Node, out reflect.Value) bool { + var tag string + var resolved interface{} + if n.indicatedString() { + tag = strTag + resolved = n.Value + } else { + tag, resolved = resolve(n.Tag, n.Value) + if tag == binaryTag { + data, err := base64.StdEncoding.DecodeString(resolved.(string)) + if err != nil { + failf("!!binary value contains invalid base64 data") + } + resolved = string(data) + } + } + if resolved == nil { + return d.null(out) + } + if resolvedv := reflect.ValueOf(resolved); out.Type() == resolvedv.Type() { + // We've resolved to exactly the type we want, so use that. + out.Set(resolvedv) + return true + } + // Perhaps we can use the value as a TextUnmarshaler to + // set its value. + if out.CanAddr() { + u, ok := out.Addr().Interface().(encoding.TextUnmarshaler) + if ok { + var text []byte + if tag == binaryTag { + text = []byte(resolved.(string)) + } else { + // We let any value be unmarshaled into TextUnmarshaler. + // That might be more lax than we'd like, but the + // TextUnmarshaler itself should bowl out any dubious values. + text = []byte(n.Value) + } + err := u.UnmarshalText(text) + if err != nil { + fail(err) + } + return true + } + } + switch out.Kind() { + case reflect.String: + if tag == binaryTag { + out.SetString(resolved.(string)) + return true + } + out.SetString(n.Value) + return true + case reflect.Interface: + out.Set(reflect.ValueOf(resolved)) + return true + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + // This used to work in v2, but it's very unfriendly. + isDuration := out.Type() == durationType + + switch resolved := resolved.(type) { + case int: + if !isDuration && !out.OverflowInt(int64(resolved)) { + out.SetInt(int64(resolved)) + return true + } + case int64: + if !isDuration && !out.OverflowInt(resolved) { + out.SetInt(resolved) + return true + } + case uint64: + if !isDuration && resolved <= math.MaxInt64 && !out.OverflowInt(int64(resolved)) { + out.SetInt(int64(resolved)) + return true + } + case float64: + if !isDuration && resolved <= math.MaxInt64 && !out.OverflowInt(int64(resolved)) { + out.SetInt(int64(resolved)) + return true + } + case string: + if out.Type() == durationType { + d, err := time.ParseDuration(resolved) + if err == nil { + out.SetInt(int64(d)) + return true + } + } + } + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + switch resolved := resolved.(type) { + case int: + if resolved >= 0 && !out.OverflowUint(uint64(resolved)) { + out.SetUint(uint64(resolved)) + return true + } + case int64: + if resolved >= 0 && !out.OverflowUint(uint64(resolved)) { + out.SetUint(uint64(resolved)) + return true + } + case uint64: + if !out.OverflowUint(uint64(resolved)) { + out.SetUint(uint64(resolved)) + return true + } + case float64: + if resolved <= math.MaxUint64 && !out.OverflowUint(uint64(resolved)) { + out.SetUint(uint64(resolved)) + return true + } + } + case reflect.Bool: + switch resolved := resolved.(type) { + case bool: + out.SetBool(resolved) + return true + case string: + // This offers some compatibility with the 1.1 spec (https://yaml.org/type/bool.html). + // It only works if explicitly attempting to unmarshal into a typed bool value. + switch resolved { + case "y", "Y", "yes", "Yes", "YES", "on", "On", "ON": + out.SetBool(true) + return true + case "n", "N", "no", "No", "NO", "off", "Off", "OFF": + out.SetBool(false) + return true + } + } + case reflect.Float32, reflect.Float64: + switch resolved := resolved.(type) { + case int: + out.SetFloat(float64(resolved)) + return true + case int64: + out.SetFloat(float64(resolved)) + return true + case uint64: + out.SetFloat(float64(resolved)) + return true + case float64: + out.SetFloat(resolved) + return true + } + case reflect.Struct: + if resolvedv := reflect.ValueOf(resolved); out.Type() == resolvedv.Type() { + out.Set(resolvedv) + return true + } + case reflect.Ptr: + panic("yaml internal error: please report the issue") + } + d.terror(n, tag, out) + return false +} + +func settableValueOf(i interface{}) reflect.Value { + v := reflect.ValueOf(i) + sv := reflect.New(v.Type()).Elem() + sv.Set(v) + return sv +} + +func (d *decoder) sequence(n *Node, out reflect.Value) (good bool) { + l := len(n.Content) + + var iface reflect.Value + switch out.Kind() { + case reflect.Slice: + out.Set(reflect.MakeSlice(out.Type(), l, l)) + case reflect.Array: + if l != out.Len() { + failf("invalid array: want %d elements but got %d", out.Len(), l) + } + case reflect.Interface: + // No type hints. Will have to use a generic sequence. + iface = out + out = settableValueOf(make([]interface{}, l)) + default: + d.terror(n, seqTag, out) + return false + } + et := out.Type().Elem() + + j := 0 + for i := 0; i < l; i++ { + e := reflect.New(et).Elem() + if ok := d.unmarshal(n.Content[i], e); ok { + out.Index(j).Set(e) + j++ + } + } + if out.Kind() != reflect.Array { + out.Set(out.Slice(0, j)) + } + if iface.IsValid() { + iface.Set(out) + } + return true +} + +func (d *decoder) mapping(n *Node, out reflect.Value) (good bool) { + l := len(n.Content) + if d.uniqueKeys { + nerrs := len(d.terrors) + for i := 0; i < l; i += 2 { + ni := n.Content[i] + for j := i + 2; j < l; j += 2 { + nj := n.Content[j] + if ni.Kind == nj.Kind && ni.Value == nj.Value { + d.terrors = append(d.terrors, fmt.Sprintf("line %d: mapping key %#v already defined at line %d", nj.Line, nj.Value, ni.Line)) + } + } + } + if len(d.terrors) > nerrs { + return false + } + } + switch out.Kind() { + case reflect.Struct: + return d.mappingStruct(n, out) + case reflect.Map: + // okay + case reflect.Interface: + iface := out + if isStringMap(n) { + out = reflect.MakeMap(d.stringMapType) + } else { + out = reflect.MakeMap(d.generalMapType) + } + iface.Set(out) + default: + d.terror(n, mapTag, out) + return false + } + + outt := out.Type() + kt := outt.Key() + et := outt.Elem() + + stringMapType := d.stringMapType + generalMapType := d.generalMapType + if outt.Elem() == ifaceType { + if outt.Key().Kind() == reflect.String { + d.stringMapType = outt + } else if outt.Key() == ifaceType { + d.generalMapType = outt + } + } + + mergedFields := d.mergedFields + d.mergedFields = nil + + var mergeNode *Node + + mapIsNew := false + if out.IsNil() { + out.Set(reflect.MakeMap(outt)) + mapIsNew = true + } + for i := 0; i < l; i += 2 { + if isMerge(n.Content[i]) { + mergeNode = n.Content[i+1] + continue + } + k := reflect.New(kt).Elem() + if d.unmarshal(n.Content[i], k) { + if mergedFields != nil { + ki := k.Interface() + if mergedFields[ki] { + continue + } + mergedFields[ki] = true + } + kkind := k.Kind() + if kkind == reflect.Interface { + kkind = k.Elem().Kind() + } + if kkind == reflect.Map || kkind == reflect.Slice { + failf("invalid map key: %#v", k.Interface()) + } + e := reflect.New(et).Elem() + if d.unmarshal(n.Content[i+1], e) || n.Content[i+1].ShortTag() == nullTag && (mapIsNew || !out.MapIndex(k).IsValid()) { + out.SetMapIndex(k, e) + } + } + } + + d.mergedFields = mergedFields + if mergeNode != nil { + d.merge(n, mergeNode, out) + } + + d.stringMapType = stringMapType + d.generalMapType = generalMapType + return true +} + +func isStringMap(n *Node) bool { + if n.Kind != MappingNode { + return false + } + l := len(n.Content) + for i := 0; i < l; i += 2 { + shortTag := n.Content[i].ShortTag() + if shortTag != strTag && shortTag != mergeTag { + return false + } + } + return true +} + +func (d *decoder) mappingStruct(n *Node, out reflect.Value) (good bool) { + sinfo, err := getStructInfo(out.Type()) + if err != nil { + panic(err) + } + + var inlineMap reflect.Value + var elemType reflect.Type + if sinfo.InlineMap != -1 { + inlineMap = out.Field(sinfo.InlineMap) + elemType = inlineMap.Type().Elem() + } + + for _, index := range sinfo.InlineUnmarshalers { + field := d.fieldByIndex(n, out, index) + d.prepare(n, field) + } + + mergedFields := d.mergedFields + d.mergedFields = nil + var mergeNode *Node + var doneFields []bool + if d.uniqueKeys { + doneFields = make([]bool, len(sinfo.FieldsList)) + } + name := settableValueOf("") + l := len(n.Content) + for i := 0; i < l; i += 2 { + ni := n.Content[i] + if isMerge(ni) { + mergeNode = n.Content[i+1] + continue + } + if !d.unmarshal(ni, name) { + continue + } + sname := name.String() + if mergedFields != nil { + if mergedFields[sname] { + continue + } + mergedFields[sname] = true + } + if info, ok := sinfo.FieldsMap[sname]; ok { + if d.uniqueKeys { + if doneFields[info.Id] { + d.terrors = append(d.terrors, fmt.Sprintf("line %d: field %s already set in type %s", ni.Line, name.String(), out.Type())) + continue + } + doneFields[info.Id] = true + } + var field reflect.Value + if info.Inline == nil { + field = out.Field(info.Num) + } else { + field = d.fieldByIndex(n, out, info.Inline) + } + d.unmarshal(n.Content[i+1], field) + } else if sinfo.InlineMap != -1 { + if inlineMap.IsNil() { + inlineMap.Set(reflect.MakeMap(inlineMap.Type())) + } + value := reflect.New(elemType).Elem() + d.unmarshal(n.Content[i+1], value) + inlineMap.SetMapIndex(name, value) + } else if d.knownFields { + d.terrors = append(d.terrors, fmt.Sprintf("line %d: field %s not found in type %s", ni.Line, name.String(), out.Type())) + } + } + + d.mergedFields = mergedFields + if mergeNode != nil { + d.merge(n, mergeNode, out) + } + return true +} + +func failWantMap() { + failf("map merge requires map or sequence of maps as the value") +} + +func (d *decoder) merge(parent *Node, merge *Node, out reflect.Value) { + mergedFields := d.mergedFields + if mergedFields == nil { + d.mergedFields = make(map[interface{}]bool) + for i := 0; i < len(parent.Content); i += 2 { + k := reflect.New(ifaceType).Elem() + if d.unmarshal(parent.Content[i], k) { + d.mergedFields[k.Interface()] = true + } + } + } + + switch merge.Kind { + case MappingNode: + d.unmarshal(merge, out) + case AliasNode: + if merge.Alias != nil && merge.Alias.Kind != MappingNode { + failWantMap() + } + d.unmarshal(merge, out) + case SequenceNode: + for i := 0; i < len(merge.Content); i++ { + ni := merge.Content[i] + if ni.Kind == AliasNode { + if ni.Alias != nil && ni.Alias.Kind != MappingNode { + failWantMap() + } + } else if ni.Kind != MappingNode { + failWantMap() + } + d.unmarshal(ni, out) + } + default: + failWantMap() + } + + d.mergedFields = mergedFields +} + +func isMerge(n *Node) bool { + return n.Kind == ScalarNode && n.Value == "<<" && (n.Tag == "" || n.Tag == "!" || shortTag(n.Tag) == mergeTag) +} diff --git a/goyaml.v3/decode_test.go b/goyaml.v3/decode_test.go new file mode 100644 index 0000000..1925c1b --- /dev/null +++ b/goyaml.v3/decode_test.go @@ -0,0 +1,1771 @@ +// +// Copyright (c) 2011-2019 Canonical Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package yaml_test + +import ( + "bytes" + "errors" + "fmt" + "io" + "math" + "reflect" + "strings" + "time" + + . "gopkg.in/check.v1" + "gopkg.in/yaml.v3" +) + +var unmarshalIntTest = 123 + +var unmarshalTests = []struct { + data string + value interface{} +}{ + { + "", + (*struct{})(nil), + }, + { + "{}", &struct{}{}, + }, { + "v: hi", + map[string]string{"v": "hi"}, + }, { + "v: hi", map[string]interface{}{"v": "hi"}, + }, { + "v: true", + map[string]string{"v": "true"}, + }, { + "v: true", + map[string]interface{}{"v": true}, + }, { + "v: 10", + map[string]interface{}{"v": 10}, + }, { + "v: 0b10", + map[string]interface{}{"v": 2}, + }, { + "v: 0xA", + map[string]interface{}{"v": 10}, + }, { + "v: 4294967296", + map[string]int64{"v": 4294967296}, + }, { + "v: 0.1", + map[string]interface{}{"v": 0.1}, + }, { + "v: .1", + map[string]interface{}{"v": 0.1}, + }, { + "v: .Inf", + map[string]interface{}{"v": math.Inf(+1)}, + }, { + "v: -.Inf", + map[string]interface{}{"v": math.Inf(-1)}, + }, { + "v: -10", + map[string]interface{}{"v": -10}, + }, { + "v: -.1", + map[string]interface{}{"v": -0.1}, + }, + + // Simple values. + { + "123", + &unmarshalIntTest, + }, + + // Floats from spec + { + "canonical: 6.8523e+5", + map[string]interface{}{"canonical": 6.8523e+5}, + }, { + "expo: 685.230_15e+03", + map[string]interface{}{"expo": 685.23015e+03}, + }, { + "fixed: 685_230.15", + map[string]interface{}{"fixed": 685230.15}, + }, { + "neginf: -.inf", + map[string]interface{}{"neginf": math.Inf(-1)}, + }, { + "fixed: 685_230.15", + map[string]float64{"fixed": 685230.15}, + }, + //{"sexa: 190:20:30.15", map[string]interface{}{"sexa": 0}}, // Unsupported + //{"notanum: .NaN", map[string]interface{}{"notanum": math.NaN()}}, // Equality of NaN fails. + + // Bools are per 1.2 spec. + { + "canonical: true", + map[string]interface{}{"canonical": true}, + }, { + "canonical: false", + map[string]interface{}{"canonical": false}, + }, { + "bool: True", + map[string]interface{}{"bool": true}, + }, { + "bool: False", + map[string]interface{}{"bool": false}, + }, { + "bool: TRUE", + map[string]interface{}{"bool": true}, + }, { + "bool: FALSE", + map[string]interface{}{"bool": false}, + }, + // For backwards compatibility with 1.1, decoding old strings into typed values still works. + { + "option: on", + map[string]bool{"option": true}, + }, { + "option: y", + map[string]bool{"option": true}, + }, { + "option: Off", + map[string]bool{"option": false}, + }, { + "option: No", + map[string]bool{"option": false}, + }, { + "option: other", + map[string]bool{}, + }, + // Ints from spec + { + "canonical: 685230", + map[string]interface{}{"canonical": 685230}, + }, { + "decimal: +685_230", + map[string]interface{}{"decimal": 685230}, + }, { + "octal: 02472256", + map[string]interface{}{"octal": 685230}, + }, { + "octal: -02472256", + map[string]interface{}{"octal": -685230}, + }, { + "octal: 0o2472256", + map[string]interface{}{"octal": 685230}, + }, { + "octal: -0o2472256", + map[string]interface{}{"octal": -685230}, + }, { + "hexa: 0x_0A_74_AE", + map[string]interface{}{"hexa": 685230}, + }, { + "bin: 0b1010_0111_0100_1010_1110", + map[string]interface{}{"bin": 685230}, + }, { + "bin: -0b101010", + map[string]interface{}{"bin": -42}, + }, { + "bin: -0b1000000000000000000000000000000000000000000000000000000000000000", + map[string]interface{}{"bin": -9223372036854775808}, + }, { + "decimal: +685_230", + map[string]int{"decimal": 685230}, + }, + + //{"sexa: 190:20:30", map[string]interface{}{"sexa": 0}}, // Unsupported + + // Nulls from spec + { + "empty:", + map[string]interface{}{"empty": nil}, + }, { + "canonical: ~", + map[string]interface{}{"canonical": nil}, + }, { + "english: null", + map[string]interface{}{"english": nil}, + }, { + "~: null key", + map[interface{}]string{nil: "null key"}, + }, { + "empty:", + map[string]*bool{"empty": nil}, + }, + + // Flow sequence + { + "seq: [A,B]", + map[string]interface{}{"seq": []interface{}{"A", "B"}}, + }, { + "seq: [A,B,C,]", + map[string][]string{"seq": []string{"A", "B", "C"}}, + }, { + "seq: [A,1,C]", + map[string][]string{"seq": []string{"A", "1", "C"}}, + }, { + "seq: [A,1,C]", + map[string][]int{"seq": []int{1}}, + }, { + "seq: [A,1,C]", + map[string]interface{}{"seq": []interface{}{"A", 1, "C"}}, + }, + // Block sequence + { + "seq:\n - A\n - B", + map[string]interface{}{"seq": []interface{}{"A", "B"}}, + }, { + "seq:\n - A\n - B\n - C", + map[string][]string{"seq": []string{"A", "B", "C"}}, + }, { + "seq:\n - A\n - 1\n - C", + map[string][]string{"seq": []string{"A", "1", "C"}}, + }, { + "seq:\n - A\n - 1\n - C", + map[string][]int{"seq": []int{1}}, + }, { + "seq:\n - A\n - 1\n - C", + map[string]interface{}{"seq": []interface{}{"A", 1, "C"}}, + }, + + // Literal block scalar + { + "scalar: | # Comment\n\n literal\n\n \ttext\n\n", + map[string]string{"scalar": "\nliteral\n\n\ttext\n"}, + }, + + // Folded block scalar + { + "scalar: > # Comment\n\n folded\n line\n \n next\n line\n * one\n * two\n\n last\n line\n\n", + map[string]string{"scalar": "\nfolded line\nnext line\n * one\n * two\n\nlast line\n"}, + }, + + // Map inside interface with no type hints. + { + "a: {b: c}", + map[interface{}]interface{}{"a": map[string]interface{}{"b": "c"}}, + }, + // Non-string map inside interface with no type hints. + { + "a: {b: c, 1: d}", + map[interface{}]interface{}{"a": map[interface{}]interface{}{"b": "c", 1: "d"}}, + }, + + // Structs and type conversions. + { + "hello: world", + &struct{ Hello string }{"world"}, + }, { + "a: {b: c}", + &struct{ A struct{ B string } }{struct{ B string }{"c"}}, + }, { + "a: {b: c}", + &struct{ A *struct{ B string } }{&struct{ B string }{"c"}}, + }, { + "a: 'null'", + &struct{ A *unmarshalerType }{&unmarshalerType{"null"}}, + }, { + "a: {b: c}", + &struct{ A map[string]string }{map[string]string{"b": "c"}}, + }, { + "a: {b: c}", + &struct{ A *map[string]string }{&map[string]string{"b": "c"}}, + }, { + "a:", + &struct{ A map[string]string }{}, + }, { + "a: 1", + &struct{ A int }{1}, + }, { + "a: 1", + &struct{ A float64 }{1}, + }, { + "a: 1.0", + &struct{ A int }{1}, + }, { + "a: 1.0", + &struct{ A uint }{1}, + }, { + "a: [1, 2]", + &struct{ A []int }{[]int{1, 2}}, + }, { + "a: [1, 2]", + &struct{ A [2]int }{[2]int{1, 2}}, + }, { + "a: 1", + &struct{ B int }{0}, + }, { + "a: 1", + &struct { + B int "a" + }{1}, + }, { + // Some limited backwards compatibility with the 1.1 spec. + "a: YES", + &struct{ A bool }{true}, + }, + + // Some cross type conversions + { + "v: 42", + map[string]uint{"v": 42}, + }, { + "v: -42", + map[string]uint{}, + }, { + "v: 4294967296", + map[string]uint64{"v": 4294967296}, + }, { + "v: -4294967296", + map[string]uint64{}, + }, + + // int + { + "int_max: 2147483647", + map[string]int{"int_max": math.MaxInt32}, + }, + { + "int_min: -2147483648", + map[string]int{"int_min": math.MinInt32}, + }, + { + "int_overflow: 9223372036854775808", // math.MaxInt64 + 1 + map[string]int{}, + }, + + // int64 + { + "int64_max: 9223372036854775807", + map[string]int64{"int64_max": math.MaxInt64}, + }, + { + "int64_max_base2: 0b111111111111111111111111111111111111111111111111111111111111111", + map[string]int64{"int64_max_base2": math.MaxInt64}, + }, + { + "int64_min: -9223372036854775808", + map[string]int64{"int64_min": math.MinInt64}, + }, + { + "int64_neg_base2: -0b111111111111111111111111111111111111111111111111111111111111111", + map[string]int64{"int64_neg_base2": -math.MaxInt64}, + }, + { + "int64_overflow: 9223372036854775808", // math.MaxInt64 + 1 + map[string]int64{}, + }, + + // uint + { + "uint_min: 0", + map[string]uint{"uint_min": 0}, + }, + { + "uint_max: 4294967295", + map[string]uint{"uint_max": math.MaxUint32}, + }, + { + "uint_underflow: -1", + map[string]uint{}, + }, + + // uint64 + { + "uint64_min: 0", + map[string]uint{"uint64_min": 0}, + }, + { + "uint64_max: 18446744073709551615", + map[string]uint64{"uint64_max": math.MaxUint64}, + }, + { + "uint64_max_base2: 0b1111111111111111111111111111111111111111111111111111111111111111", + map[string]uint64{"uint64_max_base2": math.MaxUint64}, + }, + { + "uint64_maxint64: 9223372036854775807", + map[string]uint64{"uint64_maxint64": math.MaxInt64}, + }, + { + "uint64_underflow: -1", + map[string]uint64{}, + }, + + // float32 + { + "float32_max: 3.40282346638528859811704183484516925440e+38", + map[string]float32{"float32_max": math.MaxFloat32}, + }, + { + "float32_nonzero: 1.401298464324817070923729583289916131280e-45", + map[string]float32{"float32_nonzero": math.SmallestNonzeroFloat32}, + }, + { + "float32_maxuint64: 18446744073709551615", + map[string]float32{"float32_maxuint64": float32(math.MaxUint64)}, + }, + { + "float32_maxuint64+1: 18446744073709551616", + map[string]float32{"float32_maxuint64+1": float32(math.MaxUint64 + 1)}, + }, + + // float64 + { + "float64_max: 1.797693134862315708145274237317043567981e+308", + map[string]float64{"float64_max": math.MaxFloat64}, + }, + { + "float64_nonzero: 4.940656458412465441765687928682213723651e-324", + map[string]float64{"float64_nonzero": math.SmallestNonzeroFloat64}, + }, + { + "float64_maxuint64: 18446744073709551615", + map[string]float64{"float64_maxuint64": float64(math.MaxUint64)}, + }, + { + "float64_maxuint64+1: 18446744073709551616", + map[string]float64{"float64_maxuint64+1": float64(math.MaxUint64 + 1)}, + }, + + // Overflow cases. + { + "v: 4294967297", + map[string]int32{}, + }, { + "v: 128", + map[string]int8{}, + }, + + // Quoted values. + { + "'1': '\"2\"'", + map[interface{}]interface{}{"1": "\"2\""}, + }, { + "v:\n- A\n- 'B\n\n C'\n", + map[string][]string{"v": []string{"A", "B\nC"}}, + }, + + // Explicit tags. + { + "v: !!float '1.1'", + map[string]interface{}{"v": 1.1}, + }, { + "v: !!float 0", + map[string]interface{}{"v": float64(0)}, + }, { + "v: !!float -1", + map[string]interface{}{"v": float64(-1)}, + }, { + "v: !!null ''", + map[string]interface{}{"v": nil}, + }, { + "%TAG !y! tag:yaml.org,2002:\n---\nv: !y!int '1'", + map[string]interface{}{"v": 1}, + }, + + // Non-specific tag (Issue #75) + { + "v: ! test", + map[string]interface{}{"v": "test"}, + }, + + // Anchors and aliases. + { + "a: &x 1\nb: &y 2\nc: *x\nd: *y\n", + &struct{ A, B, C, D int }{1, 2, 1, 2}, + }, { + "a: &a {c: 1}\nb: *a", + &struct { + A, B struct { + C int + } + }{struct{ C int }{1}, struct{ C int }{1}}, + }, { + "a: &a [1, 2]\nb: *a", + &struct{ B []int }{[]int{1, 2}}, + }, + + // Bug #1133337 + { + "foo: ''", + map[string]*string{"foo": new(string)}, + }, { + "foo: null", + map[string]*string{"foo": nil}, + }, { + "foo: null", + map[string]string{"foo": ""}, + }, { + "foo: null", + map[string]interface{}{"foo": nil}, + }, + + // Support for ~ + { + "foo: ~", + map[string]*string{"foo": nil}, + }, { + "foo: ~", + map[string]string{"foo": ""}, + }, { + "foo: ~", + map[string]interface{}{"foo": nil}, + }, + + // Ignored field + { + "a: 1\nb: 2\n", + &struct { + A int + B int "-" + }{1, 0}, + }, + + // Bug #1191981 + { + "" + + "%YAML 1.1\n" + + "--- !!str\n" + + `"Generic line break (no glyph)\n\` + "\n" + + ` Generic line break (glyphed)\n\` + "\n" + + ` Line separator\u2028\` + "\n" + + ` Paragraph separator\u2029"` + "\n", + "" + + "Generic line break (no glyph)\n" + + "Generic line break (glyphed)\n" + + "Line separator\u2028Paragraph separator\u2029", + }, + + // Struct inlining + { + "a: 1\nb: 2\nc: 3\n", + &struct { + A int + C inlineB `yaml:",inline"` + }{1, inlineB{2, inlineC{3}}}, + }, + + // Struct inlining as a pointer. + { + "a: 1\nb: 2\nc: 3\n", + &struct { + A int + C *inlineB `yaml:",inline"` + }{1, &inlineB{2, inlineC{3}}}, + }, { + "a: 1\n", + &struct { + A int + C *inlineB `yaml:",inline"` + }{1, nil}, + }, { + "a: 1\nc: 3\nd: 4\n", + &struct { + A int + C *inlineD `yaml:",inline"` + }{1, &inlineD{&inlineC{3}, 4}}, + }, + + // Map inlining + { + "a: 1\nb: 2\nc: 3\n", + &struct { + A int + C map[string]int `yaml:",inline"` + }{1, map[string]int{"b": 2, "c": 3}}, + }, + + // bug 1243827 + { + "a: -b_c", + map[string]interface{}{"a": "-b_c"}, + }, + { + "a: +b_c", + map[string]interface{}{"a": "+b_c"}, + }, + { + "a: 50cent_of_dollar", + map[string]interface{}{"a": "50cent_of_dollar"}, + }, + + // issue #295 (allow scalars with colons in flow mappings and sequences) + { + "a: {b: https://github.com/go-yaml/yaml}", + map[string]interface{}{"a": map[string]interface{}{ + "b": "https://github.com/go-yaml/yaml", + }}, + }, + { + "a: [https://github.com/go-yaml/yaml]", + map[string]interface{}{"a": []interface{}{"https://github.com/go-yaml/yaml"}}, + }, + + // Duration + { + "a: 3s", + map[string]time.Duration{"a": 3 * time.Second}, + }, + + // Issue #24. + { + "a: ", + map[string]string{"a": ""}, + }, + + // Base 60 floats are obsolete and unsupported. + { + "a: 1:1\n", + map[string]string{"a": "1:1"}, + }, + + // Binary data. + { + "a: !!binary gIGC\n", + map[string]string{"a": "\x80\x81\x82"}, + }, { + "a: !!binary |\n " + strings.Repeat("kJCQ", 17) + "kJ\n CQ\n", + map[string]string{"a": strings.Repeat("\x90", 54)}, + }, { + "a: !!binary |\n " + strings.Repeat("A", 70) + "\n ==\n", + map[string]string{"a": strings.Repeat("\x00", 52)}, + }, + + // Issue #39. + { + "a:\n b:\n c: d\n", + map[string]struct{ B interface{} }{"a": {map[string]interface{}{"c": "d"}}}, + }, + + // Custom map type. + { + "a: {b: c}", + M{"a": M{"b": "c"}}, + }, + + // Support encoding.TextUnmarshaler. + { + "a: 1.2.3.4\n", + map[string]textUnmarshaler{"a": textUnmarshaler{S: "1.2.3.4"}}, + }, + { + "a: 2015-02-24T18:19:39Z\n", + map[string]textUnmarshaler{"a": textUnmarshaler{"2015-02-24T18:19:39Z"}}, + }, + + // Timestamps + { + // Date only. + "a: 2015-01-01\n", + map[string]time.Time{"a": time.Date(2015, 1, 1, 0, 0, 0, 0, time.UTC)}, + }, + { + // RFC3339 + "a: 2015-02-24T18:19:39.12Z\n", + map[string]time.Time{"a": time.Date(2015, 2, 24, 18, 19, 39, .12e9, time.UTC)}, + }, + { + // RFC3339 with short dates. + "a: 2015-2-3T3:4:5Z", + map[string]time.Time{"a": time.Date(2015, 2, 3, 3, 4, 5, 0, time.UTC)}, + }, + { + // ISO8601 lower case t + "a: 2015-02-24t18:19:39Z\n", + map[string]time.Time{"a": time.Date(2015, 2, 24, 18, 19, 39, 0, time.UTC)}, + }, + { + // space separate, no time zone + "a: 2015-02-24 18:19:39\n", + map[string]time.Time{"a": time.Date(2015, 2, 24, 18, 19, 39, 0, time.UTC)}, + }, + // Some cases not currently handled. Uncomment these when + // the code is fixed. + // { + // // space separated with time zone + // "a: 2001-12-14 21:59:43.10 -5", + // map[string]interface{}{"a": time.Date(2001, 12, 14, 21, 59, 43, .1e9, time.UTC)}, + // }, + // { + // // arbitrary whitespace between fields + // "a: 2001-12-14 \t\t \t21:59:43.10 \t Z", + // map[string]interface{}{"a": time.Date(2001, 12, 14, 21, 59, 43, .1e9, time.UTC)}, + // }, + { + // explicit string tag + "a: !!str 2015-01-01", + map[string]interface{}{"a": "2015-01-01"}, + }, + { + // explicit timestamp tag on quoted string + "a: !!timestamp \"2015-01-01\"", + map[string]time.Time{"a": time.Date(2015, 1, 1, 0, 0, 0, 0, time.UTC)}, + }, + { + // explicit timestamp tag on unquoted string + "a: !!timestamp 2015-01-01", + map[string]time.Time{"a": time.Date(2015, 1, 1, 0, 0, 0, 0, time.UTC)}, + }, + { + // quoted string that's a valid timestamp + "a: \"2015-01-01\"", + map[string]interface{}{"a": "2015-01-01"}, + }, + { + // explicit timestamp tag into interface. + "a: !!timestamp \"2015-01-01\"", + map[string]interface{}{"a": time.Date(2015, 1, 1, 0, 0, 0, 0, time.UTC)}, + }, + { + // implicit timestamp tag into interface. + "a: 2015-01-01", + map[string]interface{}{"a": time.Date(2015, 1, 1, 0, 0, 0, 0, time.UTC)}, + }, + + // Encode empty lists as zero-length slices. + { + "a: []", + &struct{ A []int }{[]int{}}, + }, + + // UTF-16-LE + { + "\xff\xfe\xf1\x00o\x00\xf1\x00o\x00:\x00 \x00v\x00e\x00r\x00y\x00 \x00y\x00e\x00s\x00\n\x00", + M{"ñoño": "very yes"}, + }, + // UTF-16-LE with surrogate. + { + "\xff\xfe\xf1\x00o\x00\xf1\x00o\x00:\x00 \x00v\x00e\x00r\x00y\x00 \x00y\x00e\x00s\x00 \x00=\xd8\xd4\xdf\n\x00", + M{"ñoño": "very yes 🟔"}, + }, + + // UTF-16-BE + { + "\xfe\xff\x00\xf1\x00o\x00\xf1\x00o\x00:\x00 \x00v\x00e\x00r\x00y\x00 \x00y\x00e\x00s\x00\n", + M{"ñoño": "very yes"}, + }, + // UTF-16-BE with surrogate. + { + "\xfe\xff\x00\xf1\x00o\x00\xf1\x00o\x00:\x00 \x00v\x00e\x00r\x00y\x00 \x00y\x00e\x00s\x00 \xd8=\xdf\xd4\x00\n", + M{"ñoño": "very yes 🟔"}, + }, + + // This *is* in fact a float number, per the spec. #171 was a mistake. + { + "a: 123456e1\n", + M{"a": 123456e1}, + }, { + "a: 123456E1\n", + M{"a": 123456e1}, + }, + // yaml-test-suite 3GZX: Spec Example 7.1. Alias Nodes + { + "First occurrence: &anchor Foo\nSecond occurrence: *anchor\nOverride anchor: &anchor Bar\nReuse anchor: *anchor\n", + map[string]interface{}{ + "First occurrence": "Foo", + "Second occurrence": "Foo", + "Override anchor": "Bar", + "Reuse anchor": "Bar", + }, + }, + // Single document with garbage following it. + { + "---\nhello\n...\n}not yaml", + "hello", + }, + + // Comment scan exhausting the input buffer (issue #469). + { + "true\n#" + strings.Repeat(" ", 512*3), + "true", + }, { + "true #" + strings.Repeat(" ", 512*3), + "true", + }, + + // CRLF + { + "a: b\r\nc:\r\n- d\r\n- e\r\n", + map[string]interface{}{ + "a": "b", + "c": []interface{}{"d", "e"}, + }, + }, +} + +type M map[string]interface{} + +type inlineB struct { + B int + inlineC `yaml:",inline"` +} + +type inlineC struct { + C int +} + +type inlineD struct { + C *inlineC `yaml:",inline"` + D int +} + +func (s *S) TestUnmarshal(c *C) { + for i, item := range unmarshalTests { + c.Logf("test %d: %q", i, item.data) + t := reflect.ValueOf(item.value).Type() + value := reflect.New(t) + err := yaml.Unmarshal([]byte(item.data), value.Interface()) + if _, ok := err.(*yaml.TypeError); !ok { + c.Assert(err, IsNil) + } + c.Assert(value.Elem().Interface(), DeepEquals, item.value, Commentf("error: %v", err)) + } +} + +func (s *S) TestUnmarshalFullTimestamp(c *C) { + // Full timestamp in same format as encoded. This is confirmed to be + // properly decoded by Python as a timestamp as well. + var str = "2015-02-24T18:19:39.123456789-03:00" + var t interface{} + err := yaml.Unmarshal([]byte(str), &t) + c.Assert(err, IsNil) + c.Assert(t, Equals, time.Date(2015, 2, 24, 18, 19, 39, 123456789, t.(time.Time).Location())) + c.Assert(t.(time.Time).In(time.UTC), Equals, time.Date(2015, 2, 24, 21, 19, 39, 123456789, time.UTC)) +} + +func (s *S) TestDecoderSingleDocument(c *C) { + // Test that Decoder.Decode works as expected on + // all the unmarshal tests. + for i, item := range unmarshalTests { + c.Logf("test %d: %q", i, item.data) + if item.data == "" { + // Behaviour differs when there's no YAML. + continue + } + t := reflect.ValueOf(item.value).Type() + value := reflect.New(t) + err := yaml.NewDecoder(strings.NewReader(item.data)).Decode(value.Interface()) + if _, ok := err.(*yaml.TypeError); !ok { + c.Assert(err, IsNil) + } + c.Assert(value.Elem().Interface(), DeepEquals, item.value) + } +} + +var decoderTests = []struct { + data string + values []interface{} +}{{ + "", + nil, +}, { + "a: b", + []interface{}{ + map[string]interface{}{"a": "b"}, + }, +}, { + "---\na: b\n...\n", + []interface{}{ + map[string]interface{}{"a": "b"}, + }, +}, { + "---\n'hello'\n...\n---\ngoodbye\n...\n", + []interface{}{ + "hello", + "goodbye", + }, +}} + +func (s *S) TestDecoder(c *C) { + for i, item := range decoderTests { + c.Logf("test %d: %q", i, item.data) + var values []interface{} + dec := yaml.NewDecoder(strings.NewReader(item.data)) + for { + var value interface{} + err := dec.Decode(&value) + if err == io.EOF { + break + } + c.Assert(err, IsNil) + values = append(values, value) + } + c.Assert(values, DeepEquals, item.values) + } +} + +type errReader struct{} + +func (errReader) Read([]byte) (int, error) { + return 0, errors.New("some read error") +} + +func (s *S) TestDecoderReadError(c *C) { + err := yaml.NewDecoder(errReader{}).Decode(&struct{}{}) + c.Assert(err, ErrorMatches, `yaml: input error: some read error`) +} + +func (s *S) TestUnmarshalNaN(c *C) { + value := map[string]interface{}{} + err := yaml.Unmarshal([]byte("notanum: .NaN"), &value) + c.Assert(err, IsNil) + c.Assert(math.IsNaN(value["notanum"].(float64)), Equals, true) +} + +func (s *S) TestUnmarshalDurationInt(c *C) { + // Don't accept plain ints as durations as it's unclear (issue #200). + var d time.Duration + err := yaml.Unmarshal([]byte("123"), &d) + c.Assert(err, ErrorMatches, "(?s).* line 1: cannot unmarshal !!int `123` into time.Duration") +} + +var unmarshalErrorTests = []struct { + data, error string +}{ + {"v: !!float 'error'", "yaml: cannot decode !!str `error` as a !!float"}, + {"v: [A,", "yaml: line 1: did not find expected node content"}, + {"v:\n- [A,", "yaml: line 2: did not find expected node content"}, + {"a:\n- b: *,", "yaml: line 2: did not find expected alphabetic or numeric character"}, + {"a: *b\n", "yaml: unknown anchor 'b' referenced"}, + {"a: &a\n b: *a\n", "yaml: anchor 'a' value contains itself"}, + {"value: -", "yaml: block sequence entries are not allowed in this context"}, + {"a: !!binary ==", "yaml: !!binary value contains invalid base64 data"}, + {"{[.]}", `yaml: invalid map key: \[\]interface \{\}\{"\."\}`}, + {"{{.}}", `yaml: invalid map key: map\[string]interface \{\}\{".":interface \{\}\(nil\)\}`}, + {"b: *a\na: &a {c: 1}", `yaml: unknown anchor 'a' referenced`}, + {"%TAG !%79! tag:yaml.org,2002:\n---\nv: !%79!int '1'", "yaml: did not find expected whitespace"}, + {"a:\n 1:\nb\n 2:", ".*could not find expected ':'"}, + {"a: 1\nb: 2\nc 2\nd: 3\n", "^yaml: line 3: could not find expected ':'$"}, + {"#\n-\n{", "yaml: line 3: could not find expected ':'"}, // Issue #665 + {"0: [:!00 \xef", "yaml: incomplete UTF-8 octet sequence"}, // Issue #666 + { + "a: &a [00,00,00,00,00,00,00,00,00]\n" + + "b: &b [*a,*a,*a,*a,*a,*a,*a,*a,*a]\n" + + "c: &c [*b,*b,*b,*b,*b,*b,*b,*b,*b]\n" + + "d: &d [*c,*c,*c,*c,*c,*c,*c,*c,*c]\n" + + "e: &e [*d,*d,*d,*d,*d,*d,*d,*d,*d]\n" + + "f: &f [*e,*e,*e,*e,*e,*e,*e,*e,*e]\n" + + "g: &g [*f,*f,*f,*f,*f,*f,*f,*f,*f]\n" + + "h: &h [*g,*g,*g,*g,*g,*g,*g,*g,*g]\n" + + "i: &i [*h,*h,*h,*h,*h,*h,*h,*h,*h]\n", + "yaml: document contains excessive aliasing", + }, +} + +func (s *S) TestUnmarshalErrors(c *C) { + for i, item := range unmarshalErrorTests { + c.Logf("test %d: %q", i, item.data) + var value interface{} + err := yaml.Unmarshal([]byte(item.data), &value) + c.Assert(err, ErrorMatches, item.error, Commentf("Partial unmarshal: %#v", value)) + } +} + +func (s *S) TestDecoderErrors(c *C) { + for _, item := range unmarshalErrorTests { + var value interface{} + err := yaml.NewDecoder(strings.NewReader(item.data)).Decode(&value) + c.Assert(err, ErrorMatches, item.error, Commentf("Partial unmarshal: %#v", value)) + } +} + +var unmarshalerTests = []struct { + data, tag string + value interface{} +}{ + {"_: {hi: there}", "!!map", map[string]interface{}{"hi": "there"}}, + {"_: [1,A]", "!!seq", []interface{}{1, "A"}}, + {"_: 10", "!!int", 10}, + {"_: null", "!!null", nil}, + {`_: BAR!`, "!!str", "BAR!"}, + {`_: "BAR!"`, "!!str", "BAR!"}, + {"_: !!foo 'BAR!'", "!!foo", "BAR!"}, + {`_: ""`, "!!str", ""}, +} + +var unmarshalerResult = map[int]error{} + +type unmarshalerType struct { + value interface{} +} + +func (o *unmarshalerType) UnmarshalYAML(value *yaml.Node) error { + if err := value.Decode(&o.value); err != nil { + return err + } + if i, ok := o.value.(int); ok { + if result, ok := unmarshalerResult[i]; ok { + return result + } + } + return nil +} + +type unmarshalerPointer struct { + Field *unmarshalerType "_" +} + +type unmarshalerValue struct { + Field unmarshalerType "_" +} + +type unmarshalerInlined struct { + Field *unmarshalerType "_" + Inlined unmarshalerType `yaml:",inline"` +} + +type unmarshalerInlinedTwice struct { + InlinedTwice unmarshalerInlined `yaml:",inline"` +} + +type obsoleteUnmarshalerType struct { + value interface{} +} + +func (o *obsoleteUnmarshalerType) UnmarshalYAML(unmarshal func(v interface{}) error) error { + if err := unmarshal(&o.value); err != nil { + return err + } + if i, ok := o.value.(int); ok { + if result, ok := unmarshalerResult[i]; ok { + return result + } + } + return nil +} + +type obsoleteUnmarshalerPointer struct { + Field *obsoleteUnmarshalerType "_" +} + +type obsoleteUnmarshalerValue struct { + Field obsoleteUnmarshalerType "_" +} + +func (s *S) TestUnmarshalerPointerField(c *C) { + for _, item := range unmarshalerTests { + obj := &unmarshalerPointer{} + err := yaml.Unmarshal([]byte(item.data), obj) + c.Assert(err, IsNil) + if item.value == nil { + c.Assert(obj.Field, IsNil) + } else { + c.Assert(obj.Field, NotNil, Commentf("Pointer not initialized (%#v)", item.value)) + c.Assert(obj.Field.value, DeepEquals, item.value) + } + } + for _, item := range unmarshalerTests { + obj := &obsoleteUnmarshalerPointer{} + err := yaml.Unmarshal([]byte(item.data), obj) + c.Assert(err, IsNil) + if item.value == nil { + c.Assert(obj.Field, IsNil) + } else { + c.Assert(obj.Field, NotNil, Commentf("Pointer not initialized (%#v)", item.value)) + c.Assert(obj.Field.value, DeepEquals, item.value) + } + } +} + +func (s *S) TestUnmarshalerValueField(c *C) { + for _, item := range unmarshalerTests { + obj := &obsoleteUnmarshalerValue{} + err := yaml.Unmarshal([]byte(item.data), obj) + c.Assert(err, IsNil) + c.Assert(obj.Field, NotNil, Commentf("Pointer not initialized (%#v)", item.value)) + c.Assert(obj.Field.value, DeepEquals, item.value) + } +} + +func (s *S) TestUnmarshalerInlinedField(c *C) { + obj := &unmarshalerInlined{} + err := yaml.Unmarshal([]byte("_: a\ninlined: b\n"), obj) + c.Assert(err, IsNil) + c.Assert(obj.Field, DeepEquals, &unmarshalerType{"a"}) + c.Assert(obj.Inlined, DeepEquals, unmarshalerType{map[string]interface{}{"_": "a", "inlined": "b"}}) + + twc := &unmarshalerInlinedTwice{} + err = yaml.Unmarshal([]byte("_: a\ninlined: b\n"), twc) + c.Assert(err, IsNil) + c.Assert(twc.InlinedTwice.Field, DeepEquals, &unmarshalerType{"a"}) + c.Assert(twc.InlinedTwice.Inlined, DeepEquals, unmarshalerType{map[string]interface{}{"_": "a", "inlined": "b"}}) +} + +func (s *S) TestUnmarshalerWholeDocument(c *C) { + obj := &obsoleteUnmarshalerType{} + err := yaml.Unmarshal([]byte(unmarshalerTests[0].data), obj) + c.Assert(err, IsNil) + value, ok := obj.value.(map[string]interface{}) + c.Assert(ok, Equals, true, Commentf("value: %#v", obj.value)) + c.Assert(value["_"], DeepEquals, unmarshalerTests[0].value) +} + +func (s *S) TestUnmarshalerTypeError(c *C) { + unmarshalerResult[2] = &yaml.TypeError{[]string{"foo"}} + unmarshalerResult[4] = &yaml.TypeError{[]string{"bar"}} + defer func() { + delete(unmarshalerResult, 2) + delete(unmarshalerResult, 4) + }() + + type T struct { + Before int + After int + M map[string]*unmarshalerType + } + var v T + data := `{before: A, m: {abc: 1, def: 2, ghi: 3, jkl: 4}, after: B}` + err := yaml.Unmarshal([]byte(data), &v) + c.Assert(err, ErrorMatches, ""+ + "yaml: unmarshal errors:\n"+ + " line 1: cannot unmarshal !!str `A` into int\n"+ + " foo\n"+ + " bar\n"+ + " line 1: cannot unmarshal !!str `B` into int") + c.Assert(v.M["abc"], NotNil) + c.Assert(v.M["def"], IsNil) + c.Assert(v.M["ghi"], NotNil) + c.Assert(v.M["jkl"], IsNil) + + c.Assert(v.M["abc"].value, Equals, 1) + c.Assert(v.M["ghi"].value, Equals, 3) +} + +func (s *S) TestObsoleteUnmarshalerTypeError(c *C) { + unmarshalerResult[2] = &yaml.TypeError{[]string{"foo"}} + unmarshalerResult[4] = &yaml.TypeError{[]string{"bar"}} + defer func() { + delete(unmarshalerResult, 2) + delete(unmarshalerResult, 4) + }() + + type T struct { + Before int + After int + M map[string]*obsoleteUnmarshalerType + } + var v T + data := `{before: A, m: {abc: 1, def: 2, ghi: 3, jkl: 4}, after: B}` + err := yaml.Unmarshal([]byte(data), &v) + c.Assert(err, ErrorMatches, ""+ + "yaml: unmarshal errors:\n"+ + " line 1: cannot unmarshal !!str `A` into int\n"+ + " foo\n"+ + " bar\n"+ + " line 1: cannot unmarshal !!str `B` into int") + c.Assert(v.M["abc"], NotNil) + c.Assert(v.M["def"], IsNil) + c.Assert(v.M["ghi"], NotNil) + c.Assert(v.M["jkl"], IsNil) + + c.Assert(v.M["abc"].value, Equals, 1) + c.Assert(v.M["ghi"].value, Equals, 3) +} + +type proxyTypeError struct{} + +func (v *proxyTypeError) UnmarshalYAML(node *yaml.Node) error { + var s string + var a int32 + var b int64 + if err := node.Decode(&s); err != nil { + panic(err) + } + if s == "a" { + if err := node.Decode(&b); err == nil { + panic("should have failed") + } + return node.Decode(&a) + } + if err := node.Decode(&a); err == nil { + panic("should have failed") + } + return node.Decode(&b) +} + +func (s *S) TestUnmarshalerTypeErrorProxying(c *C) { + type T struct { + Before int + After int + M map[string]*proxyTypeError + } + var v T + data := `{before: A, m: {abc: a, def: b}, after: B}` + err := yaml.Unmarshal([]byte(data), &v) + c.Assert(err, ErrorMatches, ""+ + "yaml: unmarshal errors:\n"+ + " line 1: cannot unmarshal !!str `A` into int\n"+ + " line 1: cannot unmarshal !!str `a` into int32\n"+ + " line 1: cannot unmarshal !!str `b` into int64\n"+ + " line 1: cannot unmarshal !!str `B` into int") +} + +type obsoleteProxyTypeError struct{} + +func (v *obsoleteProxyTypeError) UnmarshalYAML(unmarshal func(interface{}) error) error { + var s string + var a int32 + var b int64 + if err := unmarshal(&s); err != nil { + panic(err) + } + if s == "a" { + if err := unmarshal(&b); err == nil { + panic("should have failed") + } + return unmarshal(&a) + } + if err := unmarshal(&a); err == nil { + panic("should have failed") + } + return unmarshal(&b) +} + +func (s *S) TestObsoleteUnmarshalerTypeErrorProxying(c *C) { + type T struct { + Before int + After int + M map[string]*obsoleteProxyTypeError + } + var v T + data := `{before: A, m: {abc: a, def: b}, after: B}` + err := yaml.Unmarshal([]byte(data), &v) + c.Assert(err, ErrorMatches, ""+ + "yaml: unmarshal errors:\n"+ + " line 1: cannot unmarshal !!str `A` into int\n"+ + " line 1: cannot unmarshal !!str `a` into int32\n"+ + " line 1: cannot unmarshal !!str `b` into int64\n"+ + " line 1: cannot unmarshal !!str `B` into int") +} + +var failingErr = errors.New("failingErr") + +type failingUnmarshaler struct{} + +func (ft *failingUnmarshaler) UnmarshalYAML(node *yaml.Node) error { + return failingErr +} + +func (s *S) TestUnmarshalerError(c *C) { + err := yaml.Unmarshal([]byte("a: b"), &failingUnmarshaler{}) + c.Assert(err, Equals, failingErr) +} + +type obsoleteFailingUnmarshaler struct{} + +func (ft *obsoleteFailingUnmarshaler) UnmarshalYAML(unmarshal func(interface{}) error) error { + return failingErr +} + +func (s *S) TestObsoleteUnmarshalerError(c *C) { + err := yaml.Unmarshal([]byte("a: b"), &obsoleteFailingUnmarshaler{}) + c.Assert(err, Equals, failingErr) +} + +type sliceUnmarshaler []int + +func (su *sliceUnmarshaler) UnmarshalYAML(node *yaml.Node) error { + var slice []int + err := node.Decode(&slice) + if err == nil { + *su = slice + return nil + } + + var intVal int + err = node.Decode(&intVal) + if err == nil { + *su = []int{intVal} + return nil + } + + return err +} + +func (s *S) TestUnmarshalerRetry(c *C) { + var su sliceUnmarshaler + err := yaml.Unmarshal([]byte("[1, 2, 3]"), &su) + c.Assert(err, IsNil) + c.Assert(su, DeepEquals, sliceUnmarshaler([]int{1, 2, 3})) + + err = yaml.Unmarshal([]byte("1"), &su) + c.Assert(err, IsNil) + c.Assert(su, DeepEquals, sliceUnmarshaler([]int{1})) +} + +type obsoleteSliceUnmarshaler []int + +func (su *obsoleteSliceUnmarshaler) UnmarshalYAML(unmarshal func(interface{}) error) error { + var slice []int + err := unmarshal(&slice) + if err == nil { + *su = slice + return nil + } + + var intVal int + err = unmarshal(&intVal) + if err == nil { + *su = []int{intVal} + return nil + } + + return err +} + +func (s *S) TestObsoleteUnmarshalerRetry(c *C) { + var su obsoleteSliceUnmarshaler + err := yaml.Unmarshal([]byte("[1, 2, 3]"), &su) + c.Assert(err, IsNil) + c.Assert(su, DeepEquals, obsoleteSliceUnmarshaler([]int{1, 2, 3})) + + err = yaml.Unmarshal([]byte("1"), &su) + c.Assert(err, IsNil) + c.Assert(su, DeepEquals, obsoleteSliceUnmarshaler([]int{1})) +} + +// From http://yaml.org/type/merge.html +var mergeTests = ` +anchors: + list: + - &CENTER { "x": 1, "y": 2 } + - &LEFT { "x": 0, "y": 2 } + - &BIG { "r": 10 } + - &SMALL { "r": 1 } + +# All the following maps are equal: + +plain: + # Explicit keys + "x": 1 + "y": 2 + "r": 10 + label: center/big + +mergeOne: + # Merge one map + << : *CENTER + "r": 10 + label: center/big + +mergeMultiple: + # Merge multiple maps + << : [ *CENTER, *BIG ] + label: center/big + +override: + # Override + << : [ *BIG, *LEFT, *SMALL ] + "x": 1 + label: center/big + +shortTag: + # Explicit short merge tag + !!merge "<<" : [ *CENTER, *BIG ] + label: center/big + +longTag: + # Explicit merge long tag + ! "<<" : [ *CENTER, *BIG ] + label: center/big + +inlineMap: + # Inlined map + << : {"x": 1, "y": 2, "r": 10} + label: center/big + +inlineSequenceMap: + # Inlined map in sequence + << : [ *CENTER, {"r": 10} ] + label: center/big +` + +func (s *S) TestMerge(c *C) { + var want = map[string]interface{}{ + "x": 1, + "y": 2, + "r": 10, + "label": "center/big", + } + + wantStringMap := make(map[string]interface{}) + for k, v := range want { + wantStringMap[fmt.Sprintf("%v", k)] = v + } + + var m map[interface{}]interface{} + err := yaml.Unmarshal([]byte(mergeTests), &m) + c.Assert(err, IsNil) + for name, test := range m { + if name == "anchors" { + continue + } + if name == "plain" { + c.Assert(test, DeepEquals, wantStringMap, Commentf("test %q failed", name)) + continue + } + c.Assert(test, DeepEquals, want, Commentf("test %q failed", name)) + } +} + +func (s *S) TestMergeStruct(c *C) { + type Data struct { + X, Y, R int + Label string + } + want := Data{1, 2, 10, "center/big"} + + var m map[string]Data + err := yaml.Unmarshal([]byte(mergeTests), &m) + c.Assert(err, IsNil) + for name, test := range m { + if name == "anchors" { + continue + } + c.Assert(test, Equals, want, Commentf("test %q failed", name)) + } +} + +var mergeTestsNested = ` +mergeouter1: &mergeouter1 + d: 40 + e: 50 + +mergeouter2: &mergeouter2 + e: 5 + f: 6 + g: 70 + +mergeinner1: &mergeinner1 + <<: *mergeouter1 + inner: + a: 1 + b: 2 + +mergeinner2: &mergeinner2 + <<: *mergeouter2 + inner: + a: -1 + b: -2 + +outer: + <<: [*mergeinner1, *mergeinner2] + f: 60 + inner: + a: 10 +` + +func (s *S) TestMergeNestedStruct(c *C) { + // Issue #818: Merging used to just unmarshal twice on the target + // value, which worked for maps as these were replaced by the new map, + // but not on struct values as these are preserved. This resulted in + // the nested data from the merged map to be mixed up with the data + // from the map being merged into. + // + // This test also prevents two potential bugs from showing up: + // + // 1) A simple implementation might just zero out the nested value + // before unmarshaling the second time, but this would clobber previous + // data that is usually respected ({C: 30} below). + // + // 2) A simple implementation might attempt to handle the key skipping + // directly by iterating over the merging map without recursion, but + // there are more complex cases that require recursion. + // + // Quick summary of the fields: + // + // - A must come from outer and not overriden + // - B must not be set as its in the ignored merge + // - C should still be set as it's preset in the value + // - D should be set from the recursive merge + // - E should be set from the first recursive merge, ignored on the second + // - F should be set in the inlined map from outer, ignored later + // - G should be set in the inlined map from the second recursive merge + // + + type Inner struct { + A, B, C int + } + type Outer struct { + D, E int + Inner Inner + Inline map[string]int `yaml:",inline"` + } + type Data struct { + Outer Outer + } + + test := Data{Outer{0, 0, Inner{C: 30}, nil}} + want := Data{Outer{40, 50, Inner{A: 10, C: 30}, map[string]int{"f": 60, "g": 70}}} + + err := yaml.Unmarshal([]byte(mergeTestsNested), &test) + c.Assert(err, IsNil) + c.Assert(test, DeepEquals, want) + + // Repeat test with a map. + + var testm map[string]interface{} + var wantm = map[string]interface{}{ + "f": 60, + "inner": map[string]interface{}{ + "a": 10, + }, + "d": 40, + "e": 50, + "g": 70, + } + err = yaml.Unmarshal([]byte(mergeTestsNested), &testm) + c.Assert(err, IsNil) + c.Assert(testm["outer"], DeepEquals, wantm) +} + +var unmarshalNullTests = []struct { + input string + pristine, expected func() interface{} +}{{ + "null", + func() interface{} { var v interface{}; v = "v"; return &v }, + func() interface{} { var v interface{}; v = nil; return &v }, +}, { + "null", + func() interface{} { var s = "s"; return &s }, + func() interface{} { var s = "s"; return &s }, +}, { + "null", + func() interface{} { var s = "s"; sptr := &s; return &sptr }, + func() interface{} { var sptr *string; return &sptr }, +}, { + "null", + func() interface{} { var i = 1; return &i }, + func() interface{} { var i = 1; return &i }, +}, { + "null", + func() interface{} { var i = 1; iptr := &i; return &iptr }, + func() interface{} { var iptr *int; return &iptr }, +}, { + "null", + func() interface{} { var m = map[string]int{"s": 1}; return &m }, + func() interface{} { var m map[string]int; return &m }, +}, { + "null", + func() interface{} { var m = map[string]int{"s": 1}; return m }, + func() interface{} { var m = map[string]int{"s": 1}; return m }, +}, { + "s2: null\ns3: null", + func() interface{} { var m = map[string]int{"s1": 1, "s2": 2}; return m }, + func() interface{} { var m = map[string]int{"s1": 1, "s2": 2, "s3": 0}; return m }, +}, { + "s2: null\ns3: null", + func() interface{} { var m = map[string]interface{}{"s1": 1, "s2": 2}; return m }, + func() interface{} { var m = map[string]interface{}{"s1": 1, "s2": nil, "s3": nil}; return m }, +}} + +func (s *S) TestUnmarshalNull(c *C) { + for _, test := range unmarshalNullTests { + pristine := test.pristine() + expected := test.expected() + err := yaml.Unmarshal([]byte(test.input), pristine) + c.Assert(err, IsNil) + c.Assert(pristine, DeepEquals, expected) + } +} + +func (s *S) TestUnmarshalPreservesData(c *C) { + var v struct { + A, B int + C int `yaml:"-"` + } + v.A = 42 + v.C = 88 + err := yaml.Unmarshal([]byte("---"), &v) + c.Assert(err, IsNil) + c.Assert(v.A, Equals, 42) + c.Assert(v.B, Equals, 0) + c.Assert(v.C, Equals, 88) + + err = yaml.Unmarshal([]byte("b: 21\nc: 99"), &v) + c.Assert(err, IsNil) + c.Assert(v.A, Equals, 42) + c.Assert(v.B, Equals, 21) + c.Assert(v.C, Equals, 88) +} + +func (s *S) TestUnmarshalSliceOnPreset(c *C) { + // Issue #48. + v := struct{ A []int }{[]int{1}} + yaml.Unmarshal([]byte("a: [2]"), &v) + c.Assert(v.A, DeepEquals, []int{2}) +} + +var unmarshalStrictTests = []struct { + known bool + unique bool + data string + value interface{} + error string +}{{ + known: true, + data: "a: 1\nc: 2\n", + value: struct{ A, B int }{A: 1}, + error: `yaml: unmarshal errors:\n line 2: field c not found in type struct { A int; B int }`, +}, { + unique: true, + data: "a: 1\nb: 2\na: 3\n", + value: struct{ A, B int }{A: 3, B: 2}, + error: `yaml: unmarshal errors:\n line 3: mapping key "a" already defined at line 1`, +}, { + unique: true, + data: "c: 3\na: 1\nb: 2\nc: 4\n", + value: struct { + A int + inlineB `yaml:",inline"` + }{ + A: 1, + inlineB: inlineB{ + B: 2, + inlineC: inlineC{ + C: 4, + }, + }, + }, + error: `yaml: unmarshal errors:\n line 4: mapping key "c" already defined at line 1`, +}, { + unique: true, + data: "c: 0\na: 1\nb: 2\nc: 1\n", + value: struct { + A int + inlineB `yaml:",inline"` + }{ + A: 1, + inlineB: inlineB{ + B: 2, + inlineC: inlineC{ + C: 1, + }, + }, + }, + error: `yaml: unmarshal errors:\n line 4: mapping key "c" already defined at line 1`, +}, { + unique: true, + data: "c: 1\na: 1\nb: 2\nc: 3\n", + value: struct { + A int + M map[string]interface{} `yaml:",inline"` + }{ + A: 1, + M: map[string]interface{}{ + "b": 2, + "c": 3, + }, + }, + error: `yaml: unmarshal errors:\n line 4: mapping key "c" already defined at line 1`, +}, { + unique: true, + data: "a: 1\n9: 2\nnull: 3\n9: 4", + value: map[interface{}]interface{}{ + "a": 1, + nil: 3, + 9: 4, + }, + error: `yaml: unmarshal errors:\n line 4: mapping key "9" already defined at line 2`, +}} + +func (s *S) TestUnmarshalKnownFields(c *C) { + for i, item := range unmarshalStrictTests { + c.Logf("test %d: %q", i, item.data) + // First test that normal Unmarshal unmarshals to the expected value. + if !item.unique { + t := reflect.ValueOf(item.value).Type() + value := reflect.New(t) + err := yaml.Unmarshal([]byte(item.data), value.Interface()) + c.Assert(err, Equals, nil) + c.Assert(value.Elem().Interface(), DeepEquals, item.value) + } + + // Then test that it fails on the same thing with KnownFields on. + t := reflect.ValueOf(item.value).Type() + value := reflect.New(t) + dec := yaml.NewDecoder(bytes.NewBuffer([]byte(item.data))) + dec.KnownFields(item.known) + err := dec.Decode(value.Interface()) + c.Assert(err, ErrorMatches, item.error) + } +} + +type textUnmarshaler struct { + S string +} + +func (t *textUnmarshaler) UnmarshalText(s []byte) error { + t.S = string(s) + return nil +} + +func (s *S) TestFuzzCrashers(c *C) { + cases := []string{ + // runtime error: index out of range + "\"\\0\\\r\n", + + // should not happen + " 0: [\n] 0", + "? ? \"\n\" 0", + " - {\n000}0", + "0:\n 0: [0\n] 0", + " - \"\n000\"0", + " - \"\n000\"\"", + "0:\n - {\n000}0", + "0:\n - \"\n000\"0", + "0:\n - \"\n000\"\"", + + // runtime error: index out of range + " \ufeff\n", + "? \ufeff\n", + "? \ufeff:\n", + "0: \ufeff\n", + "? \ufeff: \ufeff\n", + } + for _, data := range cases { + var v interface{} + _ = yaml.Unmarshal([]byte(data), &v) + } +} + +//var data []byte +//func init() { +// var err error +// data, err = ioutil.ReadFile("/tmp/file.yaml") +// if err != nil { +// panic(err) +// } +//} +// +//func (s *S) BenchmarkUnmarshal(c *C) { +// var err error +// for i := 0; i < c.N; i++ { +// var v map[string]interface{} +// err = yaml.Unmarshal(data, &v) +// } +// if err != nil { +// panic(err) +// } +//} +// +//func (s *S) BenchmarkMarshal(c *C) { +// var v map[string]interface{} +// yaml.Unmarshal(data, &v) +// c.ResetTimer() +// for i := 0; i < c.N; i++ { +// yaml.Marshal(&v) +// } +//} diff --git a/goyaml.v3/emitterc.go b/goyaml.v3/emitterc.go new file mode 100644 index 0000000..0f47c9c --- /dev/null +++ b/goyaml.v3/emitterc.go @@ -0,0 +1,2020 @@ +// +// Copyright (c) 2011-2019 Canonical Ltd +// Copyright (c) 2006-2010 Kirill Simonov +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +package yaml + +import ( + "bytes" + "fmt" +) + +// Flush the buffer if needed. +func flush(emitter *yaml_emitter_t) bool { + if emitter.buffer_pos+5 >= len(emitter.buffer) { + return yaml_emitter_flush(emitter) + } + return true +} + +// Put a character to the output buffer. +func put(emitter *yaml_emitter_t, value byte) bool { + if emitter.buffer_pos+5 >= len(emitter.buffer) && !yaml_emitter_flush(emitter) { + return false + } + emitter.buffer[emitter.buffer_pos] = value + emitter.buffer_pos++ + emitter.column++ + return true +} + +// Put a line break to the output buffer. +func put_break(emitter *yaml_emitter_t) bool { + if emitter.buffer_pos+5 >= len(emitter.buffer) && !yaml_emitter_flush(emitter) { + return false + } + switch emitter.line_break { + case yaml_CR_BREAK: + emitter.buffer[emitter.buffer_pos] = '\r' + emitter.buffer_pos += 1 + case yaml_LN_BREAK: + emitter.buffer[emitter.buffer_pos] = '\n' + emitter.buffer_pos += 1 + case yaml_CRLN_BREAK: + emitter.buffer[emitter.buffer_pos+0] = '\r' + emitter.buffer[emitter.buffer_pos+1] = '\n' + emitter.buffer_pos += 2 + default: + panic("unknown line break setting") + } + if emitter.column == 0 { + emitter.space_above = true + } + emitter.column = 0 + emitter.line++ + // [Go] Do this here and below and drop from everywhere else (see commented lines). + emitter.indention = true + return true +} + +// Copy a character from a string into buffer. +func write(emitter *yaml_emitter_t, s []byte, i *int) bool { + if emitter.buffer_pos+5 >= len(emitter.buffer) && !yaml_emitter_flush(emitter) { + return false + } + p := emitter.buffer_pos + w := width(s[*i]) + switch w { + case 4: + emitter.buffer[p+3] = s[*i+3] + fallthrough + case 3: + emitter.buffer[p+2] = s[*i+2] + fallthrough + case 2: + emitter.buffer[p+1] = s[*i+1] + fallthrough + case 1: + emitter.buffer[p+0] = s[*i+0] + default: + panic("unknown character width") + } + emitter.column++ + emitter.buffer_pos += w + *i += w + return true +} + +// Write a whole string into buffer. +func write_all(emitter *yaml_emitter_t, s []byte) bool { + for i := 0; i < len(s); { + if !write(emitter, s, &i) { + return false + } + } + return true +} + +// Copy a line break character from a string into buffer. +func write_break(emitter *yaml_emitter_t, s []byte, i *int) bool { + if s[*i] == '\n' { + if !put_break(emitter) { + return false + } + *i++ + } else { + if !write(emitter, s, i) { + return false + } + if emitter.column == 0 { + emitter.space_above = true + } + emitter.column = 0 + emitter.line++ + // [Go] Do this here and above and drop from everywhere else (see commented lines). + emitter.indention = true + } + return true +} + +// Set an emitter error and return false. +func yaml_emitter_set_emitter_error(emitter *yaml_emitter_t, problem string) bool { + emitter.error = yaml_EMITTER_ERROR + emitter.problem = problem + return false +} + +// Emit an event. +func yaml_emitter_emit(emitter *yaml_emitter_t, event *yaml_event_t) bool { + emitter.events = append(emitter.events, *event) + for !yaml_emitter_need_more_events(emitter) { + event := &emitter.events[emitter.events_head] + if !yaml_emitter_analyze_event(emitter, event) { + return false + } + if !yaml_emitter_state_machine(emitter, event) { + return false + } + yaml_event_delete(event) + emitter.events_head++ + } + return true +} + +// Check if we need to accumulate more events before emitting. +// +// We accumulate extra +// - 1 event for DOCUMENT-START +// - 2 events for SEQUENCE-START +// - 3 events for MAPPING-START +// +func yaml_emitter_need_more_events(emitter *yaml_emitter_t) bool { + if emitter.events_head == len(emitter.events) { + return true + } + var accumulate int + switch emitter.events[emitter.events_head].typ { + case yaml_DOCUMENT_START_EVENT: + accumulate = 1 + break + case yaml_SEQUENCE_START_EVENT: + accumulate = 2 + break + case yaml_MAPPING_START_EVENT: + accumulate = 3 + break + default: + return false + } + if len(emitter.events)-emitter.events_head > accumulate { + return false + } + var level int + for i := emitter.events_head; i < len(emitter.events); i++ { + switch emitter.events[i].typ { + case yaml_STREAM_START_EVENT, yaml_DOCUMENT_START_EVENT, yaml_SEQUENCE_START_EVENT, yaml_MAPPING_START_EVENT: + level++ + case yaml_STREAM_END_EVENT, yaml_DOCUMENT_END_EVENT, yaml_SEQUENCE_END_EVENT, yaml_MAPPING_END_EVENT: + level-- + } + if level == 0 { + return false + } + } + return true +} + +// Append a directive to the directives stack. +func yaml_emitter_append_tag_directive(emitter *yaml_emitter_t, value *yaml_tag_directive_t, allow_duplicates bool) bool { + for i := 0; i < len(emitter.tag_directives); i++ { + if bytes.Equal(value.handle, emitter.tag_directives[i].handle) { + if allow_duplicates { + return true + } + return yaml_emitter_set_emitter_error(emitter, "duplicate %TAG directive") + } + } + + // [Go] Do we actually need to copy this given garbage collection + // and the lack of deallocating destructors? + tag_copy := yaml_tag_directive_t{ + handle: make([]byte, len(value.handle)), + prefix: make([]byte, len(value.prefix)), + } + copy(tag_copy.handle, value.handle) + copy(tag_copy.prefix, value.prefix) + emitter.tag_directives = append(emitter.tag_directives, tag_copy) + return true +} + +// Increase the indentation level. +func yaml_emitter_increase_indent(emitter *yaml_emitter_t, flow, indentless bool) bool { + emitter.indents = append(emitter.indents, emitter.indent) + if emitter.indent < 0 { + if flow { + emitter.indent = emitter.best_indent + } else { + emitter.indent = 0 + } + } else if !indentless { + // [Go] This was changed so that indentations are more regular. + if emitter.states[len(emitter.states)-1] == yaml_EMIT_BLOCK_SEQUENCE_ITEM_STATE { + // The first indent inside a sequence will just skip the "- " indicator. + emitter.indent += 2 + } else { + // Everything else aligns to the chosen indentation. + emitter.indent = emitter.best_indent*((emitter.indent+emitter.best_indent)/emitter.best_indent) + } + } + return true +} + +// State dispatcher. +func yaml_emitter_state_machine(emitter *yaml_emitter_t, event *yaml_event_t) bool { + switch emitter.state { + default: + case yaml_EMIT_STREAM_START_STATE: + return yaml_emitter_emit_stream_start(emitter, event) + + case yaml_EMIT_FIRST_DOCUMENT_START_STATE: + return yaml_emitter_emit_document_start(emitter, event, true) + + case yaml_EMIT_DOCUMENT_START_STATE: + return yaml_emitter_emit_document_start(emitter, event, false) + + case yaml_EMIT_DOCUMENT_CONTENT_STATE: + return yaml_emitter_emit_document_content(emitter, event) + + case yaml_EMIT_DOCUMENT_END_STATE: + return yaml_emitter_emit_document_end(emitter, event) + + case yaml_EMIT_FLOW_SEQUENCE_FIRST_ITEM_STATE: + return yaml_emitter_emit_flow_sequence_item(emitter, event, true, false) + + case yaml_EMIT_FLOW_SEQUENCE_TRAIL_ITEM_STATE: + return yaml_emitter_emit_flow_sequence_item(emitter, event, false, true) + + case yaml_EMIT_FLOW_SEQUENCE_ITEM_STATE: + return yaml_emitter_emit_flow_sequence_item(emitter, event, false, false) + + case yaml_EMIT_FLOW_MAPPING_FIRST_KEY_STATE: + return yaml_emitter_emit_flow_mapping_key(emitter, event, true, false) + + case yaml_EMIT_FLOW_MAPPING_TRAIL_KEY_STATE: + return yaml_emitter_emit_flow_mapping_key(emitter, event, false, true) + + case yaml_EMIT_FLOW_MAPPING_KEY_STATE: + return yaml_emitter_emit_flow_mapping_key(emitter, event, false, false) + + case yaml_EMIT_FLOW_MAPPING_SIMPLE_VALUE_STATE: + return yaml_emitter_emit_flow_mapping_value(emitter, event, true) + + case yaml_EMIT_FLOW_MAPPING_VALUE_STATE: + return yaml_emitter_emit_flow_mapping_value(emitter, event, false) + + case yaml_EMIT_BLOCK_SEQUENCE_FIRST_ITEM_STATE: + return yaml_emitter_emit_block_sequence_item(emitter, event, true) + + case yaml_EMIT_BLOCK_SEQUENCE_ITEM_STATE: + return yaml_emitter_emit_block_sequence_item(emitter, event, false) + + case yaml_EMIT_BLOCK_MAPPING_FIRST_KEY_STATE: + return yaml_emitter_emit_block_mapping_key(emitter, event, true) + + case yaml_EMIT_BLOCK_MAPPING_KEY_STATE: + return yaml_emitter_emit_block_mapping_key(emitter, event, false) + + case yaml_EMIT_BLOCK_MAPPING_SIMPLE_VALUE_STATE: + return yaml_emitter_emit_block_mapping_value(emitter, event, true) + + case yaml_EMIT_BLOCK_MAPPING_VALUE_STATE: + return yaml_emitter_emit_block_mapping_value(emitter, event, false) + + case yaml_EMIT_END_STATE: + return yaml_emitter_set_emitter_error(emitter, "expected nothing after STREAM-END") + } + panic("invalid emitter state") +} + +// Expect STREAM-START. +func yaml_emitter_emit_stream_start(emitter *yaml_emitter_t, event *yaml_event_t) bool { + if event.typ != yaml_STREAM_START_EVENT { + return yaml_emitter_set_emitter_error(emitter, "expected STREAM-START") + } + if emitter.encoding == yaml_ANY_ENCODING { + emitter.encoding = event.encoding + if emitter.encoding == yaml_ANY_ENCODING { + emitter.encoding = yaml_UTF8_ENCODING + } + } + if emitter.best_indent < 2 || emitter.best_indent > 9 { + emitter.best_indent = 2 + } + if emitter.best_width >= 0 && emitter.best_width <= emitter.best_indent*2 { + emitter.best_width = 80 + } + if emitter.best_width < 0 { + emitter.best_width = 1<<31 - 1 + } + if emitter.line_break == yaml_ANY_BREAK { + emitter.line_break = yaml_LN_BREAK + } + + emitter.indent = -1 + emitter.line = 0 + emitter.column = 0 + emitter.whitespace = true + emitter.indention = true + emitter.space_above = true + emitter.foot_indent = -1 + + if emitter.encoding != yaml_UTF8_ENCODING { + if !yaml_emitter_write_bom(emitter) { + return false + } + } + emitter.state = yaml_EMIT_FIRST_DOCUMENT_START_STATE + return true +} + +// Expect DOCUMENT-START or STREAM-END. +func yaml_emitter_emit_document_start(emitter *yaml_emitter_t, event *yaml_event_t, first bool) bool { + + if event.typ == yaml_DOCUMENT_START_EVENT { + + if event.version_directive != nil { + if !yaml_emitter_analyze_version_directive(emitter, event.version_directive) { + return false + } + } + + for i := 0; i < len(event.tag_directives); i++ { + tag_directive := &event.tag_directives[i] + if !yaml_emitter_analyze_tag_directive(emitter, tag_directive) { + return false + } + if !yaml_emitter_append_tag_directive(emitter, tag_directive, false) { + return false + } + } + + for i := 0; i < len(default_tag_directives); i++ { + tag_directive := &default_tag_directives[i] + if !yaml_emitter_append_tag_directive(emitter, tag_directive, true) { + return false + } + } + + implicit := event.implicit + if !first || emitter.canonical { + implicit = false + } + + if emitter.open_ended && (event.version_directive != nil || len(event.tag_directives) > 0) { + if !yaml_emitter_write_indicator(emitter, []byte("..."), true, false, false) { + return false + } + if !yaml_emitter_write_indent(emitter) { + return false + } + } + + if event.version_directive != nil { + implicit = false + if !yaml_emitter_write_indicator(emitter, []byte("%YAML"), true, false, false) { + return false + } + if !yaml_emitter_write_indicator(emitter, []byte("1.1"), true, false, false) { + return false + } + if !yaml_emitter_write_indent(emitter) { + return false + } + } + + if len(event.tag_directives) > 0 { + implicit = false + for i := 0; i < len(event.tag_directives); i++ { + tag_directive := &event.tag_directives[i] + if !yaml_emitter_write_indicator(emitter, []byte("%TAG"), true, false, false) { + return false + } + if !yaml_emitter_write_tag_handle(emitter, tag_directive.handle) { + return false + } + if !yaml_emitter_write_tag_content(emitter, tag_directive.prefix, true) { + return false + } + if !yaml_emitter_write_indent(emitter) { + return false + } + } + } + + if yaml_emitter_check_empty_document(emitter) { + implicit = false + } + if !implicit { + if !yaml_emitter_write_indent(emitter) { + return false + } + if !yaml_emitter_write_indicator(emitter, []byte("---"), true, false, false) { + return false + } + if emitter.canonical || true { + if !yaml_emitter_write_indent(emitter) { + return false + } + } + } + + if len(emitter.head_comment) > 0 { + if !yaml_emitter_process_head_comment(emitter) { + return false + } + if !put_break(emitter) { + return false + } + } + + emitter.state = yaml_EMIT_DOCUMENT_CONTENT_STATE + return true + } + + if event.typ == yaml_STREAM_END_EVENT { + if emitter.open_ended { + if !yaml_emitter_write_indicator(emitter, []byte("..."), true, false, false) { + return false + } + if !yaml_emitter_write_indent(emitter) { + return false + } + } + if !yaml_emitter_flush(emitter) { + return false + } + emitter.state = yaml_EMIT_END_STATE + return true + } + + return yaml_emitter_set_emitter_error(emitter, "expected DOCUMENT-START or STREAM-END") +} + +// Expect the root node. +func yaml_emitter_emit_document_content(emitter *yaml_emitter_t, event *yaml_event_t) bool { + emitter.states = append(emitter.states, yaml_EMIT_DOCUMENT_END_STATE) + + if !yaml_emitter_process_head_comment(emitter) { + return false + } + if !yaml_emitter_emit_node(emitter, event, true, false, false, false) { + return false + } + if !yaml_emitter_process_line_comment(emitter) { + return false + } + if !yaml_emitter_process_foot_comment(emitter) { + return false + } + return true +} + +// Expect DOCUMENT-END. +func yaml_emitter_emit_document_end(emitter *yaml_emitter_t, event *yaml_event_t) bool { + if event.typ != yaml_DOCUMENT_END_EVENT { + return yaml_emitter_set_emitter_error(emitter, "expected DOCUMENT-END") + } + // [Go] Force document foot separation. + emitter.foot_indent = 0 + if !yaml_emitter_process_foot_comment(emitter) { + return false + } + emitter.foot_indent = -1 + if !yaml_emitter_write_indent(emitter) { + return false + } + if !event.implicit { + // [Go] Allocate the slice elsewhere. + if !yaml_emitter_write_indicator(emitter, []byte("..."), true, false, false) { + return false + } + if !yaml_emitter_write_indent(emitter) { + return false + } + } + if !yaml_emitter_flush(emitter) { + return false + } + emitter.state = yaml_EMIT_DOCUMENT_START_STATE + emitter.tag_directives = emitter.tag_directives[:0] + return true +} + +// Expect a flow item node. +func yaml_emitter_emit_flow_sequence_item(emitter *yaml_emitter_t, event *yaml_event_t, first, trail bool) bool { + if first { + if !yaml_emitter_write_indicator(emitter, []byte{'['}, true, true, false) { + return false + } + if !yaml_emitter_increase_indent(emitter, true, false) { + return false + } + emitter.flow_level++ + } + + if event.typ == yaml_SEQUENCE_END_EVENT { + if emitter.canonical && !first && !trail { + if !yaml_emitter_write_indicator(emitter, []byte{','}, false, false, false) { + return false + } + } + emitter.flow_level-- + emitter.indent = emitter.indents[len(emitter.indents)-1] + emitter.indents = emitter.indents[:len(emitter.indents)-1] + if emitter.column == 0 || emitter.canonical && !first { + if !yaml_emitter_write_indent(emitter) { + return false + } + } + if !yaml_emitter_write_indicator(emitter, []byte{']'}, false, false, false) { + return false + } + if !yaml_emitter_process_line_comment(emitter) { + return false + } + if !yaml_emitter_process_foot_comment(emitter) { + return false + } + emitter.state = emitter.states[len(emitter.states)-1] + emitter.states = emitter.states[:len(emitter.states)-1] + + return true + } + + if !first && !trail { + if !yaml_emitter_write_indicator(emitter, []byte{','}, false, false, false) { + return false + } + } + + if !yaml_emitter_process_head_comment(emitter) { + return false + } + if emitter.column == 0 { + if !yaml_emitter_write_indent(emitter) { + return false + } + } + + if emitter.canonical || emitter.column > emitter.best_width { + if !yaml_emitter_write_indent(emitter) { + return false + } + } + if len(emitter.line_comment)+len(emitter.foot_comment)+len(emitter.tail_comment) > 0 { + emitter.states = append(emitter.states, yaml_EMIT_FLOW_SEQUENCE_TRAIL_ITEM_STATE) + } else { + emitter.states = append(emitter.states, yaml_EMIT_FLOW_SEQUENCE_ITEM_STATE) + } + if !yaml_emitter_emit_node(emitter, event, false, true, false, false) { + return false + } + if len(emitter.line_comment)+len(emitter.foot_comment)+len(emitter.tail_comment) > 0 { + if !yaml_emitter_write_indicator(emitter, []byte{','}, false, false, false) { + return false + } + } + if !yaml_emitter_process_line_comment(emitter) { + return false + } + if !yaml_emitter_process_foot_comment(emitter) { + return false + } + return true +} + +// Expect a flow key node. +func yaml_emitter_emit_flow_mapping_key(emitter *yaml_emitter_t, event *yaml_event_t, first, trail bool) bool { + if first { + if !yaml_emitter_write_indicator(emitter, []byte{'{'}, true, true, false) { + return false + } + if !yaml_emitter_increase_indent(emitter, true, false) { + return false + } + emitter.flow_level++ + } + + if event.typ == yaml_MAPPING_END_EVENT { + if (emitter.canonical || len(emitter.head_comment)+len(emitter.foot_comment)+len(emitter.tail_comment) > 0) && !first && !trail { + if !yaml_emitter_write_indicator(emitter, []byte{','}, false, false, false) { + return false + } + } + if !yaml_emitter_process_head_comment(emitter) { + return false + } + emitter.flow_level-- + emitter.indent = emitter.indents[len(emitter.indents)-1] + emitter.indents = emitter.indents[:len(emitter.indents)-1] + if emitter.canonical && !first { + if !yaml_emitter_write_indent(emitter) { + return false + } + } + if !yaml_emitter_write_indicator(emitter, []byte{'}'}, false, false, false) { + return false + } + if !yaml_emitter_process_line_comment(emitter) { + return false + } + if !yaml_emitter_process_foot_comment(emitter) { + return false + } + emitter.state = emitter.states[len(emitter.states)-1] + emitter.states = emitter.states[:len(emitter.states)-1] + return true + } + + if !first && !trail { + if !yaml_emitter_write_indicator(emitter, []byte{','}, false, false, false) { + return false + } + } + + if !yaml_emitter_process_head_comment(emitter) { + return false + } + + if emitter.column == 0 { + if !yaml_emitter_write_indent(emitter) { + return false + } + } + + if emitter.canonical || emitter.column > emitter.best_width { + if !yaml_emitter_write_indent(emitter) { + return false + } + } + + if !emitter.canonical && yaml_emitter_check_simple_key(emitter) { + emitter.states = append(emitter.states, yaml_EMIT_FLOW_MAPPING_SIMPLE_VALUE_STATE) + return yaml_emitter_emit_node(emitter, event, false, false, true, true) + } + if !yaml_emitter_write_indicator(emitter, []byte{'?'}, true, false, false) { + return false + } + emitter.states = append(emitter.states, yaml_EMIT_FLOW_MAPPING_VALUE_STATE) + return yaml_emitter_emit_node(emitter, event, false, false, true, false) +} + +// Expect a flow value node. +func yaml_emitter_emit_flow_mapping_value(emitter *yaml_emitter_t, event *yaml_event_t, simple bool) bool { + if simple { + if !yaml_emitter_write_indicator(emitter, []byte{':'}, false, false, false) { + return false + } + } else { + if emitter.canonical || emitter.column > emitter.best_width { + if !yaml_emitter_write_indent(emitter) { + return false + } + } + if !yaml_emitter_write_indicator(emitter, []byte{':'}, true, false, false) { + return false + } + } + if len(emitter.line_comment)+len(emitter.foot_comment)+len(emitter.tail_comment) > 0 { + emitter.states = append(emitter.states, yaml_EMIT_FLOW_MAPPING_TRAIL_KEY_STATE) + } else { + emitter.states = append(emitter.states, yaml_EMIT_FLOW_MAPPING_KEY_STATE) + } + if !yaml_emitter_emit_node(emitter, event, false, false, true, false) { + return false + } + if len(emitter.line_comment)+len(emitter.foot_comment)+len(emitter.tail_comment) > 0 { + if !yaml_emitter_write_indicator(emitter, []byte{','}, false, false, false) { + return false + } + } + if !yaml_emitter_process_line_comment(emitter) { + return false + } + if !yaml_emitter_process_foot_comment(emitter) { + return false + } + return true +} + +// Expect a block item node. +func yaml_emitter_emit_block_sequence_item(emitter *yaml_emitter_t, event *yaml_event_t, first bool) bool { + if first { + if !yaml_emitter_increase_indent(emitter, false, false) { + return false + } + } + if event.typ == yaml_SEQUENCE_END_EVENT { + emitter.indent = emitter.indents[len(emitter.indents)-1] + emitter.indents = emitter.indents[:len(emitter.indents)-1] + emitter.state = emitter.states[len(emitter.states)-1] + emitter.states = emitter.states[:len(emitter.states)-1] + return true + } + if !yaml_emitter_process_head_comment(emitter) { + return false + } + if !yaml_emitter_write_indent(emitter) { + return false + } + if !yaml_emitter_write_indicator(emitter, []byte{'-'}, true, false, true) { + return false + } + emitter.states = append(emitter.states, yaml_EMIT_BLOCK_SEQUENCE_ITEM_STATE) + if !yaml_emitter_emit_node(emitter, event, false, true, false, false) { + return false + } + if !yaml_emitter_process_line_comment(emitter) { + return false + } + if !yaml_emitter_process_foot_comment(emitter) { + return false + } + return true +} + +// Expect a block key node. +func yaml_emitter_emit_block_mapping_key(emitter *yaml_emitter_t, event *yaml_event_t, first bool) bool { + if first { + if !yaml_emitter_increase_indent(emitter, false, false) { + return false + } + } + if !yaml_emitter_process_head_comment(emitter) { + return false + } + if event.typ == yaml_MAPPING_END_EVENT { + emitter.indent = emitter.indents[len(emitter.indents)-1] + emitter.indents = emitter.indents[:len(emitter.indents)-1] + emitter.state = emitter.states[len(emitter.states)-1] + emitter.states = emitter.states[:len(emitter.states)-1] + return true + } + if !yaml_emitter_write_indent(emitter) { + return false + } + if len(emitter.line_comment) > 0 { + // [Go] A line comment was provided for the key. That's unusual as the + // scanner associates line comments with the value. Either way, + // save the line comment and render it appropriately later. + emitter.key_line_comment = emitter.line_comment + emitter.line_comment = nil + } + if yaml_emitter_check_simple_key(emitter) { + emitter.states = append(emitter.states, yaml_EMIT_BLOCK_MAPPING_SIMPLE_VALUE_STATE) + return yaml_emitter_emit_node(emitter, event, false, false, true, true) + } + if !yaml_emitter_write_indicator(emitter, []byte{'?'}, true, false, true) { + return false + } + emitter.states = append(emitter.states, yaml_EMIT_BLOCK_MAPPING_VALUE_STATE) + return yaml_emitter_emit_node(emitter, event, false, false, true, false) +} + +// Expect a block value node. +func yaml_emitter_emit_block_mapping_value(emitter *yaml_emitter_t, event *yaml_event_t, simple bool) bool { + if simple { + if !yaml_emitter_write_indicator(emitter, []byte{':'}, false, false, false) { + return false + } + } else { + if !yaml_emitter_write_indent(emitter) { + return false + } + if !yaml_emitter_write_indicator(emitter, []byte{':'}, true, false, true) { + return false + } + } + if len(emitter.key_line_comment) > 0 { + // [Go] Line comments are generally associated with the value, but when there's + // no value on the same line as a mapping key they end up attached to the + // key itself. + if event.typ == yaml_SCALAR_EVENT { + if len(emitter.line_comment) == 0 { + // A scalar is coming and it has no line comments by itself yet, + // so just let it handle the line comment as usual. If it has a + // line comment, we can't have both so the one from the key is lost. + emitter.line_comment = emitter.key_line_comment + emitter.key_line_comment = nil + } + } else if event.sequence_style() != yaml_FLOW_SEQUENCE_STYLE && (event.typ == yaml_MAPPING_START_EVENT || event.typ == yaml_SEQUENCE_START_EVENT) { + // An indented block follows, so write the comment right now. + emitter.line_comment, emitter.key_line_comment = emitter.key_line_comment, emitter.line_comment + if !yaml_emitter_process_line_comment(emitter) { + return false + } + emitter.line_comment, emitter.key_line_comment = emitter.key_line_comment, emitter.line_comment + } + } + emitter.states = append(emitter.states, yaml_EMIT_BLOCK_MAPPING_KEY_STATE) + if !yaml_emitter_emit_node(emitter, event, false, false, true, false) { + return false + } + if !yaml_emitter_process_line_comment(emitter) { + return false + } + if !yaml_emitter_process_foot_comment(emitter) { + return false + } + return true +} + +func yaml_emitter_silent_nil_event(emitter *yaml_emitter_t, event *yaml_event_t) bool { + return event.typ == yaml_SCALAR_EVENT && event.implicit && !emitter.canonical && len(emitter.scalar_data.value) == 0 +} + +// Expect a node. +func yaml_emitter_emit_node(emitter *yaml_emitter_t, event *yaml_event_t, + root bool, sequence bool, mapping bool, simple_key bool) bool { + + emitter.root_context = root + emitter.sequence_context = sequence + emitter.mapping_context = mapping + emitter.simple_key_context = simple_key + + switch event.typ { + case yaml_ALIAS_EVENT: + return yaml_emitter_emit_alias(emitter, event) + case yaml_SCALAR_EVENT: + return yaml_emitter_emit_scalar(emitter, event) + case yaml_SEQUENCE_START_EVENT: + return yaml_emitter_emit_sequence_start(emitter, event) + case yaml_MAPPING_START_EVENT: + return yaml_emitter_emit_mapping_start(emitter, event) + default: + return yaml_emitter_set_emitter_error(emitter, + fmt.Sprintf("expected SCALAR, SEQUENCE-START, MAPPING-START, or ALIAS, but got %v", event.typ)) + } +} + +// Expect ALIAS. +func yaml_emitter_emit_alias(emitter *yaml_emitter_t, event *yaml_event_t) bool { + if !yaml_emitter_process_anchor(emitter) { + return false + } + emitter.state = emitter.states[len(emitter.states)-1] + emitter.states = emitter.states[:len(emitter.states)-1] + return true +} + +// Expect SCALAR. +func yaml_emitter_emit_scalar(emitter *yaml_emitter_t, event *yaml_event_t) bool { + if !yaml_emitter_select_scalar_style(emitter, event) { + return false + } + if !yaml_emitter_process_anchor(emitter) { + return false + } + if !yaml_emitter_process_tag(emitter) { + return false + } + if !yaml_emitter_increase_indent(emitter, true, false) { + return false + } + if !yaml_emitter_process_scalar(emitter) { + return false + } + emitter.indent = emitter.indents[len(emitter.indents)-1] + emitter.indents = emitter.indents[:len(emitter.indents)-1] + emitter.state = emitter.states[len(emitter.states)-1] + emitter.states = emitter.states[:len(emitter.states)-1] + return true +} + +// Expect SEQUENCE-START. +func yaml_emitter_emit_sequence_start(emitter *yaml_emitter_t, event *yaml_event_t) bool { + if !yaml_emitter_process_anchor(emitter) { + return false + } + if !yaml_emitter_process_tag(emitter) { + return false + } + if emitter.flow_level > 0 || emitter.canonical || event.sequence_style() == yaml_FLOW_SEQUENCE_STYLE || + yaml_emitter_check_empty_sequence(emitter) { + emitter.state = yaml_EMIT_FLOW_SEQUENCE_FIRST_ITEM_STATE + } else { + emitter.state = yaml_EMIT_BLOCK_SEQUENCE_FIRST_ITEM_STATE + } + return true +} + +// Expect MAPPING-START. +func yaml_emitter_emit_mapping_start(emitter *yaml_emitter_t, event *yaml_event_t) bool { + if !yaml_emitter_process_anchor(emitter) { + return false + } + if !yaml_emitter_process_tag(emitter) { + return false + } + if emitter.flow_level > 0 || emitter.canonical || event.mapping_style() == yaml_FLOW_MAPPING_STYLE || + yaml_emitter_check_empty_mapping(emitter) { + emitter.state = yaml_EMIT_FLOW_MAPPING_FIRST_KEY_STATE + } else { + emitter.state = yaml_EMIT_BLOCK_MAPPING_FIRST_KEY_STATE + } + return true +} + +// Check if the document content is an empty scalar. +func yaml_emitter_check_empty_document(emitter *yaml_emitter_t) bool { + return false // [Go] Huh? +} + +// Check if the next events represent an empty sequence. +func yaml_emitter_check_empty_sequence(emitter *yaml_emitter_t) bool { + if len(emitter.events)-emitter.events_head < 2 { + return false + } + return emitter.events[emitter.events_head].typ == yaml_SEQUENCE_START_EVENT && + emitter.events[emitter.events_head+1].typ == yaml_SEQUENCE_END_EVENT +} + +// Check if the next events represent an empty mapping. +func yaml_emitter_check_empty_mapping(emitter *yaml_emitter_t) bool { + if len(emitter.events)-emitter.events_head < 2 { + return false + } + return emitter.events[emitter.events_head].typ == yaml_MAPPING_START_EVENT && + emitter.events[emitter.events_head+1].typ == yaml_MAPPING_END_EVENT +} + +// Check if the next node can be expressed as a simple key. +func yaml_emitter_check_simple_key(emitter *yaml_emitter_t) bool { + length := 0 + switch emitter.events[emitter.events_head].typ { + case yaml_ALIAS_EVENT: + length += len(emitter.anchor_data.anchor) + case yaml_SCALAR_EVENT: + if emitter.scalar_data.multiline { + return false + } + length += len(emitter.anchor_data.anchor) + + len(emitter.tag_data.handle) + + len(emitter.tag_data.suffix) + + len(emitter.scalar_data.value) + case yaml_SEQUENCE_START_EVENT: + if !yaml_emitter_check_empty_sequence(emitter) { + return false + } + length += len(emitter.anchor_data.anchor) + + len(emitter.tag_data.handle) + + len(emitter.tag_data.suffix) + case yaml_MAPPING_START_EVENT: + if !yaml_emitter_check_empty_mapping(emitter) { + return false + } + length += len(emitter.anchor_data.anchor) + + len(emitter.tag_data.handle) + + len(emitter.tag_data.suffix) + default: + return false + } + return length <= 128 +} + +// Determine an acceptable scalar style. +func yaml_emitter_select_scalar_style(emitter *yaml_emitter_t, event *yaml_event_t) bool { + + no_tag := len(emitter.tag_data.handle) == 0 && len(emitter.tag_data.suffix) == 0 + if no_tag && !event.implicit && !event.quoted_implicit { + return yaml_emitter_set_emitter_error(emitter, "neither tag nor implicit flags are specified") + } + + style := event.scalar_style() + if style == yaml_ANY_SCALAR_STYLE { + style = yaml_PLAIN_SCALAR_STYLE + } + if emitter.canonical { + style = yaml_DOUBLE_QUOTED_SCALAR_STYLE + } + if emitter.simple_key_context && emitter.scalar_data.multiline { + style = yaml_DOUBLE_QUOTED_SCALAR_STYLE + } + + if style == yaml_PLAIN_SCALAR_STYLE { + if emitter.flow_level > 0 && !emitter.scalar_data.flow_plain_allowed || + emitter.flow_level == 0 && !emitter.scalar_data.block_plain_allowed { + style = yaml_SINGLE_QUOTED_SCALAR_STYLE + } + if len(emitter.scalar_data.value) == 0 && (emitter.flow_level > 0 || emitter.simple_key_context) { + style = yaml_SINGLE_QUOTED_SCALAR_STYLE + } + if no_tag && !event.implicit { + style = yaml_SINGLE_QUOTED_SCALAR_STYLE + } + } + if style == yaml_SINGLE_QUOTED_SCALAR_STYLE { + if !emitter.scalar_data.single_quoted_allowed { + style = yaml_DOUBLE_QUOTED_SCALAR_STYLE + } + } + if style == yaml_LITERAL_SCALAR_STYLE || style == yaml_FOLDED_SCALAR_STYLE { + if !emitter.scalar_data.block_allowed || emitter.flow_level > 0 || emitter.simple_key_context { + style = yaml_DOUBLE_QUOTED_SCALAR_STYLE + } + } + + if no_tag && !event.quoted_implicit && style != yaml_PLAIN_SCALAR_STYLE { + emitter.tag_data.handle = []byte{'!'} + } + emitter.scalar_data.style = style + return true +} + +// Write an anchor. +func yaml_emitter_process_anchor(emitter *yaml_emitter_t) bool { + if emitter.anchor_data.anchor == nil { + return true + } + c := []byte{'&'} + if emitter.anchor_data.alias { + c[0] = '*' + } + if !yaml_emitter_write_indicator(emitter, c, true, false, false) { + return false + } + return yaml_emitter_write_anchor(emitter, emitter.anchor_data.anchor) +} + +// Write a tag. +func yaml_emitter_process_tag(emitter *yaml_emitter_t) bool { + if len(emitter.tag_data.handle) == 0 && len(emitter.tag_data.suffix) == 0 { + return true + } + if len(emitter.tag_data.handle) > 0 { + if !yaml_emitter_write_tag_handle(emitter, emitter.tag_data.handle) { + return false + } + if len(emitter.tag_data.suffix) > 0 { + if !yaml_emitter_write_tag_content(emitter, emitter.tag_data.suffix, false) { + return false + } + } + } else { + // [Go] Allocate these slices elsewhere. + if !yaml_emitter_write_indicator(emitter, []byte("!<"), true, false, false) { + return false + } + if !yaml_emitter_write_tag_content(emitter, emitter.tag_data.suffix, false) { + return false + } + if !yaml_emitter_write_indicator(emitter, []byte{'>'}, false, false, false) { + return false + } + } + return true +} + +// Write a scalar. +func yaml_emitter_process_scalar(emitter *yaml_emitter_t) bool { + switch emitter.scalar_data.style { + case yaml_PLAIN_SCALAR_STYLE: + return yaml_emitter_write_plain_scalar(emitter, emitter.scalar_data.value, !emitter.simple_key_context) + + case yaml_SINGLE_QUOTED_SCALAR_STYLE: + return yaml_emitter_write_single_quoted_scalar(emitter, emitter.scalar_data.value, !emitter.simple_key_context) + + case yaml_DOUBLE_QUOTED_SCALAR_STYLE: + return yaml_emitter_write_double_quoted_scalar(emitter, emitter.scalar_data.value, !emitter.simple_key_context) + + case yaml_LITERAL_SCALAR_STYLE: + return yaml_emitter_write_literal_scalar(emitter, emitter.scalar_data.value) + + case yaml_FOLDED_SCALAR_STYLE: + return yaml_emitter_write_folded_scalar(emitter, emitter.scalar_data.value) + } + panic("unknown scalar style") +} + +// Write a head comment. +func yaml_emitter_process_head_comment(emitter *yaml_emitter_t) bool { + if len(emitter.tail_comment) > 0 { + if !yaml_emitter_write_indent(emitter) { + return false + } + if !yaml_emitter_write_comment(emitter, emitter.tail_comment) { + return false + } + emitter.tail_comment = emitter.tail_comment[:0] + emitter.foot_indent = emitter.indent + if emitter.foot_indent < 0 { + emitter.foot_indent = 0 + } + } + + if len(emitter.head_comment) == 0 { + return true + } + if !yaml_emitter_write_indent(emitter) { + return false + } + if !yaml_emitter_write_comment(emitter, emitter.head_comment) { + return false + } + emitter.head_comment = emitter.head_comment[:0] + return true +} + +// Write an line comment. +func yaml_emitter_process_line_comment(emitter *yaml_emitter_t) bool { + if len(emitter.line_comment) == 0 { + return true + } + if !emitter.whitespace { + if !put(emitter, ' ') { + return false + } + } + if !yaml_emitter_write_comment(emitter, emitter.line_comment) { + return false + } + emitter.line_comment = emitter.line_comment[:0] + return true +} + +// Write a foot comment. +func yaml_emitter_process_foot_comment(emitter *yaml_emitter_t) bool { + if len(emitter.foot_comment) == 0 { + return true + } + if !yaml_emitter_write_indent(emitter) { + return false + } + if !yaml_emitter_write_comment(emitter, emitter.foot_comment) { + return false + } + emitter.foot_comment = emitter.foot_comment[:0] + emitter.foot_indent = emitter.indent + if emitter.foot_indent < 0 { + emitter.foot_indent = 0 + } + return true +} + +// Check if a %YAML directive is valid. +func yaml_emitter_analyze_version_directive(emitter *yaml_emitter_t, version_directive *yaml_version_directive_t) bool { + if version_directive.major != 1 || version_directive.minor != 1 { + return yaml_emitter_set_emitter_error(emitter, "incompatible %YAML directive") + } + return true +} + +// Check if a %TAG directive is valid. +func yaml_emitter_analyze_tag_directive(emitter *yaml_emitter_t, tag_directive *yaml_tag_directive_t) bool { + handle := tag_directive.handle + prefix := tag_directive.prefix + if len(handle) == 0 { + return yaml_emitter_set_emitter_error(emitter, "tag handle must not be empty") + } + if handle[0] != '!' { + return yaml_emitter_set_emitter_error(emitter, "tag handle must start with '!'") + } + if handle[len(handle)-1] != '!' { + return yaml_emitter_set_emitter_error(emitter, "tag handle must end with '!'") + } + for i := 1; i < len(handle)-1; i += width(handle[i]) { + if !is_alpha(handle, i) { + return yaml_emitter_set_emitter_error(emitter, "tag handle must contain alphanumerical characters only") + } + } + if len(prefix) == 0 { + return yaml_emitter_set_emitter_error(emitter, "tag prefix must not be empty") + } + return true +} + +// Check if an anchor is valid. +func yaml_emitter_analyze_anchor(emitter *yaml_emitter_t, anchor []byte, alias bool) bool { + if len(anchor) == 0 { + problem := "anchor value must not be empty" + if alias { + problem = "alias value must not be empty" + } + return yaml_emitter_set_emitter_error(emitter, problem) + } + for i := 0; i < len(anchor); i += width(anchor[i]) { + if !is_alpha(anchor, i) { + problem := "anchor value must contain alphanumerical characters only" + if alias { + problem = "alias value must contain alphanumerical characters only" + } + return yaml_emitter_set_emitter_error(emitter, problem) + } + } + emitter.anchor_data.anchor = anchor + emitter.anchor_data.alias = alias + return true +} + +// Check if a tag is valid. +func yaml_emitter_analyze_tag(emitter *yaml_emitter_t, tag []byte) bool { + if len(tag) == 0 { + return yaml_emitter_set_emitter_error(emitter, "tag value must not be empty") + } + for i := 0; i < len(emitter.tag_directives); i++ { + tag_directive := &emitter.tag_directives[i] + if bytes.HasPrefix(tag, tag_directive.prefix) { + emitter.tag_data.handle = tag_directive.handle + emitter.tag_data.suffix = tag[len(tag_directive.prefix):] + return true + } + } + emitter.tag_data.suffix = tag + return true +} + +// Check if a scalar is valid. +func yaml_emitter_analyze_scalar(emitter *yaml_emitter_t, value []byte) bool { + var ( + block_indicators = false + flow_indicators = false + line_breaks = false + special_characters = false + tab_characters = false + + leading_space = false + leading_break = false + trailing_space = false + trailing_break = false + break_space = false + space_break = false + + preceded_by_whitespace = false + followed_by_whitespace = false + previous_space = false + previous_break = false + ) + + emitter.scalar_data.value = value + + if len(value) == 0 { + emitter.scalar_data.multiline = false + emitter.scalar_data.flow_plain_allowed = false + emitter.scalar_data.block_plain_allowed = true + emitter.scalar_data.single_quoted_allowed = true + emitter.scalar_data.block_allowed = false + return true + } + + if len(value) >= 3 && ((value[0] == '-' && value[1] == '-' && value[2] == '-') || (value[0] == '.' && value[1] == '.' && value[2] == '.')) { + block_indicators = true + flow_indicators = true + } + + preceded_by_whitespace = true + for i, w := 0, 0; i < len(value); i += w { + w = width(value[i]) + followed_by_whitespace = i+w >= len(value) || is_blank(value, i+w) + + if i == 0 { + switch value[i] { + case '#', ',', '[', ']', '{', '}', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`': + flow_indicators = true + block_indicators = true + case '?', ':': + flow_indicators = true + if followed_by_whitespace { + block_indicators = true + } + case '-': + if followed_by_whitespace { + flow_indicators = true + block_indicators = true + } + } + } else { + switch value[i] { + case ',', '?', '[', ']', '{', '}': + flow_indicators = true + case ':': + flow_indicators = true + if followed_by_whitespace { + block_indicators = true + } + case '#': + if preceded_by_whitespace { + flow_indicators = true + block_indicators = true + } + } + } + + if value[i] == '\t' { + tab_characters = true + } else if !is_printable(value, i) || !is_ascii(value, i) && !emitter.unicode { + special_characters = true + } + if is_space(value, i) { + if i == 0 { + leading_space = true + } + if i+width(value[i]) == len(value) { + trailing_space = true + } + if previous_break { + break_space = true + } + previous_space = true + previous_break = false + } else if is_break(value, i) { + line_breaks = true + if i == 0 { + leading_break = true + } + if i+width(value[i]) == len(value) { + trailing_break = true + } + if previous_space { + space_break = true + } + previous_space = false + previous_break = true + } else { + previous_space = false + previous_break = false + } + + // [Go]: Why 'z'? Couldn't be the end of the string as that's the loop condition. + preceded_by_whitespace = is_blankz(value, i) + } + + emitter.scalar_data.multiline = line_breaks + emitter.scalar_data.flow_plain_allowed = true + emitter.scalar_data.block_plain_allowed = true + emitter.scalar_data.single_quoted_allowed = true + emitter.scalar_data.block_allowed = true + + if leading_space || leading_break || trailing_space || trailing_break { + emitter.scalar_data.flow_plain_allowed = false + emitter.scalar_data.block_plain_allowed = false + } + if trailing_space { + emitter.scalar_data.block_allowed = false + } + if break_space { + emitter.scalar_data.flow_plain_allowed = false + emitter.scalar_data.block_plain_allowed = false + emitter.scalar_data.single_quoted_allowed = false + } + if space_break || tab_characters || special_characters { + emitter.scalar_data.flow_plain_allowed = false + emitter.scalar_data.block_plain_allowed = false + emitter.scalar_data.single_quoted_allowed = false + } + if space_break || special_characters { + emitter.scalar_data.block_allowed = false + } + if line_breaks { + emitter.scalar_data.flow_plain_allowed = false + emitter.scalar_data.block_plain_allowed = false + } + if flow_indicators { + emitter.scalar_data.flow_plain_allowed = false + } + if block_indicators { + emitter.scalar_data.block_plain_allowed = false + } + return true +} + +// Check if the event data is valid. +func yaml_emitter_analyze_event(emitter *yaml_emitter_t, event *yaml_event_t) bool { + + emitter.anchor_data.anchor = nil + emitter.tag_data.handle = nil + emitter.tag_data.suffix = nil + emitter.scalar_data.value = nil + + if len(event.head_comment) > 0 { + emitter.head_comment = event.head_comment + } + if len(event.line_comment) > 0 { + emitter.line_comment = event.line_comment + } + if len(event.foot_comment) > 0 { + emitter.foot_comment = event.foot_comment + } + if len(event.tail_comment) > 0 { + emitter.tail_comment = event.tail_comment + } + + switch event.typ { + case yaml_ALIAS_EVENT: + if !yaml_emitter_analyze_anchor(emitter, event.anchor, true) { + return false + } + + case yaml_SCALAR_EVENT: + if len(event.anchor) > 0 { + if !yaml_emitter_analyze_anchor(emitter, event.anchor, false) { + return false + } + } + if len(event.tag) > 0 && (emitter.canonical || (!event.implicit && !event.quoted_implicit)) { + if !yaml_emitter_analyze_tag(emitter, event.tag) { + return false + } + } + if !yaml_emitter_analyze_scalar(emitter, event.value) { + return false + } + + case yaml_SEQUENCE_START_EVENT: + if len(event.anchor) > 0 { + if !yaml_emitter_analyze_anchor(emitter, event.anchor, false) { + return false + } + } + if len(event.tag) > 0 && (emitter.canonical || !event.implicit) { + if !yaml_emitter_analyze_tag(emitter, event.tag) { + return false + } + } + + case yaml_MAPPING_START_EVENT: + if len(event.anchor) > 0 { + if !yaml_emitter_analyze_anchor(emitter, event.anchor, false) { + return false + } + } + if len(event.tag) > 0 && (emitter.canonical || !event.implicit) { + if !yaml_emitter_analyze_tag(emitter, event.tag) { + return false + } + } + } + return true +} + +// Write the BOM character. +func yaml_emitter_write_bom(emitter *yaml_emitter_t) bool { + if !flush(emitter) { + return false + } + pos := emitter.buffer_pos + emitter.buffer[pos+0] = '\xEF' + emitter.buffer[pos+1] = '\xBB' + emitter.buffer[pos+2] = '\xBF' + emitter.buffer_pos += 3 + return true +} + +func yaml_emitter_write_indent(emitter *yaml_emitter_t) bool { + indent := emitter.indent + if indent < 0 { + indent = 0 + } + if !emitter.indention || emitter.column > indent || (emitter.column == indent && !emitter.whitespace) { + if !put_break(emitter) { + return false + } + } + if emitter.foot_indent == indent { + if !put_break(emitter) { + return false + } + } + for emitter.column < indent { + if !put(emitter, ' ') { + return false + } + } + emitter.whitespace = true + //emitter.indention = true + emitter.space_above = false + emitter.foot_indent = -1 + return true +} + +func yaml_emitter_write_indicator(emitter *yaml_emitter_t, indicator []byte, need_whitespace, is_whitespace, is_indention bool) bool { + if need_whitespace && !emitter.whitespace { + if !put(emitter, ' ') { + return false + } + } + if !write_all(emitter, indicator) { + return false + } + emitter.whitespace = is_whitespace + emitter.indention = (emitter.indention && is_indention) + emitter.open_ended = false + return true +} + +func yaml_emitter_write_anchor(emitter *yaml_emitter_t, value []byte) bool { + if !write_all(emitter, value) { + return false + } + emitter.whitespace = false + emitter.indention = false + return true +} + +func yaml_emitter_write_tag_handle(emitter *yaml_emitter_t, value []byte) bool { + if !emitter.whitespace { + if !put(emitter, ' ') { + return false + } + } + if !write_all(emitter, value) { + return false + } + emitter.whitespace = false + emitter.indention = false + return true +} + +func yaml_emitter_write_tag_content(emitter *yaml_emitter_t, value []byte, need_whitespace bool) bool { + if need_whitespace && !emitter.whitespace { + if !put(emitter, ' ') { + return false + } + } + for i := 0; i < len(value); { + var must_write bool + switch value[i] { + case ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '_', '.', '~', '*', '\'', '(', ')', '[', ']': + must_write = true + default: + must_write = is_alpha(value, i) + } + if must_write { + if !write(emitter, value, &i) { + return false + } + } else { + w := width(value[i]) + for k := 0; k < w; k++ { + octet := value[i] + i++ + if !put(emitter, '%') { + return false + } + + c := octet >> 4 + if c < 10 { + c += '0' + } else { + c += 'A' - 10 + } + if !put(emitter, c) { + return false + } + + c = octet & 0x0f + if c < 10 { + c += '0' + } else { + c += 'A' - 10 + } + if !put(emitter, c) { + return false + } + } + } + } + emitter.whitespace = false + emitter.indention = false + return true +} + +func yaml_emitter_write_plain_scalar(emitter *yaml_emitter_t, value []byte, allow_breaks bool) bool { + if len(value) > 0 && !emitter.whitespace { + if !put(emitter, ' ') { + return false + } + } + + spaces := false + breaks := false + for i := 0; i < len(value); { + if is_space(value, i) { + if allow_breaks && !spaces && emitter.column > emitter.best_width && !is_space(value, i+1) { + if !yaml_emitter_write_indent(emitter) { + return false + } + i += width(value[i]) + } else { + if !write(emitter, value, &i) { + return false + } + } + spaces = true + } else if is_break(value, i) { + if !breaks && value[i] == '\n' { + if !put_break(emitter) { + return false + } + } + if !write_break(emitter, value, &i) { + return false + } + //emitter.indention = true + breaks = true + } else { + if breaks { + if !yaml_emitter_write_indent(emitter) { + return false + } + } + if !write(emitter, value, &i) { + return false + } + emitter.indention = false + spaces = false + breaks = false + } + } + + if len(value) > 0 { + emitter.whitespace = false + } + emitter.indention = false + if emitter.root_context { + emitter.open_ended = true + } + + return true +} + +func yaml_emitter_write_single_quoted_scalar(emitter *yaml_emitter_t, value []byte, allow_breaks bool) bool { + + if !yaml_emitter_write_indicator(emitter, []byte{'\''}, true, false, false) { + return false + } + + spaces := false + breaks := false + for i := 0; i < len(value); { + if is_space(value, i) { + if allow_breaks && !spaces && emitter.column > emitter.best_width && i > 0 && i < len(value)-1 && !is_space(value, i+1) { + if !yaml_emitter_write_indent(emitter) { + return false + } + i += width(value[i]) + } else { + if !write(emitter, value, &i) { + return false + } + } + spaces = true + } else if is_break(value, i) { + if !breaks && value[i] == '\n' { + if !put_break(emitter) { + return false + } + } + if !write_break(emitter, value, &i) { + return false + } + //emitter.indention = true + breaks = true + } else { + if breaks { + if !yaml_emitter_write_indent(emitter) { + return false + } + } + if value[i] == '\'' { + if !put(emitter, '\'') { + return false + } + } + if !write(emitter, value, &i) { + return false + } + emitter.indention = false + spaces = false + breaks = false + } + } + if !yaml_emitter_write_indicator(emitter, []byte{'\''}, false, false, false) { + return false + } + emitter.whitespace = false + emitter.indention = false + return true +} + +func yaml_emitter_write_double_quoted_scalar(emitter *yaml_emitter_t, value []byte, allow_breaks bool) bool { + spaces := false + if !yaml_emitter_write_indicator(emitter, []byte{'"'}, true, false, false) { + return false + } + + for i := 0; i < len(value); { + if !is_printable(value, i) || (!emitter.unicode && !is_ascii(value, i)) || + is_bom(value, i) || is_break(value, i) || + value[i] == '"' || value[i] == '\\' { + + octet := value[i] + + var w int + var v rune + switch { + case octet&0x80 == 0x00: + w, v = 1, rune(octet&0x7F) + case octet&0xE0 == 0xC0: + w, v = 2, rune(octet&0x1F) + case octet&0xF0 == 0xE0: + w, v = 3, rune(octet&0x0F) + case octet&0xF8 == 0xF0: + w, v = 4, rune(octet&0x07) + } + for k := 1; k < w; k++ { + octet = value[i+k] + v = (v << 6) + (rune(octet) & 0x3F) + } + i += w + + if !put(emitter, '\\') { + return false + } + + var ok bool + switch v { + case 0x00: + ok = put(emitter, '0') + case 0x07: + ok = put(emitter, 'a') + case 0x08: + ok = put(emitter, 'b') + case 0x09: + ok = put(emitter, 't') + case 0x0A: + ok = put(emitter, 'n') + case 0x0b: + ok = put(emitter, 'v') + case 0x0c: + ok = put(emitter, 'f') + case 0x0d: + ok = put(emitter, 'r') + case 0x1b: + ok = put(emitter, 'e') + case 0x22: + ok = put(emitter, '"') + case 0x5c: + ok = put(emitter, '\\') + case 0x85: + ok = put(emitter, 'N') + case 0xA0: + ok = put(emitter, '_') + case 0x2028: + ok = put(emitter, 'L') + case 0x2029: + ok = put(emitter, 'P') + default: + if v <= 0xFF { + ok = put(emitter, 'x') + w = 2 + } else if v <= 0xFFFF { + ok = put(emitter, 'u') + w = 4 + } else { + ok = put(emitter, 'U') + w = 8 + } + for k := (w - 1) * 4; ok && k >= 0; k -= 4 { + digit := byte((v >> uint(k)) & 0x0F) + if digit < 10 { + ok = put(emitter, digit+'0') + } else { + ok = put(emitter, digit+'A'-10) + } + } + } + if !ok { + return false + } + spaces = false + } else if is_space(value, i) { + if allow_breaks && !spaces && emitter.column > emitter.best_width && i > 0 && i < len(value)-1 { + if !yaml_emitter_write_indent(emitter) { + return false + } + if is_space(value, i+1) { + if !put(emitter, '\\') { + return false + } + } + i += width(value[i]) + } else if !write(emitter, value, &i) { + return false + } + spaces = true + } else { + if !write(emitter, value, &i) { + return false + } + spaces = false + } + } + if !yaml_emitter_write_indicator(emitter, []byte{'"'}, false, false, false) { + return false + } + emitter.whitespace = false + emitter.indention = false + return true +} + +func yaml_emitter_write_block_scalar_hints(emitter *yaml_emitter_t, value []byte) bool { + if is_space(value, 0) || is_break(value, 0) { + indent_hint := []byte{'0' + byte(emitter.best_indent)} + if !yaml_emitter_write_indicator(emitter, indent_hint, false, false, false) { + return false + } + } + + emitter.open_ended = false + + var chomp_hint [1]byte + if len(value) == 0 { + chomp_hint[0] = '-' + } else { + i := len(value) - 1 + for value[i]&0xC0 == 0x80 { + i-- + } + if !is_break(value, i) { + chomp_hint[0] = '-' + } else if i == 0 { + chomp_hint[0] = '+' + emitter.open_ended = true + } else { + i-- + for value[i]&0xC0 == 0x80 { + i-- + } + if is_break(value, i) { + chomp_hint[0] = '+' + emitter.open_ended = true + } + } + } + if chomp_hint[0] != 0 { + if !yaml_emitter_write_indicator(emitter, chomp_hint[:], false, false, false) { + return false + } + } + return true +} + +func yaml_emitter_write_literal_scalar(emitter *yaml_emitter_t, value []byte) bool { + if !yaml_emitter_write_indicator(emitter, []byte{'|'}, true, false, false) { + return false + } + if !yaml_emitter_write_block_scalar_hints(emitter, value) { + return false + } + if !yaml_emitter_process_line_comment(emitter) { + return false + } + //emitter.indention = true + emitter.whitespace = true + breaks := true + for i := 0; i < len(value); { + if is_break(value, i) { + if !write_break(emitter, value, &i) { + return false + } + //emitter.indention = true + breaks = true + } else { + if breaks { + if !yaml_emitter_write_indent(emitter) { + return false + } + } + if !write(emitter, value, &i) { + return false + } + emitter.indention = false + breaks = false + } + } + + return true +} + +func yaml_emitter_write_folded_scalar(emitter *yaml_emitter_t, value []byte) bool { + if !yaml_emitter_write_indicator(emitter, []byte{'>'}, true, false, false) { + return false + } + if !yaml_emitter_write_block_scalar_hints(emitter, value) { + return false + } + if !yaml_emitter_process_line_comment(emitter) { + return false + } + + //emitter.indention = true + emitter.whitespace = true + + breaks := true + leading_spaces := true + for i := 0; i < len(value); { + if is_break(value, i) { + if !breaks && !leading_spaces && value[i] == '\n' { + k := 0 + for is_break(value, k) { + k += width(value[k]) + } + if !is_blankz(value, k) { + if !put_break(emitter) { + return false + } + } + } + if !write_break(emitter, value, &i) { + return false + } + //emitter.indention = true + breaks = true + } else { + if breaks { + if !yaml_emitter_write_indent(emitter) { + return false + } + leading_spaces = is_blank(value, i) + } + if !breaks && is_space(value, i) && !is_space(value, i+1) && emitter.column > emitter.best_width { + if !yaml_emitter_write_indent(emitter) { + return false + } + i += width(value[i]) + } else { + if !write(emitter, value, &i) { + return false + } + } + emitter.indention = false + breaks = false + } + } + return true +} + +func yaml_emitter_write_comment(emitter *yaml_emitter_t, comment []byte) bool { + breaks := false + pound := false + for i := 0; i < len(comment); { + if is_break(comment, i) { + if !write_break(emitter, comment, &i) { + return false + } + //emitter.indention = true + breaks = true + pound = false + } else { + if breaks && !yaml_emitter_write_indent(emitter) { + return false + } + if !pound { + if comment[i] != '#' && (!put(emitter, '#') || !put(emitter, ' ')) { + return false + } + pound = true + } + if !write(emitter, comment, &i) { + return false + } + emitter.indention = false + breaks = false + } + } + if !breaks && !put_break(emitter) { + return false + } + + emitter.whitespace = true + //emitter.indention = true + return true +} diff --git a/goyaml.v3/encode.go b/goyaml.v3/encode.go new file mode 100644 index 0000000..de9e72a --- /dev/null +++ b/goyaml.v3/encode.go @@ -0,0 +1,577 @@ +// +// Copyright (c) 2011-2019 Canonical Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package yaml + +import ( + "encoding" + "fmt" + "io" + "reflect" + "regexp" + "sort" + "strconv" + "strings" + "time" + "unicode/utf8" +) + +type encoder struct { + emitter yaml_emitter_t + event yaml_event_t + out []byte + flow bool + indent int + doneInit bool +} + +func newEncoder() *encoder { + e := &encoder{} + yaml_emitter_initialize(&e.emitter) + yaml_emitter_set_output_string(&e.emitter, &e.out) + yaml_emitter_set_unicode(&e.emitter, true) + return e +} + +func newEncoderWithWriter(w io.Writer) *encoder { + e := &encoder{} + yaml_emitter_initialize(&e.emitter) + yaml_emitter_set_output_writer(&e.emitter, w) + yaml_emitter_set_unicode(&e.emitter, true) + return e +} + +func (e *encoder) init() { + if e.doneInit { + return + } + if e.indent == 0 { + e.indent = 4 + } + e.emitter.best_indent = e.indent + yaml_stream_start_event_initialize(&e.event, yaml_UTF8_ENCODING) + e.emit() + e.doneInit = true +} + +func (e *encoder) finish() { + e.emitter.open_ended = false + yaml_stream_end_event_initialize(&e.event) + e.emit() +} + +func (e *encoder) destroy() { + yaml_emitter_delete(&e.emitter) +} + +func (e *encoder) emit() { + // This will internally delete the e.event value. + e.must(yaml_emitter_emit(&e.emitter, &e.event)) +} + +func (e *encoder) must(ok bool) { + if !ok { + msg := e.emitter.problem + if msg == "" { + msg = "unknown problem generating YAML content" + } + failf("%s", msg) + } +} + +func (e *encoder) marshalDoc(tag string, in reflect.Value) { + e.init() + var node *Node + if in.IsValid() { + node, _ = in.Interface().(*Node) + } + if node != nil && node.Kind == DocumentNode { + e.nodev(in) + } else { + yaml_document_start_event_initialize(&e.event, nil, nil, true) + e.emit() + e.marshal(tag, in) + yaml_document_end_event_initialize(&e.event, true) + e.emit() + } +} + +func (e *encoder) marshal(tag string, in reflect.Value) { + tag = shortTag(tag) + if !in.IsValid() || in.Kind() == reflect.Ptr && in.IsNil() { + e.nilv() + return + } + iface := in.Interface() + switch value := iface.(type) { + case *Node: + e.nodev(in) + return + case Node: + if !in.CanAddr() { + var n = reflect.New(in.Type()).Elem() + n.Set(in) + in = n + } + e.nodev(in.Addr()) + return + case time.Time: + e.timev(tag, in) + return + case *time.Time: + e.timev(tag, in.Elem()) + return + case time.Duration: + e.stringv(tag, reflect.ValueOf(value.String())) + return + case Marshaler: + v, err := value.MarshalYAML() + if err != nil { + fail(err) + } + if v == nil { + e.nilv() + return + } + e.marshal(tag, reflect.ValueOf(v)) + return + case encoding.TextMarshaler: + text, err := value.MarshalText() + if err != nil { + fail(err) + } + in = reflect.ValueOf(string(text)) + case nil: + e.nilv() + return + } + switch in.Kind() { + case reflect.Interface: + e.marshal(tag, in.Elem()) + case reflect.Map: + e.mapv(tag, in) + case reflect.Ptr: + e.marshal(tag, in.Elem()) + case reflect.Struct: + e.structv(tag, in) + case reflect.Slice, reflect.Array: + e.slicev(tag, in) + case reflect.String: + e.stringv(tag, in) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + e.intv(tag, in) + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + e.uintv(tag, in) + case reflect.Float32, reflect.Float64: + e.floatv(tag, in) + case reflect.Bool: + e.boolv(tag, in) + default: + panic("cannot marshal type: " + in.Type().String()) + } +} + +func (e *encoder) mapv(tag string, in reflect.Value) { + e.mappingv(tag, func() { + keys := keyList(in.MapKeys()) + sort.Sort(keys) + for _, k := range keys { + e.marshal("", k) + e.marshal("", in.MapIndex(k)) + } + }) +} + +func (e *encoder) fieldByIndex(v reflect.Value, index []int) (field reflect.Value) { + for _, num := range index { + for { + if v.Kind() == reflect.Ptr { + if v.IsNil() { + return reflect.Value{} + } + v = v.Elem() + continue + } + break + } + v = v.Field(num) + } + return v +} + +func (e *encoder) structv(tag string, in reflect.Value) { + sinfo, err := getStructInfo(in.Type()) + if err != nil { + panic(err) + } + e.mappingv(tag, func() { + for _, info := range sinfo.FieldsList { + var value reflect.Value + if info.Inline == nil { + value = in.Field(info.Num) + } else { + value = e.fieldByIndex(in, info.Inline) + if !value.IsValid() { + continue + } + } + if info.OmitEmpty && isZero(value) { + continue + } + e.marshal("", reflect.ValueOf(info.Key)) + e.flow = info.Flow + e.marshal("", value) + } + if sinfo.InlineMap >= 0 { + m := in.Field(sinfo.InlineMap) + if m.Len() > 0 { + e.flow = false + keys := keyList(m.MapKeys()) + sort.Sort(keys) + for _, k := range keys { + if _, found := sinfo.FieldsMap[k.String()]; found { + panic(fmt.Sprintf("cannot have key %q in inlined map: conflicts with struct field", k.String())) + } + e.marshal("", k) + e.flow = false + e.marshal("", m.MapIndex(k)) + } + } + } + }) +} + +func (e *encoder) mappingv(tag string, f func()) { + implicit := tag == "" + style := yaml_BLOCK_MAPPING_STYLE + if e.flow { + e.flow = false + style = yaml_FLOW_MAPPING_STYLE + } + yaml_mapping_start_event_initialize(&e.event, nil, []byte(tag), implicit, style) + e.emit() + f() + yaml_mapping_end_event_initialize(&e.event) + e.emit() +} + +func (e *encoder) slicev(tag string, in reflect.Value) { + implicit := tag == "" + style := yaml_BLOCK_SEQUENCE_STYLE + if e.flow { + e.flow = false + style = yaml_FLOW_SEQUENCE_STYLE + } + e.must(yaml_sequence_start_event_initialize(&e.event, nil, []byte(tag), implicit, style)) + e.emit() + n := in.Len() + for i := 0; i < n; i++ { + e.marshal("", in.Index(i)) + } + e.must(yaml_sequence_end_event_initialize(&e.event)) + e.emit() +} + +// isBase60 returns whether s is in base 60 notation as defined in YAML 1.1. +// +// The base 60 float notation in YAML 1.1 is a terrible idea and is unsupported +// in YAML 1.2 and by this package, but these should be marshalled quoted for +// the time being for compatibility with other parsers. +func isBase60Float(s string) (result bool) { + // Fast path. + if s == "" { + return false + } + c := s[0] + if !(c == '+' || c == '-' || c >= '0' && c <= '9') || strings.IndexByte(s, ':') < 0 { + return false + } + // Do the full match. + return base60float.MatchString(s) +} + +// From http://yaml.org/type/float.html, except the regular expression there +// is bogus. In practice parsers do not enforce the "\.[0-9_]*" suffix. +var base60float = regexp.MustCompile(`^[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+(?:\.[0-9_]*)?$`) + +// isOldBool returns whether s is bool notation as defined in YAML 1.1. +// +// We continue to force strings that YAML 1.1 would interpret as booleans to be +// rendered as quotes strings so that the marshalled output valid for YAML 1.1 +// parsing. +func isOldBool(s string) (result bool) { + switch s { + case "y", "Y", "yes", "Yes", "YES", "on", "On", "ON", + "n", "N", "no", "No", "NO", "off", "Off", "OFF": + return true + default: + return false + } +} + +func (e *encoder) stringv(tag string, in reflect.Value) { + var style yaml_scalar_style_t + s := in.String() + canUsePlain := true + switch { + case !utf8.ValidString(s): + if tag == binaryTag { + failf("explicitly tagged !!binary data must be base64-encoded") + } + if tag != "" { + failf("cannot marshal invalid UTF-8 data as %s", shortTag(tag)) + } + // It can't be encoded directly as YAML so use a binary tag + // and encode it as base64. + tag = binaryTag + s = encodeBase64(s) + case tag == "": + // Check to see if it would resolve to a specific + // tag when encoded unquoted. If it doesn't, + // there's no need to quote it. + rtag, _ := resolve("", s) + canUsePlain = rtag == strTag && !(isBase60Float(s) || isOldBool(s)) + } + // Note: it's possible for user code to emit invalid YAML + // if they explicitly specify a tag and a string containing + // text that's incompatible with that tag. + switch { + case strings.Contains(s, "\n"): + if e.flow { + style = yaml_DOUBLE_QUOTED_SCALAR_STYLE + } else { + style = yaml_LITERAL_SCALAR_STYLE + } + case canUsePlain: + style = yaml_PLAIN_SCALAR_STYLE + default: + style = yaml_DOUBLE_QUOTED_SCALAR_STYLE + } + e.emitScalar(s, "", tag, style, nil, nil, nil, nil) +} + +func (e *encoder) boolv(tag string, in reflect.Value) { + var s string + if in.Bool() { + s = "true" + } else { + s = "false" + } + e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE, nil, nil, nil, nil) +} + +func (e *encoder) intv(tag string, in reflect.Value) { + s := strconv.FormatInt(in.Int(), 10) + e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE, nil, nil, nil, nil) +} + +func (e *encoder) uintv(tag string, in reflect.Value) { + s := strconv.FormatUint(in.Uint(), 10) + e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE, nil, nil, nil, nil) +} + +func (e *encoder) timev(tag string, in reflect.Value) { + t := in.Interface().(time.Time) + s := t.Format(time.RFC3339Nano) + e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE, nil, nil, nil, nil) +} + +func (e *encoder) floatv(tag string, in reflect.Value) { + // Issue #352: When formatting, use the precision of the underlying value + precision := 64 + if in.Kind() == reflect.Float32 { + precision = 32 + } + + s := strconv.FormatFloat(in.Float(), 'g', -1, precision) + switch s { + case "+Inf": + s = ".inf" + case "-Inf": + s = "-.inf" + case "NaN": + s = ".nan" + } + e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE, nil, nil, nil, nil) +} + +func (e *encoder) nilv() { + e.emitScalar("null", "", "", yaml_PLAIN_SCALAR_STYLE, nil, nil, nil, nil) +} + +func (e *encoder) emitScalar(value, anchor, tag string, style yaml_scalar_style_t, head, line, foot, tail []byte) { + // TODO Kill this function. Replace all initialize calls by their underlining Go literals. + implicit := tag == "" + if !implicit { + tag = longTag(tag) + } + e.must(yaml_scalar_event_initialize(&e.event, []byte(anchor), []byte(tag), []byte(value), implicit, implicit, style)) + e.event.head_comment = head + e.event.line_comment = line + e.event.foot_comment = foot + e.event.tail_comment = tail + e.emit() +} + +func (e *encoder) nodev(in reflect.Value) { + e.node(in.Interface().(*Node), "") +} + +func (e *encoder) node(node *Node, tail string) { + // Zero nodes behave as nil. + if node.Kind == 0 && node.IsZero() { + e.nilv() + return + } + + // If the tag was not explicitly requested, and dropping it won't change the + // implicit tag of the value, don't include it in the presentation. + var tag = node.Tag + var stag = shortTag(tag) + var forceQuoting bool + if tag != "" && node.Style&TaggedStyle == 0 { + if node.Kind == ScalarNode { + if stag == strTag && node.Style&(SingleQuotedStyle|DoubleQuotedStyle|LiteralStyle|FoldedStyle) != 0 { + tag = "" + } else { + rtag, _ := resolve("", node.Value) + if rtag == stag { + tag = "" + } else if stag == strTag { + tag = "" + forceQuoting = true + } + } + } else { + var rtag string + switch node.Kind { + case MappingNode: + rtag = mapTag + case SequenceNode: + rtag = seqTag + } + if rtag == stag { + tag = "" + } + } + } + + switch node.Kind { + case DocumentNode: + yaml_document_start_event_initialize(&e.event, nil, nil, true) + e.event.head_comment = []byte(node.HeadComment) + e.emit() + for _, node := range node.Content { + e.node(node, "") + } + yaml_document_end_event_initialize(&e.event, true) + e.event.foot_comment = []byte(node.FootComment) + e.emit() + + case SequenceNode: + style := yaml_BLOCK_SEQUENCE_STYLE + if node.Style&FlowStyle != 0 { + style = yaml_FLOW_SEQUENCE_STYLE + } + e.must(yaml_sequence_start_event_initialize(&e.event, []byte(node.Anchor), []byte(longTag(tag)), tag == "", style)) + e.event.head_comment = []byte(node.HeadComment) + e.emit() + for _, node := range node.Content { + e.node(node, "") + } + e.must(yaml_sequence_end_event_initialize(&e.event)) + e.event.line_comment = []byte(node.LineComment) + e.event.foot_comment = []byte(node.FootComment) + e.emit() + + case MappingNode: + style := yaml_BLOCK_MAPPING_STYLE + if node.Style&FlowStyle != 0 { + style = yaml_FLOW_MAPPING_STYLE + } + yaml_mapping_start_event_initialize(&e.event, []byte(node.Anchor), []byte(longTag(tag)), tag == "", style) + e.event.tail_comment = []byte(tail) + e.event.head_comment = []byte(node.HeadComment) + e.emit() + + // The tail logic below moves the foot comment of prior keys to the following key, + // since the value for each key may be a nested structure and the foot needs to be + // processed only the entirety of the value is streamed. The last tail is processed + // with the mapping end event. + var tail string + for i := 0; i+1 < len(node.Content); i += 2 { + k := node.Content[i] + foot := k.FootComment + if foot != "" { + kopy := *k + kopy.FootComment = "" + k = &kopy + } + e.node(k, tail) + tail = foot + + v := node.Content[i+1] + e.node(v, "") + } + + yaml_mapping_end_event_initialize(&e.event) + e.event.tail_comment = []byte(tail) + e.event.line_comment = []byte(node.LineComment) + e.event.foot_comment = []byte(node.FootComment) + e.emit() + + case AliasNode: + yaml_alias_event_initialize(&e.event, []byte(node.Value)) + e.event.head_comment = []byte(node.HeadComment) + e.event.line_comment = []byte(node.LineComment) + e.event.foot_comment = []byte(node.FootComment) + e.emit() + + case ScalarNode: + value := node.Value + if !utf8.ValidString(value) { + if stag == binaryTag { + failf("explicitly tagged !!binary data must be base64-encoded") + } + if stag != "" { + failf("cannot marshal invalid UTF-8 data as %s", stag) + } + // It can't be encoded directly as YAML so use a binary tag + // and encode it as base64. + tag = binaryTag + value = encodeBase64(value) + } + + style := yaml_PLAIN_SCALAR_STYLE + switch { + case node.Style&DoubleQuotedStyle != 0: + style = yaml_DOUBLE_QUOTED_SCALAR_STYLE + case node.Style&SingleQuotedStyle != 0: + style = yaml_SINGLE_QUOTED_SCALAR_STYLE + case node.Style&LiteralStyle != 0: + style = yaml_LITERAL_SCALAR_STYLE + case node.Style&FoldedStyle != 0: + style = yaml_FOLDED_SCALAR_STYLE + case strings.Contains(value, "\n"): + style = yaml_LITERAL_SCALAR_STYLE + case forceQuoting: + style = yaml_DOUBLE_QUOTED_SCALAR_STYLE + } + + e.emitScalar(value, node.Anchor, tag, style, []byte(node.HeadComment), []byte(node.LineComment), []byte(node.FootComment), []byte(tail)) + default: + failf("cannot encode node with unknown kind %d", node.Kind) + } +} diff --git a/goyaml.v3/encode_test.go b/goyaml.v3/encode_test.go new file mode 100644 index 0000000..4a8bf2e --- /dev/null +++ b/goyaml.v3/encode_test.go @@ -0,0 +1,736 @@ +// +// Copyright (c) 2011-2019 Canonical Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package yaml_test + +import ( + "bytes" + "fmt" + "math" + "strconv" + "strings" + "time" + + "net" + "os" + + . "gopkg.in/check.v1" + "gopkg.in/yaml.v3" +) + +var marshalIntTest = 123 + +var marshalTests = []struct { + value interface{} + data string +}{ + { + nil, + "null\n", + }, { + (*marshalerType)(nil), + "null\n", + }, { + &struct{}{}, + "{}\n", + }, { + map[string]string{"v": "hi"}, + "v: hi\n", + }, { + map[string]interface{}{"v": "hi"}, + "v: hi\n", + }, { + map[string]string{"v": "true"}, + "v: \"true\"\n", + }, { + map[string]string{"v": "false"}, + "v: \"false\"\n", + }, { + map[string]interface{}{"v": true}, + "v: true\n", + }, { + map[string]interface{}{"v": false}, + "v: false\n", + }, { + map[string]interface{}{"v": 10}, + "v: 10\n", + }, { + map[string]interface{}{"v": -10}, + "v: -10\n", + }, { + map[string]uint{"v": 42}, + "v: 42\n", + }, { + map[string]interface{}{"v": int64(4294967296)}, + "v: 4294967296\n", + }, { + map[string]int64{"v": int64(4294967296)}, + "v: 4294967296\n", + }, { + map[string]uint64{"v": 4294967296}, + "v: 4294967296\n", + }, { + map[string]interface{}{"v": "10"}, + "v: \"10\"\n", + }, { + map[string]interface{}{"v": 0.1}, + "v: 0.1\n", + }, { + map[string]interface{}{"v": float64(0.1)}, + "v: 0.1\n", + }, { + map[string]interface{}{"v": float32(0.99)}, + "v: 0.99\n", + }, { + map[string]interface{}{"v": -0.1}, + "v: -0.1\n", + }, { + map[string]interface{}{"v": math.Inf(+1)}, + "v: .inf\n", + }, { + map[string]interface{}{"v": math.Inf(-1)}, + "v: -.inf\n", + }, { + map[string]interface{}{"v": math.NaN()}, + "v: .nan\n", + }, { + map[string]interface{}{"v": nil}, + "v: null\n", + }, { + map[string]interface{}{"v": ""}, + "v: \"\"\n", + }, { + map[string][]string{"v": []string{"A", "B"}}, + "v:\n - A\n - B\n", + }, { + map[string][]string{"v": []string{"A", "B\nC"}}, + "v:\n - A\n - |-\n B\n C\n", + }, { + map[string][]interface{}{"v": []interface{}{"A", 1, map[string][]int{"B": []int{2, 3}}}}, + "v:\n - A\n - 1\n - B:\n - 2\n - 3\n", + }, { + map[string]interface{}{"a": map[interface{}]interface{}{"b": "c"}}, + "a:\n b: c\n", + }, { + map[string]interface{}{"a": "-"}, + "a: '-'\n", + }, + + // Simple values. + { + &marshalIntTest, + "123\n", + }, + + // Structures + { + &struct{ Hello string }{"world"}, + "hello: world\n", + }, { + &struct { + A struct { + B string + } + }{struct{ B string }{"c"}}, + "a:\n b: c\n", + }, { + &struct { + A *struct { + B string + } + }{&struct{ B string }{"c"}}, + "a:\n b: c\n", + }, { + &struct { + A *struct { + B string + } + }{}, + "a: null\n", + }, { + &struct{ A int }{1}, + "a: 1\n", + }, { + &struct{ A []int }{[]int{1, 2}}, + "a:\n - 1\n - 2\n", + }, { + &struct{ A [2]int }{[2]int{1, 2}}, + "a:\n - 1\n - 2\n", + }, { + &struct { + B int "a" + }{1}, + "a: 1\n", + }, { + &struct{ A bool }{true}, + "a: true\n", + }, { + &struct{ A string }{"true"}, + "a: \"true\"\n", + }, { + &struct{ A string }{"off"}, + "a: \"off\"\n", + }, + + // Conditional flag + { + &struct { + A int "a,omitempty" + B int "b,omitempty" + }{1, 0}, + "a: 1\n", + }, { + &struct { + A int "a,omitempty" + B int "b,omitempty" + }{0, 0}, + "{}\n", + }, { + &struct { + A *struct{ X, y int } "a,omitempty,flow" + }{&struct{ X, y int }{1, 2}}, + "a: {x: 1}\n", + }, { + &struct { + A *struct{ X, y int } "a,omitempty,flow" + }{nil}, + "{}\n", + }, { + &struct { + A *struct{ X, y int } "a,omitempty,flow" + }{&struct{ X, y int }{}}, + "a: {x: 0}\n", + }, { + &struct { + A struct{ X, y int } "a,omitempty,flow" + }{struct{ X, y int }{1, 2}}, + "a: {x: 1}\n", + }, { + &struct { + A struct{ X, y int } "a,omitempty,flow" + }{struct{ X, y int }{0, 1}}, + "{}\n", + }, { + &struct { + A float64 "a,omitempty" + B float64 "b,omitempty" + }{1, 0}, + "a: 1\n", + }, + { + &struct { + T1 time.Time "t1,omitempty" + T2 time.Time "t2,omitempty" + T3 *time.Time "t3,omitempty" + T4 *time.Time "t4,omitempty" + }{ + T2: time.Date(2018, 1, 9, 10, 40, 47, 0, time.UTC), + T4: newTime(time.Date(2098, 1, 9, 10, 40, 47, 0, time.UTC)), + }, + "t2: 2018-01-09T10:40:47Z\nt4: 2098-01-09T10:40:47Z\n", + }, + // Nil interface that implements Marshaler. + { + map[string]yaml.Marshaler{ + "a": nil, + }, + "a: null\n", + }, + + // Flow flag + { + &struct { + A []int "a,flow" + }{[]int{1, 2}}, + "a: [1, 2]\n", + }, { + &struct { + A map[string]string "a,flow" + }{map[string]string{"b": "c", "d": "e"}}, + "a: {b: c, d: e}\n", + }, { + &struct { + A struct { + B, D string + } "a,flow" + }{struct{ B, D string }{"c", "e"}}, + "a: {b: c, d: e}\n", + }, { + &struct { + A string "a,flow" + }{"b\nc"}, + "a: \"b\\nc\"\n", + }, + + // Unexported field + { + &struct { + u int + A int + }{0, 1}, + "a: 1\n", + }, + + // Ignored field + { + &struct { + A int + B int "-" + }{1, 2}, + "a: 1\n", + }, + + // Struct inlining + { + &struct { + A int + C inlineB `yaml:",inline"` + }{1, inlineB{2, inlineC{3}}}, + "a: 1\nb: 2\nc: 3\n", + }, + // Struct inlining as a pointer + { + &struct { + A int + C *inlineB `yaml:",inline"` + }{1, &inlineB{2, inlineC{3}}}, + "a: 1\nb: 2\nc: 3\n", + }, { + &struct { + A int + C *inlineB `yaml:",inline"` + }{1, nil}, + "a: 1\n", + }, { + &struct { + A int + D *inlineD `yaml:",inline"` + }{1, &inlineD{&inlineC{3}, 4}}, + "a: 1\nc: 3\nd: 4\n", + }, + + // Map inlining + { + &struct { + A int + C map[string]int `yaml:",inline"` + }{1, map[string]int{"b": 2, "c": 3}}, + "a: 1\nb: 2\nc: 3\n", + }, + + // Duration + { + map[string]time.Duration{"a": 3 * time.Second}, + "a: 3s\n", + }, + + // Issue #24: bug in map merging logic. + { + map[string]string{"a": ""}, + "a: \n", + }, + + // Issue #34: marshal unsupported base 60 floats quoted for compatibility + // with old YAML 1.1 parsers. + { + map[string]string{"a": "1:1"}, + "a: \"1:1\"\n", + }, + + // Binary data. + { + map[string]string{"a": "\x00"}, + "a: \"\\0\"\n", + }, { + map[string]string{"a": "\x80\x81\x82"}, + "a: !!binary gIGC\n", + }, { + map[string]string{"a": strings.Repeat("\x90", 54)}, + "a: !!binary |\n " + strings.Repeat("kJCQ", 17) + "kJ\n CQ\n", + }, + + // Encode unicode as utf-8 rather than in escaped form. + { + map[string]string{"a": "你好"}, + "a: 你好\n", + }, + + // Support encoding.TextMarshaler. + { + map[string]net.IP{"a": net.IPv4(1, 2, 3, 4)}, + "a: 1.2.3.4\n", + }, + // time.Time gets a timestamp tag. + { + map[string]time.Time{"a": time.Date(2015, 2, 24, 18, 19, 39, 0, time.UTC)}, + "a: 2015-02-24T18:19:39Z\n", + }, + { + map[string]*time.Time{"a": newTime(time.Date(2015, 2, 24, 18, 19, 39, 0, time.UTC))}, + "a: 2015-02-24T18:19:39Z\n", + }, + { + // This is confirmed to be properly decoded in Python (libyaml) without a timestamp tag. + map[string]time.Time{"a": time.Date(2015, 2, 24, 18, 19, 39, 123456789, time.FixedZone("FOO", -3*60*60))}, + "a: 2015-02-24T18:19:39.123456789-03:00\n", + }, + // Ensure timestamp-like strings are quoted. + { + map[string]string{"a": "2015-02-24T18:19:39Z"}, + "a: \"2015-02-24T18:19:39Z\"\n", + }, + + // Ensure strings containing ": " are quoted (reported as PR #43, but not reproducible). + { + map[string]string{"a": "b: c"}, + "a: 'b: c'\n", + }, + + // Containing hash mark ('#') in string should be quoted + { + map[string]string{"a": "Hello #comment"}, + "a: 'Hello #comment'\n", + }, + { + map[string]string{"a": "你好 #comment"}, + "a: '你好 #comment'\n", + }, + + // Ensure MarshalYAML also gets called on the result of MarshalYAML itself. + { + &marshalerType{marshalerType{true}}, + "true\n", + }, { + &marshalerType{&marshalerType{true}}, + "true\n", + }, + + // Check indentation of maps inside sequences inside maps. + { + map[string]interface{}{"a": map[string]interface{}{"b": []map[string]int{{"c": 1, "d": 2}}}}, + "a:\n b:\n - c: 1\n d: 2\n", + }, + + // Strings with tabs were disallowed as literals (issue #471). + { + map[string]string{"a": "\tB\n\tC\n"}, + "a: |\n \tB\n \tC\n", + }, + + // Ensure that strings do not wrap + { + map[string]string{"a": "abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890 abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890 "}, + "a: 'abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890 abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890 '\n", + }, + + // yaml.Node + { + &struct { + Value yaml.Node + }{ + yaml.Node{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "foo", + Style: yaml.SingleQuotedStyle, + }, + }, + "value: 'foo'\n", + }, { + yaml.Node{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "foo", + Style: yaml.SingleQuotedStyle, + }, + "'foo'\n", + }, + + // Enforced tagging with shorthand notation (issue #616). + { + &struct { + Value yaml.Node + }{ + yaml.Node{ + Kind: yaml.ScalarNode, + Style: yaml.TaggedStyle, + Value: "foo", + Tag: "!!str", + }, + }, + "value: !!str foo\n", + }, { + &struct { + Value yaml.Node + }{ + yaml.Node{ + Kind: yaml.MappingNode, + Style: yaml.TaggedStyle, + Tag: "!!map", + }, + }, + "value: !!map {}\n", + }, { + &struct { + Value yaml.Node + }{ + yaml.Node{ + Kind: yaml.SequenceNode, + Style: yaml.TaggedStyle, + Tag: "!!seq", + }, + }, + "value: !!seq []\n", + }, +} + +func (s *S) TestMarshal(c *C) { + defer os.Setenv("TZ", os.Getenv("TZ")) + os.Setenv("TZ", "UTC") + for i, item := range marshalTests { + c.Logf("test %d: %q", i, item.data) + data, err := yaml.Marshal(item.value) + c.Assert(err, IsNil) + c.Assert(string(data), Equals, item.data) + } +} + +func (s *S) TestEncoderSingleDocument(c *C) { + for i, item := range marshalTests { + c.Logf("test %d. %q", i, item.data) + var buf bytes.Buffer + enc := yaml.NewEncoder(&buf) + err := enc.Encode(item.value) + c.Assert(err, Equals, nil) + err = enc.Close() + c.Assert(err, Equals, nil) + c.Assert(buf.String(), Equals, item.data) + } +} + +func (s *S) TestEncoderMultipleDocuments(c *C) { + var buf bytes.Buffer + enc := yaml.NewEncoder(&buf) + err := enc.Encode(map[string]string{"a": "b"}) + c.Assert(err, Equals, nil) + err = enc.Encode(map[string]string{"c": "d"}) + c.Assert(err, Equals, nil) + err = enc.Close() + c.Assert(err, Equals, nil) + c.Assert(buf.String(), Equals, "a: b\n---\nc: d\n") +} + +func (s *S) TestEncoderWriteError(c *C) { + enc := yaml.NewEncoder(errorWriter{}) + err := enc.Encode(map[string]string{"a": "b"}) + c.Assert(err, ErrorMatches, `yaml: write error: some write error`) // Data not flushed yet +} + +type errorWriter struct{} + +func (errorWriter) Write([]byte) (int, error) { + return 0, fmt.Errorf("some write error") +} + +var marshalErrorTests = []struct { + value interface{} + error string + panic string +}{{ + value: &struct { + B int + inlineB ",inline" + }{1, inlineB{2, inlineC{3}}}, + panic: `duplicated key 'b' in struct struct \{ B int; .*`, +}, { + value: &struct { + A int + B map[string]int ",inline" + }{1, map[string]int{"a": 2}}, + panic: `cannot have key "a" in inlined map: conflicts with struct field`, +}} + +func (s *S) TestMarshalErrors(c *C) { + for _, item := range marshalErrorTests { + if item.panic != "" { + c.Assert(func() { yaml.Marshal(item.value) }, PanicMatches, item.panic) + } else { + _, err := yaml.Marshal(item.value) + c.Assert(err, ErrorMatches, item.error) + } + } +} + +func (s *S) TestMarshalTypeCache(c *C) { + var data []byte + var err error + func() { + type T struct{ A int } + data, err = yaml.Marshal(&T{}) + c.Assert(err, IsNil) + }() + func() { + type T struct{ B int } + data, err = yaml.Marshal(&T{}) + c.Assert(err, IsNil) + }() + c.Assert(string(data), Equals, "b: 0\n") +} + +var marshalerTests = []struct { + data string + value interface{} +}{ + {"_:\n hi: there\n", map[interface{}]interface{}{"hi": "there"}}, + {"_:\n - 1\n - A\n", []interface{}{1, "A"}}, + {"_: 10\n", 10}, + {"_: null\n", nil}, + {"_: BAR!\n", "BAR!"}, +} + +type marshalerType struct { + value interface{} +} + +func (o marshalerType) MarshalText() ([]byte, error) { + panic("MarshalText called on type with MarshalYAML") +} + +func (o marshalerType) MarshalYAML() (interface{}, error) { + return o.value, nil +} + +type marshalerValue struct { + Field marshalerType "_" +} + +func (s *S) TestMarshaler(c *C) { + for _, item := range marshalerTests { + obj := &marshalerValue{} + obj.Field.value = item.value + data, err := yaml.Marshal(obj) + c.Assert(err, IsNil) + c.Assert(string(data), Equals, string(item.data)) + } +} + +func (s *S) TestMarshalerWholeDocument(c *C) { + obj := &marshalerType{} + obj.value = map[string]string{"hello": "world!"} + data, err := yaml.Marshal(obj) + c.Assert(err, IsNil) + c.Assert(string(data), Equals, "hello: world!\n") +} + +type failingMarshaler struct{} + +func (ft *failingMarshaler) MarshalYAML() (interface{}, error) { + return nil, failingErr +} + +func (s *S) TestMarshalerError(c *C) { + _, err := yaml.Marshal(&failingMarshaler{}) + c.Assert(err, Equals, failingErr) +} + +func (s *S) TestSetIndent(c *C) { + var buf bytes.Buffer + enc := yaml.NewEncoder(&buf) + enc.SetIndent(8) + err := enc.Encode(map[string]interface{}{"a": map[string]interface{}{"b": map[string]string{"c": "d"}}}) + c.Assert(err, Equals, nil) + err = enc.Close() + c.Assert(err, Equals, nil) + c.Assert(buf.String(), Equals, "a:\n b:\n c: d\n") +} + +func (s *S) TestSortedOutput(c *C) { + order := []interface{}{ + false, + true, + 1, + uint(1), + 1.0, + 1.1, + 1.2, + 2, + uint(2), + 2.0, + 2.1, + "", + ".1", + ".2", + ".a", + "1", + "2", + "a!10", + "a/0001", + "a/002", + "a/3", + "a/10", + "a/11", + "a/0012", + "a/100", + "a~10", + "ab/1", + "b/1", + "b/01", + "b/2", + "b/02", + "b/3", + "b/03", + "b1", + "b01", + "b3", + "c2.10", + "c10.2", + "d1", + "d7", + "d7abc", + "d12", + "d12a", + "e2b", + "e4b", + "e21a", + } + m := make(map[interface{}]int) + for _, k := range order { + m[k] = 1 + } + data, err := yaml.Marshal(m) + c.Assert(err, IsNil) + out := "\n" + string(data) + last := 0 + for i, k := range order { + repr := fmt.Sprint(k) + if s, ok := k.(string); ok { + if _, err = strconv.ParseFloat(repr, 32); s == "" || err == nil { + repr = `"` + repr + `"` + } + } + index := strings.Index(out, "\n"+repr+":") + if index == -1 { + c.Fatalf("%#v is not in the output: %#v", k, out) + } + if index < last { + c.Fatalf("%#v was generated before %#v: %q", k, order[i-1], out) + } + last = index + } +} + +func newTime(t time.Time) *time.Time { + return &t +} diff --git a/goyaml.v3/example_embedded_test.go b/goyaml.v3/example_embedded_test.go new file mode 100644 index 0000000..9d17398 --- /dev/null +++ b/goyaml.v3/example_embedded_test.go @@ -0,0 +1,56 @@ +// +// Copyright (c) 2011-2019 Canonical Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package yaml_test + +import ( + "fmt" + "log" + + "gopkg.in/yaml.v3" +) + +// An example showing how to unmarshal embedded +// structs from YAML. + +type StructA struct { + A string `yaml:"a"` +} + +type StructB struct { + // Embedded structs are not treated as embedded in YAML by default. To do that, + // add the ",inline" annotation below + StructA `yaml:",inline"` + B string `yaml:"b"` +} + +var data = ` +a: a string from struct A +b: a string from struct B +` + +func ExampleUnmarshal_embedded() { + var b StructB + + err := yaml.Unmarshal([]byte(data), &b) + if err != nil { + log.Fatalf("cannot unmarshal data: %v", err) + } + fmt.Println(b.A) + fmt.Println(b.B) + // Output: + // a string from struct A + // a string from struct B +} diff --git a/goyaml.v3/go.mod b/goyaml.v3/go.mod new file mode 100644 index 0000000..f407ea3 --- /dev/null +++ b/goyaml.v3/go.mod @@ -0,0 +1,5 @@ +module "gopkg.in/yaml.v3" + +require ( + "gopkg.in/check.v1" v0.0.0-20161208181325-20d25e280405 +) diff --git a/goyaml.v3/limit_test.go b/goyaml.v3/limit_test.go new file mode 100644 index 0000000..07a3cbd --- /dev/null +++ b/goyaml.v3/limit_test.go @@ -0,0 +1,128 @@ +package yaml_test + +import ( + "strings" + "testing" + + . "gopkg.in/check.v1" + "gopkg.in/yaml.v3" +) + +var limitTests = []struct { + name string + data []byte + error string +}{ + { + name: "1000kb of maps with 100 aliases", + data: []byte(`{a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-100) + `], b: &b [*a` + strings.Repeat(`,*a`, 99) + `]}`), + error: "yaml: document contains excessive aliasing", + }, { + name: "1000kb of deeply nested slices", + data: []byte(strings.Repeat(`[`, 1000*1024)), + error: "yaml: exceeded max depth of 10000", + }, { + name: "1000kb of deeply nested maps", + data: []byte("x: " + strings.Repeat(`{`, 1000*1024)), + error: "yaml: exceeded max depth of 10000", + }, { + name: "1000kb of deeply nested indents", + data: []byte(strings.Repeat(`- `, 1000*1024)), + error: "yaml: exceeded max depth of 10000", + }, { + name: "1000kb of 1000-indent lines", + data: []byte(strings.Repeat(strings.Repeat(`- `, 1000)+"\n", 1024/2)), + }, + {name: "1kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1*1024/4-1) + `]`)}, + {name: "10kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 10*1024/4-1) + `]`)}, + {name: "100kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 100*1024/4-1) + `]`)}, + {name: "1000kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-1) + `]`)}, + {name: "1000kb slice nested at max-depth", data: []byte(strings.Repeat(`[`, 10000) + `1` + strings.Repeat(`,1`, 1000*1024/2-20000-1) + strings.Repeat(`]`, 10000))}, + {name: "1000kb slice nested in maps at max-depth", data: []byte("{a,b:\n" + strings.Repeat(" {a,b:", 10000-2) + ` [1` + strings.Repeat(",1", 1000*1024/2-6*10000-1) + `]` + strings.Repeat(`}`, 10000-1))}, + {name: "1000kb of 10000-nested lines", data: []byte(strings.Repeat(`- `+strings.Repeat(`[`, 10000)+strings.Repeat(`]`, 10000)+"\n", 1000*1024/20000))}, +} + +func (s *S) TestLimits(c *C) { + if testing.Short() { + return + } + for _, tc := range limitTests { + var v interface{} + err := yaml.Unmarshal(tc.data, &v) + if len(tc.error) > 0 { + c.Assert(err, ErrorMatches, tc.error, Commentf("testcase: %s", tc.name)) + } else { + c.Assert(err, IsNil, Commentf("testcase: %s", tc.name)) + } + } +} + +func Benchmark1000KB100Aliases(b *testing.B) { + benchmark(b, "1000kb of maps with 100 aliases") +} +func Benchmark1000KBDeeplyNestedSlices(b *testing.B) { + benchmark(b, "1000kb of deeply nested slices") +} +func Benchmark1000KBDeeplyNestedMaps(b *testing.B) { + benchmark(b, "1000kb of deeply nested maps") +} +func Benchmark1000KBDeeplyNestedIndents(b *testing.B) { + benchmark(b, "1000kb of deeply nested indents") +} +func Benchmark1000KB1000IndentLines(b *testing.B) { + benchmark(b, "1000kb of 1000-indent lines") +} +func Benchmark1KBMaps(b *testing.B) { + benchmark(b, "1kb of maps") +} +func Benchmark10KBMaps(b *testing.B) { + benchmark(b, "10kb of maps") +} +func Benchmark100KBMaps(b *testing.B) { + benchmark(b, "100kb of maps") +} +func Benchmark1000KBMaps(b *testing.B) { + benchmark(b, "1000kb of maps") +} + +func BenchmarkDeepSlice(b *testing.B) { + benchmark(b, "1000kb slice nested at max-depth") +} + +func BenchmarkDeepFlow(b *testing.B) { + benchmark(b, "1000kb slice nested in maps at max-depth") +} + +func Benchmark1000KBMaxDepthNested(b *testing.B) { + benchmark(b, "1000kb of 10000-nested lines") +} + +func benchmark(b *testing.B, name string) { + for _, t := range limitTests { + if t.name != name { + continue + } + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + var v interface{} + err := yaml.Unmarshal(t.data, &v) + if len(t.error) > 0 { + if err == nil { + b.Errorf("expected error, got none") + } else if err.Error() != t.error { + b.Errorf("expected error '%s', got '%s'", t.error, err.Error()) + } + } else { + if err != nil { + b.Errorf("unexpected error: %v", err) + } + } + } + + return + } + + b.Errorf("testcase %q not found", name) +} diff --git a/goyaml.v3/node_test.go b/goyaml.v3/node_test.go new file mode 100644 index 0000000..b7d0c9a --- /dev/null +++ b/goyaml.v3/node_test.go @@ -0,0 +1,2886 @@ +// +// Copyright (c) 2011-2019 Canonical Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package yaml_test + +import ( + "bytes" + "fmt" + "os" + + . "gopkg.in/check.v1" + "gopkg.in/yaml.v3" + "io" + "strings" +) + +var nodeTests = []struct { + yaml string + node yaml.Node +}{ + { + "null\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "null", + Tag: "!!null", + Line: 1, + Column: 1, + }}, + }, + }, { + "[encode]null\n", + yaml.Node{}, + }, { + "foo\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "foo", + Tag: "!!str", + Line: 1, + Column: 1, + }}, + }, + }, { + "\"foo\"\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Style: yaml.DoubleQuotedStyle, + Value: "foo", + Tag: "!!str", + Line: 1, + Column: 1, + }}, + }, + }, { + "'foo'\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Style: yaml.SingleQuotedStyle, + Value: "foo", + Tag: "!!str", + Line: 1, + Column: 1, + }}, + }, + }, { + "!!str 123\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Style: yaml.TaggedStyle, + Value: "123", + Tag: "!!str", + Line: 1, + Column: 1, + }}, + }, + }, { + // Although the node isn't TaggedStyle, dropping the tag would change the value. + "[encode]!!binary gIGC\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "gIGC", + Tag: "!!binary", + Line: 1, + Column: 1, + }}, + }, + }, { + // Item doesn't have a tag, but needs to be binary encoded due to its content. + "[encode]!!binary gIGC\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "\x80\x81\x82", + Line: 1, + Column: 1, + }}, + }, + }, { + // Same, but with strings we can just quote them. + "[encode]\"123\"\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "123", + Tag: "!!str", + Line: 1, + Column: 1, + }}, + }, + }, { + "!tag:something 123\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Style: yaml.TaggedStyle, + Value: "123", + Tag: "!tag:something", + Line: 1, + Column: 1, + }}, + }, + }, { + "[encode]!tag:something 123\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "123", + Tag: "!tag:something", + Line: 1, + Column: 1, + }}, + }, + }, { + "!tag:something {}\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Style: yaml.TaggedStyle | yaml.FlowStyle, + Tag: "!tag:something", + Line: 1, + Column: 1, + }}, + }, + }, { + "[encode]!tag:something {}\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Style: yaml.FlowStyle, + Tag: "!tag:something", + Line: 1, + Column: 1, + }}, + }, + }, { + "!tag:something []\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.SequenceNode, + Style: yaml.TaggedStyle | yaml.FlowStyle, + Tag: "!tag:something", + Line: 1, + Column: 1, + }}, + }, + }, { + "[encode]!tag:something []\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.SequenceNode, + Style: yaml.FlowStyle, + Tag: "!tag:something", + Line: 1, + Column: 1, + }}, + }, + }, { + "''\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Style: yaml.SingleQuotedStyle, + Value: "", + Tag: "!!str", + Line: 1, + Column: 1, + }}, + }, + }, { + "|\n foo\n bar\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Style: yaml.LiteralStyle, + Value: "foo\nbar\n", + Tag: "!!str", + Line: 1, + Column: 1, + }}, + }, + }, { + "true\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "true", + Tag: "!!bool", + Line: 1, + Column: 1, + }}, + }, + }, { + "-10\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "-10", + Tag: "!!int", + Line: 1, + Column: 1, + }}, + }, + }, { + "4294967296\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "4294967296", + Tag: "!!int", + Line: 1, + Column: 1, + }}, + }, + }, { + "0.1000\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "0.1000", + Tag: "!!float", + Line: 1, + Column: 1, + }}, + }, + }, { + "-.inf\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "-.inf", + Tag: "!!float", + Line: 1, + Column: 1, + }}, + }, + }, { + ".nan\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: ".nan", + Tag: "!!float", + Line: 1, + Column: 1, + }}, + }, + }, { + "{}\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Style: yaml.FlowStyle, + Value: "", + Tag: "!!map", + Line: 1, + Column: 1, + }}, + }, + }, { + "a: b c\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Value: "", + Tag: "!!map", + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "a", + Tag: "!!str", + Line: 1, + Column: 1, + }, { + Kind: yaml.ScalarNode, + Value: "b c", + Tag: "!!str", + Line: 1, + Column: 4, + }}, + }}, + }, + }, { + "a:\n b: c\n d: e\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "a", + Tag: "!!str", + Line: 1, + Column: 1, + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 2, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "b", + Tag: "!!str", + Line: 2, + Column: 3, + }, { + Kind: yaml.ScalarNode, + Value: "c", + Tag: "!!str", + Line: 2, + Column: 6, + }, { + Kind: yaml.ScalarNode, + Value: "d", + Tag: "!!str", + Line: 3, + Column: 3, + }, { + Kind: yaml.ScalarNode, + Value: "e", + Tag: "!!str", + Line: 3, + Column: 6, + }}, + }}, + }}, + }, + }, { + "a:\n - b: c\n d: e\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "a", + Tag: "!!str", + Line: 1, + Column: 1, + }, { + Kind: yaml.SequenceNode, + Tag: "!!seq", + Line: 2, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 2, + Column: 5, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "b", + Tag: "!!str", + Line: 2, + Column: 5, + }, { + Kind: yaml.ScalarNode, + Value: "c", + Tag: "!!str", + Line: 2, + Column: 8, + }, { + Kind: yaml.ScalarNode, + Value: "d", + Tag: "!!str", + Line: 3, + Column: 5, + }, { + Kind: yaml.ScalarNode, + Value: "e", + Tag: "!!str", + Line: 3, + Column: 8, + }}, + }}, + }}, + }}, + }, + }, { + "a: # AI\n - b\nc:\n - d\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "a", + LineComment: "# AI", + Line: 1, + Column: 1, + }, { + Kind: yaml.SequenceNode, + Tag: "!!seq", + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "b", + Line: 2, + Column: 5, + }}, + Line: 2, + Column: 3, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "c", + Line: 3, + Column: 1, + }, { + Kind: yaml.SequenceNode, + Tag: "!!seq", + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "d", + Line: 4, + Column: 5, + }}, + Line: 4, + Column: 3, + }}, + }}, + }, + }, { + "[decode]a:\n # HM\n - # HB1\n # HB2\n b: # IB\n c # IC\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Style: 0x0, + Tag: "!!str", + Value: "a", + Line: 1, + Column: 1, + }, { + Kind: yaml.SequenceNode, + Tag: "!!seq", + Line: 3, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + HeadComment: "# HM", + Line: 5, + Column: 5, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "b", + HeadComment: "# HB1\n# HB2", + LineComment: "# IB", + Line: 5, + Column: 5, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "c", + LineComment: "# IC", + Line: 6, + Column: 7, + }}, + }}, + }}, + }}, + }, + }, { + // When encoding the value above, it loses b's inline comment. + "[encode]a:\n # HM\n - # HB1\n # HB2\n b: c # IC\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Style: 0x0, + Tag: "!!str", + Value: "a", + Line: 1, + Column: 1, + }, { + Kind: yaml.SequenceNode, + Tag: "!!seq", + Line: 3, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + HeadComment: "# HM", + Line: 5, + Column: 5, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "b", + HeadComment: "# HB1\n# HB2", + LineComment: "# IB", + Line: 5, + Column: 5, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "c", + LineComment: "# IC", + Line: 6, + Column: 7, + }}, + }}, + }}, + }}, + }, + }, { + // Multiple cases of comment inlining next to mapping keys. + "a: | # IA\n str\nb: >- # IB\n str\nc: # IC\n - str\nd: # ID\n str:\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "a", + Line: 1, + Column: 1, + }, { + Kind: yaml.ScalarNode, + Style: yaml.LiteralStyle, + Tag: "!!str", + Value: "str\n", + LineComment: "# IA", + Line: 1, + Column: 4, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "b", + Line: 3, + Column: 1, + }, { + Kind: yaml.ScalarNode, + Style: yaml.FoldedStyle, + Tag: "!!str", + Value: "str", + LineComment: "# IB", + Line: 3, + Column: 4, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "c", + LineComment: "# IC", + Line: 5, + Column: 1, + }, { + Kind: yaml.SequenceNode, + Tag: "!!seq", + Line: 6, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "str", + Line: 6, + Column: 5, + }}, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "d", + LineComment: "# ID", + Line: 7, + Column: 1, + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 8, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "str", + Line: 8, + Column: 3, + }, { + Kind: yaml.ScalarNode, + Tag: "!!null", + Line: 8, + Column: 7, + }}, + }}, + }}, + }, + }, { + // Indentless sequence. + "[decode]a:\n# HM\n- # HB1\n # HB2\n b: # IB\n c # IC\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "a", + Line: 1, + Column: 1, + }, { + Kind: yaml.SequenceNode, + Tag: "!!seq", + Line: 3, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + HeadComment: "# HM", + Line: 5, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "b", + HeadComment: "# HB1\n# HB2", + LineComment: "# IB", + Line: 5, + Column: 3, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "c", + LineComment: "# IC", + Line: 6, + Column: 5, + }}, + }}, + }}, + }}, + }, + }, { + "- a\n- b\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.SequenceNode, + Value: "", + Tag: "!!seq", + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "a", + Tag: "!!str", + Line: 1, + Column: 3, + }, { + Kind: yaml.ScalarNode, + Value: "b", + Tag: "!!str", + Line: 2, + Column: 3, + }}, + }}, + }, + }, { + "- a\n- - b\n - c\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.SequenceNode, + Tag: "!!seq", + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "a", + Tag: "!!str", + Line: 1, + Column: 3, + }, { + Kind: yaml.SequenceNode, + Tag: "!!seq", + Line: 2, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "b", + Tag: "!!str", + Line: 2, + Column: 5, + }, { + Kind: yaml.ScalarNode, + Value: "c", + Tag: "!!str", + Line: 3, + Column: 5, + }}, + }}, + }}, + }, + }, { + "[a, b]\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.SequenceNode, + Style: yaml.FlowStyle, + Value: "", + Tag: "!!seq", + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "a", + Tag: "!!str", + Line: 1, + Column: 2, + }, { + Kind: yaml.ScalarNode, + Value: "b", + Tag: "!!str", + Line: 1, + Column: 5, + }}, + }}, + }, + }, { + "- a\n- [b, c]\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.SequenceNode, + Tag: "!!seq", + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "a", + Tag: "!!str", + Line: 1, + Column: 3, + }, { + Kind: yaml.SequenceNode, + Tag: "!!seq", + Style: yaml.FlowStyle, + Line: 2, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "b", + Tag: "!!str", + Line: 2, + Column: 4, + }, { + Kind: yaml.ScalarNode, + Value: "c", + Tag: "!!str", + Line: 2, + Column: 7, + }}, + }}, + }}, + }, + }, { + "a: &x 1\nb: &y 2\nc: *x\nd: *y\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Line: 1, + Column: 1, + Tag: "!!map", + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "a", + Tag: "!!str", + Line: 1, + Column: 1, + }, + saveNode("x", &yaml.Node{ + Kind: yaml.ScalarNode, + Value: "1", + Tag: "!!int", + Anchor: "x", + Line: 1, + Column: 4, + }), + { + Kind: yaml.ScalarNode, + Value: "b", + Tag: "!!str", + Line: 2, + Column: 1, + }, + saveNode("y", &yaml.Node{ + Kind: yaml.ScalarNode, + Value: "2", + Tag: "!!int", + Anchor: "y", + Line: 2, + Column: 4, + }), + { + Kind: yaml.ScalarNode, + Value: "c", + Tag: "!!str", + Line: 3, + Column: 1, + }, { + Kind: yaml.AliasNode, + Value: "x", + Alias: dropNode("x"), + Line: 3, + Column: 4, + }, { + Kind: yaml.ScalarNode, + Value: "d", + Tag: "!!str", + Line: 4, + Column: 1, + }, { + Kind: yaml.AliasNode, + Value: "y", + Tag: "", + Alias: dropNode("y"), + Line: 4, + Column: 4, + }}, + }}, + }, + }, { + + "# One\n# Two\ntrue # Three\n# Four\n# Five\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 3, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "true", + Tag: "!!bool", + Line: 3, + Column: 1, + HeadComment: "# One\n# Two", + LineComment: "# Three", + FootComment: "# Four\n# Five", + }}, + }, + }, { + + "# š\ntrue # š\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 2, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "true", + Tag: "!!bool", + Line: 2, + Column: 1, + HeadComment: "# š", + LineComment: "# š", + }}, + }, + }, { + + "[decode]\n# One\n\n# Two\n\n# Three\ntrue # Four\n# Five\n\n# Six\n\n# Seven\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 7, + Column: 1, + HeadComment: "# One\n\n# Two", + FootComment: "# Six\n\n# Seven", + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "true", + Tag: "!!bool", + Line: 7, + Column: 1, + HeadComment: "# Three", + LineComment: "# Four", + FootComment: "# Five", + }}, + }, + }, { + // Write out the pound character if missing from comments. + "[encode]# One\n# Two\ntrue # Three\n# Four\n# Five\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 3, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "true", + Tag: "!!bool", + Line: 3, + Column: 1, + HeadComment: "One\nTwo\n", + LineComment: "Three\n", + FootComment: "Four\nFive\n", + }}, + }, + }, { + "[encode]# One\n# Two\ntrue # Three\n# Four\n# Five\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 3, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "true", + Tag: "!!bool", + Line: 3, + Column: 1, + HeadComment: " One\n Two", + LineComment: " Three", + FootComment: " Four\n Five", + }}, + }, + }, { + "# DH1\n\n# DH2\n\n# H1\n# H2\ntrue # I\n# F1\n# F2\n\n# DF1\n\n# DF2\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 7, + Column: 1, + HeadComment: "# DH1\n\n# DH2", + FootComment: "# DF1\n\n# DF2", + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "true", + Tag: "!!bool", + Line: 7, + Column: 1, + HeadComment: "# H1\n# H2", + LineComment: "# I", + FootComment: "# F1\n# F2", + }}, + }, + }, { + "# DH1\n\n# DH2\n\n# HA1\n# HA2\nka: va # IA\n# FA1\n# FA2\n\n# HB1\n# HB2\nkb: vb # IB\n# FB1\n# FB2\n\n# DF1\n\n# DF2\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 7, + Column: 1, + HeadComment: "# DH1\n\n# DH2", + FootComment: "# DF1\n\n# DF2", + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 7, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Line: 7, + Column: 1, + Tag: "!!str", + Value: "ka", + HeadComment: "# HA1\n# HA2", + FootComment: "# FA1\n# FA2", + }, { + Kind: yaml.ScalarNode, + Line: 7, + Column: 5, + Tag: "!!str", + Value: "va", + LineComment: "# IA", + }, { + Kind: yaml.ScalarNode, + Line: 13, + Column: 1, + Tag: "!!str", + Value: "kb", + HeadComment: "# HB1\n# HB2", + FootComment: "# FB1\n# FB2", + }, { + Kind: yaml.ScalarNode, + Line: 13, + Column: 5, + Tag: "!!str", + Value: "vb", + LineComment: "# IB", + }}, + }}, + }, + }, { + "# DH1\n\n# DH2\n\n# HA1\n# HA2\n- la # IA\n# FA1\n# FA2\n\n# HB1\n# HB2\n- lb # IB\n# FB1\n# FB2\n\n# DF1\n\n# DF2\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 7, + Column: 1, + HeadComment: "# DH1\n\n# DH2", + FootComment: "# DF1\n\n# DF2", + Content: []*yaml.Node{{ + Kind: yaml.SequenceNode, + Tag: "!!seq", + Line: 7, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 7, + Column: 3, + Value: "la", + HeadComment: "# HA1\n# HA2", + LineComment: "# IA", + FootComment: "# FA1\n# FA2", + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 13, + Column: 3, + Value: "lb", + HeadComment: "# HB1\n# HB2", + LineComment: "# IB", + FootComment: "# FB1\n# FB2", + }}, + }}, + }, + }, { + "# DH1\n\n- la # IA\n# HB1\n- lb\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 3, + Column: 1, + HeadComment: "# DH1", + Content: []*yaml.Node{{ + Kind: yaml.SequenceNode, + Tag: "!!seq", + Line: 3, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 3, + Column: 3, + Value: "la", + LineComment: "# IA", + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 5, + Column: 3, + Value: "lb", + HeadComment: "# HB1", + }}, + }}, + }, + }, { + "- la # IA\n- lb # IB\n- lc # IC\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.SequenceNode, + Tag: "!!seq", + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 1, + Column: 3, + Value: "la", + LineComment: "# IA", + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 2, + Column: 3, + Value: "lb", + LineComment: "# IB", + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 3, + Column: 3, + Value: "lc", + LineComment: "# IC", + }}, + }}, + }, + }, { + "# DH1\n\n# HL1\n- - la\n # HB1\n - lb\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 4, + Column: 1, + HeadComment: "# DH1", + Content: []*yaml.Node{{ + Kind: yaml.SequenceNode, + Tag: "!!seq", + Line: 4, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.SequenceNode, + Tag: "!!seq", + Line: 4, + Column: 3, + HeadComment: "# HL1", + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 4, + Column: 5, + Value: "la", + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 6, + Column: 5, + Value: "lb", + HeadComment: "# HB1", + }}, + }}, + }}, + }, + }, { + "# DH1\n\n# HL1\n- # HA1\n - la\n # HB1\n - lb\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 4, + Column: 1, + HeadComment: "# DH1", + Content: []*yaml.Node{{ + Kind: yaml.SequenceNode, + Tag: "!!seq", + Line: 4, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.SequenceNode, + Tag: "!!seq", + Line: 5, + Column: 3, + HeadComment: "# HL1", + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 5, + Column: 5, + Value: "la", + HeadComment: "# HA1", + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 7, + Column: 5, + Value: "lb", + HeadComment: "# HB1", + }}, + }}, + }}, + }, + }, { + "[decode]# DH1\n\n# HL1\n- # HA1\n\n - la\n # HB1\n - lb\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 4, + Column: 1, + HeadComment: "# DH1", + Content: []*yaml.Node{{ + Kind: yaml.SequenceNode, + Tag: "!!seq", + Line: 4, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.SequenceNode, + Tag: "!!seq", + Line: 6, + Column: 3, + HeadComment: "# HL1", + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 6, + Column: 5, + Value: "la", + HeadComment: "# HA1\n", + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 8, + Column: 5, + Value: "lb", + HeadComment: "# HB1", + }}, + }}, + }}, + }, + }, { + "# DH1\n\n# HA1\nka:\n # HB1\n kb:\n # HC1\n # HC2\n - lc # IC\n # FC1\n # FC2\n\n # HD1\n - ld # ID\n # FD1\n\n# DF1\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 4, + Column: 1, + HeadComment: "# DH1", + FootComment: "# DF1", + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 4, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 4, + Column: 1, + Value: "ka", + HeadComment: "# HA1", + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 6, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 6, + Column: 3, + Value: "kb", + HeadComment: "# HB1", + }, { + Kind: yaml.SequenceNode, + Line: 9, + Column: 5, + Tag: "!!seq", + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 9, + Column: 7, + Value: "lc", + HeadComment: "# HC1\n# HC2", + LineComment: "# IC", + FootComment: "# FC1\n# FC2", + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 14, + Column: 7, + Value: "ld", + HeadComment: "# HD1", + + LineComment: "# ID", + FootComment: "# FD1", + }}, + }}, + }}, + }}, + }, + }, { + "# DH1\n\n# HA1\nka:\n # HB1\n kb:\n # HC1\n # HC2\n - lc # IC\n # FC1\n # FC2\n\n # HD1\n - ld # ID\n # FD1\nke: ve\n\n# DF1\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 4, + Column: 1, + HeadComment: "# DH1", + FootComment: "# DF1", + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 4, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 4, + Column: 1, + Value: "ka", + HeadComment: "# HA1", + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 6, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 6, + Column: 3, + Value: "kb", + HeadComment: "# HB1", + }, { + Kind: yaml.SequenceNode, + Line: 9, + Column: 5, + Tag: "!!seq", + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 9, + Column: 7, + Value: "lc", + HeadComment: "# HC1\n# HC2", + LineComment: "# IC", + FootComment: "# FC1\n# FC2", + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 14, + Column: 7, + Value: "ld", + HeadComment: "# HD1", + LineComment: "# ID", + FootComment: "# FD1", + }}, + }}, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 16, + Column: 1, + Value: "ke", + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 16, + Column: 5, + Value: "ve", + }}, + }}, + }, + }, { + "# DH1\n\n# DH2\n\n# HA1\n# HA2\nka:\n # HB1\n # HB2\n kb:\n" + + " # HC1\n # HC2\n kc:\n # HD1\n # HD2\n kd: vd\n # FD1\n # FD2\n" + + " # FC1\n # FC2\n # FB1\n # FB2\n# FA1\n# FA2\n\n# HE1\n# HE2\nke: ve\n# FE1\n# FE2\n\n# DF1\n\n# DF2\n", + yaml.Node{ + Kind: yaml.DocumentNode, + HeadComment: "# DH1\n\n# DH2", + FootComment: "# DF1\n\n# DF2", + Line: 7, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 7, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "ka", + HeadComment: "# HA1\n# HA2", + FootComment: "# FA1\n# FA2", + Line: 7, + Column: 1, + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 10, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kb", + HeadComment: "# HB1\n# HB2", + FootComment: "# FB1\n# FB2", + Line: 10, + Column: 3, + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 13, + Column: 5, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kc", + HeadComment: "# HC1\n# HC2", + FootComment: "# FC1\n# FC2", + Line: 13, + Column: 5, + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 16, + Column: 7, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kd", + HeadComment: "# HD1\n# HD2", + FootComment: "# FD1\n# FD2", + Line: 16, + Column: 7, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "vd", + Line: 16, + Column: 11, + }}, + }}, + }}, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "ke", + HeadComment: "# HE1\n# HE2", + FootComment: "# FE1\n# FE2", + Line: 28, + Column: 1, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "ve", + Line: 28, + Column: 5, + }}, + }}, + }, + }, { + // Same as above but indenting ke in so it's also part of ka's value. + "# DH1\n\n# DH2\n\n# HA1\n# HA2\nka:\n # HB1\n # HB2\n kb:\n" + + " # HC1\n # HC2\n kc:\n # HD1\n # HD2\n kd: vd\n # FD1\n # FD2\n" + + " # FC1\n # FC2\n # FB1\n # FB2\n\n # HE1\n # HE2\n ke: ve\n # FE1\n # FE2\n# FA1\n# FA2\n\n# DF1\n\n# DF2\n", + yaml.Node{ + Kind: yaml.DocumentNode, + HeadComment: "# DH1\n\n# DH2", + FootComment: "# DF1\n\n# DF2", + Line: 7, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 7, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "ka", + HeadComment: "# HA1\n# HA2", + FootComment: "# FA1\n# FA2", + Line: 7, + Column: 1, + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 10, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kb", + HeadComment: "# HB1\n# HB2", + FootComment: "# FB1\n# FB2", + Line: 10, + Column: 3, + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 13, + Column: 5, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kc", + HeadComment: "# HC1\n# HC2", + FootComment: "# FC1\n# FC2", + Line: 13, + Column: 5, + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 16, + Column: 7, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kd", + HeadComment: "# HD1\n# HD2", + FootComment: "# FD1\n# FD2", + Line: 16, + Column: 7, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "vd", + Line: 16, + Column: 11, + }}, + }}, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "ke", + HeadComment: "# HE1\n# HE2", + FootComment: "# FE1\n# FE2", + Line: 26, + Column: 3, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "ve", + Line: 26, + Column: 7, + }}, + }}, + }}, + }, + }, { + // Decode only due to lack of newline at the end. + "[decode]# HA1\nka:\n # HB1\n kb: vb\n # FB1\n# FA1", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 2, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 2, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "ka", + HeadComment: "# HA1", + FootComment: "# FA1", + Line: 2, + Column: 1, + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 4, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kb", + HeadComment: "# HB1", + FootComment: "# FB1", + Line: 4, + Column: 3, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "vb", + Line: 4, + Column: 7, + }}, + }}, + }}, + }, + }, { + // Same as above, but with newline at the end. + "# HA1\nka:\n # HB1\n kb: vb\n # FB1\n# FA1\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 2, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 2, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "ka", + HeadComment: "# HA1", + FootComment: "# FA1", + Line: 2, + Column: 1, + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 4, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kb", + HeadComment: "# HB1", + FootComment: "# FB1", + Line: 4, + Column: 3, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "vb", + Line: 4, + Column: 7, + }}, + }}, + }}, + }, + }, { + // Same as above, but without FB1. + "# HA1\nka:\n # HB1\n kb: vb\n# FA1\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 2, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 2, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "ka", + HeadComment: "# HA1", + FootComment: "# FA1", + Line: 2, + Column: 1, + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 4, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kb", + HeadComment: "# HB1", + Line: 4, + Column: 3, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "vb", + Line: 4, + Column: 7, + }}, + }}, + }}, + }, + }, { + // Same as above, but with two newlines at the end. Decode-only for that. + "[decode]# HA1\nka:\n # HB1\n kb: vb\n # FB1\n# FA1\n\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 2, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 2, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "ka", + HeadComment: "# HA1", + FootComment: "# FA1", + Line: 2, + Column: 1, + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 4, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kb", + HeadComment: "# HB1", + FootComment: "# FB1", + Line: 4, + Column: 3, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "vb", + Line: 4, + Column: 7, + }}, + }}, + }}, + }, + }, { + // Similar to above, but make HB1 look more like a footer of ka. + "[decode]# HA1\nka:\n# HB1\n\n kb: vb\n# FA1\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 2, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 2, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "ka", + HeadComment: "# HA1", + FootComment: "# FA1", + Line: 2, + Column: 1, + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 5, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kb", + HeadComment: "# HB1\n", + Line: 5, + Column: 3, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "vb", + Line: 5, + Column: 7, + }}, + }}, + }}, + }, + }, { + "ka:\n kb: vb\n# FA1\n\nkc: vc\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "ka", + Line: 1, + Column: 1, + FootComment: "# FA1", + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 2, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kb", + Line: 2, + Column: 3, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "vb", + Line: 2, + Column: 7, + }}, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kc", + Line: 5, + Column: 1, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "vc", + Line: 5, + Column: 5, + }}, + }}, + }, + }, { + "ka:\n kb: vb\n# HC1\nkc: vc\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "ka", + Line: 1, + Column: 1, + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 2, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kb", + Line: 2, + Column: 3, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "vb", + Line: 2, + Column: 7, + }}, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kc", + HeadComment: "# HC1", + Line: 4, + Column: 1, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "vc", + Line: 4, + Column: 5, + }}, + }}, + }, + }, { + // Decode only due to empty line before HC1. + "[decode]ka:\n kb: vb\n\n# HC1\nkc: vc\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "ka", + Line: 1, + Column: 1, + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 2, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kb", + Line: 2, + Column: 3, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "vb", + Line: 2, + Column: 7, + }}, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kc", + HeadComment: "# HC1", + Line: 5, + Column: 1, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "vc", + Line: 5, + Column: 5, + }}, + }}, + }, + }, { + // Decode-only due to empty lines around HC1. + "[decode]ka:\n kb: vb\n\n# HC1\n\nkc: vc\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "ka", + Line: 1, + Column: 1, + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 2, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kb", + Line: 2, + Column: 3, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "vb", + Line: 2, + Column: 7, + }}, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kc", + HeadComment: "# HC1\n", + Line: 6, + Column: 1, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "vc", + Line: 6, + Column: 5, + }}, + }}, + }, + }, { + "ka: # IA\n kb: # IB\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "ka", + Line: 1, + Column: 1, + LineComment: "# IA", + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 2, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kb", + Line: 2, + Column: 3, + LineComment: "# IB", + }, { + Kind: yaml.ScalarNode, + Tag: "!!null", + Line: 2, + Column: 6, + }}, + }}, + }}, + }, + }, { + "# HA1\nka:\n # HB1\n kb: vb\n # FB1\n# HC1\n# HC2\nkc: vc\n# FC1\n# FC2\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 2, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 2, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "ka", + HeadComment: "# HA1", + Line: 2, + Column: 1, + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 4, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kb", + HeadComment: "# HB1", + FootComment: "# FB1", + Line: 4, + Column: 3, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "vb", + Line: 4, + Column: 7, + }}, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kc", + HeadComment: "# HC1\n# HC2", + FootComment: "# FC1\n# FC2", + Line: 8, + Column: 1, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "vc", + Line: 8, + Column: 5, + }}, + }}, + }, + }, { + // Same as above, but decode only due to empty line between ka's value and kc's headers. + "[decode]# HA1\nka:\n # HB1\n kb: vb\n # FB1\n\n# HC1\n# HC2\nkc: vc\n# FC1\n# FC2\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 2, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 2, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "ka", + HeadComment: "# HA1", + Line: 2, + Column: 1, + }, { + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 4, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kb", + HeadComment: "# HB1", + FootComment: "# FB1", + Line: 4, + Column: 3, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "vb", + Line: 4, + Column: 7, + }}, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kc", + HeadComment: "# HC1\n# HC2", + FootComment: "# FC1\n# FC2", + Line: 9, + Column: 1, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "vc", + Line: 9, + Column: 5, + }}, + }}, + }, + }, { + "# H1\n[la, lb] # I\n# F1\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 2, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.SequenceNode, + Tag: "!!seq", + Style: yaml.FlowStyle, + Line: 2, + Column: 1, + HeadComment: "# H1", + LineComment: "# I", + FootComment: "# F1", + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 2, + Column: 2, + Value: "la", + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 2, + Column: 6, + Value: "lb", + }}, + }}, + }, + }, { + "# DH1\n\n# SH1\n[\n # HA1\n la, # IA\n # FA1\n\n # HB1\n lb, # IB\n # FB1\n]\n# SF1\n\n# DF1\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 4, + Column: 1, + HeadComment: "# DH1", + FootComment: "# DF1", + Content: []*yaml.Node{{ + Kind: yaml.SequenceNode, + Tag: "!!seq", + Style: yaml.FlowStyle, + Line: 4, + Column: 1, + HeadComment: "# SH1", + FootComment: "# SF1", + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 6, + Column: 3, + Value: "la", + HeadComment: "# HA1", + LineComment: "# IA", + FootComment: "# FA1", + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 10, + Column: 3, + Value: "lb", + HeadComment: "# HB1", + LineComment: "# IB", + FootComment: "# FB1", + }}, + }}, + }, + }, { + // Same as above, but with extra newlines before FB1 and FB2 + "[decode]# DH1\n\n# SH1\n[\n # HA1\n la, # IA\n # FA1\n\n # HB1\n lb, # IB\n\n\n # FB1\n\n# FB2\n]\n# SF1\n\n# DF1\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 4, + Column: 1, + HeadComment: "# DH1", + FootComment: "# DF1", + Content: []*yaml.Node{{ + Kind: yaml.SequenceNode, + Tag: "!!seq", + Style: yaml.FlowStyle, + Line: 4, + Column: 1, + HeadComment: "# SH1", + FootComment: "# SF1", + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 6, + Column: 3, + Value: "la", + HeadComment: "# HA1", + LineComment: "# IA", + FootComment: "# FA1", + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 10, + Column: 3, + Value: "lb", + HeadComment: "# HB1", + LineComment: "# IB", + FootComment: "# FB1\n\n# FB2", + }}, + }}, + }, + }, { + "# DH1\n\n# SH1\n[\n # HA1\n la,\n # FA1\n\n # HB1\n lb,\n # FB1\n]\n# SF1\n\n# DF1\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 4, + Column: 1, + HeadComment: "# DH1", + FootComment: "# DF1", + Content: []*yaml.Node{{ + Kind: yaml.SequenceNode, + Tag: "!!seq", + Style: yaml.FlowStyle, + Line: 4, + Column: 1, + HeadComment: "# SH1", + FootComment: "# SF1", + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 6, + Column: 3, + Value: "la", + HeadComment: "# HA1", + FootComment: "# FA1", + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 10, + Column: 3, + Value: "lb", + HeadComment: "# HB1", + FootComment: "# FB1", + }}, + }}, + }, + }, { + "ka:\n kb: [\n # HA1\n la,\n # FA1\n\n # HB1\n lb,\n # FB1\n ]\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Line: 1, + Column: 1, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "ka", + Line: 1, + Column: 1, + }, { + Kind: 0x4, + Tag: "!!map", + Line: 2, + Column: 3, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "kb", + Line: 2, + Column: 3, + }, { + Kind: yaml.SequenceNode, + Style: 0x20, + Tag: "!!seq", + Line: 2, + Column: 7, + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "la", + HeadComment: "# HA1", + FootComment: "# FA1", + Line: 4, + Column: 5, + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Value: "lb", + HeadComment: "# HB1", + FootComment: "# FB1", + Line: 8, + Column: 5, + }}, + }}, + }}, + }}, + }, + }, { + "# DH1\n\n# MH1\n{\n # HA1\n ka: va, # IA\n # FA1\n\n # HB1\n kb: vb, # IB\n # FB1\n}\n# MF1\n\n# DF1\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 4, + Column: 1, + HeadComment: "# DH1", + FootComment: "# DF1", + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Style: yaml.FlowStyle, + Line: 4, + Column: 1, + HeadComment: "# MH1", + FootComment: "# MF1", + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 6, + Column: 3, + Value: "ka", + HeadComment: "# HA1", + FootComment: "# FA1", + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 6, + Column: 7, + Value: "va", + LineComment: "# IA", + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 10, + Column: 3, + Value: "kb", + HeadComment: "# HB1", + FootComment: "# FB1", + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 10, + Column: 7, + Value: "vb", + LineComment: "# IB", + }}, + }}, + }, + }, { + "# DH1\n\n# MH1\n{\n # HA1\n ka: va,\n # FA1\n\n # HB1\n kb: vb,\n # FB1\n}\n# MF1\n\n# DF1\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 4, + Column: 1, + HeadComment: "# DH1", + FootComment: "# DF1", + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Style: yaml.FlowStyle, + Line: 4, + Column: 1, + HeadComment: "# MH1", + FootComment: "# MF1", + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 6, + Column: 3, + Value: "ka", + HeadComment: "# HA1", + FootComment: "# FA1", + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 6, + Column: 7, + Value: "va", + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 10, + Column: 3, + Value: "kb", + HeadComment: "# HB1", + FootComment: "# FB1", + }, { + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 10, + Column: 7, + Value: "vb", + }}, + }}, + }, + }, { + "# DH1\n\n# DH2\n\n# HA1\n# HA2\n- &x la # IA\n# FA1\n# FA2\n\n# HB1\n# HB2\n- *x # IB\n# FB1\n# FB2\n\n# DF1\n\n# DF2\n", + yaml.Node{ + Kind: yaml.DocumentNode, + Line: 7, + Column: 1, + HeadComment: "# DH1\n\n# DH2", + FootComment: "# DF1\n\n# DF2", + Content: []*yaml.Node{{ + Kind: yaml.SequenceNode, + Tag: "!!seq", + Line: 7, + Column: 1, + Content: []*yaml.Node{ + saveNode("x", &yaml.Node{ + Kind: yaml.ScalarNode, + Tag: "!!str", + Line: 7, + Column: 3, + Value: "la", + HeadComment: "# HA1\n# HA2", + LineComment: "# IA", + FootComment: "# FA1\n# FA2", + Anchor: "x", + }), { + Kind: yaml.AliasNode, + Line: 13, + Column: 3, + Value: "x", + Alias: dropNode("x"), + HeadComment: "# HB1\n# HB2", + LineComment: "# IB", + FootComment: "# FB1\n# FB2", + }, + }, + }}, + }, + }, +} + +func (s *S) TestNodeRoundtrip(c *C) { + defer os.Setenv("TZ", os.Getenv("TZ")) + os.Setenv("TZ", "UTC") + for i, item := range nodeTests { + c.Logf("test %d: %q", i, item.yaml) + + if strings.Contains(item.yaml, "#") { + var buf bytes.Buffer + fprintComments(&buf, &item.node, " ") + c.Logf(" expected comments:\n%s", buf.Bytes()) + } + + decode := true + encode := true + + testYaml := item.yaml + if s := strings.TrimPrefix(testYaml, "[decode]"); s != testYaml { + encode = false + testYaml = s + } + if s := strings.TrimPrefix(testYaml, "[encode]"); s != testYaml { + decode = false + testYaml = s + } + + if decode { + var node yaml.Node + err := yaml.Unmarshal([]byte(testYaml), &node) + c.Assert(err, IsNil) + if strings.Contains(item.yaml, "#") { + var buf bytes.Buffer + fprintComments(&buf, &node, " ") + c.Logf(" obtained comments:\n%s", buf.Bytes()) + } + c.Assert(&node, DeepEquals, &item.node) + } + if encode { + node := deepCopyNode(&item.node, nil) + buf := bytes.Buffer{} + enc := yaml.NewEncoder(&buf) + enc.SetIndent(2) + err := enc.Encode(node) + c.Assert(err, IsNil) + err = enc.Close() + c.Assert(err, IsNil) + c.Assert(buf.String(), Equals, testYaml) + + // Ensure there were no mutations to the tree. + c.Assert(node, DeepEquals, &item.node) + } + } +} + +func deepCopyNode(node *yaml.Node, cache map[*yaml.Node]*yaml.Node) *yaml.Node { + if n, ok := cache[node]; ok { + return n + } + if cache == nil { + cache = make(map[*yaml.Node]*yaml.Node) + } + copy := *node + cache[node] = © + copy.Content = nil + for _, elem := range node.Content { + copy.Content = append(copy.Content, deepCopyNode(elem, cache)) + } + if node.Alias != nil { + copy.Alias = deepCopyNode(node.Alias, cache) + } + return © +} + +var savedNodes = make(map[string]*yaml.Node) + +func saveNode(name string, node *yaml.Node) *yaml.Node { + savedNodes[name] = node + return node +} + +func peekNode(name string) *yaml.Node { + return savedNodes[name] +} + +func dropNode(name string) *yaml.Node { + node := savedNodes[name] + delete(savedNodes, name) + return node +} + +var setStringTests = []struct { + str string + yaml string + node yaml.Node +}{ + { + "something simple", + "something simple\n", + yaml.Node{ + Kind: yaml.ScalarNode, + Value: "something simple", + Tag: "!!str", + }, + }, { + `"quoted value"`, + "'\"quoted value\"'\n", + yaml.Node{ + Kind: yaml.ScalarNode, + Value: `"quoted value"`, + Tag: "!!str", + }, + }, { + "multi\nline", + "|-\n multi\n line\n", + yaml.Node{ + Kind: yaml.ScalarNode, + Value: "multi\nline", + Tag: "!!str", + Style: yaml.LiteralStyle, + }, + }, { + "123", + "\"123\"\n", + yaml.Node{ + Kind: yaml.ScalarNode, + Value: "123", + Tag: "!!str", + }, + }, { + "multi\nline\n", + "|\n multi\n line\n", + yaml.Node{ + Kind: yaml.ScalarNode, + Value: "multi\nline\n", + Tag: "!!str", + Style: yaml.LiteralStyle, + }, + }, { + "\x80\x81\x82", + "!!binary gIGC\n", + yaml.Node{ + Kind: yaml.ScalarNode, + Value: "gIGC", + Tag: "!!binary", + }, + }, +} + +func (s *S) TestSetString(c *C) { + defer os.Setenv("TZ", os.Getenv("TZ")) + os.Setenv("TZ", "UTC") + for i, item := range setStringTests { + c.Logf("test %d: %q", i, item.str) + + var node yaml.Node + + node.SetString(item.str) + + c.Assert(node, DeepEquals, item.node) + + buf := bytes.Buffer{} + enc := yaml.NewEncoder(&buf) + enc.SetIndent(2) + err := enc.Encode(&item.node) + c.Assert(err, IsNil) + err = enc.Close() + c.Assert(err, IsNil) + c.Assert(buf.String(), Equals, item.yaml) + + var doc yaml.Node + err = yaml.Unmarshal([]byte(item.yaml), &doc) + c.Assert(err, IsNil) + + var str string + err = node.Decode(&str) + c.Assert(err, IsNil) + c.Assert(str, Equals, item.str) + } +} + +var nodeEncodeDecodeTests = []struct { + value interface{} + yaml string + node yaml.Node +}{{ + "something simple", + "something simple\n", + yaml.Node{ + Kind: yaml.ScalarNode, + Value: "something simple", + Tag: "!!str", + }, +}, { + `"quoted value"`, + "'\"quoted value\"'\n", + yaml.Node{ + Kind: yaml.ScalarNode, + Style: yaml.SingleQuotedStyle, + Value: `"quoted value"`, + Tag: "!!str", + }, +}, { + 123, + "123", + yaml.Node{ + Kind: yaml.ScalarNode, + Value: `123`, + Tag: "!!int", + }, +}, { + []interface{}{1, 2}, + "[1, 2]", + yaml.Node{ + Kind: yaml.SequenceNode, + Tag: "!!seq", + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "1", + Tag: "!!int", + }, { + Kind: yaml.ScalarNode, + Value: "2", + Tag: "!!int", + }}, + }, +}, { + map[string]interface{}{"a": "b"}, + "a: b", + yaml.Node{ + Kind: yaml.MappingNode, + Tag: "!!map", + Content: []*yaml.Node{{ + Kind: yaml.ScalarNode, + Value: "a", + Tag: "!!str", + }, { + Kind: yaml.ScalarNode, + Value: "b", + Tag: "!!str", + }}, + }, +}} + +func (s *S) TestNodeEncodeDecode(c *C) { + for i, item := range nodeEncodeDecodeTests { + c.Logf("Encode/Decode test value #%d: %#v", i, item.value) + + var v interface{} + err := item.node.Decode(&v) + c.Assert(err, IsNil) + c.Assert(v, DeepEquals, item.value) + + var n yaml.Node + err = n.Encode(item.value) + c.Assert(err, IsNil) + c.Assert(n, DeepEquals, item.node) + } +} + +func (s *S) TestNodeZeroEncodeDecode(c *C) { + // Zero node value behaves as nil when encoding... + var n yaml.Node + data, err := yaml.Marshal(&n) + c.Assert(err, IsNil) + c.Assert(string(data), Equals, "null\n") + + // ... and decoding. + var v *struct{} = &struct{}{} + c.Assert(n.Decode(&v), IsNil) + c.Assert(v, IsNil) + + // ... and even when looking for its tag. + c.Assert(n.ShortTag(), Equals, "!!null") + + // Kind zero is still unknown, though. + n.Line = 1 + _, err = yaml.Marshal(&n) + c.Assert(err, ErrorMatches, "yaml: cannot encode node with unknown kind 0") + c.Assert(n.Decode(&v), ErrorMatches, "yaml: cannot decode node with unknown kind 0") +} + +func (s *S) TestNodeOmitEmpty(c *C) { + var v struct { + A int + B yaml.Node ",omitempty" + } + v.A = 1 + data, err := yaml.Marshal(&v) + c.Assert(err, IsNil) + c.Assert(string(data), Equals, "a: 1\n") + + v.B.Line = 1 + _, err = yaml.Marshal(&v) + c.Assert(err, ErrorMatches, "yaml: cannot encode node with unknown kind 0") +} + +func fprintComments(out io.Writer, node *yaml.Node, indent string) { + switch node.Kind { + case yaml.ScalarNode: + fmt.Fprintf(out, "%s<%s> ", indent, node.Value) + fprintCommentSet(out, node) + fmt.Fprintf(out, "\n") + case yaml.DocumentNode: + fmt.Fprintf(out, "%s ", indent) + fprintCommentSet(out, node) + fmt.Fprintf(out, "\n") + for i := 0; i < len(node.Content); i++ { + fprintComments(out, node.Content[i], indent+" ") + } + case yaml.MappingNode: + fmt.Fprintf(out, "%s ", indent) + fprintCommentSet(out, node) + fmt.Fprintf(out, "\n") + for i := 0; i < len(node.Content); i += 2 { + fprintComments(out, node.Content[i], indent+" ") + fprintComments(out, node.Content[i+1], indent+" ") + } + case yaml.SequenceNode: + fmt.Fprintf(out, "%s ", indent) + fprintCommentSet(out, node) + fmt.Fprintf(out, "\n") + for i := 0; i < len(node.Content); i++ { + fprintComments(out, node.Content[i], indent+" ") + } + } +} + +func fprintCommentSet(out io.Writer, node *yaml.Node) { + if len(node.HeadComment)+len(node.LineComment)+len(node.FootComment) > 0 { + fmt.Fprintf(out, "%q / %q / %q", node.HeadComment, node.LineComment, node.FootComment) + } +} diff --git a/goyaml.v3/parserc.go b/goyaml.v3/parserc.go new file mode 100644 index 0000000..268558a --- /dev/null +++ b/goyaml.v3/parserc.go @@ -0,0 +1,1258 @@ +// +// Copyright (c) 2011-2019 Canonical Ltd +// Copyright (c) 2006-2010 Kirill Simonov +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +package yaml + +import ( + "bytes" +) + +// The parser implements the following grammar: +// +// stream ::= STREAM-START implicit_document? explicit_document* STREAM-END +// implicit_document ::= block_node DOCUMENT-END* +// explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* +// block_node_or_indentless_sequence ::= +// ALIAS +// | properties (block_content | indentless_block_sequence)? +// | block_content +// | indentless_block_sequence +// block_node ::= ALIAS +// | properties block_content? +// | block_content +// flow_node ::= ALIAS +// | properties flow_content? +// | flow_content +// properties ::= TAG ANCHOR? | ANCHOR TAG? +// block_content ::= block_collection | flow_collection | SCALAR +// flow_content ::= flow_collection | SCALAR +// block_collection ::= block_sequence | block_mapping +// flow_collection ::= flow_sequence | flow_mapping +// block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END +// indentless_sequence ::= (BLOCK-ENTRY block_node?)+ +// block_mapping ::= BLOCK-MAPPING_START +// ((KEY block_node_or_indentless_sequence?)? +// (VALUE block_node_or_indentless_sequence?)?)* +// BLOCK-END +// flow_sequence ::= FLOW-SEQUENCE-START +// (flow_sequence_entry FLOW-ENTRY)* +// flow_sequence_entry? +// FLOW-SEQUENCE-END +// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +// flow_mapping ::= FLOW-MAPPING-START +// (flow_mapping_entry FLOW-ENTRY)* +// flow_mapping_entry? +// FLOW-MAPPING-END +// flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + +// Peek the next token in the token queue. +func peek_token(parser *yaml_parser_t) *yaml_token_t { + if parser.token_available || yaml_parser_fetch_more_tokens(parser) { + token := &parser.tokens[parser.tokens_head] + yaml_parser_unfold_comments(parser, token) + return token + } + return nil +} + +// yaml_parser_unfold_comments walks through the comments queue and joins all +// comments behind the position of the provided token into the respective +// top-level comment slices in the parser. +func yaml_parser_unfold_comments(parser *yaml_parser_t, token *yaml_token_t) { + for parser.comments_head < len(parser.comments) && token.start_mark.index >= parser.comments[parser.comments_head].token_mark.index { + comment := &parser.comments[parser.comments_head] + if len(comment.head) > 0 { + if token.typ == yaml_BLOCK_END_TOKEN { + // No heads on ends, so keep comment.head for a follow up token. + break + } + if len(parser.head_comment) > 0 { + parser.head_comment = append(parser.head_comment, '\n') + } + parser.head_comment = append(parser.head_comment, comment.head...) + } + if len(comment.foot) > 0 { + if len(parser.foot_comment) > 0 { + parser.foot_comment = append(parser.foot_comment, '\n') + } + parser.foot_comment = append(parser.foot_comment, comment.foot...) + } + if len(comment.line) > 0 { + if len(parser.line_comment) > 0 { + parser.line_comment = append(parser.line_comment, '\n') + } + parser.line_comment = append(parser.line_comment, comment.line...) + } + *comment = yaml_comment_t{} + parser.comments_head++ + } +} + +// Remove the next token from the queue (must be called after peek_token). +func skip_token(parser *yaml_parser_t) { + parser.token_available = false + parser.tokens_parsed++ + parser.stream_end_produced = parser.tokens[parser.tokens_head].typ == yaml_STREAM_END_TOKEN + parser.tokens_head++ +} + +// Get the next event. +func yaml_parser_parse(parser *yaml_parser_t, event *yaml_event_t) bool { + // Erase the event object. + *event = yaml_event_t{} + + // No events after the end of the stream or error. + if parser.stream_end_produced || parser.error != yaml_NO_ERROR || parser.state == yaml_PARSE_END_STATE { + return true + } + + // Generate the next event. + return yaml_parser_state_machine(parser, event) +} + +// Set parser error. +func yaml_parser_set_parser_error(parser *yaml_parser_t, problem string, problem_mark yaml_mark_t) bool { + parser.error = yaml_PARSER_ERROR + parser.problem = problem + parser.problem_mark = problem_mark + return false +} + +func yaml_parser_set_parser_error_context(parser *yaml_parser_t, context string, context_mark yaml_mark_t, problem string, problem_mark yaml_mark_t) bool { + parser.error = yaml_PARSER_ERROR + parser.context = context + parser.context_mark = context_mark + parser.problem = problem + parser.problem_mark = problem_mark + return false +} + +// State dispatcher. +func yaml_parser_state_machine(parser *yaml_parser_t, event *yaml_event_t) bool { + //trace("yaml_parser_state_machine", "state:", parser.state.String()) + + switch parser.state { + case yaml_PARSE_STREAM_START_STATE: + return yaml_parser_parse_stream_start(parser, event) + + case yaml_PARSE_IMPLICIT_DOCUMENT_START_STATE: + return yaml_parser_parse_document_start(parser, event, true) + + case yaml_PARSE_DOCUMENT_START_STATE: + return yaml_parser_parse_document_start(parser, event, false) + + case yaml_PARSE_DOCUMENT_CONTENT_STATE: + return yaml_parser_parse_document_content(parser, event) + + case yaml_PARSE_DOCUMENT_END_STATE: + return yaml_parser_parse_document_end(parser, event) + + case yaml_PARSE_BLOCK_NODE_STATE: + return yaml_parser_parse_node(parser, event, true, false) + + case yaml_PARSE_BLOCK_NODE_OR_INDENTLESS_SEQUENCE_STATE: + return yaml_parser_parse_node(parser, event, true, true) + + case yaml_PARSE_FLOW_NODE_STATE: + return yaml_parser_parse_node(parser, event, false, false) + + case yaml_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE: + return yaml_parser_parse_block_sequence_entry(parser, event, true) + + case yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE: + return yaml_parser_parse_block_sequence_entry(parser, event, false) + + case yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE: + return yaml_parser_parse_indentless_sequence_entry(parser, event) + + case yaml_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE: + return yaml_parser_parse_block_mapping_key(parser, event, true) + + case yaml_PARSE_BLOCK_MAPPING_KEY_STATE: + return yaml_parser_parse_block_mapping_key(parser, event, false) + + case yaml_PARSE_BLOCK_MAPPING_VALUE_STATE: + return yaml_parser_parse_block_mapping_value(parser, event) + + case yaml_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE: + return yaml_parser_parse_flow_sequence_entry(parser, event, true) + + case yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE: + return yaml_parser_parse_flow_sequence_entry(parser, event, false) + + case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE: + return yaml_parser_parse_flow_sequence_entry_mapping_key(parser, event) + + case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE: + return yaml_parser_parse_flow_sequence_entry_mapping_value(parser, event) + + case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE: + return yaml_parser_parse_flow_sequence_entry_mapping_end(parser, event) + + case yaml_PARSE_FLOW_MAPPING_FIRST_KEY_STATE: + return yaml_parser_parse_flow_mapping_key(parser, event, true) + + case yaml_PARSE_FLOW_MAPPING_KEY_STATE: + return yaml_parser_parse_flow_mapping_key(parser, event, false) + + case yaml_PARSE_FLOW_MAPPING_VALUE_STATE: + return yaml_parser_parse_flow_mapping_value(parser, event, false) + + case yaml_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE: + return yaml_parser_parse_flow_mapping_value(parser, event, true) + + default: + panic("invalid parser state") + } +} + +// Parse the production: +// stream ::= STREAM-START implicit_document? explicit_document* STREAM-END +// ************ +func yaml_parser_parse_stream_start(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_STREAM_START_TOKEN { + return yaml_parser_set_parser_error(parser, "did not find expected ", token.start_mark) + } + parser.state = yaml_PARSE_IMPLICIT_DOCUMENT_START_STATE + *event = yaml_event_t{ + typ: yaml_STREAM_START_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + encoding: token.encoding, + } + skip_token(parser) + return true +} + +// Parse the productions: +// implicit_document ::= block_node DOCUMENT-END* +// * +// explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* +// ************************* +func yaml_parser_parse_document_start(parser *yaml_parser_t, event *yaml_event_t, implicit bool) bool { + + token := peek_token(parser) + if token == nil { + return false + } + + // Parse extra document end indicators. + if !implicit { + for token.typ == yaml_DOCUMENT_END_TOKEN { + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + } + } + + if implicit && token.typ != yaml_VERSION_DIRECTIVE_TOKEN && + token.typ != yaml_TAG_DIRECTIVE_TOKEN && + token.typ != yaml_DOCUMENT_START_TOKEN && + token.typ != yaml_STREAM_END_TOKEN { + // Parse an implicit document. + if !yaml_parser_process_directives(parser, nil, nil) { + return false + } + parser.states = append(parser.states, yaml_PARSE_DOCUMENT_END_STATE) + parser.state = yaml_PARSE_BLOCK_NODE_STATE + + var head_comment []byte + if len(parser.head_comment) > 0 { + // [Go] Scan the header comment backwards, and if an empty line is found, break + // the header so the part before the last empty line goes into the + // document header, while the bottom of it goes into a follow up event. + for i := len(parser.head_comment) - 1; i > 0; i-- { + if parser.head_comment[i] == '\n' { + if i == len(parser.head_comment)-1 { + head_comment = parser.head_comment[:i] + parser.head_comment = parser.head_comment[i+1:] + break + } else if parser.head_comment[i-1] == '\n' { + head_comment = parser.head_comment[:i-1] + parser.head_comment = parser.head_comment[i+1:] + break + } + } + } + } + + *event = yaml_event_t{ + typ: yaml_DOCUMENT_START_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + + head_comment: head_comment, + } + + } else if token.typ != yaml_STREAM_END_TOKEN { + // Parse an explicit document. + var version_directive *yaml_version_directive_t + var tag_directives []yaml_tag_directive_t + start_mark := token.start_mark + if !yaml_parser_process_directives(parser, &version_directive, &tag_directives) { + return false + } + token = peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_DOCUMENT_START_TOKEN { + yaml_parser_set_parser_error(parser, + "did not find expected ", token.start_mark) + return false + } + parser.states = append(parser.states, yaml_PARSE_DOCUMENT_END_STATE) + parser.state = yaml_PARSE_DOCUMENT_CONTENT_STATE + end_mark := token.end_mark + + *event = yaml_event_t{ + typ: yaml_DOCUMENT_START_EVENT, + start_mark: start_mark, + end_mark: end_mark, + version_directive: version_directive, + tag_directives: tag_directives, + implicit: false, + } + skip_token(parser) + + } else { + // Parse the stream end. + parser.state = yaml_PARSE_END_STATE + *event = yaml_event_t{ + typ: yaml_STREAM_END_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + } + skip_token(parser) + } + + return true +} + +// Parse the productions: +// explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* +// *********** +// +func yaml_parser_parse_document_content(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + + if token.typ == yaml_VERSION_DIRECTIVE_TOKEN || + token.typ == yaml_TAG_DIRECTIVE_TOKEN || + token.typ == yaml_DOCUMENT_START_TOKEN || + token.typ == yaml_DOCUMENT_END_TOKEN || + token.typ == yaml_STREAM_END_TOKEN { + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + return yaml_parser_process_empty_scalar(parser, event, + token.start_mark) + } + return yaml_parser_parse_node(parser, event, true, false) +} + +// Parse the productions: +// implicit_document ::= block_node DOCUMENT-END* +// ************* +// explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* +// +func yaml_parser_parse_document_end(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + + start_mark := token.start_mark + end_mark := token.start_mark + + implicit := true + if token.typ == yaml_DOCUMENT_END_TOKEN { + end_mark = token.end_mark + skip_token(parser) + implicit = false + } + + parser.tag_directives = parser.tag_directives[:0] + + parser.state = yaml_PARSE_DOCUMENT_START_STATE + *event = yaml_event_t{ + typ: yaml_DOCUMENT_END_EVENT, + start_mark: start_mark, + end_mark: end_mark, + implicit: implicit, + } + yaml_parser_set_event_comments(parser, event) + if len(event.head_comment) > 0 && len(event.foot_comment) == 0 { + event.foot_comment = event.head_comment + event.head_comment = nil + } + return true +} + +func yaml_parser_set_event_comments(parser *yaml_parser_t, event *yaml_event_t) { + event.head_comment = parser.head_comment + event.line_comment = parser.line_comment + event.foot_comment = parser.foot_comment + parser.head_comment = nil + parser.line_comment = nil + parser.foot_comment = nil + parser.tail_comment = nil + parser.stem_comment = nil +} + +// Parse the productions: +// block_node_or_indentless_sequence ::= +// ALIAS +// ***** +// | properties (block_content | indentless_block_sequence)? +// ********** * +// | block_content | indentless_block_sequence +// * +// block_node ::= ALIAS +// ***** +// | properties block_content? +// ********** * +// | block_content +// * +// flow_node ::= ALIAS +// ***** +// | properties flow_content? +// ********** * +// | flow_content +// * +// properties ::= TAG ANCHOR? | ANCHOR TAG? +// ************************* +// block_content ::= block_collection | flow_collection | SCALAR +// ****** +// flow_content ::= flow_collection | SCALAR +// ****** +func yaml_parser_parse_node(parser *yaml_parser_t, event *yaml_event_t, block, indentless_sequence bool) bool { + //defer trace("yaml_parser_parse_node", "block:", block, "indentless_sequence:", indentless_sequence)() + + token := peek_token(parser) + if token == nil { + return false + } + + if token.typ == yaml_ALIAS_TOKEN { + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + *event = yaml_event_t{ + typ: yaml_ALIAS_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + anchor: token.value, + } + yaml_parser_set_event_comments(parser, event) + skip_token(parser) + return true + } + + start_mark := token.start_mark + end_mark := token.start_mark + + var tag_token bool + var tag_handle, tag_suffix, anchor []byte + var tag_mark yaml_mark_t + if token.typ == yaml_ANCHOR_TOKEN { + anchor = token.value + start_mark = token.start_mark + end_mark = token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ == yaml_TAG_TOKEN { + tag_token = true + tag_handle = token.value + tag_suffix = token.suffix + tag_mark = token.start_mark + end_mark = token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + } + } else if token.typ == yaml_TAG_TOKEN { + tag_token = true + tag_handle = token.value + tag_suffix = token.suffix + start_mark = token.start_mark + tag_mark = token.start_mark + end_mark = token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ == yaml_ANCHOR_TOKEN { + anchor = token.value + end_mark = token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + } + } + + var tag []byte + if tag_token { + if len(tag_handle) == 0 { + tag = tag_suffix + tag_suffix = nil + } else { + for i := range parser.tag_directives { + if bytes.Equal(parser.tag_directives[i].handle, tag_handle) { + tag = append([]byte(nil), parser.tag_directives[i].prefix...) + tag = append(tag, tag_suffix...) + break + } + } + if len(tag) == 0 { + yaml_parser_set_parser_error_context(parser, + "while parsing a node", start_mark, + "found undefined tag handle", tag_mark) + return false + } + } + } + + implicit := len(tag) == 0 + if indentless_sequence && token.typ == yaml_BLOCK_ENTRY_TOKEN { + end_mark = token.end_mark + parser.state = yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE + *event = yaml_event_t{ + typ: yaml_SEQUENCE_START_EVENT, + start_mark: start_mark, + end_mark: end_mark, + anchor: anchor, + tag: tag, + implicit: implicit, + style: yaml_style_t(yaml_BLOCK_SEQUENCE_STYLE), + } + return true + } + if token.typ == yaml_SCALAR_TOKEN { + var plain_implicit, quoted_implicit bool + end_mark = token.end_mark + if (len(tag) == 0 && token.style == yaml_PLAIN_SCALAR_STYLE) || (len(tag) == 1 && tag[0] == '!') { + plain_implicit = true + } else if len(tag) == 0 { + quoted_implicit = true + } + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + + *event = yaml_event_t{ + typ: yaml_SCALAR_EVENT, + start_mark: start_mark, + end_mark: end_mark, + anchor: anchor, + tag: tag, + value: token.value, + implicit: plain_implicit, + quoted_implicit: quoted_implicit, + style: yaml_style_t(token.style), + } + yaml_parser_set_event_comments(parser, event) + skip_token(parser) + return true + } + if token.typ == yaml_FLOW_SEQUENCE_START_TOKEN { + // [Go] Some of the events below can be merged as they differ only on style. + end_mark = token.end_mark + parser.state = yaml_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE + *event = yaml_event_t{ + typ: yaml_SEQUENCE_START_EVENT, + start_mark: start_mark, + end_mark: end_mark, + anchor: anchor, + tag: tag, + implicit: implicit, + style: yaml_style_t(yaml_FLOW_SEQUENCE_STYLE), + } + yaml_parser_set_event_comments(parser, event) + return true + } + if token.typ == yaml_FLOW_MAPPING_START_TOKEN { + end_mark = token.end_mark + parser.state = yaml_PARSE_FLOW_MAPPING_FIRST_KEY_STATE + *event = yaml_event_t{ + typ: yaml_MAPPING_START_EVENT, + start_mark: start_mark, + end_mark: end_mark, + anchor: anchor, + tag: tag, + implicit: implicit, + style: yaml_style_t(yaml_FLOW_MAPPING_STYLE), + } + yaml_parser_set_event_comments(parser, event) + return true + } + if block && token.typ == yaml_BLOCK_SEQUENCE_START_TOKEN { + end_mark = token.end_mark + parser.state = yaml_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE + *event = yaml_event_t{ + typ: yaml_SEQUENCE_START_EVENT, + start_mark: start_mark, + end_mark: end_mark, + anchor: anchor, + tag: tag, + implicit: implicit, + style: yaml_style_t(yaml_BLOCK_SEQUENCE_STYLE), + } + if parser.stem_comment != nil { + event.head_comment = parser.stem_comment + parser.stem_comment = nil + } + return true + } + if block && token.typ == yaml_BLOCK_MAPPING_START_TOKEN { + end_mark = token.end_mark + parser.state = yaml_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE + *event = yaml_event_t{ + typ: yaml_MAPPING_START_EVENT, + start_mark: start_mark, + end_mark: end_mark, + anchor: anchor, + tag: tag, + implicit: implicit, + style: yaml_style_t(yaml_BLOCK_MAPPING_STYLE), + } + if parser.stem_comment != nil { + event.head_comment = parser.stem_comment + parser.stem_comment = nil + } + return true + } + if len(anchor) > 0 || len(tag) > 0 { + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + + *event = yaml_event_t{ + typ: yaml_SCALAR_EVENT, + start_mark: start_mark, + end_mark: end_mark, + anchor: anchor, + tag: tag, + implicit: implicit, + quoted_implicit: false, + style: yaml_style_t(yaml_PLAIN_SCALAR_STYLE), + } + return true + } + + context := "while parsing a flow node" + if block { + context = "while parsing a block node" + } + yaml_parser_set_parser_error_context(parser, context, start_mark, + "did not find expected node content", token.start_mark) + return false +} + +// Parse the productions: +// block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END +// ******************** *********** * ********* +// +func yaml_parser_parse_block_sequence_entry(parser *yaml_parser_t, event *yaml_event_t, first bool) bool { + if first { + token := peek_token(parser) + if token == nil { + return false + } + parser.marks = append(parser.marks, token.start_mark) + skip_token(parser) + } + + token := peek_token(parser) + if token == nil { + return false + } + + if token.typ == yaml_BLOCK_ENTRY_TOKEN { + mark := token.end_mark + prior_head_len := len(parser.head_comment) + skip_token(parser) + yaml_parser_split_stem_comment(parser, prior_head_len) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_BLOCK_ENTRY_TOKEN && token.typ != yaml_BLOCK_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE) + return yaml_parser_parse_node(parser, event, true, false) + } else { + parser.state = yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE + return yaml_parser_process_empty_scalar(parser, event, mark) + } + } + if token.typ == yaml_BLOCK_END_TOKEN { + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + + *event = yaml_event_t{ + typ: yaml_SEQUENCE_END_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + } + + skip_token(parser) + return true + } + + context_mark := parser.marks[len(parser.marks)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + return yaml_parser_set_parser_error_context(parser, + "while parsing a block collection", context_mark, + "did not find expected '-' indicator", token.start_mark) +} + +// Parse the productions: +// indentless_sequence ::= (BLOCK-ENTRY block_node?)+ +// *********** * +func yaml_parser_parse_indentless_sequence_entry(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + + if token.typ == yaml_BLOCK_ENTRY_TOKEN { + mark := token.end_mark + prior_head_len := len(parser.head_comment) + skip_token(parser) + yaml_parser_split_stem_comment(parser, prior_head_len) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_BLOCK_ENTRY_TOKEN && + token.typ != yaml_KEY_TOKEN && + token.typ != yaml_VALUE_TOKEN && + token.typ != yaml_BLOCK_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE) + return yaml_parser_parse_node(parser, event, true, false) + } + parser.state = yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE + return yaml_parser_process_empty_scalar(parser, event, mark) + } + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + + *event = yaml_event_t{ + typ: yaml_SEQUENCE_END_EVENT, + start_mark: token.start_mark, + end_mark: token.start_mark, // [Go] Shouldn't this be token.end_mark? + } + return true +} + +// Split stem comment from head comment. +// +// When a sequence or map is found under a sequence entry, the former head comment +// is assigned to the underlying sequence or map as a whole, not the individual +// sequence or map entry as would be expected otherwise. To handle this case the +// previous head comment is moved aside as the stem comment. +func yaml_parser_split_stem_comment(parser *yaml_parser_t, stem_len int) { + if stem_len == 0 { + return + } + + token := peek_token(parser) + if token == nil || token.typ != yaml_BLOCK_SEQUENCE_START_TOKEN && token.typ != yaml_BLOCK_MAPPING_START_TOKEN { + return + } + + parser.stem_comment = parser.head_comment[:stem_len] + if len(parser.head_comment) == stem_len { + parser.head_comment = nil + } else { + // Copy suffix to prevent very strange bugs if someone ever appends + // further bytes to the prefix in the stem_comment slice above. + parser.head_comment = append([]byte(nil), parser.head_comment[stem_len+1:]...) + } +} + +// Parse the productions: +// block_mapping ::= BLOCK-MAPPING_START +// ******************* +// ((KEY block_node_or_indentless_sequence?)? +// *** * +// (VALUE block_node_or_indentless_sequence?)?)* +// +// BLOCK-END +// ********* +// +func yaml_parser_parse_block_mapping_key(parser *yaml_parser_t, event *yaml_event_t, first bool) bool { + if first { + token := peek_token(parser) + if token == nil { + return false + } + parser.marks = append(parser.marks, token.start_mark) + skip_token(parser) + } + + token := peek_token(parser) + if token == nil { + return false + } + + // [Go] A tail comment was left from the prior mapping value processed. Emit an event + // as it needs to be processed with that value and not the following key. + if len(parser.tail_comment) > 0 { + *event = yaml_event_t{ + typ: yaml_TAIL_COMMENT_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + foot_comment: parser.tail_comment, + } + parser.tail_comment = nil + return true + } + + if token.typ == yaml_KEY_TOKEN { + mark := token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_KEY_TOKEN && + token.typ != yaml_VALUE_TOKEN && + token.typ != yaml_BLOCK_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_BLOCK_MAPPING_VALUE_STATE) + return yaml_parser_parse_node(parser, event, true, true) + } else { + parser.state = yaml_PARSE_BLOCK_MAPPING_VALUE_STATE + return yaml_parser_process_empty_scalar(parser, event, mark) + } + } else if token.typ == yaml_BLOCK_END_TOKEN { + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + *event = yaml_event_t{ + typ: yaml_MAPPING_END_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + } + yaml_parser_set_event_comments(parser, event) + skip_token(parser) + return true + } + + context_mark := parser.marks[len(parser.marks)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + return yaml_parser_set_parser_error_context(parser, + "while parsing a block mapping", context_mark, + "did not find expected key", token.start_mark) +} + +// Parse the productions: +// block_mapping ::= BLOCK-MAPPING_START +// +// ((KEY block_node_or_indentless_sequence?)? +// +// (VALUE block_node_or_indentless_sequence?)?)* +// ***** * +// BLOCK-END +// +// +func yaml_parser_parse_block_mapping_value(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + if token.typ == yaml_VALUE_TOKEN { + mark := token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_KEY_TOKEN && + token.typ != yaml_VALUE_TOKEN && + token.typ != yaml_BLOCK_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_BLOCK_MAPPING_KEY_STATE) + return yaml_parser_parse_node(parser, event, true, true) + } + parser.state = yaml_PARSE_BLOCK_MAPPING_KEY_STATE + return yaml_parser_process_empty_scalar(parser, event, mark) + } + parser.state = yaml_PARSE_BLOCK_MAPPING_KEY_STATE + return yaml_parser_process_empty_scalar(parser, event, token.start_mark) +} + +// Parse the productions: +// flow_sequence ::= FLOW-SEQUENCE-START +// ******************* +// (flow_sequence_entry FLOW-ENTRY)* +// * ********** +// flow_sequence_entry? +// * +// FLOW-SEQUENCE-END +// ***************** +// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +// * +// +func yaml_parser_parse_flow_sequence_entry(parser *yaml_parser_t, event *yaml_event_t, first bool) bool { + if first { + token := peek_token(parser) + if token == nil { + return false + } + parser.marks = append(parser.marks, token.start_mark) + skip_token(parser) + } + token := peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_FLOW_SEQUENCE_END_TOKEN { + if !first { + if token.typ == yaml_FLOW_ENTRY_TOKEN { + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + } else { + context_mark := parser.marks[len(parser.marks)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + return yaml_parser_set_parser_error_context(parser, + "while parsing a flow sequence", context_mark, + "did not find expected ',' or ']'", token.start_mark) + } + } + + if token.typ == yaml_KEY_TOKEN { + parser.state = yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE + *event = yaml_event_t{ + typ: yaml_MAPPING_START_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + implicit: true, + style: yaml_style_t(yaml_FLOW_MAPPING_STYLE), + } + skip_token(parser) + return true + } else if token.typ != yaml_FLOW_SEQUENCE_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE) + return yaml_parser_parse_node(parser, event, false, false) + } + } + + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + + *event = yaml_event_t{ + typ: yaml_SEQUENCE_END_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + } + yaml_parser_set_event_comments(parser, event) + + skip_token(parser) + return true +} + +// +// Parse the productions: +// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +// *** * +// +func yaml_parser_parse_flow_sequence_entry_mapping_key(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_VALUE_TOKEN && + token.typ != yaml_FLOW_ENTRY_TOKEN && + token.typ != yaml_FLOW_SEQUENCE_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE) + return yaml_parser_parse_node(parser, event, false, false) + } + mark := token.end_mark + skip_token(parser) + parser.state = yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE + return yaml_parser_process_empty_scalar(parser, event, mark) +} + +// Parse the productions: +// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +// ***** * +// +func yaml_parser_parse_flow_sequence_entry_mapping_value(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + if token.typ == yaml_VALUE_TOKEN { + skip_token(parser) + token := peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_FLOW_ENTRY_TOKEN && token.typ != yaml_FLOW_SEQUENCE_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE) + return yaml_parser_parse_node(parser, event, false, false) + } + } + parser.state = yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE + return yaml_parser_process_empty_scalar(parser, event, token.start_mark) +} + +// Parse the productions: +// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +// * +// +func yaml_parser_parse_flow_sequence_entry_mapping_end(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + parser.state = yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE + *event = yaml_event_t{ + typ: yaml_MAPPING_END_EVENT, + start_mark: token.start_mark, + end_mark: token.start_mark, // [Go] Shouldn't this be end_mark? + } + return true +} + +// Parse the productions: +// flow_mapping ::= FLOW-MAPPING-START +// ****************** +// (flow_mapping_entry FLOW-ENTRY)* +// * ********** +// flow_mapping_entry? +// ****************** +// FLOW-MAPPING-END +// **************** +// flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +// * *** * +// +func yaml_parser_parse_flow_mapping_key(parser *yaml_parser_t, event *yaml_event_t, first bool) bool { + if first { + token := peek_token(parser) + parser.marks = append(parser.marks, token.start_mark) + skip_token(parser) + } + + token := peek_token(parser) + if token == nil { + return false + } + + if token.typ != yaml_FLOW_MAPPING_END_TOKEN { + if !first { + if token.typ == yaml_FLOW_ENTRY_TOKEN { + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + } else { + context_mark := parser.marks[len(parser.marks)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + return yaml_parser_set_parser_error_context(parser, + "while parsing a flow mapping", context_mark, + "did not find expected ',' or '}'", token.start_mark) + } + } + + if token.typ == yaml_KEY_TOKEN { + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_VALUE_TOKEN && + token.typ != yaml_FLOW_ENTRY_TOKEN && + token.typ != yaml_FLOW_MAPPING_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_FLOW_MAPPING_VALUE_STATE) + return yaml_parser_parse_node(parser, event, false, false) + } else { + parser.state = yaml_PARSE_FLOW_MAPPING_VALUE_STATE + return yaml_parser_process_empty_scalar(parser, event, token.start_mark) + } + } else if token.typ != yaml_FLOW_MAPPING_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE) + return yaml_parser_parse_node(parser, event, false, false) + } + } + + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + *event = yaml_event_t{ + typ: yaml_MAPPING_END_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + } + yaml_parser_set_event_comments(parser, event) + skip_token(parser) + return true +} + +// Parse the productions: +// flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +// * ***** * +// +func yaml_parser_parse_flow_mapping_value(parser *yaml_parser_t, event *yaml_event_t, empty bool) bool { + token := peek_token(parser) + if token == nil { + return false + } + if empty { + parser.state = yaml_PARSE_FLOW_MAPPING_KEY_STATE + return yaml_parser_process_empty_scalar(parser, event, token.start_mark) + } + if token.typ == yaml_VALUE_TOKEN { + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_FLOW_ENTRY_TOKEN && token.typ != yaml_FLOW_MAPPING_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_FLOW_MAPPING_KEY_STATE) + return yaml_parser_parse_node(parser, event, false, false) + } + } + parser.state = yaml_PARSE_FLOW_MAPPING_KEY_STATE + return yaml_parser_process_empty_scalar(parser, event, token.start_mark) +} + +// Generate an empty scalar event. +func yaml_parser_process_empty_scalar(parser *yaml_parser_t, event *yaml_event_t, mark yaml_mark_t) bool { + *event = yaml_event_t{ + typ: yaml_SCALAR_EVENT, + start_mark: mark, + end_mark: mark, + value: nil, // Empty + implicit: true, + style: yaml_style_t(yaml_PLAIN_SCALAR_STYLE), + } + return true +} + +var default_tag_directives = []yaml_tag_directive_t{ + {[]byte("!"), []byte("!")}, + {[]byte("!!"), []byte("tag:yaml.org,2002:")}, +} + +// Parse directives. +func yaml_parser_process_directives(parser *yaml_parser_t, + version_directive_ref **yaml_version_directive_t, + tag_directives_ref *[]yaml_tag_directive_t) bool { + + var version_directive *yaml_version_directive_t + var tag_directives []yaml_tag_directive_t + + token := peek_token(parser) + if token == nil { + return false + } + + for token.typ == yaml_VERSION_DIRECTIVE_TOKEN || token.typ == yaml_TAG_DIRECTIVE_TOKEN { + if token.typ == yaml_VERSION_DIRECTIVE_TOKEN { + if version_directive != nil { + yaml_parser_set_parser_error(parser, + "found duplicate %YAML directive", token.start_mark) + return false + } + if token.major != 1 || token.minor != 1 { + yaml_parser_set_parser_error(parser, + "found incompatible YAML document", token.start_mark) + return false + } + version_directive = &yaml_version_directive_t{ + major: token.major, + minor: token.minor, + } + } else if token.typ == yaml_TAG_DIRECTIVE_TOKEN { + value := yaml_tag_directive_t{ + handle: token.value, + prefix: token.prefix, + } + if !yaml_parser_append_tag_directive(parser, value, false, token.start_mark) { + return false + } + tag_directives = append(tag_directives, value) + } + + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + } + + for i := range default_tag_directives { + if !yaml_parser_append_tag_directive(parser, default_tag_directives[i], true, token.start_mark) { + return false + } + } + + if version_directive_ref != nil { + *version_directive_ref = version_directive + } + if tag_directives_ref != nil { + *tag_directives_ref = tag_directives + } + return true +} + +// Append a tag directive to the directives stack. +func yaml_parser_append_tag_directive(parser *yaml_parser_t, value yaml_tag_directive_t, allow_duplicates bool, mark yaml_mark_t) bool { + for i := range parser.tag_directives { + if bytes.Equal(value.handle, parser.tag_directives[i].handle) { + if allow_duplicates { + return true + } + return yaml_parser_set_parser_error(parser, "found duplicate %TAG directive", mark) + } + } + + // [Go] I suspect the copy is unnecessary. This was likely done + // because there was no way to track ownership of the data. + value_copy := yaml_tag_directive_t{ + handle: make([]byte, len(value.handle)), + prefix: make([]byte, len(value.prefix)), + } + copy(value_copy.handle, value.handle) + copy(value_copy.prefix, value.prefix) + parser.tag_directives = append(parser.tag_directives, value_copy) + return true +} diff --git a/goyaml.v3/readerc.go b/goyaml.v3/readerc.go new file mode 100644 index 0000000..b7de0a8 --- /dev/null +++ b/goyaml.v3/readerc.go @@ -0,0 +1,434 @@ +// +// Copyright (c) 2011-2019 Canonical Ltd +// Copyright (c) 2006-2010 Kirill Simonov +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +package yaml + +import ( + "io" +) + +// Set the reader error and return 0. +func yaml_parser_set_reader_error(parser *yaml_parser_t, problem string, offset int, value int) bool { + parser.error = yaml_READER_ERROR + parser.problem = problem + parser.problem_offset = offset + parser.problem_value = value + return false +} + +// Byte order marks. +const ( + bom_UTF8 = "\xef\xbb\xbf" + bom_UTF16LE = "\xff\xfe" + bom_UTF16BE = "\xfe\xff" +) + +// Determine the input stream encoding by checking the BOM symbol. If no BOM is +// found, the UTF-8 encoding is assumed. Return 1 on success, 0 on failure. +func yaml_parser_determine_encoding(parser *yaml_parser_t) bool { + // Ensure that we had enough bytes in the raw buffer. + for !parser.eof && len(parser.raw_buffer)-parser.raw_buffer_pos < 3 { + if !yaml_parser_update_raw_buffer(parser) { + return false + } + } + + // Determine the encoding. + buf := parser.raw_buffer + pos := parser.raw_buffer_pos + avail := len(buf) - pos + if avail >= 2 && buf[pos] == bom_UTF16LE[0] && buf[pos+1] == bom_UTF16LE[1] { + parser.encoding = yaml_UTF16LE_ENCODING + parser.raw_buffer_pos += 2 + parser.offset += 2 + } else if avail >= 2 && buf[pos] == bom_UTF16BE[0] && buf[pos+1] == bom_UTF16BE[1] { + parser.encoding = yaml_UTF16BE_ENCODING + parser.raw_buffer_pos += 2 + parser.offset += 2 + } else if avail >= 3 && buf[pos] == bom_UTF8[0] && buf[pos+1] == bom_UTF8[1] && buf[pos+2] == bom_UTF8[2] { + parser.encoding = yaml_UTF8_ENCODING + parser.raw_buffer_pos += 3 + parser.offset += 3 + } else { + parser.encoding = yaml_UTF8_ENCODING + } + return true +} + +// Update the raw buffer. +func yaml_parser_update_raw_buffer(parser *yaml_parser_t) bool { + size_read := 0 + + // Return if the raw buffer is full. + if parser.raw_buffer_pos == 0 && len(parser.raw_buffer) == cap(parser.raw_buffer) { + return true + } + + // Return on EOF. + if parser.eof { + return true + } + + // Move the remaining bytes in the raw buffer to the beginning. + if parser.raw_buffer_pos > 0 && parser.raw_buffer_pos < len(parser.raw_buffer) { + copy(parser.raw_buffer, parser.raw_buffer[parser.raw_buffer_pos:]) + } + parser.raw_buffer = parser.raw_buffer[:len(parser.raw_buffer)-parser.raw_buffer_pos] + parser.raw_buffer_pos = 0 + + // Call the read handler to fill the buffer. + size_read, err := parser.read_handler(parser, parser.raw_buffer[len(parser.raw_buffer):cap(parser.raw_buffer)]) + parser.raw_buffer = parser.raw_buffer[:len(parser.raw_buffer)+size_read] + if err == io.EOF { + parser.eof = true + } else if err != nil { + return yaml_parser_set_reader_error(parser, "input error: "+err.Error(), parser.offset, -1) + } + return true +} + +// Ensure that the buffer contains at least `length` characters. +// Return true on success, false on failure. +// +// The length is supposed to be significantly less that the buffer size. +func yaml_parser_update_buffer(parser *yaml_parser_t, length int) bool { + if parser.read_handler == nil { + panic("read handler must be set") + } + + // [Go] This function was changed to guarantee the requested length size at EOF. + // The fact we need to do this is pretty awful, but the description above implies + // for that to be the case, and there are tests + + // If the EOF flag is set and the raw buffer is empty, do nothing. + if parser.eof && parser.raw_buffer_pos == len(parser.raw_buffer) { + // [Go] ACTUALLY! Read the documentation of this function above. + // This is just broken. To return true, we need to have the + // given length in the buffer. Not doing that means every single + // check that calls this function to make sure the buffer has a + // given length is Go) panicking; or C) accessing invalid memory. + //return true + } + + // Return if the buffer contains enough characters. + if parser.unread >= length { + return true + } + + // Determine the input encoding if it is not known yet. + if parser.encoding == yaml_ANY_ENCODING { + if !yaml_parser_determine_encoding(parser) { + return false + } + } + + // Move the unread characters to the beginning of the buffer. + buffer_len := len(parser.buffer) + if parser.buffer_pos > 0 && parser.buffer_pos < buffer_len { + copy(parser.buffer, parser.buffer[parser.buffer_pos:]) + buffer_len -= parser.buffer_pos + parser.buffer_pos = 0 + } else if parser.buffer_pos == buffer_len { + buffer_len = 0 + parser.buffer_pos = 0 + } + + // Open the whole buffer for writing, and cut it before returning. + parser.buffer = parser.buffer[:cap(parser.buffer)] + + // Fill the buffer until it has enough characters. + first := true + for parser.unread < length { + + // Fill the raw buffer if necessary. + if !first || parser.raw_buffer_pos == len(parser.raw_buffer) { + if !yaml_parser_update_raw_buffer(parser) { + parser.buffer = parser.buffer[:buffer_len] + return false + } + } + first = false + + // Decode the raw buffer. + inner: + for parser.raw_buffer_pos != len(parser.raw_buffer) { + var value rune + var width int + + raw_unread := len(parser.raw_buffer) - parser.raw_buffer_pos + + // Decode the next character. + switch parser.encoding { + case yaml_UTF8_ENCODING: + // Decode a UTF-8 character. Check RFC 3629 + // (http://www.ietf.org/rfc/rfc3629.txt) for more details. + // + // The following table (taken from the RFC) is used for + // decoding. + // + // Char. number range | UTF-8 octet sequence + // (hexadecimal) | (binary) + // --------------------+------------------------------------ + // 0000 0000-0000 007F | 0xxxxxxx + // 0000 0080-0000 07FF | 110xxxxx 10xxxxxx + // 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx + // 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + // + // Additionally, the characters in the range 0xD800-0xDFFF + // are prohibited as they are reserved for use with UTF-16 + // surrogate pairs. + + // Determine the length of the UTF-8 sequence. + octet := parser.raw_buffer[parser.raw_buffer_pos] + switch { + case octet&0x80 == 0x00: + width = 1 + case octet&0xE0 == 0xC0: + width = 2 + case octet&0xF0 == 0xE0: + width = 3 + case octet&0xF8 == 0xF0: + width = 4 + default: + // The leading octet is invalid. + return yaml_parser_set_reader_error(parser, + "invalid leading UTF-8 octet", + parser.offset, int(octet)) + } + + // Check if the raw buffer contains an incomplete character. + if width > raw_unread { + if parser.eof { + return yaml_parser_set_reader_error(parser, + "incomplete UTF-8 octet sequence", + parser.offset, -1) + } + break inner + } + + // Decode the leading octet. + switch { + case octet&0x80 == 0x00: + value = rune(octet & 0x7F) + case octet&0xE0 == 0xC0: + value = rune(octet & 0x1F) + case octet&0xF0 == 0xE0: + value = rune(octet & 0x0F) + case octet&0xF8 == 0xF0: + value = rune(octet & 0x07) + default: + value = 0 + } + + // Check and decode the trailing octets. + for k := 1; k < width; k++ { + octet = parser.raw_buffer[parser.raw_buffer_pos+k] + + // Check if the octet is valid. + if (octet & 0xC0) != 0x80 { + return yaml_parser_set_reader_error(parser, + "invalid trailing UTF-8 octet", + parser.offset+k, int(octet)) + } + + // Decode the octet. + value = (value << 6) + rune(octet&0x3F) + } + + // Check the length of the sequence against the value. + switch { + case width == 1: + case width == 2 && value >= 0x80: + case width == 3 && value >= 0x800: + case width == 4 && value >= 0x10000: + default: + return yaml_parser_set_reader_error(parser, + "invalid length of a UTF-8 sequence", + parser.offset, -1) + } + + // Check the range of the value. + if value >= 0xD800 && value <= 0xDFFF || value > 0x10FFFF { + return yaml_parser_set_reader_error(parser, + "invalid Unicode character", + parser.offset, int(value)) + } + + case yaml_UTF16LE_ENCODING, yaml_UTF16BE_ENCODING: + var low, high int + if parser.encoding == yaml_UTF16LE_ENCODING { + low, high = 0, 1 + } else { + low, high = 1, 0 + } + + // The UTF-16 encoding is not as simple as one might + // naively think. Check RFC 2781 + // (http://www.ietf.org/rfc/rfc2781.txt). + // + // Normally, two subsequent bytes describe a Unicode + // character. However a special technique (called a + // surrogate pair) is used for specifying character + // values larger than 0xFFFF. + // + // A surrogate pair consists of two pseudo-characters: + // high surrogate area (0xD800-0xDBFF) + // low surrogate area (0xDC00-0xDFFF) + // + // The following formulas are used for decoding + // and encoding characters using surrogate pairs: + // + // U = U' + 0x10000 (0x01 00 00 <= U <= 0x10 FF FF) + // U' = yyyyyyyyyyxxxxxxxxxx (0 <= U' <= 0x0F FF FF) + // W1 = 110110yyyyyyyyyy + // W2 = 110111xxxxxxxxxx + // + // where U is the character value, W1 is the high surrogate + // area, W2 is the low surrogate area. + + // Check for incomplete UTF-16 character. + if raw_unread < 2 { + if parser.eof { + return yaml_parser_set_reader_error(parser, + "incomplete UTF-16 character", + parser.offset, -1) + } + break inner + } + + // Get the character. + value = rune(parser.raw_buffer[parser.raw_buffer_pos+low]) + + (rune(parser.raw_buffer[parser.raw_buffer_pos+high]) << 8) + + // Check for unexpected low surrogate area. + if value&0xFC00 == 0xDC00 { + return yaml_parser_set_reader_error(parser, + "unexpected low surrogate area", + parser.offset, int(value)) + } + + // Check for a high surrogate area. + if value&0xFC00 == 0xD800 { + width = 4 + + // Check for incomplete surrogate pair. + if raw_unread < 4 { + if parser.eof { + return yaml_parser_set_reader_error(parser, + "incomplete UTF-16 surrogate pair", + parser.offset, -1) + } + break inner + } + + // Get the next character. + value2 := rune(parser.raw_buffer[parser.raw_buffer_pos+low+2]) + + (rune(parser.raw_buffer[parser.raw_buffer_pos+high+2]) << 8) + + // Check for a low surrogate area. + if value2&0xFC00 != 0xDC00 { + return yaml_parser_set_reader_error(parser, + "expected low surrogate area", + parser.offset+2, int(value2)) + } + + // Generate the value of the surrogate pair. + value = 0x10000 + ((value & 0x3FF) << 10) + (value2 & 0x3FF) + } else { + width = 2 + } + + default: + panic("impossible") + } + + // Check if the character is in the allowed range: + // #x9 | #xA | #xD | [#x20-#x7E] (8 bit) + // | #x85 | [#xA0-#xD7FF] | [#xE000-#xFFFD] (16 bit) + // | [#x10000-#x10FFFF] (32 bit) + switch { + case value == 0x09: + case value == 0x0A: + case value == 0x0D: + case value >= 0x20 && value <= 0x7E: + case value == 0x85: + case value >= 0xA0 && value <= 0xD7FF: + case value >= 0xE000 && value <= 0xFFFD: + case value >= 0x10000 && value <= 0x10FFFF: + default: + return yaml_parser_set_reader_error(parser, + "control characters are not allowed", + parser.offset, int(value)) + } + + // Move the raw pointers. + parser.raw_buffer_pos += width + parser.offset += width + + // Finally put the character into the buffer. + if value <= 0x7F { + // 0000 0000-0000 007F . 0xxxxxxx + parser.buffer[buffer_len+0] = byte(value) + buffer_len += 1 + } else if value <= 0x7FF { + // 0000 0080-0000 07FF . 110xxxxx 10xxxxxx + parser.buffer[buffer_len+0] = byte(0xC0 + (value >> 6)) + parser.buffer[buffer_len+1] = byte(0x80 + (value & 0x3F)) + buffer_len += 2 + } else if value <= 0xFFFF { + // 0000 0800-0000 FFFF . 1110xxxx 10xxxxxx 10xxxxxx + parser.buffer[buffer_len+0] = byte(0xE0 + (value >> 12)) + parser.buffer[buffer_len+1] = byte(0x80 + ((value >> 6) & 0x3F)) + parser.buffer[buffer_len+2] = byte(0x80 + (value & 0x3F)) + buffer_len += 3 + } else { + // 0001 0000-0010 FFFF . 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + parser.buffer[buffer_len+0] = byte(0xF0 + (value >> 18)) + parser.buffer[buffer_len+1] = byte(0x80 + ((value >> 12) & 0x3F)) + parser.buffer[buffer_len+2] = byte(0x80 + ((value >> 6) & 0x3F)) + parser.buffer[buffer_len+3] = byte(0x80 + (value & 0x3F)) + buffer_len += 4 + } + + parser.unread++ + } + + // On EOF, put NUL into the buffer and return. + if parser.eof { + parser.buffer[buffer_len] = 0 + buffer_len++ + parser.unread++ + break + } + } + // [Go] Read the documentation of this function above. To return true, + // we need to have the given length in the buffer. Not doing that means + // every single check that calls this function to make sure the buffer + // has a given length is Go) panicking; or C) accessing invalid memory. + // This happens here due to the EOF above breaking early. + for buffer_len < length { + parser.buffer[buffer_len] = 0 + buffer_len++ + } + parser.buffer = parser.buffer[:buffer_len] + return true +} diff --git a/goyaml.v3/resolve.go b/goyaml.v3/resolve.go new file mode 100644 index 0000000..64ae888 --- /dev/null +++ b/goyaml.v3/resolve.go @@ -0,0 +1,326 @@ +// +// Copyright (c) 2011-2019 Canonical Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package yaml + +import ( + "encoding/base64" + "math" + "regexp" + "strconv" + "strings" + "time" +) + +type resolveMapItem struct { + value interface{} + tag string +} + +var resolveTable = make([]byte, 256) +var resolveMap = make(map[string]resolveMapItem) + +func init() { + t := resolveTable + t[int('+')] = 'S' // Sign + t[int('-')] = 'S' + for _, c := range "0123456789" { + t[int(c)] = 'D' // Digit + } + for _, c := range "yYnNtTfFoO~" { + t[int(c)] = 'M' // In map + } + t[int('.')] = '.' // Float (potentially in map) + + var resolveMapList = []struct { + v interface{} + tag string + l []string + }{ + {true, boolTag, []string{"true", "True", "TRUE"}}, + {false, boolTag, []string{"false", "False", "FALSE"}}, + {nil, nullTag, []string{"", "~", "null", "Null", "NULL"}}, + {math.NaN(), floatTag, []string{".nan", ".NaN", ".NAN"}}, + {math.Inf(+1), floatTag, []string{".inf", ".Inf", ".INF"}}, + {math.Inf(+1), floatTag, []string{"+.inf", "+.Inf", "+.INF"}}, + {math.Inf(-1), floatTag, []string{"-.inf", "-.Inf", "-.INF"}}, + {"<<", mergeTag, []string{"<<"}}, + } + + m := resolveMap + for _, item := range resolveMapList { + for _, s := range item.l { + m[s] = resolveMapItem{item.v, item.tag} + } + } +} + +const ( + nullTag = "!!null" + boolTag = "!!bool" + strTag = "!!str" + intTag = "!!int" + floatTag = "!!float" + timestampTag = "!!timestamp" + seqTag = "!!seq" + mapTag = "!!map" + binaryTag = "!!binary" + mergeTag = "!!merge" +) + +var longTags = make(map[string]string) +var shortTags = make(map[string]string) + +func init() { + for _, stag := range []string{nullTag, boolTag, strTag, intTag, floatTag, timestampTag, seqTag, mapTag, binaryTag, mergeTag} { + ltag := longTag(stag) + longTags[stag] = ltag + shortTags[ltag] = stag + } +} + +const longTagPrefix = "tag:yaml.org,2002:" + +func shortTag(tag string) string { + if strings.HasPrefix(tag, longTagPrefix) { + if stag, ok := shortTags[tag]; ok { + return stag + } + return "!!" + tag[len(longTagPrefix):] + } + return tag +} + +func longTag(tag string) string { + if strings.HasPrefix(tag, "!!") { + if ltag, ok := longTags[tag]; ok { + return ltag + } + return longTagPrefix + tag[2:] + } + return tag +} + +func resolvableTag(tag string) bool { + switch tag { + case "", strTag, boolTag, intTag, floatTag, nullTag, timestampTag: + return true + } + return false +} + +var yamlStyleFloat = regexp.MustCompile(`^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$`) + +func resolve(tag string, in string) (rtag string, out interface{}) { + tag = shortTag(tag) + if !resolvableTag(tag) { + return tag, in + } + + defer func() { + switch tag { + case "", rtag, strTag, binaryTag: + return + case floatTag: + if rtag == intTag { + switch v := out.(type) { + case int64: + rtag = floatTag + out = float64(v) + return + case int: + rtag = floatTag + out = float64(v) + return + } + } + } + failf("cannot decode %s `%s` as a %s", shortTag(rtag), in, shortTag(tag)) + }() + + // Any data is accepted as a !!str or !!binary. + // Otherwise, the prefix is enough of a hint about what it might be. + hint := byte('N') + if in != "" { + hint = resolveTable[in[0]] + } + if hint != 0 && tag != strTag && tag != binaryTag { + // Handle things we can lookup in a map. + if item, ok := resolveMap[in]; ok { + return item.tag, item.value + } + + // Base 60 floats are a bad idea, were dropped in YAML 1.2, and + // are purposefully unsupported here. They're still quoted on + // the way out for compatibility with other parser, though. + + switch hint { + case 'M': + // We've already checked the map above. + + case '.': + // Not in the map, so maybe a normal float. + floatv, err := strconv.ParseFloat(in, 64) + if err == nil { + return floatTag, floatv + } + + case 'D', 'S': + // Int, float, or timestamp. + // Only try values as a timestamp if the value is unquoted or there's an explicit + // !!timestamp tag. + if tag == "" || tag == timestampTag { + t, ok := parseTimestamp(in) + if ok { + return timestampTag, t + } + } + + plain := strings.Replace(in, "_", "", -1) + intv, err := strconv.ParseInt(plain, 0, 64) + if err == nil { + if intv == int64(int(intv)) { + return intTag, int(intv) + } else { + return intTag, intv + } + } + uintv, err := strconv.ParseUint(plain, 0, 64) + if err == nil { + return intTag, uintv + } + if yamlStyleFloat.MatchString(plain) { + floatv, err := strconv.ParseFloat(plain, 64) + if err == nil { + return floatTag, floatv + } + } + if strings.HasPrefix(plain, "0b") { + intv, err := strconv.ParseInt(plain[2:], 2, 64) + if err == nil { + if intv == int64(int(intv)) { + return intTag, int(intv) + } else { + return intTag, intv + } + } + uintv, err := strconv.ParseUint(plain[2:], 2, 64) + if err == nil { + return intTag, uintv + } + } else if strings.HasPrefix(plain, "-0b") { + intv, err := strconv.ParseInt("-"+plain[3:], 2, 64) + if err == nil { + if true || intv == int64(int(intv)) { + return intTag, int(intv) + } else { + return intTag, intv + } + } + } + // Octals as introduced in version 1.2 of the spec. + // Octals from the 1.1 spec, spelled as 0777, are still + // decoded by default in v3 as well for compatibility. + // May be dropped in v4 depending on how usage evolves. + if strings.HasPrefix(plain, "0o") { + intv, err := strconv.ParseInt(plain[2:], 8, 64) + if err == nil { + if intv == int64(int(intv)) { + return intTag, int(intv) + } else { + return intTag, intv + } + } + uintv, err := strconv.ParseUint(plain[2:], 8, 64) + if err == nil { + return intTag, uintv + } + } else if strings.HasPrefix(plain, "-0o") { + intv, err := strconv.ParseInt("-"+plain[3:], 8, 64) + if err == nil { + if true || intv == int64(int(intv)) { + return intTag, int(intv) + } else { + return intTag, intv + } + } + } + default: + panic("internal error: missing handler for resolver table: " + string(rune(hint)) + " (with " + in + ")") + } + } + return strTag, in +} + +// encodeBase64 encodes s as base64 that is broken up into multiple lines +// as appropriate for the resulting length. +func encodeBase64(s string) string { + const lineLen = 70 + encLen := base64.StdEncoding.EncodedLen(len(s)) + lines := encLen/lineLen + 1 + buf := make([]byte, encLen*2+lines) + in := buf[0:encLen] + out := buf[encLen:] + base64.StdEncoding.Encode(in, []byte(s)) + k := 0 + for i := 0; i < len(in); i += lineLen { + j := i + lineLen + if j > len(in) { + j = len(in) + } + k += copy(out[k:], in[i:j]) + if lines > 1 { + out[k] = '\n' + k++ + } + } + return string(out[:k]) +} + +// This is a subset of the formats allowed by the regular expression +// defined at http://yaml.org/type/timestamp.html. +var allowedTimestampFormats = []string{ + "2006-1-2T15:4:5.999999999Z07:00", // RCF3339Nano with short date fields. + "2006-1-2t15:4:5.999999999Z07:00", // RFC3339Nano with short date fields and lower-case "t". + "2006-1-2 15:4:5.999999999", // space separated with no time zone + "2006-1-2", // date only + // Notable exception: time.Parse cannot handle: "2001-12-14 21:59:43.10 -5" + // from the set of examples. +} + +// parseTimestamp parses s as a timestamp string and +// returns the timestamp and reports whether it succeeded. +// Timestamp formats are defined at http://yaml.org/type/timestamp.html +func parseTimestamp(s string) (time.Time, bool) { + // TODO write code to check all the formats supported by + // http://yaml.org/type/timestamp.html instead of using time.Parse. + + // Quick check: all date formats start with YYYY-. + i := 0 + for ; i < len(s); i++ { + if c := s[i]; c < '0' || c > '9' { + break + } + } + if i != 4 || i == len(s) || s[i] != '-' { + return time.Time{}, false + } + for _, format := range allowedTimestampFormats { + if t, err := time.Parse(format, s); err == nil { + return t, true + } + } + return time.Time{}, false +} diff --git a/goyaml.v3/scannerc.go b/goyaml.v3/scannerc.go new file mode 100644 index 0000000..ca00701 --- /dev/null +++ b/goyaml.v3/scannerc.go @@ -0,0 +1,3038 @@ +// +// Copyright (c) 2011-2019 Canonical Ltd +// Copyright (c) 2006-2010 Kirill Simonov +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +package yaml + +import ( + "bytes" + "fmt" +) + +// Introduction +// ************ +// +// The following notes assume that you are familiar with the YAML specification +// (http://yaml.org/spec/1.2/spec.html). We mostly follow it, although in +// some cases we are less restrictive that it requires. +// +// The process of transforming a YAML stream into a sequence of events is +// divided on two steps: Scanning and Parsing. +// +// The Scanner transforms the input stream into a sequence of tokens, while the +// parser transform the sequence of tokens produced by the Scanner into a +// sequence of parsing events. +// +// The Scanner is rather clever and complicated. The Parser, on the contrary, +// is a straightforward implementation of a recursive-descendant parser (or, +// LL(1) parser, as it is usually called). +// +// Actually there are two issues of Scanning that might be called "clever", the +// rest is quite straightforward. The issues are "block collection start" and +// "simple keys". Both issues are explained below in details. +// +// Here the Scanning step is explained and implemented. We start with the list +// of all the tokens produced by the Scanner together with short descriptions. +// +// Now, tokens: +// +// STREAM-START(encoding) # The stream start. +// STREAM-END # The stream end. +// VERSION-DIRECTIVE(major,minor) # The '%YAML' directive. +// TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive. +// DOCUMENT-START # '---' +// DOCUMENT-END # '...' +// BLOCK-SEQUENCE-START # Indentation increase denoting a block +// BLOCK-MAPPING-START # sequence or a block mapping. +// BLOCK-END # Indentation decrease. +// FLOW-SEQUENCE-START # '[' +// FLOW-SEQUENCE-END # ']' +// BLOCK-SEQUENCE-START # '{' +// BLOCK-SEQUENCE-END # '}' +// BLOCK-ENTRY # '-' +// FLOW-ENTRY # ',' +// KEY # '?' or nothing (simple keys). +// VALUE # ':' +// ALIAS(anchor) # '*anchor' +// ANCHOR(anchor) # '&anchor' +// TAG(handle,suffix) # '!handle!suffix' +// SCALAR(value,style) # A scalar. +// +// The following two tokens are "virtual" tokens denoting the beginning and the +// end of the stream: +// +// STREAM-START(encoding) +// STREAM-END +// +// We pass the information about the input stream encoding with the +// STREAM-START token. +// +// The next two tokens are responsible for tags: +// +// VERSION-DIRECTIVE(major,minor) +// TAG-DIRECTIVE(handle,prefix) +// +// Example: +// +// %YAML 1.1 +// %TAG ! !foo +// %TAG !yaml! tag:yaml.org,2002: +// --- +// +// The correspoding sequence of tokens: +// +// STREAM-START(utf-8) +// VERSION-DIRECTIVE(1,1) +// TAG-DIRECTIVE("!","!foo") +// TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:") +// DOCUMENT-START +// STREAM-END +// +// Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole +// line. +// +// The document start and end indicators are represented by: +// +// DOCUMENT-START +// DOCUMENT-END +// +// Note that if a YAML stream contains an implicit document (without '---' +// and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be +// produced. +// +// In the following examples, we present whole documents together with the +// produced tokens. +// +// 1. An implicit document: +// +// 'a scalar' +// +// Tokens: +// +// STREAM-START(utf-8) +// SCALAR("a scalar",single-quoted) +// STREAM-END +// +// 2. An explicit document: +// +// --- +// 'a scalar' +// ... +// +// Tokens: +// +// STREAM-START(utf-8) +// DOCUMENT-START +// SCALAR("a scalar",single-quoted) +// DOCUMENT-END +// STREAM-END +// +// 3. Several documents in a stream: +// +// 'a scalar' +// --- +// 'another scalar' +// --- +// 'yet another scalar' +// +// Tokens: +// +// STREAM-START(utf-8) +// SCALAR("a scalar",single-quoted) +// DOCUMENT-START +// SCALAR("another scalar",single-quoted) +// DOCUMENT-START +// SCALAR("yet another scalar",single-quoted) +// STREAM-END +// +// We have already introduced the SCALAR token above. The following tokens are +// used to describe aliases, anchors, tag, and scalars: +// +// ALIAS(anchor) +// ANCHOR(anchor) +// TAG(handle,suffix) +// SCALAR(value,style) +// +// The following series of examples illustrate the usage of these tokens: +// +// 1. A recursive sequence: +// +// &A [ *A ] +// +// Tokens: +// +// STREAM-START(utf-8) +// ANCHOR("A") +// FLOW-SEQUENCE-START +// ALIAS("A") +// FLOW-SEQUENCE-END +// STREAM-END +// +// 2. A tagged scalar: +// +// !!float "3.14" # A good approximation. +// +// Tokens: +// +// STREAM-START(utf-8) +// TAG("!!","float") +// SCALAR("3.14",double-quoted) +// STREAM-END +// +// 3. Various scalar styles: +// +// --- # Implicit empty plain scalars do not produce tokens. +// --- a plain scalar +// --- 'a single-quoted scalar' +// --- "a double-quoted scalar" +// --- |- +// a literal scalar +// --- >- +// a folded +// scalar +// +// Tokens: +// +// STREAM-START(utf-8) +// DOCUMENT-START +// DOCUMENT-START +// SCALAR("a plain scalar",plain) +// DOCUMENT-START +// SCALAR("a single-quoted scalar",single-quoted) +// DOCUMENT-START +// SCALAR("a double-quoted scalar",double-quoted) +// DOCUMENT-START +// SCALAR("a literal scalar",literal) +// DOCUMENT-START +// SCALAR("a folded scalar",folded) +// STREAM-END +// +// Now it's time to review collection-related tokens. We will start with +// flow collections: +// +// FLOW-SEQUENCE-START +// FLOW-SEQUENCE-END +// FLOW-MAPPING-START +// FLOW-MAPPING-END +// FLOW-ENTRY +// KEY +// VALUE +// +// The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and +// FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}' +// correspondingly. FLOW-ENTRY represent the ',' indicator. Finally the +// indicators '?' and ':', which are used for denoting mapping keys and values, +// are represented by the KEY and VALUE tokens. +// +// The following examples show flow collections: +// +// 1. A flow sequence: +// +// [item 1, item 2, item 3] +// +// Tokens: +// +// STREAM-START(utf-8) +// FLOW-SEQUENCE-START +// SCALAR("item 1",plain) +// FLOW-ENTRY +// SCALAR("item 2",plain) +// FLOW-ENTRY +// SCALAR("item 3",plain) +// FLOW-SEQUENCE-END +// STREAM-END +// +// 2. A flow mapping: +// +// { +// a simple key: a value, # Note that the KEY token is produced. +// ? a complex key: another value, +// } +// +// Tokens: +// +// STREAM-START(utf-8) +// FLOW-MAPPING-START +// KEY +// SCALAR("a simple key",plain) +// VALUE +// SCALAR("a value",plain) +// FLOW-ENTRY +// KEY +// SCALAR("a complex key",plain) +// VALUE +// SCALAR("another value",plain) +// FLOW-ENTRY +// FLOW-MAPPING-END +// STREAM-END +// +// A simple key is a key which is not denoted by the '?' indicator. Note that +// the Scanner still produce the KEY token whenever it encounters a simple key. +// +// For scanning block collections, the following tokens are used (note that we +// repeat KEY and VALUE here): +// +// BLOCK-SEQUENCE-START +// BLOCK-MAPPING-START +// BLOCK-END +// BLOCK-ENTRY +// KEY +// VALUE +// +// The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation +// increase that precedes a block collection (cf. the INDENT token in Python). +// The token BLOCK-END denote indentation decrease that ends a block collection +// (cf. the DEDENT token in Python). However YAML has some syntax pecularities +// that makes detections of these tokens more complex. +// +// The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators +// '-', '?', and ':' correspondingly. +// +// The following examples show how the tokens BLOCK-SEQUENCE-START, +// BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner: +// +// 1. Block sequences: +// +// - item 1 +// - item 2 +// - +// - item 3.1 +// - item 3.2 +// - +// key 1: value 1 +// key 2: value 2 +// +// Tokens: +// +// STREAM-START(utf-8) +// BLOCK-SEQUENCE-START +// BLOCK-ENTRY +// SCALAR("item 1",plain) +// BLOCK-ENTRY +// SCALAR("item 2",plain) +// BLOCK-ENTRY +// BLOCK-SEQUENCE-START +// BLOCK-ENTRY +// SCALAR("item 3.1",plain) +// BLOCK-ENTRY +// SCALAR("item 3.2",plain) +// BLOCK-END +// BLOCK-ENTRY +// BLOCK-MAPPING-START +// KEY +// SCALAR("key 1",plain) +// VALUE +// SCALAR("value 1",plain) +// KEY +// SCALAR("key 2",plain) +// VALUE +// SCALAR("value 2",plain) +// BLOCK-END +// BLOCK-END +// STREAM-END +// +// 2. Block mappings: +// +// a simple key: a value # The KEY token is produced here. +// ? a complex key +// : another value +// a mapping: +// key 1: value 1 +// key 2: value 2 +// a sequence: +// - item 1 +// - item 2 +// +// Tokens: +// +// STREAM-START(utf-8) +// BLOCK-MAPPING-START +// KEY +// SCALAR("a simple key",plain) +// VALUE +// SCALAR("a value",plain) +// KEY +// SCALAR("a complex key",plain) +// VALUE +// SCALAR("another value",plain) +// KEY +// SCALAR("a mapping",plain) +// BLOCK-MAPPING-START +// KEY +// SCALAR("key 1",plain) +// VALUE +// SCALAR("value 1",plain) +// KEY +// SCALAR("key 2",plain) +// VALUE +// SCALAR("value 2",plain) +// BLOCK-END +// KEY +// SCALAR("a sequence",plain) +// VALUE +// BLOCK-SEQUENCE-START +// BLOCK-ENTRY +// SCALAR("item 1",plain) +// BLOCK-ENTRY +// SCALAR("item 2",plain) +// BLOCK-END +// BLOCK-END +// STREAM-END +// +// YAML does not always require to start a new block collection from a new +// line. If the current line contains only '-', '?', and ':' indicators, a new +// block collection may start at the current line. The following examples +// illustrate this case: +// +// 1. Collections in a sequence: +// +// - - item 1 +// - item 2 +// - key 1: value 1 +// key 2: value 2 +// - ? complex key +// : complex value +// +// Tokens: +// +// STREAM-START(utf-8) +// BLOCK-SEQUENCE-START +// BLOCK-ENTRY +// BLOCK-SEQUENCE-START +// BLOCK-ENTRY +// SCALAR("item 1",plain) +// BLOCK-ENTRY +// SCALAR("item 2",plain) +// BLOCK-END +// BLOCK-ENTRY +// BLOCK-MAPPING-START +// KEY +// SCALAR("key 1",plain) +// VALUE +// SCALAR("value 1",plain) +// KEY +// SCALAR("key 2",plain) +// VALUE +// SCALAR("value 2",plain) +// BLOCK-END +// BLOCK-ENTRY +// BLOCK-MAPPING-START +// KEY +// SCALAR("complex key") +// VALUE +// SCALAR("complex value") +// BLOCK-END +// BLOCK-END +// STREAM-END +// +// 2. Collections in a mapping: +// +// ? a sequence +// : - item 1 +// - item 2 +// ? a mapping +// : key 1: value 1 +// key 2: value 2 +// +// Tokens: +// +// STREAM-START(utf-8) +// BLOCK-MAPPING-START +// KEY +// SCALAR("a sequence",plain) +// VALUE +// BLOCK-SEQUENCE-START +// BLOCK-ENTRY +// SCALAR("item 1",plain) +// BLOCK-ENTRY +// SCALAR("item 2",plain) +// BLOCK-END +// KEY +// SCALAR("a mapping",plain) +// VALUE +// BLOCK-MAPPING-START +// KEY +// SCALAR("key 1",plain) +// VALUE +// SCALAR("value 1",plain) +// KEY +// SCALAR("key 2",plain) +// VALUE +// SCALAR("value 2",plain) +// BLOCK-END +// BLOCK-END +// STREAM-END +// +// YAML also permits non-indented sequences if they are included into a block +// mapping. In this case, the token BLOCK-SEQUENCE-START is not produced: +// +// key: +// - item 1 # BLOCK-SEQUENCE-START is NOT produced here. +// - item 2 +// +// Tokens: +// +// STREAM-START(utf-8) +// BLOCK-MAPPING-START +// KEY +// SCALAR("key",plain) +// VALUE +// BLOCK-ENTRY +// SCALAR("item 1",plain) +// BLOCK-ENTRY +// SCALAR("item 2",plain) +// BLOCK-END +// + +// Ensure that the buffer contains the required number of characters. +// Return true on success, false on failure (reader error or memory error). +func cache(parser *yaml_parser_t, length int) bool { + // [Go] This was inlined: !cache(A, B) -> unread < B && !update(A, B) + return parser.unread >= length || yaml_parser_update_buffer(parser, length) +} + +// Advance the buffer pointer. +func skip(parser *yaml_parser_t) { + if !is_blank(parser.buffer, parser.buffer_pos) { + parser.newlines = 0 + } + parser.mark.index++ + parser.mark.column++ + parser.unread-- + parser.buffer_pos += width(parser.buffer[parser.buffer_pos]) +} + +func skip_line(parser *yaml_parser_t) { + if is_crlf(parser.buffer, parser.buffer_pos) { + parser.mark.index += 2 + parser.mark.column = 0 + parser.mark.line++ + parser.unread -= 2 + parser.buffer_pos += 2 + parser.newlines++ + } else if is_break(parser.buffer, parser.buffer_pos) { + parser.mark.index++ + parser.mark.column = 0 + parser.mark.line++ + parser.unread-- + parser.buffer_pos += width(parser.buffer[parser.buffer_pos]) + parser.newlines++ + } +} + +// Copy a character to a string buffer and advance pointers. +func read(parser *yaml_parser_t, s []byte) []byte { + if !is_blank(parser.buffer, parser.buffer_pos) { + parser.newlines = 0 + } + w := width(parser.buffer[parser.buffer_pos]) + if w == 0 { + panic("invalid character sequence") + } + if len(s) == 0 { + s = make([]byte, 0, 32) + } + if w == 1 && len(s)+w <= cap(s) { + s = s[:len(s)+1] + s[len(s)-1] = parser.buffer[parser.buffer_pos] + parser.buffer_pos++ + } else { + s = append(s, parser.buffer[parser.buffer_pos:parser.buffer_pos+w]...) + parser.buffer_pos += w + } + parser.mark.index++ + parser.mark.column++ + parser.unread-- + return s +} + +// Copy a line break character to a string buffer and advance pointers. +func read_line(parser *yaml_parser_t, s []byte) []byte { + buf := parser.buffer + pos := parser.buffer_pos + switch { + case buf[pos] == '\r' && buf[pos+1] == '\n': + // CR LF . LF + s = append(s, '\n') + parser.buffer_pos += 2 + parser.mark.index++ + parser.unread-- + case buf[pos] == '\r' || buf[pos] == '\n': + // CR|LF . LF + s = append(s, '\n') + parser.buffer_pos += 1 + case buf[pos] == '\xC2' && buf[pos+1] == '\x85': + // NEL . LF + s = append(s, '\n') + parser.buffer_pos += 2 + case buf[pos] == '\xE2' && buf[pos+1] == '\x80' && (buf[pos+2] == '\xA8' || buf[pos+2] == '\xA9'): + // LS|PS . LS|PS + s = append(s, buf[parser.buffer_pos:pos+3]...) + parser.buffer_pos += 3 + default: + return s + } + parser.mark.index++ + parser.mark.column = 0 + parser.mark.line++ + parser.unread-- + parser.newlines++ + return s +} + +// Get the next token. +func yaml_parser_scan(parser *yaml_parser_t, token *yaml_token_t) bool { + // Erase the token object. + *token = yaml_token_t{} // [Go] Is this necessary? + + // No tokens after STREAM-END or error. + if parser.stream_end_produced || parser.error != yaml_NO_ERROR { + return true + } + + // Ensure that the tokens queue contains enough tokens. + if !parser.token_available { + if !yaml_parser_fetch_more_tokens(parser) { + return false + } + } + + // Fetch the next token from the queue. + *token = parser.tokens[parser.tokens_head] + parser.tokens_head++ + parser.tokens_parsed++ + parser.token_available = false + + if token.typ == yaml_STREAM_END_TOKEN { + parser.stream_end_produced = true + } + return true +} + +// Set the scanner error and return false. +func yaml_parser_set_scanner_error(parser *yaml_parser_t, context string, context_mark yaml_mark_t, problem string) bool { + parser.error = yaml_SCANNER_ERROR + parser.context = context + parser.context_mark = context_mark + parser.problem = problem + parser.problem_mark = parser.mark + return false +} + +func yaml_parser_set_scanner_tag_error(parser *yaml_parser_t, directive bool, context_mark yaml_mark_t, problem string) bool { + context := "while parsing a tag" + if directive { + context = "while parsing a %TAG directive" + } + return yaml_parser_set_scanner_error(parser, context, context_mark, problem) +} + +func trace(args ...interface{}) func() { + pargs := append([]interface{}{"+++"}, args...) + fmt.Println(pargs...) + pargs = append([]interface{}{"---"}, args...) + return func() { fmt.Println(pargs...) } +} + +// Ensure that the tokens queue contains at least one token which can be +// returned to the Parser. +func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool { + // While we need more tokens to fetch, do it. + for { + // [Go] The comment parsing logic requires a lookahead of two tokens + // so that foot comments may be parsed in time of associating them + // with the tokens that are parsed before them, and also for line + // comments to be transformed into head comments in some edge cases. + if parser.tokens_head < len(parser.tokens)-2 { + // If a potential simple key is at the head position, we need to fetch + // the next token to disambiguate it. + head_tok_idx, ok := parser.simple_keys_by_tok[parser.tokens_parsed] + if !ok { + break + } else if valid, ok := yaml_simple_key_is_valid(parser, &parser.simple_keys[head_tok_idx]); !ok { + return false + } else if !valid { + break + } + } + // Fetch the next token. + if !yaml_parser_fetch_next_token(parser) { + return false + } + } + + parser.token_available = true + return true +} + +// The dispatcher for token fetchers. +func yaml_parser_fetch_next_token(parser *yaml_parser_t) (ok bool) { + // Ensure that the buffer is initialized. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + + // Check if we just started scanning. Fetch STREAM-START then. + if !parser.stream_start_produced { + return yaml_parser_fetch_stream_start(parser) + } + + scan_mark := parser.mark + + // Eat whitespaces and comments until we reach the next token. + if !yaml_parser_scan_to_next_token(parser) { + return false + } + + // [Go] While unrolling indents, transform the head comments of prior + // indentation levels observed after scan_start into foot comments at + // the respective indexes. + + // Check the indentation level against the current column. + if !yaml_parser_unroll_indent(parser, parser.mark.column, scan_mark) { + return false + } + + // Ensure that the buffer contains at least 4 characters. 4 is the length + // of the longest indicators ('--- ' and '... '). + if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { + return false + } + + // Is it the end of the stream? + if is_z(parser.buffer, parser.buffer_pos) { + return yaml_parser_fetch_stream_end(parser) + } + + // Is it a directive? + if parser.mark.column == 0 && parser.buffer[parser.buffer_pos] == '%' { + return yaml_parser_fetch_directive(parser) + } + + buf := parser.buffer + pos := parser.buffer_pos + + // Is it the document start indicator? + if parser.mark.column == 0 && buf[pos] == '-' && buf[pos+1] == '-' && buf[pos+2] == '-' && is_blankz(buf, pos+3) { + return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_START_TOKEN) + } + + // Is it the document end indicator? + if parser.mark.column == 0 && buf[pos] == '.' && buf[pos+1] == '.' && buf[pos+2] == '.' && is_blankz(buf, pos+3) { + return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_END_TOKEN) + } + + comment_mark := parser.mark + if len(parser.tokens) > 0 && (parser.flow_level == 0 && buf[pos] == ':' || parser.flow_level > 0 && buf[pos] == ',') { + // Associate any following comments with the prior token. + comment_mark = parser.tokens[len(parser.tokens)-1].start_mark + } + defer func() { + if !ok { + return + } + if len(parser.tokens) > 0 && parser.tokens[len(parser.tokens)-1].typ == yaml_BLOCK_ENTRY_TOKEN { + // Sequence indicators alone have no line comments. It becomes + // a head comment for whatever follows. + return + } + if !yaml_parser_scan_line_comment(parser, comment_mark) { + ok = false + return + } + }() + + // Is it the flow sequence start indicator? + if buf[pos] == '[' { + return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_SEQUENCE_START_TOKEN) + } + + // Is it the flow mapping start indicator? + if parser.buffer[parser.buffer_pos] == '{' { + return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_MAPPING_START_TOKEN) + } + + // Is it the flow sequence end indicator? + if parser.buffer[parser.buffer_pos] == ']' { + return yaml_parser_fetch_flow_collection_end(parser, + yaml_FLOW_SEQUENCE_END_TOKEN) + } + + // Is it the flow mapping end indicator? + if parser.buffer[parser.buffer_pos] == '}' { + return yaml_parser_fetch_flow_collection_end(parser, + yaml_FLOW_MAPPING_END_TOKEN) + } + + // Is it the flow entry indicator? + if parser.buffer[parser.buffer_pos] == ',' { + return yaml_parser_fetch_flow_entry(parser) + } + + // Is it the block entry indicator? + if parser.buffer[parser.buffer_pos] == '-' && is_blankz(parser.buffer, parser.buffer_pos+1) { + return yaml_parser_fetch_block_entry(parser) + } + + // Is it the key indicator? + if parser.buffer[parser.buffer_pos] == '?' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) { + return yaml_parser_fetch_key(parser) + } + + // Is it the value indicator? + if parser.buffer[parser.buffer_pos] == ':' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) { + return yaml_parser_fetch_value(parser) + } + + // Is it an alias? + if parser.buffer[parser.buffer_pos] == '*' { + return yaml_parser_fetch_anchor(parser, yaml_ALIAS_TOKEN) + } + + // Is it an anchor? + if parser.buffer[parser.buffer_pos] == '&' { + return yaml_parser_fetch_anchor(parser, yaml_ANCHOR_TOKEN) + } + + // Is it a tag? + if parser.buffer[parser.buffer_pos] == '!' { + return yaml_parser_fetch_tag(parser) + } + + // Is it a literal scalar? + if parser.buffer[parser.buffer_pos] == '|' && parser.flow_level == 0 { + return yaml_parser_fetch_block_scalar(parser, true) + } + + // Is it a folded scalar? + if parser.buffer[parser.buffer_pos] == '>' && parser.flow_level == 0 { + return yaml_parser_fetch_block_scalar(parser, false) + } + + // Is it a single-quoted scalar? + if parser.buffer[parser.buffer_pos] == '\'' { + return yaml_parser_fetch_flow_scalar(parser, true) + } + + // Is it a double-quoted scalar? + if parser.buffer[parser.buffer_pos] == '"' { + return yaml_parser_fetch_flow_scalar(parser, false) + } + + // Is it a plain scalar? + // + // A plain scalar may start with any non-blank characters except + // + // '-', '?', ':', ',', '[', ']', '{', '}', + // '#', '&', '*', '!', '|', '>', '\'', '\"', + // '%', '@', '`'. + // + // In the block context (and, for the '-' indicator, in the flow context + // too), it may also start with the characters + // + // '-', '?', ':' + // + // if it is followed by a non-space character. + // + // The last rule is more restrictive than the specification requires. + // [Go] TODO Make this logic more reasonable. + //switch parser.buffer[parser.buffer_pos] { + //case '-', '?', ':', ',', '?', '-', ',', ':', ']', '[', '}', '{', '&', '#', '!', '*', '>', '|', '"', '\'', '@', '%', '-', '`': + //} + if !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '-' || + parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':' || + parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '[' || + parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' || + parser.buffer[parser.buffer_pos] == '}' || parser.buffer[parser.buffer_pos] == '#' || + parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '*' || + parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '|' || + parser.buffer[parser.buffer_pos] == '>' || parser.buffer[parser.buffer_pos] == '\'' || + parser.buffer[parser.buffer_pos] == '"' || parser.buffer[parser.buffer_pos] == '%' || + parser.buffer[parser.buffer_pos] == '@' || parser.buffer[parser.buffer_pos] == '`') || + (parser.buffer[parser.buffer_pos] == '-' && !is_blank(parser.buffer, parser.buffer_pos+1)) || + (parser.flow_level == 0 && + (parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':') && + !is_blankz(parser.buffer, parser.buffer_pos+1)) { + return yaml_parser_fetch_plain_scalar(parser) + } + + // If we don't determine the token type so far, it is an error. + return yaml_parser_set_scanner_error(parser, + "while scanning for the next token", parser.mark, + "found character that cannot start any token") +} + +func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key_t) (valid, ok bool) { + if !simple_key.possible { + return false, true + } + + // The 1.2 specification says: + // + // "If the ? indicator is omitted, parsing needs to see past the + // implicit key to recognize it as such. To limit the amount of + // lookahead required, the “:” indicator must appear at most 1024 + // Unicode characters beyond the start of the key. In addition, the key + // is restricted to a single line." + // + if simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index { + // Check if the potential simple key to be removed is required. + if simple_key.required { + return false, yaml_parser_set_scanner_error(parser, + "while scanning a simple key", simple_key.mark, + "could not find expected ':'") + } + simple_key.possible = false + return false, true + } + return true, true +} + +// Check if a simple key may start at the current position and add it if +// needed. +func yaml_parser_save_simple_key(parser *yaml_parser_t) bool { + // A simple key is required at the current position if the scanner is in + // the block context and the current column coincides with the indentation + // level. + + required := parser.flow_level == 0 && parser.indent == parser.mark.column + + // + // If the current position may start a simple key, save it. + // + if parser.simple_key_allowed { + simple_key := yaml_simple_key_t{ + possible: true, + required: required, + token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), + mark: parser.mark, + } + + if !yaml_parser_remove_simple_key(parser) { + return false + } + parser.simple_keys[len(parser.simple_keys)-1] = simple_key + parser.simple_keys_by_tok[simple_key.token_number] = len(parser.simple_keys) - 1 + } + return true +} + +// Remove a potential simple key at the current flow level. +func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool { + i := len(parser.simple_keys) - 1 + if parser.simple_keys[i].possible { + // If the key is required, it is an error. + if parser.simple_keys[i].required { + return yaml_parser_set_scanner_error(parser, + "while scanning a simple key", parser.simple_keys[i].mark, + "could not find expected ':'") + } + // Remove the key from the stack. + parser.simple_keys[i].possible = false + delete(parser.simple_keys_by_tok, parser.simple_keys[i].token_number) + } + return true +} + +// max_flow_level limits the flow_level +const max_flow_level = 10000 + +// Increase the flow level and resize the simple key list if needed. +func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool { + // Reset the simple key on the next level. + parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{ + possible: false, + required: false, + token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), + mark: parser.mark, + }) + + // Increase the flow level. + parser.flow_level++ + if parser.flow_level > max_flow_level { + return yaml_parser_set_scanner_error(parser, + "while increasing flow level", parser.simple_keys[len(parser.simple_keys)-1].mark, + fmt.Sprintf("exceeded max depth of %d", max_flow_level)) + } + return true +} + +// Decrease the flow level. +func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool { + if parser.flow_level > 0 { + parser.flow_level-- + last := len(parser.simple_keys) - 1 + delete(parser.simple_keys_by_tok, parser.simple_keys[last].token_number) + parser.simple_keys = parser.simple_keys[:last] + } + return true +} + +// max_indents limits the indents stack size +const max_indents = 10000 + +// Push the current indentation level to the stack and set the new level +// the current column is greater than the indentation level. In this case, +// append or insert the specified token into the token queue. +func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml_token_type_t, mark yaml_mark_t) bool { + // In the flow context, do nothing. + if parser.flow_level > 0 { + return true + } + + if parser.indent < column { + // Push the current indentation level to the stack and set the new + // indentation level. + parser.indents = append(parser.indents, parser.indent) + parser.indent = column + if len(parser.indents) > max_indents { + return yaml_parser_set_scanner_error(parser, + "while increasing indent level", parser.simple_keys[len(parser.simple_keys)-1].mark, + fmt.Sprintf("exceeded max depth of %d", max_indents)) + } + + // Create a token and insert it into the queue. + token := yaml_token_t{ + typ: typ, + start_mark: mark, + end_mark: mark, + } + if number > -1 { + number -= parser.tokens_parsed + } + yaml_insert_token(parser, number, &token) + } + return true +} + +// Pop indentation levels from the indents stack until the current level +// becomes less or equal to the column. For each indentation level, append +// the BLOCK-END token. +func yaml_parser_unroll_indent(parser *yaml_parser_t, column int, scan_mark yaml_mark_t) bool { + // In the flow context, do nothing. + if parser.flow_level > 0 { + return true + } + + block_mark := scan_mark + block_mark.index-- + + // Loop through the indentation levels in the stack. + for parser.indent > column { + + // [Go] Reposition the end token before potential following + // foot comments of parent blocks. For that, search + // backwards for recent comments that were at the same + // indent as the block that is ending now. + stop_index := block_mark.index + for i := len(parser.comments) - 1; i >= 0; i-- { + comment := &parser.comments[i] + + if comment.end_mark.index < stop_index { + // Don't go back beyond the start of the comment/whitespace scan, unless column < 0. + // If requested indent column is < 0, then the document is over and everything else + // is a foot anyway. + break + } + if comment.start_mark.column == parser.indent+1 { + // This is a good match. But maybe there's a former comment + // at that same indent level, so keep searching. + block_mark = comment.start_mark + } + + // While the end of the former comment matches with + // the start of the following one, we know there's + // nothing in between and scanning is still safe. + stop_index = comment.scan_mark.index + } + + // Create a token and append it to the queue. + token := yaml_token_t{ + typ: yaml_BLOCK_END_TOKEN, + start_mark: block_mark, + end_mark: block_mark, + } + yaml_insert_token(parser, -1, &token) + + // Pop the indentation level. + parser.indent = parser.indents[len(parser.indents)-1] + parser.indents = parser.indents[:len(parser.indents)-1] + } + return true +} + +// Initialize the scanner and produce the STREAM-START token. +func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool { + + // Set the initial indentation. + parser.indent = -1 + + // Initialize the simple key stack. + parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{}) + + parser.simple_keys_by_tok = make(map[int]int) + + // A simple key is allowed at the beginning of the stream. + parser.simple_key_allowed = true + + // We have started. + parser.stream_start_produced = true + + // Create the STREAM-START token and append it to the queue. + token := yaml_token_t{ + typ: yaml_STREAM_START_TOKEN, + start_mark: parser.mark, + end_mark: parser.mark, + encoding: parser.encoding, + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the STREAM-END token and shut down the scanner. +func yaml_parser_fetch_stream_end(parser *yaml_parser_t) bool { + + // Force new line. + if parser.mark.column != 0 { + parser.mark.column = 0 + parser.mark.line++ + } + + // Reset the indentation level. + if !yaml_parser_unroll_indent(parser, -1, parser.mark) { + return false + } + + // Reset simple keys. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + parser.simple_key_allowed = false + + // Create the STREAM-END token and append it to the queue. + token := yaml_token_t{ + typ: yaml_STREAM_END_TOKEN, + start_mark: parser.mark, + end_mark: parser.mark, + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token. +func yaml_parser_fetch_directive(parser *yaml_parser_t) bool { + // Reset the indentation level. + if !yaml_parser_unroll_indent(parser, -1, parser.mark) { + return false + } + + // Reset simple keys. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + parser.simple_key_allowed = false + + // Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. + token := yaml_token_t{} + if !yaml_parser_scan_directive(parser, &token) { + return false + } + // Append the token to the queue. + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the DOCUMENT-START or DOCUMENT-END token. +func yaml_parser_fetch_document_indicator(parser *yaml_parser_t, typ yaml_token_type_t) bool { + // Reset the indentation level. + if !yaml_parser_unroll_indent(parser, -1, parser.mark) { + return false + } + + // Reset simple keys. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + parser.simple_key_allowed = false + + // Consume the token. + start_mark := parser.mark + + skip(parser) + skip(parser) + skip(parser) + + end_mark := parser.mark + + // Create the DOCUMENT-START or DOCUMENT-END token. + token := yaml_token_t{ + typ: typ, + start_mark: start_mark, + end_mark: end_mark, + } + // Append the token to the queue. + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token. +func yaml_parser_fetch_flow_collection_start(parser *yaml_parser_t, typ yaml_token_type_t) bool { + + // The indicators '[' and '{' may start a simple key. + if !yaml_parser_save_simple_key(parser) { + return false + } + + // Increase the flow level. + if !yaml_parser_increase_flow_level(parser) { + return false + } + + // A simple key may follow the indicators '[' and '{'. + parser.simple_key_allowed = true + + // Consume the token. + start_mark := parser.mark + skip(parser) + end_mark := parser.mark + + // Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. + token := yaml_token_t{ + typ: typ, + start_mark: start_mark, + end_mark: end_mark, + } + // Append the token to the queue. + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token. +func yaml_parser_fetch_flow_collection_end(parser *yaml_parser_t, typ yaml_token_type_t) bool { + // Reset any potential simple key on the current flow level. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + // Decrease the flow level. + if !yaml_parser_decrease_flow_level(parser) { + return false + } + + // No simple keys after the indicators ']' and '}'. + parser.simple_key_allowed = false + + // Consume the token. + + start_mark := parser.mark + skip(parser) + end_mark := parser.mark + + // Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. + token := yaml_token_t{ + typ: typ, + start_mark: start_mark, + end_mark: end_mark, + } + // Append the token to the queue. + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the FLOW-ENTRY token. +func yaml_parser_fetch_flow_entry(parser *yaml_parser_t) bool { + // Reset any potential simple keys on the current flow level. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + // Simple keys are allowed after ','. + parser.simple_key_allowed = true + + // Consume the token. + start_mark := parser.mark + skip(parser) + end_mark := parser.mark + + // Create the FLOW-ENTRY token and append it to the queue. + token := yaml_token_t{ + typ: yaml_FLOW_ENTRY_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the BLOCK-ENTRY token. +func yaml_parser_fetch_block_entry(parser *yaml_parser_t) bool { + // Check if the scanner is in the block context. + if parser.flow_level == 0 { + // Check if we are allowed to start a new entry. + if !parser.simple_key_allowed { + return yaml_parser_set_scanner_error(parser, "", parser.mark, + "block sequence entries are not allowed in this context") + } + // Add the BLOCK-SEQUENCE-START token if needed. + if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_SEQUENCE_START_TOKEN, parser.mark) { + return false + } + } else { + // It is an error for the '-' indicator to occur in the flow context, + // but we let the Parser detect and report about it because the Parser + // is able to point to the context. + } + + // Reset any potential simple keys on the current flow level. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + // Simple keys are allowed after '-'. + parser.simple_key_allowed = true + + // Consume the token. + start_mark := parser.mark + skip(parser) + end_mark := parser.mark + + // Create the BLOCK-ENTRY token and append it to the queue. + token := yaml_token_t{ + typ: yaml_BLOCK_ENTRY_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the KEY token. +func yaml_parser_fetch_key(parser *yaml_parser_t) bool { + + // In the block context, additional checks are required. + if parser.flow_level == 0 { + // Check if we are allowed to start a new key (not nessesary simple). + if !parser.simple_key_allowed { + return yaml_parser_set_scanner_error(parser, "", parser.mark, + "mapping keys are not allowed in this context") + } + // Add the BLOCK-MAPPING-START token if needed. + if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) { + return false + } + } + + // Reset any potential simple keys on the current flow level. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + // Simple keys are allowed after '?' in the block context. + parser.simple_key_allowed = parser.flow_level == 0 + + // Consume the token. + start_mark := parser.mark + skip(parser) + end_mark := parser.mark + + // Create the KEY token and append it to the queue. + token := yaml_token_t{ + typ: yaml_KEY_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the VALUE token. +func yaml_parser_fetch_value(parser *yaml_parser_t) bool { + + simple_key := &parser.simple_keys[len(parser.simple_keys)-1] + + // Have we found a simple key? + if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok { + return false + + } else if valid { + + // Create the KEY token and insert it into the queue. + token := yaml_token_t{ + typ: yaml_KEY_TOKEN, + start_mark: simple_key.mark, + end_mark: simple_key.mark, + } + yaml_insert_token(parser, simple_key.token_number-parser.tokens_parsed, &token) + + // In the block context, we may need to add the BLOCK-MAPPING-START token. + if !yaml_parser_roll_indent(parser, simple_key.mark.column, + simple_key.token_number, + yaml_BLOCK_MAPPING_START_TOKEN, simple_key.mark) { + return false + } + + // Remove the simple key. + simple_key.possible = false + delete(parser.simple_keys_by_tok, simple_key.token_number) + + // A simple key cannot follow another simple key. + parser.simple_key_allowed = false + + } else { + // The ':' indicator follows a complex key. + + // In the block context, extra checks are required. + if parser.flow_level == 0 { + + // Check if we are allowed to start a complex value. + if !parser.simple_key_allowed { + return yaml_parser_set_scanner_error(parser, "", parser.mark, + "mapping values are not allowed in this context") + } + + // Add the BLOCK-MAPPING-START token if needed. + if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) { + return false + } + } + + // Simple keys after ':' are allowed in the block context. + parser.simple_key_allowed = parser.flow_level == 0 + } + + // Consume the token. + start_mark := parser.mark + skip(parser) + end_mark := parser.mark + + // Create the VALUE token and append it to the queue. + token := yaml_token_t{ + typ: yaml_VALUE_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the ALIAS or ANCHOR token. +func yaml_parser_fetch_anchor(parser *yaml_parser_t, typ yaml_token_type_t) bool { + // An anchor or an alias could be a simple key. + if !yaml_parser_save_simple_key(parser) { + return false + } + + // A simple key cannot follow an anchor or an alias. + parser.simple_key_allowed = false + + // Create the ALIAS or ANCHOR token and append it to the queue. + var token yaml_token_t + if !yaml_parser_scan_anchor(parser, &token, typ) { + return false + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the TAG token. +func yaml_parser_fetch_tag(parser *yaml_parser_t) bool { + // A tag could be a simple key. + if !yaml_parser_save_simple_key(parser) { + return false + } + + // A simple key cannot follow a tag. + parser.simple_key_allowed = false + + // Create the TAG token and append it to the queue. + var token yaml_token_t + if !yaml_parser_scan_tag(parser, &token) { + return false + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens. +func yaml_parser_fetch_block_scalar(parser *yaml_parser_t, literal bool) bool { + // Remove any potential simple keys. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + // A simple key may follow a block scalar. + parser.simple_key_allowed = true + + // Create the SCALAR token and append it to the queue. + var token yaml_token_t + if !yaml_parser_scan_block_scalar(parser, &token, literal) { + return false + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens. +func yaml_parser_fetch_flow_scalar(parser *yaml_parser_t, single bool) bool { + // A plain scalar could be a simple key. + if !yaml_parser_save_simple_key(parser) { + return false + } + + // A simple key cannot follow a flow scalar. + parser.simple_key_allowed = false + + // Create the SCALAR token and append it to the queue. + var token yaml_token_t + if !yaml_parser_scan_flow_scalar(parser, &token, single) { + return false + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the SCALAR(...,plain) token. +func yaml_parser_fetch_plain_scalar(parser *yaml_parser_t) bool { + // A plain scalar could be a simple key. + if !yaml_parser_save_simple_key(parser) { + return false + } + + // A simple key cannot follow a flow scalar. + parser.simple_key_allowed = false + + // Create the SCALAR token and append it to the queue. + var token yaml_token_t + if !yaml_parser_scan_plain_scalar(parser, &token) { + return false + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Eat whitespaces and comments until the next token is found. +func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool { + + scan_mark := parser.mark + + // Until the next token is not found. + for { + // Allow the BOM mark to start a line. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + if parser.mark.column == 0 && is_bom(parser.buffer, parser.buffer_pos) { + skip(parser) + } + + // Eat whitespaces. + // Tabs are allowed: + // - in the flow context + // - in the block context, but not at the beginning of the line or + // after '-', '?', or ':' (complex value). + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + + for parser.buffer[parser.buffer_pos] == ' ' || ((parser.flow_level > 0 || !parser.simple_key_allowed) && parser.buffer[parser.buffer_pos] == '\t') { + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + // Check if we just had a line comment under a sequence entry that + // looks more like a header to the following content. Similar to this: + // + // - # The comment + // - Some data + // + // If so, transform the line comment to a head comment and reposition. + if len(parser.comments) > 0 && len(parser.tokens) > 1 { + tokenA := parser.tokens[len(parser.tokens)-2] + tokenB := parser.tokens[len(parser.tokens)-1] + comment := &parser.comments[len(parser.comments)-1] + if tokenA.typ == yaml_BLOCK_SEQUENCE_START_TOKEN && tokenB.typ == yaml_BLOCK_ENTRY_TOKEN && len(comment.line) > 0 && !is_break(parser.buffer, parser.buffer_pos) { + // If it was in the prior line, reposition so it becomes a + // header of the follow up token. Otherwise, keep it in place + // so it becomes a header of the former. + comment.head = comment.line + comment.line = nil + if comment.start_mark.line == parser.mark.line-1 { + comment.token_mark = parser.mark + } + } + } + + // Eat a comment until a line break. + if parser.buffer[parser.buffer_pos] == '#' { + if !yaml_parser_scan_comments(parser, scan_mark) { + return false + } + } + + // If it is a line break, eat it. + if is_break(parser.buffer, parser.buffer_pos) { + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + skip_line(parser) + + // In the block context, a new line may start a simple key. + if parser.flow_level == 0 { + parser.simple_key_allowed = true + } + } else { + break // We have found a token. + } + } + + return true +} + +// Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token. +// +// Scope: +// %YAML 1.1 # a comment \n +// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +// %TAG !yaml! tag:yaml.org,2002: \n +// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +// +func yaml_parser_scan_directive(parser *yaml_parser_t, token *yaml_token_t) bool { + // Eat '%'. + start_mark := parser.mark + skip(parser) + + // Scan the directive name. + var name []byte + if !yaml_parser_scan_directive_name(parser, start_mark, &name) { + return false + } + + // Is it a YAML directive? + if bytes.Equal(name, []byte("YAML")) { + // Scan the VERSION directive value. + var major, minor int8 + if !yaml_parser_scan_version_directive_value(parser, start_mark, &major, &minor) { + return false + } + end_mark := parser.mark + + // Create a VERSION-DIRECTIVE token. + *token = yaml_token_t{ + typ: yaml_VERSION_DIRECTIVE_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + major: major, + minor: minor, + } + + // Is it a TAG directive? + } else if bytes.Equal(name, []byte("TAG")) { + // Scan the TAG directive value. + var handle, prefix []byte + if !yaml_parser_scan_tag_directive_value(parser, start_mark, &handle, &prefix) { + return false + } + end_mark := parser.mark + + // Create a TAG-DIRECTIVE token. + *token = yaml_token_t{ + typ: yaml_TAG_DIRECTIVE_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + value: handle, + prefix: prefix, + } + + // Unknown directive. + } else { + yaml_parser_set_scanner_error(parser, "while scanning a directive", + start_mark, "found unknown directive name") + return false + } + + // Eat the rest of the line including any comments. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + + for is_blank(parser.buffer, parser.buffer_pos) { + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + if parser.buffer[parser.buffer_pos] == '#' { + // [Go] Discard this inline comment for the time being. + //if !yaml_parser_scan_line_comment(parser, start_mark) { + // return false + //} + for !is_breakz(parser.buffer, parser.buffer_pos) { + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + } + + // Check if we are at the end of the line. + if !is_breakz(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a directive", + start_mark, "did not find expected comment or line break") + return false + } + + // Eat a line break. + if is_break(parser.buffer, parser.buffer_pos) { + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + skip_line(parser) + } + + return true +} + +// Scan the directive name. +// +// Scope: +// %YAML 1.1 # a comment \n +// ^^^^ +// %TAG !yaml! tag:yaml.org,2002: \n +// ^^^ +// +func yaml_parser_scan_directive_name(parser *yaml_parser_t, start_mark yaml_mark_t, name *[]byte) bool { + // Consume the directive name. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + + var s []byte + for is_alpha(parser.buffer, parser.buffer_pos) { + s = read(parser, s) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + // Check if the name is empty. + if len(s) == 0 { + yaml_parser_set_scanner_error(parser, "while scanning a directive", + start_mark, "could not find expected directive name") + return false + } + + // Check for an blank character after the name. + if !is_blankz(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a directive", + start_mark, "found unexpected non-alphabetical character") + return false + } + *name = s + return true +} + +// Scan the value of VERSION-DIRECTIVE. +// +// Scope: +// %YAML 1.1 # a comment \n +// ^^^^^^ +func yaml_parser_scan_version_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, major, minor *int8) bool { + // Eat whitespaces. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + for is_blank(parser.buffer, parser.buffer_pos) { + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + // Consume the major version number. + if !yaml_parser_scan_version_directive_number(parser, start_mark, major) { + return false + } + + // Eat '.'. + if parser.buffer[parser.buffer_pos] != '.' { + return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", + start_mark, "did not find expected digit or '.' character") + } + + skip(parser) + + // Consume the minor version number. + if !yaml_parser_scan_version_directive_number(parser, start_mark, minor) { + return false + } + return true +} + +const max_number_length = 2 + +// Scan the version number of VERSION-DIRECTIVE. +// +// Scope: +// %YAML 1.1 # a comment \n +// ^ +// %YAML 1.1 # a comment \n +// ^ +func yaml_parser_scan_version_directive_number(parser *yaml_parser_t, start_mark yaml_mark_t, number *int8) bool { + + // Repeat while the next character is digit. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + var value, length int8 + for is_digit(parser.buffer, parser.buffer_pos) { + // Check if the number is too long. + length++ + if length > max_number_length { + return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", + start_mark, "found extremely long version number") + } + value = value*10 + int8(as_digit(parser.buffer, parser.buffer_pos)) + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + // Check if the number was present. + if length == 0 { + return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", + start_mark, "did not find expected version number") + } + *number = value + return true +} + +// Scan the value of a TAG-DIRECTIVE token. +// +// Scope: +// %TAG !yaml! tag:yaml.org,2002: \n +// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +// +func yaml_parser_scan_tag_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, handle, prefix *[]byte) bool { + var handle_value, prefix_value []byte + + // Eat whitespaces. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + + for is_blank(parser.buffer, parser.buffer_pos) { + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + // Scan a handle. + if !yaml_parser_scan_tag_handle(parser, true, start_mark, &handle_value) { + return false + } + + // Expect a whitespace. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + if !is_blank(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", + start_mark, "did not find expected whitespace") + return false + } + + // Eat whitespaces. + for is_blank(parser.buffer, parser.buffer_pos) { + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + // Scan a prefix. + if !yaml_parser_scan_tag_uri(parser, true, nil, start_mark, &prefix_value) { + return false + } + + // Expect a whitespace or line break. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + if !is_blankz(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", + start_mark, "did not find expected whitespace or line break") + return false + } + + *handle = handle_value + *prefix = prefix_value + return true +} + +func yaml_parser_scan_anchor(parser *yaml_parser_t, token *yaml_token_t, typ yaml_token_type_t) bool { + var s []byte + + // Eat the indicator character. + start_mark := parser.mark + skip(parser) + + // Consume the value. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + + for is_alpha(parser.buffer, parser.buffer_pos) { + s = read(parser, s) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + end_mark := parser.mark + + /* + * Check if length of the anchor is greater than 0 and it is followed by + * a whitespace character or one of the indicators: + * + * '?', ':', ',', ']', '}', '%', '@', '`'. + */ + + if len(s) == 0 || + !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '?' || + parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == ',' || + parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '}' || + parser.buffer[parser.buffer_pos] == '%' || parser.buffer[parser.buffer_pos] == '@' || + parser.buffer[parser.buffer_pos] == '`') { + context := "while scanning an alias" + if typ == yaml_ANCHOR_TOKEN { + context = "while scanning an anchor" + } + yaml_parser_set_scanner_error(parser, context, start_mark, + "did not find expected alphabetic or numeric character") + return false + } + + // Create a token. + *token = yaml_token_t{ + typ: typ, + start_mark: start_mark, + end_mark: end_mark, + value: s, + } + + return true +} + +/* + * Scan a TAG token. + */ + +func yaml_parser_scan_tag(parser *yaml_parser_t, token *yaml_token_t) bool { + var handle, suffix []byte + + start_mark := parser.mark + + // Check if the tag is in the canonical form. + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + + if parser.buffer[parser.buffer_pos+1] == '<' { + // Keep the handle as '' + + // Eat '!<' + skip(parser) + skip(parser) + + // Consume the tag value. + if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) { + return false + } + + // Check for '>' and eat it. + if parser.buffer[parser.buffer_pos] != '>' { + yaml_parser_set_scanner_error(parser, "while scanning a tag", + start_mark, "did not find the expected '>'") + return false + } + + skip(parser) + } else { + // The tag has either the '!suffix' or the '!handle!suffix' form. + + // First, try to scan a handle. + if !yaml_parser_scan_tag_handle(parser, false, start_mark, &handle) { + return false + } + + // Check if it is, indeed, handle. + if handle[0] == '!' && len(handle) > 1 && handle[len(handle)-1] == '!' { + // Scan the suffix now. + if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) { + return false + } + } else { + // It wasn't a handle after all. Scan the rest of the tag. + if !yaml_parser_scan_tag_uri(parser, false, handle, start_mark, &suffix) { + return false + } + + // Set the handle to '!'. + handle = []byte{'!'} + + // A special case: the '!' tag. Set the handle to '' and the + // suffix to '!'. + if len(suffix) == 0 { + handle, suffix = suffix, handle + } + } + } + + // Check the character which ends the tag. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + if !is_blankz(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a tag", + start_mark, "did not find expected whitespace or line break") + return false + } + + end_mark := parser.mark + + // Create a token. + *token = yaml_token_t{ + typ: yaml_TAG_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + value: handle, + suffix: suffix, + } + return true +} + +// Scan a tag handle. +func yaml_parser_scan_tag_handle(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, handle *[]byte) bool { + // Check the initial '!' character. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + if parser.buffer[parser.buffer_pos] != '!' { + yaml_parser_set_scanner_tag_error(parser, directive, + start_mark, "did not find expected '!'") + return false + } + + var s []byte + + // Copy the '!' character. + s = read(parser, s) + + // Copy all subsequent alphabetical and numerical characters. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + for is_alpha(parser.buffer, parser.buffer_pos) { + s = read(parser, s) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + // Check if the trailing character is '!' and copy it. + if parser.buffer[parser.buffer_pos] == '!' { + s = read(parser, s) + } else { + // It's either the '!' tag or not really a tag handle. If it's a %TAG + // directive, it's an error. If it's a tag token, it must be a part of URI. + if directive && string(s) != "!" { + yaml_parser_set_scanner_tag_error(parser, directive, + start_mark, "did not find expected '!'") + return false + } + } + + *handle = s + return true +} + +// Scan a tag. +func yaml_parser_scan_tag_uri(parser *yaml_parser_t, directive bool, head []byte, start_mark yaml_mark_t, uri *[]byte) bool { + //size_t length = head ? strlen((char *)head) : 0 + var s []byte + hasTag := len(head) > 0 + + // Copy the head if needed. + // + // Note that we don't copy the leading '!' character. + if len(head) > 1 { + s = append(s, head[1:]...) + } + + // Scan the tag. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + + // The set of characters that may appear in URI is as follows: + // + // '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&', + // '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', + // '%'. + // [Go] TODO Convert this into more reasonable logic. + for is_alpha(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == ';' || + parser.buffer[parser.buffer_pos] == '/' || parser.buffer[parser.buffer_pos] == '?' || + parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == '@' || + parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '=' || + parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '$' || + parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '.' || + parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '~' || + parser.buffer[parser.buffer_pos] == '*' || parser.buffer[parser.buffer_pos] == '\'' || + parser.buffer[parser.buffer_pos] == '(' || parser.buffer[parser.buffer_pos] == ')' || + parser.buffer[parser.buffer_pos] == '[' || parser.buffer[parser.buffer_pos] == ']' || + parser.buffer[parser.buffer_pos] == '%' { + // Check if it is a URI-escape sequence. + if parser.buffer[parser.buffer_pos] == '%' { + if !yaml_parser_scan_uri_escapes(parser, directive, start_mark, &s) { + return false + } + } else { + s = read(parser, s) + } + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + hasTag = true + } + + if !hasTag { + yaml_parser_set_scanner_tag_error(parser, directive, + start_mark, "did not find expected tag URI") + return false + } + *uri = s + return true +} + +// Decode an URI-escape sequence corresponding to a single UTF-8 character. +func yaml_parser_scan_uri_escapes(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, s *[]byte) bool { + + // Decode the required number of characters. + w := 1024 + for w > 0 { + // Check for a URI-escaped octet. + if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) { + return false + } + + if !(parser.buffer[parser.buffer_pos] == '%' && + is_hex(parser.buffer, parser.buffer_pos+1) && + is_hex(parser.buffer, parser.buffer_pos+2)) { + return yaml_parser_set_scanner_tag_error(parser, directive, + start_mark, "did not find URI escaped octet") + } + + // Get the octet. + octet := byte((as_hex(parser.buffer, parser.buffer_pos+1) << 4) + as_hex(parser.buffer, parser.buffer_pos+2)) + + // If it is the leading octet, determine the length of the UTF-8 sequence. + if w == 1024 { + w = width(octet) + if w == 0 { + return yaml_parser_set_scanner_tag_error(parser, directive, + start_mark, "found an incorrect leading UTF-8 octet") + } + } else { + // Check if the trailing octet is correct. + if octet&0xC0 != 0x80 { + return yaml_parser_set_scanner_tag_error(parser, directive, + start_mark, "found an incorrect trailing UTF-8 octet") + } + } + + // Copy the octet and move the pointers. + *s = append(*s, octet) + skip(parser) + skip(parser) + skip(parser) + w-- + } + return true +} + +// Scan a block scalar. +func yaml_parser_scan_block_scalar(parser *yaml_parser_t, token *yaml_token_t, literal bool) bool { + // Eat the indicator '|' or '>'. + start_mark := parser.mark + skip(parser) + + // Scan the additional block scalar indicators. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + + // Check for a chomping indicator. + var chomping, increment int + if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' { + // Set the chomping method and eat the indicator. + if parser.buffer[parser.buffer_pos] == '+' { + chomping = +1 + } else { + chomping = -1 + } + skip(parser) + + // Check for an indentation indicator. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + if is_digit(parser.buffer, parser.buffer_pos) { + // Check that the indentation is greater than 0. + if parser.buffer[parser.buffer_pos] == '0' { + yaml_parser_set_scanner_error(parser, "while scanning a block scalar", + start_mark, "found an indentation indicator equal to 0") + return false + } + + // Get the indentation level and eat the indicator. + increment = as_digit(parser.buffer, parser.buffer_pos) + skip(parser) + } + + } else if is_digit(parser.buffer, parser.buffer_pos) { + // Do the same as above, but in the opposite order. + + if parser.buffer[parser.buffer_pos] == '0' { + yaml_parser_set_scanner_error(parser, "while scanning a block scalar", + start_mark, "found an indentation indicator equal to 0") + return false + } + increment = as_digit(parser.buffer, parser.buffer_pos) + skip(parser) + + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' { + if parser.buffer[parser.buffer_pos] == '+' { + chomping = +1 + } else { + chomping = -1 + } + skip(parser) + } + } + + // Eat whitespaces and comments to the end of the line. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + for is_blank(parser.buffer, parser.buffer_pos) { + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + if parser.buffer[parser.buffer_pos] == '#' { + if !yaml_parser_scan_line_comment(parser, start_mark) { + return false + } + for !is_breakz(parser.buffer, parser.buffer_pos) { + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + } + + // Check if we are at the end of the line. + if !is_breakz(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a block scalar", + start_mark, "did not find expected comment or line break") + return false + } + + // Eat a line break. + if is_break(parser.buffer, parser.buffer_pos) { + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + skip_line(parser) + } + + end_mark := parser.mark + + // Set the indentation level if it was specified. + var indent int + if increment > 0 { + if parser.indent >= 0 { + indent = parser.indent + increment + } else { + indent = increment + } + } + + // Scan the leading line breaks and determine the indentation level if needed. + var s, leading_break, trailing_breaks []byte + if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) { + return false + } + + // Scan the block scalar content. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + var leading_blank, trailing_blank bool + for parser.mark.column == indent && !is_z(parser.buffer, parser.buffer_pos) { + // We are at the beginning of a non-empty line. + + // Is it a trailing whitespace? + trailing_blank = is_blank(parser.buffer, parser.buffer_pos) + + // Check if we need to fold the leading line break. + if !literal && !leading_blank && !trailing_blank && len(leading_break) > 0 && leading_break[0] == '\n' { + // Do we need to join the lines by space? + if len(trailing_breaks) == 0 { + s = append(s, ' ') + } + } else { + s = append(s, leading_break...) + } + leading_break = leading_break[:0] + + // Append the remaining line breaks. + s = append(s, trailing_breaks...) + trailing_breaks = trailing_breaks[:0] + + // Is it a leading whitespace? + leading_blank = is_blank(parser.buffer, parser.buffer_pos) + + // Consume the current line. + for !is_breakz(parser.buffer, parser.buffer_pos) { + s = read(parser, s) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + // Consume the line break. + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + + leading_break = read_line(parser, leading_break) + + // Eat the following indentation spaces and line breaks. + if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) { + return false + } + } + + // Chomp the tail. + if chomping != -1 { + s = append(s, leading_break...) + } + if chomping == 1 { + s = append(s, trailing_breaks...) + } + + // Create a token. + *token = yaml_token_t{ + typ: yaml_SCALAR_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + value: s, + style: yaml_LITERAL_SCALAR_STYLE, + } + if !literal { + token.style = yaml_FOLDED_SCALAR_STYLE + } + return true +} + +// Scan indentation spaces and line breaks for a block scalar. Determine the +// indentation level if needed. +func yaml_parser_scan_block_scalar_breaks(parser *yaml_parser_t, indent *int, breaks *[]byte, start_mark yaml_mark_t, end_mark *yaml_mark_t) bool { + *end_mark = parser.mark + + // Eat the indentation spaces and line breaks. + max_indent := 0 + for { + // Eat the indentation spaces. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + for (*indent == 0 || parser.mark.column < *indent) && is_space(parser.buffer, parser.buffer_pos) { + skip(parser) + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + if parser.mark.column > max_indent { + max_indent = parser.mark.column + } + + // Check for a tab character messing the indentation. + if (*indent == 0 || parser.mark.column < *indent) && is_tab(parser.buffer, parser.buffer_pos) { + return yaml_parser_set_scanner_error(parser, "while scanning a block scalar", + start_mark, "found a tab character where an indentation space is expected") + } + + // Have we found a non-empty line? + if !is_break(parser.buffer, parser.buffer_pos) { + break + } + + // Consume the line break. + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + // [Go] Should really be returning breaks instead. + *breaks = read_line(parser, *breaks) + *end_mark = parser.mark + } + + // Determine the indentation level if needed. + if *indent == 0 { + *indent = max_indent + if *indent < parser.indent+1 { + *indent = parser.indent + 1 + } + if *indent < 1 { + *indent = 1 + } + } + return true +} + +// Scan a quoted scalar. +func yaml_parser_scan_flow_scalar(parser *yaml_parser_t, token *yaml_token_t, single bool) bool { + // Eat the left quote. + start_mark := parser.mark + skip(parser) + + // Consume the content of the quoted scalar. + var s, leading_break, trailing_breaks, whitespaces []byte + for { + // Check that there are no document indicators at the beginning of the line. + if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { + return false + } + + if parser.mark.column == 0 && + ((parser.buffer[parser.buffer_pos+0] == '-' && + parser.buffer[parser.buffer_pos+1] == '-' && + parser.buffer[parser.buffer_pos+2] == '-') || + (parser.buffer[parser.buffer_pos+0] == '.' && + parser.buffer[parser.buffer_pos+1] == '.' && + parser.buffer[parser.buffer_pos+2] == '.')) && + is_blankz(parser.buffer, parser.buffer_pos+3) { + yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", + start_mark, "found unexpected document indicator") + return false + } + + // Check for EOF. + if is_z(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", + start_mark, "found unexpected end of stream") + return false + } + + // Consume non-blank characters. + leading_blanks := false + for !is_blankz(parser.buffer, parser.buffer_pos) { + if single && parser.buffer[parser.buffer_pos] == '\'' && parser.buffer[parser.buffer_pos+1] == '\'' { + // Is is an escaped single quote. + s = append(s, '\'') + skip(parser) + skip(parser) + + } else if single && parser.buffer[parser.buffer_pos] == '\'' { + // It is a right single quote. + break + } else if !single && parser.buffer[parser.buffer_pos] == '"' { + // It is a right double quote. + break + + } else if !single && parser.buffer[parser.buffer_pos] == '\\' && is_break(parser.buffer, parser.buffer_pos+1) { + // It is an escaped line break. + if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) { + return false + } + skip(parser) + skip_line(parser) + leading_blanks = true + break + + } else if !single && parser.buffer[parser.buffer_pos] == '\\' { + // It is an escape sequence. + code_length := 0 + + // Check the escape character. + switch parser.buffer[parser.buffer_pos+1] { + case '0': + s = append(s, 0) + case 'a': + s = append(s, '\x07') + case 'b': + s = append(s, '\x08') + case 't', '\t': + s = append(s, '\x09') + case 'n': + s = append(s, '\x0A') + case 'v': + s = append(s, '\x0B') + case 'f': + s = append(s, '\x0C') + case 'r': + s = append(s, '\x0D') + case 'e': + s = append(s, '\x1B') + case ' ': + s = append(s, '\x20') + case '"': + s = append(s, '"') + case '\'': + s = append(s, '\'') + case '\\': + s = append(s, '\\') + case 'N': // NEL (#x85) + s = append(s, '\xC2') + s = append(s, '\x85') + case '_': // #xA0 + s = append(s, '\xC2') + s = append(s, '\xA0') + case 'L': // LS (#x2028) + s = append(s, '\xE2') + s = append(s, '\x80') + s = append(s, '\xA8') + case 'P': // PS (#x2029) + s = append(s, '\xE2') + s = append(s, '\x80') + s = append(s, '\xA9') + case 'x': + code_length = 2 + case 'u': + code_length = 4 + case 'U': + code_length = 8 + default: + yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", + start_mark, "found unknown escape character") + return false + } + + skip(parser) + skip(parser) + + // Consume an arbitrary escape code. + if code_length > 0 { + var value int + + // Scan the character value. + if parser.unread < code_length && !yaml_parser_update_buffer(parser, code_length) { + return false + } + for k := 0; k < code_length; k++ { + if !is_hex(parser.buffer, parser.buffer_pos+k) { + yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", + start_mark, "did not find expected hexdecimal number") + return false + } + value = (value << 4) + as_hex(parser.buffer, parser.buffer_pos+k) + } + + // Check the value and write the character. + if (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF { + yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", + start_mark, "found invalid Unicode character escape code") + return false + } + if value <= 0x7F { + s = append(s, byte(value)) + } else if value <= 0x7FF { + s = append(s, byte(0xC0+(value>>6))) + s = append(s, byte(0x80+(value&0x3F))) + } else if value <= 0xFFFF { + s = append(s, byte(0xE0+(value>>12))) + s = append(s, byte(0x80+((value>>6)&0x3F))) + s = append(s, byte(0x80+(value&0x3F))) + } else { + s = append(s, byte(0xF0+(value>>18))) + s = append(s, byte(0x80+((value>>12)&0x3F))) + s = append(s, byte(0x80+((value>>6)&0x3F))) + s = append(s, byte(0x80+(value&0x3F))) + } + + // Advance the pointer. + for k := 0; k < code_length; k++ { + skip(parser) + } + } + } else { + // It is a non-escaped non-blank character. + s = read(parser, s) + } + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + } + + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + + // Check if we are at the end of the scalar. + if single { + if parser.buffer[parser.buffer_pos] == '\'' { + break + } + } else { + if parser.buffer[parser.buffer_pos] == '"' { + break + } + } + + // Consume blank characters. + for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) { + if is_blank(parser.buffer, parser.buffer_pos) { + // Consume a space or a tab character. + if !leading_blanks { + whitespaces = read(parser, whitespaces) + } else { + skip(parser) + } + } else { + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + + // Check if it is a first line break. + if !leading_blanks { + whitespaces = whitespaces[:0] + leading_break = read_line(parser, leading_break) + leading_blanks = true + } else { + trailing_breaks = read_line(parser, trailing_breaks) + } + } + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + // Join the whitespaces or fold line breaks. + if leading_blanks { + // Do we need to fold line breaks? + if len(leading_break) > 0 && leading_break[0] == '\n' { + if len(trailing_breaks) == 0 { + s = append(s, ' ') + } else { + s = append(s, trailing_breaks...) + } + } else { + s = append(s, leading_break...) + s = append(s, trailing_breaks...) + } + trailing_breaks = trailing_breaks[:0] + leading_break = leading_break[:0] + } else { + s = append(s, whitespaces...) + whitespaces = whitespaces[:0] + } + } + + // Eat the right quote. + skip(parser) + end_mark := parser.mark + + // Create a token. + *token = yaml_token_t{ + typ: yaml_SCALAR_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + value: s, + style: yaml_SINGLE_QUOTED_SCALAR_STYLE, + } + if !single { + token.style = yaml_DOUBLE_QUOTED_SCALAR_STYLE + } + return true +} + +// Scan a plain scalar. +func yaml_parser_scan_plain_scalar(parser *yaml_parser_t, token *yaml_token_t) bool { + + var s, leading_break, trailing_breaks, whitespaces []byte + var leading_blanks bool + var indent = parser.indent + 1 + + start_mark := parser.mark + end_mark := parser.mark + + // Consume the content of the plain scalar. + for { + // Check for a document indicator. + if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { + return false + } + if parser.mark.column == 0 && + ((parser.buffer[parser.buffer_pos+0] == '-' && + parser.buffer[parser.buffer_pos+1] == '-' && + parser.buffer[parser.buffer_pos+2] == '-') || + (parser.buffer[parser.buffer_pos+0] == '.' && + parser.buffer[parser.buffer_pos+1] == '.' && + parser.buffer[parser.buffer_pos+2] == '.')) && + is_blankz(parser.buffer, parser.buffer_pos+3) { + break + } + + // Check for a comment. + if parser.buffer[parser.buffer_pos] == '#' { + break + } + + // Consume non-blank characters. + for !is_blankz(parser.buffer, parser.buffer_pos) { + + // Check for indicators that may end a plain scalar. + if (parser.buffer[parser.buffer_pos] == ':' && is_blankz(parser.buffer, parser.buffer_pos+1)) || + (parser.flow_level > 0 && + (parser.buffer[parser.buffer_pos] == ',' || + parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == '[' || + parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' || + parser.buffer[parser.buffer_pos] == '}')) { + break + } + + // Check if we need to join whitespaces and breaks. + if leading_blanks || len(whitespaces) > 0 { + if leading_blanks { + // Do we need to fold line breaks? + if leading_break[0] == '\n' { + if len(trailing_breaks) == 0 { + s = append(s, ' ') + } else { + s = append(s, trailing_breaks...) + } + } else { + s = append(s, leading_break...) + s = append(s, trailing_breaks...) + } + trailing_breaks = trailing_breaks[:0] + leading_break = leading_break[:0] + leading_blanks = false + } else { + s = append(s, whitespaces...) + whitespaces = whitespaces[:0] + } + } + + // Copy the character. + s = read(parser, s) + + end_mark = parser.mark + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + } + + // Is it the end? + if !(is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos)) { + break + } + + // Consume blank characters. + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + + for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) { + if is_blank(parser.buffer, parser.buffer_pos) { + + // Check for tab characters that abuse indentation. + if leading_blanks && parser.mark.column < indent && is_tab(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", + start_mark, "found a tab character that violates indentation") + return false + } + + // Consume a space or a tab character. + if !leading_blanks { + whitespaces = read(parser, whitespaces) + } else { + skip(parser) + } + } else { + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + + // Check if it is a first line break. + if !leading_blanks { + whitespaces = whitespaces[:0] + leading_break = read_line(parser, leading_break) + leading_blanks = true + } else { + trailing_breaks = read_line(parser, trailing_breaks) + } + } + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + } + + // Check indentation level. + if parser.flow_level == 0 && parser.mark.column < indent { + break + } + } + + // Create a token. + *token = yaml_token_t{ + typ: yaml_SCALAR_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + value: s, + style: yaml_PLAIN_SCALAR_STYLE, + } + + // Note that we change the 'simple_key_allowed' flag. + if leading_blanks { + parser.simple_key_allowed = true + } + return true +} + +func yaml_parser_scan_line_comment(parser *yaml_parser_t, token_mark yaml_mark_t) bool { + if parser.newlines > 0 { + return true + } + + var start_mark yaml_mark_t + var text []byte + + for peek := 0; peek < 512; peek++ { + if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) { + break + } + if is_blank(parser.buffer, parser.buffer_pos+peek) { + continue + } + if parser.buffer[parser.buffer_pos+peek] == '#' { + seen := parser.mark.index+peek + for { + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + if is_breakz(parser.buffer, parser.buffer_pos) { + if parser.mark.index >= seen { + break + } + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + skip_line(parser) + } else if parser.mark.index >= seen { + if len(text) == 0 { + start_mark = parser.mark + } + text = read(parser, text) + } else { + skip(parser) + } + } + } + break + } + if len(text) > 0 { + parser.comments = append(parser.comments, yaml_comment_t{ + token_mark: token_mark, + start_mark: start_mark, + line: text, + }) + } + return true +} + +func yaml_parser_scan_comments(parser *yaml_parser_t, scan_mark yaml_mark_t) bool { + token := parser.tokens[len(parser.tokens)-1] + + if token.typ == yaml_FLOW_ENTRY_TOKEN && len(parser.tokens) > 1 { + token = parser.tokens[len(parser.tokens)-2] + } + + var token_mark = token.start_mark + var start_mark yaml_mark_t + var next_indent = parser.indent + if next_indent < 0 { + next_indent = 0 + } + + var recent_empty = false + var first_empty = parser.newlines <= 1 + + var line = parser.mark.line + var column = parser.mark.column + + var text []byte + + // The foot line is the place where a comment must start to + // still be considered as a foot of the prior content. + // If there's some content in the currently parsed line, then + // the foot is the line below it. + var foot_line = -1 + if scan_mark.line > 0 { + foot_line = parser.mark.line-parser.newlines+1 + if parser.newlines == 0 && parser.mark.column > 1 { + foot_line++ + } + } + + var peek = 0 + for ; peek < 512; peek++ { + if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) { + break + } + column++ + if is_blank(parser.buffer, parser.buffer_pos+peek) { + continue + } + c := parser.buffer[parser.buffer_pos+peek] + var close_flow = parser.flow_level > 0 && (c == ']' || c == '}') + if close_flow || is_breakz(parser.buffer, parser.buffer_pos+peek) { + // Got line break or terminator. + if close_flow || !recent_empty { + if close_flow || first_empty && (start_mark.line == foot_line && token.typ != yaml_VALUE_TOKEN || start_mark.column-1 < next_indent) { + // This is the first empty line and there were no empty lines before, + // so this initial part of the comment is a foot of the prior token + // instead of being a head for the following one. Split it up. + // Alternatively, this might also be the last comment inside a flow + // scope, so it must be a footer. + if len(text) > 0 { + if start_mark.column-1 < next_indent { + // If dedented it's unrelated to the prior token. + token_mark = start_mark + } + parser.comments = append(parser.comments, yaml_comment_t{ + scan_mark: scan_mark, + token_mark: token_mark, + start_mark: start_mark, + end_mark: yaml_mark_t{parser.mark.index + peek, line, column}, + foot: text, + }) + scan_mark = yaml_mark_t{parser.mark.index + peek, line, column} + token_mark = scan_mark + text = nil + } + } else { + if len(text) > 0 && parser.buffer[parser.buffer_pos+peek] != 0 { + text = append(text, '\n') + } + } + } + if !is_break(parser.buffer, parser.buffer_pos+peek) { + break + } + first_empty = false + recent_empty = true + column = 0 + line++ + continue + } + + if len(text) > 0 && (close_flow || column-1 < next_indent && column != start_mark.column) { + // The comment at the different indentation is a foot of the + // preceding data rather than a head of the upcoming one. + parser.comments = append(parser.comments, yaml_comment_t{ + scan_mark: scan_mark, + token_mark: token_mark, + start_mark: start_mark, + end_mark: yaml_mark_t{parser.mark.index + peek, line, column}, + foot: text, + }) + scan_mark = yaml_mark_t{parser.mark.index + peek, line, column} + token_mark = scan_mark + text = nil + } + + if parser.buffer[parser.buffer_pos+peek] != '#' { + break + } + + if len(text) == 0 { + start_mark = yaml_mark_t{parser.mark.index + peek, line, column} + } else { + text = append(text, '\n') + } + + recent_empty = false + + // Consume until after the consumed comment line. + seen := parser.mark.index+peek + for { + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + if is_breakz(parser.buffer, parser.buffer_pos) { + if parser.mark.index >= seen { + break + } + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + skip_line(parser) + } else if parser.mark.index >= seen { + text = read(parser, text) + } else { + skip(parser) + } + } + + peek = 0 + column = 0 + line = parser.mark.line + next_indent = parser.indent + if next_indent < 0 { + next_indent = 0 + } + } + + if len(text) > 0 { + parser.comments = append(parser.comments, yaml_comment_t{ + scan_mark: scan_mark, + token_mark: start_mark, + start_mark: start_mark, + end_mark: yaml_mark_t{parser.mark.index + peek - 1, line, column}, + head: text, + }) + } + return true +} diff --git a/goyaml.v3/sorter.go b/goyaml.v3/sorter.go new file mode 100644 index 0000000..9210ece --- /dev/null +++ b/goyaml.v3/sorter.go @@ -0,0 +1,134 @@ +// +// Copyright (c) 2011-2019 Canonical Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package yaml + +import ( + "reflect" + "unicode" +) + +type keyList []reflect.Value + +func (l keyList) Len() int { return len(l) } +func (l keyList) Swap(i, j int) { l[i], l[j] = l[j], l[i] } +func (l keyList) Less(i, j int) bool { + a := l[i] + b := l[j] + ak := a.Kind() + bk := b.Kind() + for (ak == reflect.Interface || ak == reflect.Ptr) && !a.IsNil() { + a = a.Elem() + ak = a.Kind() + } + for (bk == reflect.Interface || bk == reflect.Ptr) && !b.IsNil() { + b = b.Elem() + bk = b.Kind() + } + af, aok := keyFloat(a) + bf, bok := keyFloat(b) + if aok && bok { + if af != bf { + return af < bf + } + if ak != bk { + return ak < bk + } + return numLess(a, b) + } + if ak != reflect.String || bk != reflect.String { + return ak < bk + } + ar, br := []rune(a.String()), []rune(b.String()) + digits := false + for i := 0; i < len(ar) && i < len(br); i++ { + if ar[i] == br[i] { + digits = unicode.IsDigit(ar[i]) + continue + } + al := unicode.IsLetter(ar[i]) + bl := unicode.IsLetter(br[i]) + if al && bl { + return ar[i] < br[i] + } + if al || bl { + if digits { + return al + } else { + return bl + } + } + var ai, bi int + var an, bn int64 + if ar[i] == '0' || br[i] == '0' { + for j := i - 1; j >= 0 && unicode.IsDigit(ar[j]); j-- { + if ar[j] != '0' { + an = 1 + bn = 1 + break + } + } + } + for ai = i; ai < len(ar) && unicode.IsDigit(ar[ai]); ai++ { + an = an*10 + int64(ar[ai]-'0') + } + for bi = i; bi < len(br) && unicode.IsDigit(br[bi]); bi++ { + bn = bn*10 + int64(br[bi]-'0') + } + if an != bn { + return an < bn + } + if ai != bi { + return ai < bi + } + return ar[i] < br[i] + } + return len(ar) < len(br) +} + +// keyFloat returns a float value for v if it is a number/bool +// and whether it is a number/bool or not. +func keyFloat(v reflect.Value) (f float64, ok bool) { + switch v.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return float64(v.Int()), true + case reflect.Float32, reflect.Float64: + return v.Float(), true + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return float64(v.Uint()), true + case reflect.Bool: + if v.Bool() { + return 1, true + } + return 0, true + } + return 0, false +} + +// numLess returns whether a < b. +// a and b must necessarily have the same kind. +func numLess(a, b reflect.Value) bool { + switch a.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return a.Int() < b.Int() + case reflect.Float32, reflect.Float64: + return a.Float() < b.Float() + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return a.Uint() < b.Uint() + case reflect.Bool: + return !a.Bool() && b.Bool() + } + panic("not a number") +} diff --git a/goyaml.v3/suite_test.go b/goyaml.v3/suite_test.go new file mode 100644 index 0000000..4630b5f --- /dev/null +++ b/goyaml.v3/suite_test.go @@ -0,0 +1,27 @@ +// +// Copyright (c) 2011-2019 Canonical Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package yaml_test + +import ( + . "gopkg.in/check.v1" + "testing" +) + +func Test(t *testing.T) { TestingT(t) } + +type S struct{} + +var _ = Suite(&S{}) diff --git a/goyaml.v3/writerc.go b/goyaml.v3/writerc.go new file mode 100644 index 0000000..b8a116b --- /dev/null +++ b/goyaml.v3/writerc.go @@ -0,0 +1,48 @@ +// +// Copyright (c) 2011-2019 Canonical Ltd +// Copyright (c) 2006-2010 Kirill Simonov +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +package yaml + +// Set the writer error and return false. +func yaml_emitter_set_writer_error(emitter *yaml_emitter_t, problem string) bool { + emitter.error = yaml_WRITER_ERROR + emitter.problem = problem + return false +} + +// Flush the output buffer. +func yaml_emitter_flush(emitter *yaml_emitter_t) bool { + if emitter.write_handler == nil { + panic("write handler not set") + } + + // Check if the buffer is empty. + if emitter.buffer_pos == 0 { + return true + } + + if err := emitter.write_handler(emitter, emitter.buffer[:emitter.buffer_pos]); err != nil { + return yaml_emitter_set_writer_error(emitter, "write error: "+err.Error()) + } + emitter.buffer_pos = 0 + return true +} diff --git a/goyaml.v3/yaml.go b/goyaml.v3/yaml.go new file mode 100644 index 0000000..8cec6da --- /dev/null +++ b/goyaml.v3/yaml.go @@ -0,0 +1,698 @@ +// +// Copyright (c) 2011-2019 Canonical Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package yaml implements YAML support for the Go language. +// +// Source code and other details for the project are available at GitHub: +// +// https://github.com/go-yaml/yaml +// +package yaml + +import ( + "errors" + "fmt" + "io" + "reflect" + "strings" + "sync" + "unicode/utf8" +) + +// The Unmarshaler interface may be implemented by types to customize their +// behavior when being unmarshaled from a YAML document. +type Unmarshaler interface { + UnmarshalYAML(value *Node) error +} + +type obsoleteUnmarshaler interface { + UnmarshalYAML(unmarshal func(interface{}) error) error +} + +// The Marshaler interface may be implemented by types to customize their +// behavior when being marshaled into a YAML document. The returned value +// is marshaled in place of the original value implementing Marshaler. +// +// If an error is returned by MarshalYAML, the marshaling procedure stops +// and returns with the provided error. +type Marshaler interface { + MarshalYAML() (interface{}, error) +} + +// Unmarshal decodes the first document found within the in byte slice +// and assigns decoded values into the out value. +// +// Maps and pointers (to a struct, string, int, etc) are accepted as out +// values. If an internal pointer within a struct is not initialized, +// the yaml package will initialize it if necessary for unmarshalling +// the provided data. The out parameter must not be nil. +// +// The type of the decoded values should be compatible with the respective +// values in out. If one or more values cannot be decoded due to a type +// mismatches, decoding continues partially until the end of the YAML +// content, and a *yaml.TypeError is returned with details for all +// missed values. +// +// Struct fields are only unmarshalled if they are exported (have an +// upper case first letter), and are unmarshalled using the field name +// lowercased as the default key. Custom keys may be defined via the +// "yaml" name in the field tag: the content preceding the first comma +// is used as the key, and the following comma-separated options are +// used to tweak the marshalling process (see Marshal). +// Conflicting names result in a runtime error. +// +// For example: +// +// type T struct { +// F int `yaml:"a,omitempty"` +// B int +// } +// var t T +// yaml.Unmarshal([]byte("a: 1\nb: 2"), &t) +// +// See the documentation of Marshal for the format of tags and a list of +// supported tag options. +// +func Unmarshal(in []byte, out interface{}) (err error) { + return unmarshal(in, out, false) +} + +// A Decoder reads and decodes YAML values from an input stream. +type Decoder struct { + parser *parser + knownFields bool +} + +// NewDecoder returns a new decoder that reads from r. +// +// The decoder introduces its own buffering and may read +// data from r beyond the YAML values requested. +func NewDecoder(r io.Reader) *Decoder { + return &Decoder{ + parser: newParserFromReader(r), + } +} + +// KnownFields ensures that the keys in decoded mappings to +// exist as fields in the struct being decoded into. +func (dec *Decoder) KnownFields(enable bool) { + dec.knownFields = enable +} + +// Decode reads the next YAML-encoded value from its input +// and stores it in the value pointed to by v. +// +// See the documentation for Unmarshal for details about the +// conversion of YAML into a Go value. +func (dec *Decoder) Decode(v interface{}) (err error) { + d := newDecoder() + d.knownFields = dec.knownFields + defer handleErr(&err) + node := dec.parser.parse() + if node == nil { + return io.EOF + } + out := reflect.ValueOf(v) + if out.Kind() == reflect.Ptr && !out.IsNil() { + out = out.Elem() + } + d.unmarshal(node, out) + if len(d.terrors) > 0 { + return &TypeError{d.terrors} + } + return nil +} + +// Decode decodes the node and stores its data into the value pointed to by v. +// +// See the documentation for Unmarshal for details about the +// conversion of YAML into a Go value. +func (n *Node) Decode(v interface{}) (err error) { + d := newDecoder() + defer handleErr(&err) + out := reflect.ValueOf(v) + if out.Kind() == reflect.Ptr && !out.IsNil() { + out = out.Elem() + } + d.unmarshal(n, out) + if len(d.terrors) > 0 { + return &TypeError{d.terrors} + } + return nil +} + +func unmarshal(in []byte, out interface{}, strict bool) (err error) { + defer handleErr(&err) + d := newDecoder() + p := newParser(in) + defer p.destroy() + node := p.parse() + if node != nil { + v := reflect.ValueOf(out) + if v.Kind() == reflect.Ptr && !v.IsNil() { + v = v.Elem() + } + d.unmarshal(node, v) + } + if len(d.terrors) > 0 { + return &TypeError{d.terrors} + } + return nil +} + +// Marshal serializes the value provided into a YAML document. The structure +// of the generated document will reflect the structure of the value itself. +// Maps and pointers (to struct, string, int, etc) are accepted as the in value. +// +// Struct fields are only marshalled if they are exported (have an upper case +// first letter), and are marshalled using the field name lowercased as the +// default key. Custom keys may be defined via the "yaml" name in the field +// tag: the content preceding the first comma is used as the key, and the +// following comma-separated options are used to tweak the marshalling process. +// Conflicting names result in a runtime error. +// +// The field tag format accepted is: +// +// `(...) yaml:"[][,[,]]" (...)` +// +// The following flags are currently supported: +// +// omitempty Only include the field if it's not set to the zero +// value for the type or to empty slices or maps. +// Zero valued structs will be omitted if all their public +// fields are zero, unless they implement an IsZero +// method (see the IsZeroer interface type), in which +// case the field will be excluded if IsZero returns true. +// +// flow Marshal using a flow style (useful for structs, +// sequences and maps). +// +// inline Inline the field, which must be a struct or a map, +// causing all of its fields or keys to be processed as if +// they were part of the outer struct. For maps, keys must +// not conflict with the yaml keys of other struct fields. +// +// In addition, if the key is "-", the field is ignored. +// +// For example: +// +// type T struct { +// F int `yaml:"a,omitempty"` +// B int +// } +// yaml.Marshal(&T{B: 2}) // Returns "b: 2\n" +// yaml.Marshal(&T{F: 1}} // Returns "a: 1\nb: 0\n" +// +func Marshal(in interface{}) (out []byte, err error) { + defer handleErr(&err) + e := newEncoder() + defer e.destroy() + e.marshalDoc("", reflect.ValueOf(in)) + e.finish() + out = e.out + return +} + +// An Encoder writes YAML values to an output stream. +type Encoder struct { + encoder *encoder +} + +// NewEncoder returns a new encoder that writes to w. +// The Encoder should be closed after use to flush all data +// to w. +func NewEncoder(w io.Writer) *Encoder { + return &Encoder{ + encoder: newEncoderWithWriter(w), + } +} + +// Encode writes the YAML encoding of v to the stream. +// If multiple items are encoded to the stream, the +// second and subsequent document will be preceded +// with a "---" document separator, but the first will not. +// +// See the documentation for Marshal for details about the conversion of Go +// values to YAML. +func (e *Encoder) Encode(v interface{}) (err error) { + defer handleErr(&err) + e.encoder.marshalDoc("", reflect.ValueOf(v)) + return nil +} + +// Encode encodes value v and stores its representation in n. +// +// See the documentation for Marshal for details about the +// conversion of Go values into YAML. +func (n *Node) Encode(v interface{}) (err error) { + defer handleErr(&err) + e := newEncoder() + defer e.destroy() + e.marshalDoc("", reflect.ValueOf(v)) + e.finish() + p := newParser(e.out) + p.textless = true + defer p.destroy() + doc := p.parse() + *n = *doc.Content[0] + return nil +} + +// SetIndent changes the used indentation used when encoding. +func (e *Encoder) SetIndent(spaces int) { + if spaces < 0 { + panic("yaml: cannot indent to a negative number of spaces") + } + e.encoder.indent = spaces +} + +// Close closes the encoder by writing any remaining data. +// It does not write a stream terminating string "...". +func (e *Encoder) Close() (err error) { + defer handleErr(&err) + e.encoder.finish() + return nil +} + +func handleErr(err *error) { + if v := recover(); v != nil { + if e, ok := v.(yamlError); ok { + *err = e.err + } else { + panic(v) + } + } +} + +type yamlError struct { + err error +} + +func fail(err error) { + panic(yamlError{err}) +} + +func failf(format string, args ...interface{}) { + panic(yamlError{fmt.Errorf("yaml: "+format, args...)}) +} + +// A TypeError is returned by Unmarshal when one or more fields in +// the YAML document cannot be properly decoded into the requested +// types. When this error is returned, the value is still +// unmarshaled partially. +type TypeError struct { + Errors []string +} + +func (e *TypeError) Error() string { + return fmt.Sprintf("yaml: unmarshal errors:\n %s", strings.Join(e.Errors, "\n ")) +} + +type Kind uint32 + +const ( + DocumentNode Kind = 1 << iota + SequenceNode + MappingNode + ScalarNode + AliasNode +) + +type Style uint32 + +const ( + TaggedStyle Style = 1 << iota + DoubleQuotedStyle + SingleQuotedStyle + LiteralStyle + FoldedStyle + FlowStyle +) + +// Node represents an element in the YAML document hierarchy. While documents +// are typically encoded and decoded into higher level types, such as structs +// and maps, Node is an intermediate representation that allows detailed +// control over the content being decoded or encoded. +// +// It's worth noting that although Node offers access into details such as +// line numbers, colums, and comments, the content when re-encoded will not +// have its original textual representation preserved. An effort is made to +// render the data plesantly, and to preserve comments near the data they +// describe, though. +// +// Values that make use of the Node type interact with the yaml package in the +// same way any other type would do, by encoding and decoding yaml data +// directly or indirectly into them. +// +// For example: +// +// var person struct { +// Name string +// Address yaml.Node +// } +// err := yaml.Unmarshal(data, &person) +// +// Or by itself: +// +// var person Node +// err := yaml.Unmarshal(data, &person) +// +type Node struct { + // Kind defines whether the node is a document, a mapping, a sequence, + // a scalar value, or an alias to another node. The specific data type of + // scalar nodes may be obtained via the ShortTag and LongTag methods. + Kind Kind + + // Style allows customizing the apperance of the node in the tree. + Style Style + + // Tag holds the YAML tag defining the data type for the value. + // When decoding, this field will always be set to the resolved tag, + // even when it wasn't explicitly provided in the YAML content. + // When encoding, if this field is unset the value type will be + // implied from the node properties, and if it is set, it will only + // be serialized into the representation if TaggedStyle is used or + // the implicit tag diverges from the provided one. + Tag string + + // Value holds the unescaped and unquoted represenation of the value. + Value string + + // Anchor holds the anchor name for this node, which allows aliases to point to it. + Anchor string + + // Alias holds the node that this alias points to. Only valid when Kind is AliasNode. + Alias *Node + + // Content holds contained nodes for documents, mappings, and sequences. + Content []*Node + + // HeadComment holds any comments in the lines preceding the node and + // not separated by an empty line. + HeadComment string + + // LineComment holds any comments at the end of the line where the node is in. + LineComment string + + // FootComment holds any comments following the node and before empty lines. + FootComment string + + // Line and Column hold the node position in the decoded YAML text. + // These fields are not respected when encoding the node. + Line int + Column int +} + +// IsZero returns whether the node has all of its fields unset. +func (n *Node) IsZero() bool { + return n.Kind == 0 && n.Style == 0 && n.Tag == "" && n.Value == "" && n.Anchor == "" && n.Alias == nil && n.Content == nil && + n.HeadComment == "" && n.LineComment == "" && n.FootComment == "" && n.Line == 0 && n.Column == 0 +} + + +// LongTag returns the long form of the tag that indicates the data type for +// the node. If the Tag field isn't explicitly defined, one will be computed +// based on the node properties. +func (n *Node) LongTag() string { + return longTag(n.ShortTag()) +} + +// ShortTag returns the short form of the YAML tag that indicates data type for +// the node. If the Tag field isn't explicitly defined, one will be computed +// based on the node properties. +func (n *Node) ShortTag() string { + if n.indicatedString() { + return strTag + } + if n.Tag == "" || n.Tag == "!" { + switch n.Kind { + case MappingNode: + return mapTag + case SequenceNode: + return seqTag + case AliasNode: + if n.Alias != nil { + return n.Alias.ShortTag() + } + case ScalarNode: + tag, _ := resolve("", n.Value) + return tag + case 0: + // Special case to make the zero value convenient. + if n.IsZero() { + return nullTag + } + } + return "" + } + return shortTag(n.Tag) +} + +func (n *Node) indicatedString() bool { + return n.Kind == ScalarNode && + (shortTag(n.Tag) == strTag || + (n.Tag == "" || n.Tag == "!") && n.Style&(SingleQuotedStyle|DoubleQuotedStyle|LiteralStyle|FoldedStyle) != 0) +} + +// SetString is a convenience function that sets the node to a string value +// and defines its style in a pleasant way depending on its content. +func (n *Node) SetString(s string) { + n.Kind = ScalarNode + if utf8.ValidString(s) { + n.Value = s + n.Tag = strTag + } else { + n.Value = encodeBase64(s) + n.Tag = binaryTag + } + if strings.Contains(n.Value, "\n") { + n.Style = LiteralStyle + } +} + +// -------------------------------------------------------------------------- +// Maintain a mapping of keys to structure field indexes + +// The code in this section was copied from mgo/bson. + +// structInfo holds details for the serialization of fields of +// a given struct. +type structInfo struct { + FieldsMap map[string]fieldInfo + FieldsList []fieldInfo + + // InlineMap is the number of the field in the struct that + // contains an ,inline map, or -1 if there's none. + InlineMap int + + // InlineUnmarshalers holds indexes to inlined fields that + // contain unmarshaler values. + InlineUnmarshalers [][]int +} + +type fieldInfo struct { + Key string + Num int + OmitEmpty bool + Flow bool + // Id holds the unique field identifier, so we can cheaply + // check for field duplicates without maintaining an extra map. + Id int + + // Inline holds the field index if the field is part of an inlined struct. + Inline []int +} + +var structMap = make(map[reflect.Type]*structInfo) +var fieldMapMutex sync.RWMutex +var unmarshalerType reflect.Type + +func init() { + var v Unmarshaler + unmarshalerType = reflect.ValueOf(&v).Elem().Type() +} + +func getStructInfo(st reflect.Type) (*structInfo, error) { + fieldMapMutex.RLock() + sinfo, found := structMap[st] + fieldMapMutex.RUnlock() + if found { + return sinfo, nil + } + + n := st.NumField() + fieldsMap := make(map[string]fieldInfo) + fieldsList := make([]fieldInfo, 0, n) + inlineMap := -1 + inlineUnmarshalers := [][]int(nil) + for i := 0; i != n; i++ { + field := st.Field(i) + if field.PkgPath != "" && !field.Anonymous { + continue // Private field + } + + info := fieldInfo{Num: i} + + tag := field.Tag.Get("yaml") + if tag == "" && strings.Index(string(field.Tag), ":") < 0 { + tag = string(field.Tag) + } + if tag == "-" { + continue + } + + inline := false + fields := strings.Split(tag, ",") + if len(fields) > 1 { + for _, flag := range fields[1:] { + switch flag { + case "omitempty": + info.OmitEmpty = true + case "flow": + info.Flow = true + case "inline": + inline = true + default: + return nil, errors.New(fmt.Sprintf("unsupported flag %q in tag %q of type %s", flag, tag, st)) + } + } + tag = fields[0] + } + + if inline { + switch field.Type.Kind() { + case reflect.Map: + if inlineMap >= 0 { + return nil, errors.New("multiple ,inline maps in struct " + st.String()) + } + if field.Type.Key() != reflect.TypeOf("") { + return nil, errors.New("option ,inline needs a map with string keys in struct " + st.String()) + } + inlineMap = info.Num + case reflect.Struct, reflect.Ptr: + ftype := field.Type + for ftype.Kind() == reflect.Ptr { + ftype = ftype.Elem() + } + if ftype.Kind() != reflect.Struct { + return nil, errors.New("option ,inline may only be used on a struct or map field") + } + if reflect.PtrTo(ftype).Implements(unmarshalerType) { + inlineUnmarshalers = append(inlineUnmarshalers, []int{i}) + } else { + sinfo, err := getStructInfo(ftype) + if err != nil { + return nil, err + } + for _, index := range sinfo.InlineUnmarshalers { + inlineUnmarshalers = append(inlineUnmarshalers, append([]int{i}, index...)) + } + for _, finfo := range sinfo.FieldsList { + if _, found := fieldsMap[finfo.Key]; found { + msg := "duplicated key '" + finfo.Key + "' in struct " + st.String() + return nil, errors.New(msg) + } + if finfo.Inline == nil { + finfo.Inline = []int{i, finfo.Num} + } else { + finfo.Inline = append([]int{i}, finfo.Inline...) + } + finfo.Id = len(fieldsList) + fieldsMap[finfo.Key] = finfo + fieldsList = append(fieldsList, finfo) + } + } + default: + return nil, errors.New("option ,inline may only be used on a struct or map field") + } + continue + } + + if tag != "" { + info.Key = tag + } else { + info.Key = strings.ToLower(field.Name) + } + + if _, found = fieldsMap[info.Key]; found { + msg := "duplicated key '" + info.Key + "' in struct " + st.String() + return nil, errors.New(msg) + } + + info.Id = len(fieldsList) + fieldsList = append(fieldsList, info) + fieldsMap[info.Key] = info + } + + sinfo = &structInfo{ + FieldsMap: fieldsMap, + FieldsList: fieldsList, + InlineMap: inlineMap, + InlineUnmarshalers: inlineUnmarshalers, + } + + fieldMapMutex.Lock() + structMap[st] = sinfo + fieldMapMutex.Unlock() + return sinfo, nil +} + +// IsZeroer is used to check whether an object is zero to +// determine whether it should be omitted when marshaling +// with the omitempty flag. One notable implementation +// is time.Time. +type IsZeroer interface { + IsZero() bool +} + +func isZero(v reflect.Value) bool { + kind := v.Kind() + if z, ok := v.Interface().(IsZeroer); ok { + if (kind == reflect.Ptr || kind == reflect.Interface) && v.IsNil() { + return true + } + return z.IsZero() + } + switch kind { + case reflect.String: + return len(v.String()) == 0 + case reflect.Interface, reflect.Ptr: + return v.IsNil() + case reflect.Slice: + return v.Len() == 0 + case reflect.Map: + return v.Len() == 0 + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return v.Int() == 0 + case reflect.Float32, reflect.Float64: + return v.Float() == 0 + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return v.Uint() == 0 + case reflect.Bool: + return !v.Bool() + case reflect.Struct: + vt := v.Type() + for i := v.NumField() - 1; i >= 0; i-- { + if vt.Field(i).PkgPath != "" { + continue // Private field + } + if !isZero(v.Field(i)) { + return false + } + } + return true + } + return false +} diff --git a/goyaml.v3/yamlh.go b/goyaml.v3/yamlh.go new file mode 100644 index 0000000..7c6d007 --- /dev/null +++ b/goyaml.v3/yamlh.go @@ -0,0 +1,807 @@ +// +// Copyright (c) 2011-2019 Canonical Ltd +// Copyright (c) 2006-2010 Kirill Simonov +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +package yaml + +import ( + "fmt" + "io" +) + +// The version directive data. +type yaml_version_directive_t struct { + major int8 // The major version number. + minor int8 // The minor version number. +} + +// The tag directive data. +type yaml_tag_directive_t struct { + handle []byte // The tag handle. + prefix []byte // The tag prefix. +} + +type yaml_encoding_t int + +// The stream encoding. +const ( + // Let the parser choose the encoding. + yaml_ANY_ENCODING yaml_encoding_t = iota + + yaml_UTF8_ENCODING // The default UTF-8 encoding. + yaml_UTF16LE_ENCODING // The UTF-16-LE encoding with BOM. + yaml_UTF16BE_ENCODING // The UTF-16-BE encoding with BOM. +) + +type yaml_break_t int + +// Line break types. +const ( + // Let the parser choose the break type. + yaml_ANY_BREAK yaml_break_t = iota + + yaml_CR_BREAK // Use CR for line breaks (Mac style). + yaml_LN_BREAK // Use LN for line breaks (Unix style). + yaml_CRLN_BREAK // Use CR LN for line breaks (DOS style). +) + +type yaml_error_type_t int + +// Many bad things could happen with the parser and emitter. +const ( + // No error is produced. + yaml_NO_ERROR yaml_error_type_t = iota + + yaml_MEMORY_ERROR // Cannot allocate or reallocate a block of memory. + yaml_READER_ERROR // Cannot read or decode the input stream. + yaml_SCANNER_ERROR // Cannot scan the input stream. + yaml_PARSER_ERROR // Cannot parse the input stream. + yaml_COMPOSER_ERROR // Cannot compose a YAML document. + yaml_WRITER_ERROR // Cannot write to the output stream. + yaml_EMITTER_ERROR // Cannot emit a YAML stream. +) + +// The pointer position. +type yaml_mark_t struct { + index int // The position index. + line int // The position line. + column int // The position column. +} + +// Node Styles + +type yaml_style_t int8 + +type yaml_scalar_style_t yaml_style_t + +// Scalar styles. +const ( + // Let the emitter choose the style. + yaml_ANY_SCALAR_STYLE yaml_scalar_style_t = 0 + + yaml_PLAIN_SCALAR_STYLE yaml_scalar_style_t = 1 << iota // The plain scalar style. + yaml_SINGLE_QUOTED_SCALAR_STYLE // The single-quoted scalar style. + yaml_DOUBLE_QUOTED_SCALAR_STYLE // The double-quoted scalar style. + yaml_LITERAL_SCALAR_STYLE // The literal scalar style. + yaml_FOLDED_SCALAR_STYLE // The folded scalar style. +) + +type yaml_sequence_style_t yaml_style_t + +// Sequence styles. +const ( + // Let the emitter choose the style. + yaml_ANY_SEQUENCE_STYLE yaml_sequence_style_t = iota + + yaml_BLOCK_SEQUENCE_STYLE // The block sequence style. + yaml_FLOW_SEQUENCE_STYLE // The flow sequence style. +) + +type yaml_mapping_style_t yaml_style_t + +// Mapping styles. +const ( + // Let the emitter choose the style. + yaml_ANY_MAPPING_STYLE yaml_mapping_style_t = iota + + yaml_BLOCK_MAPPING_STYLE // The block mapping style. + yaml_FLOW_MAPPING_STYLE // The flow mapping style. +) + +// Tokens + +type yaml_token_type_t int + +// Token types. +const ( + // An empty token. + yaml_NO_TOKEN yaml_token_type_t = iota + + yaml_STREAM_START_TOKEN // A STREAM-START token. + yaml_STREAM_END_TOKEN // A STREAM-END token. + + yaml_VERSION_DIRECTIVE_TOKEN // A VERSION-DIRECTIVE token. + yaml_TAG_DIRECTIVE_TOKEN // A TAG-DIRECTIVE token. + yaml_DOCUMENT_START_TOKEN // A DOCUMENT-START token. + yaml_DOCUMENT_END_TOKEN // A DOCUMENT-END token. + + yaml_BLOCK_SEQUENCE_START_TOKEN // A BLOCK-SEQUENCE-START token. + yaml_BLOCK_MAPPING_START_TOKEN // A BLOCK-SEQUENCE-END token. + yaml_BLOCK_END_TOKEN // A BLOCK-END token. + + yaml_FLOW_SEQUENCE_START_TOKEN // A FLOW-SEQUENCE-START token. + yaml_FLOW_SEQUENCE_END_TOKEN // A FLOW-SEQUENCE-END token. + yaml_FLOW_MAPPING_START_TOKEN // A FLOW-MAPPING-START token. + yaml_FLOW_MAPPING_END_TOKEN // A FLOW-MAPPING-END token. + + yaml_BLOCK_ENTRY_TOKEN // A BLOCK-ENTRY token. + yaml_FLOW_ENTRY_TOKEN // A FLOW-ENTRY token. + yaml_KEY_TOKEN // A KEY token. + yaml_VALUE_TOKEN // A VALUE token. + + yaml_ALIAS_TOKEN // An ALIAS token. + yaml_ANCHOR_TOKEN // An ANCHOR token. + yaml_TAG_TOKEN // A TAG token. + yaml_SCALAR_TOKEN // A SCALAR token. +) + +func (tt yaml_token_type_t) String() string { + switch tt { + case yaml_NO_TOKEN: + return "yaml_NO_TOKEN" + case yaml_STREAM_START_TOKEN: + return "yaml_STREAM_START_TOKEN" + case yaml_STREAM_END_TOKEN: + return "yaml_STREAM_END_TOKEN" + case yaml_VERSION_DIRECTIVE_TOKEN: + return "yaml_VERSION_DIRECTIVE_TOKEN" + case yaml_TAG_DIRECTIVE_TOKEN: + return "yaml_TAG_DIRECTIVE_TOKEN" + case yaml_DOCUMENT_START_TOKEN: + return "yaml_DOCUMENT_START_TOKEN" + case yaml_DOCUMENT_END_TOKEN: + return "yaml_DOCUMENT_END_TOKEN" + case yaml_BLOCK_SEQUENCE_START_TOKEN: + return "yaml_BLOCK_SEQUENCE_START_TOKEN" + case yaml_BLOCK_MAPPING_START_TOKEN: + return "yaml_BLOCK_MAPPING_START_TOKEN" + case yaml_BLOCK_END_TOKEN: + return "yaml_BLOCK_END_TOKEN" + case yaml_FLOW_SEQUENCE_START_TOKEN: + return "yaml_FLOW_SEQUENCE_START_TOKEN" + case yaml_FLOW_SEQUENCE_END_TOKEN: + return "yaml_FLOW_SEQUENCE_END_TOKEN" + case yaml_FLOW_MAPPING_START_TOKEN: + return "yaml_FLOW_MAPPING_START_TOKEN" + case yaml_FLOW_MAPPING_END_TOKEN: + return "yaml_FLOW_MAPPING_END_TOKEN" + case yaml_BLOCK_ENTRY_TOKEN: + return "yaml_BLOCK_ENTRY_TOKEN" + case yaml_FLOW_ENTRY_TOKEN: + return "yaml_FLOW_ENTRY_TOKEN" + case yaml_KEY_TOKEN: + return "yaml_KEY_TOKEN" + case yaml_VALUE_TOKEN: + return "yaml_VALUE_TOKEN" + case yaml_ALIAS_TOKEN: + return "yaml_ALIAS_TOKEN" + case yaml_ANCHOR_TOKEN: + return "yaml_ANCHOR_TOKEN" + case yaml_TAG_TOKEN: + return "yaml_TAG_TOKEN" + case yaml_SCALAR_TOKEN: + return "yaml_SCALAR_TOKEN" + } + return "" +} + +// The token structure. +type yaml_token_t struct { + // The token type. + typ yaml_token_type_t + + // The start/end of the token. + start_mark, end_mark yaml_mark_t + + // The stream encoding (for yaml_STREAM_START_TOKEN). + encoding yaml_encoding_t + + // The alias/anchor/scalar value or tag/tag directive handle + // (for yaml_ALIAS_TOKEN, yaml_ANCHOR_TOKEN, yaml_SCALAR_TOKEN, yaml_TAG_TOKEN, yaml_TAG_DIRECTIVE_TOKEN). + value []byte + + // The tag suffix (for yaml_TAG_TOKEN). + suffix []byte + + // The tag directive prefix (for yaml_TAG_DIRECTIVE_TOKEN). + prefix []byte + + // The scalar style (for yaml_SCALAR_TOKEN). + style yaml_scalar_style_t + + // The version directive major/minor (for yaml_VERSION_DIRECTIVE_TOKEN). + major, minor int8 +} + +// Events + +type yaml_event_type_t int8 + +// Event types. +const ( + // An empty event. + yaml_NO_EVENT yaml_event_type_t = iota + + yaml_STREAM_START_EVENT // A STREAM-START event. + yaml_STREAM_END_EVENT // A STREAM-END event. + yaml_DOCUMENT_START_EVENT // A DOCUMENT-START event. + yaml_DOCUMENT_END_EVENT // A DOCUMENT-END event. + yaml_ALIAS_EVENT // An ALIAS event. + yaml_SCALAR_EVENT // A SCALAR event. + yaml_SEQUENCE_START_EVENT // A SEQUENCE-START event. + yaml_SEQUENCE_END_EVENT // A SEQUENCE-END event. + yaml_MAPPING_START_EVENT // A MAPPING-START event. + yaml_MAPPING_END_EVENT // A MAPPING-END event. + yaml_TAIL_COMMENT_EVENT +) + +var eventStrings = []string{ + yaml_NO_EVENT: "none", + yaml_STREAM_START_EVENT: "stream start", + yaml_STREAM_END_EVENT: "stream end", + yaml_DOCUMENT_START_EVENT: "document start", + yaml_DOCUMENT_END_EVENT: "document end", + yaml_ALIAS_EVENT: "alias", + yaml_SCALAR_EVENT: "scalar", + yaml_SEQUENCE_START_EVENT: "sequence start", + yaml_SEQUENCE_END_EVENT: "sequence end", + yaml_MAPPING_START_EVENT: "mapping start", + yaml_MAPPING_END_EVENT: "mapping end", + yaml_TAIL_COMMENT_EVENT: "tail comment", +} + +func (e yaml_event_type_t) String() string { + if e < 0 || int(e) >= len(eventStrings) { + return fmt.Sprintf("unknown event %d", e) + } + return eventStrings[e] +} + +// The event structure. +type yaml_event_t struct { + + // The event type. + typ yaml_event_type_t + + // The start and end of the event. + start_mark, end_mark yaml_mark_t + + // The document encoding (for yaml_STREAM_START_EVENT). + encoding yaml_encoding_t + + // The version directive (for yaml_DOCUMENT_START_EVENT). + version_directive *yaml_version_directive_t + + // The list of tag directives (for yaml_DOCUMENT_START_EVENT). + tag_directives []yaml_tag_directive_t + + // The comments + head_comment []byte + line_comment []byte + foot_comment []byte + tail_comment []byte + + // The anchor (for yaml_SCALAR_EVENT, yaml_SEQUENCE_START_EVENT, yaml_MAPPING_START_EVENT, yaml_ALIAS_EVENT). + anchor []byte + + // The tag (for yaml_SCALAR_EVENT, yaml_SEQUENCE_START_EVENT, yaml_MAPPING_START_EVENT). + tag []byte + + // The scalar value (for yaml_SCALAR_EVENT). + value []byte + + // Is the document start/end indicator implicit, or the tag optional? + // (for yaml_DOCUMENT_START_EVENT, yaml_DOCUMENT_END_EVENT, yaml_SEQUENCE_START_EVENT, yaml_MAPPING_START_EVENT, yaml_SCALAR_EVENT). + implicit bool + + // Is the tag optional for any non-plain style? (for yaml_SCALAR_EVENT). + quoted_implicit bool + + // The style (for yaml_SCALAR_EVENT, yaml_SEQUENCE_START_EVENT, yaml_MAPPING_START_EVENT). + style yaml_style_t +} + +func (e *yaml_event_t) scalar_style() yaml_scalar_style_t { return yaml_scalar_style_t(e.style) } +func (e *yaml_event_t) sequence_style() yaml_sequence_style_t { return yaml_sequence_style_t(e.style) } +func (e *yaml_event_t) mapping_style() yaml_mapping_style_t { return yaml_mapping_style_t(e.style) } + +// Nodes + +const ( + yaml_NULL_TAG = "tag:yaml.org,2002:null" // The tag !!null with the only possible value: null. + yaml_BOOL_TAG = "tag:yaml.org,2002:bool" // The tag !!bool with the values: true and false. + yaml_STR_TAG = "tag:yaml.org,2002:str" // The tag !!str for string values. + yaml_INT_TAG = "tag:yaml.org,2002:int" // The tag !!int for integer values. + yaml_FLOAT_TAG = "tag:yaml.org,2002:float" // The tag !!float for float values. + yaml_TIMESTAMP_TAG = "tag:yaml.org,2002:timestamp" // The tag !!timestamp for date and time values. + + yaml_SEQ_TAG = "tag:yaml.org,2002:seq" // The tag !!seq is used to denote sequences. + yaml_MAP_TAG = "tag:yaml.org,2002:map" // The tag !!map is used to denote mapping. + + // Not in original libyaml. + yaml_BINARY_TAG = "tag:yaml.org,2002:binary" + yaml_MERGE_TAG = "tag:yaml.org,2002:merge" + + yaml_DEFAULT_SCALAR_TAG = yaml_STR_TAG // The default scalar tag is !!str. + yaml_DEFAULT_SEQUENCE_TAG = yaml_SEQ_TAG // The default sequence tag is !!seq. + yaml_DEFAULT_MAPPING_TAG = yaml_MAP_TAG // The default mapping tag is !!map. +) + +type yaml_node_type_t int + +// Node types. +const ( + // An empty node. + yaml_NO_NODE yaml_node_type_t = iota + + yaml_SCALAR_NODE // A scalar node. + yaml_SEQUENCE_NODE // A sequence node. + yaml_MAPPING_NODE // A mapping node. +) + +// An element of a sequence node. +type yaml_node_item_t int + +// An element of a mapping node. +type yaml_node_pair_t struct { + key int // The key of the element. + value int // The value of the element. +} + +// The node structure. +type yaml_node_t struct { + typ yaml_node_type_t // The node type. + tag []byte // The node tag. + + // The node data. + + // The scalar parameters (for yaml_SCALAR_NODE). + scalar struct { + value []byte // The scalar value. + length int // The length of the scalar value. + style yaml_scalar_style_t // The scalar style. + } + + // The sequence parameters (for YAML_SEQUENCE_NODE). + sequence struct { + items_data []yaml_node_item_t // The stack of sequence items. + style yaml_sequence_style_t // The sequence style. + } + + // The mapping parameters (for yaml_MAPPING_NODE). + mapping struct { + pairs_data []yaml_node_pair_t // The stack of mapping pairs (key, value). + pairs_start *yaml_node_pair_t // The beginning of the stack. + pairs_end *yaml_node_pair_t // The end of the stack. + pairs_top *yaml_node_pair_t // The top of the stack. + style yaml_mapping_style_t // The mapping style. + } + + start_mark yaml_mark_t // The beginning of the node. + end_mark yaml_mark_t // The end of the node. + +} + +// The document structure. +type yaml_document_t struct { + + // The document nodes. + nodes []yaml_node_t + + // The version directive. + version_directive *yaml_version_directive_t + + // The list of tag directives. + tag_directives_data []yaml_tag_directive_t + tag_directives_start int // The beginning of the tag directives list. + tag_directives_end int // The end of the tag directives list. + + start_implicit int // Is the document start indicator implicit? + end_implicit int // Is the document end indicator implicit? + + // The start/end of the document. + start_mark, end_mark yaml_mark_t +} + +// The prototype of a read handler. +// +// The read handler is called when the parser needs to read more bytes from the +// source. The handler should write not more than size bytes to the buffer. +// The number of written bytes should be set to the size_read variable. +// +// [in,out] data A pointer to an application data specified by +// yaml_parser_set_input(). +// [out] buffer The buffer to write the data from the source. +// [in] size The size of the buffer. +// [out] size_read The actual number of bytes read from the source. +// +// On success, the handler should return 1. If the handler failed, +// the returned value should be 0. On EOF, the handler should set the +// size_read to 0 and return 1. +type yaml_read_handler_t func(parser *yaml_parser_t, buffer []byte) (n int, err error) + +// This structure holds information about a potential simple key. +type yaml_simple_key_t struct { + possible bool // Is a simple key possible? + required bool // Is a simple key required? + token_number int // The number of the token. + mark yaml_mark_t // The position mark. +} + +// The states of the parser. +type yaml_parser_state_t int + +const ( + yaml_PARSE_STREAM_START_STATE yaml_parser_state_t = iota + + yaml_PARSE_IMPLICIT_DOCUMENT_START_STATE // Expect the beginning of an implicit document. + yaml_PARSE_DOCUMENT_START_STATE // Expect DOCUMENT-START. + yaml_PARSE_DOCUMENT_CONTENT_STATE // Expect the content of a document. + yaml_PARSE_DOCUMENT_END_STATE // Expect DOCUMENT-END. + yaml_PARSE_BLOCK_NODE_STATE // Expect a block node. + yaml_PARSE_BLOCK_NODE_OR_INDENTLESS_SEQUENCE_STATE // Expect a block node or indentless sequence. + yaml_PARSE_FLOW_NODE_STATE // Expect a flow node. + yaml_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE // Expect the first entry of a block sequence. + yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE // Expect an entry of a block sequence. + yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE // Expect an entry of an indentless sequence. + yaml_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE // Expect the first key of a block mapping. + yaml_PARSE_BLOCK_MAPPING_KEY_STATE // Expect a block mapping key. + yaml_PARSE_BLOCK_MAPPING_VALUE_STATE // Expect a block mapping value. + yaml_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE // Expect the first entry of a flow sequence. + yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE // Expect an entry of a flow sequence. + yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE // Expect a key of an ordered mapping. + yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE // Expect a value of an ordered mapping. + yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE // Expect the and of an ordered mapping entry. + yaml_PARSE_FLOW_MAPPING_FIRST_KEY_STATE // Expect the first key of a flow mapping. + yaml_PARSE_FLOW_MAPPING_KEY_STATE // Expect a key of a flow mapping. + yaml_PARSE_FLOW_MAPPING_VALUE_STATE // Expect a value of a flow mapping. + yaml_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE // Expect an empty value of a flow mapping. + yaml_PARSE_END_STATE // Expect nothing. +) + +func (ps yaml_parser_state_t) String() string { + switch ps { + case yaml_PARSE_STREAM_START_STATE: + return "yaml_PARSE_STREAM_START_STATE" + case yaml_PARSE_IMPLICIT_DOCUMENT_START_STATE: + return "yaml_PARSE_IMPLICIT_DOCUMENT_START_STATE" + case yaml_PARSE_DOCUMENT_START_STATE: + return "yaml_PARSE_DOCUMENT_START_STATE" + case yaml_PARSE_DOCUMENT_CONTENT_STATE: + return "yaml_PARSE_DOCUMENT_CONTENT_STATE" + case yaml_PARSE_DOCUMENT_END_STATE: + return "yaml_PARSE_DOCUMENT_END_STATE" + case yaml_PARSE_BLOCK_NODE_STATE: + return "yaml_PARSE_BLOCK_NODE_STATE" + case yaml_PARSE_BLOCK_NODE_OR_INDENTLESS_SEQUENCE_STATE: + return "yaml_PARSE_BLOCK_NODE_OR_INDENTLESS_SEQUENCE_STATE" + case yaml_PARSE_FLOW_NODE_STATE: + return "yaml_PARSE_FLOW_NODE_STATE" + case yaml_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE: + return "yaml_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE" + case yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE: + return "yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE" + case yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE: + return "yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE" + case yaml_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE: + return "yaml_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE" + case yaml_PARSE_BLOCK_MAPPING_KEY_STATE: + return "yaml_PARSE_BLOCK_MAPPING_KEY_STATE" + case yaml_PARSE_BLOCK_MAPPING_VALUE_STATE: + return "yaml_PARSE_BLOCK_MAPPING_VALUE_STATE" + case yaml_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE: + return "yaml_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE" + case yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE: + return "yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE" + case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE: + return "yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE" + case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE: + return "yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE" + case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE: + return "yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE" + case yaml_PARSE_FLOW_MAPPING_FIRST_KEY_STATE: + return "yaml_PARSE_FLOW_MAPPING_FIRST_KEY_STATE" + case yaml_PARSE_FLOW_MAPPING_KEY_STATE: + return "yaml_PARSE_FLOW_MAPPING_KEY_STATE" + case yaml_PARSE_FLOW_MAPPING_VALUE_STATE: + return "yaml_PARSE_FLOW_MAPPING_VALUE_STATE" + case yaml_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE: + return "yaml_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE" + case yaml_PARSE_END_STATE: + return "yaml_PARSE_END_STATE" + } + return "" +} + +// This structure holds aliases data. +type yaml_alias_data_t struct { + anchor []byte // The anchor. + index int // The node id. + mark yaml_mark_t // The anchor mark. +} + +// The parser structure. +// +// All members are internal. Manage the structure using the +// yaml_parser_ family of functions. +type yaml_parser_t struct { + + // Error handling + + error yaml_error_type_t // Error type. + + problem string // Error description. + + // The byte about which the problem occurred. + problem_offset int + problem_value int + problem_mark yaml_mark_t + + // The error context. + context string + context_mark yaml_mark_t + + // Reader stuff + + read_handler yaml_read_handler_t // Read handler. + + input_reader io.Reader // File input data. + input []byte // String input data. + input_pos int + + eof bool // EOF flag + + buffer []byte // The working buffer. + buffer_pos int // The current position of the buffer. + + unread int // The number of unread characters in the buffer. + + newlines int // The number of line breaks since last non-break/non-blank character + + raw_buffer []byte // The raw buffer. + raw_buffer_pos int // The current position of the buffer. + + encoding yaml_encoding_t // The input encoding. + + offset int // The offset of the current position (in bytes). + mark yaml_mark_t // The mark of the current position. + + // Comments + + head_comment []byte // The current head comments + line_comment []byte // The current line comments + foot_comment []byte // The current foot comments + tail_comment []byte // Foot comment that happens at the end of a block. + stem_comment []byte // Comment in item preceding a nested structure (list inside list item, etc) + + comments []yaml_comment_t // The folded comments for all parsed tokens + comments_head int + + // Scanner stuff + + stream_start_produced bool // Have we started to scan the input stream? + stream_end_produced bool // Have we reached the end of the input stream? + + flow_level int // The number of unclosed '[' and '{' indicators. + + tokens []yaml_token_t // The tokens queue. + tokens_head int // The head of the tokens queue. + tokens_parsed int // The number of tokens fetched from the queue. + token_available bool // Does the tokens queue contain a token ready for dequeueing. + + indent int // The current indentation level. + indents []int // The indentation levels stack. + + simple_key_allowed bool // May a simple key occur at the current position? + simple_keys []yaml_simple_key_t // The stack of simple keys. + simple_keys_by_tok map[int]int // possible simple_key indexes indexed by token_number + + // Parser stuff + + state yaml_parser_state_t // The current parser state. + states []yaml_parser_state_t // The parser states stack. + marks []yaml_mark_t // The stack of marks. + tag_directives []yaml_tag_directive_t // The list of TAG directives. + + // Dumper stuff + + aliases []yaml_alias_data_t // The alias data. + + document *yaml_document_t // The currently parsed document. +} + +type yaml_comment_t struct { + + scan_mark yaml_mark_t // Position where scanning for comments started + token_mark yaml_mark_t // Position after which tokens will be associated with this comment + start_mark yaml_mark_t // Position of '#' comment mark + end_mark yaml_mark_t // Position where comment terminated + + head []byte + line []byte + foot []byte +} + +// Emitter Definitions + +// The prototype of a write handler. +// +// The write handler is called when the emitter needs to flush the accumulated +// characters to the output. The handler should write @a size bytes of the +// @a buffer to the output. +// +// @param[in,out] data A pointer to an application data specified by +// yaml_emitter_set_output(). +// @param[in] buffer The buffer with bytes to be written. +// @param[in] size The size of the buffer. +// +// @returns On success, the handler should return @c 1. If the handler failed, +// the returned value should be @c 0. +// +type yaml_write_handler_t func(emitter *yaml_emitter_t, buffer []byte) error + +type yaml_emitter_state_t int + +// The emitter states. +const ( + // Expect STREAM-START. + yaml_EMIT_STREAM_START_STATE yaml_emitter_state_t = iota + + yaml_EMIT_FIRST_DOCUMENT_START_STATE // Expect the first DOCUMENT-START or STREAM-END. + yaml_EMIT_DOCUMENT_START_STATE // Expect DOCUMENT-START or STREAM-END. + yaml_EMIT_DOCUMENT_CONTENT_STATE // Expect the content of a document. + yaml_EMIT_DOCUMENT_END_STATE // Expect DOCUMENT-END. + yaml_EMIT_FLOW_SEQUENCE_FIRST_ITEM_STATE // Expect the first item of a flow sequence. + yaml_EMIT_FLOW_SEQUENCE_TRAIL_ITEM_STATE // Expect the next item of a flow sequence, with the comma already written out + yaml_EMIT_FLOW_SEQUENCE_ITEM_STATE // Expect an item of a flow sequence. + yaml_EMIT_FLOW_MAPPING_FIRST_KEY_STATE // Expect the first key of a flow mapping. + yaml_EMIT_FLOW_MAPPING_TRAIL_KEY_STATE // Expect the next key of a flow mapping, with the comma already written out + yaml_EMIT_FLOW_MAPPING_KEY_STATE // Expect a key of a flow mapping. + yaml_EMIT_FLOW_MAPPING_SIMPLE_VALUE_STATE // Expect a value for a simple key of a flow mapping. + yaml_EMIT_FLOW_MAPPING_VALUE_STATE // Expect a value of a flow mapping. + yaml_EMIT_BLOCK_SEQUENCE_FIRST_ITEM_STATE // Expect the first item of a block sequence. + yaml_EMIT_BLOCK_SEQUENCE_ITEM_STATE // Expect an item of a block sequence. + yaml_EMIT_BLOCK_MAPPING_FIRST_KEY_STATE // Expect the first key of a block mapping. + yaml_EMIT_BLOCK_MAPPING_KEY_STATE // Expect the key of a block mapping. + yaml_EMIT_BLOCK_MAPPING_SIMPLE_VALUE_STATE // Expect a value for a simple key of a block mapping. + yaml_EMIT_BLOCK_MAPPING_VALUE_STATE // Expect a value of a block mapping. + yaml_EMIT_END_STATE // Expect nothing. +) + +// The emitter structure. +// +// All members are internal. Manage the structure using the @c yaml_emitter_ +// family of functions. +type yaml_emitter_t struct { + + // Error handling + + error yaml_error_type_t // Error type. + problem string // Error description. + + // Writer stuff + + write_handler yaml_write_handler_t // Write handler. + + output_buffer *[]byte // String output data. + output_writer io.Writer // File output data. + + buffer []byte // The working buffer. + buffer_pos int // The current position of the buffer. + + raw_buffer []byte // The raw buffer. + raw_buffer_pos int // The current position of the buffer. + + encoding yaml_encoding_t // The stream encoding. + + // Emitter stuff + + canonical bool // If the output is in the canonical style? + best_indent int // The number of indentation spaces. + best_width int // The preferred width of the output lines. + unicode bool // Allow unescaped non-ASCII characters? + line_break yaml_break_t // The preferred line break. + + state yaml_emitter_state_t // The current emitter state. + states []yaml_emitter_state_t // The stack of states. + + events []yaml_event_t // The event queue. + events_head int // The head of the event queue. + + indents []int // The stack of indentation levels. + + tag_directives []yaml_tag_directive_t // The list of tag directives. + + indent int // The current indentation level. + + flow_level int // The current flow level. + + root_context bool // Is it the document root context? + sequence_context bool // Is it a sequence context? + mapping_context bool // Is it a mapping context? + simple_key_context bool // Is it a simple mapping key context? + + line int // The current line. + column int // The current column. + whitespace bool // If the last character was a whitespace? + indention bool // If the last character was an indentation character (' ', '-', '?', ':')? + open_ended bool // If an explicit document end is required? + + space_above bool // Is there's an empty line above? + foot_indent int // The indent used to write the foot comment above, or -1 if none. + + // Anchor analysis. + anchor_data struct { + anchor []byte // The anchor value. + alias bool // Is it an alias? + } + + // Tag analysis. + tag_data struct { + handle []byte // The tag handle. + suffix []byte // The tag suffix. + } + + // Scalar analysis. + scalar_data struct { + value []byte // The scalar value. + multiline bool // Does the scalar contain line breaks? + flow_plain_allowed bool // Can the scalar be expessed in the flow plain style? + block_plain_allowed bool // Can the scalar be expressed in the block plain style? + single_quoted_allowed bool // Can the scalar be expressed in the single quoted style? + block_allowed bool // Can the scalar be expressed in the literal or folded styles? + style yaml_scalar_style_t // The output style. + } + + // Comments + head_comment []byte + line_comment []byte + foot_comment []byte + tail_comment []byte + + key_line_comment []byte + + // Dumper stuff + + opened bool // If the stream was already opened? + closed bool // If the stream was already closed? + + // The information associated with the document nodes. + anchors *struct { + references int // The number of references. + anchor int // The anchor id. + serialized bool // If the node has been emitted? + } + + last_anchor_id int // The last assigned anchor id. + + document *yaml_document_t // The currently emitted document. +} diff --git a/goyaml.v3/yamlprivateh.go b/goyaml.v3/yamlprivateh.go new file mode 100644 index 0000000..e88f9c5 --- /dev/null +++ b/goyaml.v3/yamlprivateh.go @@ -0,0 +1,198 @@ +// +// Copyright (c) 2011-2019 Canonical Ltd +// Copyright (c) 2006-2010 Kirill Simonov +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +package yaml + +const ( + // The size of the input raw buffer. + input_raw_buffer_size = 512 + + // The size of the input buffer. + // It should be possible to decode the whole raw buffer. + input_buffer_size = input_raw_buffer_size * 3 + + // The size of the output buffer. + output_buffer_size = 128 + + // The size of the output raw buffer. + // It should be possible to encode the whole output buffer. + output_raw_buffer_size = (output_buffer_size*2 + 2) + + // The size of other stacks and queues. + initial_stack_size = 16 + initial_queue_size = 16 + initial_string_size = 16 +) + +// Check if the character at the specified position is an alphabetical +// character, a digit, '_', or '-'. +func is_alpha(b []byte, i int) bool { + return b[i] >= '0' && b[i] <= '9' || b[i] >= 'A' && b[i] <= 'Z' || b[i] >= 'a' && b[i] <= 'z' || b[i] == '_' || b[i] == '-' +} + +// Check if the character at the specified position is a digit. +func is_digit(b []byte, i int) bool { + return b[i] >= '0' && b[i] <= '9' +} + +// Get the value of a digit. +func as_digit(b []byte, i int) int { + return int(b[i]) - '0' +} + +// Check if the character at the specified position is a hex-digit. +func is_hex(b []byte, i int) bool { + return b[i] >= '0' && b[i] <= '9' || b[i] >= 'A' && b[i] <= 'F' || b[i] >= 'a' && b[i] <= 'f' +} + +// Get the value of a hex-digit. +func as_hex(b []byte, i int) int { + bi := b[i] + if bi >= 'A' && bi <= 'F' { + return int(bi) - 'A' + 10 + } + if bi >= 'a' && bi <= 'f' { + return int(bi) - 'a' + 10 + } + return int(bi) - '0' +} + +// Check if the character is ASCII. +func is_ascii(b []byte, i int) bool { + return b[i] <= 0x7F +} + +// Check if the character at the start of the buffer can be printed unescaped. +func is_printable(b []byte, i int) bool { + return ((b[i] == 0x0A) || // . == #x0A + (b[i] >= 0x20 && b[i] <= 0x7E) || // #x20 <= . <= #x7E + (b[i] == 0xC2 && b[i+1] >= 0xA0) || // #0xA0 <= . <= #xD7FF + (b[i] > 0xC2 && b[i] < 0xED) || + (b[i] == 0xED && b[i+1] < 0xA0) || + (b[i] == 0xEE) || + (b[i] == 0xEF && // #xE000 <= . <= #xFFFD + !(b[i+1] == 0xBB && b[i+2] == 0xBF) && // && . != #xFEFF + !(b[i+1] == 0xBF && (b[i+2] == 0xBE || b[i+2] == 0xBF)))) +} + +// Check if the character at the specified position is NUL. +func is_z(b []byte, i int) bool { + return b[i] == 0x00 +} + +// Check if the beginning of the buffer is a BOM. +func is_bom(b []byte, i int) bool { + return b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF +} + +// Check if the character at the specified position is space. +func is_space(b []byte, i int) bool { + return b[i] == ' ' +} + +// Check if the character at the specified position is tab. +func is_tab(b []byte, i int) bool { + return b[i] == '\t' +} + +// Check if the character at the specified position is blank (space or tab). +func is_blank(b []byte, i int) bool { + //return is_space(b, i) || is_tab(b, i) + return b[i] == ' ' || b[i] == '\t' +} + +// Check if the character at the specified position is a line break. +func is_break(b []byte, i int) bool { + return (b[i] == '\r' || // CR (#xD) + b[i] == '\n' || // LF (#xA) + b[i] == 0xC2 && b[i+1] == 0x85 || // NEL (#x85) + b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA8 || // LS (#x2028) + b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA9) // PS (#x2029) +} + +func is_crlf(b []byte, i int) bool { + return b[i] == '\r' && b[i+1] == '\n' +} + +// Check if the character is a line break or NUL. +func is_breakz(b []byte, i int) bool { + //return is_break(b, i) || is_z(b, i) + return ( + // is_break: + b[i] == '\r' || // CR (#xD) + b[i] == '\n' || // LF (#xA) + b[i] == 0xC2 && b[i+1] == 0x85 || // NEL (#x85) + b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA8 || // LS (#x2028) + b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA9 || // PS (#x2029) + // is_z: + b[i] == 0) +} + +// Check if the character is a line break, space, or NUL. +func is_spacez(b []byte, i int) bool { + //return is_space(b, i) || is_breakz(b, i) + return ( + // is_space: + b[i] == ' ' || + // is_breakz: + b[i] == '\r' || // CR (#xD) + b[i] == '\n' || // LF (#xA) + b[i] == 0xC2 && b[i+1] == 0x85 || // NEL (#x85) + b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA8 || // LS (#x2028) + b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA9 || // PS (#x2029) + b[i] == 0) +} + +// Check if the character is a line break, space, tab, or NUL. +func is_blankz(b []byte, i int) bool { + //return is_blank(b, i) || is_breakz(b, i) + return ( + // is_blank: + b[i] == ' ' || b[i] == '\t' || + // is_breakz: + b[i] == '\r' || // CR (#xD) + b[i] == '\n' || // LF (#xA) + b[i] == 0xC2 && b[i+1] == 0x85 || // NEL (#x85) + b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA8 || // LS (#x2028) + b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA9 || // PS (#x2029) + b[i] == 0) +} + +// Determine the width of the character. +func width(b byte) int { + // Don't replace these by a switch without first + // confirming that it is being inlined. + if b&0x80 == 0x00 { + return 1 + } + if b&0xE0 == 0xC0 { + return 2 + } + if b&0xF0 == 0xE0 { + return 3 + } + if b&0xF8 == 0xF0 { + return 4 + } + return 0 + +}