From 1731084a268ebd859915a6390f561bf269764eed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Eustace?= Date: Tue, 28 Jul 2020 10:03:15 +0200 Subject: [PATCH] Fix TOML 1.0.0 compliance --- tests/test_api.py | 3 +- tomlkit/_utils.py | 14 ++++++ tomlkit/container.py | 37 +++++++++++++-- tomlkit/exceptions.py | 17 +++++++ tomlkit/items.py | 3 ++ tomlkit/parser.py | 107 ++++++++++++++++++++++++++++++++++-------- 6 files changed, 156 insertions(+), 25 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index 0553cf38..9bd9c2e9 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -13,6 +13,7 @@ from tomlkit import parse from tomlkit.exceptions import EmptyKeyError from tomlkit.exceptions import InvalidCharInStringError +from tomlkit.exceptions import InvalidControlChar from tomlkit.exceptions import InvalidDateError from tomlkit.exceptions import InvalidDateTimeError from tomlkit.exceptions import InvalidNumberError @@ -83,7 +84,7 @@ def test_parsed_document_are_properly_json_representable( ("invalid_time", InvalidTimeError), ("invalid_datetime", InvalidDateTimeError), ("trailing_comma", UnexpectedCharError), - ("newline_in_singleline_string", InvalidCharInStringError), + ("newline_in_singleline_string", InvalidControlChar), ("string_slash_whitespace_char", InvalidCharInStringError), ("array_no_comma", UnexpectedCharError), ("array_duplicate_comma", UnexpectedCharError), diff --git a/tomlkit/_utils.py b/tomlkit/_utils.py index f1eee0f7..2ae3e424 100644 --- a/tomlkit/_utils.py +++ b/tomlkit/_utils.py @@ -10,6 +10,12 @@ from ._compat import timezone +try: + from collections.abc import Mapping +except ImportError: + from collections import Mapping + + RFC_3339_LOOSE = re.compile( "^" r"(([0-9]+)-(\d{2})-(\d{2}))?" 
# Date @@ -128,3 +134,11 @@ def flush(): flush() return "".join(res) + + +def merge_dicts(d1, d2): + for k, v in d2.items(): + if k in d1 and isinstance(d1[k], dict) and isinstance(d2[k], Mapping): + merge_dicts(d1[k], d2[k]) + else: + d1[k] = d2[k] diff --git a/tomlkit/container.py b/tomlkit/container.py index 0da2761f..6386e738 100644 --- a/tomlkit/container.py +++ b/tomlkit/container.py @@ -11,8 +11,11 @@ from typing import Union from ._compat import decode +from ._utils import merge_dicts from .exceptions import KeyAlreadyPresent from .exceptions import NonExistentKey +from .exceptions import ParseError +from .exceptions import TOMLKitError from .items import AoT from .items import Comment from .items import Item @@ -44,13 +47,20 @@ def body(self): # type: () -> List[Tuple[Optional[Key], Item]] @property def value(self): # type: () -> Dict[Any, Any] d = {} - for k in self.keys(): - v = self[k] + for k, v in self._body: + if k is None: + continue + + k = k.key + v = v.value - if isinstance(v, (Container, OutOfOrderTableProxy)): + if isinstance(v, Container): v = v.value - d[k] = v + if k in d: + merge_dicts(d[k], v) + else: + d[k] = v return d @@ -127,12 +137,22 @@ def append(self, key, item): # type: (Union[Key, str, None], Item) -> Container else: current.append(item) + return self + elif current.is_aot(): + if not item.is_aot_element(): + # Tried to define a table after an AoT with the same name. 
+ raise KeyAlreadyPresent(key) + + current.append(item) + return self elif current.is_super_table(): if item.is_super_table(): + # We need to merge both super tables if ( self._table_keys[-1] != current_body_element[0] or key.is_dotted() + or current_body_element[0].is_dotted() ): if not isinstance(current_idx, tuple): current_idx = (current_idx,) @@ -141,16 +161,22 @@ def append(self, key, item): # type: (Union[Key, str, None], Item) -> Container self._body.append((key, item)) self._table_keys.append(key) + # Building a temporary proxy to check for errors + OutOfOrderTableProxy(self, self._map[key]) + return self for k, v in item.value.body: current.append(k, v) return self + elif current_body_element[0].is_dotted(): + raise TOMLKitError("Redefinition of an existing table") elif not item.is_super_table(): raise KeyAlreadyPresent(key) elif isinstance(item, AoT): if not isinstance(current, AoT): + # Tried to define an AoT after a table with the same name. raise KeyAlreadyPresent(key) for table in item.body: @@ -626,6 +652,9 @@ def _replace_at( def __str__(self): # type: () -> str return str(self.value) + def __repr__(self): # type: () -> str + return super(Container, self).__repr__() + def __eq__(self, other): # type: (Dict) -> bool if not isinstance(other, dict): return NotImplemented diff --git a/tomlkit/exceptions.py b/tomlkit/exceptions.py index 4fbc667b..44836363 100644 --- a/tomlkit/exceptions.py +++ b/tomlkit/exceptions.py @@ -202,3 +202,20 @@ def __init__(self, key): message = 'Key "{}" already exists.'.format(key) super(KeyAlreadyPresent, self).__init__(message) + + +class InvalidControlChar(ParseError): + def __init__(self, line, col, char, type): # type: (int, int, int, str) -> None + display_code = "\\u00" + + if char < 16: + display_code += "0" + # The suggested escape must use hexadecimal digits: 0x1f is \u001f, not \u0031. + display_code += "{:x}".format(char) + + message = ( + "Control characters (codes less than 0x1f and 0x7f) are not allowed in {}, " + "use {} instead".format(type, display_code) + ) + + super(InvalidControlChar, 
self).__init__(line, col, message=message) diff --git a/tomlkit/items.py b/tomlkit/items.py index 59e8267f..8218f9cd 100644 --- a/tomlkit/items.py +++ b/tomlkit/items.py @@ -232,6 +232,9 @@ def delimiter(self): # type: () -> str def is_dotted(self): # type: () -> bool return self._dotted + def is_bare(self): # type: () -> bool + return self.t == KeyType.Bare + def as_string(self): # type: () -> str return "{}{}{}".format(self.delimiter, self._original, self.delimiter) diff --git a/tomlkit/parser.py b/tomlkit/parser.py index bdf98b31..49929954 100644 --- a/tomlkit/parser.py +++ b/tomlkit/parser.py @@ -21,6 +21,7 @@ from .exceptions import EmptyTableNameError from .exceptions import InternalParserError from .exceptions import InvalidCharInStringError +from .exceptions import InvalidControlChar from .exceptions import InvalidDateError from .exceptions import InvalidDateTimeError from .exceptions import InvalidNumberError @@ -54,6 +55,13 @@ from .toml_document import TOMLDocument +CTRL_I = 0x09 # Tab +CTRL_J = 0x0A # Line feed +CTRL_M = 0x0D # Carriage return +CTRL_CHAR_LIMIT = 0x1F +CHR_DEL = 0x7F + + class Parser: """ Parser for TOML documents. 
@@ -322,8 +330,13 @@ def _parse_comment_trail(self): # type: () -> Tuple[str, str, str] self.inc() # Skip # # The comment itself - while not self.end() and not self._current.is_nl() and self.inc(): - pass + while not self.end() and not self._current.is_nl(): + code = ord(self._current) + if code == CHR_DEL or code <= CTRL_CHAR_LIMIT and code != CTRL_I: + raise self.parse_error(InvalidControlChar, code, "comments") + + if not self.inc(): + break comment = self.extract() self.mark() @@ -456,6 +469,13 @@ def _parse_bare_key(self): # type: () -> Key original = self.extract() key = original.strip() + if not key: + # Empty key + raise self.parse_error(ParseError, "Empty key found") + + if " " in key: + # Bare key with spaces in it + raise self.parse_error(ParseError, 'Invalid key "{}"'.format(key)) if self._current == ".": self.inc() @@ -473,7 +493,33 @@ def _handle_dotted_key( name = names[0] name._dotted = True if name in container: - table = container[name] + if not isinstance(value, Table): + table = Table(Container(True), Trivia(), False, is_super_table=True) + _table = table + for i, _name in enumerate(names[1:]): + if i == len(names) - 2: + _name.sep = key.sep + + _table.append(_name, value) + else: + _name._dotted = True + _table.append( + _name, + Table( + Container(True), + Trivia(), + False, + is_super_table=i < len(names) - 2, + ), + ) + + _table = _table[_name] + + value = table + + container.append(name, value) + + return else: table = Table(Container(True), Trivia(), False, is_super_table=True) if isinstance(container, Table): @@ -489,7 +535,7 @@ def _handle_dotted_key( else: _name._dotted = True if _name in table.value: - table = table.value.item(_name) + table = table.value[_name] else: table.append( _name, @@ -718,11 +764,18 @@ def _parse_inline_table(self): # type: () -> InlineTable # consume closing bracket, EOF here doesn't matter self.inc() break - if trailing_comma is False: + + if ( + trailing_comma is False + or trailing_comma is None + and 
self._current == "," + ): + # Either the previous key-value pair was not followed by a comma + # or the table has an unexpected leading comma. raise self.parse_error(UnexpectedCharError, self._current) else: # True: previous key-value pair was followed by a comma - if self._current == "}": + if self._current == "}" or self._current == ",": raise self.parse_error(UnexpectedCharError, self._current) key, val = self._parse_key_value(False) @@ -879,9 +932,23 @@ def _parse_string(self, delim): # type: (StringType) -> String escaped = False # whether the previous key was ESCAPE while True: - if delim.is_singleline() and self._current.is_nl(): - # single line cannot have actual newline characters - raise self.parse_error(InvalidCharInStringError, self._current) + code = ord(self._current) + if ( + delim.is_singleline() + and not escaped + and (code == CHR_DEL or code <= CTRL_CHAR_LIMIT and code != CTRL_I) + ): + raise self.parse_error(InvalidControlChar, code, "strings") + elif ( + delim.is_multiline() + and not escaped + and ( + code == CHR_DEL + or code <= CTRL_CHAR_LIMIT + and code not in [CTRL_I, CTRL_J, CTRL_M] + ) + ): + raise self.parse_error(InvalidControlChar, code, "strings") elif not escaped and self._current == delim.unit: # try to process current as a closing delim original = self.extract() @@ -1007,6 +1074,9 @@ def _parse_table( key = Key(name, sep="") name_parts = tuple(self._split_table_name(name)) + if any(" " in part.key.strip() and part.is_bare() for part in name_parts): + raise self.parse_error(ParseError, 'Invalid table name "{}"'.format(name)) + missing_table = False if parent_name: parent_name_parts = tuple(self._split_table_name(parent_name)) @@ -1043,16 +1113,13 @@ def _parse_table( # without initializing [foo] # # So we have to create the parent tables - if parent and name_parts[0] in parent: - table = parent[name_parts[0]] - else: - table = Table( - Container(True), - Trivia(indent, cws, comment, trail), - is_aot and name_parts[0].key in 
self._aot_stack, - is_super_table=True, - name=name_parts[0].key, - ) + table = Table( + Container(True), + Trivia(indent, cws, comment, trail), + is_aot and name_parts[0].key in self._aot_stack, + is_super_table=True, + name=name_parts[0].key, + ) result = table key = name_parts[0] @@ -1226,7 +1293,7 @@ def _peek_unicode( try: value = chr(int(extracted, 16)) - except ValueError: + except (ValueError, OverflowError): value = None return value, extracted