From 1121584def2eed3a60c1b980331d52817f003fb2 Mon Sep 17 00:00:00 2001 From: Andrei Litvin Date: Fri, 14 Apr 2023 13:02:24 -0400 Subject: [PATCH] Add some ability for matter file parser to support doc comments. (#26059) * Start adding the ability to parse descriptions. In this case zapxml already contains description * Support doc comment parsing in matter files * Restyle * Fix tests for xml * Fix flake8 warnings * Zap regen --------- Co-authored-by: Andrei Litvin --- .../matter_idl/matter_grammar.lark | 4 +- .../matter_idl/matter_idl_parser.py | 109 ++++++++++++++---- .../matter_idl/matter_idl_types.py | 14 ++- .../matter_idl/test_matter_idl_parser.py | 35 +++++- .../matter_idl/test_xml_parser.py | 4 + .../matter_idl/zapxml/handlers/handlers.py | 21 +++- 6 files changed, 157 insertions(+), 30 deletions(-) diff --git a/scripts/py_matter_idl/matter_idl/matter_grammar.lark b/scripts/py_matter_idl/matter_idl/matter_grammar.lark index e31e5a5cc2b700..6116e7cbf08071 100644 --- a/scripts/py_matter_idl/matter_idl/matter_grammar.lark +++ b/scripts/py_matter_idl/matter_idl/matter_grammar.lark @@ -98,8 +98,8 @@ data_type: type ("<" positive_integer ">")? id: ID type: ID -POSITIVE_INTEGER: /\d+/ -HEX_INTEGER: /0x[A-Fa-f0-9]+/ +POSITIVE_INTEGER: /\d+/ +HEX_INTEGER: /0x[A-Fa-f0-9]+/ ID: /[a-zA-Z_][a-zA-Z0-9_]*/ idl: (struct|enum|cluster|endpoint)* diff --git a/scripts/py_matter_idl/matter_idl/matter_idl_parser.py b/scripts/py_matter_idl/matter_idl/matter_idl_parser.py index 0bbea23c1688bd..75043665c52b5f 100755 --- a/scripts/py_matter_idl/matter_idl/matter_idl_parser.py +++ b/scripts/py_matter_idl/matter_idl/matter_idl_parser.py @@ -4,6 +4,7 @@ import logging from lark import Lark +from lark.lexer import Token from lark.visitors import Transformer, v_args try: @@ -28,6 +29,40 @@ def UnionOfAllFlags(flags_list): return functools.reduce(lambda a, b: a | b, flags_list) +class PrefixCppDocComment: + def __init__(self, token): + self.start_pos = token.start_pos + # Assume CPP comments: /**...*/ + self.value_len = len(token.value) # includes /***/ AND whitespace + self.value = token.value[3:-2].strip() + + def appply_to_idl(self, idl: Idl, content: str): + if self.start_pos is None: + return + + actual_pos = self.start_pos + self.value_len + while content[actual_pos] in ' \t\n\r': + actual_pos += 1 + + # A doc comment will apply to any supported element assuming it immediately + # preceeds id (skipping whitespace) + for item in self.supported_types(idl): + if item.parse_meta and item.parse_meta.start_pos == actual_pos: + item.description = self.value + return + + def supported_types(self, idl: Idl): + """List all types supported by doc comments.""" + for cluster in idl.clusters: + yield cluster + + for command in cluster.commands: + yield command + + def __repr__(self): + return ("PREFIXDoc: %r at %r" % (self.value, self.start_pos)) + + class AddServerClusterToEndpointTransform: """Provides an 'apply' method that can be run on endpoints to add a server cluster to the given endpoint. @@ -86,11 +121,13 @@ class MatterIdlTransformer(Transformer): parsed input (as strings unless transformed) and interpret them. Actual parametes to the methods depend on the rules multiplicity and/or - optionality. + optionally. """ def __init__(self, skip_meta): self.skip_meta = skip_meta + self.doc_comments = [] + self._cluster_start_pos = None def positive_integer(self, tokens): """Numbers in the grammar are integers or hex numbers. @@ -218,10 +255,14 @@ def struct_field(self, args): field.qualities = UnionOfAllFlags(args[:-1]) or FieldQuality.NONE return field - def server_cluster(self, _): + @v_args(meta=True) + def server_cluster(self, meta, _): + self._cluster_start_pos = meta and meta.start_pos return ClusterSide.SERVER - def client_cluster(self, _): + @v_args(meta=True) + def client_cluster(self, meta, _): + self._cluster_start_pos = meta and meta.start_pos return ClusterSide.CLIENT def command_access(self, privilege): @@ -239,18 +280,24 @@ def command_with_access(self, args): return init_args - def command(self, args): + @v_args(meta=True) + def command(self, meta, args): # The command takes 4 arguments if no input argument, 5 if input # argument is provided if len(args) != 5: args.insert(2, None) - return Command( + meta = None if self.skip_meta else ParseMetaData(meta) + + cmd = Command( + parse_meta=meta, qualities=args[0], input_param=args[2], output_param=args[3], code=args[4], - **args[1] + **args[1], ) + return cmd + def event_access(self, privilege): return privilege[0] @@ -397,6 +444,10 @@ def endpoint_server_cluster(self, meta, id, *content): def cluster(self, meta, side, name, code, *content): meta = None if self.skip_meta else ParseMetaData(meta) + # shift actual starting position where the doc comment would start + if meta and self._cluster_start_pos: + meta.start_pos = self._cluster_start_pos + result = Cluster(parse_meta=meta, side=side, name=name, code=code) for item in content: @@ -434,16 +485,41 @@ def idl(self, items): return idl + def prefix_doc_comment(self): + print("TODO: prefix") + + # Processing of (potential-doc)-comments: + def c_comment(self, token: Token): + """Processes comments starting with "/*" """ + if token.value.startswith("/**"): + self.doc_comments.append(PrefixCppDocComment(token)) + class ParserWithLines: - def __init__(self, parser, skip_meta: bool): - self.parser = parser - self.skip_meta = skip_meta + def __init__(self, skip_meta: bool): + self.transformer = MatterIdlTransformer(skip_meta) + + # NOTE: LALR parser is fast. While Earley could parse more ambigous grammars, + # earley is much slower: + # - 0.39s LALR parsing of all-clusters-app.matter + # - 2.26s Earley parsing of the same thing. + # For this reason, every attempt should be made to make the grammar context free + self.parser = Lark.open( + 'matter_grammar.lark', rel_to=__file__, start='idl', parser='lalr', propagate_positions=True, + # separate callbacks to ignore from regular parsing (no tokens) + # while still getting notified about them + lexer_callbacks={ + 'C_COMMENT': self.transformer.c_comment, + } + ) - def parse(self, file, file_name: str = None): - idl = MatterIdlTransformer(self.skip_meta).transform( - self.parser.parse(file)) + def parse(self, file: str, file_name: str = None): + idl = self.transformer.transform(self.parser.parse(file)) idl.parse_file_name = file_name + + for comment in self.transformer.doc_comments: + comment.appply_to_idl(idl, file) + return idl @@ -451,14 +527,7 @@ def CreateParser(skip_meta: bool = False): """ Generates a parser that will process a ".matter" file into a IDL """ - - # NOTE: LALR parser is fast. While Earley could parse more ambigous grammars, - # earley is much slower: - # - 0.39s LALR parsing of all-clusters-app.matter - # - 2.26s Earley parsing of the same thing. - # For this reason, every attempt should be made to make the grammar context free - return ParserWithLines(Lark.open( - 'matter_grammar.lark', rel_to=__file__, start='idl', parser='lalr', propagate_positions=True), skip_meta) + return ParserWithLines(skip_meta) if __name__ == '__main__': diff --git a/scripts/py_matter_idl/matter_idl/matter_idl_types.py b/scripts/py_matter_idl/matter_idl/matter_idl_types.py index b4a47218a4d6d2..56f1e36028fcc4 100644 --- a/scripts/py_matter_idl/matter_idl/matter_idl_types.py +++ b/scripts/py_matter_idl/matter_idl/matter_idl_types.py @@ -9,16 +9,19 @@ # Helpful when referencing data items in logs when processing @dataclass class ParseMetaData: - line: int - column: int + line: Optional[int] + column: Optional[int] + start_pos: Optional[int] - def __init__(self, meta: Meta = None, line: int = None, column: int = None): + def __init__(self, meta: Meta = None, line: int = None, column: int = None, start_pos: int = None): if meta: self.line = meta.line self.column = meta.column + self.start_pos = meta.start_pos else: self.line = line self.column = column + self.start_pos = start_pos class StructQuality(enum.Flag): @@ -192,6 +195,10 @@ class Command: output_param: str qualities: CommandQuality = CommandQuality.NONE invokeacl: AccessPrivilege = AccessPrivilege.OPERATE + description: Optional[str] = None + + # Parsing meta data missing only when skip meta data is requested + parse_meta: Optional[ParseMetaData] = field(default=None) @property def is_timed_invoke(self): @@ -209,6 +216,7 @@ class Cluster: attributes: List[Attribute] = field(default_factory=list) structs: List[Struct] = field(default_factory=list) commands: List[Command] = field(default_factory=list) + description: Optional[str] = None # Parsing meta data missing only when skip meta data is requested parse_meta: Optional[ParseMetaData] = field(default=None) diff --git a/scripts/py_matter_idl/matter_idl/test_matter_idl_parser.py b/scripts/py_matter_idl/matter_idl/test_matter_idl_parser.py index 00b8bce8193510..b91cd58b3db1e1 100755 --- a/scripts/py_matter_idl/matter_idl/test_matter_idl_parser.py +++ b/scripts/py_matter_idl/matter_idl/test_matter_idl_parser.py @@ -33,8 +33,8 @@ import unittest -def parseText(txt): - return CreateParser(skip_meta=True).parse(txt) +def parseText(txt, skip_meta=True): + return CreateParser(skip_meta=skip_meta).parse(txt) class TestParser(unittest.TestCase): @@ -144,6 +144,33 @@ def test_cluster_attribute(self): )]) self.assertEqual(actual, expected) + def test_doc_comments(self): + actual = parseText(""" + /** Documentation for MyCluster */ + server cluster MyCluster = 0x321 { + } + + /** Documentation for MyCluster #2 */ + client cluster MyCluster = 0x321 { + /* NOT a doc comment */ + command WithoutArg(): DefaultSuccess = 123; + + /** Some command doc comment */ + command InOutStuff(InParam): OutParam = 222; + } + """, skip_meta=False) + + # meta_data may not match but is required for doc comments. Clean it up + + # Metadata parsing varies line/column, so only check doc comments + self.assertEqual( + actual.clusters[0].description, "Documentation for MyCluster") + self.assertEqual( + actual.clusters[1].description, "Documentation for MyCluster #2") + self.assertIsNone(actual.clusters[1].commands[0].description) + self.assertEqual( + actual.clusters[1].commands[1].description, "Some command doc comment") + def test_sized_attribute(self): actual = parseText(""" server cluster MyCluster = 1 { @@ -438,9 +465,9 @@ def test_parsing_metadata_for_cluster(self): """) expected = Idl(clusters=[ - Cluster(parse_meta=ParseMetaData(line=2, column=1), + Cluster(parse_meta=ParseMetaData(line=2, column=1, start_pos=1), side=ClusterSide.SERVER, name="A", code=1), - Cluster(parse_meta=ParseMetaData(line=5, column=4), + Cluster(parse_meta=ParseMetaData(line=5, column=4, start_pos=87), side=ClusterSide.CLIENT, name="B", code=2), ]) self.assertEqual(actual, expected) diff --git a/scripts/py_matter_idl/matter_idl/test_xml_parser.py b/scripts/py_matter_idl/matter_idl/test_xml_parser.py index 98f36c08f57e84..d3ff819b37d9c7 100755 --- a/scripts/py_matter_idl/matter_idl/test_xml_parser.py +++ b/scripts/py_matter_idl/matter_idl/test_xml_parser.py @@ -60,6 +60,7 @@ def testCluster(self): Test 0x1234 + Test SomeIntAttribute @@ -92,6 +93,7 @@ def testCluster(self): side=ClusterSide.CLIENT, name='Test', code=0x1234, + description="Test", attributes=[ Attribute(definition=Field( data_type=DataType(name='INT32U'), @@ -130,6 +132,7 @@ def testCluster(self): commands=[ Command(name='GetSomeData', code=33, input_param='GetSomeDataRequest', output_param='GetSomeDataResponse', + description='This is just a test: client to server', invokeacl=AccessPrivilege.ADMINISTER) ]) ])) @@ -307,6 +310,7 @@ def testSkipsNotProcessedFields(self): self.assertEqual(idl, Idl(clusters=[ Cluster(side=ClusterSide.CLIENT, name='WindowCovering', code=0x102, + description='Provides an interface for controlling and adjusting automatic window coverings. ', structs=[], attributes=[ Attribute( diff --git a/scripts/py_matter_idl/matter_idl/zapxml/handlers/handlers.py b/scripts/py_matter_idl/matter_idl/zapxml/handlers/handlers.py index 1bb9cff8856853..8d55fc531609f1 100644 --- a/scripts/py_matter_idl/matter_idl/zapxml/handlers/handlers.py +++ b/scripts/py_matter_idl/matter_idl/zapxml/handlers/handlers.py @@ -13,6 +13,7 @@ # limitations under the License. import logging +from typing import Any from matter_idl.matter_idl_types import (Attribute, Bitmap, Cluster, ClusterSide, Command, CommandQuality, ConstantEntry, DataType, Enum, Event, EventPriority, EventQuality, Field, FieldQuality, Idl, Struct, StructQuality, @@ -339,6 +340,20 @@ def EndProcessing(self): self.context.AddIdlPostProcessor(self) +class DescriptionHandler(BaseHandler): + """Handles .../description text elements + + Attaches a "description" attribute to a given structure + """ + + def __init__(self, context: Context, target: Any): + super().__init__(context) + self.target = target + + def HandleContent(self, content): + self.target.description = content + + class CommandHandler(BaseHandler): """Handles /configurator/cluster/command elements.""" @@ -421,6 +436,8 @@ def GetNextProcessor(self, name: str, attrs): self._struct.fields.append(self.GetArgumentField(attrs)) return BaseHandler(self.context, handled=HandledDepth.SINGLE_TAG) elif name.lower() == 'description': + if self._command: + return DescriptionHandler(self.context, self._command) return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) else: return BaseHandler(self.context) @@ -496,7 +513,9 @@ def GetNextProcessor(self, name: str, attrs): return ClusterGlobalAttributeHandler(self.context, self._cluster, ParseInt(attrs['code'])) elif name.lower() == 'command': return CommandHandler(self.context, self._cluster, attrs) - elif name.lower() in ['define', 'description', 'domain', 'tag', 'client', 'server']: + elif name.lower() == 'description': + return DescriptionHandler(self.context, self._cluster) + elif name.lower() in ['define', 'domain', 'tag', 'client', 'server']: # NOTE: we COULD use client and server to create separate definitions # of each, but the usefulness of this is unclear as the definitions are # likely identical and matter has no concept of differences between the two