Add some ability for matter file parser to support doc comments. (#26059

) * Start adding the ability to parse descriptions. In this case zapxml already contains description * Support doc comment parsing in matter files * Restyle * Fix tests for xml * Fix flake8 warnings * Zap regen --------- Co-authored-by: Andrei Litvin <[email protected]>
project-chip · Feb 6, 2024 · 1378720 · 1378720
1 parent 183993c
commit 1378720
Show file tree

Hide file tree

Showing 6 changed files with 157 additions and 30 deletions.
diff --git a/scripts/py_matter_idl/matter_idl/matter_grammar.lark b/scripts/py_matter_idl/matter_idl/matter_grammar.lark
@@ -98,8 +98,8 @@ data_type: type ("<" positive_integer ">")?
 id: ID
 type: ID
 
-POSITIVE_INTEGER: /\d+/ 
-HEX_INTEGER: /0x[A-Fa-f0-9]+/ 
+POSITIVE_INTEGER: /\d+/
+HEX_INTEGER: /0x[A-Fa-f0-9]+/
 ID: /[a-zA-Z_][a-zA-Z0-9_]*/
 
 idl: (struct|enum|cluster|endpoint)*

diff --git a/scripts/py_matter_idl/matter_idl/matter_idl_parser.py b/scripts/py_matter_idl/matter_idl/matter_idl_parser.py
@@ -4,6 +4,7 @@
 import logging
 
 from lark import Lark
+from lark.lexer import Token
 from lark.visitors import Transformer, v_args
 
 try:
@@ -28,6 +29,40 @@ def UnionOfAllFlags(flags_list):
     return functools.reduce(lambda a, b: a | b, flags_list)
 
 
+class PrefixCppDocComment:
+    def __init__(self, token):
+        self.start_pos = token.start_pos
+        # Assume CPP comments: /**...*/
+        self.value_len = len(token.value)  # includes /***/ AND whitespace
+        self.value = token.value[3:-2].strip()
+
+    def appply_to_idl(self, idl: Idl, content: str):
+        if self.start_pos is None:
+            return
+
+        actual_pos = self.start_pos + self.value_len
+        while content[actual_pos] in ' \t\n\r':
+            actual_pos += 1
+
+        # A doc comment will apply to any supported element assuming it immediately
+        # preceeds id (skipping whitespace)
+        for item in self.supported_types(idl):
+            if item.parse_meta and item.parse_meta.start_pos == actual_pos:
+                item.description = self.value
+                return
+
+    def supported_types(self, idl: Idl):
+        """List all types supported by doc comments."""
+        for cluster in idl.clusters:
+            yield cluster
+
+            for command in cluster.commands:
+                yield command
+
+    def __repr__(self):
+        return ("PREFIXDoc: %r at %r" % (self.value, self.start_pos))
+
+
 class AddServerClusterToEndpointTransform:
     """Provides an 'apply' method that can be run on endpoints
        to add a server cluster to the given endpoint.
@@ -86,11 +121,13 @@ class MatterIdlTransformer(Transformer):
       parsed input (as strings unless transformed) and interpret them.
 
       Actual parametes to the methods depend on the rules multiplicity and/or
-      optionality.
+      optionally.
     """
 
     def __init__(self, skip_meta):
         self.skip_meta = skip_meta
+        self.doc_comments = []
+        self._cluster_start_pos = None
 
     def positive_integer(self, tokens):
         """Numbers in the grammar are integers or hex numbers.
@@ -218,10 +255,14 @@ def struct_field(self, args):
         field.qualities = UnionOfAllFlags(args[:-1]) or FieldQuality.NONE
         return field
 
-    def server_cluster(self, _):
+    @v_args(meta=True)
+    def server_cluster(self, meta, _):
+        self._cluster_start_pos = meta and meta.start_pos
         return ClusterSide.SERVER
 
-    def client_cluster(self, _):
+    @v_args(meta=True)
+    def client_cluster(self, meta, _):
+        self._cluster_start_pos = meta and meta.start_pos
         return ClusterSide.CLIENT
 
     def command_access(self, privilege):
@@ -239,18 +280,24 @@ def command_with_access(self, args):
 
         return init_args
 
-    def command(self, args):
+    @v_args(meta=True)
+    def command(self, meta, args):
         # The command takes 4 arguments if no input argument, 5 if input
         # argument is provided
         if len(args) != 5:
             args.insert(2, None)
 
-        return Command(
+        meta = None if self.skip_meta else ParseMetaData(meta)
+
+        cmd = Command(
+            parse_meta=meta,
             qualities=args[0],
             input_param=args[2], output_param=args[3], code=args[4],
-            **args[1]
+            **args[1],
         )
 
+        return cmd
+
     def event_access(self, privilege):
         return privilege[0]
 
@@ -397,6 +444,10 @@ def endpoint_server_cluster(self, meta, id, *content):
     def cluster(self, meta, side, name, code, *content):
         meta = None if self.skip_meta else ParseMetaData(meta)
 
+        # shift actual starting position where the doc comment would start
+        if meta and self._cluster_start_pos:
+            meta.start_pos = self._cluster_start_pos
+
         result = Cluster(parse_meta=meta, side=side, name=name, code=code)
 
         for item in content:
@@ -434,31 +485,49 @@ def idl(self, items):
 
         return idl
 
+    def prefix_doc_comment(self):
+        print("TODO: prefix")
+
+    # Processing of (potential-doc)-comments:
+    def c_comment(self, token: Token):
+        """Processes comments starting with "/*" """
+        if token.value.startswith("/**"):
+            self.doc_comments.append(PrefixCppDocComment(token))
+
 
 class ParserWithLines:
-    def __init__(self, parser, skip_meta: bool):
-        self.parser = parser
-        self.skip_meta = skip_meta
+    def __init__(self, skip_meta: bool):
+        self.transformer = MatterIdlTransformer(skip_meta)
+
+        # NOTE: LALR parser is fast. While Earley could parse more ambigous grammars,
+        #       earley is much slower:
+        #    - 0.39s LALR parsing of all-clusters-app.matter
+        #    - 2.26s Earley parsing of the same thing.
+        # For this reason, every attempt should be made to make the grammar context free
+        self.parser = Lark.open(
+            'matter_grammar.lark', rel_to=__file__, start='idl', parser='lalr', propagate_positions=True,
+            # separate callbacks to ignore from regular parsing (no tokens)
+            # while still getting notified about them
+            lexer_callbacks={
+                'C_COMMENT': self.transformer.c_comment,
+            }
+        )
 
-    def parse(self, file, file_name: str = None):
-        idl = MatterIdlTransformer(self.skip_meta).transform(
-            self.parser.parse(file))
+    def parse(self, file: str, file_name: str = None):
+        idl = self.transformer.transform(self.parser.parse(file))
         idl.parse_file_name = file_name
+
+        for comment in self.transformer.doc_comments:
+            comment.appply_to_idl(idl, file)
+
         return idl
 
 
 def CreateParser(skip_meta: bool = False):
     """
     Generates a parser that will process a ".matter" file into a IDL
     """
-
-    # NOTE: LALR parser is fast. While Earley could parse more ambigous grammars,
-    #       earley is much slower:
-    #    - 0.39s LALR parsing of all-clusters-app.matter
-    #    - 2.26s Earley parsing of the same thing.
-    # For this reason, every attempt should be made to make the grammar context free
-    return ParserWithLines(Lark.open(
-        'matter_grammar.lark', rel_to=__file__, start='idl', parser='lalr', propagate_positions=True), skip_meta)
+    return ParserWithLines(skip_meta)
 
 
 if __name__ == '__main__':

diff --git a/scripts/py_matter_idl/matter_idl/matter_idl_types.py b/scripts/py_matter_idl/matter_idl/matter_idl_types.py
@@ -9,16 +9,19 @@
 # Helpful when referencing data items in logs when processing
 @dataclass
 class ParseMetaData:
-    line: int
-    column: int
+    line: Optional[int]
+    column: Optional[int]
+    start_pos: Optional[int]
 
-    def __init__(self, meta: Meta = None, line: int = None, column: int = None):
+    def __init__(self, meta: Meta = None, line: int = None, column: int = None, start_pos: int = None):
         if meta:
             self.line = meta.line
             self.column = meta.column
+            self.start_pos = meta.start_pos
         else:
             self.line = line
             self.column = column
+            self.start_pos = start_pos
 
 
 class StructQuality(enum.Flag):
@@ -192,6 +195,10 @@ class Command:
     output_param: str
     qualities: CommandQuality = CommandQuality.NONE
     invokeacl: AccessPrivilege = AccessPrivilege.OPERATE
+    description: Optional[str] = None
+
+    # Parsing meta data missing only when skip meta data is requested
+    parse_meta: Optional[ParseMetaData] = field(default=None)
 
     @property
     def is_timed_invoke(self):
@@ -209,6 +216,7 @@ class Cluster:
     attributes: List[Attribute] = field(default_factory=list)
     structs: List[Struct] = field(default_factory=list)
     commands: List[Command] = field(default_factory=list)
+    description: Optional[str] = None
 
     # Parsing meta data missing only when skip meta data is requested
     parse_meta: Optional[ParseMetaData] = field(default=None)

diff --git a/scripts/py_matter_idl/matter_idl/test_matter_idl_parser.py b/scripts/py_matter_idl/matter_idl/test_matter_idl_parser.py
@@ -33,8 +33,8 @@
 import unittest
 
 
-def parseText(txt):
-    return CreateParser(skip_meta=True).parse(txt)
+def parseText(txt, skip_meta=True):
+    return CreateParser(skip_meta=skip_meta).parse(txt)
 
 
 class TestParser(unittest.TestCase):
@@ -144,6 +144,33 @@ def test_cluster_attribute(self):
                     )])
         self.assertEqual(actual, expected)
 
+    def test_doc_comments(self):
+        actual = parseText("""
+            /** Documentation for MyCluster */
+            server cluster MyCluster = 0x321 {
+            }
+
+            /** Documentation for MyCluster #2 */
+            client cluster MyCluster = 0x321 {
+                /* NOT a doc comment */
+                command WithoutArg(): DefaultSuccess = 123;
+
+                /** Some command doc comment */
+                command InOutStuff(InParam): OutParam = 222;
+            }
+        """, skip_meta=False)
+
+        # meta_data may not match but is required for doc comments. Clean it up
+
+        # Metadata parsing varies line/column, so only check doc comments
+        self.assertEqual(
+            actual.clusters[0].description, "Documentation for MyCluster")
+        self.assertEqual(
+            actual.clusters[1].description, "Documentation for MyCluster #2")
+        self.assertIsNone(actual.clusters[1].commands[0].description)
+        self.assertEqual(
+            actual.clusters[1].commands[1].description, "Some command doc comment")
+
     def test_sized_attribute(self):
         actual = parseText("""
             server cluster MyCluster = 1 {
@@ -438,9 +465,9 @@ def test_parsing_metadata_for_cluster(self):
         """)
 
         expected = Idl(clusters=[
-            Cluster(parse_meta=ParseMetaData(line=2, column=1),
+            Cluster(parse_meta=ParseMetaData(line=2, column=1, start_pos=1),
                     side=ClusterSide.SERVER, name="A", code=1),
-            Cluster(parse_meta=ParseMetaData(line=5, column=4),
+            Cluster(parse_meta=ParseMetaData(line=5, column=4, start_pos=87),
                     side=ClusterSide.CLIENT, name="B", code=2),
         ])
         self.assertEqual(actual, expected)

diff --git a/scripts/py_matter_idl/matter_idl/test_xml_parser.py b/scripts/py_matter_idl/matter_idl/test_xml_parser.py
@@ -60,6 +60,7 @@ def testCluster(self):
               <cluster>
                 <name>Test</name>
                 <code>0x1234</code>
+                <description>Test</description>
 
                 <attribute side="server" code="11" type="INT32U" min="0" max="2" isNullable="true" \
                     reportable="true" writable="false">SomeIntAttribute</attribute>
@@ -92,6 +93,7 @@ def testCluster(self):
                                  side=ClusterSide.CLIENT,
                                  name='Test',
                                  code=0x1234,
+                                 description="Test",
                                  attributes=[
                                      Attribute(definition=Field(
                                          data_type=DataType(name='INT32U'),
@@ -130,6 +132,7 @@ def testCluster(self):
                                  commands=[
                                      Command(name='GetSomeData', code=33,
                                              input_param='GetSomeDataRequest', output_param='GetSomeDataResponse',
+                                             description='This is just a test: client to server',
                                              invokeacl=AccessPrivilege.ADMINISTER)
                                  ])
                          ]))
@@ -307,6 +310,7 @@ def testSkipsNotProcessedFields(self):
         self.assertEqual(idl,
                          Idl(clusters=[
                              Cluster(side=ClusterSide.CLIENT, name='WindowCovering', code=0x102,
+                                     description='Provides an interface for controlling and adjusting automatic window coverings. ',
                                      structs=[],
                                      attributes=[
                                          Attribute(

diff --git a/scripts/py_matter_idl/matter_idl/zapxml/handlers/handlers.py b/scripts/py_matter_idl/matter_idl/zapxml/handlers/handlers.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import logging
+from typing import Any
 
 from matter_idl.matter_idl_types import (Attribute, Bitmap, Cluster, ClusterSide, Command, CommandQuality, ConstantEntry, DataType,
                                          Enum, Event, EventPriority, EventQuality, Field, FieldQuality, Idl, Struct, StructQuality,
@@ -339,6 +340,20 @@ def EndProcessing(self):
         self.context.AddIdlPostProcessor(self)
 
 
+class DescriptionHandler(BaseHandler):
+    """Handles .../description text elements
+
+       Attaches a "description" attribute to a given structure
+    """
+
+    def __init__(self, context: Context, target: Any):
+        super().__init__(context)
+        self.target = target
+
+    def HandleContent(self, content):
+        self.target.description = content
+
+
 class CommandHandler(BaseHandler):
     """Handles /configurator/cluster/command elements."""
 
@@ -421,6 +436,8 @@ def GetNextProcessor(self, name: str, attrs):
             self._struct.fields.append(self.GetArgumentField(attrs))
             return BaseHandler(self.context, handled=HandledDepth.SINGLE_TAG)
         elif name.lower() == 'description':
+            if self._command:
+                return DescriptionHandler(self.context, self._command)
             return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE)
         else:
             return BaseHandler(self.context)
@@ -496,7 +513,9 @@ def GetNextProcessor(self, name: str, attrs):
             return ClusterGlobalAttributeHandler(self.context, self._cluster, ParseInt(attrs['code']))
         elif name.lower() == 'command':
             return CommandHandler(self.context, self._cluster, attrs)
-        elif name.lower() in ['define', 'description', 'domain', 'tag', 'client', 'server']:
+        elif name.lower() == 'description':
+            return DescriptionHandler(self.context, self._cluster)
+        elif name.lower() in ['define', 'domain', 'tag', 'client', 'server']:
             # NOTE: we COULD use client and server to create separate definitions
             #       of each, but the usefulness of this is unclear as the definitions are
             #       likely identical and matter has no concept of differences between the two