chore: An image for parsing repo spec compliance. (#254)

``` $ docker build -t specfinder . $ docker run --mount src=/path/tojava-sdk/,target=/appdir,type=bind -it specfinder \ spec_finder.py --code-directory /appdir --diff --json-report ``` This will generate `java-report.json` in the java-sdk folder which we can use for other automation. The report looks like this: ```json { "extra": [ "1.1.8", "2.4.2", "2.4.3", "2.4.4", "2.4.5", "4.1.4" ], "missing": [ "1.1.2.4", "1.7.1", "1.7.2.1", "1.7.3", "1.7.4", "1.7.5", "1.7.6", "1.7.7", "1.7.8", "2.4.2.1", "2.5.2", "3.2.2.3", "3.2.2.4", "3.2.4.2", "4.1.4.1", "5.1.5", "5.3.4.1", "5.3.4.2", "5.3.4.3", "5.3.5" ], "different-text": [ "1.1.3", "1.1.6", "1.2.2", "2.4.1", "2.6.1", "3.2.3", "3.2.4.1" ], "good": [ "1.1.1", "1.1.2.1", "1.1.2.2", "1.1.2.3", "1.1.4", "1.1.5", "1.1.7", "1.2.1", "1.3.1.1", "1.3.2.1", "1.3.3.1", "1.3.4", "1.4.1.1", "1.4.10", "1.4.11", "1.4.12", "1.4.13", "1.4.14", "1.4.14.1", "1.4.2.1", "1.4.3", "1.4.4.1", "1.4.5", "1.4.6", "1.4.7", "1.4.8", "1.4.9", "1.5.1", "1.6.1", "2.1.1", "2.2.1", "2.2.10", "2.2.2.1", "2.2.3", "2.2.4", "2.2.5", "2.2.6", "2.2.7", "2.2.8.1", "2.2.9", "2.3.1", "2.3.2", "2.3.3", "2.5.1", "3.1.1", "3.1.2", "3.1.3", "3.1.4", "3.2.1.1", "3.2.2.1", "3.2.2.2", "3.3.1.1", "3.3.1.2.1", "3.3.1.2.2", "3.3.1.2.3", "3.3.2.1", "4.1.1", "4.1.2", "4.1.3", "4.2.1", "4.2.2.1", "4.2.2.2", "4.2.2.3", "4.3.1", "4.3.2.1", "4.3.3.1", "4.3.4", "4.3.5", "4.3.6", "4.3.7", "4.3.8", "4.3.9.1", "4.4.1", "4.4.2", "4.4.3", "4.4.4", "4.4.5", "4.4.6", "4.4.7", "4.5.1", "4.5.2", "4.5.3", "5.1.1", "5.1.2", "5.1.3", "5.1.4", "5.2.1", "5.2.2", "5.2.3", "5.2.4", "5.2.5", "5.2.6", "5.2.7", "5.3.1", "5.3.2", "5.3.3" ] } ``` --------- Signed-off-by: Justin Abrahms <[email protected]>
open-feature · May 9, 2024 · 9a80acf · 9a80acf
1 parent 681f38a
commit 9a80acf
Show file tree

Hide file tree

Showing 3 changed files with 215 additions and 0 deletions.
diff --git a/tools/repo_parser/Dockerfile b/tools/repo_parser/Dockerfile
@@ -0,0 +1,8 @@
+# Base image: Chainguard's Python image.
+FROM cgr.dev/chainguard/python:latest
+
+WORKDIR .
+
+# Only the parser script is copied into the image.
+COPY ./spec_finder.py ./
+# Mount point for the repository to scan (the README bind-mounts the SDK checkout here).
+VOLUME /appdir
+
+# Exec-form entrypoint: arguments given to `docker run` after the image name are appended to it.
+ENTRYPOINT ["python", "spec_finder.py"]
diff --git a/tools/repo_parser/README.md b/tools/repo_parser/README.md
@@ -0,0 +1,49 @@
+# Repo Parser
+
+This will parse the contents of an OpenFeature repo and determine how they adhere to the spec. This can be gamed and assumes everyone is participating in good faith.
+
+We look for a `.specrc` file in the root of a repository to figure out how to find test cases that are annotated with the spec number and the text of the spec. We can then produce a report which says "you're covered" or details about how you're not covered. The goal of this is to use that resulting report to power a spec-compliance matrix for end users to vet SDK quality.
+
+## Usage
+
+```
+$ docker build -t specfinder .
+$ docker run --mount src=/path/to/java-sdk/,target=/appdir,type=bind -it specfinder \
+    spec_finder.py --code-directory /appdir --diff --json-report
+```
+
+### `.specrc`
+
+This should be at the root of the repository.
+
+`multiline_regex` captures the text which contains the test marker. In Java, for instance, it's a specially crafted annotation. `number_subregex` and `text_subregex` are then matched against the substring captured by `multiline_regex` to parse the spec number and text. These are multi-line regexes.
+
+Example:
+
+```conf
+[spec]
+file_extension=java
+multiline_regex=@Specification\((?P<innards>.*?)\)\s*$
+number_subregex=number\s*=\s*['"](.*?)['"]
+text_subregex=text\s*=\s*['"](.*)['"]
+```
+
+You can test the regexes in Python like this to validate that they work:
+
+```
+$ python3
+Python 3.9.6 (default, Feb  3 2024, 15:58:27)
+[Clang 15.0.0 (clang-1500.3.9.4)] on darwin
+Type "help", "copyright", "credits" or "license" for more information.
+>>> import re
+>>> text = '''    @Specification(number="4.3.6", text="The after stage MUST run after flag resolution occurs. It accepts a hook context (required), flag evaluation details (required) and hook hints (optional). It has no return value.")
+...     @Specification(number="4.3.7", text="The error hook MUST run when errors are encountered in the before stage, the after stage or during flag resolution. It accepts hook context (required), exception representing what went wrong (required), and hook hints (optional). It has no return value.")
+... '''
+>>> entries = re.findall(r'@Specification\((?P<innards>.*?)\)\s*$', text, re.MULTILINE | re.DOTALL)
+>>> entries
+['number="4.3.7", text="The error hook MUST run when errors are encountered in the before stage, the after stage or during flag resolution. It accepts hook context (required), exception representing what went wrong (required), and hook hints (optional). It has no return value."']
+>>> re.findall(r'''number\s*=\s*['"](.*?)['"]''', entries[0], re.MULTILINE | re.DOTALL)
+['4.3.7']
+>>> re.findall(r'''text\s*=\s*['"](.*)['"]''', entries[0], re.MULTILINE | re.DOTALL)
+['The error hook MUST run when errors are encountered in the before stage, the after stage or during flag resolution. It accepts hook context (required), exception representing what went wrong (required), and hook hints (optional). It has no return value.']
+```
diff --git a/tools/repo_parser/spec_finder.py b/tools/repo_parser/spec_finder.py
@@ -0,0 +1,158 @@
+#!/usr/bin/python
+import urllib.request
+import json
+import re
+import difflib
+import os
+import sys
+import configparser
+
+def _demarkdown(t):
+    """Return ``t`` with ``**``, backticks and double quotes removed."""
+    stripped = t
+    # Same three substitutions as before, applied in the same order.
+    for marker in ('**', '`', '"'):
+        stripped = stripped.replace(marker, '')
+    return stripped
+
+def get_spec_parser(code_dir):
+    """Load the ``[spec]`` section of the ``.specrc`` file found in ``code_dir``.
+
+    Args:
+        code_dir: Directory that contains the ``.specrc`` file.
+
+    Returns:
+        The ``configparser`` section proxy for ``[spec]``; it is guaranteed to
+        contain ``file_extension``, ``multiline_regex``, ``number_subregex``
+        and ``text_subregex``.
+
+    Raises:
+        OSError: If ``.specrc`` cannot be opened.
+        KeyError: If the file has no ``[spec]`` section.
+        ValueError: If one of the required keys is missing.
+    """
+    required_keys = ('file_extension', 'multiline_regex', 'number_subregex', 'text_subregex')
+
+    typical = configparser.ConfigParser()
+    with open(os.path.join(code_dir, '.specrc')) as f:
+        # Parse the file as written. Re-joining readlines() with '\n' used to
+        # insert an extra blank line after every line of the config.
+        typical.read_file(f)
+
+    retval = typical['spec']
+    missing_keys = [key for key in required_keys if key not in retval]
+    if missing_keys:
+        # Raised explicitly because ``assert`` statements are stripped under ``python -O``.
+        raise ValueError('.specrc [spec] section is missing: %s' % ', '.join(missing_keys))
+    return retval
+
+
+
+def get_spec(force_refresh=False, path_prefix="./"):
+    """Return the parsed OpenFeature specification, preferring a cached copy.
+
+    The spec is read from ``specification.json`` under ``path_prefix``. When
+    that file does not exist, or ``force_refresh`` is true, the spec is
+    downloaded, written to that path, and then parsed.
+
+    Args:
+        force_refresh: Download the spec even if a cached file exists.
+        path_prefix: Directory holding (or receiving) ``specification.json``.
+
+    Returns:
+        The decoded JSON document.
+    """
+    spec_path = os.path.join(path_prefix, 'specification.json')
+    print("Going to look in ", spec_path)
+    if os.path.exists(spec_path) and not force_refresh:
+        with open(spec_path) as f:
+            data = f.read()
+    else:
+        # urlopen() raises urllib.error.HTTPError for HTTP error responses, so a
+        # failed download surfaces here instead of being cached as the spec.
+        # The context manager closes the connection, which was previously leaked.
+        with urllib.request.urlopen('https://raw.githubusercontent.com/open-feature/spec/main/specification.json') as spec_response:
+            data = spec_response.read().decode('utf-8')
+        with open(spec_path, 'w') as f:
+            f.write(data)
+    return json.loads(data)
+
+
+def main(refresh_spec=False, diff_output=False, limit_numbers=None, code_directory=None, json_report=False):
+    """Compare the spec annotations found in a repository against the official spec.
+
+    Builds a map of requirement number -> text from the specification, scans
+    the files under ``code_directory`` for annotated spec entries (located with
+    the regexes from the repository's ``.specrc``), prints the discrepancies
+    and then exits the process.
+
+    Args:
+        refresh_spec: Re-download the specification even if a cached copy exists.
+        diff_output: Also print the official and repository text of mismatches.
+        limit_numbers: When non-empty, only these spec numbers found in the
+            repository are classified; the "missing" list is not filtered.
+        code_directory: Root of the repository to scan; it must contain ``.specrc``.
+        json_report: Write the report to ``<file_extension>-report.json`` inside
+            ``code_directory``.
+
+    Exits with the number of missing plus mismatched requirements (capped at 255).
+    """
+    report = {
+        'extra': set(),
+        'missing': set(),
+        'different-text': set(),
+        'good': set(),
+    }
+
+    actual_spec = get_spec(refresh_spec, path_prefix=code_directory)
+    config = get_spec_parser(code_directory)
+
+    # Requirement number -> normalised text, for top-level rules and their children.
+    spec_map = {}
+    for entry in actual_spec['rules']:
+        for rule in [entry, *entry['children']]:
+            if 'requirement' in rule['machine_id']:
+                number = re.search(r'[\d.]+', rule['id']).group()
+                spec_map[number] = _demarkdown(rule['content'])
+
+    repo_specs = {}
+    missing = set(spec_map.keys())
+    suffix = '.%s' % config['file_extension']
+
+    # Scan the requested repository; this used to walk "." and so depended on
+    # the process's working directory instead of ``code_directory``.
+    for root, _dirs, files in os.walk(code_directory, topdown=False):
+        for name in files:
+            # Match the real extension: a substring test also accepted names
+            # such as "Foo.java.orig".
+            if not name.endswith(suffix):
+                continue
+            with open(os.path.join(root, name)) as f:
+                data = f.read()
+
+            for match in re.findall(config['multiline_regex'], data, re.MULTILINE | re.DOTALL):
+                match = match.replace('\n', '')
+                numbers = re.findall(config['number_subregex'], match)
+                if not numbers:
+                    # An annotation without a parsable number is skipped rather
+                    # than aborting the whole run with an IndexError.
+                    print(f"Skipping {match} b/c we couldn't parse it")
+                    continue
+                number = numbers[0]
+
+                missing.discard(number)
+                text_with_concat_chars = re.findall(config['text_subregex'], match, re.MULTILINE | re.DOTALL)
+                try:
+                    text = ''.join(text_with_concat_chars).strip()
+                    # We have to match for ") to capture text with parens inside, so we add the trailing " back in.
+                    # NOTE(security): eval() executes text taken from the scanned
+                    # repository, so only run this against code you trust. It also
+                    # folds '"a" + "b"' style concatenation (presumably why it is
+                    # used), which ast.literal_eval would reject.
+                    text = _demarkdown(eval('"%s"' % text))
+                    repo_specs[number] = {
+                        'number': number,
+                        'text': text,
+                    }
+                except Exception:
+                    print(f"Skipping {match} b/c we couldn't parse it")
+
+    bad_num = len(missing)
+    for number, entry in sorted(repo_specs.items(), key=lambda x: x[0]):
+        if limit_numbers and number not in limit_numbers:
+            continue
+        if number not in spec_map:
+            report['extra'].add(number)
+            print(f"{number} is defined in our tests, but couldn't find it in the spec")
+            continue
+
+        txt = entry['text']
+        if txt == spec_map[number]:
+            report['good'].add(number)
+            continue
+
+        print(f"{number} is bad.")
+        report['different-text'].add(number)
+        bad_num += 1
+        if diff_output:
+            print("Official:")
+            print("\t%s" % spec_map[number])
+            print("")
+            print("Ours:")
+            print("\t%s" % txt)
+    print("")
+
+    if missing:
+        report['missing'] = missing
+        print('In the spec, but not in our tests: ')
+        for m in sorted(missing):
+            print(f"  {m}: {spec_map[m]}")
+
+    if json_report:
+        report_txt = json.dumps({k: sorted(v) for k, v in report.items()}, indent=4)
+        # Write next to the scanned code instead of a hard-coded /appdir path;
+        # inside the container (code_directory=/appdir) the location is unchanged.
+        loc = os.path.join(code_directory, '%s-report.json' % config['file_extension'])
+        with open(loc, 'w') as f:
+            f.write(report_txt)
+    # POSIX truncates exit statuses to 8 bits, so an uncapped count of 256
+    # problems would be reported to the shell as success.
+    sys.exit(min(bad_num, 255))
+
+
+if __name__ == '__main__':
+    import argparse
+
+    # Command-line front end: each option maps one-to-one onto a main() keyword.
+    cli = argparse.ArgumentParser(description='Parse the spec to make sure our tests cover it')
+    cli.add_argument('--refresh-spec', action='store_true', help='Re-downloads the spec')
+    cli.add_argument('--diff-output', action='store_true', help='print the text differences')
+    cli.add_argument('--code-directory', action='store', required=True, help='directory to find code in')
+    cli.add_argument('--json-report', action='store_true', help="Store a json report into ${extension}-report.json")
+    cli.add_argument(
+        'specific_numbers',
+        metavar='num',
+        type=str,
+        nargs='*',
+        help='limit this to specific numbers',
+    )
+
+    opts = cli.parse_args()
+    run_kwargs = {
+        'refresh_spec': opts.refresh_spec,
+        'diff_output': opts.diff_output,
+        'limit_numbers': opts.specific_numbers,
+        'code_directory': opts.code_directory,
+        'json_report': opts.json_report,
+    }
+    main(**run_kwargs)