Merge pull request #18 from ansible/ttakamiy/AAP-16412/modify-no-exclude-option

Change the behavior of --no-exclude option
TamiTakamiya authored Oct 2, 2023
2 parents 1e120a5 + 1e1da53 commit 4a9b28b
Showing 6 changed files with 153 additions and 53 deletions.
2 changes: 2 additions & 0 deletions .config/dictionary.txt
@@ -1,4 +1,6 @@
ansiblelint
+autofix
+autofixed
clamav
clamscan
commandline
19 changes: 9 additions & 10 deletions README.md
@@ -2,14 +2,12 @@

## Overview

-`ansible-content-parser` used for analyze Ansible files, such
-as playbooks, task files, etc. in a given directory.
-
-It runs `ansible-lint` internally against a given
-source directory and
-updates Ansible files (the `--fix` option of `ansible-lint`)
-and generates the `lint-result.json` file, which summarizes
-files found in the directory and lint errors.
+`ansible-content-parser` analyzes Ansible files in a given source
+(a local directory, an archive file, or a git URL)
+by running `ansible-lint` internally,
+updates Ansible files using the [Autofix feature of `ansible-lint`](https://ansible.readthedocs.io/projects/lint/autofix/),
+and generates the `ftdata.json` file, which is the training dataset
+for developing custom AI models used with Ansible Lightspeed.
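
For orientation, here is a minimal sketch of the pipeline the new description refers to, assuming only that `ansible-lint` is installed; the paths are illustrative and `subprocess` stands in for the parser's internal invocation:

```python
# Minimal sketch of the described pipeline (illustrative paths, not the
# actual implementation): run ansible-lint with autofix, then read SARIF.
import json
import subprocess
from pathlib import Path

source = Path("my-ansible-project")      # hypothetical source checkout
sarif_file = Path("metadata/sarif.json")
sarif_file.parent.mkdir(parents=True, exist_ok=True)

# ansible-lint exits non-zero when it finds violations, so check=False.
subprocess.run(
    ["ansible-lint", "--fix", "--sarif-file", str(sarif_file), str(source)],
    check=False,
)

results = json.loads(sarif_file.read_text())["runs"][0]["results"]
print(f"{len(results)} findings recorded in {sarif_file}")
```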

## Build

@@ -54,8 +52,9 @@ options:
                        effective rule transforms (the 'write_list') by passing the keyword 'all' (=default) or 'none',
                        or a comma-separated list of rule ids or rule tags.
  --skip-ansible-lint   Skip the execution of ansible-lint.
-  --no-exclude          Do not rerun ansible-lint with excluding files that caused syntax check errors. If one or more
-                        syntax check errors were found, execution fails without generating the training dataset.
+  --no-exclude          Do not let ansible-content-parser generate the training dataset by excluding files that caused
+                        lint errors. With this option specified, a single lint error terminates the execution without
+                        generating the training dataset.
  -v, --verbose         Explain what is being done
  --source-license SOURCE_LICENSE
                        Specify the license that will be included in the training dataset.
105 changes: 69 additions & 36 deletions src/ansible_content_parser/__main__.py
@@ -12,12 +12,12 @@
import shutil
import sys
import tarfile
-import typing
import zipfile

from collections.abc import Generator
from importlib.metadata import PackageNotFoundError, version
from pathlib import Path
+from typing import Any

import giturlparse  # pylint: disable=import-error

@@ -51,16 +51,16 @@ def pushd(new_dir: str) -> Generator[None, None, None]:
def execute_ansiblelint(
    argv: list[str],
    work_dir: str,
-) -> dict[str, typing.Any]:
+) -> tuple[dict[str, list[Any]], int]:
    """Execute ansible-lint."""
    with pushd(work_dir):
        # Clear root logger handlers as ansible-lint adds one without checking existing ones.
        logging.getLogger().handlers.clear()

-        result, mark_as_success = ansiblelint_main(argv)
+        result, mark_as_success, return_code = ansiblelint_main(argv)
        return {
            "files": [LintableDict(lintable) for lintable in result.files],
-        }
+        }, return_code
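
Callers of `execute_ansiblelint` now receive both the lintable summary and ansible-lint's return code. A hedged caller-side sketch (the argv and work_dir values are illustrative):

```python
# Hypothetical caller of execute_ansiblelint; paths are illustrative.
serializable_result, return_code = execute_ansiblelint(
    ["ansible-lint", "--sarif-file", "metadata/sarif.json"],
    "/tmp/checkout",
)
if return_code != 0:
    # Non-zero means ansible-lint reported violations (or failed outright).
    print(f"lint failed; {len(serializable_result['files'])} lintables were examined")
```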


def parse_args(argv: list[str]) -> argparse.Namespace:
@@ -97,8 +97,10 @@ def parse_args(argv: list[str]) -> argparse.Namespace:
    parser.add_argument(
        "--no-exclude",
        action="store_true",
-        help="Do not rerun ansible-lint with excluding files that caused syntax check errors. If one or more syntax "
-        "check errors were found, execution fails without generating the training dataset.",
+        help="Do not let ansible-content-parser generate the training dataset by "
+        "excluding files that caused lint errors. With this option specified, "
+        "a single lint error terminates the execution without generating the "
+        "training dataset.",
    )
    parser.add_argument(
        "-v",
@@ -290,7 +292,7 @@ def main() -> None:
    metadata_path = out_path / "metadata"

    sarif_file = str(metadata_path / "sarif.json")
-    argv = ["__DUMMY__", "--sarif-file", sarif_file]
+    argv = ["ansible-lint", "--sarif-file", sarif_file]
    update_argv(argv, args)

    try:
@@ -335,8 +337,16 @@ def execute_lint_step(
) -> None:
    """Execute ansible-lint and create metadata files."""
    exclude_paths: list[str] = []
-    if not args.skip_ansible_lint:
-        serializable_result = execute_ansiblelint(
+
+    lint_result = ""
+    lint_result2 = ""
+    sarif_file2 = ""
+    return_code = RC.SUCCESS
+
+    if args.skip_ansible_lint:
+        sarif_file = ""
+    else:
+        serializable_result, return_code = execute_ansiblelint(
            argv,
            str(repository_path),
        )
@@ -347,54 +357,77 @@
        ) as f:
            f.write(json.dumps(serializable_result))

-    parse_sarif_json(exclude_paths, sarif_file)
-
-    # If syntax-errors occurred on some files, kick off the second run excluding those files
-    if len(exclude_paths) > 0 and not args.no_exclude:
-        lint_result2 = str(metadata_path / "lint-result-2.json")
-        sarif_file2 = str(metadata_path / "sarif-2.json")
-        argv = ["__DUMMY__", "--sarif-file", sarif_file2]
-        argv.append("--exclude")
-        argv.extend(exclude_paths)
-        update_argv(argv, args)
-        _logger.info(",".join(argv))
-        serializable_result_2 = execute_ansiblelint(
-            argv,
-            str(repository_path),
-        )
-        serializable_result_2["excluded"] = exclude_paths
-
-        with Path(lint_result2).open(mode="w", encoding="utf-8") as f:
-            f.write(json.dumps(serializable_result_2))
-    else:
-        lint_result2 = ""
-        sarif_file2 = ""
+    if return_code == RC.SUCCESS or not args.no_exclude:
+        exclude_paths = parse_sarif_json(exclude_paths, sarif_file, True)
+
+        # If syntax-errors occurred on some files, kick off the second run excluding those files
+        if len(exclude_paths) > 0:
+            lint_result2 = str(metadata_path / "lint-result-2.json")
+            sarif_file2 = str(metadata_path / "sarif-2.json")
+            argv = ["ansible-lint", "--sarif-file", sarif_file2]
+            argv.append("--exclude")
+            argv.extend(exclude_paths)
+            update_argv(argv, args)
+            _logger.info(",".join(argv))
+            serializable_result_2, return_code = execute_ansiblelint(
+                argv,
+                str(repository_path),
+            )
+            serializable_result_2["excluded"] = exclude_paths
+            exclude_paths = parse_sarif_json(exclude_paths, sarif_file2, False)
+
+            _rename_excluded_files(exclude_paths, repository_path)
+
+            with Path(lint_result2).open(mode="w", encoding="utf-8") as f:
+                f.write(json.dumps(serializable_result_2))
+    else:
+        exclude_paths = parse_sarif_json(exclude_paths, sarif_file, False)

    generate_report(
-        "" if args.skip_ansible_lint else lint_result2 if lint_result2 else lint_result,
+        lint_result,
+        lint_result2,
        sarif_file,
        sarif_file2,
        args,
+        exclude_paths,
    )

-    if len(exclude_paths) > 0 and args.no_exclude:
-        msg = "One or more syntax-check errors were found by ansible-lint"
+    if return_code != RC.SUCCESS and args.no_exclude:
+        msg = "One or more lint errors were found by ansible-lint"
        raise RuntimeError(msg)
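
The net behavioral change, sketched as plain control flow; this is a simplification of the code above, using a hypothetical helper that is not part of the module:

```python
# Simplified decision logic introduced by this commit (illustrative only).
def decide(no_exclude: bool, lint_rc: int, offending_files: list[str]) -> str:
    if lint_rc != 0 and no_exclude:
        # --no-exclude now aborts on any lint error, not only syntax-check errors.
        raise RuntimeError("One or more lint errors were found by ansible-lint")
    if offending_files:
        # Default behavior: exclude offending files, rerun lint, still emit a dataset.
        return f"rerun excluding {offending_files}, then generate the dataset"
    return "generate the dataset"
```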


-def parse_sarif_json(exclude_paths: list[str], sarif_file: str) -> None:
+def _rename_excluded_files(exclude_paths: list[str], repository_path: Path) -> None:
+    with pushd(str(repository_path)):
+        for p in exclude_paths:
+            path = Path(p)
+            # Do not attempt to rename directories (e.g. role names)
+            if path.is_file():
+                Path(p).rename(p + ".__EXCLUDED__")
+
+
+def parse_sarif_json(
+    exclude_paths: list[str],
+    sarif_file: str,
+    syntax_check_errors_only: bool,
+) -> list[str]:
    """Analyze SARIF.json to see if syntax-check errors occurred or not on the first run."""
    with Path(sarif_file).open("rb") as f:
        o = json.load(f)
    for run in o["runs"]:
        for result in run["results"]:
-            if result["ruleId"].startswith("syntax-check"):
+            if (
+                result["ruleId"].startswith("syntax-check")
+                or not syntax_check_errors_only
+                and ("level" not in result or result["level"] == "error")
+            ):
                exclude_paths.extend(
                    [
                        location["physicalLocation"]["artifactLocation"]["uri"]
                        for location in result["locations"]
                    ],
                )
+    return sorted(set(exclude_paths))
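
For reference, a hand-made SARIF fragment of the shape this function walks (sample data, not real ansible-lint output):

```python
# Hand-made sample limited to the fields parse_sarif_json actually reads.
sample = {
    "runs": [
        {
            "results": [
                {
                    "ruleId": "syntax-check[specific]",
                    "level": "error",
                    "locations": [
                        {
                            "physicalLocation": {
                                "artifactLocation": {"uri": "roles/bad/tasks/main.yml"},
                            },
                        },
                    ],
                },
            ],
        },
    ],
}
# With syntax_check_errors_only=True, only syntax-check results are collected;
# with False, any result whose "level" is "error" (or absent) is collected too.
```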


def update_argv(argv: list[str], args: argparse.Namespace) -> None:
4 changes: 2 additions & 2 deletions src/ansible_content_parser/lint.py
@@ -98,6 +98,6 @@ def ansiblelint_main(argv: list[str] | None = None) -> LintResult:
",".join(options.mock_filters),
)

app.report_outcome(result, mark_as_success=mark_as_success)
return_code = app.report_outcome(result, mark_as_success=mark_as_success)

return result, mark_as_success
return result, mark_as_success, return_code
25 changes: 20 additions & 5 deletions src/ansible_content_parser/report.py
@@ -20,7 +20,7 @@
_label_count = "Count"
_label_file_type = "File Type"
_label_file_path = "File Path"
-_label_file_state = "Updated"
+_label_file_state = "Excluded/Autofixed"
_label_module_name = "Module Name"
_label_total = "TOTAL"

@@ -74,18 +74,25 @@ def filetype_summary(result: dict[str, list[LintableDict]]) -> str:
    return summary


-def get_file_list_summary(files: list[LintableDict]) -> str:
+def get_file_list_summary(files: list[LintableDict], excluded_paths: list[str]) -> str:
    """Get summary string from the lintable list."""
    entries = []
    max_filename_len = len(_label_file_path)
    max_kind_len = len(_label_file_type)
    max_state_len = len(_label_file_state)
    kinds = {f["filename"]: f["kind"] for f in files}
    updated = {f["filename"]: f["updated"] for f in files}
+    excluded = {f["filename"]: (f["filename"] in excluded_paths) for f in files}
    for filename in sorted(kinds):
        kind = kinds[filename]
        if kind != "":  # Skip files that were not identified by ansible-lint
-            state = "updated" if updated[filename] else ""
+            state = (
+                "excluded"
+                if excluded[filename]
+                else "autofixed"
+                if updated[filename]
+                else ""
+            )
            entries.append([filename, kind, state])
            if len(filename) > max_filename_len:
                max_filename_len = len(filename)
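
Note that the state column gives exclusion precedence over autofix: a file that was autofixed and later excluded reports "excluded". A small sketch of that precedence with hypothetical data:

```python
# Hypothetical data; "excluded" wins over "autofixed" for the same file.
updated = {"roles/a/tasks/main.yml": True, "site.yml": False}
excluded_paths = ["roles/a/tasks/main.yml"]

for filename, was_updated in updated.items():
    state = (
        "excluded"
        if filename in excluded_paths
        else "autofixed" if was_updated else ""
    )
    print(f"{filename:28} {state}")
```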
@@ -216,9 +223,11 @@ def get_excluded_files(excluded: list[str]) -> str:

def generate_report(
    json_file: str,
+    json_file2: str,
    sarif_file: str,
    sarif_file2: str,
    args: argparse.Namespace,
+    excluded_paths: list[str],
) -> None:
    """Generate report."""
    report = f"""
@@ -238,6 +247,11 @@
    with Path(json_file).open(encoding="utf-8") as f:
        result = json.load(f)
    files = result["files"]
+
+    last_json_file = json_file2 if json_file2 else json_file
+    if last_json_file:
+        with Path(last_json_file).open(encoding="utf-8") as f:
+            result = json.load(f)
    excluded = result.get("excluded", [])

    report += f"""
@@ -249,7 +263,7 @@
[ List of Ansible files identified ]
-{get_file_list_summary(files)}
+{get_file_list_summary(files, excluded_paths)}
[ Issues found by ansible-lint ]
@@ -267,7 +281,8 @@
{get_sarif_summary(metadata_path, sarif_file2)}
"""
    else:
-        report += f"""
+        if sarif_file:
+            report += f"""
{get_sarif_summary(metadata_path, sarif_file)}
"""
    with (out_path / _report_txt).open(mode="w") as f:
51 changes: 51 additions & 0 deletions tests/test_main.py
@@ -210,6 +210,8 @@ def test_cli_with_local_directory(self) -> None:
                testargs = [
                    "ansible-content-parser",
                    "-v",
+                    "--profile",
+                    "min",
                    source.name + "/",  # intentionally add "/" to the end
                    output.name,
                ]
@@ -220,9 +222,58 @@

                assert context.exception.code == 0, "The exit code should be 0"

+                found_file_counts_section = False
+                with (Path(output.name) / "report.txt").open("r") as f:
+                    for line in f:
+                        if "[ File counts per type ]" in line:
+                            found_file_counts_section = True
+                        if line == "Module Name Count\n":
+                            assert found_file_counts_section is True
+                            line = f.readline()
+                            assert line == "---------------------\n"
+                            line = f.readline()
+                            assert line == "service 2\n"
+                            line = f.readline()
+                            assert line == "yum 2\n"
+                            line = f.readline()
+                            assert line == "firewalld 1\n"
+                            line = f.readline()
+                            assert line == "meta 1\n"
+                            line = f.readline()
+                            assert line == "---------------------\n"
+                            line = f.readline()
+                            assert line == "TOTAL 6\n"
+                            line = f.readline()
+                            assert line == "---------------------\n"
+
+    def test_cli_with_local_directory_with_no_ansible_lint(self) -> None:
+        """Run the CLI with a local directory, skipping ansible-lint."""
+        with temp_dir() as source:
+            self._create_repo(source)
+            self._add_second_playbook(source)
+            self._add_third_playbook(source)
+            with temp_dir() as output:
+                testargs = [
+                    "ansible-content-parser",
+                    "-v",
+                    "--skip-ansible-lint",
+                    source.name,
+                    output.name,
+                ]
+                with patch.object(sys, "argv", testargs), self.assertRaises(
+                    SystemExit,
+                ) as context:
+                    main()
+
+                assert context.exception.code == 0, "The exit code should be 0"
+
+                found_file_counts_section = False
+                with (Path(output.name) / "report.txt").open("r") as f:
+                    for line in f:
+                        if "[ File counts per type ]" in line:
+                            found_file_counts_section = True
+                        if line == "Module Name Count\n":
+                            assert found_file_counts_section is False
+                            line = f.readline()
+                            assert line == "---------------------\n"
+                            line = f.readline()

1 comment on commit 4a9b28b

@github-actions


ClamAV Virus Definition DB Files:
----
total 227188
-rw-r--r--  1 root root 170479789 Oct  2 06:07 main.cvd
-rw-r--r--  1 root root        69 Oct  2 06:07 freshclam.dat
-rw-r--r--  1 root root  61843294 Oct  2 06:07 daily.cvd
-rw-r--r--  1 root root    291965 Oct  2 06:07 bytecode.cvd
drwxr-xr-x 14 root root      4096 Oct  2 13:42 ..
drwxr-xr-x  2 root root      4096 Oct  2 13:42 .
----
File: /var/lib/clamav/bytecode.cvd
Build time: 22 Feb 2023 16:33 -0500
Version: 334
Signatures: 91
Functionality level: 90
Builder: anvilleg
MD5: 0464067a252b1e937012ad34e811065f
Digital signature: urVBCbhJcz8v6i1E6HedDwa8TxBHnJknqg7SE+6JWBtovATpw8MWwS+kvGAi//x5u0LIFwhPvUsgEBBeFiZE0QTTWazOhJ/LfKJK+nODqha6cTvaQdKl2rSbEOv6grv7UONV8eKi383Wv07wfSNYp+lPNpt0QmejKb1TMHAYTA
Verification OK.
----
File: /var/lib/clamav/daily.cvd
Build time: 01 Oct 2023 03:38 -0400
Version: 27048
Signatures: 2041893
Functionality level: 90
Builder: raynman
MD5: 236351170e6df671495403cd518d5f4b
Digital signature: xPPQ76mi/CVQRonhozxMHN/NUH/Pajo0j+KOkXH2tifeN1hOCWpw8ZQ0Yg4UKkAJY8fDnIE/B9xSm9z+1BmqkINwvW0rq890Om/IhorIbu2J18oHOMj8C2YK45hrRvGgbio3mIcE9e2U0N0/HRlOfUSZB6f5DbTDEe4+dsrGudc
Verification OK.
----
File: /var/lib/clamav/main.cvd
Build time: 16 Sep 2021 08:32 -0400
Version: 62
Signatures: 6647427
Functionality level: 90
Builder: sigmgr
MD5: 137eccce31aacb21b5a98bb8c21cefd6
Digital signature: twaJBls8V5q64R7QY10AatEtPNuPWoVoxTaNO1jpBg7s5jIMMXpitgG1000YLp6rb0TWkEKjRqxneGTxuxWaWm7XBjsgwX2BRWh/y4fhs7uyImdKRLzQ5y8e2EkSChegF/i8clqfn+1qetq9j4gbktJ3JZpOXPoHlyr2Dv9S/Bg
Verification OK.
----
Scanning Results:
ClamAV 1.0.2/27048/Sun Oct  1 07:38:34 2023

----------- SCAN SUMMARY -----------
Known viruses: 8673778
Engine version: 1.0.2
Scanned directories: 7162
Scanned files: 53774
Infected files: 0
Data scanned: 2286.80 MB
Data read: 1350.52 MB (ratio 1.69:1)
Time: 512.174 sec (8 m 32 s)
Start Date: 2023:10:02 13:43:03
End Date:   2023:10:02 13:51:35
