Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: a tool to convert json output to SARIF format #16

Merged
merged 8 commits into from
Nov 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 140 additions & 0 deletions tools/convert_to_sarif.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
"""Convert the output of lintrunner json to SARIF."""

import argparse
import json
import os
from typing import Iterable


def format_rule_name(lintrunner_result: dict) -> str:
    """Return the rule identifier of a lintrunner result as ``<code>/<name>``."""
    linter_code = lintrunner_result["code"]
    rule_name = lintrunner_result["name"]
    return "{}/{}".format(linter_code, rule_name)


def severity_to_github_level(severity: str) -> str:
    """Translate a lintrunner severity into the level string written to SARIF.

    ``"advice"`` and ``"disabled"`` are reported as ``"note"``; any other
    severity is returned unchanged.
    """
    downgraded = {"advice": "note", "disabled": "note"}
    return downgraded.get(severity, severity)


def parse_single_lintrunner_result(lintrunner_result: dict) -> tuple:
    r"""Parse a single lintrunner result into a SARIF result and its rule.

    A result looks like this:
    {
        "path":"/adapters/pytorch/grep_linter.py",
        "line":227,
        "char":80,
        "code":"FLAKE8",
        "severity":"advice",
        "name":"E501",
        "description":"line too long (81 > 79 characters)\nSee https://www.flake8rules.com/rules/E501.html"
    }

    Args:
        lintrunner_result: One decoded lintrunner JSON object. ``path``,
            ``line``, ``char`` and ``description`` may be ``None``.

    Returns:
        A ``(result, rule)`` pair. ``result`` is the SARIF result entry for
        this finding. ``rule`` is ``{"id": <rule id>, "rule": <SARIF rule>}``
        so the caller can de-duplicate rules by id.
    """
    # Both values are reused several times below; compute them once.
    rule_name = format_rule_name(lintrunner_result)
    level = severity_to_github_level(lintrunner_result["severity"])

    # A null description would make the string concatenations below raise
    # TypeError, so treat it as empty, like the other nullable fields.
    description = lintrunner_result["description"] or ""
    full_text = rule_name + "\n" + description

    path = lintrunner_result["path"]
    if path is None:
        artifact_uri = None
    elif path.startswith("/"):
        # Absolute paths are emitted as file:// URIs.
        artifact_uri = "file://" + path
    else:
        # Relative paths are passed through unchanged.
        artifact_uri = path

    result = {
        "ruleId": rule_name,
        "level": level,
        "message": {
            "text": full_text,
        },
        "locations": [
            {
                "physicalLocation": {
                    "artifactLocation": {
                        "uri": artifact_uri,
                    },
                    "region": {
                        # Missing (None) or zero positions fall back to 1.
                        "startLine": lintrunner_result["line"] or 1,
                        "startColumn": lintrunner_result["char"] or 1,
                    },
                },
            },
        ],
    }

    rule = {
        "id": rule_name,
        "rule": {
            "id": rule_name,
            "name": rule_name,
            "shortDescription": {
                # Only the first line of the description goes in the summary.
                "text": rule_name + ": " + description.split("\n")[0],
            },
            "fullDescription": {
                "text": full_text,
            },
            "defaultConfiguration": {
                "level": level,
            },
        },
    }

    return result, rule


def produce_sarif(lintrunner_results: Iterable[dict]) -> dict:
    """Build a SARIF 2.1.0 document from decoded lintrunner results.

    Every input produces one entry in ``results``. Rules are collected by id:
    a rule keeps the position of its first occurrence, and its definition is
    taken from the last result that used that id.
    """
    parsed = [parse_single_lintrunner_result(entry) for entry in lintrunner_results]

    # Later duplicates overwrite the value but keep the original key position.
    rules_by_id = {rule["id"]: rule["rule"] for _, rule in parsed}

    run = {
        "tool": {
            "driver": {
                "name": "lintrunner",
                "rules": [*rules_by_id.values()],
            },
        },
        "results": [result for result, _ in parsed],
    }

    return {
        "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
        "version": "2.1.0",
        "runs": [run],
    }


def main(args):
    """Convert the output of lintrunner json to SARIF.

    Args:
        args: Parsed command-line arguments. ``args.input`` is the path of the
            file with one lintrunner JSON object per line; ``args.output`` is
            the path the SARIF document is written to.
    """

    with open(args.input, "r", encoding="utf-8") as f:
        # Skip blank lines (e.g. a trailing empty line) so they do not abort
        # the whole conversion with a JSONDecodeError.
        lintrunner_jsons = [json.loads(line) for line in f if line.strip()]

    sarif = produce_sarif(lintrunner_jsons)

    # Only create a directory when the output path actually has one;
    # os.makedirs("") would fail for a bare file name.
    output_dir = os.path.dirname(args.output)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(args.output, "w", encoding="utf-8") as f:
        json.dump(sarif, f)


if __name__ == "__main__":
    # Command-line entry point: both the input and the output path are required.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--input",
        required=True,
        type=str,
        help="json file generated by lintrunner",
    )
    cli.add_argument(
        "--output",
        required=True,
        type=str,
        help="output sarif file",
    )
    main(cli.parse_args())
181 changes: 181 additions & 0 deletions tools/convert_to_sarif_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
import unittest

import convert_to_sarif


class TestConvertToSarif(unittest.TestCase):
    """Check ``convert_to_sarif.produce_sarif`` against small hand-built inputs."""

    @staticmethod
    def _lint_message(path, line, char, severity, name):
        """Build a lintrunner-style message with the fixed code and description used by these tests."""
        return {
            "path": path,
            "line": line,
            "char": char,
            "code": "FLAKE8",
            "severity": severity,
            "description": "test description",
            "name": name,
        }

    @staticmethod
    def _sarif_rule(rule_id, level):
        """Build the SARIF rule entry expected for ``rule_id``."""
        return {
            "id": rule_id,
            "name": rule_id,
            "shortDescription": {"text": f"{rule_id}: test description"},
            "fullDescription": {"text": f"{rule_id}\ntest description"},
            "defaultConfiguration": {"level": level},
        }

    @staticmethod
    def _sarif_result(rule_id, level, uri, line, column):
        """Build the SARIF result entry expected for one finding."""
        return {
            "ruleId": rule_id,
            "level": level,
            "message": {"text": f"{rule_id}\ntest description"},
            "locations": [
                {
                    "physicalLocation": {
                        "artifactLocation": {"uri": uri},
                        "region": {"startLine": line, "startColumn": column},
                    }
                }
            ],
        }

    def test_produce_sarif_returns_correct_sarif_result(self):
        lintrunner_results = [
            self._lint_message("test.py", 1, 2, "error", "test-code"),
            self._lint_message("test.py", 1, 2, "error", "test-code-2"),
            self._lint_message("test2.py", 3, 4, "advice", "test-code"),
        ]
        actual = convert_to_sarif.produce_sarif(lintrunner_results)
        expected = {
            "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
            "version": "2.1.0",
            "runs": [
                {
                    "tool": {
                        "driver": {
                            "name": "lintrunner",
                            "rules": [
                                # The last "FLAKE8/test-code" input is "advice",
                                # so the rule's default level is "note".
                                self._sarif_rule("FLAKE8/test-code", "note"),
                                self._sarif_rule("FLAKE8/test-code-2", "error"),
                            ],
                        }
                    },
                    "results": [
                        self._sarif_result("FLAKE8/test-code", "error", "test.py", 1, 2),
                        self._sarif_result("FLAKE8/test-code-2", "error", "test.py", 1, 2),
                        self._sarif_result("FLAKE8/test-code", "note", "test2.py", 3, 4),
                    ],
                }
            ],
        }
        self.maxDiff = None
        self.assertEqual(actual, expected)

    def test_it_handles_relative_paths(self):
        lintrunner_results = [
            self._lint_message("test.py", 1, 2, "error", "test-code"),
        ]
        actual = convert_to_sarif.produce_sarif(lintrunner_results)
        expected_results = [
            self._sarif_result("FLAKE8/test-code", "error", "test.py", 1, 2),
        ]
        self.assertEqual(actual["runs"][0]["results"], expected_results)

    def test_it_handles_absolute_paths(self):
        lintrunner_results = [
            self._lint_message("/path/to/test.py", 1, 2, "error", "test-code"),
        ]
        actual = convert_to_sarif.produce_sarif(lintrunner_results)
        expected_results = [
            self._sarif_result(
                "FLAKE8/test-code", "error", "file:///path/to/test.py", 1, 2
            ),
        ]
        self.assertEqual(actual["runs"][0]["results"], expected_results)


# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()