Skip to content

Commit

Permalink
feat: fetch list of rules from dart website
Browse files Browse the repository at this point in the history
  • Loading branch information
parth-deepsource committed Nov 15, 2023
1 parent 3b5ab5c commit a2ea66b
Show file tree
Hide file tree
Showing 4 changed files with 235 additions and 404 deletions.
153 changes: 22 additions & 131 deletions analyzers/dart-analyze/utils/extractor.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
import os
import re
import subprocess
import json
from dataclasses import dataclass
from pathlib import Path
from typing import List

from constants import VERSION
from urllib.request import urlopen


@dataclass
Expand All @@ -17,154 +13,49 @@ class Issue:


class IssueExtractor:
CLONE_URL = "https://github.com/dart-lang/sdk.git"
RULES_DIRECTORY = "sdk/pkg/linter/lib/src/rules"

RULE_TITLE_REGEX = re.compile(
r"const _?desc\s*=\s*([\w\,\.\=\-\+\:\>\<\\\%\!\[\]\{\}\(\)\`\'\"\$\?\@\s\/\\]*);"
)
RULE_DESCRIPTION_REGEX = re.compile(
r"const _?details\s*=\s* r?('''|\"\"\")(.*)('''|\"\"\");", re.S
ISSUES_URL = (
"https://github.com/dart-lang/site-www/raw/main/src/_data/linter_rules.json"
)
RULE_GROUP_REGEX = re.compile(r"group:.*Group\.(.*)(\)\s*;|,)")

GROUP_CATEGORY_MAP = {
"style": "antipattern",
"errors": "bug-risk",
"pub": "antipattern",
}

def __init__(self, directory: str):
def __init__(self):
"""
Extracts dart analyze issues.
Args:
directory (str): The directory to use to clone the dart \
analyze repository
"""
self.clone_directory = Path(directory)
self._issues = []
self._clone_repository()

@property
def rules_directory(self) -> Path:
"""The directory where the dart analyze rules are defined."""
return self.clone_directory / self.RULES_DIRECTORY

@property
def issues(self) -> List[Issue]:
"""The list of issues extracted from the dart analyze repository."""
if len(self._issues) > 0:
return self._issues

self._issues = []
for file in self.get_rule_files():
issue = self.get_issue_from_rule_file(file)
if issue:
self._issues.append(issue)

return self._issues

def _clone_repository(self) -> None:
"""Clones the dart analyzer repository."""
commands = (
f"git clone {self.CLONE_URL} --depth 1 -b {VERSION}",
f"cd sdk && git checkout {VERSION}",
)
for command in commands:
subprocess.run(
command,
cwd=self.clone_directory,
check=True,
shell=True,
rules = self.fetch_rules()
self._issues = [
Issue(
rule["name"],
rule["description"].rstrip("."),
rule["details"],
self.GROUP_CATEGORY_MAP.get(rule["group"]),
)
for rule in rules
if rule["state"] == "stable"
]

def get_rules(self) -> List[str]:
"""Returns the list of rules present in the repository."""
return [file.stem for file in self.get_rule_files()]

def get_rule_files(self) -> List[Path]:
"""Returns a list of file paths of the rule files."""
extracted_files = []

for curr_dir, _, files in os.walk(self.rules_directory):
for file in files:
extracted_files.append(Path(curr_dir) / file)

return extracted_files

def get_rule_title(self, contents: str) -> str | None:
"""
Extracts the title of the rule from the file contents.
Args:
contents (str): The rule file contents.
"""
CHARS_TO_STRIP = "'\" "
CHARS_TO_LSTRIP = "r"

match = self.RULE_TITLE_REGEX.search(contents)
if not match:
return None

title_parts = match.group(1).split("\n")
sanitized_parts = []
for part in title_parts:
sanitized_parts.append(part.lstrip(CHARS_TO_LSTRIP).strip(CHARS_TO_STRIP))

return " ".join(sanitized_parts).replace('"', r"\"")

def get_rule_description(self, contents: str) -> str | None:
"""
Extracts the description of the rule from the file contents.
Args:
contents (str): The rule file contents.
"""
if match := self.RULE_DESCRIPTION_REGEX.search(contents):
return match.group(2).strip()

return None

def get_rule_category(self, contents: str) -> None:
"""
Extracts the category of the rule from the file contents.
Args:
contents (str): The rule file contents.
"""
if match := self.RULE_GROUP_REGEX.search(contents):
group = match.group(1)
return self.GROUP_CATEGORY_MAP.get(group)

return None

def get_issue_from_rule_file(self, file: Path) -> Issue | None:
"""
Extracts an issue from the given rule file.
"""
with file.open() as f:
contents = f.read()

title = self.get_rule_title(contents)
if not title:
print(f"[{file.name}] Failed to extract title")

description = self.get_rule_description(contents)
if not description:
print(f"[{file.name}] Failed to extract description")
return None

if title is None:
print(f"[{file.name}] Setting title from description")
title = description.split("\n")[0]
return self._issues

category = self.get_rule_category(contents)
if not category:
print(f"[{file.name}] Failed to extract category")
return None
def fetch_rules(self) -> dict:
"""Fetches the list of issues."""
response = urlopen(self.ISSUES_URL)
if response.code != 200:
raise Exception(f"Failed to fetch rules {response.code}")

return Issue(
code=file.stem, title=title, description=description, category=category
)
return json.loads(response.read())
7 changes: 2 additions & 5 deletions analyzers/dart-analyze/utils/issue_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,6 @@ def update_issues(issues: List[Issue]) -> None:


if __name__ == "__main__":
issues = []
with tempfile.TemporaryDirectory() as d:
extractor = IssueExtractor(d)
issues = extractor.issues

extractor = IssueExtractor()
issues = extractor.issues
update_issues(issues=issues)
Loading

0 comments on commit a2ea66b

Please sign in to comment.