forked from mlflow/mlflow
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: harupy <[email protected]>
- Loading branch information
Showing
22 changed files
with
310 additions
and
715 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2 changes: 1 addition & 1 deletion
2
.github/workflows/matchers/pylint.json → .github/workflows/matchers/clint.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
{ | ||
"problemMatcher": [ | ||
{ | ||
"owner": "pylint", | ||
"owner": "clint", | ||
"severity": "error", | ||
"pattern": [ | ||
{ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Clint | ||
|
||
A custom linter for mlflow to enforce rules that ruff doesn't cover. | ||
|
||
## Installation | ||
|
||
``` | ||
pip install -e dev/clint | ||
``` | ||
|
||
## Usage | ||
|
||
```bash | ||
clint file.py ... | ||
``` | ||
|
||
## Integrating with Visual Studio Code | ||
|
||
1. Install [the Pylint extension](https://marketplace.visualstudio.com/items?itemName=ms-python.pylint) | ||
2. Add the following setting in your `settings.json` file: | ||
|
||
```json | ||
{ | ||
"pylint.path": ["${interpreter}", "-m", "clint"] | ||
} | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
[build-system] | ||
requires = ["setuptools"] | ||
build-backend = "setuptools.build_meta" | ||
|
||
[project] | ||
name = "clint" | ||
version = "0.1.0" | ||
description = "A custom linter for mlflow to enforce rules that ruff doesn't cover." | ||
readme = "README.md" | ||
authors = [{ name = "mlflow", email = "[email protected]" }] | ||
dependencies = ["tomli"] | ||
|
||
[tool.setuptools] | ||
package-dir = { "" = "src" } | ||
|
||
[project.scripts] | ||
clint = "clint:main" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
from __future__ import annotations | ||
|
||
import argparse | ||
import itertools | ||
import json | ||
import os | ||
import re | ||
import sys | ||
from concurrent.futures import ProcessPoolExecutor, as_completed | ||
from dataclasses import dataclass | ||
from typing import Literal | ||
|
||
from clint.config import Config | ||
from clint.linter import lint_file | ||
|
||
|
||
@dataclass | ||
class Args: | ||
files: list[str] | ||
output_format: Literal["text", "json"] | ||
|
||
@classmethod | ||
def parse(cls) -> Args: | ||
parser = argparse.ArgumentParser(description="Custom linter for mlflow.") | ||
parser.add_argument("files", nargs="+", help="Files to lint.") | ||
parser.add_argument("--output-format", default="text") | ||
args, _ = parser.parse_known_args() | ||
return cls(files=args.files, output_format=args.output_format) | ||
|
||
|
||
def main(): | ||
config = Config.load() | ||
EXCLUDE_REGEX = re.compile("|".join(map(re.escape, config.exclude))) | ||
args = Args.parse() | ||
with ProcessPoolExecutor() as pool: | ||
futures = [ | ||
pool.submit(lint_file, f) | ||
for f in args.files | ||
if not EXCLUDE_REGEX.match(f) and os.path.exists(f) | ||
] | ||
violations_iter = itertools.chain.from_iterable(f.result() for f in as_completed(futures)) | ||
if violations := list(violations_iter): | ||
if args.output_format == "json": | ||
sys.stdout.write(json.dumps([v.json() for v in violations])) | ||
elif args.output_format == "text": | ||
sys.stderr.write("\n".join(map(str, violations)) + "\n") | ||
sys.exit(1) | ||
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from clint import main | ||
|
||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from __future__ import annotations | ||
|
||
from dataclasses import dataclass | ||
|
||
import tomli | ||
|
||
|
||
@dataclass | ||
class Config: | ||
exclude: list[str] | ||
|
||
@classmethod | ||
def load(cls) -> Config: | ||
with open("pyproject.toml", "rb") as f: | ||
data = tomli.load(f) | ||
exclude = data["tool"]["clint"]["exclude"] | ||
return cls(exclude) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
from __future__ import annotations | ||
|
||
import ast | ||
import re | ||
import tokenize | ||
from dataclasses import dataclass | ||
|
||
from clint.builtin import BUILTIN_MODULES | ||
|
||
PARAM_REGEX = re.compile(r"\s+:param\s+\w+:", re.MULTILINE) | ||
RETURN_REGEX = re.compile(r"\s+:returns?:", re.MULTILINE) | ||
DISABLE_COMMENT_REGEX = re.compile(r"clint:\s*disable=([a-z0-9-]+)") | ||
|
||
|
||
def ignore_map(code: str) -> dict[str, set[int]]: | ||
""" | ||
Creates a mapping of rule name to line numbers to ignore. | ||
{ | ||
"<rule_name>": {<line_number>, ...}, | ||
... | ||
} | ||
""" | ||
mapping: dict[str, set[int]] = {} | ||
readline = iter(code.splitlines(True)).__next__ | ||
for tok in tokenize.generate_tokens(readline): | ||
if tok.type != tokenize.COMMENT: | ||
continue | ||
if m := DISABLE_COMMENT_REGEX.search(tok.string): | ||
mapping.setdefault(m.group(1), set()).add(tok.start[0]) | ||
return mapping | ||
|
||
|
||
@dataclass | ||
class Rule: | ||
id: str | ||
name: str | ||
message: str | ||
|
||
|
||
@dataclass | ||
class Violation: | ||
rule: Rule | ||
path: str | ||
lineno: int | ||
col_offset: int | ||
|
||
def __str__(self): | ||
return f"{self.path}:{self.lineno}:{self.col_offset}: {self.rule.id}: {self.rule.message}" | ||
|
||
def json(self) -> dict[str, str | int | None]: | ||
return { | ||
"type": "error", | ||
"module": None, | ||
"obj": None, | ||
"line": self.lineno, | ||
"column": self.col_offset, | ||
"endLine": self.lineno, | ||
"endColumn": self.col_offset, | ||
"path": self.path, | ||
"symbol": self.rule.name, | ||
"message": self.rule.message, | ||
"message-id": self.rule.id, | ||
} | ||
|
||
|
||
NO_RST = Rule( | ||
"Z0001", | ||
"no-rst", | ||
"Do not use RST style. Use Google style instead.", | ||
) | ||
LAZY_BUILTIN_IMPORT = Rule( | ||
"Z0002", | ||
"lazy-builtin-import", | ||
"Builtin modules must be imported at the top level.", | ||
) | ||
|
||
# TODO: Remove this once we convert all docstrings to Google style. | ||
NO_RST_IGNORE = { | ||
"mlflow/gateway/client.py", | ||
"mlflow/gateway/providers/utils.py", | ||
"mlflow/keras/callback.py", | ||
"mlflow/metrics/base.py", | ||
"mlflow/metrics/genai/base.py", | ||
"mlflow/models/utils.py", | ||
"mlflow/projects/databricks.py", | ||
"mlflow/projects/kubernetes.py", | ||
"mlflow/store/_unity_catalog/registry/rest_store.py", | ||
"mlflow/store/artifact/azure_data_lake_artifact_repo.py", | ||
"mlflow/store/artifact/gcs_artifact_repo.py", | ||
"mlflow/store/model_registry/rest_store.py", | ||
"mlflow/store/tracking/rest_store.py", | ||
"mlflow/utils/docstring_utils.py", | ||
"mlflow/utils/rest_utils.py", | ||
"tests/utils/test_docstring_utils.py", | ||
} | ||
|
||
|
||
class Linter(ast.NodeVisitor): | ||
def __init__(self, path: str, ignore: dict[str, set[int]]): | ||
self.stack: list[ast.FunctionDef | ast.AsyncFunctionDef] = [] | ||
self.path = path | ||
self.ignore = ignore | ||
self.violations: list[Violation] = [] | ||
|
||
def _check(self, node: ast.AST, rule: Rule) -> None: | ||
if (lines := self.ignore.get(rule.name)) and node.lineno in lines: | ||
return | ||
self.violations.append(Violation(rule, self.path, node.lineno, node.col_offset)) | ||
|
||
def _docstring( | ||
self, node: ast.FunctionDef | ast.AsyncFunctionDef | ast.ClassDef | ||
) -> ast.Constant | None: | ||
if ( | ||
isinstance(node.body[0], ast.Expr) | ||
and isinstance(node.body[0].value, ast.Constant) | ||
and isinstance(node.body[0].value.s, str) | ||
): | ||
return node.body[0].value | ||
return None | ||
|
||
def _no_rst(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None: | ||
if self.path in NO_RST_IGNORE: | ||
return | ||
|
||
if (nd := self._docstring(node)) and ( | ||
PARAM_REGEX.search(nd.s) or RETURN_REGEX.search(nd.s) | ||
): | ||
self._check(nd, NO_RST) | ||
|
||
def _is_in_function(self) -> bool: | ||
return bool(self.stack) | ||
|
||
def visit_ClassDef(self, node: ast.ClassDef) -> None: | ||
self.stack.append(node) | ||
self._no_rst(node) | ||
self.generic_visit(node) | ||
self.stack.pop() | ||
|
||
def visit_FunctionDef(self, node: ast.FunctionDef) -> None: | ||
self.stack.append(node) | ||
self._no_rst(node) | ||
self.generic_visit(node) | ||
self.stack.pop() | ||
|
||
def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: | ||
self.stack.append(node) | ||
self._no_rst(node) | ||
self.generic_visit(node) | ||
self.stack.pop() | ||
|
||
def visit_Import(self, node: ast.Import) -> None: | ||
if self._is_in_function(): | ||
for alias in node.names: | ||
if alias.name.split(".", 1)[0] in BUILTIN_MODULES: | ||
self._check(node, LAZY_BUILTIN_IMPORT) | ||
self.generic_visit(node) | ||
|
||
def visit_ImportFrom(self, node: ast.ImportFrom) -> None: | ||
if self._is_in_function() and node.module.split(".", 1)[0] in BUILTIN_MODULES: | ||
self._check(node, LAZY_BUILTIN_IMPORT) | ||
self.generic_visit(node) | ||
|
||
|
||
def lint_file(path: str) -> list[Violation]: | ||
with open(path) as f: | ||
code = f.read() | ||
linter = Linter(path, ignore_map(code)) | ||
linter.visit(ast.parse(code)) | ||
return linter.violations |
Oops, something went wrong.