Skip to content

Commit

Permalink
Remove pylint (mlflow#11183)
Browse files Browse the repository at this point in the history
Signed-off-by: harupy <[email protected]>
  • Loading branch information
harupy authored Feb 20, 2024
1 parent 798c9b0 commit 8915365
Show file tree
Hide file tree
Showing 22 changed files with 310 additions and 715 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,14 @@ jobs:
id: setup-python
- name: Add problem matchers
run: |
echo "::add-matcher::.github/workflows/matchers/pylint.json"
echo "::add-matcher::.github/workflows/matchers/clint.json"
echo "::add-matcher::.github/workflows/matchers/format.json"
echo "::add-matcher::.github/workflows/matchers/ruff.json"
- uses: ./.github/actions/cache-pip
- name: Install dependencies
run: |
python -m venv .venv
source .venv/bin/activate
source ./dev/install-common-deps.sh --ml
pip install -r requirements/lint-requirements.txt
- uses: ./.github/actions/pipdeptree
- name: Install pre-commit hooks
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"problemMatcher": [
{
"owner": "pylint",
"owner": "clint",
"severity": "error",
"pattern": [
{
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/recipe-template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
- uses: ./.github/actions/cache-pip
- name: Add problem matchers
run: |
echo "::add-matcher::.github/workflows/matchers/pylint.json"
echo "::add-matcher::.github/workflows/matchers/clint.json"
echo "::add-matcher::.github/workflows/matchers/black.json"
echo "::add-matcher::.github/workflows/matchers/ruff.json"
- name: Install dependencies
Expand Down
15 changes: 3 additions & 12 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,9 @@ repos:
stages: [commit]
require_serial: true

- id: pylint
name: pylint
entry: pylint
language: system
types: [python]
args: ["--rcfile=pylintrc"]
stages: [commit]
require_serial: true

- id: disallow-rule-ids
name: disallow-rule-ids
entry: python ./dev/disallow_rule_ids.py
- id: custom-python-lint
name: custom-python-lint
entry: clint
language: system
types: [python]
stages: [commit]
Expand Down
26 changes: 26 additions & 0 deletions dev/clint/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Clint

A custom linter for mlflow to enforce rules that ruff doesn't cover.

## Installation

```
pip install -e dev/clint
```

## Usage

```bash
clint file.py ...
```

## Integrating with Visual Studio Code

1. Install [the Pylint extension](https://marketplace.visualstudio.com/items?itemName=ms-python.pylint)
2. Add the following setting in your `settings.json` file:

```json
{
"pylint.path": ["${interpreter}", "-m", "clint"]
}
```
17 changes: 17 additions & 0 deletions dev/clint/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[project]
name = "clint"
version = "0.1.0"
description = "A custom linter for mlflow to enforce rules that ruff doesn't cover."
readme = "README.md"
authors = [{ name = "mlflow", email = "[email protected]" }]
dependencies = ["tomli"]

[tool.setuptools]
package-dir = { "" = "src" }

[project.scripts]
clint = "clint:main"
51 changes: 51 additions & 0 deletions dev/clint/src/clint/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from __future__ import annotations

import argparse
import itertools
import json
import os
import re
import sys
from concurrent.futures import ProcessPoolExecutor, as_completed
from dataclasses import dataclass
from typing import Literal

from clint.config import Config
from clint.linter import lint_file


@dataclass
class Args:
files: list[str]
output_format: Literal["text", "json"]

@classmethod
def parse(cls) -> Args:
parser = argparse.ArgumentParser(description="Custom linter for mlflow.")
parser.add_argument("files", nargs="+", help="Files to lint.")
parser.add_argument("--output-format", default="text")
args, _ = parser.parse_known_args()
return cls(files=args.files, output_format=args.output_format)


def main():
config = Config.load()
EXCLUDE_REGEX = re.compile("|".join(map(re.escape, config.exclude)))
args = Args.parse()
with ProcessPoolExecutor() as pool:
futures = [
pool.submit(lint_file, f)
for f in args.files
if not EXCLUDE_REGEX.match(f) and os.path.exists(f)
]
violations_iter = itertools.chain.from_iterable(f.result() for f in as_completed(futures))
if violations := list(violations_iter):
if args.output_format == "json":
sys.stdout.write(json.dumps([v.json() for v in violations]))
elif args.output_format == "text":
sys.stderr.write("\n".join(map(str, violations)) + "\n")
sys.exit(1)


if __name__ == "__main__":
main()
3 changes: 3 additions & 0 deletions dev/clint/src/clint/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from clint import main

main()
Original file line number Diff line number Diff line change
@@ -1,10 +1,3 @@
import astroid
from pylint.checkers import BaseChecker
from pylint.interfaces import IAstroidChecker
from pylint.lint import PyLinter

from pylint_plugins.errors import LAZY_BUILTIN_IMPORT, to_msgs

# https://github.com/PyCQA/isort/blob/b818cec889657cb786beafe94a6641f8fc0f0e64/isort/stdlibs/py311.py
BUILTIN_MODULES = {
"_ast",
Expand Down Expand Up @@ -221,38 +214,3 @@
"zlib",
"zoneinfo",
}


class ImportChecker(BaseChecker):
__implements__ = IAstroidChecker

name = "import-checker"
msgs = to_msgs(LAZY_BUILTIN_IMPORT)
priority = -1

def __init__(self, linter: PyLinter) -> None:
super().__init__(linter)
self.stack = []

def in_function(self):
return bool(self.stack)

def is_builtin_module(self, name):
return name.split(".", 1)[0] in BUILTIN_MODULES

def visit_functiondef(self, node: astroid.FunctionDef):
self.stack.append(node)

def leave_functiondef(self, node: astroid.FunctionDef): # pylint: disable=unused-argument
self.stack.pop()

def visit_importfrom(self, node: astroid.ImportFrom):
if self.in_function() and self.is_builtin_module(node.modname):
self.add_message(LAZY_BUILTIN_IMPORT.name, node=node, args=(node.modname,))

def visit_import(self, node: astroid.Import):
if self.in_function():
if builtin_modules := [name for name, _ in node.names if self.is_builtin_module(name)]:
self.add_message(
LAZY_BUILTIN_IMPORT.name, node=node, args=(", ".join(builtin_modules),)
)
17 changes: 17 additions & 0 deletions dev/clint/src/clint/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from __future__ import annotations

from dataclasses import dataclass

import tomli


@dataclass
class Config:
exclude: list[str]

@classmethod
def load(cls) -> Config:
with open("pyproject.toml", "rb") as f:
data = tomli.load(f)
exclude = data["tool"]["clint"]["exclude"]
return cls(exclude)
170 changes: 170 additions & 0 deletions dev/clint/src/clint/linter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
from __future__ import annotations

import ast
import re
import tokenize
from dataclasses import dataclass

from clint.builtin import BUILTIN_MODULES

PARAM_REGEX = re.compile(r"\s+:param\s+\w+:", re.MULTILINE)
RETURN_REGEX = re.compile(r"\s+:returns?:", re.MULTILINE)
DISABLE_COMMENT_REGEX = re.compile(r"clint:\s*disable=([a-z0-9-]+)")


def ignore_map(code: str) -> dict[str, set[int]]:
"""
Creates a mapping of rule name to line numbers to ignore.
{
"<rule_name>": {<line_number>, ...},
...
}
"""
mapping: dict[str, set[int]] = {}
readline = iter(code.splitlines(True)).__next__
for tok in tokenize.generate_tokens(readline):
if tok.type != tokenize.COMMENT:
continue
if m := DISABLE_COMMENT_REGEX.search(tok.string):
mapping.setdefault(m.group(1), set()).add(tok.start[0])
return mapping


@dataclass
class Rule:
id: str
name: str
message: str


@dataclass
class Violation:
rule: Rule
path: str
lineno: int
col_offset: int

def __str__(self):
return f"{self.path}:{self.lineno}:{self.col_offset}: {self.rule.id}: {self.rule.message}"

def json(self) -> dict[str, str | int | None]:
return {
"type": "error",
"module": None,
"obj": None,
"line": self.lineno,
"column": self.col_offset,
"endLine": self.lineno,
"endColumn": self.col_offset,
"path": self.path,
"symbol": self.rule.name,
"message": self.rule.message,
"message-id": self.rule.id,
}


NO_RST = Rule(
"Z0001",
"no-rst",
"Do not use RST style. Use Google style instead.",
)
LAZY_BUILTIN_IMPORT = Rule(
"Z0002",
"lazy-builtin-import",
"Builtin modules must be imported at the top level.",
)

# TODO: Remove this once we convert all docstrings to Google style.
NO_RST_IGNORE = {
"mlflow/gateway/client.py",
"mlflow/gateway/providers/utils.py",
"mlflow/keras/callback.py",
"mlflow/metrics/base.py",
"mlflow/metrics/genai/base.py",
"mlflow/models/utils.py",
"mlflow/projects/databricks.py",
"mlflow/projects/kubernetes.py",
"mlflow/store/_unity_catalog/registry/rest_store.py",
"mlflow/store/artifact/azure_data_lake_artifact_repo.py",
"mlflow/store/artifact/gcs_artifact_repo.py",
"mlflow/store/model_registry/rest_store.py",
"mlflow/store/tracking/rest_store.py",
"mlflow/utils/docstring_utils.py",
"mlflow/utils/rest_utils.py",
"tests/utils/test_docstring_utils.py",
}


class Linter(ast.NodeVisitor):
def __init__(self, path: str, ignore: dict[str, set[int]]):
self.stack: list[ast.FunctionDef | ast.AsyncFunctionDef] = []
self.path = path
self.ignore = ignore
self.violations: list[Violation] = []

def _check(self, node: ast.AST, rule: Rule) -> None:
if (lines := self.ignore.get(rule.name)) and node.lineno in lines:
return
self.violations.append(Violation(rule, self.path, node.lineno, node.col_offset))

def _docstring(
self, node: ast.FunctionDef | ast.AsyncFunctionDef | ast.ClassDef
) -> ast.Constant | None:
if (
isinstance(node.body[0], ast.Expr)
and isinstance(node.body[0].value, ast.Constant)
and isinstance(node.body[0].value.s, str)
):
return node.body[0].value
return None

def _no_rst(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None:
if self.path in NO_RST_IGNORE:
return

if (nd := self._docstring(node)) and (
PARAM_REGEX.search(nd.s) or RETURN_REGEX.search(nd.s)
):
self._check(nd, NO_RST)

def _is_in_function(self) -> bool:
return bool(self.stack)

def visit_ClassDef(self, node: ast.ClassDef) -> None:
self.stack.append(node)
self._no_rst(node)
self.generic_visit(node)
self.stack.pop()

def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
self.stack.append(node)
self._no_rst(node)
self.generic_visit(node)
self.stack.pop()

def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
self.stack.append(node)
self._no_rst(node)
self.generic_visit(node)
self.stack.pop()

def visit_Import(self, node: ast.Import) -> None:
if self._is_in_function():
for alias in node.names:
if alias.name.split(".", 1)[0] in BUILTIN_MODULES:
self._check(node, LAZY_BUILTIN_IMPORT)
self.generic_visit(node)

def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
if self._is_in_function() and node.module.split(".", 1)[0] in BUILTIN_MODULES:
self._check(node, LAZY_BUILTIN_IMPORT)
self.generic_visit(node)


def lint_file(path: str) -> list[Violation]:
with open(path) as f:
code = f.read()
linter = Linter(path, ignore_map(code))
linter.visit(ast.parse(code))
return linter.violations
Loading

0 comments on commit 8915365

Please sign in to comment.