From 35fbd2a852be2c47e8006429afe9b3ad4b1bfac2 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sat, 11 May 2024 05:11:02 +0530 Subject: [PATCH] Add Error format support, and JSON output option (#11396) ### Description Resolves #10816 The changes this PR makes are relatively small. It currently: - Adds an `--output` option to mypy CLI - Adds a `ErrorFormatter` abstract base class, which can be subclassed to create new output formats - Adds a `MypyError` class that represents the external format of a mypy error. - Adds a check for `--output` being `'json'`, in which case the `JSONFormatter` is used to produce the reported output. #### Demo: ```console $ mypy mytest.py mytest.py:2: error: Incompatible types in assignment (expression has type "str", variable has type "int") mytest.py:3: error: Name "z" is not defined Found 2 errors in 1 file (checked 1 source file) $ mypy mytest.py --output=json {"file": "mytest.py", "line": 2, "column": 4, "severity": "error", "message": "Incompatible types in assignment (expression has type \"str\", variable has type \"int\")", "code": "assignment"} {"file": "mytest.py", "line": 3, "column": 4, "severity": "error", "message": "Name \"z\" is not defined", "code": "name-defined"} ``` --- A few notes regarding the changes: - I chose to re-use the intermediate `ErrorTuple`s created during error reporting, instead of using the more general `ErrorInfo` class, because a lot of machinery already exists in mypy for sorting and removing duplicate error reports, which produces `ErrorTuple`s at the end. The error sorting and duplicate removal logic could perhaps be separated out from the rest of the code, to be able to use `ErrorInfo` objects more freely. - `ErrorFormatter` doesn't really need to be an abstract class, but I think it would be better this way. If there's a different method that would be preferred, I'd be happy to know. - The `--output` CLI option is, most probably, not added in the correct place. Any help in how to do it properly would be appreciated, the mypy option parsing code seems very complex. - The ability to add custom output formats can be simply added by subclassing the `ErrorFormatter` class inside a mypy plugin, and adding a `name` field to the formatters. The mypy runtime can then check through the `__subclasses__` of the formatter and determine if such a formatter is present. The "checking for the `name` field" part of this code might be appropriate to add within this PR itself, instead of hard-coding `JSONFormatter`. Does that sound like a good idea? --------- Co-authored-by: Tushar Sadhwani <86737547+tushar-deepsource@users.noreply.github.com> Co-authored-by: Tushar Sadhwani --- mypy/build.py | 11 ++--- mypy/error_formatter.py | 37 +++++++++++++++++ mypy/errors.py | 75 ++++++++++++++++++++++++++++++---- mypy/main.py | 17 +++++++- mypy/options.py | 4 +- mypy/test/testoutput.py | 58 ++++++++++++++++++++++++++ mypy/util.py | 9 +++- test-data/unit/outputjson.test | 44 ++++++++++++++++++++ 8 files changed, 239 insertions(+), 16 deletions(-) create mode 100644 mypy/error_formatter.py create mode 100644 mypy/test/testoutput.py create mode 100644 test-data/unit/outputjson.test diff --git a/mypy/build.py b/mypy/build.py index 84c85e66bd49..3ceb473f0948 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -44,6 +44,7 @@ import mypy.semanal_main from mypy.checker import TypeChecker +from mypy.error_formatter import OUTPUT_CHOICES, ErrorFormatter from mypy.errors import CompileError, ErrorInfo, Errors, report_internal_error from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort from mypy.indirection import TypeIndirectionVisitor @@ -253,6 +254,7 @@ def _build( plugin=plugin, plugins_snapshot=snapshot, errors=errors, + error_formatter=None if options.output is None else OUTPUT_CHOICES.get(options.output), flush_errors=flush_errors, fscache=fscache, stdout=stdout, @@ -607,6 +609,7 @@ def __init__( fscache: FileSystemCache, stdout: TextIO, stderr: TextIO, + error_formatter: ErrorFormatter | None = None, ) -> None: self.stats: dict[str, Any] = {} # Values are ints or floats self.stdout = stdout @@ -615,6 +618,7 @@ def __init__( self.data_dir = data_dir self.errors = errors self.errors.set_ignore_prefix(ignore_prefix) + self.error_formatter = error_formatter self.search_paths = search_paths self.source_set = source_set self.reports = reports @@ -3463,11 +3467,8 @@ def process_stale_scc(graph: Graph, scc: list[str], manager: BuildManager) -> No for id in stale: graph[id].transitive_error = True for id in stale: - manager.flush_errors( - manager.errors.simplify_path(graph[id].xpath), - manager.errors.file_messages(graph[id].xpath), - False, - ) + errors = manager.errors.file_messages(graph[id].xpath, formatter=manager.error_formatter) + manager.flush_errors(manager.errors.simplify_path(graph[id].xpath), errors, False) graph[id].write_cache() graph[id].mark_as_rechecked() diff --git a/mypy/error_formatter.py b/mypy/error_formatter.py new file mode 100644 index 000000000000..ffc6b6747596 --- /dev/null +++ b/mypy/error_formatter.py @@ -0,0 +1,37 @@ +"""Defines the different custom formats in which mypy can output.""" + +import json +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from mypy.errors import MypyError + + +class ErrorFormatter(ABC): + """Base class to define how errors are formatted before being printed.""" + + @abstractmethod + def report_error(self, error: "MypyError") -> str: + raise NotImplementedError + + +class JSONFormatter(ErrorFormatter): + """Formatter for basic JSON output format.""" + + def report_error(self, error: "MypyError") -> str: + """Prints out the errors as simple, static JSON lines.""" + return json.dumps( + { + "file": error.file_path, + "line": error.line, + "column": error.column, + "message": error.message, + "hint": None if len(error.hints) == 0 else "\n".join(error.hints), + "code": None if error.errorcode is None else error.errorcode.code, + "severity": error.severity, + } + ) + + +OUTPUT_CHOICES = {"json": JSONFormatter()} diff --git a/mypy/errors.py b/mypy/errors.py index eabe96a2dc73..7a937da39c20 100644 --- a/mypy/errors.py +++ b/mypy/errors.py @@ -8,6 +8,7 @@ from typing_extensions import Literal, TypeAlias as _TypeAlias from mypy import errorcodes as codes +from mypy.error_formatter import ErrorFormatter from mypy.errorcodes import IMPORT, IMPORT_NOT_FOUND, IMPORT_UNTYPED, ErrorCode, mypy_error_codes from mypy.message_registry import ErrorMessage from mypy.options import Options @@ -834,7 +835,7 @@ def raise_error(self, use_stdout: bool = True) -> NoReturn: ) def format_messages( - self, error_info: list[ErrorInfo], source_lines: list[str] | None + self, error_tuples: list[ErrorTuple], source_lines: list[str] | None ) -> list[str]: """Return a string list that represents the error messages. @@ -843,9 +844,6 @@ def format_messages( severity 'error'). """ a: list[str] = [] - error_info = [info for info in error_info if not info.hidden] - errors = self.render_messages(self.sort_messages(error_info)) - errors = self.remove_duplicates(errors) for ( file, line, @@ -856,7 +854,7 @@ def format_messages( message, allow_dups, code, - ) in errors: + ) in error_tuples: s = "" if file is not None: if self.options.show_column_numbers and line >= 0 and column >= 0: @@ -901,18 +899,28 @@ def format_messages( a.append(" " * (DEFAULT_SOURCE_OFFSET + column) + marker) return a - def file_messages(self, path: str) -> list[str]: + def file_messages(self, path: str, formatter: ErrorFormatter | None = None) -> list[str]: """Return a string list of new error messages from a given file. Use a form suitable for displaying to the user. """ if path not in self.error_info_map: return [] + + error_info = self.error_info_map[path] + error_info = [info for info in error_info if not info.hidden] + error_tuples = self.render_messages(self.sort_messages(error_info)) + error_tuples = self.remove_duplicates(error_tuples) + + if formatter is not None: + errors = create_errors(error_tuples) + return [formatter.report_error(err) for err in errors] + self.flushed_files.add(path) source_lines = None if self.options.pretty and self.read_source: source_lines = self.read_source(path) - return self.format_messages(self.error_info_map[path], source_lines) + return self.format_messages(error_tuples, source_lines) def new_messages(self) -> list[str]: """Return a string list of new error messages. @@ -1278,3 +1286,56 @@ def report_internal_error( # Exit. The caller has nothing more to say. # We use exit code 2 to signal that this is no ordinary error. raise SystemExit(2) + + +class MypyError: + def __init__( + self, + file_path: str, + line: int, + column: int, + message: str, + errorcode: ErrorCode | None, + severity: Literal["error", "note"], + ) -> None: + self.file_path = file_path + self.line = line + self.column = column + self.message = message + self.errorcode = errorcode + self.severity = severity + self.hints: list[str] = [] + + +# (file_path, line, column) +_ErrorLocation = Tuple[str, int, int] + + +def create_errors(error_tuples: list[ErrorTuple]) -> list[MypyError]: + errors: list[MypyError] = [] + latest_error_at_location: dict[_ErrorLocation, MypyError] = {} + + for error_tuple in error_tuples: + file_path, line, column, _, _, severity, message, _, errorcode = error_tuple + if file_path is None: + continue + + assert severity in ("error", "note") + if severity == "note": + error_location = (file_path, line, column) + error = latest_error_at_location.get(error_location) + if error is None: + # This is purely a note, with no error correlated to it + error = MypyError(file_path, line, column, message, errorcode, severity="note") + errors.append(error) + continue + + error.hints.append(message) + + else: + error = MypyError(file_path, line, column, message, errorcode, severity="error") + errors.append(error) + error_location = (file_path, line, column) + latest_error_at_location[error_location] = error + + return errors diff --git a/mypy/main.py b/mypy/main.py index c2df79d51e83..489ef8fd9a7b 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -18,6 +18,7 @@ parse_version, validate_package_allow_list, ) +from mypy.error_formatter import OUTPUT_CHOICES from mypy.errorcodes import error_codes from mypy.errors import CompileError from mypy.find_sources import InvalidSourceList, create_source_list @@ -72,7 +73,9 @@ def main( if clean_exit: options.fast_exit = False - formatter = util.FancyFormatter(stdout, stderr, options.hide_error_codes) + formatter = util.FancyFormatter( + stdout, stderr, options.hide_error_codes, hide_success=bool(options.output) + ) if options.install_types and (stdout is not sys.stdout or stderr is not sys.stderr): # Since --install-types performs user input, we want regular stdout and stderr. @@ -156,7 +159,9 @@ def run_build( stdout: TextIO, stderr: TextIO, ) -> tuple[build.BuildResult | None, list[str], bool]: - formatter = util.FancyFormatter(stdout, stderr, options.hide_error_codes) + formatter = util.FancyFormatter( + stdout, stderr, options.hide_error_codes, hide_success=bool(options.output) + ) messages = [] messages_by_file = defaultdict(list) @@ -525,6 +530,14 @@ def add_invertible_flag( stdout=stdout, ) + general_group.add_argument( + "-O", + "--output", + metavar="FORMAT", + help="Set a custom output format", + choices=OUTPUT_CHOICES, + ) + config_group = parser.add_argument_group( title="Config file", description="Use a config file instead of command line arguments. " diff --git a/mypy/options.py b/mypy/options.py index bf9c09f1bf4b..91639828801e 100644 --- a/mypy/options.py +++ b/mypy/options.py @@ -376,10 +376,12 @@ def __init__(self) -> None: self.disable_bytearray_promotion = False self.disable_memoryview_promotion = False - self.force_uppercase_builtins = False self.force_union_syntax = False + # Sets custom output format + self.output: str | None = None + def use_lowercase_names(self) -> bool: if self.python_version >= (3, 9): return not self.force_uppercase_builtins diff --git a/mypy/test/testoutput.py b/mypy/test/testoutput.py new file mode 100644 index 000000000000..41f6881658c8 --- /dev/null +++ b/mypy/test/testoutput.py @@ -0,0 +1,58 @@ +"""Test cases for `--output=json`. + +These cannot be run by the usual unit test runner because of the backslashes in +the output, which get normalized to forward slashes by the test suite on Windows. +""" + +from __future__ import annotations + +import os +import os.path + +from mypy import api +from mypy.defaults import PYTHON3_VERSION +from mypy.test.config import test_temp_dir +from mypy.test.data import DataDrivenTestCase, DataSuite + + +class OutputJSONsuite(DataSuite): + files = ["outputjson.test"] + + def run_case(self, testcase: DataDrivenTestCase) -> None: + test_output_json(testcase) + + +def test_output_json(testcase: DataDrivenTestCase) -> None: + """Runs Mypy in a subprocess, and ensures that `--output=json` works as intended.""" + mypy_cmdline = ["--output=json"] + mypy_cmdline.append(f"--python-version={'.'.join(map(str, PYTHON3_VERSION))}") + + # Write the program to a file. + program_path = os.path.join(test_temp_dir, "main") + mypy_cmdline.append(program_path) + with open(program_path, "w", encoding="utf8") as file: + for s in testcase.input: + file.write(f"{s}\n") + + output = [] + # Type check the program. + out, err, returncode = api.run(mypy_cmdline) + # split lines, remove newlines, and remove directory of test case + for line in (out + err).rstrip("\n").splitlines(): + if line.startswith(test_temp_dir + os.sep): + output.append(line[len(test_temp_dir + os.sep) :].rstrip("\r\n")) + else: + output.append(line.rstrip("\r\n")) + + if returncode > 1: + output.append("!!! Mypy crashed !!!") + + # Remove temp file. + os.remove(program_path) + + # JSON encodes every `\` character into `\\`, so we need to remove `\\` from windows paths + # and `/` from POSIX paths + json_os_separator = os.sep.replace("\\", "\\\\") + normalized_output = [line.replace(test_temp_dir + json_os_separator, "") for line in output] + + assert normalized_output == testcase.output diff --git a/mypy/util.py b/mypy/util.py index bbb5a8610f7f..4b1b918b92e6 100644 --- a/mypy/util.py +++ b/mypy/util.py @@ -563,8 +563,12 @@ class FancyFormatter: This currently only works on Linux and Mac. """ - def __init__(self, f_out: IO[str], f_err: IO[str], hide_error_codes: bool) -> None: + def __init__( + self, f_out: IO[str], f_err: IO[str], hide_error_codes: bool, hide_success: bool = False + ) -> None: self.hide_error_codes = hide_error_codes + self.hide_success = hide_success + # Check if we are in a human-facing terminal on a supported platform. if sys.platform not in ("linux", "darwin", "win32", "emscripten"): self.dummy_term = True @@ -793,6 +797,9 @@ def format_success(self, n_sources: int, use_color: bool = True) -> str: n_sources is total number of files passed directly on command line, i.e. excluding stubs and followed imports. """ + if self.hide_success: + return "" + msg = f"Success: no issues found in {n_sources} source file{plural_s(n_sources)}" if not use_color: return msg diff --git a/test-data/unit/outputjson.test b/test-data/unit/outputjson.test new file mode 100644 index 000000000000..43649b7b781d --- /dev/null +++ b/test-data/unit/outputjson.test @@ -0,0 +1,44 @@ +-- Test cases for `--output=json`. +-- These cannot be run by the usual unit test runner because of the backslashes +-- in the output, which get normalized to forward slashes by the test suite on +-- Windows. + +[case testOutputJsonNoIssues] +# flags: --output=json +def foo() -> None: + pass + +foo() +[out] + +[case testOutputJsonSimple] +# flags: --output=json +def foo() -> None: + pass + +foo(1) +[out] +{"file": "main", "line": 5, "column": 0, "message": "Too many arguments for \"foo\"", "hint": null, "code": "call-arg", "severity": "error"} + +[case testOutputJsonWithHint] +# flags: --output=json +from typing import Optional, overload + +@overload +def foo() -> None: ... +@overload +def foo(x: int) -> None: ... + +def foo(x: Optional[int] = None) -> None: + ... + +reveal_type(foo) + +foo('42') + +def bar() -> None: ... +bar('42') +[out] +{"file": "main", "line": 12, "column": 12, "message": "Revealed type is \"Overload(def (), def (x: builtins.int))\"", "hint": null, "code": "misc", "severity": "note"} +{"file": "main", "line": 14, "column": 0, "message": "No overload variant of \"foo\" matches argument type \"str\"", "hint": "Possible overload variants:\n def foo() -> None\n def foo(x: int) -> None", "code": "call-overload", "severity": "error"} +{"file": "main", "line": 17, "column": 0, "message": "Too many arguments for \"bar\"", "hint": null, "code": "call-arg", "severity": "error"}