From faeef393e9abd0d0a29d57dfb73620959db43aa6 Mon Sep 17 00:00:00 2001 From: djvdq Date: Thu, 15 Aug 2024 22:49:40 +0200 Subject: [PATCH] Implemented #323: Added new check B910 to suggest using Counter() instead of defaultdict(int) --- README.rst | 2 ++ bugbear.py | 15 +++++++++++++++ tests/b910.py | 8 ++++++++ tests/test_bugbear.py | 12 ++++++++++++ 4 files changed, 37 insertions(+) create mode 100644 tests/b910.py diff --git a/README.rst b/README.rst index cacab45..406cd18 100644 --- a/README.rst +++ b/README.rst @@ -273,6 +273,8 @@ on the first line and urls or paths that are on their own line:: "https://some-super-long-domain-name.com/with/some/very/long/paths" ) +**B910**: Use Counter() instead of defaultdict(int) to avoid memory leaks. Using the latter can result in using as much as ~1000 MB of memory even for small and simple dicts. Refactoring the code to use the former can reduce memory usage from mentioned ~1000 MB to flat ~20 MB. + How to enable opinionated warnings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/bugbear.py b/bugbear.py index 96052aa..e0f8218 100644 --- a/bugbear.py +++ b/bugbear.py @@ -503,6 +503,7 @@ def visit_Call(self, node) -> None: self.check_for_b034(node) self.check_for_b039(node) self.check_for_b905(node) + self.check_for_b910(node) # no need for copying, if used in nested calls it will be set to None current_b040_caught_exception = self.b040_caught_exception @@ -1706,6 +1707,16 @@ def check_for_b909(self, node: ast.For) -> None: ): self.errors.append(B909(mutation.lineno, mutation.col_offset)) + def check_for_b910(self, node: ast.Call) -> None: + if ( + isinstance(node.func, ast.Name) + and node.func.id == "defaultdict" + and len(node.args) > 0 + and isinstance(node.args[0], ast.Name) + and node.args[0].id == "int" + ): + self.errors.append(B910(node.lineno, node.col_offset)) + def compose_call_path(node): if isinstance(node, ast.Attribute): @@ -2380,6 +2391,9 @@ def visit_Lambda(self, node) -> None: "B909 editing a loop's mutable iterable often leads to unexpected results/bugs" ) ) +B910 = Error( + message="B910 use Counter() instead of defaultdict(int) to avoid memory leaks" +) B950 = Error(message="B950 line too long ({} > {} characters)") @@ -2392,5 +2406,6 @@ def visit_Lambda(self, node) -> None: "B906", "B908", "B909", + "B910", "B950", ] diff --git a/tests/b910.py b/tests/b910.py new file mode 100644 index 0000000..0b98bc6 --- /dev/null +++ b/tests/b910.py @@ -0,0 +1,8 @@ +from collections import defaultdict + +a = defaultdict(int) +b = defaultdict(float) +c = defaultdict(bool) +d = defaultdict(str) +e = defaultdict() +f = defaultdict(int) diff --git a/tests/test_bugbear.py b/tests/test_bugbear.py index 50c04e6..bdc2552 100644 --- a/tests/test_bugbear.py +++ b/tests/test_bugbear.py @@ -57,6 +57,7 @@ B907, B908, B909, + B910, B950, BugBearChecker, BugBearVisitor, @@ -1042,6 +1043,17 @@ def test_b909(self): ] self.assertEqual(errors, self.errors(*expected)) + def test_b910(self): + filename = Path(__file__).absolute().parent / "b910.py" + mock_options = Namespace(select=[], extend_select=["B910"]) + bbc = BugBearChecker(filename=str(filename), options=mock_options) + errors = list(bbc.run()) + expected = [ + B910(3, 4), + B910(8, 4), + ] + self.assertEqual(errors, self.errors(*expected)) + class TestFuzz(unittest.TestCase): from hypothesis import HealthCheck, given, settings