From 90cc2196b42880e2291f948cbad2046bab12fed8 Mon Sep 17 00:00:00 2001 From: jakkdl Date: Sat, 1 Jul 2023 14:00:23 +0200 Subject: [PATCH 1/5] Add B034: re.sub/subn/split must pass flags/count/maxsplit as keyword arguments. --- README.rst | 6 ++++++ bugbear.py | 28 +++++++++++++++++++++++++++- tests/b034.py | 30 ++++++++++++++++++++++++++++++ tests/test_bugbear.py | 19 +++++++++++++++++++ 4 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 tests/b034.py diff --git a/README.rst b/README.rst index 9f3a96d..ed92f3c 100644 --- a/README.rst +++ b/README.rst @@ -188,6 +188,8 @@ second usage. Save the result to a list if the result is needed multiple times. **B033**: Sets should not contain duplicate items. Duplicate items will be replaced with a single item at runtime. +**B034**: Calls to `re.sub`, `re.subn` or `re.split` should pass `flags` or `output`/`maxsplit` as keyword arguments. Not doing so is a very common source of confusion, and will likely be deprecated in future python versions. + Opinionated warnings ~~~~~~~~~~~~~~~~~~~~ @@ -329,6 +331,10 @@ MIT Change Log ---------- +Future +~~~~~~ +* Add B034: re.sub/subn/split must pass flags/count/maxsplit as keyword arguments. + 23.6.5 ~~~~~~ diff --git a/bugbear.py b/bugbear.py index c5a0174..a604177 100644 --- a/bugbear.py +++ b/bugbear.py @@ -423,8 +423,9 @@ def visit_Call(self, node): self.check_for_b026(node) - self.check_for_b905(node) self.check_for_b028(node) + self.check_for_b034(node) + self.check_for_b905(node) self.generic_visit(node) def visit_Module(self, node): @@ -1368,6 +1369,25 @@ def check_for_b033(self, node): else: seen.add(elt.value) + def check_for_b034(self, node: ast.Call): + if not isinstance(node.func, ast.Attribute): + return + if not isinstance(node.func.value, ast.Name) or node.func.value.id != "re": + return + + def check(num_args, param_name): + if len(node.args) > num_args: + self.errors.append( + B034( + node.lineno, node.col_offset, vars=(node.func.attr, param_name) + ) + ) + + if node.func.attr in ("sub", "subn"): + check(3, "count") + elif node.func.attr == "split": + check(2, "maxsplit") + def compose_call_path(node): if isinstance(node, ast.Attribute): @@ -1772,6 +1792,12 @@ def visit_Lambda(self, node): ) ) +B034 = Error( + message=( + "B034 {} should pass `{}` and `flags` as keyword arguments to avoid confusion." + ) +) + # Warnings disabled by default. B901 = Error( message=( diff --git a/tests/b034.py b/tests/b034.py new file mode 100644 index 0000000..8c20009 --- /dev/null +++ b/tests/b034.py @@ -0,0 +1,30 @@ +import re +from re import sub + +# error +re.sub("a", "b", "aaa", re.IGNORECASE) +re.sub("a", "b", "aaa", 5) +re.sub("a", "b", "aaa", 5, re.IGNORECASE) +re.subn("a", "b", "aaa", re.IGNORECASE) +re.subn("a", "b", "aaa", 5) +re.subn("a", "b", "aaa", 5, re.IGNORECASE) +re.split(" ", "a a a a", re.I) +re.split(" ", "a a a a", 2) +re.split(" ", "a a a a", 2, re.I) + +# okay +re.sub("a", "b", "aaa") +re.sub("a", "b", "aaa", flags=re.IGNORECASE) +re.sub("a", "b", "aaa", count=5) +re.sub("a", "b", "aaa", count=5, flags=re.IGNORECASE) +re.subn("a", "b", "aaa") +re.subn("a", "b", "aaa", flags=re.IGNORECASE) +re.subn("a", "b", "aaa", count=5) +re.subn("a", "b", "aaa", count=5, flags=re.IGNORECASE) +re.split(" ", "a a a a", flags=re.I) +re.split(" ", "a a a a", maxsplit=2) +re.split(" ", "a a a a", maxsplit=2, flags=re.I) + + +# not covered +sub("a", "b", "aaa", re.IGNORECASE) diff --git a/tests/test_bugbear.py b/tests/test_bugbear.py index 9047a41..f378b42 100644 --- a/tests/test_bugbear.py +++ b/tests/test_bugbear.py @@ -45,6 +45,7 @@ B031, B032, B033, + B034, B901, B902, B903, @@ -505,6 +506,23 @@ def test_b033(self): ) self.assertEqual(errors, expected) + def test_b034(self): + filename = Path(__file__).absolute().parent / "b034.py" + bbc = BugBearChecker(filename=str(filename)) + errors = list(bbc.run()) + expected = self.errors( + B034(5, 0, vars=("sub", "count")), + B034(6, 0, vars=("sub", "count")), + B034(7, 0, vars=("sub", "count")), + B034(8, 0, vars=("subn", "count")), + B034(9, 0, vars=("subn", "count")), + B034(10, 0, vars=("subn", "count")), + B034(11, 0, vars=("split", "maxsplit")), + B034(12, 0, vars=("split", "maxsplit")), + B034(13, 0, vars=("split", "maxsplit")), + ) + self.assertEqual(errors, expected) + def test_b908(self): filename = Path(__file__).absolute().parent / "b908.py" bbc = BugBearChecker(filename=str(filename)) @@ -520,6 +538,7 @@ def test_b908(self): ) self.assertEqual(errors, expected) + @unittest.skipIf(sys.version_info < (3, 8), "not implemented for <3.8") def test_b907(self): filename = Path(__file__).absolute().parent / "b907.py" bbc = BugBearChecker(filename=str(filename)) From 972457930076cbd963dab8a671a7177a993ebff5 Mon Sep 17 00:00:00 2001 From: John Litborn <11260241+jakkdl@users.noreply.github.com> Date: Mon, 3 Jul 2023 12:52:40 +0200 Subject: [PATCH 2/5] remove <3.8 check accidentally added back --- tests/test_bugbear.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_bugbear.py b/tests/test_bugbear.py index f378b42..15fab8f 100644 --- a/tests/test_bugbear.py +++ b/tests/test_bugbear.py @@ -538,7 +538,6 @@ def test_b908(self): ) self.assertEqual(errors, expected) - @unittest.skipIf(sys.version_info < (3, 8), "not implemented for <3.8") def test_b907(self): filename = Path(__file__).absolute().parent / "b907.py" bbc = BugBearChecker(filename=str(filename)) From 8c8a05e8957191721b16c5fcfd7ce592d6040ca9 Mon Sep 17 00:00:00 2001 From: John Litborn <11260241+jakkdl@users.noreply.github.com> Date: Mon, 3 Jul 2023 12:53:07 +0200 Subject: [PATCH 3/5] Apply suggestions from code review Co-authored-by: Jelle Zijlstra --- README.rst | 2 +- bugbear.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index ed92f3c..a315dc0 100644 --- a/README.rst +++ b/README.rst @@ -188,7 +188,7 @@ second usage. Save the result to a list if the result is needed multiple times. **B033**: Sets should not contain duplicate items. Duplicate items will be replaced with a single item at runtime. -**B034**: Calls to `re.sub`, `re.subn` or `re.split` should pass `flags` or `output`/`maxsplit` as keyword arguments. Not doing so is a very common source of confusion, and will likely be deprecated in future python versions. +**B034**: Calls to `re.sub`, `re.subn` or `re.split` should pass `flags` or `output`/`maxsplit` as keyword arguments. Not doing so is a very common source of confusion. Opinionated warnings ~~~~~~~~~~~~~~~~~~~~ diff --git a/bugbear.py b/bugbear.py index a604177..44ad4b9 100644 --- a/bugbear.py +++ b/bugbear.py @@ -1379,7 +1379,7 @@ def check(num_args, param_name): if len(node.args) > num_args: self.errors.append( B034( - node.lineno, node.col_offset, vars=(node.func.attr, param_name) + node.args[num_args].lineno, node.args[num_args].col_offset, vars=(node.func.attr, param_name) ) ) From c166bce819867c926c678e45fa88d516eb600edf Mon Sep 17 00:00:00 2001 From: jakkdl Date: Mon, 3 Jul 2023 12:55:31 +0200 Subject: [PATCH 4/5] update column in testcase --- tests/test_bugbear.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/test_bugbear.py b/tests/test_bugbear.py index 15fab8f..910616e 100644 --- a/tests/test_bugbear.py +++ b/tests/test_bugbear.py @@ -511,15 +511,15 @@ def test_b034(self): bbc = BugBearChecker(filename=str(filename)) errors = list(bbc.run()) expected = self.errors( - B034(5, 0, vars=("sub", "count")), - B034(6, 0, vars=("sub", "count")), - B034(7, 0, vars=("sub", "count")), - B034(8, 0, vars=("subn", "count")), - B034(9, 0, vars=("subn", "count")), - B034(10, 0, vars=("subn", "count")), - B034(11, 0, vars=("split", "maxsplit")), - B034(12, 0, vars=("split", "maxsplit")), - B034(13, 0, vars=("split", "maxsplit")), + B034(5, 24, vars=("sub", "count")), + B034(6, 24, vars=("sub", "count")), + B034(7, 24, vars=("sub", "count")), + B034(8, 25, vars=("subn", "count")), + B034(9, 25, vars=("subn", "count")), + B034(10, 25, vars=("subn", "count")), + B034(11, 25, vars=("split", "maxsplit")), + B034(12, 25, vars=("split", "maxsplit")), + B034(13, 25, vars=("split", "maxsplit")), ) self.assertEqual(errors, expected) From cf0ede8ca21c434a1984135dc13ac94a97ce8cc3 Mon Sep 17 00:00:00 2001 From: jakkdl Date: Mon, 10 Jul 2023 12:58:29 +0200 Subject: [PATCH 5/5] improved wording --- README.rst | 2 +- bugbear.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 904e471..52820f4 100644 --- a/README.rst +++ b/README.rst @@ -188,7 +188,7 @@ second usage. Save the result to a list if the result is needed multiple times. **B033**: Sets should not contain duplicate items. Duplicate items will be replaced with a single item at runtime. -**B034**: Calls to `re.sub`, `re.subn` or `re.split` should pass `flags` or `output`/`maxsplit` as keyword arguments. Not doing so is a very common source of confusion. +**B034**: Calls to `re.sub`, `re.subn` or `re.split` should pass `flags` or `count`/`maxsplit` as keyword arguments. It is commonly assumed that `flags` is the third positional parameter, forgetting about `count`/`maxsplit`, since many other `re` module functions are of the form `f(pattern, string, flags)`. Opinionated warnings ~~~~~~~~~~~~~~~~~~~~ diff --git a/bugbear.py b/bugbear.py index 8a7f884..216c69f 100644 --- a/bugbear.py +++ b/bugbear.py @@ -1794,7 +1794,8 @@ def visit_Lambda(self, node): B034 = Error( message=( - "B034 {} should pass `{}` and `flags` as keyword arguments to avoid confusion." + "B034 {} should pass `{}` and `flags` as keyword arguments to avoid confusion" + " due to unintuitive argument positions." ) )