diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e56f3ffc01e85..f61e4a8dbda80 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -31,6 +31,7 @@ Other enhancements - :class:`pandas.api.typing.FrozenList` is available for typing the outputs of :attr:`MultiIndex.names`, :attr:`MultiIndex.codes` and :attr:`MultiIndex.levels` (:issue:`58237`) - :class:`pandas.api.typing.SASReader` is available for typing the output of :func:`read_sas` (:issue:`55689`) - :func:`DataFrame.to_excel` now raises an ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`) +- :func:`pandas.merge` now validates the ``how`` parameter input (merge type) (:issue:`59435`) - :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`) - :meth:`DataFrame.agg` called with ``axis=1`` and a ``func`` which relabels the result index now raises a ``NotImplementedError`` (:issue:`58807`). 
- :meth:`Index.get_loc` now accepts also subclasses of ``tuple`` as keys (:issue:`57922`) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 6364072fd215c..07e8fa4841c04 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -982,6 +982,14 @@ def __init__( ) raise MergeError(msg) + # GH 59435: raise when "how" is not a valid Merge type + merge_type = {"left", "right", "inner", "outer", "cross", "asof"} + if how not in merge_type: + raise ValueError( + f"'{how}' is not a valid Merge type: " + "left, right, inner, outer, cross, asof" + ) + self.left_on, self.right_on = self._validate_left_right_on(left_on, right_on) ( diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py index 7de87e633cfb1..479ea7d7ba692 100644 --- a/pandas/tests/frame/methods/test_join.py +++ b/pandas/tests/frame/methods/test_join.py @@ -1,4 +1,5 @@ from datetime import datetime +import re import zoneinfo import numpy as np @@ -276,7 +277,8 @@ def test_join_index(float_frame): tm.assert_index_equal(joined.index, float_frame.index.sort_values()) tm.assert_index_equal(joined.columns, expected_columns) - with pytest.raises(ValueError, match="join method"): + join_msg = "'foo' is not a valid Merge type: left, right, inner, outer, cross, asof" + with pytest.raises(ValueError, match=re.escape(join_msg)): f.join(f2, how="foo") # corner case - overlapping columns diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index cbee85f4aede9..d4766242b8460 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1456,6 +1456,18 @@ def test_merge_readonly(self): data1.merge(data2) # no error + def test_merge_how_validation(self): + # https://github.com/pandas-dev/pandas/issues/59422 + data1 = DataFrame( + np.arange(20).reshape((4, 5)) + 1, columns=["a", "b", "c", "d", "e"] + ) + data2 = DataFrame( + np.arange(20).reshape((5, 4)) + 
1, columns=["a", "b", "x", "y"] + ) + msg = "'full' is not a valid Merge type: left, right, inner, outer, cross, asof" + with pytest.raises(ValueError, match=re.escape(msg)): + data1.merge(data2, how="full") + def _check_merge(x, y): for how in ["inner", "left", "outer"]: