diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 3de65fe6f682c..9f488e3c51361 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -48,6 +48,7 @@ Other enhancements - :meth:`DataFrame.ewm` now allows ``adjust=False`` when ``times`` is provided (:issue:`54328`) - :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`) - :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`) +- :meth:`DataFrame.info` now has a ``return_dict`` parameter that returns the summary as a dictionary instead of printing it (:issue:`59387`) - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`) - Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b8039746d9952..0934b698b4efa 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3529,17 +3529,21 @@ def info( max_cols: int | None = None, memory_usage: bool | str | None = None, show_counts: bool | None = None, + return_dict: bool | None = None, ) -> None: info = DataFrameInfo( data=self, memory_usage=memory_usage, ) - info.render( + info_return = info.render( buf=buf, max_cols=max_cols, verbose=verbose, show_counts=show_counts, + return_dict=return_dict, ) + if return_dict: + return info_return def memory_usage(self, index: bool = True, deep: bool = False) -> Series: """ diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index 469dcfb76ba0b..8629eb0ff6368 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -54,6 +54,15 @@ ) +return_dict_sub = dedent( + """\ + 
return_dict : bool, optional + Whether to return the summary as a dictionary. If True, the method + returns a dictionary containing information about the DataFrame. + If False, the summary is printed and None is returned.""" +) + + frame_examples_sub = dedent( """\ >>> int_values = [1, 2, 3, 4, 5] @@ -136,7 +145,12 @@ 1 column_2 1000000 non-null object 2 column_3 1000000 non-null object dtypes: object(3) - memory usage: 165.9 MB""" + memory usage: 165.9 MB + + >>> info_dict = df.info(return_dict=True) + >>> print(info_dict) + {'Column_summary': '...', 'Memory_usage': 24000128, + 'Index_type': 'RangeIndex', 'Index_entries': 1000000}""" ) @@ -153,6 +167,7 @@ "type_sub": " and columns", "max_cols_sub": frame_max_cols_sub, "show_counts_sub": show_counts_sub, + "return_dict_sub": return_dict_sub, "examples_sub": frame_examples_sub, "see_also_sub": frame_see_also_sub, "version_added_sub": "", @@ -233,6 +248,7 @@ "type_sub": "", "max_cols_sub": "", "show_counts_sub": show_counts_sub, + "return_dict_sub": return_dict_sub, "examples_sub": series_examples_sub, "see_also_sub": series_see_also_sub, "version_added_sub": "\n.. versionadded:: 1.4.0\n", @@ -273,11 +289,13 @@ :ref:`Frequently Asked Questions ` for more details. {show_counts_sub} + {return_dict_sub} Returns ------- - None - This method prints a summary of a {klass} and returns None. + dict or None + If return_dict is True, returns a dictionary summarizing the {klass}. + Otherwise, returns None. 
See Also -------- @@ -435,7 +453,7 @@ def render( max_cols: int | None, verbose: bool | None, show_counts: bool | None, - ) -> None: + ) -> None | dict: pass @@ -495,6 +513,27 @@ def memory_usage_bytes(self) -> int: deep = self.memory_usage == "deep" return self.data.memory_usage(index=True, deep=deep).sum() + def to_dict(self) -> dict: + """Return DataFrame info as a dictionary.""" + return { + "Column_summary": self._get_column_summary(), + "Memory_usage": self.memory_usage_bytes, + "Index_type": type(self.data.index).__name__, + "Index_entries": len(self.data.index), + } + + def _get_column_summary(self) -> list[dict]: + """Return a list of dicts summarizing each column.""" + return [ + { + "#": i, + "Column": col, + "Non-Null-Count": self.data[col].notna().sum(), + "Dtype": self.data[col].dtype, + } + for i, col in enumerate(self.ids) + ] + def render( self, *, @@ -502,14 +541,18 @@ def render( max_cols: int | None, verbose: bool | None, show_counts: bool | None, - ) -> None: - printer = _DataFrameInfoPrinter( - info=self, - max_cols=max_cols, - verbose=verbose, - show_counts=show_counts, - ) - printer.to_buffer(buf) + return_dict: bool | None, + ) -> None | dict: + if return_dict: + return self.to_dict() + else: + printer = _DataFrameInfoPrinter( + info=self, + max_cols=max_cols, + verbose=verbose, + show_counts=show_counts, + ) + printer.to_buffer(buf) class SeriesInfo(_BaseInfo): diff --git a/pandas/tests/frame/methods/test_info.py b/pandas/tests/frame/methods/test_info.py index a4319f8a8ae7f..bd7aa0e8f1f06 100644 --- a/pandas/tests/frame/methods/test_info.py +++ b/pandas/tests/frame/methods/test_info.py @@ -569,3 +569,27 @@ def test_info_show_counts(row, columns, show_counts, result): with StringIO() as buf: df.info(buf=buf, show_counts=show_counts) assert ("non-null" in buf.getvalue()) is result + + +@pytest.mark.parametrize( + "df", + [ + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + DataFrame({}), + ], +) +def test_info_return_dict(df): + result = 
df.info(return_dict=True) + expected_keys = {"Column_summary", "Memory_usage", "Index_type", "Index_entries"} + assert isinstance(result, dict) + assert expected_keys.issubset(result.keys()) + + assert "Column_summary" in result + assert "Memory_usage" in result + assert "Index_type" in result + assert "Index_entries" in result + + assert isinstance(result["Column_summary"], list) + assert isinstance(result["Memory_usage"], np.int64) + assert isinstance(result["Index_type"], str) + assert isinstance(result["Index_entries"], int)