Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Add return_dict parameter to DataFrame.info() method to return info as a dictionary #59387 #59457

Closed
wants to merge 9 commits into from
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Other enhancements
- :meth:`DataFrame.ewm` now allows ``adjust=False`` when ``times`` is provided (:issue:`54328`)
- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
- :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`)
- :meth:`DataFrame.info` now has a ``return_dict`` parameter to return the summary as a dictionary instead of printing it (:issue:`59387`)
- :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`)
- :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
- Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3529,17 +3529,21 @@ def info(
max_cols: int | None = None,
memory_usage: bool | str | None = None,
show_counts: bool | None = None,
return_dict: bool | None = None,
) -> None:
info = DataFrameInfo(
data=self,
memory_usage=memory_usage,
)
info.render(
info_return = info.render(
buf=buf,
max_cols=max_cols,
verbose=verbose,
show_counts=show_counts,
return_dict=return_dict,
)
if return_dict:
return info_return

def memory_usage(self, index: bool = True, deep: bool = False) -> Series:
"""
Expand Down
67 changes: 55 additions & 12 deletions pandas/io/formats/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,15 @@
)


# Parameter description for ``return_dict``, substituted into the shared
# ``info`` docstring template. Continuation lines are indented one level so the
# rendered numpydoc parameter entry is well-formed.
return_dict_sub = dedent(
    """\
    return_dict : bool, optional
        Whether to return the summary as a dictionary instead of printing
        it. If True, nothing is written to `buf`, the display options
        (`verbose`, `max_cols`, `show_counts`) are ignored, and a
        dictionary containing information about the DataFrame is returned.
        If False or None (the default), the summary is printed and None is
        returned."""
)


frame_examples_sub = dedent(
"""\
>>> int_values = [1, 2, 3, 4, 5]
Expand Down Expand Up @@ -136,7 +145,12 @@
1 column_2 1000000 non-null object
2 column_3 1000000 non-null object
dtypes: object(3)
memory usage: 165.9 MB"""
memory usage: 165.9 MB

>>> info_dict = df.info(return_dict=True)
>>> print(info_dict)
{'Column_summary': '...', 'Memory_usage': 24000128,
'Index_type': 'RangeIndex', 'Index_entries': 1000000}"""
)


Expand All @@ -153,6 +167,7 @@
"type_sub": " and columns",
"max_cols_sub": frame_max_cols_sub,
"show_counts_sub": show_counts_sub,
"return_dict_sub": return_dict_sub,
"examples_sub": frame_examples_sub,
"see_also_sub": frame_see_also_sub,
"version_added_sub": "",
Expand Down Expand Up @@ -233,6 +248,7 @@
"type_sub": "",
"max_cols_sub": "",
"show_counts_sub": show_counts_sub,
"return_dict_sub": return_dict_sub,
"examples_sub": series_examples_sub,
"see_also_sub": series_see_also_sub,
"version_added_sub": "\n.. versionadded:: 1.4.0\n",
Expand Down Expand Up @@ -273,11 +289,13 @@
:ref:`Frequently Asked Questions <df-memory-usage>` for more
details.
{show_counts_sub}
{return_dict_sub}

Returns
-------
None
This method prints a summary of a {klass} and returns None.
dict or None
If return_dict is True, returns a dictionary summarizing the {klass}.
Otherwise, returns None.

See Also
--------
Expand Down Expand Up @@ -435,7 +453,7 @@ def render(
max_cols: int | None,
verbose: bool | None,
show_counts: bool | None,
) -> None:
) -> None | dict:
pass


Expand Down Expand Up @@ -495,21 +513,46 @@ def memory_usage_bytes(self) -> int:
deep = self.memory_usage == "deep"
return self.data.memory_usage(index=True, deep=deep).sum()

def to_dict(self) -> dict:
    """
    Build a dictionary describing the DataFrame wrapped by this object.

    Returns
    -------
    dict
        Mapping with the keys ``"Column_summary"`` (result of
        ``_get_column_summary``), ``"Memory_usage"`` (value of
        ``memory_usage_bytes``), ``"Index_type"`` (class name of the
        index) and ``"Index_entries"`` (length of the index).
    """
    summary: dict = {}
    summary["Column_summary"] = self._get_column_summary()
    summary["Memory_usage"] = self.memory_usage_bytes

    # Look the index up once; both remaining entries are derived from it.
    idx = self.data.index
    summary["Index_type"] = type(idx).__name__
    summary["Index_entries"] = len(idx)
    return summary

def _get_column_summary(self) -> list[dict]:
"""Return a DataFrame summarizing columns."""
return [
{
"#": i,
"Column": col,
"Non-Null-Count": self.data[col].notna().sum(),
"Dtype": self.data[col].dtype,
}
for i, col in enumerate(self.ids)
]

def render(
self,
*,
buf: WriteBuffer[str] | None,
max_cols: int | None,
verbose: bool | None,
show_counts: bool | None,
) -> None:
printer = _DataFrameInfoPrinter(
info=self,
max_cols=max_cols,
verbose=verbose,
show_counts=show_counts,
)
printer.to_buffer(buf)
return_dict: bool | None,
) -> None | dict:
if return_dict:
return self.to_dict()
else:
printer = _DataFrameInfoPrinter(
info=self,
max_cols=max_cols,
verbose=verbose,
show_counts=show_counts,
)
printer.to_buffer(buf)


class SeriesInfo(_BaseInfo):
Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/frame/methods/test_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,3 +569,27 @@ def test_info_show_counts(row, columns, show_counts, result):
with StringIO() as buf:
df.info(buf=buf, show_counts=show_counts)
assert ("non-null" in buf.getvalue()) is result


@pytest.mark.parametrize(
    "df",
    [
        DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
        DataFrame({}),
    ],
)
def test_info_return_dict(df):
    # GH#59387: ``return_dict=True`` returns the summary instead of printing it.
    result = df.info(return_dict=True)

    assert isinstance(result, dict)
    # Exact key set; a single check replaces the repeated per-key assertions.
    assert set(result) == {
        "Column_summary",
        "Memory_usage",
        "Index_type",
        "Index_entries",
    }

    # Value types of each entry.
    assert isinstance(result["Column_summary"], list)
    assert isinstance(result["Memory_usage"], np.int64)
    assert isinstance(result["Index_type"], str)
    assert isinstance(result["Index_entries"], int)

    # Values must describe the frame that was passed in.
    assert len(result["Column_summary"]) == len(df.columns)
    assert result["Index_type"] == type(df.index).__name__
    assert result["Index_entries"] == len(df.index)
Loading