Skip to content

Commit

Permalink
feat: Add dataframe.to_html (#259)
Browse files Browse the repository at this point in the history
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea.
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes b/296945119
  • Loading branch information
Genesis929 authored Dec 28, 2023
1 parent 0e1bbfc commit 2cd6489
Show file tree
Hide file tree
Showing 3 changed files with 185 additions and 0 deletions.
52 changes: 52 additions & 0 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2682,6 +2682,58 @@ def to_string(
encoding,
)

def to_html(
self,
buf=None,
columns: Sequence[str] | None = None,
col_space=None,
header: bool = True,
index: bool = True,
na_rep: str = "NaN",
formatters=None,
float_format=None,
sparsify: bool | None = None,
index_names: bool = True,
justify: str | None = None,
max_rows: int | None = None,
max_cols: int | None = None,
show_dimensions: bool = False,
decimal: str = ".",
bold_rows: bool = True,
classes: str | list | tuple | None = None,
escape: bool = True,
notebook: bool = False,
border: int | None = None,
table_id: str | None = None,
render_links: bool = False,
encoding: str | None = None,
) -> str:
return self.to_pandas().to_html(
buf,
columns, # type: ignore
col_space,
header,
index,
na_rep,
formatters,
float_format,
sparsify,
index_names,
justify, # type: ignore
max_rows,
max_cols,
show_dimensions,
decimal,
bold_rows,
classes,
escape,
notebook,
border,
table_id,
render_links,
encoding,
)

def to_markdown(
self,
buf=None,
Expand Down
9 changes: 9 additions & 0 deletions tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3463,6 +3463,15 @@ def test_df_to_string(scalars_df_index, scalars_pandas_df_index):
assert bf_result == pd_result


def test_df_to_html(scalars_df_index, scalars_pandas_df_index):
    """Check that ``to_html`` output matches pandas for the scalars fixtures."""
    # "numeric_col" is left out of the comparison because it is formatted
    # differently between the two frames.
    excluded_columns = ["numeric_col"]

    actual_html = scalars_df_index.drop(columns=excluded_columns).to_html()
    expected_html = scalars_pandas_df_index.drop(columns=excluded_columns).to_html()

    assert actual_html == expected_html


def test_df_to_markdown(scalars_df_index, scalars_pandas_df_index):
# Nulls have bug from tabulate https://github.com/astanin/python-tabulate/issues/231
bf_result = scalars_df_index.dropna().to_markdown()
Expand Down
124 changes: 124 additions & 0 deletions third_party/bigframes_vendored/pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,130 @@ def to_string(
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def to_html(
    self,
    buf=None,
    columns: Sequence[str] | None = None,
    col_space=None,
    header: bool = True,
    index: bool = True,
    na_rep: str = "NaN",
    formatters=None,
    float_format=None,
    sparsify: bool | None = None,
    index_names: bool = True,
    justify: str | None = None,
    max_rows: int | None = None,
    max_cols: int | None = None,
    show_dimensions: bool = False,
    decimal: str = ".",
    bold_rows: bool = True,
    classes: str | list | tuple | None = None,
    escape: bool = True,
    notebook: bool = False,
    border: int | None = None,
    table_id: str | None = None,
    render_links: bool = False,
    encoding: str | None = None,
):
    """Render the DataFrame as an HTML table.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None

        >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
        >>> print(df.to_html())
        <table border="1" class="dataframe">
          <thead>
            <tr style="text-align: right;">
              <th></th>
              <th>col1</th>
              <th>col2</th>
            </tr>
          </thead>
          <tbody>
            <tr>
              <th>0</th>
              <td>1</td>
              <td>3</td>
            </tr>
            <tr>
              <th>1</th>
              <td>2</td>
              <td>4</td>
            </tr>
          </tbody>
        </table>

    Args:
        buf (str, Path or StringIO-like, optional, default None):
            Destination to write the HTML to. When None, the rendered
            HTML is returned as a string instead.
        columns (sequence, optional, default None):
            Subset of columns to include. All columns are written when
            omitted.
        col_space (str or int, list or dict of int or str, optional):
            Minimum width of each column, in CSS length units. A bare
            int is taken to mean px units.
        header (bool, optional):
            Whether column labels are printed, default True.
        index (bool, optional, default True):
            Whether index (row) labels are printed.
        na_rep (str, optional, default 'NaN'):
            Text used to represent NAN values.
        formatters (list, tuple or dict of one-param. functions, optional):
            Functions used to format the elements of each column,
            selected by position or by name. Each function must return
            a unicode string, and a list/tuple must have exactly one
            entry per column.
        float_format (one-parameter function, optional, default None):
            Function used to format float elements. It must return a
            unicode string and is applied to non-NaN elements only;
            NaN values are handled by ``na_rep``.
        sparsify (bool, optional, default True):
            For a DataFrame with a hierarchical index, set to False to
            print every multiindex key at each row.
        index_names (bool, optional, default True):
            Whether the names of the indexes are printed.
        justify (str, default None):
            Justification of the column labels. When None, the option
            from the print configuration (controlled by set_option) is
            used, which is 'right' out of the box. Valid values are
            'left', 'right', 'center', 'justify', 'justify-all',
            'start', 'end', 'inherit', 'match-parent', 'initial' and
            'unset'.
        max_rows (int, optional):
            Maximum number of rows to display in the console.
        max_cols (int, optional):
            Maximum number of columns to display in the console.
        show_dimensions (bool, default False):
            Whether to display the DataFrame dimensions (number of rows
            by number of columns).
        decimal (str, default '.'):
            Character recognized as the decimal separator, e.g. ',' in
            Europe.
        bold_rows (bool, default True):
            Whether the row labels are made bold in the output.
        classes (str or list or tuple, default None):
            CSS class(es) applied to the resulting html table.
        escape (bool, default True):
            Whether the characters <, >, and & are converted to
            HTML-safe sequences.
        notebook (bool, default False):
            Whether the generated HTML is meant for IPython Notebook.
        border (int):
            Adds a border=border attribute to the opening <table> tag.
            Default pd.options.display.html.border.
        table_id (str, optional):
            CSS id included in the opening <table> tag when specified.
        render_links (bool, default False):
            Whether URLs are converted to HTML links.
        encoding (str, default "utf-8"):
            Character encoding to set.

    Returns:
        str or None: The rendered HTML as a string when ``buf`` is None;
        otherwise None.
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def to_markdown(
self,
buf=None,
Expand Down

0 comments on commit 2cd6489

Please sign in to comment.