Skip to content

Commit

Permalink
Merge remote-tracking branch 'refs/remotes/github/main' into shobs-co…
Browse files Browse the repository at this point in the history
…de-samples-map-etc
  • Loading branch information
shobsi committed Dec 28, 2023
2 parents 939881d + 746115d commit 7932fd9
Show file tree
Hide file tree
Showing 6 changed files with 245 additions and 9 deletions.
66 changes: 60 additions & 6 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2682,6 +2682,58 @@ def to_string(
encoding,
)

def to_html(
    self,
    buf=None,
    columns: Sequence[str] | None = None,
    col_space=None,
    header: bool = True,
    index: bool = True,
    na_rep: str = "NaN",
    formatters=None,
    float_format=None,
    sparsify: bool | None = None,
    index_names: bool = True,
    justify: str | None = None,
    max_rows: int | None = None,
    max_cols: int | None = None,
    show_dimensions: bool = False,
    decimal: str = ".",
    bold_rows: bool = True,
    classes: str | list | tuple | None = None,
    escape: bool = True,
    notebook: bool = False,
    border: int | None = None,
    table_id: str | None = None,
    render_links: bool = False,
    encoding: str | None = None,
) -> str:
    # Render this DataFrame as an HTML table by materializing it with
    # `to_pandas()` and delegating to pandas' `DataFrame.to_html`. Every
    # parameter is handed to pandas unchanged; the return value is whatever
    # pandas returns.
    #
    # NOTE(review): this is a comment rather than a docstring on purpose --
    # presumably the user-facing docstring is supplied by the vendored pandas
    # base class, and a docstring here would shadow it. Confirm before
    # adding one.
    #
    # Everything except `buf` is forwarded by keyword. pandas 2.1 deprecated
    # passing these arguments positionally (it emits a FutureWarning stating
    # they become keyword-only in pandas 3.0), so positional forwarding is
    # noisy today and breaks once the deprecation is enforced. Keywords also
    # keep each value bound to the right parameter if pandas ever reorders
    # its signature.
    return self.to_pandas().to_html(
        buf,
        columns=columns,  # type: ignore
        col_space=col_space,
        header=header,
        index=index,
        na_rep=na_rep,
        formatters=formatters,
        float_format=float_format,
        sparsify=sparsify,
        index_names=index_names,
        justify=justify,  # type: ignore
        max_rows=max_rows,
        max_cols=max_cols,
        show_dimensions=show_dimensions,
        decimal=decimal,
        bold_rows=bold_rows,
        classes=classes,
        escape=escape,
        notebook=notebook,
        border=border,
        table_id=table_id,
        render_links=render_links,
        encoding=encoding,
    )

def to_markdown(
self,
buf=None,
Expand All @@ -2707,26 +2759,28 @@ def _apply_unary_op(self, operation: ops.UnaryOp) -> DataFrame:
def _create_io_query(self, index: bool, ordering_id: Optional[str]) -> str:
"""Create query text representing this dataframe for I/O."""
array_value = self._block.expr

new_col_labels, new_idx_labels = utils.get_standardized_ids(
self._block.column_labels, self.index.names
)

columns = list(self._block.value_columns)
column_labels = list(self._block.column_labels)
column_labels = new_col_labels
# This code drops unnamed indexes to keep consistent with the behavior of
# most pandas write APIs. The exception is `pandas.to_csv`, which keeps
# unnamed indexes as `Unnamed: 0`.
# TODO(chelsealin): check if works for multiple indexes.
if index and self.index.name is not None:
columns.extend(self._block.index_columns)
column_labels.extend(self.index.names)
column_labels.extend(new_idx_labels)
else:
array_value = array_value.drop_columns(self._block.index_columns)

# Make columns in SQL reflect _labels_ not _ids_. Note: This may use
# the arbitrary unicode column labels feature in BigQuery, which is
# currently (June 2023) in preview.
# TODO(swast): Handle duplicate and NULL labels.
id_overrides = {
col_id: col_label
for col_id, col_label in zip(columns, column_labels)
if col_label and isinstance(col_label, str)
col_id: col_label for col_id, col_label in zip(columns, column_labels)
}

if ordering_id is not None:
Expand Down
10 changes: 7 additions & 3 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,9 +518,13 @@ def prerelease(session: nox.sessions.Session, tests_path):
"--prefer-binary",
"--pre",
"--upgrade",
# TODO(shobs): Remove tying to version 2.1.3 after
# https://github.com/pandas-dev/pandas/issues/56463 is resolved
"pandas!=2.1.4",
# TODO(shobs): Remove excluding version 2.1.4 after
# https://github.com/pandas-dev/pandas/issues/56463 is resolved.
#
# TODO(shobs): Remove excluding version 2.2.0rc0 after
# https://github.com/pandas-dev/pandas/issues/56646 and
# https://github.com/pandas-dev/pandas/issues/56651 are resolved.
"pandas!=2.1.4,!=2.2.0rc0",
)
already_installed.add("pandas")

Expand Down
9 changes: 9 additions & 0 deletions tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3463,6 +3463,15 @@ def test_df_to_string(scalars_df_index, scalars_pandas_df_index):
assert bf_result == pd_result


def test_df_to_html(scalars_df_index, scalars_pandas_df_index):
    """`DataFrame.to_html` should produce the same markup as pandas."""
    # numeric_col is formatted differently, so it is left out of the comparison.
    excluded_columns = ["numeric_col"]

    bigframes_frame = scalars_df_index.drop(columns=excluded_columns)
    actual_html = bigframes_frame.to_html()

    pandas_frame = scalars_pandas_df_index.drop(columns=excluded_columns)
    expected_html = pandas_frame.to_html()

    assert actual_html == expected_html


def test_df_to_markdown(scalars_df_index, scalars_pandas_df_index):
# Nulls have bug from tabulate https://github.com/astanin/python-tabulate/issues/231
bf_result = scalars_df_index.dropna().to_markdown()
Expand Down
44 changes: 44 additions & 0 deletions tests/system/small/test_dataframe_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,50 @@ def test_to_gbq_if_exists(
)


def test_to_gbq_w_duplicate_column_names(
    scalars_df_index, scalars_pandas_df_index, dataset_id
):
    """Check how `to_gbq` writes a frame whose column names collide."""
    table_name = f"{dataset_id}.test_to_gbq_w_duplicate_column_names"

    # After this rename two columns share the label 'int64_col'; the test
    # expects the former 'int64_too' to be written as 'int64_col_1'.
    frame_with_duplicates = scalars_df_index.rename(
        columns={"int64_too": "int64_col"}
    )
    frame_with_duplicates.to_gbq(table_name, if_exists="replace")

    round_tripped = bpd.read_gbq(table_name, index_col="rowindex").to_pandas()

    # (column in the pandas fixture, column expected in the table, check_names)
    expectations = (
        ("int64_col", "int64_col", True),
        ("int64_too", "int64_col_1", False),
    )
    for source_column, written_column, compare_names in expectations:
        pd.testing.assert_series_equal(
            scalars_pandas_df_index[source_column],
            round_tripped[written_column],
            check_names=compare_names,
        )


def test_to_gbq_w_None_column_names(
    scalars_df_index, scalars_pandas_df_index, dataset_id
):
    """Check how `to_gbq` writes a frame that has a column named None."""
    table_name = f"{dataset_id}.test_to_gbq_w_none_column_names"

    # Strip the label from 'int64_too'; the test expects that column to be
    # written under the name 'bigframes_unnamed_column'.
    frame_with_unnamed = scalars_df_index.rename(columns={"int64_too": None})
    frame_with_unnamed.to_gbq(table_name, if_exists="replace")

    round_tripped = bpd.read_gbq(table_name, index_col="rowindex").to_pandas()

    # (column in the pandas fixture, column expected in the table, check_names)
    expectations = (
        ("int64_col", "int64_col", True),
        ("int64_too", "bigframes_unnamed_column", False),
    )
    for source_column, written_column, compare_names in expectations:
        pd.testing.assert_series_equal(
            scalars_pandas_df_index[source_column],
            round_tripped[written_column],
            check_names=compare_names,
        )


def test_to_gbq_w_invalid_destination_table(scalars_df_index):
with pytest.raises(ValueError):
scalars_df_index.to_gbq("table_id")
Expand Down
124 changes: 124 additions & 0 deletions third_party/bigframes_vendored/pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,130 @@ def to_string(
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def to_html(
    self,
    buf=None,
    columns: Sequence[str] | None = None,
    col_space=None,
    header: bool = True,
    index: bool = True,
    na_rep: str = "NaN",
    formatters=None,
    float_format=None,
    sparsify: bool | None = None,
    index_names: bool = True,
    justify: str | None = None,
    max_rows: int | None = None,
    max_cols: int | None = None,
    show_dimensions: bool = False,
    decimal: str = ".",
    bold_rows: bool = True,
    classes: str | list | tuple | None = None,
    escape: bool = True,
    notebook: bool = False,
    border: int | None = None,
    table_id: str | None = None,
    render_links: bool = False,
    encoding: str | None = None,
) -> str | None:
    """Render a DataFrame as an HTML table.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None

        >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
        >>> print(df.to_html())
        <table border="1" class="dataframe">
          <thead>
            <tr style="text-align: right;">
              <th></th>
              <th>col1</th>
              <th>col2</th>
            </tr>
          </thead>
          <tbody>
            <tr>
              <th>0</th>
              <td>1</td>
              <td>3</td>
            </tr>
            <tr>
              <th>1</th>
              <td>2</td>
              <td>4</td>
            </tr>
          </tbody>
        </table>

    Args:
        buf (str, Path or StringIO-like, optional, default None):
            Buffer to write to. If None, the output is returned as a string.
        columns (sequence, optional, default None):
            The subset of columns to write. Writes all columns by default.
        col_space (str or int, list or dict of int or str, optional):
            The minimum width of each column in CSS length units. An int is
            assumed to be px units.
        header (bool, optional):
            Whether to print column labels, default True.
        index (bool, optional, default True):
            Whether to print index (row) labels.
        na_rep (str, optional, default 'NaN'):
            String representation of NAN to use.
        formatters (list, tuple or dict of one-param. functions, optional):
            Formatter functions to apply to columns' elements by position or
            name.
            The result of each function must be a unicode string.
            List/tuple must be of length equal to the number of columns.
        float_format (one-parameter function, optional, default None):
            Formatter function to apply to columns' elements if they are
            floats. This function must return a unicode string and will
            be applied only to the non-NaN elements, with NaN being
            handled by na_rep.
        sparsify (bool, optional, default True):
            Set to False for a DataFrame with a hierarchical index to print
            every multiindex key at each row.
        index_names (bool, optional, default True):
            Prints the names of the indexes.
        justify (str, default None):
            How to justify the column labels. If None uses the option from
            the print configuration (controlled by set_option), 'right' out
            of the box. Valid values are, 'left', 'right', 'center', 'justify',
            'justify-all', 'start', 'end', 'inherit', 'match-parent', 'initial',
            'unset'.
        max_rows (int, optional):
            Maximum number of rows to display in the console.
        max_cols (int, optional):
            Maximum number of columns to display in the console.
        show_dimensions (bool, default False):
            Display DataFrame dimensions (number of rows by number of columns).
        decimal (str, default '.'):
            Character recognized as decimal separator, e.g. ',' in Europe.
        bold_rows (bool, default True):
            Make the row labels bold in the output.
        classes (str or list or tuple, default None):
            CSS class(es) to apply to the resulting html table.
        escape (bool, default True):
            Convert the characters <, >, and & to HTML-safe sequences.
        notebook (bool, default False):
            Whether the generated HTML is for IPython Notebook.
        border (int):
            A border=border attribute is included in the opening <table>
            tag. Default pd.options.display.html.border.
        table_id (str, optional):
            A css id is included in the opening <table> tag if specified.
        render_links (bool, default False):
            Convert URLs to HTML links.
        encoding (str, default "utf-8"):
            Set character encoding.

    Returns:
        str or None: If buf is None, returns the result as a string. Otherwise
        returns None.
    """
    # This definition only carries the signature and documentation: it
    # raises unconditionally with the shared abstract-method message.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def to_markdown(
self,
buf=None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def get_dummies(
prepended to the value.
**Examples:**
>>> import bigframes.pandas as pd
>>> pd.options.display.progress_bar = None
>>> s = pd.Series(list('abca'))
Expand Down

0 comments on commit 7932fd9

Please sign in to comment.