From 7f53aebcd8592b7639ba97506225b1d044cff3f5 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Fri, 25 Oct 2019 02:34:05 +0200 Subject: [PATCH] Fix more name_like_string. (#950) --- databricks/koalas/frame.py | 11 ++++++----- databricks/koalas/indexes.py | 3 ++- databricks/koalas/internal.py | 5 +++-- databricks/koalas/namespace.py | 5 +++-- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/databricks/koalas/frame.py b/databricks/koalas/frame.py index 47f642d7bb..ac308be5fd 100644 --- a/databricks/koalas/frame.py +++ b/databricks/koalas/frame.py @@ -2358,8 +2358,8 @@ def rename(index): index_name if index_name is not None else rename(i))) index_map.remove(info) - new_data_scols = [ - self._internal.scol_for(column).alias(str(name)) for column, name in new_index_map] + new_data_scols = [self._internal.scol_for(column).alias(name_like_string(name)) + for column, name in new_index_map] if len(index_map) > 0: index_scols = [scol_for(self._sdf, column) for column, _ in index_map] @@ -2379,7 +2379,8 @@ def rename(index): internal = self._internal.copy( sdf=sdf, - data_columns=[str(name) for _, name in new_index_map] + self._internal.data_columns, + data_columns=([name_like_string(name) for _, name in new_index_map] + + self._internal.data_columns), index_map=index_map, column_index=None) @@ -2820,7 +2821,7 @@ def duplicated(self, subset=None, keep='first'): name = self._internal.index_names[0] else: name = ('0',) - column = str(name) if len(name) > 1 else name[0] + column = name_like_string(name) sdf = self._sdf if column == index_column: @@ -3393,7 +3394,7 @@ def _assign(self, kwargs): adding_column_index = [] for idx, scol in pairs.items(): if idx not in set(i[:len(idx)] for i in self._internal.column_index): - name = str(idx) if len(idx) > 1 else idx[0] + name = name_like_string(idx) scols.append(scol.alias(name)) adding_data_columns.append(name) adding_column_index.append(idx) diff --git a/databricks/koalas/indexes.py b/databricks/koalas/indexes.py index 4fbc297800..384bdd96d5 100644 --- a/databricks/koalas/indexes.py +++ b/databricks/koalas/indexes.py @@ -35,6 +35,7 @@ from databricks.koalas.frame import DataFrame from databricks.koalas.missing.indexes import _MissingPandasLikeIndex, _MissingPandasLikeMultiIndex from databricks.koalas.series import Series +from databricks.koalas.utils import name_like_string class Index(IndexOpsMixin): @@ -232,7 +233,7 @@ def to_series(self, name: Union[str, Tuple[str, ...]] = None) -> Series: kdf = self._kdf scol = self._scol if name is not None: - scol = scol.alias(str(name)) + scol = scol.alias(name_like_string(name)) column_index = [None] if len(kdf._internal.index_map) > 1 else kdf._internal.index_names return Series(kdf._internal.copy(scol=scol, column_index=column_index, diff --git a/databricks/koalas/internal.py b/databricks/koalas/internal.py index efbffabad0..0701c3388b 100644 --- a/databricks/koalas/internal.py +++ b/databricks/koalas/internal.py @@ -727,14 +727,15 @@ def from_pandas(pdf: pd.DataFrame) -> '_InternalFrame': for i, name in enumerate(index.names)] else: name = index.name - index_map = [(str(name) if name is not None else SPARK_INDEX_NAME_FORMAT(0), + index_map = [(name_like_string(name) + if name is not None else SPARK_INDEX_NAME_FORMAT(0), name if name is None or isinstance(name, tuple) else (name,))] index_columns = [index_column for index_column, _ in index_map] reset_index = pdf.reset_index() reset_index.columns = index_columns + data_columns - schema = StructType([StructField(str(name), infer_pd_series_spark_type(col), + schema = StructType([StructField(name_like_string(name), infer_pd_series_spark_type(col), nullable=bool(col.isnull().any())) for name, col in reset_index.iteritems()]) for name, col in reset_index.iteritems(): diff --git a/databricks/koalas/namespace.py b/databricks/koalas/namespace.py index d129eee4da..369ba6356e 100644 --- a/databricks/koalas/namespace.py +++ b/databricks/koalas/namespace.py @@ -1515,11 +1515,12 @@ def concat(objs, axis=0, join='outer', ignore_index=False): # TODO: NaN and None difference for missing values. pandas seems filling NaN. sdf = kdf._sdf for idx in columns_to_add: - sdf = sdf.withColumn(str(idx), F.lit(None)) + sdf = sdf.withColumn(name_like_string(idx), F.lit(None)) kdf = DataFrame(kdf._internal.copy( sdf=sdf, - data_columns=kdf._internal.data_columns + [str(idx) for idx in columns_to_add], + data_columns=(kdf._internal.data_columns + + [name_like_string(idx) for idx in columns_to_add]), column_index=kdf._internal.column_index + columns_to_add)) kdfs.append(kdf[merged_columns])