Skip to content

Commit

Permalink
Fix more name_like_string. (#950)
Browse files Browse the repository at this point in the history
  • Loading branch information
ueshin authored and HyukjinKwon committed Oct 25, 2019
1 parent c8dcb64 commit 7f53aeb
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 10 deletions.
11 changes: 6 additions & 5 deletions databricks/koalas/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2358,8 +2358,8 @@ def rename(index):
index_name if index_name is not None else rename(i)))
index_map.remove(info)

- new_data_scols = [
- self._internal.scol_for(column).alias(str(name)) for column, name in new_index_map]
+ new_data_scols = [self._internal.scol_for(column).alias(name_like_string(name))
+ for column, name in new_index_map]

if len(index_map) > 0:
index_scols = [scol_for(self._sdf, column) for column, _ in index_map]
Expand All @@ -2379,7 +2379,8 @@ def rename(index):

internal = self._internal.copy(
sdf=sdf,
- data_columns=[str(name) for _, name in new_index_map] + self._internal.data_columns,
+ data_columns=([name_like_string(name) for _, name in new_index_map]
+ + self._internal.data_columns),
index_map=index_map,
column_index=None)

Expand Down Expand Up @@ -2820,7 +2821,7 @@ def duplicated(self, subset=None, keep='first'):
name = self._internal.index_names[0]
else:
name = ('0',)
- column = str(name) if len(name) > 1 else name[0]
+ column = name_like_string(name)

sdf = self._sdf
if column == index_column:
Expand Down Expand Up @@ -3393,7 +3394,7 @@ def _assign(self, kwargs):
adding_column_index = []
for idx, scol in pairs.items():
if idx not in set(i[:len(idx)] for i in self._internal.column_index):
- name = str(idx) if len(idx) > 1 else idx[0]
+ name = name_like_string(idx)
scols.append(scol.alias(name))
adding_data_columns.append(name)
adding_column_index.append(idx)
Expand Down
3 changes: 2 additions & 1 deletion databricks/koalas/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from databricks.koalas.frame import DataFrame
from databricks.koalas.missing.indexes import _MissingPandasLikeIndex, _MissingPandasLikeMultiIndex
from databricks.koalas.series import Series
+ from databricks.koalas.utils import name_like_string


class Index(IndexOpsMixin):
Expand Down Expand Up @@ -232,7 +233,7 @@ def to_series(self, name: Union[str, Tuple[str, ...]] = None) -> Series:
kdf = self._kdf
scol = self._scol
if name is not None:
- scol = scol.alias(str(name))
+ scol = scol.alias(name_like_string(name))
column_index = [None] if len(kdf._internal.index_map) > 1 else kdf._internal.index_names
return Series(kdf._internal.copy(scol=scol,
column_index=column_index,
Expand Down
5 changes: 3 additions & 2 deletions databricks/koalas/internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -727,14 +727,15 @@ def from_pandas(pdf: pd.DataFrame) -> '_InternalFrame':
for i, name in enumerate(index.names)]
else:
name = index.name
- index_map = [(str(name) if name is not None else SPARK_INDEX_NAME_FORMAT(0),
+ index_map = [(name_like_string(name)
+ if name is not None else SPARK_INDEX_NAME_FORMAT(0),
name if name is None or isinstance(name, tuple) else (name,))]

index_columns = [index_column for index_column, _ in index_map]

reset_index = pdf.reset_index()
reset_index.columns = index_columns + data_columns
- schema = StructType([StructField(str(name), infer_pd_series_spark_type(col),
+ schema = StructType([StructField(name_like_string(name), infer_pd_series_spark_type(col),
nullable=bool(col.isnull().any()))
for name, col in reset_index.iteritems()])
for name, col in reset_index.iteritems():
Expand Down
5 changes: 3 additions & 2 deletions databricks/koalas/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -1515,11 +1515,12 @@ def concat(objs, axis=0, join='outer', ignore_index=False):
# TODO: NaN and None difference for missing values. pandas seems filling NaN.
sdf = kdf._sdf
for idx in columns_to_add:
- sdf = sdf.withColumn(str(idx), F.lit(None))
+ sdf = sdf.withColumn(name_like_string(idx), F.lit(None))

kdf = DataFrame(kdf._internal.copy(
sdf=sdf,
- data_columns=kdf._internal.data_columns + [str(idx) for idx in columns_to_add],
+ data_columns=(kdf._internal.data_columns
+ + [name_like_string(idx) for idx in columns_to_add]),
column_index=kdf._internal.column_index + columns_to_add))

kdfs.append(kdf[merged_columns])
Expand Down

0 comments on commit 7f53aeb

Please sign in to comment.