From aed3ecbba2cfe64e50f96cf78265f144f03088af Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Fri, 1 Nov 2019 14:23:16 -0700 Subject: [PATCH] Remove `Series.schema`. --- databricks/koalas/indexing.py | 4 ++-- databricks/koalas/internal.py | 2 +- databricks/koalas/series.py | 17 ++++++----------- docs/source/reference/series.rst | 2 +- 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/databricks/koalas/indexing.py b/databricks/koalas/indexing.py index 8813ad2079..885223a65f 100644 --- a/databricks/koalas/indexing.py +++ b/databricks/koalas/indexing.py @@ -392,7 +392,7 @@ def raiseNotImplemented(description): stop = rows_sel.stop index_column = self._kdf.index.to_series() - index_data_type = index_column.schema[0].dataType + index_data_type = index_column.spark_type cond = [] if start is not None: cond.append(index_column._scol >= F.lit(start).cast(index_data_type)) @@ -414,7 +414,7 @@ def raiseNotImplemented(description): sdf = sdf.where(F.lit(False)) elif len(self._kdf._internal.index_columns) == 1: index_column = self._kdf.index.to_series() - index_data_type = index_column.schema[0].dataType + index_data_type = index_column.spark_type if len(rows_sel) == 1: sdf = sdf.where( index_column._scol == F.lit(rows_sel[0]).cast(index_data_type)) diff --git a/databricks/koalas/internal.py b/databricks/koalas/internal.py index 0701c3388b..cd45965a4c 100644 --- a/databricks/koalas/internal.py +++ b/databricks/koalas/internal.py @@ -536,7 +536,7 @@ def scol_for(self, column_name_or_index: Union[str, Tuple[str, ...]]) -> spark.C def spark_type_for(self, column_name_or_index: Union[str, Tuple[str, ...]]) -> DataType: """ Return DataType for the given column name or index. """ - return self._sdf.schema[self.column_name_for(column_name_or_index)].dataType + return self._sdf.select(self.scol_for(column_name_or_index)).schema[0].dataType @property def sdf(self) -> spark.DataFrame: diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py index e0820342e5..dd00f1d64e 100644 --- a/databricks/koalas/series.py +++ b/databricks/koalas/series.py @@ -350,7 +350,7 @@ def dtypes(self): @property def spark_type(self): """ Returns the data type as defined by Spark, as a Spark DataType object.""" - return self.schema.fields[-1].dataType + return self._internal.spark_type_for(self._internal.column_index[0]) plot = CachedAccessor("plot", KoalasSeriesPlotMethods) @@ -798,10 +798,10 @@ def astype(self, dtype) -> 'Series': return self._with_new_scol(self._scol.cast(spark_type)) def getField(self, name): - if not isinstance(self.schema, StructType): - raise AttributeError("Not a struct: {}".format(self.schema)) + if not isinstance(self.spark_type, StructType): + raise AttributeError("Not a struct: {}".format(self.spark_type)) else: - fnames = self.schema.fieldNames() + fnames = self.spark_type.fieldNames() if name not in fnames: raise AttributeError( "Field {} not found, possible values are {}".format(name, ", ".join(fnames))) @@ -811,11 +811,6 @@ def alias(self, name): """An alias for :meth:`Series.rename`.""" return self.rename(name) - @property - def schema(self) -> StructType: - """Return the underlying Spark DataFrame's schema.""" - return self.to_dataframe()._sdf.schema - @property def shape(self): """Return a tuple of the shape of the underlying data.""" @@ -4030,10 +4025,10 @@ def __repr__(self): return pser.to_string(name=self.name, dtype=self.dtype) def __dir__(self): - if not isinstance(self.schema, StructType): + if not isinstance(self.spark_type, StructType): fields = [] else: - fields = [f for f in self.schema.fieldNames() if ' ' not in f] + fields = [f for f in self.spark_type.fieldNames() if ' ' not in f] return super(Series, self).__dir__() + fields def __iter__(self): diff --git a/docs/source/reference/series.rst b/docs/source/reference/series.rst index 97ee98f6c4..a5f5e37408 100644 --- a/docs/source/reference/series.rst +++ b/docs/source/reference/series.rst @@ -26,7 +26,7 @@ Attributes Series.dtype Series.dtypes Series.name - Series.schema + Series.spark_type Series.shape Series.size Series.empty