Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add type annotations to Frame (generic.py) #1894

Merged
merged 5 commits into from
Nov 7, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 78 additions & 35 deletions databricks/koalas/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from collections.abc import Iterable
from distutils.version import LooseVersion
from functools import reduce
from typing import List, Optional, Tuple, Union, TYPE_CHECKING
from typing import List, Optional, Tuple, Union, TYPE_CHECKING, cast
import warnings

import numpy as np # noqa: F401
Expand All @@ -37,6 +37,7 @@
from databricks.koalas.indexing import AtIndexer, iAtIndexer, iLocIndexer, LocIndexer
from databricks.koalas.internal import InternalFrame, NATURAL_ORDER_COLUMN_NAME
from databricks.koalas.spark import functions as SF
from databricks.koalas.typedef import Scalar
from databricks.koalas.utils import (
is_name_like_tuple,
is_name_like_value,
Expand Down Expand Up @@ -70,8 +71,34 @@ def _apply_series_op(self, op, should_resolve: bool = False):
def _reduce_for_stat_function(self, sfun, name, axis=None, numeric_only=True):
pass

@property
@abstractmethod
def dtypes(self):
    """Abstract: return the dtype(s) of the underlying data.

    Implementations may return either a single dtype or an iterable of
    dtypes — ``get_dtype_counts`` in this class explicitly handles both
    cases via an ``isinstance(self.dtypes, Iterable)`` check.
    """
    pass

@abstractmethod
def to_pandas(self):
    """Abstract: materialize this distributed object as a pandas object.

    Used by ``to_numpy``/``values`` in this class, which return
    ``self.to_pandas().values``.
    """
    pass

@property
@abstractmethod
def index(self):
    """Abstract property: return the index of this object.

    Implemented by the DataFrame/Series subclasses elsewhere in the
    package; declared here so shared methods can rely on it.
    """
    pass

@abstractmethod
def copy(self):
    """Abstract: return a copy of this object.

    Relied on by ``truncate`` in this class, which calls ``self.copy()``
    when ``copy=True``.
    """
    pass

@abstractmethod
def _to_internal_pandas(self):
    """Abstract (private): return the internal pandas representation.

    Used internally by shared methods such as ``to_excel``.
    """
    pass

@abstractmethod
def head(self, n: int = 5):
    """Abstract: return the first ``n`` rows/elements.

    Parameters
    ----------
    n : int, default 5
        Number of items to return.

    ``squeeze`` in this class calls ``self.head(2)`` and takes ``len()``
    of the result, so implementations must return a sized object.
    """
    pass

# TODO: add 'axis' parameter
def cummin(self, skipna: bool = True):
def cummin(self, skipna: bool = True) -> Union["ks.Series", "ks.DataFrame"]:
"""
Return cumulative minimum over a DataFrame or Series axis.

Expand Down Expand Up @@ -133,7 +160,7 @@ def cummin(self, skipna: bool = True):
) # type: ignore

# TODO: add 'axis' parameter
def cummax(self, skipna: bool = True):
def cummax(self, skipna: bool = True) -> Union["ks.Series", "ks.DataFrame"]:
"""
Return cumulative maximum over a DataFrame or Series axis.

Expand Down Expand Up @@ -196,7 +223,7 @@ def cummax(self, skipna: bool = True):
) # type: ignore

# TODO: add 'axis' parameter
def cumsum(self, skipna: bool = True):
def cumsum(self, skipna: bool = True) -> Union["ks.Series", "ks.DataFrame"]:
"""
Return cumulative sum over a DataFrame or Series axis.

Expand Down Expand Up @@ -261,7 +288,7 @@ def cumsum(self, skipna: bool = True):
# TODO: add 'axis' parameter
# TODO: use pandas_udf to support negative values and other options later
# other window except unbounded ones is supported as of Spark 3.0.
def cumprod(self, skipna: bool = True):
def cumprod(self, skipna: bool = True) -> Union["ks.Series", "ks.DataFrame"]:
"""
Return cumulative product over a DataFrame or Series axis.

Expand Down Expand Up @@ -332,7 +359,7 @@ def cumprod(self, skipna: bool = True):
# TODO: Although this has removed pandas >= 1.0.0, but we're keeping this as deprecated
# since we're using this for `DataFrame.info` internally.
# We can drop it once our minimal pandas version becomes 1.0.0.
def get_dtype_counts(self):
def get_dtype_counts(self) -> pd.Series:
"""
Return counts of unique dtypes in this object.

Expand Down Expand Up @@ -375,8 +402,8 @@ def get_dtype_counts(self):
if not isinstance(self.dtypes, Iterable):
dtypes = [self.dtypes]
else:
dtypes = self.dtypes
return pd.Series(dict(Counter([d.name for d in list(dtypes)])))
dtypes = list(self.dtypes)
return pd.Series(dict(Counter([d.name for d in dtypes])))

def pipe(self, func, *args, **kwargs):
r"""
Expand Down Expand Up @@ -471,7 +498,7 @@ def pipe(self, func, *args, **kwargs):
else:
return func(self, *args, **kwargs)

def to_numpy(self):
def to_numpy(self) -> np.ndarray:
"""
A NumPy ndarray representing the values in this DataFrame or Series.

Expand Down Expand Up @@ -509,7 +536,7 @@ def to_numpy(self):
return self.to_pandas().values

@property
def values(self):
def values(self) -> np.ndarray:
"""
Return a Numpy representation of the DataFrame or the Series.

Expand Down Expand Up @@ -586,7 +613,7 @@ def to_csv(
partition_cols: Optional[Union[str, List[str]]] = None,
index_col: Optional[Union[str, List[str]]] = None,
**options
):
) -> Optional[str]:
r"""
Write object to a comma-separated values (csv) file.

Expand Down Expand Up @@ -640,6 +667,10 @@ def to_csv(
It has higher priority and overwrites all other options.
This parameter only works when `path` is specified.

Returns
-------
str or None

See Also
--------
read_csv
Expand Down Expand Up @@ -801,6 +832,7 @@ def to_csv(
charToEscapeQuoteEscaping=escapechar,
)
builder.options(**options).format("csv").save(path)
return None

def to_json(
self,
Expand All @@ -811,7 +843,7 @@ def to_json(
partition_cols: Optional[Union[str, List[str]]] = None,
index_col: Optional[Union[str, List[str]]] = None,
**options
):
) -> Optional[str]:
"""
Convert the object to a JSON string.

Expand Down Expand Up @@ -860,6 +892,10 @@ def to_json(
It has a higher priority and overwrites all other options.
This parameter only works when `path` is specified.

Returns
-------
str or None

Examples
--------
>>> df = ks.DataFrame([['a', 'b'], ['c', 'd']],
Expand Down Expand Up @@ -913,6 +949,7 @@ def to_json(
builder.partitionBy(partition_cols)
builder._set_opts(compression=compression)
builder.options(**options).format("json").save(path)
return None

def to_excel(
self,
Expand All @@ -932,7 +969,7 @@ def to_excel(
inf_rep="inf",
verbose=True,
freeze_panes=None,
):
) -> None:
"""
Write object to an Excel sheet.

Expand Down Expand Up @@ -1046,7 +1083,7 @@ def to_excel(
kdf._to_internal_pandas(), self.to_excel, f, args
)

def mean(self, axis=None, numeric_only=True):
def mean(self, axis=None, numeric_only=True) -> Union[Scalar, "ks.Series"]:
"""
Return the mean of the values.

Expand Down Expand Up @@ -1091,7 +1128,7 @@ def mean(self, axis=None, numeric_only=True):
F.mean, name="mean", numeric_only=numeric_only, axis=axis
)

def sum(self, axis=None, numeric_only=True):
def sum(self, axis=None, numeric_only=True) -> Union[Scalar, "ks.Series"]:
"""
Return the sum of the values.

Expand Down Expand Up @@ -1136,7 +1173,7 @@ def sum(self, axis=None, numeric_only=True):
F.sum, name="sum", numeric_only=numeric_only, axis=axis
)

def skew(self, axis=None, numeric_only=True):
def skew(self, axis=None, numeric_only=True) -> Union[Scalar, "ks.Series"]:
"""
Return unbiased skew normalized by N-1.

Expand Down Expand Up @@ -1174,7 +1211,7 @@ def skew(self, axis=None, numeric_only=True):
F.skewness, name="skew", numeric_only=numeric_only, axis=axis
)

def kurtosis(self, axis=None, numeric_only=True):
def kurtosis(self, axis=None, numeric_only=True) -> Union[Scalar, "ks.Series"]:
"""
Return unbiased kurtosis using Fisher’s definition of kurtosis (kurtosis of normal == 0.0).
Normalized by N-1.
Expand Down Expand Up @@ -1215,7 +1252,7 @@ def kurtosis(self, axis=None, numeric_only=True):

kurt = kurtosis

def min(self, axis=None, numeric_only=None):
def min(self, axis=None, numeric_only=None) -> Union[Scalar, "ks.Series"]:
"""
Return the minimum of the values.

Expand Down Expand Up @@ -1261,7 +1298,7 @@ def min(self, axis=None, numeric_only=None):
F.min, name="min", numeric_only=numeric_only, axis=axis
)

def max(self, axis=None, numeric_only=None):
def max(self, axis=None, numeric_only=None) -> Union[Scalar, "ks.Series"]:
"""
Return the maximum of the values.

Expand Down Expand Up @@ -1307,7 +1344,7 @@ def max(self, axis=None, numeric_only=None):
F.max, name="max", numeric_only=numeric_only, axis=axis
)

def std(self, axis=None, numeric_only=True):
def std(self, axis=None, numeric_only=True) -> Union[Scalar, "ks.Series"]:
"""
Return sample standard deviation.

Expand Down Expand Up @@ -1352,7 +1389,7 @@ def std(self, axis=None, numeric_only=True):
F.stddev, name="std", numeric_only=numeric_only, axis=axis
)

def var(self, axis=None, numeric_only=True):
def var(self, axis=None, numeric_only=True) -> Union[Scalar, "ks.Series"]:
"""
Return unbiased variance.

Expand Down Expand Up @@ -1425,7 +1462,7 @@ def size(self) -> int:
else:
return len(self) * num_columns # type: ignore

def abs(self):
def abs(self) -> Union["ks.DataFrame", "ks.Series"]:
"""
Return a Series/DataFrame with absolute numeric value of each element.

Expand Down Expand Up @@ -1600,13 +1637,17 @@ def groupby(
"Constructor expects DataFrame or Series; however, " "got [%s]" % (self,)
)

def bool(self):
def bool(self) -> bool:
"""
Return the bool of a single element in the current object.

This must be a boolean scalar value, either True or False. Raise a ValueError if
the object does not have exactly 1 element, or that element is not boolean

Returns
-------
bool

Examples
--------
>>> ks.DataFrame({'a': [True]}).bool()
Expand Down Expand Up @@ -1838,7 +1879,7 @@ def last_valid_index(self):

return last_valid_idx

def median(self, axis=None, numeric_only=True, accuracy=10000):
def median(self, axis=None, numeric_only=True, accuracy=10000) -> Union[Scalar, "ks.Series"]:
"""
Return the median of the values for the requested axis.

Expand Down Expand Up @@ -1934,7 +1975,7 @@ def median(self, axis=None, numeric_only=True, accuracy=10000):
)

# TODO: 'center', 'win_type', 'on', 'axis' parameter should be implemented.
def rolling(self, window, min_periods=None):
def rolling(self, window, min_periods=None) -> Rolling:
"""
Provide rolling transformations.

Expand Down Expand Up @@ -1963,7 +2004,7 @@ def rolling(self, window, min_periods=None):

# TODO: 'center' and 'axis' parameter should be implemented.
# 'axis' implementation, refer https://github.com/databricks/koalas/pull/607
def expanding(self, min_periods=1):
def expanding(self, min_periods=1) -> Expanding:
"""
Provide expanding transformations.

Expand Down Expand Up @@ -2034,7 +2075,7 @@ def get(self, key, default=None):
except (KeyError, ValueError, IndexError):
return default

def squeeze(self, axis=None):
def squeeze(self, axis=None) -> Union[Scalar, "ks.DataFrame", "ks.Series"]:
"""
Squeeze 1 dimensional axis objects into scalars.

Expand Down Expand Up @@ -2158,15 +2199,17 @@ def squeeze(self, axis=None):
return self
else:
return series_from_column
elif isinstance(self, ks.Series):
else:
# The case of Series is simple.
# If Series has only a single value, just return it as a scalar.
# Otherwise, there is no change.
self_top_two = self.head(2)
has_single_value = len(self_top_two) == 1
return self_top_two[0] if has_single_value else self
return cast(Union[Scalar, ks.Series], self_top_two[0] if has_single_value else self)

def truncate(self, before=None, after=None, axis=None, copy=True):
def truncate(
self, before=None, after=None, axis=None, copy=True
) -> Union["ks.DataFrame", "ks.Series"]:
"""
Truncate a Series or DataFrame before and after some index value.

Expand Down Expand Up @@ -2285,7 +2328,7 @@ def truncate(self, before=None, after=None, axis=None, copy=True):
if not indexes_increasing and not indexes.is_monotonic_decreasing:
raise ValueError("truncate requires a sorted index")
if (before is None) and (after is None):
return self.copy() if copy else self
return cast(Union[ks.DataFrame, ks.Series], self.copy() if copy else self)
if (before is not None and after is not None) and before > after:
raise ValueError("Truncate: %s must be after %s" % (after, before))

Expand All @@ -2303,9 +2346,9 @@ def truncate(self, before=None, after=None, axis=None, copy=True):
elif axis == 1:
result = self.loc[:, before:after]

return result.copy() if copy else result
return cast(Union[ks.DataFrame, ks.Series], result.copy() if copy else result)

def to_markdown(self, buf=None, mode=None):
def to_markdown(self, buf=None, mode=None) -> str:
"""
Print Series or DataFrame in Markdown-friendly format.

Expand Down Expand Up @@ -2366,7 +2409,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None):
pass

# TODO: add 'downcast' when value parameter exists
def bfill(self, axis=None, inplace=False, limit=None):
def bfill(self, axis=None, inplace=False, limit=None) -> Union["ks.DataFrame", "ks.Series"]:
"""
Synonym for `DataFrame.fillna()` or `Series.fillna()` with ``method=`bfill```.

Expand Down Expand Up @@ -2440,7 +2483,7 @@ def bfill(self, axis=None, inplace=False, limit=None):
backfill = bfill

# TODO: add 'downcast' when value parameter exists
def ffill(self, axis=None, inplace=False, limit=None):
def ffill(self, axis=None, inplace=False, limit=None) -> Union["ks.DataFrame", "ks.Series"]:
"""
Synonym for `DataFrame.fillna()` or `Series.fillna()` with ``method=`ffill```.

Expand Down