Skip to content

Commit

Permalink
Series.unique (#249)
Browse files Browse the repository at this point in the history
  • Loading branch information
HyukjinKwon authored and rxin committed May 8, 2019
1 parent 5ce0014 commit b5e08b5
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 6 deletions.
4 changes: 0 additions & 4 deletions databricks/koalas/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1167,10 +1167,6 @@ def count(self):
"""
return self._reduce_for_stat_function(_Frame._count_expr)

def unique(self):
sdf = self._sdf
return DataFrame(spark.DataFrame(sdf._jdf.distinct(), sdf.sql_ctx), self._metadata.copy())

def drop(self, labels=None, axis=1, columns: Union[str, List[str]] = None):
"""
Drop specified labels from columns.
Expand Down
33 changes: 31 additions & 2 deletions databricks/koalas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,9 +491,38 @@ def dropna(self, axis=0, inplace=False, **kwargs):
def head(self, n=5):
return _col(self.to_dataframe().head(n))

# TODO: Categorical type isn't supported (due to PySpark's limitation) and
# some doctests related to timestamps were not added.
def unique(self):
# Pandas wants a series/array-like object
return _col(self.to_dataframe().unique())
"""
Return unique values of Series object.
Uniques are NOT guaranteed to be returned in order of appearance (see the
first example below), and the result is NOT sorted.
.. note:: This method returns a newly created Series whereas Pandas returns
the unique values as a NumPy array.
Returns
-------
Returns the unique values as a Series.
See Examples section.
Examples
--------
>>> ks.Series([2, 1, 3, 3], name='A').unique()
0 1
1 3
2 2
Name: A, dtype: int64
>>> ks.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique()
0 2016-01-01
Name: 0, dtype: datetime64[ns]
"""
sdf = self.to_dataframe()._sdf
return _col(DataFrame(sdf.select(self._scol).distinct()))

# TODO: Update Documentation for Bins Parameter when it's supported
def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True):
Expand Down

0 comments on commit b5e08b5

Please sign in to comment.