Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support __iter__() for DataFrame and explicitly disable for Series and Index. #836

Merged
merged 3 commits into from
Sep 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion databricks/koalas/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,9 @@ def calculate_columns_axis(*cols):

# Arithmetic Operators
def _map_series_op(self, op, other):
if not isinstance(other, DataFrame) and is_sequence(other):
from databricks.koalas.base import IndexOpsMixin
if not isinstance(other, DataFrame) and (isinstance(other, IndexOpsMixin) or
is_sequence(other)):
raise ValueError(
"%s with a sequence is currently not supported; "
"however, got %s." % (op, type(other)))
Expand Down Expand Up @@ -7049,6 +7051,9 @@ def __dir__(self):
fields = [f for f in self._sdf.schema.fieldNames() if ' ' not in f]
return super(DataFrame, self).__dir__() + fields

def __iter__(self):
# Iterating a DataFrame is defined as iterating `self.columns`, so
# `iter(df)` yields whatever `iter(df.columns)` yields rather than rows.
return iter(self.columns)

@classmethod
def _validate_axis(cls, axis=0):
if axis not in (0, 1, 'index', 'columns', None):
Expand Down
6 changes: 6 additions & 0 deletions databricks/koalas/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,9 @@ def __repr__(self):
return repr_string + footer
return repr_string

def __iter__(self):
# Explicitly route iteration to the `__iter__` entry of
# _MissingPandasLikeIndex, which is declared via `unsupported_function`.
# NOTE(review): presumably this raises instead of iterating -- confirm
# against the definition of `unsupported_function`.
return _MissingPandasLikeIndex.__iter__(self)


class MultiIndex(Index):
"""
Expand Down Expand Up @@ -445,3 +448,6 @@ def __repr__(self):
footer = '\nShowing only the first {}'.format(max_display_count)
return repr_string + footer
return repr_string

def __iter__(self):
# Explicitly route iteration to the `__iter__` entry of
# _MissingPandasLikeMultiIndex, which is declared via `unsupported_function`.
# NOTE(review): presumably this raises instead of iterating -- confirm
# against the definition of `unsupported_function`.
return _MissingPandasLikeMultiIndex.__iter__(self)
4 changes: 4 additions & 0 deletions databricks/koalas/missing/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,7 @@
tolist = lambda f: f(
'tolist',
reason="If you want to collect your data as an NumPy array, use 'to_numpy()' instead.")

# Factory for the `__iter__` placeholder: `f` is called with the method name
# '__iter__' and a `reason` message pointing users at `to_numpy()`.
# In the missing/* modules it is invoked as `common.__iter__(unsupported_function)`.
__iter__ = lambda f: f(
'__iter__',
reason="If you want to collect your data as an NumPy array, use 'to_numpy()' instead.")
2 changes: 2 additions & 0 deletions databricks/koalas/missing/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ class _MissingPandasLikeIndex(object):
memory_usage = common.memory_usage(unsupported_function)
to_list = common.to_list(unsupported_function)
tolist = common.tolist(unsupported_function)
__iter__ = common.__iter__(unsupported_function)


class _MissingPandasLikeMultiIndex(object):
Expand Down Expand Up @@ -234,6 +235,7 @@ class _MissingPandasLikeMultiIndex(object):
# Functions we won't support.
values = common.values(unsupported_property)
array = common.array(unsupported_property)
__iter__ = common.__iter__(unsupported_function)

# Properties we won't support.
memory_usage = common.memory_usage(unsupported_function)
Expand Down
1 change: 1 addition & 0 deletions databricks/koalas/missing/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,4 @@ class _MissingPandasLikeSeries(object):
memory_usage = common.memory_usage(unsupported_function)
to_pickle = common.to_pickle(unsupported_function)
to_xarray = common.to_xarray(unsupported_function)
__iter__ = common.__iter__(unsupported_function)
4 changes: 3 additions & 1 deletion databricks/koalas/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
DoubleType, BooleanType, TimestampType, DecimalType, StringType, DateType, StructType

from databricks import koalas as ks # For running doctests and reference resolution in PyCharm.
from databricks.koalas.base import IndexOpsMixin
from databricks.koalas.utils import default_session
from databricks.koalas.frame import DataFrame, _reduce_spark_multi
from databricks.koalas.internal import _InternalFrame, IndexMap
Expand Down Expand Up @@ -1370,7 +1371,8 @@ def concat(objs, axis=0, join='outer', ignore_index=False):
0 c 3
1 d 4
"""
if not isinstance(objs, Iterable): # TODO: support dict
if isinstance(objs, (DataFrame, IndexOpsMixin)) or \
not isinstance(objs, Iterable): # TODO: support dict
raise TypeError('first argument must be an iterable of koalas '
'objects, you passed an object of type '
'"{name}"'.format(name=type(objs).__name__))
Expand Down
5 changes: 4 additions & 1 deletion databricks/koalas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3076,7 +3076,7 @@ def __len__(self):
return len(self.to_dataframe())

def __getitem__(self, key):
return Series(self._scol.__getitem__(key), anchor=self._kdf, index=self._index_map)
return self._with_new_scol(self._scol.__getitem__(key))

def __getattr__(self, item: str_type) -> Any:
if item.startswith("__") or item.startswith("_pandas_") or item.startswith("_spark_"):
Expand Down Expand Up @@ -3127,6 +3127,9 @@ def __dir__(self):
fields = [f for f in self.schema.fieldNames() if ' ' not in f]
return super(Series, self).__dir__() + fields

def __iter__(self):
# Explicitly route iteration to the `__iter__` entry of
# _MissingPandasLikeSeries, which is declared via `unsupported_function`.
# NOTE(review): presumably this raises instead of iterating -- confirm
# against the definition of `unsupported_function`.
return _MissingPandasLikeSeries.__iter__(self)

def _pandas_orig_repr(self):
# TODO: figure out how to reuse the original one.
return 'Column<%s>' % self._scol._jc.toString().encode('utf8')
Expand Down