Skip to content

Commit

Permalink
Prepare Spark 3 (#1181)
Browse files Browse the repository at this point in the history
I use a manually built version of PySpark for development. As of apache/spark@ee8d661, we need to make some corresponding changes in Koalas.
  • Loading branch information
HyukjinKwon authored Jan 9, 2020
1 parent 9519023 commit 15a6406
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 4 deletions.
6 changes: 5 additions & 1 deletion databricks/koalas/internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
from pyspark._globals import _NoValue, _NoValueType
from pyspark.sql import functions as F, Window
from pyspark.sql.functions import PandasUDFType, pandas_udf
from pyspark.sql.types import DataType, StructField, StructType, to_arrow_type, LongType
from pyspark.sql.types import DataType, StructField, StructType, LongType
try:
from pyspark.sql.types import to_arrow_type
except ImportError:
from pyspark.sql.pandas.types import to_arrow_type

from databricks import koalas as ks # For running doctests and reference resolution in PyCharm.
from databricks.koalas.config import get_option
Expand Down
10 changes: 7 additions & 3 deletions databricks/koalas/typedef.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@
from pyspark.sql import Column
from pyspark.sql.functions import pandas_udf
import pyspark.sql.types as types
try:
from pyspark.sql.types import to_arrow_type, from_arrow_type
except ImportError:
from pyspark.sql.pandas.types import to_arrow_type, from_arrow_type

from databricks import koalas as ks # For running doctests and reference resolution in PyCharm.

Expand Down Expand Up @@ -150,7 +154,7 @@ def spark_type_to_pandas_dtype(spark_type):
if isinstance(spark_type, types.TimestampType):
return np.dtype('datetime64[ns]')
else:
return np.dtype(types.to_arrow_type(spark_type).to_pandas_dtype())
return np.dtype(to_arrow_type(spark_type).to_pandas_dtype())


def as_python_type(spark_tpe):
Expand All @@ -167,11 +171,11 @@ def infer_pd_series_spark_type(s: pd.Series) -> types.DataType:
if dt == np.dtype('object'):
if len(s) == 0 or s.isnull().all():
raise ValueError("can not infer schema from empty or null dataset")
return types.from_arrow_type(pa.Array.from_pandas(s).type)
return from_arrow_type(pa.Array.from_pandas(s).type)
elif is_datetime64_dtype(dt) or is_datetime64tz_dtype(dt):
return types.TimestampType()
else:
return types.from_arrow_type(pa.from_numpy_dtype(dt))
return from_arrow_type(pa.from_numpy_dtype(dt))


def _make_fun(f: typing.Callable, return_type: types.DataType, *args, **kwargs) -> 'ks.Series':
Expand Down

0 comments on commit 15a6406

Please sign in to comment.