Skip to content

Commit

Permalink
Prepare Spark 3 (#1181)
Browse files Browse the repository at this point in the history
I use a manually built version of PySpark for development. As of apache/spark@ee8d661, we need to make some corresponding changes in Koalas.
  • Loading branch information
HyukjinKwon authored Jan 9, 2020
1 parent 9519023 commit 15a6406
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 4 deletions.
6 changes: 5 additions & 1 deletion databricks/koalas/internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
from pyspark._globals import _NoValue, _NoValueType
from pyspark.sql import functions as F, Window
from pyspark.sql.functions import PandasUDFType, pandas_udf
from pyspark.sql.types import DataType, StructField, StructType, to_arrow_type, LongType
from pyspark.sql.types import DataType, StructField, StructType, LongType
try:
from pyspark.sql.types import to_arrow_type
except ImportError:
from pyspark.sql.pandas.types import to_arrow_type

from databricks import koalas as ks # For running doctests and reference resolution in PyCharm.
from databricks.koalas.config import get_option
Expand Down
10 changes: 7 additions & 3 deletions databricks/koalas/typedef.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@
from pyspark.sql import Column
from pyspark.sql.functions import pandas_udf
import pyspark.sql.types as types
try:
from pyspark.sql.types import to_arrow_type, from_arrow_type
except ImportError:
from pyspark.sql.pandas.types import to_arrow_type, from_arrow_type

from databricks import koalas as ks # For running doctests and reference resolution in PyCharm.

Expand Down Expand Up @@ -150,7 +154,7 @@ def spark_type_to_pandas_dtype(spark_type):
if isinstance(spark_type, types.TimestampType):
return np.dtype('datetime64[ns]')
else:
return np.dtype(types.to_arrow_type(spark_type).to_pandas_dtype())
return np.dtype(to_arrow_type(spark_type).to_pandas_dtype())


def as_python_type(spark_tpe):
Expand All @@ -167,11 +171,11 @@ def infer_pd_series_spark_type(s: pd.Series) -> types.DataType:
if dt == np.dtype('object'):
if len(s) == 0 or s.isnull().all():
raise ValueError("can not infer schema from empty or null dataset")
return types.from_arrow_type(pa.Array.from_pandas(s).type)
return from_arrow_type(pa.Array.from_pandas(s).type)
elif is_datetime64_dtype(dt) or is_datetime64tz_dtype(dt):
return types.TimestampType()
else:
return types.from_arrow_type(pa.from_numpy_dtype(dt))
return from_arrow_type(pa.from_numpy_dtype(dt))


def _make_fun(f: typing.Callable, return_type: types.DataType, *args, **kwargs) -> 'ks.Series':
Expand Down

0 comments on commit 15a6406

Please sign in to comment.