From bc5f18838f6eeb7f1521176293bd6a8bacdcf966 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 1 Mar 2024 08:28:25 -0600 Subject: [PATCH] feat(pyspark): support connecting without an explicit pyspark session object --- ibis/backends/pyspark/__init__.py | 7 ++++++- ibis/backends/pyspark/tests/test_basic.py | 6 ++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/ibis/backends/pyspark/__init__.py b/ibis/backends/pyspark/__init__.py index 955bdfab38b5..39c4a015454b 100644 --- a/ibis/backends/pyspark/__init__.py +++ b/ibis/backends/pyspark/__init__.py @@ -130,7 +130,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._cached_dataframes = {} - def do_connect(self, session: SparkSession) -> None: + def do_connect(self, session: SparkSession | None = None) -> None: """Create a PySpark `Backend` for use with Ibis. Parameters @@ -147,6 +147,11 @@ def do_connect(self, session: SparkSession) -> None: """ + if session is None: + from pyspark.sql import SparkSession + + session = SparkSession.builder.getOrCreate() + self._context = session.sparkContext self._session = session diff --git a/ibis/backends/pyspark/tests/test_basic.py b/ibis/backends/pyspark/tests/test_basic.py index ff1631bf28e8..192d06e4f081 100644 --- a/ibis/backends/pyspark/tests/test_basic.py +++ b/ibis/backends/pyspark/tests/test_basic.py @@ -125,3 +125,9 @@ def test_string_literal_backslash_escaping(con): expr = ibis.literal("\\d\\e") result = con.execute(expr) assert result == "\\d\\e" + + +def test_connect_without_explicit_session(): + con = ibis.pyspark.connect() + result = con.sql("SELECT CAST(1 AS BIGINT) as foo").to_pandas() + tm.assert_frame_equal(result, pd.DataFrame({"foo": [1]}))