From 4e3fb6ad39a3ece7f83290382d409de40b254359 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Thu, 18 Jul 2024 12:02:15 -0400 Subject: [PATCH] chore(deps): add iceberg jar to pyspark install for catalog testing --- ibis/backends/pyspark/tests/conftest.py | 10 ++++++++++ poetry-overrides.nix | 17 ++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/ibis/backends/pyspark/tests/conftest.py b/ibis/backends/pyspark/tests/conftest.py index 7ffce3ac297e..d4c068a03228 100644 --- a/ibis/backends/pyspark/tests/conftest.py +++ b/ibis/backends/pyspark/tests/conftest.py @@ -168,6 +168,16 @@ def connect(*, tmpdir, worker_id, **kw): .config("spark.sql.streaming.schemaInference", True) ) + config = ( + config.config( + "spark.sql.extensions", + "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions", + ) + .config("spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog") + .config("spark.sql.catalog.local.type", "hadoop") + .config("spark.sql.catalog.local.warehouse", "icehouse") + ) + try: from delta.pip_utils import configure_spark_with_delta_pip except ImportError: diff --git a/poetry-overrides.nix b/poetry-overrides.nix index e2e4d39a8cde..ee5829919c46 100644 --- a/poetry-overrides.nix +++ b/poetry-overrides.nix @@ -1 +1,16 @@ -_final: _prev: { } +final: prev: { + pyspark = prev.pyspark.overridePythonAttrs (attrs: + let + icebergJarUrl = "https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.5_2.12/1.5.2/iceberg-spark-runtime-3.5_2.12-1.5.2.jar"; + icebergJar = final.pkgs.fetchurl { + name = "iceberg-spark-runtime-3.5_2.12-1.5.2.jar"; + url = icebergJarUrl; + sha256 = "12v1704h0bq3qr2fci0mckg9171lyr8v6983wpa83k06v1w4pv1a"; + }; + in + { + postInstall = attrs.postInstall or "" + '' + cp ${icebergJar} $out/${final.python.sitePackages}/pyspark/jars/${icebergJar.name} + ''; + }); +}