From e04013aed6e344fcffa94c5998029edcd788435b Mon Sep 17 00:00:00 2001
From: Edmondo Porcu
Date: Tue, 30 Apr 2024 18:52:45 -0400
Subject: [PATCH 1/2] Spark docs

---
 spiceaidocs/docs/data-connectors/spark.md | 109 ++++++++++++++++++
 .../docs/reference/spicepod/datasets.md   |   1 +
 2 files changed, 110 insertions(+)
 create mode 100644 spiceaidocs/docs/data-connectors/spark.md

diff --git a/spiceaidocs/docs/data-connectors/spark.md b/spiceaidocs/docs/data-connectors/spark.md
new file mode 100644
index 000000000..c28ebcace
--- /dev/null
+++ b/spiceaidocs/docs/data-connectors/spark.md
@@ -0,0 +1,109 @@
+---
+title: 'Apache Spark Connector'
+sidebar_label: 'Apache Spark Connector'
+description: 'Apache Spark Connector Documentation'
+pagination_prev: null
+---
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+Apache Spark as a connector for federated SQL query against a Spark Cluster using [Spark Connect](https://spark.apache.org/docs/latest/spark-connect-overview.html).
+
+## Configuration
+
+The Apache Spark Connector can be used in two ways: specifying a plaintext connection string using the `spark_remote` parameter or specifying a `spark_remote` secret. The connector will fail if both configurations are set
+
+
+### Parameters
+- `spark_remote`: A spark remote connection URI
+
+### Auth
+
+If your Spark cluster is configured to only accept authenticated requests, setting `spark_remote` as a dataset param is not acceptable. In this use cases, you should use a secret named `spark` with keys `spark_remote`.
+
+Check [Secrets Stores](/secret-stores) for more details.
+
+  ```bash
+  spice login spark --spark_remote 
+  ```
+
+  Learn more about [File Secret Store](/secret-stores/file).
+
+  ```bash
+  SPICE_SECRET_SPARK_SPARK_REMOTE= \
+  spice run
+  ```
+
+  `spicepod.yaml`
+  ```yaml
+  version: v1beta1
+  kind: Spicepod
+  name: spice-app
+
+  secrets:
+    store: env
+
+  # <...>
+  ```
+
+  Learn more about [Env Secret Store](/secret-stores/env).
+
+  ```bash
+  kubectl create secret generic spark \
+    --from-literal=spark_remote=''
+  ```
+
+  `spicepod.yaml`
+  ```yaml
+  version: v1beta1
+  kind: Spicepod
+  name: spice-app
+
+  secrets:
+    store: kubernetes
+
+  # <...>
+  ```
+
+  Learn more about [Kubernetes Secret Store](/secret-stores/kubernetes).
+
+  Add a new keychain entry (macOS) with the secret value as a JSON string:
+
+  ```bash
+  security add-generic-password -l "Spark Remote" \
+  -a spiced -s spice_secret_spark \
+  -w $(echo -n '{"spark_remote": "spark"}')
+  ```
+
+  `spicepod.yaml`
+  ```yaml
+  version: v1beta1
+  kind: Spicepod
+  name: spice-app
+
+  secrets:
+    store: keyring
+
+  # <...>
+  ```
+
+  Learn more about [Keyring Secret Store](/secret-stores/keyring).
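+
+For example, a dataset that relies on one of the secret stores above omits the inline `spark_remote` param entirely (an illustrative sketch; the table path and `my_table` name are placeholders, and the remote URI is resolved from the `spark` secret's `spark_remote` key):
+
+```yaml
+datasets:
+  - from: spark:spiceai.datasets.my_awesome_table
+    name: my_table
+    # No inline spark_remote param; the connector reads it from the `spark` secret
+```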
+
+
+
+## Example
+
+```yaml
+datasets:
+  - from: spark:spiceai.datasets.my_awesome_table
+    name: my_table
+    params:
+      spark_remote: sc://localhost
+
+```

diff --git a/spiceaidocs/docs/reference/spicepod/datasets.md b/spiceaidocs/docs/reference/spicepod/datasets.md
index 08f1b6c41..28f8dd8a6 100644
--- a/spiceaidocs/docs/reference/spicepod/datasets.md
+++ b/spiceaidocs/docs/reference/spicepod/datasets.md
@@ -78,6 +78,7 @@ Where:
 
   - [`spiceai`](../../data-connectors/spiceai.md)
   - [`dremio`](../../data-connectors/dremio.md)
+  - [`spark`](../../data-connectors/spark.md)
   - [`databricks`](../../data-connectors/databricks.md)
   - [`s3`](../../data-connectors/s3.md)
   - [`postgres`](../../data-connectors/postgres/index.md)

From ce1d2e5be8e0cb7ac8f1af78be2e314aef3cb770 Mon Sep 17 00:00:00 2001
From: Phillip LeBlanc
Date: Wed, 1 May 2024 12:20:42 +0900
Subject: [PATCH 2/2] Apply suggestions from code review

---
 spiceaidocs/docs/data-connectors/spark.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/spiceaidocs/docs/data-connectors/spark.md b/spiceaidocs/docs/data-connectors/spark.md
index c28ebcace..2868c4075 100644
--- a/spiceaidocs/docs/data-connectors/spark.md
+++ b/spiceaidocs/docs/data-connectors/spark.md
@@ -1,7 +1,7 @@
 ---
 title: 'Apache Spark Connector'
 sidebar_label: 'Apache Spark Connector'
-description: 'Apache Spark Connector Documentation' 
+description: 'Apache Spark Connector Documentation'
 pagination_prev: null
 ---
 
@@ -12,15 +12,15 @@ Apache Spark as a connector for federated SQL query against a Spark Cluster usin
 
 ## Configuration
 
-The Apache Spark Connector can be used in two ways: specifying a plaintext connection string using the `spark_remote` parameter or specifying a `spark_remote` secret. The connector will fail if both configurations are set
+The Apache Spark Connector can be used in two ways: specifying a plaintext connection string using the `spark_remote` parameter or specifying a `spark_remote` secret. The connector will fail if both configurations are set.
 
 
 ### Parameters
-- `spark_remote`: A spark remote connection URI
+- `spark_remote`: A [spark remote](https://spark.apache.org/docs/latest/spark-connect-overview.html#set-sparkremote-environment-variable) connection URI
 
 ### Auth
 
-If your Spark cluster is configured to only accept authenticated requests, setting `spark_remote` as a dataset param is not acceptable. In this use cases, you should use a secret named `spark` with keys `spark_remote`.
+Spark clusters configured to accept authenticated requests should not set `spark_remote` as an inline dataset param, as it will contain sensitive data. For this case, use a secret named `spark` with key `spark_remote`.
 
 Check [Secrets Stores](/secret-stores) for more details.
 
@@ -103,7 +103,7 @@ Check [Secrets Stores](/secret-stores) for more details.
 datasets:
   - from: spark:spiceai.datasets.my_awesome_table
     name: my_table
-    params:
-      spark_remote: sc://localhost
+    params:
+      spark_remote: sc://localhost
 
 ```
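
Putting the reviewed pieces together, the following is a minimal `spicepod.yaml` sketch that pairs the Env Secret Store from the Auth section with the dataset from the example above (the `spiceai.datasets.my_awesome_table` path and `my_table` name remain placeholders; the actual Spark Connect URI is supplied at runtime through `SPICE_SECRET_SPARK_SPARK_REMOTE` rather than as an inline param):

```yaml
version: v1beta1
kind: Spicepod
name: spice-app

secrets:
  store: env

datasets:
  - from: spark:spiceai.datasets.my_awesome_table
    name: my_table
    # spark_remote intentionally omitted; it is read from the env secret store
```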