diff --git a/spiceaidocs/docs/data-connectors/databricks.md b/spiceaidocs/docs/data-connectors/databricks.md index c593d9e9f..ccf518755 100644 --- a/spiceaidocs/docs/data-connectors/databricks.md +++ b/spiceaidocs/docs/data-connectors/databricks.md @@ -8,21 +8,71 @@ pagination_prev: null import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -Databricks as a connector for federated SQL query against Databricks using [Spark Connect](https://www.databricks.com/blog/2022/07/07/introducing-spark-connect-the-power-of-apache-spark-everywhere.html) or directly from Delta Tables in S3. +Databricks as a connector for federated SQL query against Databricks using [Spark Connect](https://www.databricks.com/blog/2022/07/07/introducing-spark-connect-the-power-of-apache-spark-everywhere.html) or directly from [Delta Lake](https://delta.io/) tables. ## Configuration -`spice login databricks` can be used to configure the Databricks access token for the Spice runtime. +`spice login databricks` can be used to configure the secrets needed for Databricks. + + + + ```yaml + params: + endpoint: dbc-a1b2345c-d6e7.cloud.databricks.com + mode: spark_connect + databricks_cluster_id: 1234-567890-abcde123 + ``` + + ```bash + spice login databricks --token + ``` + + + + ```yaml + params: + endpoint: dbc-a1b2345c-d6e7.cloud.databricks.com + mode: delta_lake + ``` + + ```bash + spice login databricks --token --aws-region --aws-access-key-id --aws-secret-access-key + ``` + + + + ```yaml + params: + endpoint: dbc-a1b2345c-d6e7.cloud.databricks.com + mode: delta_lake + ``` + + ```bash + spice login databricks --token --azure-storage-account-name --azure-storage-access-key + ``` + + + + ```yaml + params: + endpoint: dbc-a1b2345c-d6e7.cloud.databricks.com + mode: delta_lake + ``` + + ```bash + spice login databricks --token --google-service-account-path /path/to/service-account.json + ``` + + + ### Parameters - `endpoint`: The endpoint of the Databricks instance. 
- `mode`: The execution mode for querying against Databricks. The default is `spark_connect`. Possible values: - - `spark_connect`: Use Spark Connect to query against Databricks. - - `s3`: Query directly from Delta Tables in S3. -- `format`: The format of the data to query. The default is `deltalake`. Only valid when `mode` is `s3`. Possible values: - - `deltalake`: Query Delta Tables. -- `databricks-cluster-id`: The ID of the compute cluster in Databricks to use for the query. Only valid when `mode` is `spark_connect`. + - `spark_connect`: Use Spark Connect to query against Databricks. Requires a Spark cluster to be available. + - `delta_lake`: Query directly from Delta Tables. Requires the object store credentials to be provided. +- `databricks_cluster_id`: The ID of the compute cluster in Databricks to use for the query. Only valid when `mode` is `spark_connect`. - `databricks_use_ssl`: If true, use a TLS connection to connect to the Databricks endpoint. Default is `true`. ### Auth @@ -114,9 +164,17 @@ Check [Secrets Stores](/secret-stores) for more details. 
```yaml datasets: + # Example for Spark Connect + - from: databricks:spiceai.datasets.my_awesome_table // A reference to a table in the Databricks unity catalog + name: my_delta_lake_table + params: + endpoint: dbc-a1b2345c-d6e7.cloud.databricks.com + mode: spark_connect + databricks_cluster_id: 1234-567890-abcde123 + # Example for Delta Lake - from: databricks:spiceai.datasets.my_awesome_table // A reference to a table in the Databricks unity catalog name: my_delta_lake_table - params: - endpoint: dbc-a1b2345c-d6e7.cloud.databricks.com - databricks-cluster-id: 1234-567890-abcde123 + params: + endpoint: dbc-a1b2345c-d6e7.cloud.databricks.com + mode: delta_lake ``` diff --git a/spiceaidocs/docs/data-connectors/delta-lake.md b/spiceaidocs/docs/data-connectors/delta-lake.md new file mode 100644 index 000000000..865e6947d --- /dev/null +++ b/spiceaidocs/docs/data-connectors/delta-lake.md @@ -0,0 +1,138 @@ +--- +title: 'Delta Lake Data Connector' +sidebar_label: 'Delta Lake Data Connector' +description: 'Delta Lake Data Connector Documentation' +pagination_prev: null +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +Query/accelerate [Delta Lake](https://delta.io/) tables in Spice. + +## Configuration + +`spice login delta_lake` can be used to configure the secrets needed for connecting to Delta Lake tables. 
+ + + + ```bash + spice login delta_lake --aws-region --aws-access-key-id --aws-secret-access-key + ``` + + + + ```bash + spice login delta_lake --azure-storage-account-name --azure-storage-access-key + ``` + + + + ```bash + spice login delta_lake --google-service-account-path /path/to/service-account.json + ``` + + + + +## Example + +```yaml +datasets: + # Example for local Delta Lake + - from: delta_lake:/path/to/local/delta/table # A local filesystem path to a Delta Lake table + name: my_delta_lake_table + # Example for Delta Lake on S3 + - from: delta_lake:s3://my_bucket/path/to/s3/delta/table/ # A reference to a table in S3 + name: my_delta_lake_table + # Example for Delta Lake on Azure Blob + - from: delta_lake:abfss://my_container@my_account.dfs.core.windows.net/path/to/azure/delta/table/ # A reference to a table in Azure Blob + name: my_delta_lake_table +``` + +### Auth + +Object store credentials are required to access non-public Delta Lake tables. + +Check [Secret Stores](/secret-stores) for more details. + + + + ```bash + spice login delta_lake --aws-region --aws-access-key-id --aws-secret-access-key + ``` + + Learn more about [File Secret Store](/secret-stores/file). + + + + ```bash + SPICE_SECRET_DELTA_LAKE_AWS_REGION= \ + SPICE_SECRET_DELTA_LAKE_AWS_ACCESS_KEY_ID= \ + SPICE_SECRET_DELTA_LAKE_AWS_SECRET_ACCESS_KEY= + ``` + + Learn more about [Env Secret Store](/secret-stores/env). + + + + ```bash + kubectl create secret generic delta_lake \ + --from-literal=aws-region='' \ + --from-literal=aws-access-key-id='' \ + --from-literal=aws-secret-access-key='' + ``` + + `spicepod.yaml` + ```yaml + version: v1beta1 + kind: Spicepod + name: spice-app + + secrets: + store: kubernetes + + # <...> + ``` + + Learn more about [Kubernetes Secret Store](/secret-stores/kubernetes). 
+ + + + Add a new keychain entry (macOS), with the secrets as a JSON string: + + ```bash + security add-generic-password -l "Delta Lake Secret" \ + -a spiced -s spice_secret_delta_lake \ + -w "$(echo -n '{"aws-region": "", "aws-access-key-id": "", "aws-secret-access-key": ""}')" + ``` + + `spicepod.yaml` + ```yaml + version: v1beta1 + kind: Spicepod + name: spice-app + + secrets: + store: keyring + + # <...> + ``` + + Learn more about [Keyring Secret Store](/secret-stores/keyring). + + +