databricks-clusters-cluster.json
{
"typeName": "Databricks::Clusters::Cluster",
"description": "Manage a Databricks Cluster",
"sourceUrl": "https://github.com/aws-ia/cloudformation-databricks-resource-providers.git",
"documentationUrl": "https://github.com/aws-ia/cloudformation-databricks-resource-providers",
"typeConfiguration": {
"properties": {
"DatabricksAccess": {
"$ref": "#/definitions/DatabricksAccess"
}
},
"additionalProperties": false,
"required": [
"DatabricksAccess"
]
},
"definitions": {
"DatabricksAccess": {
"description": "Properties needed to access databricks.",
"type": "object",
"properties": {
"DatabricksInstance": {
"type": "string",
"description": "Domain used to access Databricks"
},
"Token": {
"type": "string"
}
},
"required": [
"DatabricksInstance",
"Token"
],
"additionalProperties": false
},
"Autoscale": {
"description": "Range defining the min and max number of cluster workers",
"type": "object",
"properties": {
"MinWorkers": {
"type": "integer",
"description": "The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation."
},
"MaxWorkers": {
"type": "integer",
"description": "The maximum number of workers to which the cluster can scale up when overloaded. max_workers must be strictly greater than min_workers."
}
},
"required": [
"MaxWorkers",
"MinWorkers"
],
"additionalProperties": false
},
"SparkConf": {
"description": "An object containing a set of optional, user-specified Spark configuration key-value pairs. You can also pass in a string of extra JVM options to the driver and the executors via spark.driver.extraJavaOptions and spark.executor.extraJavaOptions respectively. Example Spark confs: {\"spark.speculation\": true, \"spark.streaming.ui.retainedBatches\": 5} or {\"spark.driver.extraJavaOptions\": \"-verbose:gc -XX:+PrintGCDetails\"}",
"type": "object"
},
"CustomTags": {
"description": "An object containing a set of tags for cluster resources. Databricks tags all cluster resources (such as AWS instances and EBS volumes) with these tags in addition to default_tags.",
"type": "object"
},
"ClusterLogConf": {
"description": "Path to cluster log.",
"type": "object",
"properties": {
"DbfsStorageInfo": {
"description": "DBFS destination. Example: dbfs:/my/path",
"type": "string"
},
"S3StorageInfo": {
"description": "File destination. Example: file:/my/file.sh",
"type": "string"
}
},
"additionalProperties": false
},
"DockerImage": {
"description": "Docker image connection information",
"type": "object",
"properties": {
"Url": {
"description": "URL for the Docker image.",
"type": "string"
},
"BasicAuth": {
"$ref": "#/definitions/DockerBasicAuth"
}
},
"additionalProperties": false
},
"DockerBasicAuth": {
"description": "Basic authentication information for Docker repository",
"type": "object",
"properties": {
"Username": {
"description": "User name for the Docker repository",
"type": "string"
},
"Password": {
"description": "Password for the Docker repository.",
"type": "string"
}
},
"additionalProperties": false
},
"AwsAttributes": {
"description": "Attributes set during cluster creation related to Amazon Web Services.",
"type": "object",
"properties": {
"FirstOnDemand": {
"description": "The first first_on_demand nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, first_on_demand nodes will be placed on on-demand instances and the remainder will be placed on availability instances. This value does not affect cluster size and cannot be mutated over the lifetime of a cluster.",
"type": "integer"
},
"Availability": {
"description": "Availability type used for all subsequent nodes past the first_on_demand ones. Note: If first_on_demand is zero, this availability type will be used for the entire cluster.",
"type": "string",
"enum": [
"SPOT",
"ON_DEMAND",
"SPOT_WITH_FALLBACK"
]
},
"ZoneId": {
"description": "Identifier for the availability zone/datacenter in which the cluster resides. You have three options:\n\nSpecify an availability zone as a string, for example: 'us-west-2a'. The provided availability zone must be in the same region as the Databricks deployment. For example, 'us-west-2a' is not a valid zone ID if the Databricks deployment resides in the 'us-east-1' region.\n\nEnable automatic availability zone selection ('Auto-AZ'), by setting the value 'auto'. Databricks selects the AZ based on available IPs in the workspace subnets and retries in other availability zones if AWS returns insufficient capacity errors.\n\nDo not specify a value. If not specified, a default zone will be used.",
"type": "string"
},
"SpotBidPricePercent": {
"description": "The max price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new i3.xlarge spot instance, then the max price is half of the price of on-demand i3.xlarge instances. Similarly, if this field is set to 200, the max price is twice the price of on-demand i3.xlarge instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose max price percentage matches this field will be considered. For safety, we enforce this field to be no more than 10000.",
"type": "integer"
},
"EbsVolumeType": {
"description": "The type of EBS volumes that will be launched with this cluster.",
"type": "string",
"enum": [
"GENERAL_PURPOSE_SSD",
"THROUGHPUT_OPTIMIZED_HDD"
]
},
"EbsVolumeCount": {
"description": "The number of volumes launched for each instance. You can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail.\n\nThese EBS volumes will be mounted at /ebs0, /ebs1, and etc. Instance store volumes will be mounted at /local_disk0, /local_disk1, and etc.\n\nIf EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogeneously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes.\n\nIf EBS volumes are specified, then the Spark configuration spark.local.dir will be overridden.",
"type": "integer"
},
"EbsVolumeSize": {
"description": "The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. Custom EBS volumes cannot be specified for the legacy node types (memory-optimized and compute-optimized).",
"type": "integer"
},
"EbsVolumeIops": {
"description": "The number of IOPS per EBS gp3 volume.\n\nThis value must be between 3000 and 16000.\n\nThe value of IOPS and throughput is calculated based on AWS documentation to match the maximum performance of a gp2 volume with the same volume size.\n\nFor more information, see the EBS volume limit calculator.",
"type": "integer"
},
"EbsVolumeThroughput": {
"description": "The throughput per EBS gp3 volume, in MiB per second.\n\nThis value must be between 125 and 1000.",
"type": "integer"
}
},
"additionalProperties": false
},
"Destination": {
"type": "object",
"properties": {
"Destination": {
"type": "string"
}
},
"additionalProperties": false
},
"S3destination": {
"type": "object",
"properties": {
"Destination": {
"type": "string"
},
"Region": {
"type": "string"
}
},
"additionalProperties": false
},
"InitScriptsListItem": {
"type": "object",
"properties": {
"S3": {
"$ref": "#/definitions/S3destination"
}
},
"additionalProperties": false
}
},
"properties": {
"ClusterId": {
"description": "Canonical identifier for the cluster",
"type": "string"
},
"NumWorkers": {
"description": "If num_workers, number of worker nodes that this cluster should have. A cluster has one Spark driver and num_workers executors for a total of num_workers + 1 Spark nodes.",
"type": "integer"
},
"Autoscale": {
"$ref": "#/definitions/Autoscale"
},
"ClusterName": {
"description": "Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string.",
"type": "string"
},
"SparkVersion": {
"description": "The runtime version of the cluster. You can retrieve a list of available runtime versions by using the Runtime versions API call. This field is required.",
"type": "string"
},
"SparkConf": {
"$ref": "#/definitions/SparkConf"
},
"AwsAttributes":{
"$ref": "#/definitions/AwsAttributes"
},
"NodeTypeId": {
"description": "This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads A list of available node types can be retrieved by using the List node types API call. This field is required.",
"type": "string"
},
"DriverNodeTypeId": {
"description": "The node type of the Spark driver. This field is optional; if unset, the driver node type will be set as the same value as node_type_id defined above.",
"type": "string"
},
"SshPublicKeys":{
"description": "SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name ubuntu on port 2200. Up to 10 keys can be specified.",
"type": "array",
"insertionOrder": false,
"items": {
"type": "string"
}
},
"CustomTags": {
"$ref": "#/definitions/CustomTags"
},
"InitScripts": {
"description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If cluster_log_conf is specified, init script logs are sent to <destination>/<cluster-ID>/init_scripts.",
"type": "array",
"insertionOrder": false,
"items": {
"$ref": "#/definitions/InitScriptsListItem"
}
},
"SparkEnvVars": {
"description": "An object containing a set of optional, user-specified environment variable key-value pairs. Key-value pairs of the form (X,Y) are exported as is (that is, export X='Y') while launching the driver and workers. In order to specify an additional set of SPARK_DAEMON_JAVA_OPTS, we recommend appending them to $SPARK_DAEMON_JAVA_OPTS as shown in the following example. This ensures that all default Databricks managed environmental variables are included as well. Example Spark environment variables: {\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"} or {\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS",
"type": "object"
},
"AutoterminationMinutes": {
"description": "Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable",
"type": "integer"
},
"EnableElasticDisk": {
"description": "Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to Autoscaling local storage for details.",
"type": "boolean"
},
"DriverInstancePoolId": {
"description": "The optional ID of the instance pool to use for the driver node. You must also specify instance_pool_id. Refer to Instance Pools API 2.0 for details.",
"type": "string"
},
"InstancePoolId": {
"description": "The optional ID of the instance pool to use for cluster nodes. If driver_instance_pool_id is present, instance_pool_id is used for worker nodes only. Otherwise, it is used for both the driver and worker nodes. Refer to Instance Pools API 2.0 for details.",
"type": "string"
},
"IdempotencyToken": {
"description": "An optional token that can be used to guarantee the idempotency of cluster creation requests. If the idempotency token is assigned to a cluster that is not in the TERMINATED state, the request does not create a new cluster but instead returns the ID of the existing cluster. Otherwise, a new cluster is created. The idempotency token is cleared when the cluster is terminated\n\nIf you specify the idempotency token, upon failure you can retry until the request succeeds. Databricks guarantees that exactly one cluster will be launched with that idempotency token.\n\nThis token should have at most 64 characters.",
"type": "string"
},
"ApplyPolicyDefaultValues": {
"description": "Whether to use policy default values for missing cluster attributes.\n\n",
"type": "boolean"
},
"EnableLocalDiskEncryption": {
"description": "Whether encryption of disks locally attached to the cluster is enabled.\n\n",
"type": "boolean"
},
"RuntimeEngine": {
"description": "The type of runtime engine to use. If not specified, the runtime engine type is inferred based on the spark_version value. Allowed values include:\n\nPHOTON: Use the Photon runtime engine type.\n\nSTANDARD: Use the standard runtime engine type.\n\nThis field is optional.",
"type": "string"
},
"State": {
"description": "The state of the cluster.",
"type": "string"
},
"StateMessage": {
"description": "The state of the cluster.",
"type": "string"
},
"LastStateLossTime": {
"type": "integer"
},
"LastActivityTime":{
"type": "integer"
},
"LastRestartedTime": {
"type": "integer"
},
"ClusterMemoryMb": {
"type": "integer"
},
"ClusterCores": {
"type": "integer"
},
"InstanceSource": {
"type": "object",
"properties": {
"InstancePoolId": {
"type": "string"
},
"NodeTypeId": {
"type": "string"
}
},
"additionalProperties": false
},
"Driver": {
"type": "object"
},
"DriverInstanceSource": {
"type": "object",
"properties": {
"InstancePoolId": {
"type": "string"
},
"NodeTypeId": {
"type": "string"
}
},
"additionalProperties": false
},
"ClusterSource": {
"type": "string"
},
"CreatorUserName": {
"type": "string"
},
"DefaultTags": {
"type": "object",
"properties": {
"ClusterId": {
"type": "string"
},
"ClusterName": {
"type": "string"
},
"Creator": {
"type": "string"
},
"Vendor": {
"type": "string"
}
},
"additionalProperties": false
},
"EffectiveSparkVersion": {
"type": "string"
},
"StartTime": {
"type": "number"
},
"InitScriptsSafeMode": {
"type": "boolean"
}
},
"additionalProperties": false,
"oneOf": [
{"required": ["NumWorkers"]},
{"required": ["Autoscale"]}
],
"readOnlyProperties": [
"/properties/ClusterId",
"/properties/State",
"/properties/StateMessage",
"/properties/LastStateLossTime",
"/properties/LastActivityTime",
"/properties/LastRestartedTime",
"/properties/InitScriptsSafeMode"
],
"writeOnlyProperties": [
"/properties/ApplyPolicyDefaultValues",
"/properties/InitScripts",
"/properties/SshPublicKeys",
"/properties/SparkConf",
"/properties/CustomTags",
"/properties/IdempotencyToken",
"/properties/RuntimeEngine",
"/properties/SparkEnvVars"
],
"primaryIdentifier": [
"/properties/ClusterId"
],
"tagging": {
"taggable": false
},
"handlers": {
"create": {
"permissions": []
},
"read": {
"permissions": []
},
"update": {
"permissions": []
},
"delete": {
"permissions": []
},
"list": {
"permissions": []
}
}
}
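
For reference, below is a minimal sketch of a CloudFormation template that uses this resource type, assuming the type has been activated in the target account and region. The property names and the required SparkVersion / NodeTypeId / NumWorkers combination come from the schema above; the logical ID, cluster name, runtime version, node type, and tag values are illustrative placeholders, not defaults.

```json
{
    "Resources": {
        "DemoCluster": {
            "Type": "Databricks::Clusters::Cluster",
            "Properties": {
                "ClusterName": "cfn-demo-cluster",
                "SparkVersion": "11.3.x-scala2.12",
                "NodeTypeId": "i3.xlarge",
                "NumWorkers": 2,
                "AutoterminationMinutes": 60,
                "CustomTags": {
                    "Team": "data-platform"
                }
            }
        }
    },
    "Outputs": {
        "ClusterId": {
            "Description": "Canonical identifier returned by the read handler",
            "Value": { "Fn::GetAtt": [ "DemoCluster", "ClusterId" ] }
        }
    }
}
```

Note that the schema's oneOf constraint means a template must set exactly one of NumWorkers or Autoscale. The DatabricksAccess credentials defined under typeConfiguration are not part of the template; they are supplied once per account and region, for example through CloudFormation's SetTypeConfiguration API. A sketch of that configuration document, with placeholder values, might look like:

```json
{
    "DatabricksAccess": {
        "DatabricksInstance": "dbc-example.cloud.databricks.com",
        "Token": "<personal-access-token>"
    }
}
```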