databricks-clusters-cluster.json
{
"typeName": "Databricks::Clusters::Cluster",
"description": "Manage a Databricks Cluster",
"sourceUrl": "https://github.com/aws-ia/cloudformation-databricks-resource-providers.git",
"documentationUrl": "https://github.com/aws-ia/cloudformation-databricks-resource-providers",
"typeConfiguration": {
"properties": {
"DatabricksAccess": {
"$ref": "#/definitions/DatabricksAccess"
}
},
"additionalProperties": false,
"required": [
"DatabricksAccess"
]
},
"definitions": {
"DatabricksAccess": {
"description": "Properties needed to access databricks.",
"type": "object",
"properties": {
"DatabricksInstance": {
"type": "string",
"description": "Domain used to access Databricks"
},
"Token": {
"type": "string"
}
},
"required": [
"DatabricksInstance",
"Token"
],
"additionalProperties": false
},
"Autoscale": {
"description": "Range defining the min and max number of cluster workers",
"type": "object",
"properties": {
"MinWorkers": {
"type": "integer",
"description": "The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation."
},
"MaxWorkers": {
"type": "integer",
"description": "The maximum number of workers to which the cluster can scale up when overloaded. max_workers must be strictly greater than min_workers."
}
},
"required": [
"MaxWorkers",
"MinWorkers"
],
"additionalProperties": false
},
"SparkConf": {
"description": "An object containing a set of optional, user-specified Spark configuration key-value pairs. You can also pass in a string of extra JVM options to the driver and the executors via spark.driver.extraJavaOptions and spark.executor.extraJavaOptions respectively. Example Spark confs: {\"spark.speculation\": true, \"spark.streaming.ui.retainedBatches\": 5} or {\"spark.driver.extraJavaOptions\": \"-verbose:gc -XX:+PrintGCDetails\"}",
"type": "object"
},
"CustomTags": {
"description": "An object containing a set of tags for cluster resources. Databricks tags all cluster resources (such as AWS instances and EBS volumes) with these tags in addition to default_tags.",
"type": "object"
},
"ClusterLogConf": {
"description": "Path to cluster log.",
"type": "object",
"properties": {
"DbfsStorageInfo": {
"description": "DBFS destination. Example: dbfs:/my/path",
"type": "string"
},
"S3StorageInfo": {
"description": "File destination. Example: file:/my/file.sh",
"type": "string"
}
},
"additionalProperties": false
},
"DockerImage": {
"description": "Docker image connection information",
"type": "object",
"properties": {
"Url": {
"description": "URL for the Docker image.",
"type": "string"
},
"BasicAuth": {
"$ref": "#/definitions/DockerBasicAuth"
}
},
"additionalProperties": false
},
"DockerBasicAuth": {
"description": "Basic authentication information for Docker repository",
"type": "object",
"properties": {
"Username": {
"description": "User name for the Docker repository",
"type": "string"
},
"Password": {
"description": "Password for the Docker repository.",
"type": "string"
}
},
"additionalProperties": false
},
"AwsAttributes": {
"description": "Attributes set during cluster creation related to Amazon Web Services.",
"type": "object",
"properties": {
"FirstOnDemand": {
"description": "The first first_on_demand nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, first_on_demand nodes will be placed on on-demand instances and the remainder will be placed on availability instances. This value does not affect cluster size and cannot be mutated over the lifetime of a cluster.",
"type": "integer"
},
"Availability": {
"description": "Availability type used for all subsequent nodes past the first_on_demand ones. Note: If first_on_demand is zero, this availability type will be used for the entire cluster.",
"type": "string",
"enum": [
"SPOT",
"ON_DEMAND",
"SPOT_WITH_FALLBACK"
]
},
"ZoneId": {
"description": "Identifier for the availability zone/datacenter in which the cluster resides. You have three options:\n\nSpecify an availability zone as a string, for example: 'us-west-2a'. The provided availability zone must be in the same region as the Databricks deployment. For example, 'us-west-2a' is not a valid zone ID if the Databricks deployment resides in the 'us-east-1' region.\n\nEnable automatic availability zone selection ('Auto-AZ'), by setting the value 'auto'. Databricks selects the AZ based on available IPs in the workspace subnets and retries in other availability zones if AWS returns insufficient capacity errors.\n\nDo not specify a value. If not specified, a default zone will be used.",
"type": "string"
},
"SpotBidPricePercent": {
"description": "The max price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new i3.xlarge spot instance, then the max price is half of the price of on-demand i3.xlarge instances. Similarly, if this field is set to 200, the max price is twice the price of on-demand i3.xlarge instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose max price percentage matches this field will be considered. For safety, we enforce this field to be no more than 10000.",
"type": "integer"
},
"EbsVolumeType": {
"description": "The type of EBS volumes that will be launched with this cluster.",
"type": "string",
"enum": [
"GENERAL_PURPOSE_SSD",
"THROUGHPUT_OPTIMIZED_HDD"
]
},
"EbsVolumeCount": {
"description": "The number of volumes launched for each instance. You can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail.\n\nThese EBS volumes will be mounted at /ebs0, /ebs1, and etc. Instance store volumes will be mounted at /local_disk0, /local_disk1, and etc.\n\nIf EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogeneously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes.\n\nIf EBS volumes are specified, then the Spark configuration spark.local.dir will be overridden.",
"type": "integer"
},
"EbsVolumeSize": {
"description": "The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. Custom EBS volumes cannot be specified for the legacy node types (memory-optimized and compute-optimized).",
"type": "integer"
},
"EbsVolumeIops": {
"description": "The number of IOPS per EBS gp3 volume.\n\nThis value must be between 3000 and 16000.\n\nThe value of IOPS and throughput is calculated based on AWS documentation to match the maximum performance of a gp2 volume with the same volume size.\n\nFor more information, see the EBS volume limit calculator.",
"type": "integer"
},
"EbsVolumeThroughput": {
"description": "The throughput per EBS gp3 volume, in MiB per second.\n\nThis value must be between 125 and 1000.",
"type": "integer"
}
},
"additionalProperties": false
},
"Destination": {
"type": "object",
"properties": {
"Destination": {
"type": "string"
}
},
"additionalProperties": false
},
"S3destination": {
"type": "object",
"properties": {
"Destination": {
"type": "string"
},
"Region": {
"type": "string"
}
},
"additionalProperties": false
},
"InitScriptsListItem": {
"type": "object",
"properties": {
"S3": {
"$ref": "#/definitions/S3destination"
}
},
"additionalProperties": false
}
},
"properties": {
"ClusterId": {
"description": "Canonical identifier for the cluster",
"type": "string"
},
"NumWorkers": {
"description": "If num_workers, number of worker nodes that this cluster should have. A cluster has one Spark driver and num_workers executors for a total of num_workers + 1 Spark nodes.",
"type": "integer"
},
"Autoscale": {
"$ref": "#/definitions/Autoscale"
},
"ClusterName": {
"description": "Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string.",
"type": "string"
},
"SparkVersion": {
"description": "The runtime version of the cluster. You can retrieve a list of available runtime versions by using the Runtime versions API call. This field is required.",
"type": "string"
},
"SparkConf": {
"$ref": "#/definitions/SparkConf"
},
"AwsAttributes":{
"$ref": "#/definitions/AwsAttributes"
},
"NodeTypeId": {
"description": "This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads A list of available node types can be retrieved by using the List node types API call. This field is required.",
"type": "string"
},
"DriverNodeTypeId": {
"description": "The node type of the Spark driver. This field is optional; if unset, the driver node type will be set as the same value as node_type_id defined above.",
"type": "string"
},
"SshPublicKeys":{
"description": "SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name ubuntu on port 2200. Up to 10 keys can be specified.",
"type": "array",
"insertionOrder": false,
"items": {
"type": "string"
}
},
"CustomTags": {
"$ref": "#/definitions/CustomTags"
},
"InitScripts": {
"description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If cluster_log_conf is specified, init script logs are sent to <destination>/<cluster-ID>/init_scripts.",
"type": "array",
"insertionOrder": false,
"items": {
"$ref": "#/definitions/InitScriptsListItem"
}
},
"SparkEnvVars": {
"description": "An object containing a set of optional, user-specified environment variable key-value pairs. Key-value pairs of the form (X,Y) are exported as is (that is, export X='Y') while launching the driver and workers. In order to specify an additional set of SPARK_DAEMON_JAVA_OPTS, we recommend appending them to $SPARK_DAEMON_JAVA_OPTS as shown in the following example. This ensures that all default Databricks managed environmental variables are included as well. Example Spark environment variables: {\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"} or {\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS",
"type": "object"
},
"AutoterminationMinutes": {
"description": "Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable",
"type": "integer"
},
"EnableElasticDisk": {
"description": "Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to Autoscaling local storage for details.",
"type": "boolean"
},
"DriverInstancePoolId": {
"description": "The optional ID of the instance pool to use for the driver node. You must also specify instance_pool_id. Refer to Instance Pools API 2.0 for details.",
"type": "string"
},
"InstancePoolId": {
"description": "The optional ID of the instance pool to use for cluster nodes. If driver_instance_pool_id is present, instance_pool_id is used for worker nodes only. Otherwise, it is used for both the driver and worker nodes. Refer to Instance Pools API 2.0 for details.",
"type": "string"
},
"IdempotencyToken": {
"description": "An optional token that can be used to guarantee the idempotency of cluster creation requests. If the idempotency token is assigned to a cluster that is not in the TERMINATED state, the request does not create a new cluster but instead returns the ID of the existing cluster. Otherwise, a new cluster is created. The idempotency token is cleared when the cluster is terminated\n\nIf you specify the idempotency token, upon failure you can retry until the request succeeds. Databricks guarantees that exactly one cluster will be launched with that idempotency token.\n\nThis token should have at most 64 characters.",
"type": "string"
},
"ApplyPolicyDefaultValues": {
"description": "Whether to use policy default values for missing cluster attributes.\n\n",
"type": "boolean"
},
"EnableLocalDiskEncryption": {
"description": "Whether encryption of disks locally attached to the cluster is enabled.\n\n",
"type": "boolean"
},
"RuntimeEngine": {
"description": "The type of runtime engine to use. If not specified, the runtime engine type is inferred based on the spark_version value. Allowed values include:\n\nPHOTON: Use the Photon runtime engine type.\n\nSTANDARD: Use the standard runtime engine type.\n\nThis field is optional.",
"type": "string"
},
"State": {
"description": "The state of the cluster.",
"type": "string"
},
"StateMessage": {
"description": "The state of the cluster.",
"type": "string"
},
"LastStateLossTime": {
"type": "integer"
},
"LastActivityTime":{
"type": "integer"
},
"LastRestartedTime": {
"type": "integer"
},
"ClusterMemoryMb": {
"type": "integer"
},
"ClusterCores": {
"type": "integer"
},
"InstanceSource": {
"type": "object",
"properties": {
"InstancePoolId": {
"type": "string"
},
"NodeTypeId": {
"type": "string"
}
},
"additionalProperties": false
},
"Driver": {
"type": "object"
},
"DriverInstanceSource": {
"type": "object",
"properties": {
"InstancePoolId": {
"type": "string"
},
"NodeTypeId": {
"type": "string"
}
},
"additionalProperties": false
},
"ClusterSource": {
"type": "string"
},
"CreatorUserName": {
"type": "string"
},
"DefaultTags": {
"type": "object",
"properties": {
"ClusterId": {
"type": "string"
},
"ClusterName": {
"type": "string"
},
"Creator": {
"type": "string"
},
"Vendor": {
"type": "string"
}
},
"additionalProperties": false
},
"EffectiveSparkVersion": {
"type": "string"
},
"StartTime": {
"type": "number"
},
"InitScriptsSafeMode": {
"type": "boolean"
}
},
"additionalProperties": false,
"oneOf": [
{"required": ["NumWorkers"]},
{"required": ["Autoscale"]}
],
"readOnlyProperties": [
"/properties/ClusterId",
"/properties/State",
"/properties/StateMessage",
"/properties/LastStateLossTime",
"/properties/LastActivityTime",
"/properties/LastRestartedTime",
"/properties/InitScriptsSafeMode"
],
"writeOnlyProperties": [
"/properties/ApplyPolicyDefaultValues",
"/properties/InitScripts",
"/properties/SshPublicKeys",
"/properties/SparkConf",
"/properties/CustomTags",
"/properties/IdempotencyToken",
"/properties/RuntimeEngine",
"/properties/SparkEnvVars"
],
"primaryIdentifier": [
"/properties/ClusterId"
],
"tagging": {
"taggable": false
},
"handlers": {
"create": {
"permissions": []
},
"read": {
"permissions": []
},
"update": {
"permissions": []
},
"delete": {
"permissions": []
},
"list": {
"permissions": []
}
}
}
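
For reference, below is a minimal sketch of a CloudFormation template that uses this resource type, assuming the type has been activated in the target account and region. The property names and the required SparkVersion / NodeTypeId / NumWorkers combination come from the schema above; the logical ID, cluster name, runtime version, node type, and tag values are illustrative placeholders, not defaults.

```json
{
    "Resources": {
        "DemoCluster": {
            "Type": "Databricks::Clusters::Cluster",
            "Properties": {
                "ClusterName": "cfn-demo-cluster",
                "SparkVersion": "11.3.x-scala2.12",
                "NodeTypeId": "i3.xlarge",
                "NumWorkers": 2,
                "AutoterminationMinutes": 60,
                "CustomTags": {
                    "Team": "data-platform"
                }
            }
        }
    },
    "Outputs": {
        "ClusterId": {
            "Description": "Canonical identifier returned by the read handler",
            "Value": { "Fn::GetAtt": [ "DemoCluster", "ClusterId" ] }
        }
    }
}
```

Note that the schema's oneOf constraint means a template must set exactly one of NumWorkers or Autoscale. The DatabricksAccess credentials defined under typeConfiguration are not part of the template; they are supplied once per account and region, for example through CloudFormation's SetTypeConfiguration API. A sketch of that configuration document, with placeholder values, might look like:

```json
{
    "DatabricksAccess": {
        "DatabricksInstance": "dbc-example.cloud.databricks.com",
        "Token": "<personal-access-token>"
    }
}
```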