Dataproc cluster Update Failed #661
@sbihun Sorry you are experiencing the update failure. This update failure is caused by the Dataproc service filling in server-side defaults (for example under `softwareConfig`) that differ from the declared desired state, which the declarative resource library reports as an infeasible update requiring recreation.
We will continue investigating and will update this issue.
@sbihun Sorry for the delayed response. We discussed the issue with the Dataproc team internally; the best workaround we can provide at this moment is to explicitly specify, in your YAML, the full `imageVersion` string and the complete set of server-defaulted `properties` under `softwareConfig`.
An example YAML snippet is shown below. We realize that specifying all the fields like this is not a good user experience, and we are working with the Dataproc team to see how this can be optimized in the future.

```yaml
softwareConfig:
  imageVersion: "2.0.39-debian10"
  properties:
    "capacity-scheduler:yarn.scheduler.capacity.root.default.ordering-policy": "fair"
    "core:fs.gs.block.size": "134217728"
    "core:fs.gs.metadata.cache.enable": "false"
    "core:hadoop.security.groups.cache.secs": "250"
    "core:hadoop.ssl.enabled.protocols": "TLSv1,TLSv1.1,TLSv1.2"
    "distcp:mapreduce.map.java.opts": "-Xmx576m"
    "distcp:mapreduce.map.memory.mb": "768"
    "distcp:mapreduce.reduce.java.opts": "-Xmx576m"
    "distcp:mapreduce.reduce.memory.mb": "768"
    "hadoop-env:HADOOP_DATANODE_OPTS": "-Xmx512m"
    "hdfs:dfs.datanode.address": "0.0.0.0:9866"
    "hdfs:dfs.datanode.http.address": "0.0.0.0:9864"
    "hdfs:dfs.datanode.https.address": "0.0.0.0:9865"
    "hdfs:dfs.datanode.ipc.address": "0.0.0.0:9867"
    "hdfs:dfs.namenode.handler.count": "20"
    "hdfs:dfs.namenode.http-address": "0.0.0.0:9870"
    "hdfs:dfs.namenode.https-address": "0.0.0.0:9871"
    "hdfs:dfs.namenode.lifeline.rpc-address": "ljswvrphhucbbxsyifsbuq-m:8050"
    "hdfs:dfs.namenode.secondary.http-address": "0.0.0.0:9868"
    "hdfs:dfs.namenode.secondary.https-address": "0.0.0.0:9869"
    "hdfs:dfs.namenode.service.handler.count": "10"
    "hdfs:dfs.namenode.servicerpc-address": "ljswvrphhucbbxsyifsbuq-m:8051"
    "hive:hive.fetch.task.conversion": "none"
    "mapred-env:HADOOP_JOB_HISTORYSERVER_HEAPSIZE": "2048"
    "mapred:mapred.tasktracker.map.tasks.maximum": "2"
    "mapred:mapreduce.job.maps": "9"
    "mapred:mapreduce.job.reduce.slowstart.completedmaps": "0.95"
    "mapred:mapreduce.job.reduces": "3"
    "mapred:mapreduce.jobhistory.recovery.store.class": "org.apache.hadoop.mapreduce.v2.hs.HistoryServerLeveldbStateStoreService"
    "mapred:mapreduce.map.cpu.vcores": "1"
    "mapred:mapreduce.map.java.opts": "-Xmx2621m"
    "mapred:mapreduce.map.maxattempts": "10"
    "mapred:mapreduce.map.memory.mb": "3277"
    "mapred:mapreduce.map.sort.spill.percent": "0.90"
    "mapred:mapreduce.reduce.cpu.vcores": "1"
    "mapred:mapreduce.reduce.java.opts": "-Xmx2621m"
    "mapred:mapreduce.reduce.maxattempts": "10"
    "mapred:mapreduce.reduce.memory.mb": "3277"
    "mapred:mapreduce.task.io.sort.mb": "256"
    "mapred:mapreduce.tasktracker.reduce.tasks.maximum": "5"
    "mapred:yarn.app.mapreduce.am.command-opts": "-Xmx2621m"
    "mapred:yarn.app.mapreduce.am.resource.cpu-vcores": "1"
    "mapred:yarn.app.mapreduce.am.resource.mb": "3277"
    "spark-env:SPARK_DAEMON_MEMORY": "2048m"
    "spark:spark.driver.maxResultSize": "1024m"
    "spark:spark.driver.memory": "2048m"
    "spark:spark.executor.cores": "1"
    "spark:spark.executor.instances": "2"
    "spark:spark.executor.memory": "2893m"
    "spark:spark.executorEnv.OPENBLAS_NUM_THREADS": "1"
    "spark:spark.extraListeners": "com.google.cloud.spark.performance.DataprocMetricsListener"
    "spark:spark.scheduler.mode": "FAIR"
    "spark:spark.sql.cbo.enabled": "true"
    "spark:spark.stage.maxConsecutiveAttempts": "10"
    "spark:spark.task.maxFailures": "10"
    "spark:spark.ui.port": "0"
    "spark:spark.yarn.am.attemptFailuresValidityInterval": "1h"
    "spark:spark.yarn.am.memory": "640m"
    "spark:spark.yarn.executor.failuresValidityInterval": "1h"
    "yarn-env:YARN_NODEMANAGER_HEAPSIZE": "819"
    "yarn-env:YARN_RESOURCEMANAGER_HEAPSIZE": "2048"
    "yarn-env:YARN_TIMELINESERVER_HEAPSIZE": "2048"
    "yarn:yarn.nodemanager.address": "0.0.0.0:8026"
    "yarn:yarn.nodemanager.resource.cpu-vcores": "2"
    "yarn:yarn.nodemanager.resource.memory-mb": "6554"
    "yarn:yarn.resourcemanager.am.max-attempts": "10"
    "yarn:yarn.resourcemanager.nodemanager-graceful-decommission-timeout-secs": "86400"
    "yarn:yarn.scheduler.maximum-allocation-mb": "6554"
    "yarn:yarn.scheduler.minimum-allocation-mb": "1"
```
@diviner524 Thanks a lot, it works for me.
Hello @diviner524, we are seeing this same issue again; our config-connector version is 1.74.0.
@tpolekhin Sorry that you ran into this same issue again. Could you please try using the full image version string (for example `2.0.39-debian10` rather than a short form like `2.0`)?
Make sure the full list of `softwareConfig.properties` is also specified, as in the example above.
We have introduced a workaround in the underlying declarative resource library implementation, and this issue should no longer be seen for Config Connector version 1.96.0+. Note that the fix is only for the server-defaulted `properties`. However, it is still required to specify a full version string in the field `imageVersion`.
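As a concrete illustration of the remaining `imageVersion` requirement, the sketch below contrasts a short version string with a fully resolved one (the version value is just the example from the snippet above):

```yaml
softwareConfig:
  # A short version string is resolved to a concrete image server-side,
  # and the resulting diff can still be reported as an infeasible update:
  # imageVersion: "2.0"
  # Specifying the fully resolved version string avoids that diff:
  imageVersion: "2.0.39-debian10"
```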
Bug Description
```
Status:
  Conditions:
    Last Transition Time:  2022-05-18T08:51:03Z
    Message:               Update call failed: error applying desired state: infeasible update: ({true }) would require recreation
    Reason:                UpdateFailed
    Status:                False
    Type:                  Ready
```
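For comparison, once the workaround is applied, a healthy resource should eventually report a `Ready` condition shaped like the sketch below (the `UpToDate` reason and message are the values Config Connector normally uses, shown here as an assumption):

```yaml
status:
  conditions:
  - type: Ready
    status: "True"
    reason: UpToDate
    message: The resource is up to date
```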
Additional Diagnostic Information
I tried to deploy the Dataproc cluster.
GKE cluster:
I also updated the Kubernetes cluster and Config Connector to the latest versions, but it didn't help.
Kubernetes Cluster Version
1.20.15-gke.6000
Config Connector Version
1.69.0
Config Connector Mode
cluster mode
Log Output
No response
Steps to reproduce the issue
Every deployed Dataproc cluster got this `UpdateFailed` warning.
YAML snippets