From ffe23da36813d26c6a20a5db5ecf8c3e326d1858 Mon Sep 17 00:00:00 2001
From: "xu.xiaojing"
Date: Wed, 1 Feb 2023 13:04:45 +0800
Subject: [PATCH 1/8] [fix-13476][Api] Improve DS to load HDFS configuration
 automatically by using environment variable HADOOP_CONF_DIR

This closes issue #13476

---
 .../storage/hdfs/HdfsStorageOperator.java     | 27 ++++++++++++-------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java
index f38565af5d10..4271a77aeaae 100644
--- a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java
+++ b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java
@@ -132,19 +132,24 @@ private void initHdfsPath() {
     private void init() throws NullPointerException {
         try {
             configuration = new HdfsConfiguration();
-
-            String hdfsUser = hdfsProperties.getUser();
+            String hadoopConfDir = System.getenv("HADOOP_CONF_DIR");
+            if(StringUtils.isNotBlank(hadoopConfDir)){
+                configuration.addResource(new Path(hadoopConfDir,"hdfs-site.xml"));
+                configuration.addResource(new Path(hadoopConfDir,"core-site.xml"));
+            }
+            boolean isKerberosAuthen = false;
             if (CommonUtils.loadKerberosConf(configuration)) {
-                hdfsUser = "";
+                isKerberosAuthen = true;
             }
 
             String defaultFS = getDefaultFS();
             // first get key from core-site.xml hdfs-site.xml ,if null ,then try to get from properties file
             // the default is the local file system
             if (StringUtils.isNotBlank(defaultFS)) {
-                Map<String, String> fsRelatedProps = PropertyUtils.getPrefixedProperties("fs.");
                 configuration.set(Constants.HDFS_DEFAULT_FS, defaultFS);
-                fsRelatedProps.forEach((key, value) -> configuration.set(key, value));
+                // todo : question - Is this convenient for users to configure HDFS? And Whether it is convenient enough to configure environment variables `HADOOP_CONF_DIR`
+                Map<String, String> fsRelatedProps = PropertyUtils.getPrefixedProperties("resource.hdfs.fs.");
+                fsRelatedProps.forEach((key, value) -> configuration.set(key.substring(14), value));
             } else {
                 logger.error("property:{} can not to be empty, please set!", Constants.FS_DEFAULT_FS);
                 throw new NullPointerException(
@@ -152,19 +157,22 @@ private void init() throws NullPointerException {
             }
 
             if (!defaultFS.startsWith("file")) {
-                logger.info("get property:{} -> {}, from core-site.xml hdfs-site.xml ", Constants.FS_DEFAULT_FS,
+                logger.info("get property:{} -> {}, will use HDFS storage", Constants.FS_DEFAULT_FS,
                         defaultFS);
             }
 
-            if (StringUtils.isNotEmpty(hdfsUser)) {
+            String hdfsUser = hdfsProperties.getUser();
+            if (!isKerberosAuthen && StringUtils.isNotEmpty(hdfsUser)) {
                 UserGroupInformation ugi = UserGroupInformation.createRemoteUser(hdfsUser);
                 ugi.doAs((PrivilegedExceptionAction<Boolean>) () -> {
                     fs = FileSystem.get(configuration);
                     return true;
                 });
             } else {
-                logger.warn("resource.hdfs.root.user is not set value!");
                 fs = FileSystem.get(configuration);
+                if(!isKerberosAuthen){
+                    logger.warn("resource.hdfs.root.user is not set value!");
+                }
             }
 
         } catch (Exception e) {
@@ -184,7 +192,8 @@ public Configuration getConfiguration() {
      */
     public String getDefaultFS() {
         String defaultFS = getConfiguration().get(Constants.FS_DEFAULT_FS);
-        if (StringUtils.isBlank(defaultFS)) {
+        if (StringUtils.isBlank(defaultFS) || defaultFS.equals("file:///")) {
+            logger.info("the property [{}] value from core-site.xml is [{}] . It is not the expected value, will use the config value in common.properties.", Constants.HDFS_DEFAULT_FS, defaultFS);
             defaultFS = hdfsProperties.getDefaultFS();
         }
         return defaultFS;
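An aside on the magic number in the hunk above: `key.substring(14)` works because `"resource.hdfs."` is exactly 14 characters long, so each `resource.hdfs.fs.*` property is renamed to the `fs.*` key Hadoop expects. A minimal, self-contained sketch of that mapping, using a plain `Map` in place of DolphinScheduler's `PropertyUtils` and hypothetical property values:

```java
import java.util.HashMap;
import java.util.Map;

// Sketch of the prefix-stripping done in patch 1; not the real PropertyUtils.
public class PrefixMappingSketch {

    public static void main(String[] args) {
        Map<String, String> dsProps = new HashMap<>();
        dsProps.put("resource.hdfs.fs.defaultFS", "hdfs://mycluster:8020"); // hypothetical value
        dsProps.put("resource.hdfs.fs.trash.interval", "1440");             // hypothetical value

        Map<String, String> hadoopProps = new HashMap<>();
        dsProps.forEach((key, value) -> {
            if (key.startsWith("resource.hdfs.fs.")) {
                // "resource.hdfs.".length() == 14, hence substring(14) in the patch
                hadoopProps.put(key.substring("resource.hdfs.".length()), value);
            }
        });

        // Prints fs.defaultFS=hdfs://mycluster:8020 and fs.trash.interval=1440
        hadoopProps.forEach((k, v) -> System.out.println(k + "=" + v));
    }
}
```

Deriving the offset from the prefix literal, as in the sketch, also keeps a bare `14` from going stale if the prefix ever changes.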
From 9a655d11cdb0316461e3e6e67fe2b186657691ac Mon Sep 17 00:00:00 2001
From: "xu.xiaojing"
Date: Wed, 1 Feb 2023 14:08:07 +0800
Subject: [PATCH 2/8] [fix-13476][Api] fix HdfsStorageOperator NullPointerException bug

This closes issue #13476

---
 .../plugin/storage/hdfs/HdfsStorageOperator.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java
index 4271a77aeaae..3a7f782aa538 100644
--- a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java
+++ b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java
@@ -87,7 +87,7 @@ public class HdfsStorageOperator implements Closeable, StorageOperate {
 
                 @Override
                 public HdfsStorageOperator load(String key) throws Exception {
-                    return new HdfsStorageOperator(hdfsProperties);
+                    return new HdfsStorageOperator();
                 }
             });
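The one-line fix above is easier to follow with the surrounding cache code in mind: `load` is a Guava `CacheLoader` callback that runs lazily on the first `get`, so a constructor argument that is still null at that moment (here the static `hdfsProperties`) surfaces as the reported NullPointerException. A self-contained sketch of the pattern under that assumption, with Guava on the classpath and a `String` standing in for the real operator:

```java
import java.util.concurrent.TimeUnit;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

// Illustrative only; names and the String value type are stand-ins.
public class LoaderSketch {

    private static final LoadingCache<String, String> CACHE = CacheBuilder
            .newBuilder()
            .expireAfterWrite(2, TimeUnit.HOURS) // stand-in for the Kerberos expire time
            .build(new CacheLoader<String, String>() {

                @Override
                public String load(String key) {
                    // Runs lazily on the first get(); anything it touches (such as a
                    // static field) must already be initialized, which is the root
                    // cause behind the NPE fixed in patch 2.
                    return "operator-for-" + key;
                }
            });

    public static void main(String[] args) throws Exception {
        System.out.println(CACHE.get("HADOOP_UTILS_KEY"));
    }
}
```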
From a055906e627eb6384e1a6cf2f3694d052d5a2e59 Mon Sep 17 00:00:00 2001
From: "xu.xiaojing"
Date: Thu, 2 Feb 2023 13:02:00 +0800
Subject: [PATCH 3/8] [fix-13476][Api] fix the code style & add unit test

---
 .../storage/hdfs/HdfsStorageOperator.java     | 47 +++++++++++++---
 .../storage/hdfs/HdfsStorageOperatorTest.java | 53 +++++++++++++++++++
 script/env/dolphinscheduler_env.sh            |  5 +-
 3 files changed, 97 insertions(+), 8 deletions(-)

diff --git a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java
index 3a7f782aa538..36fb2893f631 100644
--- a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java
+++ b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java
@@ -53,6 +53,7 @@
 import java.io.InputStreamReader;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
+import java.nio.file.Paths;
 import java.security.PrivilegedExceptionAction;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -79,6 +80,11 @@ public class HdfsStorageOperator implements Closeable, StorageOperate {
     private static final Logger logger = LoggerFactory.getLogger(HdfsStorageOperator.class);
     private static HdfsStorageProperties hdfsProperties;
     private static final String HADOOP_UTILS_KEY = "HADOOP_UTILS_KEY";
+    public static final String HADOOP_HOME = "HADOOP_HOME";
+    public static final String HADOOP_CONF_DIR = "HADOOP_CONF_DIR";
+    public static final String HDFS_SITE_XML = "hdfs-site.xml";
+    public static final String CORE_SITE_XML = "core-site.xml";
+
 
     private static final LoadingCache<String, HdfsStorageOperator> cache = CacheBuilder
             .newBuilder()
@@ -132,11 +138,7 @@ private void initHdfsPath() {
     private void init() throws NullPointerException {
         try {
             configuration = new HdfsConfiguration();
-            String hadoopConfDir = System.getenv("HADOOP_CONF_DIR");
-            if(StringUtils.isNotBlank(hadoopConfDir)){
-                configuration.addResource(new Path(hadoopConfDir,"hdfs-site.xml"));
-                configuration.addResource(new Path(hadoopConfDir,"core-site.xml"));
-            }
+            loadHdfsConfigurationFromEnv(configuration);
             boolean isKerberosAuthen = false;
             if (CommonUtils.loadKerberosConf(configuration)) {
                 isKerberosAuthen = true;
@@ -170,7 +172,7 @@ private void init() throws NullPointerException {
                 });
             } else {
                 fs = FileSystem.get(configuration);
-                if(!isKerberosAuthen){
+                if (!isKerberosAuthen) {
                     logger.warn("resource.hdfs.root.user is not set value!");
                 }
             }
@@ -180,6 +182,35 @@ private void init() throws NullPointerException {
         }
     }
 
+    /**
+     * auto load hdfs configuration from os env
+     * @param configuration
+     */
+    private void loadHdfsConfigurationFromEnv(Configuration configuration) {
+        String hadoopConfDirEnv = System.getenv(HADOOP_CONF_DIR);
+        String hadoopHomeEnv = System.getenv(HADOOP_HOME);
+        String hadoopConfPath = getHadoopConfPath(hadoopConfDirEnv, hadoopHomeEnv);
+        if (StringUtils.isNotBlank(hadoopConfPath)) {
+            configuration.addResource(new Path(hadoopConfPath, HDFS_SITE_XML));
+            configuration.addResource(new Path(hadoopConfPath, CORE_SITE_XML));
+        }
+    }
+
+    private String getHadoopConfPath(String hadoopConfDirEnv, String hadoopHomeEnv) {
+        String hadoopConfPath = null;
+        if (StringUtils.isBlank(hadoopConfDirEnv) || !Files.exists(java.nio.file.Paths.get(hadoopConfDirEnv))) {
+            if (StringUtils.isNotBlank(hadoopHomeEnv)) {
+                java.nio.file.Path confPath = Paths.get(hadoopHomeEnv, "conf");
+                if (Files.exists(confPath)) {
+                    hadoopConfPath = confPath.toString();
+                }
+            }
+        } else {
+            hadoopConfPath = hadoopConfDirEnv;
+        }
+        return hadoopConfPath;
+    }
+
     /**
      * @return Configuration
      */
@@ -193,7 +224,9 @@ public Configuration getConfiguration() {
      */
     public String getDefaultFS() {
         String defaultFS = getConfiguration().get(Constants.FS_DEFAULT_FS);
         if (StringUtils.isBlank(defaultFS) || defaultFS.equals("file:///")) {
-            logger.info("the property [{}] value from core-site.xml is [{}] . It is not the expected value, will use the config value in common.properties.", Constants.HDFS_DEFAULT_FS, defaultFS);
+            logger.info(
+                    "the property [{}] value from core-site.xml is [{}] . It is not the expected value, will use the config value in common.properties.",
+                    Constants.HDFS_DEFAULT_FS, defaultFS);
             defaultFS = hdfsProperties.getDefaultFS();
         }
         return defaultFS;
diff --git a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/test/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperatorTest.java b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/test/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperatorTest.java
index 63931cba787c..f98555fed9bf 100644
--- a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/test/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperatorTest.java
+++ b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/test/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperatorTest.java
@@ -20,7 +20,15 @@
 import org.apache.dolphinscheduler.common.utils.HttpUtils;
 import org.apache.dolphinscheduler.spi.enums.ResourceType;
 
+import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
 import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.DisplayName;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.extension.ExtendWith;
 import org.mockito.MockedStatic;
@@ -76,4 +84,49 @@ public void getAppAddress() {
         }
     }
 
+    @DisplayName("test load Hdfs Configuration by env available HADOOP_CONF_DIR, and directory exist")
+    @Test
+    public void testGetHadoopConfPathFromEnvByHADOOP_CONF_DIR1() throws InvocationTargetException, NoSuchMethodException, IllegalAccessException {
+        String hadoopConfDirEnv = System.getProperty("user.dir");
+        String hadoopHomeEnv = "/not_expected";
+        Assertions.assertEquals(hadoopConfDirEnv, invokeGetHadoopConfPath(hadoopConfDirEnv, hadoopHomeEnv));
+    }
+
+    @DisplayName("test load Hdfs Configuration by env available HADOOP_CONF_DIR, but directory not exist")
+    @Test
+    public void testGetHadoopConfPathFromEnvByHADOOP_CONF_DIR2() throws InvocationTargetException, NoSuchMethodException, IllegalAccessException {
+        String hadoopConfDirEnv = "/not_exist";
+        String hadoopHomeEnv = null;
+        Assertions.assertNull(invokeGetHadoopConfPath(hadoopConfDirEnv, hadoopHomeEnv));
+    }
+
+    @DisplayName("test load Hdfs Configuration by env available HADOOP_HOME, and directory exist")
+    @Test
+    public void testGetHadoopConfPathFromEnvByHADOOP_HOME1() throws InvocationTargetException, NoSuchMethodException, IllegalAccessException, IOException {
+        String hadoopConfDirEnv = null;
+        String hadoopHomeEnv = System.getProperty("user.dir");
+        Path homeConfPath = Paths.get(hadoopHomeEnv, "conf");
+        Files.createDirectory(homeConfPath);
+        Assertions.assertEquals(homeConfPath.toString(),
+                invokeGetHadoopConfPath(hadoopConfDirEnv, hadoopHomeEnv));
+        Files.delete(homeConfPath);
+    }
+
+    @DisplayName("test load Hdfs Configuration by env available HADOOP_HOME, and directory not exist")
+    @Test
+    public void testGetHadoopConfPathFromEnvByHADOOP_HOME2() throws InvocationTargetException, NoSuchMethodException, IllegalAccessException {
+        String hadoopConfDirEnv = null;
+        String hadoopHomeEnv = "/not_exist";
+        Assertions.assertNull(invokeGetHadoopConfPath(hadoopConfDirEnv, hadoopHomeEnv));
+    }
+
+    private String invokeGetHadoopConfPath(String hadoopConfDirEnv,
+            String hadoopHomeEnv) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
+        Method method =
+                HdfsStorageOperator.class.getDeclaredMethod("getHadoopConfPath", String.class, String.class);
+        method.setAccessible(true);
+        HdfsStorageOperator hdfsStorageOperator = Mockito.mock(HdfsStorageOperator.class);
+        return (String) method.invoke(hdfsStorageOperator, hadoopConfDirEnv, hadoopHomeEnv);
+    }
+
 }
diff --git a/script/env/dolphinscheduler_env.sh b/script/env/dolphinscheduler_env.sh
index 967f324259c1..2c829e9fcccb 100755
--- a/script/env/dolphinscheduler_env.sh
+++ b/script/env/dolphinscheduler_env.sh
@@ -17,7 +17,10 @@
 #
 
 # Never put sensitive config such as database password here in your production environment,
-# this file will be sourced everytime a new task is executed.
+
+# auto load hdfs configuration from environment
+#export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
+#export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
 
 # applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log
 #export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/*
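The new tests reach the private `getHadoopConfPath` through reflection rather than widening its visibility. A self-contained sketch of that technique on a hypothetical `Target` class (not DolphinScheduler code), showing the same `getDeclaredMethod`/`setAccessible`/`invoke` sequence:

```java
import java.lang.reflect.Method;

// Illustration of the reflection pattern used by the tests above; Target and
// its greet method are stand-ins, not project code.
public class ReflectionSketch {

    static class Target {
        private String greet(String name) {
            return "hello " + name;
        }
    }

    public static void main(String[] args) throws Exception {
        Method method = Target.class.getDeclaredMethod("greet", String.class);
        method.setAccessible(true); // bypass the private modifier, a test-only practice
        String result = (String) method.invoke(new Target(), "hdfs");
        System.out.println(result); // prints: hello hdfs
    }
}
```

The tests pass a Mockito mock as the receiver only because the target method never touches instance state; for a method that does, a real instance would be required.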
From 73157dc5eb61d42185f8a20c1f561b2f35011ac5 Mon Sep 17 00:00:00 2001
From: "xu.xiaojing"
Date: Thu, 2 Feb 2023 13:14:28 +0800
Subject: [PATCH 4/8] [fix-13476][Api] fix the code style

---
 .../plugin/storage/hdfs/HdfsStorageOperator.java     | 4 ++--
 .../plugin/storage/hdfs/HdfsStorageOperatorTest.java | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java
index 36fb2893f631..1ceb7ab140f4 100644
--- a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java
+++ b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java
@@ -85,7 +85,6 @@ public class HdfsStorageOperator implements Closeable, StorageOperate {
     public static final String HDFS_SITE_XML = "hdfs-site.xml";
     public static final String CORE_SITE_XML = "core-site.xml";
 
-
     private static final LoadingCache<String, HdfsStorageOperator> cache = CacheBuilder
             .newBuilder()
             .expireAfterWrite(HdfsStorageProperties.getKerberosExpireTime(), TimeUnit.HOURS)
@@ -149,7 +148,8 @@ private void init() throws NullPointerException {
             // the default is the local file system
             if (StringUtils.isNotBlank(defaultFS)) {
                 configuration.set(Constants.HDFS_DEFAULT_FS, defaultFS);
-                // todo : question - Is this convenient for users to configure HDFS? And Whether it is convenient enough to configure environment variables `HADOOP_CONF_DIR`
+                // todo : question - Is this convenient for users to configure HDFS? And Whether it is convenient enough
+                // to configure environment variables `HADOOP_CONF_DIR`
                 Map<String, String> fsRelatedProps = PropertyUtils.getPrefixedProperties("resource.hdfs.fs.");
                 fsRelatedProps.forEach((key, value) -> configuration.set(key.substring(14), value));
             } else {
diff --git a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/test/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperatorTest.java b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/test/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperatorTest.java
index f98555fed9bf..3fd39f7e2c16 100644
--- a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/test/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperatorTest.java
+++ b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/test/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperatorTest.java
@@ -121,7 +121,7 @@ public void testGetHadoopConfPathFromEnvByHADOOP_HOME2() throws InvocationTarget
     }
 
     private String invokeGetHadoopConfPath(String hadoopConfDirEnv,
-            String hadoopHomeEnv) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
+                                           String hadoopHomeEnv) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
         Method method =
                 HdfsStorageOperator.class.getDeclaredMethod("getHadoopConfPath", String.class, String.class);
         method.setAccessible(true);

From 95f3d47ef0f337afc67d4449a08ec065b889cd8c Mon Sep 17 00:00:00 2001
From: "xu.xiaojing"
Date: Fri, 3 Feb 2023 18:18:03 +0800
Subject: [PATCH 5/8] update docs for HADOOP_HOME and HADOOP_CONF_DIR

---
 docs/docs/en/architecture/configuration.md       | 12 ++----------
 docs/docs/zh/architecture/configuration.md       | 11 ++---------
 .../plugin/storage/hdfs/HdfsStorageOperator.java |  2 --
 script/env/dolphinscheduler_env.sh               |  9 ++++++---
 4 files changed, 10 insertions(+), 24 deletions(-)

diff --git a/docs/docs/en/architecture/configuration.md b/docs/docs/en/architecture/configuration.md
index 051a7a6f15fd..073402283a93 100644
--- a/docs/docs/en/architecture/configuration.md
+++ b/docs/docs/en/architecture/configuration.md
@@ -339,23 +339,15 @@ The default configuration is as follows:
 
 ### dolphinscheduler_env.sh [load environment variables configs]
 
-When using shell to commit tasks, DolphinScheduler will export environment variables from `bin/env/dolphinscheduler_env.sh`. The
-mainly configuration including `JAVA_HOME` and other environment paths.
+Configure the running environment for DolphinScheduler. The main configuration includes `JAVA_HOME`, `HADOOP_HOME` and `HADOOP_CONF_DIR`.
 
 ```bash
 # JAVA_HOME, will use it to start DolphinScheduler server
 export JAVA_HOME=${JAVA_HOME:-/opt/soft/java}
 
-# Tasks related configurations, need to change the configuration if you use the related tasks.
+#DS will auto load HDFS configuration from environment
 export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
 export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
-export SPARK_HOME=${SPARK_HOME:-/opt/soft/spark}
-export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python}
-export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
-export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
-export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}
-
-export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
 
 # applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log
 export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/*
diff --git a/docs/docs/zh/architecture/configuration.md b/docs/docs/zh/architecture/configuration.md
index 4b52bceb2083..ebf142c7678a 100644
--- a/docs/docs/zh/architecture/configuration.md
+++ b/docs/docs/zh/architecture/configuration.md
@@ -332,22 +332,15 @@ common.properties配置文件目前主要是配置hadoop/s3/yarn/applicationId
 
 ## dolphinscheduler_env.sh [环境变量配置]
 
-通过类似shell方式提交任务的的时候,会加载该配置文件中的环境变量到主机中。涉及到的 `JAVA_HOME` 任务类型的环境配置,其中任务类型主要有: Shell任务、Python任务、Spark任务、Flink任务、Datax任务等等。
+配置DS的运行环境。主要的环境变量配置包括 `JAVA_HOME`,`HADOOP_HOME`,`HADOOP_CONF_DIR`
 
 ```bash
 # JAVA_HOME, will use it to start DolphinScheduler server
 export JAVA_HOME=${JAVA_HOME:-/opt/soft/java}
 
-# Tasks related configurations, need to change the configuration if you use the related tasks.
+# DS will auto load HDFS configuration from environment
 export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
 export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
-export SPARK_HOME=${SPARK_HOME:-/opt/soft/spark}
-export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python}
-export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
-export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
-export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}
-
-export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
 
 # applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log
 export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/*
diff --git a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java
index 1ceb7ab140f4..62299b705ac0 100644
--- a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java
+++ b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java
@@ -148,8 +148,6 @@ private void init() throws NullPointerException {
             // the default is the local file system
             if (StringUtils.isNotBlank(defaultFS)) {
                 configuration.set(Constants.HDFS_DEFAULT_FS, defaultFS);
-                // todo : question - Is this convenient for users to configure HDFS? And Whether it is convenient enough
-                // to configure environment variables `HADOOP_CONF_DIR`
                 Map<String, String> fsRelatedProps = PropertyUtils.getPrefixedProperties("resource.hdfs.fs.");
                 fsRelatedProps.forEach((key, value) -> configuration.set(key.substring(14), value));
             } else {
diff --git a/script/env/dolphinscheduler_env.sh b/script/env/dolphinscheduler_env.sh
index 2c829e9fcccb..0d1b6b90a276 100755
--- a/script/env/dolphinscheduler_env.sh
+++ b/script/env/dolphinscheduler_env.sh
@@ -18,9 +18,12 @@
 
 # Never put sensitive config such as database password here in your production environment,
 
-# auto load hdfs configuration from environment
-#export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
-#export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
+# JAVA_HOME, will use it to start DolphinScheduler server
+export JAVA_HOME=${JAVA_HOME:-/opt/soft/java}
+
+#DS will auto load hdfs configuration from environment
+export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
+export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
 
 # applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log
 #export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/*
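For readers unfamiliar with the `${VAR:-default}` expansion used in `dolphinscheduler_env.sh` above: it keeps an already-exported value and only falls back to the literal otherwise. A rough Java equivalent of that resolution, with the sample paths treated purely as illustrative defaults:

```java
import java.util.Optional;

// Sketch of how the shell defaults behave, expressed in Java. Note that unlike
// the :- expansion, Optional does not treat an empty string as unset.
public class EnvDefaultSketch {

    static String envOrDefault(String name, String fallback) {
        return Optional.ofNullable(System.getenv(name)).orElse(fallback);
    }

    public static void main(String[] args) {
        System.out.println(envOrDefault("HADOOP_HOME", "/opt/soft/hadoop"));
        System.out.println(envOrDefault("HADOOP_CONF_DIR", "/opt/soft/hadoop/etc/hadoop"));
    }
}
```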
From 3e5fd3f7d41f383c1d86f5bf6847f7f48c9dac03 Mon Sep 17 00:00:00 2001
From: "xu.xiaojing"
Date: Fri, 3 Feb 2023 18:32:34 +0800
Subject: [PATCH 6/8] add compatibility for Hadoop 2.2

---
 .../plugin/storage/hdfs/HdfsStorageOperator.java | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java
index 62299b705ac0..28b19fef2263 100644
--- a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java
+++ b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/main/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperator.java
@@ -199,8 +199,11 @@ private String getHadoopConfPath(String hadoopConfDirEnv, String hadoopHomeEnv)
         if (StringUtils.isBlank(hadoopConfDirEnv) || !Files.exists(java.nio.file.Paths.get(hadoopConfDirEnv))) {
             if (StringUtils.isNotBlank(hadoopHomeEnv)) {
                 java.nio.file.Path confPath = Paths.get(hadoopHomeEnv, "conf");
+                java.nio.file.Path confPath2 = Paths.get(hadoopHomeEnv, "/etc/hadoop"); // hadoop 2.2
                 if (Files.exists(confPath)) {
                     hadoopConfPath = confPath.toString();
+                } else if (Files.exists(confPath2)) {
+                    hadoopConfPath = confPath2.toString();
                 }
             }
         } else {

From 836f3261ebb7ff737f2e44cc3b70bd0822bde18d Mon Sep 17 00:00:00 2001
From: "xu.xiaojing"
Date: Sat, 4 Feb 2023 19:28:39 +0800
Subject: [PATCH 7/8] remove controversial env variable JAVA_HOME

---
 script/env/dolphinscheduler_env.sh | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/script/env/dolphinscheduler_env.sh b/script/env/dolphinscheduler_env.sh
index 0d1b6b90a276..fd3a5fd4f670 100755
--- a/script/env/dolphinscheduler_env.sh
+++ b/script/env/dolphinscheduler_env.sh
@@ -18,9 +18,6 @@
 
 # Never put sensitive config such as database password here in your production environment,
 
-# JAVA_HOME, will use it to start DolphinScheduler server
-export JAVA_HOME=${JAVA_HOME:-/opt/soft/java}
-
 #DS will auto load hdfs configuration from environment
 export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
 export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
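Taken together, patches 3 and 6 converge on a three-step lookup for the Hadoop configuration directory: an existing `HADOOP_CONF_DIR` wins, then `$HADOOP_HOME/conf` (the Hadoop 1.x layout), then `$HADOOP_HOME/etc/hadoop` (the Hadoop 2.x layout). A standalone sketch of that order, an illustration rather than the exact DolphinScheduler source:

```java
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

// Sketch of the resolution order the patch series arrives at. Returns null
// when nothing matches, mirroring getHadoopConfPath.
public class ConfDirResolutionSketch {

    static String resolveConfDir(String confDirEnv, String homeEnv) {
        // 1. An exported HADOOP_CONF_DIR that actually exists wins outright.
        if (confDirEnv != null && !confDirEnv.isEmpty() && Files.exists(Paths.get(confDirEnv))) {
            return confDirEnv;
        }
        // 2./3. Otherwise probe the two conventional layouts under HADOOP_HOME.
        if (homeEnv != null && !homeEnv.isEmpty()) {
            for (String candidate : new String[]{"conf", "etc/hadoop"}) {
                Path path = Paths.get(homeEnv, candidate);
                if (Files.exists(path)) {
                    return path.toString();
                }
            }
        }
        return null;
    }

    public static void main(String[] args) {
        System.out.println(resolveConfDir(System.getenv("HADOOP_CONF_DIR"), System.getenv("HADOOP_HOME")));
    }
}
```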
From a5ac4bb59eda07c62de6e736b91e537dc86ac4e3 Mon Sep 17 00:00:00 2001
From: "xu.xiaojing"
Date: Mon, 6 Feb 2023 15:34:32 +0800
Subject: [PATCH 8/8] remove 'public' modifier in HdfsStorageOperatorTest

---
 .../storage/hdfs/HdfsStorageOperatorTest.java | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/test/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperatorTest.java b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/test/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperatorTest.java
index 3fd39f7e2c16..c568a21b04e3 100644
--- a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/test/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperatorTest.java
+++ b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-hdfs/src/test/java/org/apache/dolphinscheduler/plugin/storage/hdfs/HdfsStorageOperatorTest.java
@@ -46,35 +46,35 @@ public class HdfsStorageOperatorTest {
     private static final Logger logger = LoggerFactory.getLogger(HdfsStorageOperatorTest.class);
 
     @Test
-    public void getHdfsTenantDir() {
+    void getHdfsTenantDir() {
         HdfsStorageOperator hdfsStorageOperator = new HdfsStorageOperator();
         logger.info(hdfsStorageOperator.getHdfsTenantDir("1234"));
         Assertions.assertTrue(true);
     }
 
     @Test
-    public void getHdfsUdfFileName() {
+    void getHdfsUdfFileName() {
         HdfsStorageOperator hdfsStorageOperator = new HdfsStorageOperator();
         logger.info(hdfsStorageOperator.getHdfsUdfFileName("admin", "file_name"));
         Assertions.assertTrue(true);
     }
 
     @Test
-    public void getHdfsResourceFileName() {
+    void getHdfsResourceFileName() {
         HdfsStorageOperator hdfsStorageOperator = new HdfsStorageOperator();
         logger.info(hdfsStorageOperator.getHdfsResourceFileName("admin", "file_name"));
         Assertions.assertTrue(true);
     }
 
     @Test
-    public void getHdfsFileName() {
+    void getHdfsFileName() {
         HdfsStorageOperator hdfsStorageOperator = new HdfsStorageOperator();
         logger.info(hdfsStorageOperator.getHdfsFileName(ResourceType.FILE, "admin", "file_name"));
         Assertions.assertTrue(true);
     }
 
     @Test
-    public void getAppAddress() {
+    void getAppAddress() {
         HdfsStorageOperator hdfsStorageOperator = new HdfsStorageOperator();
         try (MockedStatic<HttpUtils> mockedHttpUtils = Mockito.mockStatic(HttpUtils.class)) {
             mockedHttpUtils.when(() -> HttpUtils.get("http://ds1:8088/ws/v1/cluster/info"))
@@ -86,7 +86,7 @@
 
     @DisplayName("test load Hdfs Configuration by env available HADOOP_CONF_DIR, and directory exist")
     @Test
-    public void testGetHadoopConfPathFromEnvByHADOOP_CONF_DIR1() throws InvocationTargetException, NoSuchMethodException, IllegalAccessException {
+    void testGetHadoopConfPathFromEnvByHADOOP_CONF_DIR1() throws InvocationTargetException, NoSuchMethodException, IllegalAccessException {
         String hadoopConfDirEnv = System.getProperty("user.dir");
         String hadoopHomeEnv = "/not_expected";
         Assertions.assertEquals(hadoopConfDirEnv, invokeGetHadoopConfPath(hadoopConfDirEnv, hadoopHomeEnv));
@@ -94,7 +94,7 @@
 
     @DisplayName("test load Hdfs Configuration by env available HADOOP_CONF_DIR, but directory not exist")
     @Test
-    public void testGetHadoopConfPathFromEnvByHADOOP_CONF_DIR2() throws InvocationTargetException, NoSuchMethodException, IllegalAccessException {
+    void testGetHadoopConfPathFromEnvByHADOOP_CONF_DIR2() throws InvocationTargetException, NoSuchMethodException, IllegalAccessException {
         String hadoopConfDirEnv = "/not_exist";
         String hadoopHomeEnv = null;
         Assertions.assertNull(invokeGetHadoopConfPath(hadoopConfDirEnv, hadoopHomeEnv));
@@ -102,7 +102,7 @@
 
     @DisplayName("test load Hdfs Configuration by env available HADOOP_HOME, and directory exist")
     @Test
-    public void testGetHadoopConfPathFromEnvByHADOOP_HOME1() throws InvocationTargetException, NoSuchMethodException, IllegalAccessException, IOException {
+    void testGetHadoopConfPathFromEnvByHADOOP_HOME1() throws InvocationTargetException, NoSuchMethodException, IllegalAccessException, IOException {
         String hadoopConfDirEnv = null;
         String hadoopHomeEnv = System.getProperty("user.dir");
         Path homeConfPath = Paths.get(hadoopHomeEnv, "conf");
@@ -114,7 +114,7 @@
 
     @DisplayName("test load Hdfs Configuration by env available HADOOP_HOME, and directory not exist")
     @Test
-    public void testGetHadoopConfPathFromEnvByHADOOP_HOME2() throws InvocationTargetException, NoSuchMethodException, IllegalAccessException {
+    void testGetHadoopConfPathFromEnvByHADOOP_HOME2() throws InvocationTargetException, NoSuchMethodException, IllegalAccessException {
         String hadoopConfDirEnv = null;
         String hadoopHomeEnv = "/not_exist";
         Assertions.assertNull(invokeGetHadoopConfPath(hadoopConfDirEnv, hadoopHomeEnv));
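Patch 8 applies a JUnit 5 convention: Jupiter discovers test classes and `@Test` methods reflectively, so the `public` modifier is redundant and package-private visibility is preferred. A minimal illustration, assuming only `junit-jupiter` on the test classpath; the class and assertion are examples, not project code:

```java
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;

// Both the class and the test method are package-private; JUnit 5 still
// discovers and runs them, which is what makes patch 8 a pure cleanup.
class VisibilityConventionTest {

    @DisplayName("package-private test methods are picked up by JUnit 5")
    @Test
    void packagePrivateMethodRuns() {
        Assertions.assertTrue(true);
    }
}
```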