diff --git a/src/isilon_hadoop_tools/_scripts.py b/src/isilon_hadoop_tools/_scripts.py
index eb32559..9b79568 100644
--- a/src/isilon_hadoop_tools/_scripts.py
+++ b/src/isilon_hadoop_tools/_scripts.py
@@ -33,7 +33,7 @@ def base_cli(parser=None):
     parser.add_argument(
         '--dist',
         help='the Hadoop distribution to be deployed',
-        choices=('cdh', 'hdp'),
+        choices=('cdh', 'cdp', 'hdp'),
         required=True,
     )
     parser.add_argument(
@@ -89,6 +89,7 @@ def isilon_create_users(argv=None):
 
     identities = {
         'cdh': isilon_hadoop_tools.identities.cdh_identities,
+        'cdp': isilon_hadoop_tools.identities.cdp_identities,
         'hdp': isilon_hadoop_tools.identities.hdp_identities,
     }[args.dist](args.zone)
 
@@ -146,6 +147,7 @@ def isilon_create_directories(argv=None):
 
     directories = {
         'cdh': isilon_hadoop_tools.directories.cdh_directories,
+        'cdp': isilon_hadoop_tools.directories.cdp_directories,
         'hdp': isilon_hadoop_tools.directories.hdp_directories,
     }[args.dist](identity_suffix=suffix)
 
diff --git a/src/isilon_hadoop_tools/directories.py b/src/isilon_hadoop_tools/directories.py
index 831ac1a..c727645 100644
--- a/src/isilon_hadoop_tools/directories.py
+++ b/src/isilon_hadoop_tools/directories.py
@@ -16,6 +16,7 @@
 
     # Functions
     'cdh_directories',
+    'cdp_directories',
     'hdp_directories',
 
     # Objects
@@ -132,6 +133,51 @@ def cdh_directories(identity_suffix=None):
     return directories
 
 
+def cdp_directories(identity_suffix=None):
+    """Directories needed for Cloudera Data Platform"""
+    directories = [
+        HDFSDirectory('/', 'hdfs', 'hadoop', 0o755),
+        HDFSDirectory('/hbase', 'hbase', 'hbase', 0o755),
+        HDFSDirectory('/solr', 'solr', 'solr', 0o775),
+        HDFSDirectory('/tmp', 'hdfs', 'supergroup', 0o1777),
+        HDFSDirectory('/tmp/hive', 'hive', 'supergroup', 0o777),
+        HDFSDirectory('/tmp/logs', 'yarn', 'hadoop', 0o1777),
+        HDFSDirectory('/user', 'hdfs', 'supergroup', 0o755),
+        HDFSDirectory('/user/flume', 'flume', 'flume', 0o775),
+        HDFSDirectory('/user/hdfs', 'hdfs', 'hdfs', 0o755),
+        HDFSDirectory('/user/history', 'mapred', 'hadoop', 0o777),
+        HDFSDirectory('/user/history/done_intermediate', 'mapred', 'hadoop', 0o1777),
+        HDFSDirectory('/user/hive', 'hive', 'hive', 0o775),
+        HDFSDirectory('/user/hive/warehouse', 'hive', 'hive', 0o1777),
+        HDFSDirectory('/user/hue', 'hue', 'hue', 0o755),
+        HDFSDirectory('/user/hue/.cloudera_manager_hive_metastore_canary', 'hue', 'hue', 0o777),
+        HDFSDirectory('/user/impala', 'impala', 'impala', 0o775),
+        HDFSDirectory('/user/livy', 'livy', 'livy', 0o775),
+        HDFSDirectory('/user/oozie', 'oozie', 'oozie', 0o775),
+        HDFSDirectory('/user/spark', 'spark', 'spark', 0o751),
+        HDFSDirectory('/user/spark/applicationHistory', 'spark', 'spark', 0o1777),
+        HDFSDirectory('/user/spark/driverLogs', 'spark', 'spark', 0o1777),
+        HDFSDirectory('/user/sqoop', 'sqoop', 'sqoop', 0o775),
+        HDFSDirectory('/user/sqoop2', 'sqoop2', 'sqoop', 0o775),
+        HDFSDirectory('/user/tez', 'hdfs', 'supergroup', 0o775),
+        HDFSDirectory('/user/yarn', 'hdfs', 'supergroup', 0o775),
+        HDFSDirectory('/user/yarn/mapreduce', 'hdfs', 'supergroup', 0o775),
+        HDFSDirectory('/user/yarn/mapreduce/mr-framework', 'yarn', 'hadoop', 0o775),
+        HDFSDirectory('/user/yarn/services', 'hdfs', 'supergroup', 0o775),
+        HDFSDirectory('/user/yarn/services/service-framework', 'hdfs', 'supergroup', 0o775),
+        HDFSDirectory('/user/zeppelin', 'zeppelin', 'zeppelin', 0o775),
+        HDFSDirectory('/warehouse', 'hdfs', 'supergroup', 0o775),
+        HDFSDirectory('/warehouse/tablespace/external', 'hdfs', 'supergroup', 0o775),
+        HDFSDirectory('/warehouse/tablespace/managed', 'hdfs', 'supergroup', 0o775),
+        HDFSDirectory('/warehouse/tablespace/external/hive', 'hive', 'hive', 0o1775),
+        HDFSDirectory('/warehouse/tablespace/managed/hive', 'hive', 'hive', 0o1775),
+    ]
+    if identity_suffix:
+        for directory in directories:
+            directory.apply_identity_suffix(identity_suffix)
+    return directories
+
+
 def hdp_directories(identity_suffix=None):
     """Directories needed for Hortonworks Data Platform"""
     directories = [
diff --git a/src/isilon_hadoop_tools/identities.py b/src/isilon_hadoop_tools/identities.py
index de61e18..7ce982d 100644
--- a/src/isilon_hadoop_tools/identities.py
+++ b/src/isilon_hadoop_tools/identities.py
@@ -12,6 +12,7 @@ __all__ = [
 
     # Functions
     'cdh_identities',
+    'cdp_identities',
    'hdp_identities',
     'iterate_identities',
     'log_identities',
@@ -388,6 +389,79 @@ def cdh_identities(zone):
     return identities
 
 
+def cdp_identities(zone):
+    """Identities needed for Cloudera Data Platform"""
+    smoke_user = ('cloudera-scm', 'user')
+    identities = {
+        'groups': set(),  # Groups with no users in them.
+        'users': {
+            'accumulo': ('accumulo', set()),
+            'anonymous': ('anonymous', set()),
+            'apache': ('apache', set()),
+            'atlas': ('atlas', {'hadoop', 'supergroup'}),
+            'cloudera-scm': ('cloudera-scm', set()),
+            'cmjobuser': ('cmjobuser', set()),
+            'cruisecontrol': ('cruisecontrol', set()),
+            'druid': ('druid', {'hadoop', 'supergroup'}),
+            'flume': ('flume', set()),
+            'hbase': ('hbase', {'hadoop', 'supergroup'}),
+            'hdfs': ('hdfs', {'hadoop', 'supergroup'}),
+            'hive': ('hive', set()),
+            'HTTP': ('HTTP', {'hadoop', 'supergroup'}),
+            'httpfs': ('httpfs', set()),
+            'hue': ('hue', set()),
+            'impala': ('impala', {'hive'}),
+            'kafka': ('kafka', set()),
+            'keytrustee': ('keytrustee', set()),
+            'kms': ('kms', set()),
+            'knox': ('knox', set()),
+            'knoxui': ('knoxui', set()),
+            'kudu': ('kudu', set()),
+            'llama': ('llama', set()),
+            'livy': ('livy', set()),
+            'mapred': ('mapred', {'hadoop', 'supergroup'}),
+            'oozie': ('oozie', set()),
+            'phoenix': ('phoenix', set()),
+            'ranger': ('ranger', {'hadoop', 'supergroup'}),
+            'rangeradmin': ('rangeradmin', set()),
+            'rangerlookup': ('rangerlookup', set()),
+            'rangerraz': ('rangerraz', set()),
+            'rangerrms': ('rangerrms', set()),
+            'rangertagsync': ('rangertagsync', set()),
+            'rangerusersync': ('rangerusersync', set()),
+            'schemaregistry': ('schemaregistry', set()),
+            'sentry': ('sentry', set()),
+            'solr': ('solr', set()),
+            'spark': ('spark', set()),
+            'sqoop': ('sqoop', {'sqoop2'}),
+            'sqoop2': ('sqoop2', {'sqoop'}),
+            'streamsmsgmgr': ('streamsmsgmgr', set()),
+            'tez': ('tez', set()),
+            'superset': ('superset', set()),
+            'yarn': ('yarn', {'hadoop', 'supergroup'}),
+            'zeppelin': ('zeppelin', set()),
+            'zookeeper': ('zookeeper', set()),
+        },
+        'proxy_users': {
+            'flume': {smoke_user, ('hadoop', 'group')},
+            'hive': {smoke_user, ('hadoop', 'group')},
+            'hue': {smoke_user, ('hadoop', 'group')},
+            'impala': {smoke_user, ('hadoop', 'group')},
+            'mapred': {smoke_user, ('hadoop', 'group')},
+            'oozie': {smoke_user, ('hadoop', 'group')},
+            'phoenix': {smoke_user, ('hadoop', 'group')},
+            'yarn': {smoke_user, ('hadoop', 'group')},
+            'knox': {smoke_user, ('hadoop', 'group')},
+            'hdfs': {smoke_user, ('hadoop', 'group')},
+            'livy': {smoke_user, ('hadoop', 'group')},
+            'HTTP': {smoke_user},
+        },
+    }
+    if zone.lower() != 'system':
+        identities['users']['admin'] = ('admin', set())
+    return identities
+
+
 def hdp_identities(zone):
     """Identities needed for Hortonworks Data Platform"""
     smoke_user = ('ambari-qa', 'user')
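
A minimal sketch of how the new CDP wiring could be exercised outside the CLI entry points, assuming the package is installed with the changes above applied; the zone name and suffix below are hypothetical and used only for illustration:

    from isilon_hadoop_tools import directories, identities

    # Identities the tools would create for a non-System access zone
    # (the 'admin' user is added for any zone other than System).
    idents = identities.cdp_identities('cdp-zone')
    print(len(idents['users']), 'users;', len(idents['proxy_users']), 'proxy users')

    # Directory layout, with the optional per-zone identity suffix that
    # isilon_create_directories would pass in.
    dirs = directories.cdp_directories(identity_suffix='cdp-zone')
    print(len(dirs), 'HDFS directories defined for CDP')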