Skip to content
This repository has been archived by the owner on Jan 8, 2019. It is now read-only.

Commit

Permalink
Zookeeper updates (#1233)
Browse files Browse the repository at this point in the history
* Add zookeeper metrics

* Correct errant Zookeeper log dir errors for users

* Provide Zookeeper GC controls and logging

* Correct ZK GC log locations

* Roll HDFS GC logs

* Correct ZK GC log size to match HBase

* Add ZK GC dir

* Add locking_resource to zookeeper-server

* rubocop-ify zookeeper_impl

* Correct GC opts to have spaces

* Configure the leader not to serve client requests when there are more than 3 ZK nodes

* More correctness and Rubocop Changes

* Move the calculation into the recipes
  • Loading branch information
cbaenziger authored and aespinosa committed Jul 12, 2018
1 parent 26adfba commit e80fd28
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 42 deletions.
3 changes: 3 additions & 0 deletions cookbooks/bcpc-hadoop/attributes/default.rb
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@
'-XX:+HeapDumpOnOutOfMemoryError ' \
'-XX:+PrintTenuringDistribution ' \
'-XX:+ExitOnOutOfMemoryError ' \
'-XX:+UseGCLogFileRotation ' \
'-XX:GCLogFileSize=20M ' \
'-XX:NumberOfGCLogFiles=20 ' \
"-agentpath:#{node['bcpc-hadoop']['jvmkill']['lib_file']}"

# GC Options for DataNode
Expand Down
21 changes: 18 additions & 3 deletions cookbooks/bcpc-hadoop/attributes/jmxtrans_agent.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1152,7 +1152,7 @@
]

# zookeeper
default['bcpc']['hadoop']['jmxtrans_agent']['zookeeper']['xml'] = '/etc/hadoop/conf/jmxtrans_agent_zookeeper.xml'
default['bcpc']['hadoop']['jmxtrans_agent']['zookeeper']['xml'] = '/etc/zookeeper/conf/jmxtrans_agent_zookeeper.xml'
default['bcpc']['hadoop']['jmxtrans_agent']['zookeeper']['name_prefix'] = 'jmx.zookeeper'
default['bcpc']['hadoop']['jmxtrans_agent']['zookeeper']['queries'] = default['bcpc']['hadoop']['jmxtrans_agent']['basic']['queries'] + [
{
Expand All @@ -1162,10 +1162,25 @@
'attributes' => 'QuorumSize'
},
{
'objectName' => 'org.apache.ZooKeeperService:name0=ReplicatedServer_id*,name1=replica.*,name2=Follower,name3=InMemoryDataTree',
'objectName' => 'org.apache.ZooKeeperService:name0=ReplicatedServer_id*,name1=replica.*',
'resultAlias' => 'zookeeper.#attribute#',
'type' => 'gauge',
'attributes' => 'NodeCount'
'attributes' =>
'AvgRequestLatency,' \
'MaxRequestLatency,' \
'MinRequestLatency,' \
'NumAliveConnections,' \
'OutstandingRequests,' \
'PacketsReceived,' \
'PacketsSent'
},
{
'objectName' => 'org.apache.ZooKeeperService:name0=ReplicatedServer_id*,name1=replica.*,name2=*,name3=InMemoryDataTree',
'resultAlias' => 'zookeeper.#attribute#',
'type' => 'gauge',
'attributes' =>
'NodeCount,' \
'WatchCount'
}
]

Expand Down
33 changes: 33 additions & 0 deletions cookbooks/bcpc-hadoop/attributes/zookeeper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,36 @@

# ZooKeeper snapshot purge interval in hours
default[:bcpc][:hadoop][:zookeeper][:snap][:purge_interval] = 24

# ZooKeeper memory controls
# max_size:  hard cap on the ZooKeeper JVM heap. The recipe renders the chosen
#            heap with an "m" suffix (-Xms/-Xmx), so this value is in megabytes.
# max_ratio: fraction of node['memory']['total'] the heap may use. The recipe
#            takes the smaller of this computed size and max_size.
default['bcpc']['hadoop']['zookeeper']['xmx']['max_size'] = 4_096
default['bcpc']['hadoop']['zookeeper']['xmx']['max_ratio'] = 0.10

# JVM flags appended to the ZooKeeper GC options: GC log rotation, CMS/ParNew
# collector selection, verbose GC diagnostics, OOM handling, and the jvmkill
# agent. Built as a single space-separated string.
common_opts = (
  %w[
    -XX:+UseGCLogFileRotation
    -XX:GCLogFileSize=20M
    -XX:NumberOfGCLogFiles=20
    -XX:+UseParNewGC
    -XX:+UseConcMarkSweepGC
    -verbose:gc
    -XX:+PrintHeapAtGC
    -XX:+PrintGCDetails
    -XX:+PrintGCTimeStamps
    -XX:+PrintGCDateStamps
    -XX:+UseNUMA
    -XX:+PrintGCApplicationStoppedTime
    -XX:+UseCompressedOops
    -XX:+PrintClassHistogram
    -XX:+PrintGCApplicationConcurrentTime
    -XX:+UseCMSInitiatingOccupancyOnly
    -XX:+HeapDumpOnOutOfMemoryError
    -XX:+PrintTenuringDistribution
    -XX:+ExitOnOutOfMemoryError
  ] << "-agentpath:#{node['bcpc-hadoop']['jvmkill']['lib_file']}"
).join(' ')

# GC options for the ZooKeeper server JVM.
# The $$, $(hostname) and $(date ...) fragments are deliberately left literal:
# this string is rendered into zookeeper-env.sh inside a double-quoted shell
# string, so the shell expands them when the file is sourced.
default['bcpc']['hadoop']['zookeeper']['gc_opts'] = [
  '-server',
  '-XX:ParallelGCThreads=4',
  '-XX:CMSInitiatingOccupancyFraction=70',
  '-Xloggc:/var/log/zookeeper/gc/gc.log-$$-$(hostname)-$(date +\'%Y%m%d%H%M\').log',
  '-XX:HeapDumpPath=/var/log/zookeeper/heap-dump-$$-$(hostname)-$(date +\'%Y%m%d%H%M\').hprof',
  common_opts
].join(' ')
105 changes: 67 additions & 38 deletions cookbooks/bcpc-hadoop/recipes/zookeeper_impl.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,92 +6,121 @@
include_recipe 'bcpc-hadoop::zookeeper_packages'
include_recipe 'bach_krb5::keytab_directory'

user_ulimit "zookeeper" do
filehandle_limit 65536
user_ulimit 'zookeeper' do
filehandle_limit 65_536
end

configure_kerberos 'zookeeper_kerb' do
service_name 'zookeeper'
end

directory "/var/run/zookeeper" do
owner "zookeeper"
group "zookeeper"
mode "0755"
directory '/var/run/zookeeper' do
owner 'zookeeper'
group 'zookeeper'
mode 0755
action :create
end

link "/usr/bin/zookeeper-server-initialize" do
to "/usr/hdp/current/zookeeper-client/bin/zookeeper-server-initialize"
link '/usr/bin/zookeeper-server-initialize' do
to "/usr/hdp/#{node['bcpc']['hadoop']['distribution']['active_release']}/zookeeper-client/bin/zookeeper-server-initialize"
end

#Install jolokia's jvm agent to node['bcpc']['jolokia']['path']
if node[:bcpc][:jolokia][:enable] == true
# Install jolokia's jvm agent to node['bcpc']['jolokia']['path']
if node['bcpc']['jolokia']['enable'] == true
include_recipe 'bcpc-hadoop::jolokia'
end

template "#{node[:bcpc][:hadoop][:zookeeper][:conf_dir]}/zookeeper-env.sh" do
source "zk_zookeeper-env.sh.erb"
zk_env_path = \
"#{node['bcpc']['hadoop']['zookeeper']['conf_dir']}/zookeeper-env.sh"
template zk_env_path do
auto_size = (node['memory']['total'].to_i *
node['bcpc']['hadoop']['zookeeper']['xmx']['max_ratio']/1024)
heap = [node['bcpc']['hadoop']['zookeeper']['xmx']['max_size'],
auto_size.floor].min
newsize = [(0.125*heap).ceil, 3072].min
source 'zk_zookeeper-env.sh.erb'
mode 0644
variables(
zk_jmx_port: node[:bcpc][:hadoop][:zookeeper][:jmx][:port],
zk_jmx_port: node['bcpc']['hadoop']['zookeeper']['jmx']['port'],
jmxtrans_agent_lib: node['bcpc']['jmxtrans_agent']['lib_file'],
jmxtrans_agent_xml: node['bcpc']['hadoop']['jmxtrans_agent']['zookeeper']['xml']
jmxtrans_agent_xml: node['bcpc']['hadoop']['jmxtrans_agent']['zookeeper']['xml'],
auto_size: auto_size,
heap: heap,
newsize: newsize
)
end

directory node[:bcpc][:hadoop][:zookeeper][:data_dir] do
directory node['bcpc']['hadoop']['zookeeper']['data_dir'] do
recursive true
owner node[:bcpc][:hadoop][:zookeeper][:owner]
group node[:bcpc][:hadoop][:zookeeper][:group]
owner node['bcpc']['hadoop']['zookeeper']['owner']
group node['bcpc']['hadoop']['zookeeper']['group']
mode 0755
end

template "/usr/hdp/#{node[:bcpc][:hadoop][:distribution][:active_release]}/zookeeper/bin/zkServer.sh" do
source "zk_zkServer.sh.erb"
zkServer_path = "/usr/hdp/#{node['bcpc']['hadoop']['distribution']['active_release']}/zookeeper/bin/zkServer.sh"
template zkServer_path do
source 'zk_zkServer.sh.erb'
end

link '/etc/init.d/zookeeper-server' do
to "/usr/hdp/#{node[:bcpc][:hadoop][:distribution][:active_release]}/zookeeper/etc/init.d/zookeeper-server"
to "/usr/hdp/#{node['bcpc']['hadoop']['distribution']['active_release']}/zookeeper/etc/init.d/zookeeper-server"
notifies :run, 'bash[kill zookeeper-org-apache-zookeeper-server-quorum-QuorumPeerMain]', :immediate
end

bash "kill zookeeper-org-apache-zookeeper-server-quorum-QuorumPeerMain" do
code "pkill -u zookeeper -f org.apache.zookeeper.server.quorum.QuorumPeerMain"
bash 'kill zookeeper-org-apache-zookeeper-server-quorum-QuorumPeerMain' do
code 'pkill -u zookeeper -f org.apache.zookeeper.server.quorum.QuorumPeerMain'
action :nothing
returns [0, 1]
end

# Directory that receives the JVM GC logs named by the -Xloggc option in the
# ZooKeeper gc_opts attribute; owned by the ZooKeeper service account.
gc_log_dir = '/var/log/zookeeper/gc/'
directory gc_log_dir do
  action :create
  owner node['bcpc']['hadoop']['zookeeper']['owner']
  group node['bcpc']['hadoop']['zookeeper']['group']
end

my_id_path = "#{node['bcpc']['hadoop']['zookeeper']['data_dir']}/myid"
bash 'init-zookeeper' do
code "service zookeeper-server init " +
code 'service zookeeper-server init ' +
"--myid=#{bcpc_8bit_node_number}"

not_if do
::File.exists?("#{node[:bcpc][:hadoop][:zookeeper][:data_dir]}/myid")
::File.exists?(my_id_path)
end

# race immediate run of restarting ZK on initial stand-up
subscribes :run, "link[/etc/init.d/zookeeper-server]", :immediate
subscribes :run, 'link[/etc/init.d/zookeeper-server]', :immediate
end

file "#{node[:bcpc][:hadoop][:zookeeper][:data_dir]}/myid" do
file my_id_path do
content bcpc_8bit_node_number.to_s
owner node[:bcpc][:hadoop][:zookeeper][:owner]
group node[:bcpc][:hadoop][:zookeeper][:group]
owner node['bcpc']['hadoop']['zookeeper']['owner']
group node['bcpc']['hadoop']['zookeeper']['group']
mode 0644
# race immediate run of restarting ZK on initial stand-up
subscribes :create, "bash[init-zookeeper]", :immediate
subscribes :create, 'bash[init-zookeeper]', :immediate
end

service "zookeeper-server" do
service 'zookeeper-server' do
supports :status => true, :restart => true, :reload => false
action [:enable, :start]
subscribes :restart, "link[/etc/init.d/zookeeper-server]", :immediate
subscribes :restart, "template[#{node[:bcpc][:hadoop][:zookeeper][:conf_dir]}/zoo.cfg]", :delayed
subscribes :restart, "template[#{node[:bcpc][:hadoop][:zookeeper][:conf_dir]}/zookeeper-env.sh]", :delayed
subscribes :restart, "link[/usr/lib/zookeeper/bin/zkServer.sh]", :delayed
subscribes :restart, "file[#{node[:bcpc][:hadoop][:zookeeper][:data_dir]}/myid]", :delayed
subscribes :restart, "user_ulimit[zookeeper]", :delayed
subscribes :restart, "bash[hdp-select zookeeper-server]", :delayed
subscribes :restart, "log[jdk-version-changed]", :delayed
end

# Route ZooKeeper restarts through locking_resource: its :serialize_process
# action performs :restart on service[zookeeper-server] for the process that
# matches the pattern below.
# NOTE(review): locking_resource is defined outside this recipe; presumably it
# takes a cluster-wide lock so only one ZK node restarts at a time -- confirm.
locking_resource 'zookeeper-server' do
  process_identifier = 'org.apache.zookeeper.server.quorum.QuorumPeerMain'
  resource 'service[zookeeper-server]'
  # Identify the running ZK server by its main class on the full command line,
  # restricted to processes owned by the ZooKeeper service account.
  process_pattern do
    command_string process_identifier
    user node['bcpc']['hadoop']['zookeeper']['owner']
    full_cmd true
  end
  perform :restart
  action :serialize_process
  subscribes :serialize_process, 'link[/etc/init.d/zookeeper-server]', :immediate
  subscribes :serialize_process, "template[#{node['bcpc']['hadoop']['zookeeper']['conf_dir']}/zoo.cfg]", :delayed
  subscribes :serialize_process, "template[#{zk_env_path}]", :delayed
  # zkServer.sh is declared as a template resource in this recipe, not a link;
  # subscribing to link[...] would never match and the restart would not fire.
  subscribes :serialize_process, "template[#{zkServer_path}]", :delayed
  subscribes :serialize_process, "file[#{my_id_path}]", :delayed
  subscribes :serialize_process, 'user_ulimit[zookeeper]', :delayed
  subscribes :serialize_process, 'bash[hdp-select zookeeper-server]', :delayed
  subscribes :serialize_process, 'log[jdk-version-changed]', :delayed
  # The name must match the directory resource exactly, trailing slash included.
  subscribes :serialize_process, 'directory[/var/log/zookeeper/gc/]', :delayed
end
5 changes: 5 additions & 0 deletions cookbooks/bcpc-hadoop/templates/default/zk_zoo.cfg.erb
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ dataLogDir=<%= node[:bcpc][:hadoop][:zookeeper][:data_log_dir] %>
clientPort=<%= node[:bcpc][:hadoop][:zookeeper][:port] %>
clientPortAddress=<%= float_host(node[:hostname]) %>

<% if @zk_hosts.size > 3 %>
# more than three ZK servers, so leader will not serve client connections
leaderServes=no
<% end %>

<% @zk_hosts.sort{ |aa,bb| aa[:fqdn] <=> bb[:fqdn] }.each do |ss| %>
<%="server.#{bcpc_8bit_node_number(ss)}=#{float_host(ss[:fqdn])}:#{node[:bcpc][:hadoop][:zookeeper][:leader_connect][:port]}:#{node[:bcpc][:hadoop][:zookeeper][:leader_elect][:port]}" %>
<% end %>
Expand Down
10 changes: 9 additions & 1 deletion cookbooks/bcpc-hadoop/templates/default/zk_zookeeper-env.sh.erb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@
################################################

export JAVA_HOME=<%= node['bcpc']['hadoop']['java'] %>
export ZOO_LOG_DIR=<%= node['bcpc']['hadoop']['zookeeper']['log_dir'] %>
# Only the zookeeper account logs to the managed log directory; any other user
# sourcing this file gets a log directory under their own home instead.
# The substitution is quoted and compared with POSIX "=" so the test cannot
# break on empty output or under a non-bash /bin/sh.
if [ "$(whoami)" = 'zookeeper' ]; then
  export ZOO_LOG_DIR=<%= node['bcpc']['hadoop']['zookeeper']['log_dir'] %>
else
  export ZOO_LOG_DIR=~/
fi
export ZOO_PID_DIR=/var/run/zookeeper
export ZOOPIDFILE=$ZOO_PID_DIR/zookeeper-server
export ZOOCFGDIR=<%= node['bcpc']['hadoop']['zookeeper']['conf_dir'] %>
Expand All @@ -32,5 +36,9 @@ SERVER_JVMFLAGS="$SERVER_JVMFLAGS -Djava.security.auth.login.config=/etc/zookeep
CLIENT_JVMFLAGS="$CLIENT_JVMFLAGS -Djava.security.auth.login.config=/etc/zookeeper/conf/zookeeper-client.jaas"
<% end %>

# Heap sizing (computed in the recipe, in MB) and GC options for the server JVM.
# Each piece is appended in its own assignment: the shell does not concatenate
# adjacent quoted strings across a line continuation, it treats them as
# separate words and would try to run the second one as a command.
# gc_opts contains $$, $(hostname) and $(date ...), which expand here.
SERVER_JVMFLAGS="$SERVER_JVMFLAGS -Xms<%= @heap %>m -Xmx<%= @heap %>m"
SERVER_JVMFLAGS="$SERVER_JVMFLAGS -Xmn<%= @newsize %>m"
SERVER_JVMFLAGS="$SERVER_JVMFLAGS <%= node['bcpc']['hadoop']['zookeeper']['gc_opts'] %>"

export SERVER_JVMFLAGS
export CLIENT_JVMFLAGS

0 comments on commit e80fd28

Please sign in to comment.