Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add scripts/pegasus_offline_node_list.sh & update rdsn #20

Merged
merged 1 commit into from
Apr 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions scripts/pegasus_offline_node.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#

if [ $# -le 2 ]; then
echo "USAGE: $0 <cluster-name> <cluster-meta-list> <start_task_id>"
echo "USAGE: $0 <cluster-name> <cluster-meta-list> <replica-task-id>"
echo
echo "For example:"
echo " $0 onebox 127.0.0.1:34601,127.0.0.1:34602 0"
Expand All @@ -14,7 +14,7 @@ fi

cluster=$1
meta_list=$2
start_task_id=$3
replica_task_id=$3

pwd="$( cd "$( dirname "$0" )" && pwd )"
shell_dir="$( cd $pwd/.. && pwd )"
Expand Down Expand Up @@ -59,7 +59,7 @@ if [ "$cname" != "$cluster" ]; then
exit -1
fi
pmeta=`grep primary_meta_server /tmp/$UID.pegasus.offline_node.cluster_info | grep -o '[0-9.:]*$'`
if [ "$pmeta" == ""]; then
if [ "$pmeta" == "" ]; then
echo "ERROR: extract primary_meta_server by shell failed"
exit -1
fi
Expand Down Expand Up @@ -92,7 +92,7 @@ echo
while read line
do
task_id=`echo $line | awk '{print $1}'`
if [ $task_id -ne $start_task_id ]; then
if [ $task_id -ne $replica_task_id ]; then
continue
fi
node_str=`echo $line | awk '{print $2}'`
Expand Down Expand Up @@ -190,5 +190,5 @@ fi
echo

all_finish_time=$((`date +%s`))
echo "Offline replica server task $start_task_id done."
echo "Offline replica server task $replica_task_id done."
echo "Elapsed time is $((all_finish_time - all_start_time)) seconds."
132 changes: 132 additions & 0 deletions scripts/pegasus_offline_node_list.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#!/bin/bash
#
# Offline replica servers using minos.
#

if [ $# -le 2 ]; then
echo "USAGE: $0 <cluster-name> <cluster-meta-list> <replica-task-id-list>"
echo
echo "For example:"
echo " $0 onebox 127.0.0.1:34601,127.0.0.1:34602 1,2,3"
echo
exit -1
fi

cluster=$1
meta_list=$2
replica_task_id_list=$3

pwd="$( cd "$( dirname "$0" )" && pwd )"
shell_dir="$( cd $pwd/.. && pwd )"
minos_config_dir=$(dirname $MINOS_CONFIG_FILE)/xiaomi-config/conf/pegasus
minos_client_dir=/home/work/pegasus/infra/minos/client
cd $shell_dir

minos_config=$minos_config_dir/pegasus-${cluster}.cfg
if [ ! -f $minos_config ]; then
echo "ERROR: minos config \"$minos_config\" not found"
exit -1
fi

minos_client=$minos_client_dir/deploy
if [ ! -f $minos_client ]; then
echo "ERROR: minos client \"$minos_client\" not found"
exit -1
fi

echo "Start time: `date`"
all_start_time=$((`date +%s`))
echo

echo "Generating /tmp/$UID.pegasus.offline_node_list.minos.show..."
cd $minos_client_dir
./deploy show pegasus $cluster &>/tmp/$UID.pegasus.offline_node_list.minos.show

echo "Generating /tmp/$UID.pegasus.offline_node_list.rs.list..."
grep 'Showing task [0-9][0-9]* of replica' /tmp/$UID.pegasus.offline_node_list.minos.show | awk '{print $5,$9}' | sed 's/(.*)$//' >/tmp/$UID.pegasus.offline_node_list.rs.list
replica_server_count=`cat /tmp/$UID.pegasus.offline_node_list.rs.list | wc -l`
if [ $replica_server_count -eq 0 ]; then
echo "ERROR: replica server count is 0 by minos show"
exit -1
fi
cd $shell_dir

echo "Generating /tmp/$UID.pegasus.offline_node_list.cluster_info..."
echo cluster_info | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.offline_node_list.cluster_info
cname=`grep zookeeper_root /tmp/$UID.pegasus.offline_node_list.cluster_info | grep -o '/[^/]*$' | grep -o '[^/]*$'`
if [ "$cname" != "$cluster" ]; then
echo "ERROR: cluster name and meta list not matched"
exit -1
fi
pmeta=`grep primary_meta_server /tmp/$UID.pegasus.offline_node_list.cluster_info | grep -o '[0-9.:]*$'`
if [ "$pmeta" == "" ]; then
echo "ERROR: extract primary_meta_server by shell failed"
exit -1
fi

echo "Generating /tmp/$UID.pegasus.offline_node_list.nodes..."
echo nodes | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.offline_node_list.nodes
rs_port=`grep '^[0-9.]*:' /tmp/$UID.pegasus.offline_node_list.nodes | head -n 1 | grep -o ':[0-9]*' | grep -o '[0-9]*'`
if [ "$rs_port" == "" ]; then
echo "ERROR: extract replica server port by shell failed"
exit -1
fi

echo "Checking replica task id list..."
address_list=""
id_list=""
for id in `echo $replica_task_id_list | sed 's/,/ /g'` ; do
if [ "$id_list" != "" ]; then
if echo "$id_list" | grep -q "\<$id\>" ; then
echo "ERROR: duplicate replica task id $id"
exit -1;
fi
fi
pair=`grep "^$id " /tmp/$UID.pegasus.offline_node_list.rs.list`
if [ "$pair" == "" ]; then
echo "ERROR: replica task id $id not found, refer to /tmp/$UID.pegasus.offline_node_list.minos.show"
exit -1;
fi
address=`echo $pair | awk '{print $2}'`
if [ "$id_list" != "" ]; then
id_list="$id_list $id"
address_list="$address_list,$address:$rs_port"
else
id_list="$id"
address_list="$address:$rs_port"
fi
done

echo "Set lb.assign_secondary_black_list..."
echo "remote_command -l $pmeta meta.lb.assign_secondary_black_list $address_list" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.offline_node_list.assign_secondary_black_list
set_ok=`grep "set ok" /tmp/$UID.pegasus.offline_node_list.assign_secondary_black_list | wc -l`
if [ $set_ok -ne 1 ]; then
echo "ERROR: set lb.assign_secondary_black_list failed, refer to /tmp/$UID.pegasus.offline_node_list.assign_secondary_black_list"
exit -1
fi

echo
for id in $id_list
do
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
./scripts/pegasus_offline_node.sh $cluster $meta_list $id
if [ $? -ne 0 ]; then
echo "ERROR: offline replica task $id failed"
exit -1
fi
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
echo "sleep for 10 seconds"
sleep 10
done

echo "Clear lb.assign_secondary_black_list..."
echo "remote_command -l $pmeta meta.lb.assign_secondary_black_list clear" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.offline_node_list.assign_secondary_black_list
set_ok=`grep "clear ok" /tmp/$UID.pegasus.offline_node_list.assign_secondary_black_list | wc -l`
if [ $set_ok -ne 1 ]; then
echo "ERROR: clear lb.assign_secondary_black_list failed, refer to /tmp/$UID.pegasus.offline_node_list.assign_secondary_black_list"
exit -1
fi

all_finish_time=$((`date +%s`))
echo "Offline replica server task list done."
echo "Elapsed time is $((all_finish_time - all_start_time)) seconds."