Skip to content
This repository has been archived by the owner on Jan 3, 2023. It is now read-only.

Commit

Permalink
add ec test
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoriaaa committed Apr 28, 2019
1 parent b69fe56 commit 982b928
Show file tree
Hide file tree
Showing 14 changed files with 1,075 additions and 0 deletions.
450 changes: 450 additions & 0 deletions supports/ec-performance-test/TestDFSIO_results.log

Large diffs are not rendered by default.

53 changes: 53 additions & 0 deletions supports/ec-performance-test/config
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Test cases, as an associative map from file size to file count.
# The base sync dir for a case is named "<size>_<num>", and the files
# for each case must be created under it in advance.
# Example with two cases: declare -A CASES=(["10MB"]="10" ["100MB"]="10")
declare -A CASES=(["500MB"]="10000")

# SSM home, e.g., SMART_HOME=~/smart-data-1.4.0
SMART_HOME=

# PAT home, e.g., PAT_HOME=~/PAT
PAT_HOME=/root/rui/PAT-master/

# File path
FILE_PATH=

# The src node
SRC_NODE="sr613"

# The src cluster hdfs url for sync, e.g., SRC_CLUSTER=hdfs://sr519:9000
SRC_CLUSTER=hdfs://sr613:9000

# The dest node
DEST_NODE="sr613"

# The dest cluster hdfs url for sync, e.g., DEST_CLUSTER=hdfs://sr518:9000
DEST_CLUSTER=hdfs://sr613:9000

# The namenode's hostname for remote HDFS cluster
REMOTE_NAMENODE="sr613"

# The hosts require dropping cache, e.g., HOSTS="host1 host2"
HOSTS="sr605 sr606 sr607 sr608 sr609 sr610 sr611 sr612 sr613 sr614"

# the number of mapper for distcp, e.g., MAPPER_NUM="30 60 90"
MAPPER_NUM="90"
4 changes: 4 additions & 0 deletions supports/ec-performance-test/distcp.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Test case 500MB_10000 with 90 mappers:
ec
3900s 3584s 3694s
Test case 500MB_10000 with mapper is finished!
18 changes: 18 additions & 0 deletions supports/ec-performance-test/drop_cache.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash

# Drop the OS page cache on every host listed in ${HOSTS} so each
# benchmark round starts from cold-cache conditions.
# Requires passwordless ssh as a user allowed to write
# /proc/sys/vm/drop_caches (i.e. root) on each host.

. ./config

# "sync" flushes dirty pages first; writing 3 drops page cache,
# dentries and inodes.
drop_cache="sync;echo 3 > /proc/sys/vm/drop_caches"

# drop cache for all cluster hosts
echo "drop cache for ${HOSTS}."

# ${HOSTS} is deliberately unquoted: it is a space-separated list that
# the for-loop word-splits into individual hostnames.
for host in ${HOSTS}; do
  ssh "${host}" "${drop_cache}"
done
27 changes: 27 additions & 0 deletions supports/ec-performance-test/prepare.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env bash

# Generate benchmark data for every test case in ${CASES}
# (file size -> file count) using TestDFSIO, then move the generated
# files into /<size>_<num> on the remote HDFS cluster.
# Reads CASES and REMOTE_NAMENODE from ./config.

. ./config

# generate data
for size in "${!CASES[@]}"; do
  num=${CASES[$size]}
  dir="${size}_${num}"

  ssh "${REMOTE_NAMENODE}" "hdfs dfs -mkdir /${dir}"

  # Write ${num} files of ${size} each via the DFSIO benchmark.
  hadoop jar /root/rui/hadoop-3.2.0-SNAPSHOT/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-3.2.0-SNAPSHOT-tests.jar TestDFSIO -write -nrFiles "${num}" -size "${size}"

  echo "move"

  # Relocate the generated files into the per-case dir.
  # Fix: use ${dir} directly instead of the original's mangled
  # quoting (/"${size}_$num""), which relied on accidental string
  # concatenation.
  ssh "${REMOTE_NAMENODE}" "hdfs dfs -mv /benchmarks/TestDFSIO/io_data/* /${dir}"

  # Clean up TestDFSIO's working directory.
  ssh "${REMOTE_NAMENODE}" "hdfs dfs -rm -r /benchmarks"
done

27 changes: 27 additions & 0 deletions supports/ec-performance-test/prepare_ec.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env bash

# Reset the destination directory for each test case on the dest
# cluster and enable the cluster's default erasure-coding policy on it.
# Reads CASES and DEST_NODE from ./config.

. ./config

for size in "${!CASES[@]}"; do
  file_num=${CASES[$size]}
  case_dir="${size}_${file_num}"

  # Remove anything left over from a previous round, then recreate
  # the empty destination directory.
  echo "delete data in dest dir."
  ssh ${DEST_NODE} "hdfs dfs -rm -r /dest/${case_dir}; hdfs dfs -mkdir /dest/${case_dir}"

  # Apply the default EC policy to the freshly created directory.
  ssh ${DEST_NODE} "hdfs ec -setPolicy -path /dest/${case_dir}"
done

25 changes: 25 additions & 0 deletions supports/ec-performance-test/prepare_replica.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env bash

# Prepare the replica-copy benchmark: for each test case, reset the
# /replica/<size>_<num> directory on the src cluster.
# Reads CASES and SRC_NODE from ./config.

. ./config



# Recreate per-case replica dirs (loop body runs once per CASES entry).
for size in "${!CASES[@]}"; do

num=${CASES[$size]}

dir="${size}_${num}"

# delete historical data

echo "delete data in dest dir."

# NOTE(review): this sets an EC policy on the *source* data dir /${dir};
# for a replica test one might instead expect -replicate or
# -unsetPolicy here — TODO confirm intent with the author.
ssh ${SRC_NODE} "hdfs ec -setPolicy -path /${dir}"

# Remove leftovers from previous rounds and recreate the target dir
# (the two commands are chained so mkdir runs even if rm finds nothing).
ssh ${SRC_NODE} "hdfs dfs -rm -r /replica/${dir}; hdfs dfs -mkdir /replica/${dir}"

done

52 changes: 52 additions & 0 deletions supports/ec-performance-test/run_ssm_ec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import sys
import time
from util import *


size = sys.argv[1]
num = sys.argv[2]
case = size + "_" + num
log = sys.argv[3]
action = sys.argv[4]

if action == "ec":
#rid = submit_rule("file: every 30min | path matches \"/" + case + "/*\" | ec -policy RS-6-3-1024k")
rid = submit_rule("file: path matches \"/" + case + "/*\" | ec -policy RS-6-3-1024k")
elif action == "unec":
# rid = submit_rule("file: every 30min | path matches \"/" + case + "/*\" | unec")
rid = submit_rule("file: path matches \"/" + case + "/*\" | unec")

start_rule(rid)
start_time = time.time()

rule = get_rule(rid)
last_checked = rule['numChecked']
last_cmdsgen = rule['numCmdsGen']

time.sleep(.1)
rule = get_rule(rid)
while not ((rule['numChecked'] > last_checked) and (rule['numCmdsGen'] == last_cmdsgen)):
time.sleep(.1)
rule = get_rule(rid)
last_checked = rule['numChecked']
last_cmdsgen = rule['numCmdsGen']
time.sleep(.1)
rule = get_rule(rid)

cids = get_cids_of_rule(rid)

#while True:
# rule = get_rule(rid)
# print(rule)
# if rule['numCmdsGen'] == int(num):
# break
# time.sleep(1)
failed_cids = wait_for_cmdlets(cids)
if len(failed_cids) != 0:
print "Not all ec actions succeed!"
end_time = time.time()
stop_rule(rid)
# append result to log file
f = open(log, 'a')
f.write(str(int(end_time - start_time)) + "s" + " " + '\n')
f.close()
1 change: 1 addition & 0 deletions supports/ec-performance-test/ssm.log
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Test case 500MB_10000(unec):
75 changes: 75 additions & 0 deletions supports/ec-performance-test/test_distcp_ec.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/usr/bin/env bash

# Measure distcp throughput when copying each test case into an
# EC-enabled destination directory. Each case runs three rounds under
# PAT; per-round wall times are appended to distcp.log.

echo "Get configuration from config."
. config

echo "------------------ Your configuration ------------------"
echo "PAT home is ${PAT_HOME}."
echo "Test case:"
for size in "${!CASES[@]}"; do
  echo ${size} ${CASES[$size]}
done
echo "--------------------------------------------------------"

# Resolve this script's directory so relative paths survive the cd
# into PAT_HOME below.
bin=$(dirname "${BASH_SOURCE-$0}")
bin=$(cd "${bin}">/dev/null; pwd)
log="${bin}/distcp.log"

# remove historical data in log file
printf "" > ${log}

for size in "${!CASES[@]}"; do
  case=${size}_${CASES[$size]}
  printf "Test case ${case} with ${MAPPER_NUM} mappers:\n ec\n" >> ${log}

  for i in {1..3}; do
    echo "==================== test case: $case, mapper num: ${MAPPER_NUM}, test round: $i ============================"

    # Cold caches and a fresh EC-enabled destination for every round.
    sh drop_cache.sh
    sh prepare_ec.sh

    cd ${PAT_HOME}/PAT-collecting-data

    echo "hadoop distcp -skipcrccheck -m ${MAPPER_NUM} ${SRC_CLUSTER}/${case}/* ${DEST_CLUSTER}/dest/${case}/"

    # Generate the workload script that PAT executes. Fix: the original
    # wrote results/$case_... which bash parses as the undefined
    # variable 'case_', dropping the case name from the log filename.
    echo "start_time=\`date +%s\`;\
hadoop distcp -skipcrccheck -m ${MAPPER_NUM} ${SRC_CLUSTER}/${case}/* ${DEST_CLUSTER}/dest/${case}/ > results/${case}_${MAPPER_NUM}_${i}.log 2>&1;\
end_time=\`date +%s\`;\
printf \"\$((end_time-start_time))s \" >> ${log}" > cmd.sh

    ./pat run "${case}_ec_${MAPPER_NUM}_${i}"

    cd ${bin}
  done

  # Fix: report the actual mapper count (original used undefined $m).
  printf "\nTest case ${case} with ${MAPPER_NUM} mappers is finished!\n" >> ${log}
done

74 changes: 74 additions & 0 deletions supports/ec-performance-test/test_distcp_replica.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/usr/bin/env bash

# Measure distcp throughput when copying each test case from the
# EC destination back into a replicated directory. One round per case
# under PAT; per-round wall times are appended to distcp.log.

echo "Get configuration from config."
. config

echo "------------------ Your configuration ------------------"
echo "PAT home is ${PAT_HOME}."
echo "Test case:"
for size in "${!CASES[@]}"; do
  echo ${size} ${CASES[$size]}
done
echo "--------------------------------------------------------"

# Resolve this script's directory so relative paths survive the cd
# into PAT_HOME below.
bin=$(dirname "${BASH_SOURCE-$0}")
bin=$(cd "${bin}">/dev/null; pwd)
log="${bin}/distcp.log"

# remove historical data in log file
printf "" > ${log}

# 3 replica
for size in "${!CASES[@]}"; do
  case=${size}_${CASES[$size]}
  printf "Test case ${case} with ${MAPPER_NUM} mappers:\n replica\n" >> ${log}

  for i in {1..1}; do
    echo "==================== test case: $case, mapper num: ${MAPPER_NUM}, test round: $i ============================"

    # Cold caches and a fresh replica target for every round.
    sh drop_cache.sh
    sh prepare_replica.sh

    cd ${PAT_HOME}/PAT-collecting-data

    echo "hadoop distcp -skipcrccheck -m ${MAPPER_NUM} ${DEST_CLUSTER}/dest/${case}/* ${SRC_CLUSTER}/replica/${case}/"

    # Generate the workload script that PAT executes. Fix: the original
    # wrote results/$case_... which bash parses as the undefined
    # variable 'case_', dropping the case name from the log filename.
    echo "start_time=\`date +%s\`;\
hadoop distcp -skipcrccheck -m ${MAPPER_NUM} ${DEST_CLUSTER}/dest/${case}/* ${SRC_CLUSTER}/replica/${case}/ > results/${case}_${MAPPER_NUM}_${i}.log 2>&1;\
end_time=\`date +%s\`;\
printf \"\$((end_time-start_time))s \" >> ${log}" > cmd.sh

    ./pat run "${case}_replica_${MAPPER_NUM}_${i}"

    cd ${bin}
  done

  # Fix: report the actual mapper count (original used undefined $m).
  printf "\nTest case ${case} with ${MAPPER_NUM} mappers is finished!\n" >> ${log}
done

Loading

0 comments on commit 982b928

Please sign in to comment.