diff --git a/zipkin-hadoop/src/scripts/run.sh b/zipkin-hadoop/src/scripts/run.sh
new file mode 100755
index 00000000000..52cc424654b
--- /dev/null
+++ b/zipkin-hadoop/src/scripts/run.sh
@@ -0,0 +1,71 @@
+#! /bin/bash
+
+usage() {
+cat << EOF
+usage: $0 options
+
+This script runs the hadoop queries
+
+OPTIONS:
+  -h  Show this message
+  -d  Date, either as one date, or start date and end date, separated by commas
+  -o  Output directory
+EOF
+}
+
+TIMES=
+STARTTIME=
+ENDTIME=
+OUTPUT=
+while getopts "hn:d:o:" OPTION
+do
+  case $OPTION in
+    h)
+      usage
+      exit 1
+      ;;
+    d)
+      TIMES=(`echo $OPTARG | tr ',' ' '`)
+      ;;
+    o)
+      OUTPUT=$OPTARG
+      ;;
+  esac
+done
+
+size=${#TIMES[@]}
+echo "Dates: ${TIMES[@]}"
+
+if [[ -z $OUTPUT ]] || [[ $size -lt 1 ]] || [[ $size -gt 2 ]]
+then
+  usage
+  exit 1
+fi
+
+if [ $size == 1 ]; then
+  STARTTIME=${TIMES[0]}
+  ENDTIME=${TIMES[0]}
+else
+  STARTTIME=${TIMES[0]}
+  ENDTIME=${TIMES[1]}
+fi
+
+
+echo "Output: $OUTPUT"
+echo "Start and end time: $STARTTIME - $ENDTIME"
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+echo $DIR
+
+$DIR/scald.rb --hdfs com.twitter.zipkin.hadoop.sources.Preprocessed --date $STARTTIME $ENDTIME
+$DIR/run_job.sh -j WorstRuntimes -d $ENDTIME -o $OUTPUT/WorstRuntimes &
+$DIR/run_job.sh -j MemcacheRequest -d $ENDTIME -o $OUTPUT/MemcacheRequest &
+$DIR/run_job.sh -j FindNames -p -d $ENDTIME
+$DIR/run_job.sh -j PopularKeys -d $ENDTIME -o $OUTPUT/PopularKeys &
+$DIR/run_job.sh -j PopularAnnotations -d $ENDTIME -o $OUTPUT/PopularAnnotations &
+$DIR/run_job.sh -j FindIDtoName -p -d $ENDTIME
+$DIR/run_job.sh -j DependencyTree -d $ENDTIME -o $OUTPUT/DependencyTree &
+$DIR/run_job.sh -j MostCommonCalls -d $ENDTIME -o $OUTPUT/MostCommonCalls &
+$DIR/run_job.sh -j Timeouts -s "--error_type finagle.timeout" -o $OUTPUT/Timeouts -d $ENDTIME &
+$DIR/run_job.sh -j Timeouts -s "--error_type finagle.retry" -o $OUTPUT/Retries -d $ENDTIME &
diff --git a/zipkin-hadoop/src/scripts/run_job.sh b/zipkin-hadoop/src/scripts/run_job.sh
new file mode 100755
index 00000000000..48bfb544711
--- /dev/null
+++ b/zipkin-hadoop/src/scripts/run_job.sh
@@ -0,0 +1,71 @@
+#! /bin/bash
+
+usage() {
+cat << EOF
+usage: $0 options
+
+This script runs the hadoop queries
+
+OPTIONS:
+  -h  Show this message
+  -j  Name of the job
+  -p  Flag to denote if the job being run is a preprocessor
+  -s  Any settings you want to pass along
+  -d  Date
+  -o  Output directory
+EOF
+}
+
+JOBNAME=
+ISPREPROCESSOR=
+DATE=
+SETTINGS=
+OUTPUTDIR=
+while getopts "hj:ps:d:n:o:" OPTION
+do
+  case $OPTION in
+    h)
+      usage
+      exit 1
+      ;;
+    j)
+      JOBNAME=$OPTARG
+      ;;
+    p)
+      ISPREPROCESSOR=1
+      ;;
+    s)
+      SETTINGS=$OPTARG
+      ;;
+    d)
+      DATE=$OPTARG
+      ;;
+    o)
+      OUTPUTDIR=$OPTARG
+      ;;
+  esac
+done
+echo "Job: $JOBNAME"
+echo "Is Pre: $ISPREPROCESSOR"
+echo "Settings: $SETTINGS"
+echo "Output: $OUTPUTDIR"
+echo "Date: $DATE"
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+if [ "$ISPREPROCESSOR" == "1" ]; then
+  $DIR/scald.rb --hdfs com.twitter.zipkin.hadoop.sources.$JOBNAME $SETTINGS --date $DATE
+  if [ "$?" != 0 ]; then
+    echo "Job $JOBNAME failed; exiting"
+    exit 1
+  fi
+else
+  $DIR/scald.rb --hdfs com.twitter.zipkin.hadoop.$JOBNAME $SETTINGS --date $DATE --output $OUTPUTDIR
+  if [ "$?" == "0" ]; then
+    echo "Job $JOBNAME successfully completed"
+  else
+    echo "Job $JOBNAME failed; exiting"
+    exit 1
+  fi
+fi
+
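
For reference, here is a hypothetical invocation of these scripts. The dates and output path are placeholders, this assumes scald.rb sits next to the scripts and the Hadoop cluster is already configured, and the exact --date format accepted by scald.rb may differ:

    # Run every job for a single day, writing results under /tmp/zipkin-output
    ./run.sh -d 2012-07-21 -o /tmp/zipkin-output

    # Run every job over a start/end date range, separated by a comma
    ./run.sh -d 2012-07-20,2012-07-21 -o /tmp/zipkin-output

    # Run a single non-preprocessor job directly via run_job.sh
    ./run_job.sh -j WorstRuntimes -d 2012-07-21 -o /tmp/zipkin-output/WorstRuntimes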