Skip to content

Commit

Permalink
Script run jobs
Browse files Browse the repository at this point in the history
Move error code to WhaleReport, fixed typo in run.sh and added comments to
run_job.sh

Author: @jerryli9876
Fixes #70
URL: #70
  • Loading branch information
jerryli9876 committed Jul 18, 2012
1 parent 7b48932 commit cfba24b
Show file tree
Hide file tree
Showing 2 changed files with 142 additions and 0 deletions.
71 changes: 71 additions & 0 deletions zipkin-hadoop/src/scripts/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#! /bin/bash

# Print the command-line help for this script to stdout.
# Reads $0 so the first line shows the name the script was invoked as.
usage() {
  printf '%s\n' \
    "usage: $0 options" \
    "This script runs the hadoop queries" \
    "OPTIONS:" \
    "-h Show this message" \
    "-d Date, either as one date, or start date and end date, separated by commas" \
    "-o Output directory"
}

# Parse the command line, validate it, then launch the Zipkin Hadoop jobs.
#
#   -d  one date, or "START,END"; the pieces are collected in TIMES
#   -o  base output directory; each job writes to a sub-directory of it
#   -h  print usage and exit

# TIMES must start as an empty array: a scalar `TIMES=` would make
# ${#TIMES[@]} evaluate to 1 even when -d was never given.
TIMES=()
STARTTIME=
ENDTIME=
OUTPUT=
while getopts "hn:d:o:" OPTION; do
  case "$OPTION" in
    h)
      usage
      exit 1
      ;;
    d)
      # Turn commas into spaces, then split on whitespace. Using read -a
      # instead of an unquoted command substitution avoids glob expansion.
      IFS=$' \t\n' read -r -a TIMES <<< "${OPTARG//,/ }"
      ;;
    o)
      OUTPUT=$OPTARG
      ;;
    n)
      # Accepted for backward compatibility; the value is not used.
      ;;
    *)
      # Unknown option or missing argument (getopts already printed why).
      usage
      exit 1
      ;;
  esac
done

size=${#TIMES[@]}
echo "Dates: ${TIMES[*]}"

# Require an output directory and exactly one or two dates.
# (( )) compares numerically; [[ a < b ]] would compare as strings.
if [[ -z "$OUTPUT" ]] || (( size < 1 || size > 2 )); then
  usage
  exit 1
fi

# With a single date the range collapses to that date.
STARTTIME=${TIMES[0]}
ENDTIME=${TIMES[size - 1]}

echo "Output: $OUTPUT"
echo "Start and end time: $STARTTIME - $ENDTIME"

# Directory containing this script; scald.rb and run_job.sh are run from it.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

echo "$DIR"

# Preprocess the date range first, in the foreground.
"$DIR/scald.rb" --hdfs com.twitter.zipkin.hadoop.sources.Preprocessed --date "$STARTTIME" "$ENDTIME"

# Jobs ending in & are started in the background. The -p (preprocessor) jobs
# run in the foreground, so the jobs listed after them start only once they
# have returned. This script does not wait for the background jobs, so it
# may return while they are still running.
"$DIR/run_job.sh" -j WorstRuntimes -d "$ENDTIME" -o "$OUTPUT/WorstRuntimes" &
"$DIR/run_job.sh" -j MemcacheRequest -d "$ENDTIME" -o "$OUTPUT/MemcacheRequest" &
"$DIR/run_job.sh" -j FindNames -p -d "$ENDTIME"
"$DIR/run_job.sh" -j PopularKeys -d "$ENDTIME" -o "$OUTPUT/PopularKeys" &
"$DIR/run_job.sh" -j PopularAnnotations -d "$ENDTIME" -o "$OUTPUT/PopularAnnotations" &
"$DIR/run_job.sh" -j FindIDtoName -p -d "$ENDTIME"
"$DIR/run_job.sh" -j DependencyTree -d "$ENDTIME" -o "$OUTPUT/DependencyTree" &
"$DIR/run_job.sh" -j MostCommonCalls -d "$ENDTIME" -o "$OUTPUT/MostCommonCalls" &
"$DIR/run_job.sh" -j Timeouts -s "--error_type finagle.timeout" -o "$OUTPUT/Timeouts" -d "$ENDTIME" &
"$DIR/run_job.sh" -j Timeouts -s "--error_type finagle.retry" -o "$OUTPUT/Retries" -d "$ENDTIME" &
71 changes: 71 additions & 0 deletions zipkin-hadoop/src/scripts/run_job.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#! /bin/bash

# Print the command-line help for this script to stdout.
# Reads $0 so the first line shows the name the script was invoked as.
usage() {
  printf '%s\n' \
    "usage: $0 options" \
    "This script runs the hadoop queries" \
    "OPTIONS:" \
    "-h Show this message" \
    "-j Name of the job" \
    "-p Flag to denote if the job being run is a preprocessor" \
    "-s Any settings you want to pass along" \
    "-d Date" \
    "-o Output directory"
}

# Parse the command line and run a single job through scald.rb.
#
#   -j  job name (class name appended to the com.twitter.zipkin.hadoop package)
#   -p  the job is a preprocessor (lives under ...hadoop.sources, takes no --output)
#   -s  extra settings, passed to scald.rb as separate words
#   -d  date handed to scald.rb via --date
#   -o  output directory handed to scald.rb via --output
#
# Exits 1 when the arguments are unusable or scald.rb reports failure.

JOBNAME=
ISPREPROCESSOR=
DATE=
SETTINGS=
OUTPUTDIR=
while getopts "hj:ps:d:n:o:" OPTION; do
  case "$OPTION" in
    h)
      # Plain call: writing `usage()` here would begin a function definition.
      usage
      exit 1
      ;;
    j)
      JOBNAME=$OPTARG
      ;;
    p)
      ISPREPROCESSOR=1
      ;;
    s)
      SETTINGS=$OPTARG
      ;;
    d)
      DATE=$OPTARG
      ;;
    o)
      OUTPUTDIR=$OPTARG
      ;;
    n)
      # Accepted for backward compatibility; the value is not used.
      ;;
    *)
      # Unknown option or missing argument (getopts already printed why).
      usage
      exit 1
      ;;
  esac
done
echo "Job: $JOBNAME"
echo "Is Pre: $ISPREPROCESSOR"
echo "Settings: $SETTINGS"
echo "Output: $OUTPUTDIR"
echo "Date: $DATE"

# Fail fast: without a job name or a date the scald.rb command line below
# would be malformed.
if [[ -z "$JOBNAME" || -z "$DATE" ]]; then
  usage
  exit 1
fi

# Directory containing this script; scald.rb is run from it.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

if [[ "$ISPREPROCESSOR" == "1" ]]; then
  # SETTINGS is left unquoted on purpose so that a value such as
  # "--error_type finagle.timeout" is passed as two separate arguments.
  # shellcheck disable=SC2086
  if ! "$DIR/scald.rb" --hdfs "com.twitter.zipkin.hadoop.sources.$JOBNAME" $SETTINGS --date "$DATE"; then
    echo "Job $JOBNAME failed; exiting"
    exit 1
  fi
else
  # ${OUTPUTDIR:+...} expands to nothing when -o was not given and to the
  # quoted directory otherwise, so paths containing spaces stay one argument.
  # SETTINGS is left unquoted on purpose (see above).
  # shellcheck disable=SC2086
  if "$DIR/scald.rb" --hdfs "com.twitter.zipkin.hadoop.$JOBNAME" $SETTINGS --date "$DATE" --output ${OUTPUTDIR:+"$OUTPUTDIR"}; then
    echo "Job $JOBNAME succesfully completed"
  else
    echo "Job $JOBNAME failed; exiting"
    exit 1
  fi
fi

0 comments on commit cfba24b

Please sign in to comment.