
Issue #87 Updating jobFileProcessor.sh with latest arguments and adding a sample cost file
Vrushali Channapattan committed May 16, 2014
1 parent 22e2c25 commit d1b12f8
Showing 2 changed files with 20 additions and 4 deletions.
10 changes: 6 additions & 4 deletions bin/etl/jobFileProcessor.sh
@@ -18,11 +18,12 @@
 # Run on the daemon node per specific cluster
 # This script runs on the HBase cluster
 # Usage ./jobFileProcessor.sh [hadoopconfdir]
-# [schedulerpoolname] [historyprocessingdir] [cluster] [threads] [batchsize]
+# [schedulerpoolname] [historyprocessingdir] [cluster] [threads] [batchsize] [machinetype] [costfile]
+# a sample cost file can be found in the conf dir as sampleCostDetails.properties

-if [ $# -ne 6 ]
+if [ $# -ne 8 ]
 then
-echo "Usage: `basename $0` [hbaseconfdir] [schedulerpoolname] [historyprocessingdir] [cluster] [threads] [batchsize]"
+echo "Usage: `basename $0` [hbaseconfdir] [schedulerpoolname] [historyprocessingdir] [cluster] [threads] [batchsize] [machinetype] [costfile]"
 exit 1
 fi

@@ -43,4 +44,5 @@ fi
 create_pidfile $HRAVEN_PID_DIR
 trap 'cleanup_pidfile_and_exit $HRAVEN_PID_DIR' INT TERM EXIT

-hadoop --config $1 jar $hravenEtlJar com.twitter.hraven.etl.JobFileProcessor -libjars=$LIBJARS -Dmapred.fairscheduler.pool=$2 -d -p $3 -c $4 -t $5 -b $6
+hadoop --config $1 jar $hravenEtlJar com.twitter.hraven.etl.JobFileProcessor -libjars=$LIBJARS -Dmapred.fairscheduler.pool=$2 -d -p $3 -c $4 -t $5 -b $6 -m $7 -z $8
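
With the two new arguments, a full run of the script now takes eight positional parameters. A minimal sketch of an invocation is shown below; the config dir, pool name, processing path, cluster name, thread/batch counts, and machine type are placeholder values, not taken from this commit:

# hypothetical invocation; every value here is a placeholder
./jobFileProcessor.sh /etc/hbase/conf hravenpool /hraven/processing cluster1@dc1 20 100 examplemachine /path/to/sampleCostDetails.properties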

14 changes: 14 additions & 0 deletions conf/sampleCostDetails.properties
@@ -0,0 +1,14 @@
+# properties file to store cost data
+# used in calculating cost of a job in the processing step
+#
+# machine type is the hardware name of node that the job runs on
+#
+# compute cost is the part of dollar amount of total cost of operating a machine
+# allocated to compute
+#
+# machinememory is the max amount of memory at run time in
+# megabytes available to a hadoop job
+#
+default.computecost=10
+default.machinememory=24576
+#
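
The sample file only defines the default.* keys. A site-specific cost file would presumably carry one pair of entries per machine type passed via the new [machinetype] argument; the sketch below assumes keys follow the same <machinetype>.<property> pattern as the defaults, and the machine type name and numbers are invented for illustration:

# hypothetical per-machine-type entries (name and values are placeholders)
examplemachine.computecost=15
examplemachine.machinememory=49152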
