forked from jeroenjanssens/data-science-at-the-command-line
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Rio
executable file
·92 lines (85 loc) · 1.93 KB
/
Rio
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/bin/bash
# Rio: Load CSV from stdin into R as a data frame, execute given commands, and get the output as CSV or PNG on stdout
#
# Example usage:
# curl -s 'https://raw.github.com/pydata/pandas/master/pandas/tests/data/iris.csv' > iris.csv
# < iris.csv Rio -e 'summary(df)'
# < iris.csv Rio -se 'sqldf("select Name from df where df.SepalLength > 7")'
# < iris.csv Rio -ge 'g+geom_point(aes(x=SepalLength,y=SepalWidth,colour=Name))' > iris.png
#
# Dependency: R (optionally with the R packages ggplot2, plyr, and sqldf)
#
# Author: http://jeroenjanssens.com
# usage: print the command-line help text to stdout, one line per entry.
usage() {
    local -a help_lines=(
        "Rio: Load CSV from stdin into R as a data frame, execute given commands, and get the output as CSV on stdout"
        "usage: Rio OPTIONS"
        "OPTIONS:"
        "-d Delimiter"
        "-e Commands to execute"
        "-h Show this message"
        "-g Import ggplot2"
        "-n CSV has no header"
        "-p Import plyr"
        "-s Import sqldf"
        "-v Verbose"
    )
    # The format string is reused for every array element, so each entry
    # is emitted followed by exactly one newline.
    printf '%s\n' "${help_lines[@]}"
}
# callR: run the user's R commands against the staged CSV input.
#
# Reads these globals (set by the option-parsing code):
#   IN         path of the file holding the CSV read from stdin
#   OUT        path R writes its result to
#   HEADER     "T" or "F": whether the CSV has a header row
#   DELIMITER  field separator handed to read.csv
#   REQUIRES   R statements that load the optional packages
#   SCRIPT     the user's R commands, terminated by ';'
#
# The value of the last user command decides the output format:
#   data frame -> written to OUT as CSV
#   ggplot     -> saved to OUT with ggsave
#   otherwise  -> print()ed into OUT
# Returns the exit status of Rscript.
callR() {
    # Spell the header flag out as TRUE/FALSE. R's T and F are ordinary
    # variables that the user's commands may reassign before the result is
    # written, whereas TRUE and FALSE are reserved words.
    local header=TRUE
    case "$HEADER" in
        F|FALSE) header=FALSE ;;
    esac

    # Load the CSV into the data frame `df` before anything else runs.
    local prologue="df<-read.csv('${IN}',header=${header},sep='${DELIMITER}',stringsAsFactors=FALSE);"

    # Pick the output format from the last value; the sink opened for plain
    # values is closed again so the output connection is flushed explicitly.
    local epilogue="last<-.Last.value;if(is.data.frame(last)){write.table(last,'${OUT}',sep=',',quote=FALSE,row.names=FALSE,col.names=${header});}else if(exists('is.ggplot')&&is.ggplot(last)){ggsave('${OUT}',last,dpi=72);}else{sink('${OUT}');print(last);sink();}"

    Rscript --vanilla -e "${prologue}${REQUIRES}${SCRIPT}${epilogue}"
}
# ---------------------------------------------------------------------------
# Main: parse options, stage stdin in a temp file, run R, stream the result.
# ---------------------------------------------------------------------------

# Defaults; these globals are read by callR.
SCRIPT=
REQUIRES=
DELIMITER=","
HEADER="T"
VERBOSE=false

# Options are parsed before any temp file is created, so the -h and
# bad-option exits leave nothing behind.
while getopts "d:hgnpsve:" OPTION
do
    case $OPTION in
        d)
            DELIMITER=$OPTARG
            ;;
        e)
            SCRIPT=$OPTARG
            # Terminate the commands with ';' so callR can append its own R
            # statements. Matching inside [[ ]] avoids the word splitting and
            # glob expansion that an unquoted `echo $SCRIPT` applies to R code
            # containing characters such as '*'.
            trailing_semicolon='[;][[:space:]]*$'
            if ! [[ $SCRIPT =~ $trailing_semicolon ]]
            then
                SCRIPT="${SCRIPT};"
            fi
            ;;
        h)
            usage
            exit 1   # historical exit status for -h, kept for compatibility
            ;;
        g)
            # Also predefine `g` as a ggplot object built from df.
            REQUIRES="${REQUIRES}require(ggplot2);g<-ggplot(df);"
            ;;
        n)
            HEADER="F"
            ;;
        p)
            REQUIRES="${REQUIRES}require(plyr);"
            ;;
        s)
            REQUIRES="${REQUIRES}require(sqldf);"
            ;;
        v)
            VERBOSE=true
            ;;
        *)
            # getopts reports an unknown option or a missing argument as '?';
            # that is an error, so exit non-zero.
            usage
            exit 1
            ;;
    esac
done

# Temp files. R writes to "<base>.png" (the extension lets ggsave choose the
# PNG device), so the base file created by mktemp must be removed as well.
IN=$(mktemp) || exit 1
OUT_BASE=$(mktemp) || { rm -f "$IN"; exit 1; }
OUT="${OUT_BASE}.png"
trap 'rm -f "$IN" "$OUT" "$OUT_BASE"' EXIT

# Stage the CSV from stdin so R can read it from a file.
cat > "$IN"

# Run R; its own chatter is shown only in verbose mode.
if $VERBOSE
then
    callR
else
    callR >/dev/null 2>&1
fi
status=$?

if [ -f "$OUT" ]
then
    cat "$OUT"
else
    # R failed before writing a result; say so instead of letting cat fail.
    echo "Rio: R produced no output (run with -v to see R's messages)" >&2
    [ "$status" -ne 0 ] || status=1
fi

# Graphics devices opened by R may leave this file in the working directory.
rm -f Rplots.pdf

# Temp files are removed by the EXIT trap.
exit "$status"