-
Notifications
You must be signed in to change notification settings - Fork 89
/
repro.sh
70 lines (54 loc) · 1.98 KB
/
repro.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# full repro on Ubuntu 16.04
# Base tooling: general utilities (wget, curl, vim, htop, git, byobu) plus the
# libcurl/libssl development packages (presumably needed to compile R packages
# installed further down).
sudo apt-get -qq update
sudo apt-get -qq install -y lsb-release software-properties-common wget curl vim htop git byobu libcurl4-openssl-dev libssl-dev
# Import the apt key (presumably the CRAN signing key) and register the
# R 3.5 ("cran35") repository for the running Ubuntu release; the release
# codename is taken from lsb_release.
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9
sudo add-apt-repository "deb [arch=amd64,i386] https://cloud.r-project.org/bin/linux/ubuntu $(lsb_release -sc)-cran35/"
# Refresh the package index so the newly added repository is visible, then
# install R (with development files) and virtualenv.
sudo apt-get -qq update
sudo apt-get -qq install -y r-base-dev virtualenv
# Let non-root users write to the system-wide R site library, so later
# package installs can be run without sudo.
sudo chmod o+w /usr/local/lib/R/site-library
# Continue from the home directory; later steps use paths relative to it.
cd ~
mkdir -p .R
# Append compiler optimisation flags to the per-user R Makevars file.
printf '%s\n' \
  'CFLAGS=-O3 -mtune=native' \
  'CXXFLAGS=-O3 -mtune=native' >> ~/.R/Makevars
# Fetch the sources into ./git (relative to the current directory).
# Clone over https rather than plain http, matching the pip install URL
# used later in this script.
mkdir -p git
cd git
git clone https://github.com/h2oai/datatable
git clone https://github.com/h2oai/db-benchmark
cd db-benchmark
# Create one Python 3.6 virtualenv per Python solution, each living inside
# that solution's own directory as <solution>/py-<solution>.
# The shell stays in the db-benchmark checkout afterwards.
for solution in pydatatable pandas modin; do
  virtualenv "${solution}/py-${solution}" --python=/usr/bin/python3.6
done
# All relative paths below assume the current directory is the db-benchmark
# checkout (where the py-* virtualenvs were created).

# R helper packages from CRAN.
Rscript -e 'install.packages(c("jsonlite","bit64","devtools","rmarkdown"), repos="https://cloud.r-project.org")'
# Opens an interactive byobu session; the remaining commands are presumably
# meant to be run inside it so long-running steps survive a disconnect.
byobu
# pandas environment (psutil is installed into this environment as well).
source ./pandas/py-pandas/bin/activate
python -m pip install --upgrade psutil
python -m pip install --upgrade pandas
deactivate
# modin environment.
source ./modin/py-modin/bin/activate
python -m pip install --upgrade modin
deactivate
# pydatatable environment: install datatable straight from its git repository.
# `python -m pip` is used here too, so every install goes through the
# interpreter of the virtualenv that was just activated.
source ./pydatatable/py-pydatatable/bin/activate
python -m pip install --upgrade git+https://github.com/h2oai/datatable
deactivate
# install dplyr
Rscript -e 'devtools::install_github(c("tidyverse/readr","tidyverse/dplyr"))'
# install data.table
Rscript -e 'install.packages("data.table", repos="https://Rdatatable.github.io/data.table")'
# benchmark
# Use the absolute path of the checkout: the earlier steps already leave the
# shell inside db-benchmark, where a relative `cd db-benchmark` would fail.
cd ~/git/db-benchmark
# generate data for groupby
# Only the first argument changes between runs (1e7, 1e8, 1e9); the other
# three arguments are passed unchanged.
for rows in 1e7 1e8 1e9; do
  Rscript _data/groupby-datagen.R "$rows" 1e2 0 0
done
# set only groupby task (interactive edit)
vim run.conf
# set data sizes
# Back up the original data.csv once, before it is edited by hand; an
# existing backup is never overwritten.
if [ ! -f ./_control/orgdata.csv ]; then
  cp ./_control/data.csv ./_control/orgdata.csv
fi
vim ./_control/data.csv
# running db-benchmark
./run.sh