forked from pablo14/data-science-live-book
-
Notifications
You must be signed in to change notification settings - Fork 0
/
book.bib
executable file
·219 lines (188 loc) · 8.55 KB
/
book.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
% Reshef et al. (2011), Science 334(6062): presents the maximal information coefficient (MIC).
% The eprint field holds the publisher's PDF link, carried over unchanged from the original record.
@article{ReshefEtAl2011,
  author   = {Reshef, David N. and
              Reshef, Yakir A. and
              Finucane, Hilary K. and
              Grossman, Sharon R. and
              McVean, Gilean and
              Turnbaugh, Peter J. and
              Lander, Eric S. and
              Mitzenmacher, Michael and
              Sabeti, Pardis C.},
  title    = {Detecting Novel Associations in Large Data Sets},
  journal  = {Science},
  volume   = {334},
  number   = {6062},
  pages    = {1518--1524},
  year     = {2011},
  doi      = {10.1126/science.1205438},
  url      = {http://www.sciencemag.org/content/334/6062/1518.abstract},
  eprint   = {http://www.sciencemag.org/content/334/6062/1518.full.pdf},
  abstract = {Identifying interesting relationships between pairs of variables in large data sets is increasingly important. Here, we present a measure of dependence for two-variable relationships: the maximal information coefficient (MIC). MIC captures a wide range of associations both functional and not, and for functional relationships provides a score that roughly equals the coefficient of determination ($R^2$) of the data relative to the regression function. MIC belongs to a larger class of maximal information-based nonparametric exploration (MINE) statistics for identifying and classifying relationships. We apply MIC and MINE to data sets in global health, gene expression, major-league baseball, and the human gut microbiota and identify known and novel relationships.}
}
% Wikipedia article on Occam's razor; the URL anchors the probability theory and statistics section.
% The author is an organisation, so it is double-braced to be kept as one literal name.
@misc{wiki:occam_razor,
  author = {{Wikipedia}},
  title  = {Occam's razor},
  year   = {2017},
  url    = {https://en.wikipedia.org/wiki/Occam's_razor#Probability_theory_and_statistics},
  note   = {[Online; accessed 01-Aug-2017]}
}
% Online documentation page on recursive feature elimination in the caret package.
% R and caret are braced so that styles which recase titles leave them alone.
@misc{caret_feat_elimination,
  author = {Kuhn, Max},
  title  = {Recursive Feature Elimination in {R} package {caret}},
  year   = {2017},
  url    = {https://topepo.github.io/caret/recursive-feature-elimination.html},
  note   = {[Online; accessed 01-Aug-2017]}
}
% Stack Overflow question on entropy and information gain.
% The site stands in as author, so it is double-braced and treated as one literal name.
@misc{stackoverflow_entropy,
  author = {{stackoverflow.com}},
  title  = {What is entropy and information gain?},
  year   = {2017},
  url    = {http://stackoverflow.com/questions/1859554/what-is-entropy-and-information-gain},
  note   = {[Online; accessed 01-Aug-2017]}
}
% Cross Validated (stats.stackexchange.com) question on random forest importance measures.
% The site stands in as author (double-braced literal); GINI is braced to keep its capitals.
@misc{stats.stackexchange_gini,
  author = {{stats.stackexchange.com}},
  title  = {How to interpret Mean Decrease in Accuracy and Mean Decrease {GINI} in Random Forest models},
  year   = {2017},
  url    = {http://stats.stackexchange.com/questions/197827/how-to-interpret-mean-decrease-in-accuracy-and-mean-decrease-gini-in-random-fore},
  note   = {[Online; accessed 01-Aug-2017]}
}
% Blog post on flagging outliers with the Tukey method.
% The author is a company: double braces stop it being split into a first name and a surname.
@misc{tukey_outliers,
  author = {{DataPig Technologies}},
  title  = {Highlighting Outliers in your Data with the {Tukey} Method},
  year   = {2014},
  url    = {http://datapigtechnologies.com/blog/index.php/highlighting-outliers-in-your-data-with-the-tukey-method/},
  note   = {[Online; accessed 01-Aug-2017]}
}
% Blog post on finding outliers in numerical data.
% The blog name stands in as author, so it is double-braced and treated as one literal name.
@misc{finding_outliers,
  author = {{ExploringDataBlog}},
  title  = {Finding outliers in numerical data},
  year   = {2013},
  url    = {http://exploringdatablog.blogspot.com.ar/2013/02/finding-outliers-in-numerical-data.html},
  note   = {[Online; accessed 01-Aug-2017]}
}
% Machine learning FAQ page on evaluating a model (per the URL path).
% FAQ is braced so that styles which recase titles keep the acronym in capitals.
@misc{evaluate_model,
  author = {Raschka, Sebastian},
  title  = {Machine Learning {FAQ}},
  year   = {2017},
  url    = {http://sebastianraschka.com/faq/docs/evaluate-a-model.html},
  note   = {[Online; accessed 01-Aug-2017]}
}
% Journal paper (JMLR, volume 15), so it is recorded as an article with its full author list.
% The citation key, URL and access note are kept from the original record.
@article{do_we_need_hundred_models,
  author  = {Fern{\'a}ndez-Delgado, Manuel and
             Cernadas, Eva and
             Barro, Sen{\'e}n and
             Amorim, Dinani},
  title   = {Do we Need Hundreds of Classifiers to Solve Real World Classification Problems?},
  journal = {Journal of Machine Learning Research},
  volume  = {15},
  pages   = {3133--3181},
  year    = {2014},
  url     = {http://jmlr.csail.mit.edu/papers/volume15/delgado14a/delgado14a.pdf},
  note    = {[Online; accessed 01-Aug-2017]}
}
% Blog post (Hyndsight) on why statisticians should know about cross-validation.
@misc{why_cross_validation,
  author = {Hyndman, Rob J.},
  title  = {Why every statistician should know about cross-validation?},
  year   = {2010},
  url    = {https://robjhyndman.com/hyndsight/crossvalidation/},
  note   = {[Online; accessed 01-Aug-2017]}
}
% Online essay on the bias-variance tradeoff.
% The surname is the hyphenated form shown in the site's own domain (fortmann-roe.com).
@misc{bias_variance_tradeoff,
  author = {Fortmann-Roe, Scott},
  title  = {Understanding the Bias-Variance Tradeoff},
  year   = {2012},
  url    = {http://scott.fortmann-roe.com/docs/BiasVariance.html},
  note   = {[Online; accessed 01-Aug-2017]}
}
% KDnuggets article on more data versus better algorithms in machine learning.
@misc{more_data_or_better_algorithms,
  author = {Amatriain, Xavier},
  title  = {In Machine Learning, What is Better: More Data or better Algorithms},
  year   = {2015},
  url    = {http://www.kdnuggets.com/2015/06/machine-learning-more-data-better-algorithms.html},
  note   = {[Online; accessed 01-Aug-2017]}
}
% Cross Validated (stats.stackexchange.com) question comparing gradient boosting and random forest.
% The site stands in as author, so it is double-braced and treated as one literal name.
@misc{gbm_vs_random_forest,
  author = {{stats.stackexchange.com}},
  title  = {Gradient boosting machine vs random forest},
  year   = {2015},
  url    = {https://stats.stackexchange.com/questions/173390/gradient-boosting-tree-vs-random-forest},
  note   = {[Online; accessed 01-Aug-2017]}
}
% Online text (otexts.org) on ARIMA modelling in R.
% ARIMA and R are braced so that styles which recase titles keep their capitals.
@misc{arima_modeling_in_r,
  author = {Hyndman, Rob J.},
  title  = {{ARIMA} modelling in {R}},
  year   = {2017},
  url    = {https://www.otexts.org/fpp/8/},
  note   = {[Online; accessed 01-Aug-2017]}
}
% Wikipedia article on white noise (time series analysis and regression).
% The author is an organisation, so it is double-braced to be kept as one literal name.
@misc{white_noise,
  author = {{Wikipedia}},
  title  = {White noise - Time series analysis and regression},
  year   = {2017},
  url    = {https://en.wikipedia.org/wiki/White_noise},
  note   = {[Online; accessed 01-Aug-2017]}
}
% Online article (spcforexcel.com) on whether skewness and kurtosis are useful statistics.
@misc{skew_kurt_1,
  author = {McNeese, Bill},
  title  = {Are the Skewness and Kurtosis Useful Statistics?},
  year   = {2016},
  url    = {https://www.spcforexcel.com/knowledge/basic-statistics/are-skewness-and-kurtosis-useful-statistics},
  note   = {[Online; accessed 01-Aug-2017]}
}
% Handbook section on measures of skewness and kurtosis, hosted on itl.nist.gov.
% The author is a publication title, not a person: double braces stop it being split into name parts.
@misc{skew_kurt_2,
  author = {{Engineering Statistics Handbook}},
  title  = {Measures of Skewness and Kurtosis},
  year   = {2013},
  url    = {http://www.itl.nist.gov/div898/handbook/eda/section3/eda35b.htm},
  note   = {[Online; accessed 01-Aug-2017]}
}
% Caban et al. (2012): non-linear dependency analysis between clinical variables and imaging data.
% The note field carries the PubMed identifier; month uses the standard three-letter macro.
@article{Caban2012,
  author   = {Caban, Jesus J. and
              Bagci, Ulas and
              Mehari, Alem and
              Alam, Shoaib and
              Fontana, Joseph R. and
              Kato, Gregory J. and
              Mollura, Daniel J.},
  title    = {Characterizing Non-Linear Dependencies Among Pairs of Clinical Variables and Imaging Data},
  journal  = {Conf Proc IEEE Eng Med Biol Soc},
  year     = {2012},
  month    = aug,
  volume   = {2012},
  pages    = {2700--2703},
  abstract = {Advances in computer-aided diagnosis (CAD) systems have shown the benefits of using computer-based techniques to obtain quantitative image measurements of the extent of a particular disease. Such measurements provide more accurate information that can be used to better study the associations between anatomical changes and clinical findings. Unfortunately, even with the use of quantitative image features, the correlations between anatomical changes and clinical findings are often not apparent and definite conclusions are difficult to reach. This paper uses nonparametric exploration techniques to demonstrate that even when the associations between two-variables seems weak, advanced properties of the associations can be studied and used to better understand the relationships between individual measurements. This paper uses quantitative imaging findings and clinical measurements of 85 patients with pulmonary fibrosis to demonstrate the advantages of non-linear dependency analysis. Results show that even when the correlation coefficients between imaging and clinical findings seem small, statistical measurements such as the maximum asymmetry score (MAS) and maximum edge value (MEV) can be used to better understand the hidden associations between the variables.},
  note     = {23366482[pmid]},
  issn     = {1557-170X},
  doi      = {10.1109/EMBC.2012.6346521},
  url      = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3561932/}
}
% Wikipedia article on monotonic functions.
% The author is an organisation, so it is double-braced to be kept as one literal name.
@misc{monotonic_function,
  author = {{Wikipedia}},
  title  = {Monotonic function},
  year   = {2017},
  url    = {https://en.wikipedia.org/wiki/Monotonic_function},
  note   = {[Online; accessed 01-Aug-2017]}
}
% Blog post on converting images into time series for data mining.
@misc{img_time_series,
  author = {Izbicki, Mike},
  title  = {Converting images into time series for data mining},
  year   = {2011},
  url    = {https://izbicki.me/blog/converting-images-into-time-series-for-data-mining.html},
  note   = {[Online; accessed 01-Aug-2017]}
}
% Cross Validated (stats.stackexchange.com) question on percentile vs quantile vs quartile.
% The site stands in as author, so it is double-braced and treated as one literal name.
@misc{perc_quan_quar,
  author = {{stats.stackexchange.com}},
  title  = {Percentile vs quantile vs quartile},
  year   = {2017},
  url    = {https://stats.stackexchange.com/questions/156778/percentile-vs-quantile-vs-quartile},
  note   = {[Online; accessed 01-Aug-2017]}
}
% Wikipedia article on the distribution of wealth.
% The author is an organisation, so it is double-braced to be kept as one literal name.
@misc{distr_wealth,
  author = {{Wikipedia}},
  title  = {Distribution of wealth},
  year   = {2017},
  url    = {https://en.wikipedia.org/wiki/Distribution_of_wealth},
  note   = {[Online; accessed 01-Aug-2017]}
}
% Credit Suisse Global Wealth Report, 2013 edition.
% The author is a company: double braces stop it being split into a first name and a surname.
@misc{global_wealth,
  author = {{Credit Suisse}},
  title  = {Global Wealth Report 2013},
  year   = {2013},
  url    = {https://publications.credit-suisse.com/tasks/render/file/?fileID=BCDB1364-A105-0560-1332EC9100FF5C83},
  note   = {[Online; accessed 01-Aug-2017]}
}