diff --git a/NEWS.md b/NEWS.md index 2fe127e5d..21a5b5027 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +##### Version 0.4.2 +* Add Windows multithreading support for new toolchain + ##### Version 0.4.1 * Runtime improvement for regression forests on classification data diff --git a/README.md b/README.md index d252a22c3..1a521a5e1 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ To install the Ranger R package from CRAN, just run install.packages("ranger”) ``` -R version >= 3.1 is required. Note that, for now, no multithreading is supported in the R version on Windows platforms (the compiler in RTools is too old). +R version >= 3.1 is required. Note that, for now, multithreading on Windows platforms requires either R-devel with the new RTools toolchain or a prebuilt binary version of the package. To install the C++ version of Ranger in Linux or Mac OS X you will need a compiler supporting C++11 (i.e. gcc >= 4.7 or Clang >= 3.0) and Cmake. To build start a terminal from the Ranger main directory and run the following commands @@ -55,6 +55,7 @@ ranger --verbose --file data.dat --depvarname Species --treetype 1 --ntree 1000 If you find any bugs, or if you experience any crashes, please report to us. If you have any questions just ask, we won't bite. ### References +* Wright, M. N. & Ziegler, A. (2016). ranger: A Fast Implementation of Random Forests for High Dimensional Data in C++ and R. Journal of Statistical Software, in press. http://arxiv.org/abs/1508.04409. * Breiman, L. (2001). Random forests. Machine learning, 45(1), 5-32. * Ishwaran, H., Kogalur, U. B., Blackstone, E. H., & Lauer, M. S. (2008). Random survival forests. The Annals of Applied Statistics, 841-860. * Malley, J. D., Kruppa, J., Dasgupta, A., Malley, K. G., & Ziegler, A. (2012). Probability machines: consistent probability estimation using nonparametric learning machines. Methods Inf Med, 51(1), 74. 
diff --git a/ranger-r-package/ranger/DESCRIPTION b/ranger-r-package/ranger/DESCRIPTION index 5d0cdb403..082997de6 100644 --- a/ranger-r-package/ranger/DESCRIPTION +++ b/ranger-r-package/ranger/DESCRIPTION @@ -1,8 +1,8 @@ Package: ranger Type: Package Title: A Fast Implementation of Random Forests -Version: 0.4.1 -Date: 2016-04-28 +Version: 0.4.2 +Date: 2016-05-02 Author: Marvin N. Wright Maintainer: Marvin N. Wright Description: A fast implementation of Random Forests, particularly suited for high dimensional data. Ensembles diff --git a/ranger-r-package/ranger/NEWS b/ranger-r-package/ranger/NEWS index 44a2e1a89..9abe759c7 100644 --- a/ranger-r-package/ranger/NEWS +++ b/ranger-r-package/ranger/NEWS @@ -1,3 +1,6 @@ +##### Version 0.4.2 +* Add Windows multithreading support for new toolchain + ##### Version 0.4.1 * Runtime improvement for regression forests on classification data diff --git a/ranger-r-package/ranger/R/ranger.R b/ranger-r-package/ranger/R/ranger.R index 4acf152d2..f452bc212 100644 --- a/ranger-r-package/ranger/R/ranger.R +++ b/ranger-r-package/ranger/R/ranger.R @@ -58,11 +58,12 @@ ##' To use only the SNPs without sex or other covariates from the phenotype file, use \code{0} on the right hand side of the formula. ##' Note that missing values are treated as an extra category while splitting. ##' -##' See \url{https://github.com/mnwright/ranger} for the development version. +##' See \url{https://github.com/imbs-hl/ranger} for the development version. ##' -##' Notes: +##' To use multithreading on Microsoft Windows platforms, there are currently two options: ##' \itemize{ -##' \item Multithreading is currently not supported for Microsoft Windows platforms. 
+##' \item Use R-devel with the new toolchain +##' \item Install a binary version of ranger, download: \url{https://github.com/imbs-hl/ranger/releases} ##' } ##' ##' @title Ranger diff --git a/ranger-r-package/ranger/man/ranger.Rd b/ranger-r-package/ranger/man/ranger.Rd index 185299fc2..d755862b4 100644 --- a/ranger-r-package/ranger/man/ranger.Rd +++ b/ranger-r-package/ranger/man/ranger.Rd @@ -119,11 +119,12 @@ All SNPs in the \code{GenABEL} object will be used for splitting. To use only the SNPs without sex or other covariates from the phenotype file, use \code{0} on the right hand side of the formula. Note that missing values are treated as an extra category while splitting. -See \url{https://github.com/mnwright/ranger} for the development version. +See \url{https://github.com/imbs-hl/ranger} for the development version. -Notes: +To use multithreading on Microsoft Windows platforms, there are currently two options: \itemize{ - \item Multithreading is currently not supported for Microsoft Windows platforms. + \item Use R-devel with the new toolchain + \item Install a binary version of ranger, download: \url{https://github.com/imbs-hl/ranger/releases} } } \examples{ diff --git a/ranger-r-package/ranger/src/AAA_check_cpp11.cpp b/ranger-r-package/ranger/src/AAA_check_cpp11.cpp index 51a57ee2d..7b0edb498 100644 --- a/ranger-r-package/ranger/src/AAA_check_cpp11.cpp +++ b/ranger-r-package/ranger/src/AAA_check_cpp11.cpp @@ -1,7 +1,6 @@ - #ifndef WIN_R_BUILD #if __cplusplus < 201103L -#error Error: ranger requires a real C++11 compiler. You probably have to update gcc. +#error Error: ranger requires a real C++11 compiler. You probably have to update gcc. 
#endif #endif diff --git a/source/src/Forest/Forest.cpp b/source/src/Forest/Forest.cpp index 50802cbdb..f30e6e633 100644 --- a/source/src/Forest/Forest.cpp +++ b/source/src/Forest/Forest.cpp @@ -32,7 +32,7 @@ #include #include #include -#ifndef WIN_R_BUILD +#ifndef OLD_WIN_R_BUILD #include #include #endif @@ -191,7 +191,7 @@ void Forest::init(std::string dependent_variable_name, MemoryMode memory_mode, D // Set number of threads if (num_threads == DEFAULT_NUM_THREADS) { -#ifdef WIN_R_BUILD +#ifdef OLD_WIN_R_BUILD this->num_threads = 1; #else this->num_threads = std::thread::hardware_concurrency(); @@ -413,7 +413,7 @@ void Forest::grow() { variable_importance.resize(num_independent_variables, 0); // Grow trees in multiple threads -#ifdef WIN_R_BUILD +#ifdef OLD_WIN_R_BUILD progress = 0; clock_t start_time = clock(); clock_t lap_time = clock(); @@ -476,7 +476,7 @@ void Forest::grow() { void Forest::predict() { // Predict trees in multiple threads and join the threads with the main thread -#ifdef WIN_R_BUILD +#ifdef OLD_WIN_R_BUILD progress = 0; clock_t start_time = clock(); clock_t lap_time = clock(); @@ -516,7 +516,7 @@ void Forest::predict() { void Forest::computePredictionError() { // Predict trees in multiple threads -#ifdef WIN_R_BUILD +#ifdef OLD_WIN_R_BUILD progress = 0; clock_t start_time = clock(); clock_t lap_time = clock(); @@ -550,7 +550,7 @@ void Forest::computePredictionError() { void Forest::computePermutationImportance() { // Compute tree permutation importance in multiple threads -#ifdef WIN_R_BUILD +#ifdef OLD_WIN_R_BUILD progress = 0; clock_t start_time = clock(); clock_t lap_time = clock(); @@ -637,7 +637,7 @@ void Forest::computePermutationImportance() { } } -#ifndef WIN_R_BUILD +#ifndef OLD_WIN_R_BUILD void Forest::growTreesInThread(uint thread_idx, std::vector* variable_importance) { if (thread_ranges.size() > thread_idx + 1) { for (size_t i = thread_ranges[thread_idx]; i < thread_ranges[thread_idx + 1]; ++i) { @@ -813,7 +813,7 @@ void 
Forest::setAlwaysSplitVariables(std::vector& always_split_vari } } -#ifdef WIN_R_BUILD +#ifdef OLD_WIN_R_BUILD void Forest::showProgress(std::string operation, clock_t start_time, clock_t& lap_time) { // Check for user interrupt diff --git a/source/src/Forest/Forest.h b/source/src/Forest/Forest.h index 63d88d05e..c4120a723 100644 --- a/source/src/Forest/Forest.h +++ b/source/src/Forest/Forest.h @@ -33,7 +33,7 @@ #include #include #include -#ifndef WIN_R_BUILD +#ifndef OLD_WIN_R_BUILD #include #include #include @@ -172,7 +172,7 @@ class Forest { void setAlwaysSplitVariables(std::vector& always_split_variable_names); // Show progress every few seconds -#ifdef WIN_R_BUILD +#ifdef OLD_WIN_R_BUILD void showProgress(std::string operation, clock_t start_time, clock_t& lap_time); #else void showProgress(std::string operation); @@ -207,7 +207,7 @@ class Forest { // Multithreading uint num_threads; std::vector thread_ranges; -#ifndef WIN_R_BUILD +#ifndef OLD_WIN_R_BUILD std::mutex mutex; std::condition_variable condition_variable; #endif diff --git a/source/src/globals.h b/source/src/globals.h index 2f06674b2..c078e9949 100644 --- a/source/src/globals.h +++ b/source/src/globals.h @@ -1,6 +1,6 @@ /*------------------------------------------------------------------------------- This file is part of Ranger. - + Ranger is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or @@ -14,13 +14,13 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Ranger. If not, see . -Written by: +Written by: Marvin N. 
Wright Institut für Medizinische Biometrie und Statistik Universität zu Lübeck Ratzeburger Allee 160 -23562 Lübeck +23562 Lübeck http://www.imbs-luebeck.de wright@imbs.uni-luebeck.de @@ -33,6 +33,15 @@ wright@imbs.uni-luebeck.de TypeName(const TypeName&); \ void operator=(const TypeName&) +// Old/new Win build +#ifdef WIN_R_BUILD + #if __cplusplus < 201103L + #define OLD_WIN_R_BUILD + #else + #define NEW_WIN_R_BUILD + #endif +#endif + typedef unsigned int uint; // Tree types, probability is not selected by ID diff --git a/source/src/version.h b/source/src/version.h index e84ef4e09..f0a43b76d 100644 --- a/source/src/version.h +++ b/source/src/version.h @@ -1,3 +1,3 @@ #ifndef RANGER_VERSION -#define RANGER_VERSION "0.4.1" +#define RANGER_VERSION "0.4.2" #endif