-
Notifications
You must be signed in to change notification settings - Fork 28.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-16107] [R] group glm methods in documentation #13820
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -53,9 +53,10 @@ setClass("AFTSurvivalRegressionModel", representation(jobj = "jobj")) | |
#' @note KMeansModel since 2.0.0 | ||
setClass("KMeansModel", representation(jobj = "jobj")) | ||
|
||
#' Fits a generalized linear model | ||
#' Generalized Linear Models | ||
#' | ||
#' Fits a generalized linear model against a Spark DataFrame. | ||
#' Fits generalized linear model against a Spark DataFrame. Users can print, make predictions on | ||
#' the produced model and save the model to the input path. | ||
#' | ||
#' @param data SparkDataFrame for training. | ||
#' @param formula A symbolic description of the model to be fitted. Currently only a few formula | ||
|
@@ -66,8 +67,9 @@ setClass("KMeansModel", representation(jobj = "jobj")) | |
#' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}. | ||
#' @param tol Positive convergence tolerance of iterations. | ||
#' @param maxIter Integer giving the maximal number of IRLS iterations. | ||
#' @return a fitted generalized linear model | ||
#' @return \code{spark.glm} returns a fitted generalized linear model | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. since this is the page for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd view |
||
#' @rdname spark.glm | ||
#' @name spark.glm | ||
#' @export | ||
#' @examples | ||
#' \dontrun{ | ||
|
@@ -76,7 +78,21 @@ setClass("KMeansModel", representation(jobj = "jobj")) | |
#' df <- createDataFrame(iris) | ||
#' model <- spark.glm(df, Sepal_Length ~ Sepal_Width, family = "gaussian") | ||
#' summary(model) | ||
#' | ||
#' # fitted values on training data | ||
#' fitted <- predict(model, df) | ||
#' head(select(fitted, "Sepal_Length", "prediction")) | ||
#' | ||
#' # save fitted model to input path | ||
#' path <- "path/to/model" | ||
#' write.ml(model, path) | ||
#' | ||
#' # can also read back the saved model and print | ||
#' savedModel <- read.ml(path) | ||
#' summary(savedModel) | ||
#' } | ||
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add |
||
#' @note spark.glm since 2.0.0 | ||
setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), | ||
function(data, formula, family = gaussian, tol = 1e-6, maxIter = 25) { | ||
|
@@ -99,10 +115,7 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), | |
return(new("GeneralizedLinearRegressionModel", jobj = jobj)) | ||
}) | ||
|
||
#' Fits a generalized linear model (R-compliant). | ||
#' | ||
#' Fits a generalized linear model, similarly to R's glm(). | ||
#' | ||
#' @title Fit a generalized linear model | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The original title is okay. Shall we change this to |
||
#' @param formula A symbolic description of the model to be fitted. Currently only a few formula | ||
#' operators are supported, including '~', '.', ':', '+', and '-'. | ||
#' @param data SparkDataFrame for training. | ||
|
@@ -112,36 +125,23 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), | |
#' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please put a see also and link it to |
||
#' @param epsilon Positive convergence tolerance of iterations. | ||
#' @param maxit Integer giving the maximal number of IRLS iterations. | ||
#' @return a fitted generalized linear model | ||
#' @return \code{glm} returns a fitted generalized linear model. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
#' @rdname glm | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it is fine to put |
||
#' @export | ||
#' @examples | ||
#' \dontrun{ | ||
#' sparkR.session() | ||
#' data(iris) | ||
#' df <- createDataFrame(iris) | ||
#' model <- glm(Sepal_Length ~ Sepal_Width, df, family = "gaussian") | ||
#' summary(model) | ||
#' } | ||
#' @note glm since 1.5.0 | ||
setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDataFrame"), | ||
function(formula, family = gaussian, data, epsilon = 1e-6, maxit = 25) { | ||
spark.glm(data, formula, family, tol = epsilon, maxIter = maxit) | ||
}) | ||
|
||
#' Get the summary of a generalized linear model | ||
#' | ||
#' Returns the summary of a model produced by glm() or spark.glm(), similarly to R's summary(). | ||
#' | ||
#' @title Return a summary of the produced generalized linear model | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This doesn't show up anywhere, does it? I think it is okay to use this trick to document methods inline if we don't have other ways to prevent it showing up in the description section. cc: @shivaram @felixcheung |
||
#' @param object A fitted generalized linear model | ||
#' @return coefficients the model's coefficients, intercept | ||
#' @rdname summary | ||
#' @return \code{summary} returns a summary object of the fitted model, a list of components | ||
#' including at least the coefficients, null/residual deviance, null/residual degrees | ||
#' of freedom, AIC and number of iterations IRLS takes. | ||
#' | ||
#' @rdname spark.glm | ||
#' @export | ||
#' @examples | ||
#' \dontrun{ | ||
#' model <- glm(y ~ x, trainingData) | ||
#' summary(model) | ||
#' } | ||
#' @note summary(GeneralizedLinearRegressionModel) since 2.0.0 | ||
setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"), | ||
function(object, ...) { | ||
|
@@ -173,10 +173,9 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"), | |
return(ans) | ||
}) | ||
|
||
#' Print the summary of GeneralizedLinearRegressionModel | ||
#' | ||
#' @rdname print | ||
#' @name print.summary.GeneralizedLinearRegressionModel | ||
#' @title Print the summary of the produced generalized linear model | ||
#' @rdname spark.glm | ||
#' @param x Summary object of fitted generalized linear model returned by \code{summary} function | ||
#' @export | ||
#' @note print.summary.GeneralizedLinearRegressionModel since 2.0.0 | ||
print.summary.GeneralizedLinearRegressionModel <- function(x, ...) { | ||
|
@@ -205,22 +204,11 @@ print.summary.GeneralizedLinearRegressionModel <- function(x, ...) { | |
invisible(x) | ||
} | ||
|
||
#' Predicted values based on model | ||
#' | ||
#' Makes predictions from a generalized linear model produced by glm() or spark.glm(), | ||
#' similarly to R's predict(). | ||
#' | ||
#' @param object A fitted generalized linear model | ||
#' @title Make predictions using the produced generalized linear model | ||
#' @param newData SparkDataFrame for testing | ||
#' @return SparkDataFrame containing predicted labels in a column named "prediction" | ||
#' @rdname predict | ||
#' @return \code{predict} returns a SparkDataFrame containing predicted labels in a column named | ||
#'         "prediction" | ||
#' @rdname spark.glm | ||
#' @export | ||
#' @examples | ||
#' \dontrun{ | ||
#' model <- glm(y ~ x, trainingData) | ||
#' predicted <- predict(model, testData) | ||
#' showDF(predicted) | ||
#' } | ||
#' @note predict(GeneralizedLinearRegressionModel) since 1.5.0 | ||
setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"), | ||
function(object, newData) { | ||
|
@@ -471,25 +459,16 @@ setMethod("write.ml", signature(object = "AFTSurvivalRegressionModel", path = "c | |
invisible(callJMethod(writer, "save", path)) | ||
}) | ||
|
||
#' Save fitted MLlib model to the input path | ||
#' | ||
#' Save the generalized linear model to the input path. | ||
#' @title Save fitted generalized linear model to the input path | ||
#' | ||
#' @param object A fitted generalized linear model | ||
#' @param path The directory where the model is saved | ||
#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE | ||
#' which means throw exception if the output path exists. | ||
#' | ||
#' @rdname write.ml | ||
#' @name write.ml | ||
#' @rdname spark.glm | ||
#' @export | ||
#' @examples | ||
#' \dontrun{ | ||
#' model <- glm(y ~ x, trainingData) | ||
#' path <- "path/to/model" | ||
#' write.ml(model, path) | ||
#' } | ||
#' @note write.ml(GeneralizedLinearRegressionModel, character) since 2.0.0 | ||
#' @seealso \link{read.ml} | ||
setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", path = "character"), | ||
function(object, path, overwrite = FALSE) { | ||
writer <- callJMethod(object@jobj, "write") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fit
->Fits
Can print
->Users can print