-
Notifications
You must be signed in to change notification settings - Fork 28.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-16107] [R] group glm methods in documentation #13820
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -53,9 +53,10 @@ setClass("AFTSurvivalRegressionModel", representation(jobj = "jobj")) | |
#' @note KMeansModel since 2.0.0 | ||
setClass("KMeansModel", representation(jobj = "jobj")) | ||
|
||
#' Fits a generalized linear model | ||
#' Generalized Linear Models | ||
#' | ||
#' Fits a generalized linear model against a Spark DataFrame. | ||
#' Fits generalized linear model against a Spark DataFrame. Users can print, make predictions on the | ||
#' produced model and save the model to the input path. | ||
#' | ||
#' @param data SparkDataFrame for training. | ||
#' @param formula A symbolic description of the model to be fitted. Currently only a few formula | ||
|
@@ -66,8 +67,9 @@ setClass("KMeansModel", representation(jobj = "jobj")) | |
#' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}. | ||
#' @param tol Positive convergence tolerance of iterations. | ||
#' @param maxIter Integer giving the maximal number of IRLS iterations. | ||
#' @return a fitted generalized linear model | ||
#' @return \code{spark.glm} returns a fitted generalized linear model | ||
#' @rdname spark.glm | ||
#' @name spark.glm | ||
#' @export | ||
#' @examples | ||
#' \dontrun{ | ||
|
@@ -76,8 +78,21 @@ setClass("KMeansModel", representation(jobj = "jobj")) | |
#' df <- createDataFrame(iris) | ||
#' model <- spark.glm(df, Sepal_Length ~ Sepal_Width, family = "gaussian") | ||
#' summary(model) | ||
#' | ||
#' # fitted values on training data | ||
#' fitted <- predict(model, df) | ||
#' head(select(fitted, "Sepal_Length", "prediction")) | ||
#' | ||
#' # save fitted model to input path | ||
#' path <- "path/to/model" | ||
#' write.ml(model, path) | ||
#' | ||
#' # can also read back the saved model and print | ||
#' savedModel <- read.ml(path) | ||
#' summary(savedModel) | ||
#' } | ||
#' @note spark.glm since 2.0.0 | ||
#' @seealso \link{glm}, \link{read.ml} | ||
setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), | ||
function(data, formula, family = gaussian, tol = 1e-6, maxIter = 25) { | ||
if (is.character(family)) { | ||
|
@@ -99,10 +114,9 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), | |
return(new("GeneralizedLinearRegressionModel", jobj = jobj)) | ||
}) | ||
|
||
#' Fits a generalized linear model (R-compliant). | ||
#' Generalized Linear Models (R-compliant) | ||
#' | ||
#' Fits a generalized linear model, similarly to R's glm(). | ||
#' | ||
#' @param formula A symbolic description of the model to be fitted. Currently only a few formula | ||
#' operators are supported, including '~', '.', ':', '+', and '-'. | ||
#' @param data SparkDataFrame for training. | ||
|
@@ -112,7 +126,7 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), | |
#' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}. | ||
#' @param epsilon Positive convergence tolerance of iterations. | ||
#' @param maxit Integer giving the maximal number of IRLS iterations. | ||
#' @return a fitted generalized linear model | ||
#' @return \code{glm} returns a fitted generalized linear model. | ||
#' @rdname glm | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it is fine to put |
||
#' @export | ||
#' @examples | ||
|
@@ -124,24 +138,21 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), | |
#' summary(model) | ||
#' } | ||
#' @note glm since 1.5.0 | ||
#' @seealso \link{spark.glm} | ||
setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDataFrame"), | ||
function(formula, family = gaussian, data, epsilon = 1e-6, maxit = 25) { | ||
spark.glm(data, formula, family, tol = epsilon, maxIter = maxit) | ||
}) | ||
|
||
#' Get the summary of a generalized linear model | ||
#' | ||
#' Returns the summary of a model produced by glm() or spark.glm(), similarly to R's summary(). | ||
# Returns the summary of a model produced by glm() or spark.glm(), similarly to R's summary(). | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. insert an empty line There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I get this is intentional, but I'd suggest adding extra empty newline between There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. +1 |
||
#' | ||
#' @param object A fitted generalized linear model | ||
#' @return coefficients the model's coefficients, intercept | ||
#' @rdname summary | ||
#' @return \code{summary} returns a summary object of the fitted model, a list of components | ||
#' including at least the coefficients, null/residual deviance, null/residual degrees | ||
#' of freedom, AIC and number of iterations IRLS takes. | ||
#' | ||
#' @rdname spark.glm | ||
#' @export | ||
#' @examples | ||
#' \dontrun{ | ||
#' model <- glm(y ~ x, trainingData) | ||
#' summary(model) | ||
#' } | ||
#' @note summary(GeneralizedLinearRegressionModel) since 2.0.0 | ||
setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"), | ||
function(object, ...) { | ||
|
@@ -173,10 +184,10 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"), | |
return(ans) | ||
}) | ||
|
||
#' Print the summary of GeneralizedLinearRegressionModel | ||
# Prints the summary of GeneralizedLinearRegressionModel | ||
#' | ||
#' @rdname print | ||
#' @name print.summary.GeneralizedLinearRegressionModel | ||
#' @rdname spark.glm | ||
#' @param x Summary object of fitted generalized linear model returned by \code{summary} function | ||
#' @export | ||
#' @note print.summary.GeneralizedLinearRegressionModel since 2.0.0 | ||
print.summary.GeneralizedLinearRegressionModel <- function(x, ...) { | ||
|
@@ -205,22 +216,13 @@ print.summary.GeneralizedLinearRegressionModel <- function(x, ...) { | |
invisible(x) | ||
} | ||
|
||
#' Predicted values based on model | ||
# Makes predictions from a generalized linear model produced by glm() or spark.glm(), | ||
# similarly to R's predict(). | ||
#' | ||
#' Makes predictions from a generalized linear model produced by glm() or spark.glm(), | ||
#' similarly to R's predict(). | ||
#' | ||
#' @param object A fitted generalized linear model | ||
#' @param newData SparkDataFrame for testing | ||
#' @return SparkDataFrame containing predicted labels in a column named "prediction" | ||
#' @rdname predict | ||
#' @return \code{predict} returns a SparkDataFrame containing predicted labels in a column named "prediction" | ||
#' @rdname spark.glm | ||
#' @export | ||
#' @examples | ||
#' \dontrun{ | ||
#' model <- glm(y ~ x, trainingData) | ||
#' predicted <- predict(model, testData) | ||
#' showDF(predicted) | ||
#' } | ||
#' @note predict(GeneralizedLinearRegressionModel) since 1.5.0 | ||
setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"), | ||
function(object, newData) { | ||
|
@@ -471,24 +473,14 @@ setMethod("write.ml", signature(object = "AFTSurvivalRegressionModel", path = "c | |
invisible(callJMethod(writer, "save", path)) | ||
}) | ||
|
||
#' Save fitted MLlib model to the input path | ||
#' | ||
#' Save the generalized linear model to the input path. | ||
# Saves the generalized linear model to the input path. | ||
#' | ||
#' @param object A fitted generalized linear model | ||
#' @param path The directory where the model is saved | ||
#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE | ||
#' which means throw exception if the output path exists. | ||
#' | ||
#' @rdname write.ml | ||
#' @name write.ml | ||
#' @rdname spark.glm | ||
#' @export | ||
#' @examples | ||
#' \dontrun{ | ||
#' model <- glm(y ~ x, trainingData) | ||
#' path <- "path/to/model" | ||
#' write.ml(model, path) | ||
#' } | ||
#' @note write.ml(GeneralizedLinearRegressionModel, character) since 2.0.0 | ||
setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", path = "character"), | ||
function(object, path, overwrite = FALSE) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
since this is the page for
spark.glm
I think it's ok if it simply says "returns a fitted..."
it seems a bit odd to reference
spark.glm
again. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd view
spark.glm
as a method under this group. It might be clearer to have the method name in the Value
section. Also, I'm not sure about how roxygen2 defines the ordering. Having the names would help.