diff --git a/R/feglm.R b/R/feglm.R index cf28304..f84c785 100644 --- a/R/feglm.R +++ b/R/feglm.R @@ -95,10 +95,10 @@ feglm <- function( tmp.var <- temp_var_(data) # Drop observations that do not contribute to the log likelihood ---- - data <- drop_by_link_type_(data, lhs, family, tmp.var, k.vars, control) + drop_by_link_type_(data, lhs, family, tmp.var, k.vars, control) # Transform fixed effects and clusters to factors ---- - data <- transform_fe_(data, formula, k.vars) + transform_fe_(data, formula, k.vars) # Determine the number of dropped observations ---- nt <- nrow(data) @@ -162,6 +162,5 @@ feglm <- function( ) # Return result list ---- - reslist <- structure(reslist, class = "feglm") - return(reslist) + structure(reslist, class = "feglm") } diff --git a/R/fepoisson.R b/R/fepoisson.R index b8942c0..36b6fad 100644 --- a/R/fepoisson.R +++ b/R/fepoisson.R @@ -18,10 +18,8 @@ fepoisson <- function( beta.start = NULL, eta.start = NULL, control = NULL) { - return( - feglm( - formula = formula, data = data, weights = weights, family = poisson(), - beta.start = beta.start, eta.start = eta.start, control = control - ) + feglm( + formula = formula, data = data, weights = weights, family = poisson(), + beta.start = beta.start, eta.start = eta.start, control = control ) } diff --git a/R/helpers.R b/R/helpers.R index ae08d0a..1e40885 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -11,22 +11,34 @@ check_factor_ <- function(x) { # Higher-order partial derivatives ---- partial_mu_eta_ <- function(eta, family, order) { + # Safeguard eta if necessary + if (family[["link"]] != "logit") { + eta <- family[["linkfun"]](family[["linkinv"]](eta)) + } + f <- family[["mu.eta"]](eta) + if (order == 2L) { + # Second-order derivative if (family[["link"]] == "logit") { f * (1.0 - 2.0 * family[["linkinv"]](eta)) } else if (family[["link"]] == "probit") { -eta * f - } else { + } else if (family[["link"]] == "cloglog") { f * (1.0 - exp(eta)) + } else { + -2.0 * eta / (1.0 + eta^2) * f } } else 
{ + # Third-order derivative if (family[["link"]] == "logit") { f * ((1.0 - 2.0 * family[["linkinv"]](eta))^2 - 2.0 * f) } else if (family[["link"]] == "probit") { (eta^2 - 1.0) * f - } else { + } else if (family[["link"]] == "cloglog") { f * (1.0 - exp(eta)) * (2.0 - exp(eta)) - f + } else { + (6.0 * eta^2 - 2.0) / (1.0 + eta^2)^2 * f } } } @@ -51,8 +63,6 @@ check_formula_ <- function(formula) { } else if (!inherits(formula, "formula")) { stop("'formula' has to be of class 'formula'.", call. = FALSE) } - - return(TRUE) } check_data_ <- function(data) { @@ -70,9 +80,7 @@ check_control_ <- function(control) { stop("'control' has to be a list.", call. = FALSE) } - control <- do.call(feglm_control, control) - - return(control) + do.call(feglm_control, control) } check_family_ <- function(family) { @@ -84,8 +92,6 @@ check_family_ <- function(family) { } else if (startsWith(family[["family"]], "Negative Binomial")) { stop("Please use 'fenegbin' instead.", call. = FALSE) } - - return(TRUE) } update_formula_ <- function(formula) { @@ -98,7 +104,7 @@ update_formula_ <- function(formula) { ), call. = FALSE) } - return(formula) + formula } model_frame_ <- function(data, formula, weights) { @@ -118,8 +124,6 @@ model_frame_ <- function(data, formula, weights) { assign("lhs", lhs, envir = parent.frame()) assign("nobs.na", nobs.na, envir = parent.frame()) assign("nobs.full", nobs.full, envir = parent.frame()) - - return(TRUE) } check_response_ <- function(data, lhs, family) { @@ -149,14 +153,12 @@ check_response_ <- function(data, lhs, family) { if (data[, any(get(lhs) <= 0.0)]) { stop("Model response has to be strictly positive.", call. = FALSE) } - } else { + } else if (family[["family"]] != "gaussian") { # Check if 'y' is positive if (data[, any(get(lhs) < 0.0)]) { stop("Model response has to be positive.", call. 
= FALSE) } } - - return(TRUE) } drop_by_link_type_ <- function(data, lhs, family, tmp.var, k.vars, control) { @@ -182,8 +184,6 @@ drop_by_link_type_ <- function(data, lhs, family, tmp.var, k.vars, control) { } } } - - return(data) } transform_fe_ <- function(data, formula, k.vars) { @@ -193,18 +193,14 @@ transform_fe_ <- function(data, formula, k.vars) { add.vars <- attr(terms(formula, rhs = 3L), "term.labels") data[, (add.vars) := lapply(.SD, check_factor_), .SDcols = add.vars] } - - return(data) } nobs_ <- function(nobs.full, nobs.na, nt) { - return( - c( - nobs.full = nobs.full, - nobs.na = nobs.na, - nobs.pc = nobs.full - nt, - nobs = nt - ) + c( + nobs.full = nobs.full, + nobs.na = nobs.na, + nobs.pc = nobs.full - nt, + nobs = nt ) } @@ -219,16 +215,12 @@ model_response_ <- function(data, formula) { assign("X", X, envir = parent.frame()) assign("nms.sp", nms.sp, envir = parent.frame()) assign("p", p, envir = parent.frame()) - - return(TRUE) } check_linear_dependence_ <- function(X, p) { if (qr_rank_(X) < p) { stop("Linear dependent terms detected.", call. = FALSE) } - - return(TRUE) } check_weights_ <- function(wt) { @@ -238,8 +230,6 @@ check_weights_ <- function(wt) { if (any(wt < 0.0)) { stop("negative weights are not allowed.", call. 
= FALSE) } - - return(TRUE) } init_theta_ <- function(init.theta, link) { @@ -255,7 +245,7 @@ init_theta_ <- function(init.theta, link) { family <- negative.binomial(init.theta, link) } - return(family) + family } start_guesses_ <- function( @@ -315,6 +305,4 @@ start_guesses_ <- function( assign("beta", beta, envir = parent.frame()) assign("eta", eta, envir = parent.frame()) - - return(TRUE) } diff --git a/R/internals.R b/R/internals.R index 8b3efa4..ed191ca 100644 --- a/R/internals.R +++ b/R/internals.R @@ -167,7 +167,7 @@ feglm_fit_ <- function(beta, eta, y, X, wt, k.list, family, control) { if (keep.mx) reslist[["MX"]] <- MX # Return result list - return(reslist) + reslist } # Efficient offset algorithm to update the linear predictor ---- @@ -314,42 +314,7 @@ getScoreMatrix <- function(object) { MX * (nu * w) } - -# Higher-order partial derivatives for 'binomial()' -partial_mu_eta_ <- function(eta, family, order) { - # Safeguard \eta if necessary - if (family[["link"]] != "logit") { - eta <- family[["linkfun"]](family[["linkinv"]](eta)) - } - - # Second- and third-order derivatives - f <- family[["mu.eta"]](eta) - if (order == 2L) { - # Second-order derivative - if (family[["link"]] == "logit") { - f * (1.0 - 2.0 * family[["linkinv"]](eta)) - } else if (family[["link"]] == "probit") { - -eta * f - } else if (family[["link"]] == "cloglog") { - f * (1.0 - exp(eta)) - } else { - -2.0 * eta / (1.0 + eta^2) * f - } - } else { - # Third-order derivative - if (family[["link"]] == "logit") { - f * ((1.0 - 2.0 * family[["linkinv"]](eta))^2 - 2.0 * f) - } else if (family[["link"]] == "probit") { - (eta^2 - 1.0) * f - } else if (family[["link"]] == "cloglog") { - f * (1.0 - exp(eta)) * (2.0 - exp(eta)) - f - } else { - (6.0 * eta^2 - 2.0) / (1.0 + eta^2)^2 * f - } - } -} - -# Returns suitable name for a temporary variable +# Returns suitable name for a temporary variable temp_var_ <- function(data) { repeat { tmp.var <- paste0(sample(letters, 5L, replace 
= TRUE), collapse = "") diff --git a/dev/check_bottlenecks.R b/dev/check_bottlenecks.R index 80b7c59..0c9709b 100644 --- a/dev/check_bottlenecks.R +++ b/dev/check_bottlenecks.R @@ -26,7 +26,9 @@ unique(d$year) # Fit 'feglm()' load_all() -profvis::profvis(feglm(trade_100 ~ lang + clny + rta | year, d, family = binomial())) +# profvis::profvis(feglm(trade_100 ~ lang + clny + rta | year, d, family = binomial())) +mod = feglm(trade_100 ~ lang + clny + rta | year, d, family = binomial()) # Compute average partial effects # bench::mark(apes(mod)) +apes(mod) diff --git a/docs/404.html b/docs/404.html index 348e34b..3eb0fa7 100644 --- a/docs/404.html +++ b/docs/404.html @@ -39,7 +39,7 @@
diff --git a/docs/LICENSE.html b/docs/LICENSE.html index 94ed46f..113fed8 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -17,7 +17,7 @@ diff --git a/docs/articles/index.html b/docs/articles/index.html index bbecbe7..28ddf05 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -17,7 +17,7 @@ diff --git a/docs/articles/intro.html b/docs/articles/intro.html index 45520f8..3148e22 100644 --- a/docs/articles/intro.html +++ b/docs/articles/intro.html @@ -40,7 +40,7 @@ diff --git a/docs/authors.html b/docs/authors.html index 9ef102c..330a85c 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,7 +17,7 @@ @@ -74,14 +74,14 @@Vargas Sepulveda M (2024). capybara: Fast and Memory Efficient Fitting of Linear Models With High-Dimensional Fixed Effects. -R package version 0.3.5, https://github.com/pachadotdev/capybara, https://pacha.dev/capybara/. +R package version 0.4, https://github.com/pachadotdev/capybara, https://pacha.dev/capybara/.
@Manual{, title = {capybara: Fast and Memory Efficient Fitting of Linear Models With High-Dimensional Fixed Effects}, author = {Mauricio {Vargas Sepulveda}}, year = {2024}, - note = {R package version 0.3.5, https://github.com/pachadotdev/capybara}, + note = {R package version 0.4, https://github.com/pachadotdev/capybara}, url = {https://pacha.dev/capybara/}, }diff --git a/docs/index.html b/docs/index.html index fa71a2e..d22ec93 100644 --- a/docs/index.html +++ b/docs/index.html @@ -46,7 +46,7 @@ diff --git a/docs/news/index.html b/docs/news/index.html index 0419980..ad13881 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -17,7 +17,7 @@ @@ -57,6 +57,9 @@
NEWS.md
+
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
index 17fa80f..2b54fe9 100644
--- a/docs/pkgdown.yml
+++ b/docs/pkgdown.yml
@@ -3,5 +3,5 @@ pkgdown: 2.0.7
pkgdown_sha: ~
articles:
intro: intro.html
-last_built: 2024-03-04T01:22Z
+last_built: 2024-03-04T05:22Z
diff --git a/docs/reference/apes.html b/docs/reference/apes.html
index 6f0e2f0..90061c4 100644
--- a/docs/reference/apes.html
+++ b/docs/reference/apes.html
@@ -1,14 +1,14 @@
R/apes.R
apes.Rd
apes
is a post-estimation routine that can be used
- to estimate average partial effects with respect to all covariates in the
- model and the corresponding covariance matrix. The estimation of the
- covariance is based on a linear approximation (delta method) plus an
- optional finite population correction. Note that the command automatically
- determines which of the regressors are binary or non-binary.
Remark: The routine currently does not support computing average - partial effects based on functional forms like interactions and polynomials.
+partial effects based on functional forms like interactions and polynomials.The function apes
returns a named list of class
"apes"
.
The function apes
returns a named list of class
+"apes"
.
Cruz-Gonzalez, M., I. Fernández-Val, and M. Weidner (2017). "Bias - corrections for probit and logit models with two-way fixed effects". The - Stata Journal, 17(3), 517-545.
+corrections for probit and logit models with two-way fixed effects". The +Stata Journal, 17(3), 517-545.Czarnowske, D. and A. Stammann (2020). "Fixed Effects Binary - Choice Models: Estimation and Inference with Long Panels". ArXiv e-prints.
+Choice Models: Estimation and Inference with Long Panels". ArXiv e-prints.Fernández-Val, I. and M. Weidner (2016). "Individual and time - effects in nonlinear panel models with large N, T". Journal of Econometrics, - 192(1), 291-312.
+effects in nonlinear panel models with large N, T". Journal of Econometrics, +192(1), 291-312.Fernández-Val, I. and M. Weidner (2018). "Fixed effects - estimation of large-t panel data models". Annual Review of Economics, 10, - 109-138.
+estimation of large-t panel data models". Annual Review of Economics, 10, +109-138.Hinz, J., A. Stammann, and J. Wanner (2020). "State Dependence - and Unobserved Heterogeneity in the Extensive Margin of Trade". ArXiv - e-prints.
+and Unobserved Heterogeneity in the Extensive Margin of Trade". ArXiv +e-prints.Neyman, J. and E. L. Scott (1948). "Consistent estimates based on - partially consistent observations". Econometrica, 16(1), 1-32.
+partially consistent observations". Econometrica, 16(1), 1-32.R/bias_corr.R
bias_corr.Rd
Post-estimation routine to substantially reduce the incidental
- parameter bias problem. Applies the analytical bias correction derived by
- Fernández-Val and Weidner (2016) and Hinz, Stammann, and Wanner (2020) to
- obtain bias-corrected estimates of the structural parameters and is
- currently restricted to binomial
with 1,2,3-way fixed
- effects.
binomial
with 1,2,3-way fixed
+effects.
Czarnowske, D. and A. Stammann (2020). "Fixed Effects Binary - Choice Models: Estimation and Inference with Long Panels". ArXiv e-prints.
+Choice Models: Estimation and Inference with Long Panels". ArXiv e-prints.Fernández-Val, I. and M. Weidner (2016). "Individual and time - effects in nonlinear panel models with large N, T". Journal of Econometrics, - 192(1), 291-312.
+effects in nonlinear panel models with large N, T". Journal of Econometrics, +192(1), 291-312.Fernández-Val, I. and M. Weidner (2018). "Fixed effects - estimation of large-t panel data models". Annual Review of Economics, 10, - 109-138.
+estimation of large-t panel data models". Annual Review of Economics, 10, +109-138.Hahn, J. and G. Kuersteiner (2011). "Bias reduction for dynamic - nonlinear panel models with fixed effects". Econometric Theory, 27(6), - 1152-1191.
+nonlinear panel models with fixed effects". Econometric Theory, 27(6), +1152-1191.Hinz, J., A. Stammann, and J. Wanner (2020). "State Dependence - and Unobserved Heterogeneity in the Extensive Margin of Trade". ArXiv - e-prints.
+and Unobserved Heterogeneity in the Extensive Margin of Trade". ArXiv +e-prints.Neyman, J. and E. L. Scott (1948). "Consistent estimates based on - partially consistent observations". Econometrica, 16(1), 1-32.
+partially consistent observations". Econometrica, 16(1), 1-32.R/capybara-package.R
capybara-package.Rd
feglm
can be used to fit generalized linear models
- with many high-dimensional fixed effects. The estimation procedure is based
- on unconditional maximum likelihood and can be interpreted as a
- “weighted demeaning” approach.
Remark: The term fixed effect is used in the econometrician's sense of - having intercepts for each level in each category.
+having intercepts for each level in each category.If feglm
does not converge this is often a sign of
- linear dependence between one or more regressors and a fixed effects
- category. In this case, you should carefully inspect your model
- specification.
Gaure, S. (2013). "OLS with Multiple High Dimensional Category - Variables". Computational Statistics and Data Analysis, 66.
+Variables". Computational Statistics and Data Analysis, 66.Marschner, I. (2011). "glm2: Fitting generalized linear models - with convergence problems". The R Journal, 3(2).
+with convergence problems". The R Journal, 3(2).Stammann, A., F. Heiss, and D. McFadden (2016). "Estimating Fixed - Effects Logit Models with Large Panel Data". Working paper.
+Effects Logit Models with Large Panel Data". Working paper.Stammann, A. (2018). "Fast and Feasible Estimation of Generalized - Linear Models with High-Dimensional k-Way Fixed Effects". ArXiv e-prints.
+Linear Models with High-Dimensional k-Way Fixed Effects". ArXiv e-prints.feglm
Control ParametersSet and change parameters used for fitting feglm
.
- Termination conditions are similar to glm
.
glm
.
A wrapper for feglm
with
- family = gaussian()
.
family = gaussian()
.
The function felm
returns a named list of class
"felm"
.
The function felm
returns a named list of class
+"felm"
.
Gaure, S. (2013). "OLS with Multiple High Dimensional Category - Variables". Computational Statistics and Data Analysis, 66.
+Variables". Computational Statistics and Data Analysis, 66.Marschner, I. (2011). "glm2: Fitting generalized linear models - with convergence problems". The R Journal, 3(2).
+with convergence problems". The R Journal, 3(2).Stammann, A., F. Heiss, and D. McFadden (2016). "Estimating Fixed - Effects Logit Models with Large Panel Data". Working paper.
+Effects Logit Models with Large Panel Data". Working paper.Stammann, A. (2018). "Fast and Feasible Estimation of Generalized - Linear Models with High-Dimensional k-Way Fixed Effects". ArXiv e-prints.
+Linear Models with High-Dimensional k-Way Fixed Effects". ArXiv e-prints.R/fenegbin.R
fenegbin.Rd
A wrapper for feglm
with
- family = poisson()
.
family = poisson()
.
The system might not have a unique solution since we do not take - collinearity into account. If the solution is not unique, an estimable - function has to be applied to our solution to get meaningful estimates of - the fixed effects.
+collinearity into account. If the solution is not unique, an estimable +function has to be applied to our solution to get meaningful estimates of +the fixed effects.tolerance level for the stopping condition. The algorithm is
stopped at iteration \(i\) if \(||\boldsymbol{\alpha}_{i} -
-\boldsymbol{\alpha}_{i - 1}||_{2} < tol ||\boldsymbol{\alpha}_{i - 1}||
-{2}\). Default is 1.0e-08
.
1.0e-08
.
Stammann, A. (2018). "Fast and Feasible Estimation of Generalized - Linear Models with High-Dimensional k-way Fixed Effects". ArXiv e-prints.
+Linear Models with High-Dimensional k-way Fixed Effects". ArXiv e-prints.Gaure, S. (n. d.). "Multicollinearity, identification, and - estimable functions". Unpublished.
+estimable functions". Unpublished.apes()
Compute average partial effects after fitting binary choice models - with a 1,2,3-way error component
Asymptotic bias correction after fitting binary choice models with a - 1,2,3-way error component
Generalized Linear Models (GLMs) with high-dimensional k-way fixed - effects
fenegbin()
Negative Binomial model fitting with high-dimensional k-way fixed - effects
Covariance matrix for the estimator of the
- average partial effects from objects returned by apes
.
apes
.
Covariance matrix for the estimator of the
- average partial effects from objects returned by apes
.
apes
.