Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added solver to feols and created new test file for it #513

Merged
merged 11 commits into from
Jul 2, 2024
Binary file modified .coverage
Binary file not shown.
9 changes: 6 additions & 3 deletions pyfixest/estimation/feiv_.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ class Feiv(Feols):
Names of the coefficients of Z.
collin_tol : float
Tolerance for collinearity check.
solver: str, default is 'np.linalg.solve'
Solver to use for the estimation. Alternative is 'np.linalg.lstsq'.
weights_name : Optional[str]
Name of the weights variable.
weights_type : Optional[str]
Expand Down Expand Up @@ -103,6 +105,7 @@ def __init__(
collin_tol: float,
weights_name: Optional[str],
weights_type: Optional[str],
solver: str = "np.linalg.solve",
) -> None:
super().__init__(
Y=Y,
Expand All @@ -112,6 +115,7 @@ def __init__(
collin_tol=collin_tol,
weights_name=weights_name,
weights_type=weights_type,
solver=solver,
)

if self._has_weights:
Expand Down Expand Up @@ -144,6 +148,7 @@ def get_fit(self) -> None:
_Z = self._Z
_Y = self._Y
_endogvar = self._endogvar
_solver = self._solver

# Start First Stage

Expand Down Expand Up @@ -175,9 +180,7 @@ def get_fit(self) -> None:
H = self._tXZ @ self._tZZinv
A = H @ self._tZX
B = H @ self._tZy

# Estimate coefficients (beta_hat)
self._beta_hat = np.linalg.solve(A, B).flatten()
self._beta_hat = self.solve_ols(A, B, _solver)

# Predicted values and residuals
self._Y_hat_link = self._X @ self._beta_hat
Expand Down
49 changes: 40 additions & 9 deletions pyfixest/estimation/feols_.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pandas as pd
from formulaic import Formula
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import spsolve
from scipy.sparse.linalg import lsqr, spsolve
from scipy.stats import f, norm, t

from pyfixest.errors import VcovTypeNotSupportedError
Expand Down Expand Up @@ -62,6 +62,9 @@
weights_type : Optional[str]
Type of the weights variable. Either "aweights" for analytic weights or
"fweights" for frequency weights.
solver : str, optional.
The solver to use for the regression. Can be either "np.linalg.solve" or
"np.linalg.lstsq". Defaults to "np.linalg.solve".

Attributes
----------
Expand All @@ -86,6 +89,8 @@
Indices of collinear variables.
_Z : np.ndarray
Alias for the _X array, used for calculations.
_solver: str
The solver used for the regression.
_weights : np.ndarray
Array of weights for each observation.
_N : int
Expand Down Expand Up @@ -166,7 +171,8 @@
Adjusted R-squared value of the model.
_adj_r2_within : float
Adjusted R-squared value computed on demeaned dependent variable.

_solver: str
The solver used to fit the normal equation.
"""

def __init__(
Expand All @@ -178,6 +184,7 @@
coefnames: list[str],
weights_name: Optional[str],
weights_type: Optional[str],
solver: str = "np.linalg.solve",
) -> None:
self._method = "feols"
self._is_iv = False
Expand All @@ -198,7 +205,7 @@
self._X = X

self.get_nobs()

self._solver = solver
_feols_input_checks(Y, X, weights)

if self._X.shape[1] == 0:
Expand Down Expand Up @@ -295,6 +302,34 @@
self.summary = functools.partial(_tmp, models=[self])
self.summary.__doc__ = _tmp.__doc__

def solve_ols(self, tZX: np.ndarray, tZY: np.ndarray, solver: str):
"""
Solve the ordinary least squares problem using the specified solver.

Parameters
----------
tZX (array-like): Z'X.
tZY (array-like): Z'Y.
solver (str): The solver to use. Supported solvers are "np.linalg.lstsq",
"np.linalg.solve", and "scipy.sparse.linalg.lsqr".

Returns
-------
array-like: The solution to the ordinary least squares problem.

Raises
------
ValueError: If the specified solver is not supported.
"""
if solver == "np.linalg.lstsq":
return np.linalg.lstsq(tZX, tZY, rcond=None)[0].flatten()
elif solver == "np.linalg.solve":
return np.linalg.solve(tZX, tZY).flatten()
elif solver == "scipy.sparse.linalg.lsqr":
return lsqr(tZX, tZY)[0].flatten()
else:
raise ValueError(f"Solver {solver} not supported.")

Check warning on line 331 in pyfixest/estimation/feols_.py

View check run for this annotation

Codecov / codecov/patch

pyfixest/estimation/feols_.py#L331

Added line #L331 was not covered by tests

def get_fit(self) -> None:
"""
Fit an OLS model.
Expand All @@ -306,15 +341,11 @@
_X = self._X
_Y = self._Y
_Z = self._Z

_solver = self._solver
self._tZX = _Z.T @ _X
self._tZy = _Z.T @ _Y

# self._tZXinv = np.linalg.inv(self._tZX)
self._beta_hat = np.linalg.solve(self._tZX, self._tZy).flatten()
# self._beta_hat, _, _, _ = lstsq(self._tZX, self._tZy, lapack_driver='gelsy')

# self._beta_hat = (self._tZXinv @ self._tZy).flatten()
self._beta_hat = self.solve_ols(self._tZX, self._tZy, _solver)

self._Y_hat_link = self._X @ self._beta_hat
self._u_hat = self._Y.flatten() - self._Y_hat_link.flatten()
Expand Down
10 changes: 9 additions & 1 deletion pyfixest/estimation/fepois_.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,11 @@ class Fepois(Feols):
Maximum number of iterations for the IRLS algorithm.
tol : Optional[float], default=1e-08
Tolerance level for the convergence of the IRLS algorithm.
solver: str, default is 'np.linalg.solve'
Solver to use for the estimation. Alternative is 'np.linalg.lstsq'.
fixef_tol: float, default = 1e-08.
Tolerance level for the convergence of the demeaning algorithm.
solver:
weights_name : Optional[str]
Name of the weights variable.
weights_type : Optional[str]
Expand All @@ -68,6 +71,7 @@ def __init__(
maxiter: int = 25,
tol: float = 1e-08,
fixef_tol: float = 1e-08,
solver: str = "np.linalg.solve",
weights_name: Optional[str] = None,
weights_type: Optional[str] = None,
):
Expand All @@ -79,6 +83,7 @@ def __init__(
collin_tol=collin_tol,
weights_name=weights_name,
weights_type=weights_type,
solver=solver,
)

# input checks
Expand Down Expand Up @@ -151,6 +156,7 @@ def get_fit(self) -> None:
_iwls_maxiter = 25
_tol = self.tol
_fixef_tol = self.fixef_tol
_solver = self._solver

def compute_deviance(_Y: np.ndarray, mu: np.ndarray):
with warnings.catch_warnings():
Expand Down Expand Up @@ -215,7 +221,9 @@ def compute_deviance(_Y: np.ndarray, mu: np.ndarray):
XWX = WX.transpose() @ WX
XWZ = WX.transpose() @ WZ

delta_new = np.linalg.solve(XWX, XWZ) # eq (10), delta_new -> reg_z
delta_new = self.solve_ols(XWX, XWZ, _solver).reshape(
(-1, 1)
) # eq (10), delta_new -> reg_z
resid = Z_resid - X_resid @ delta_new

mu_old = mu.copy()
Expand Down
165 changes: 165 additions & 0 deletions tests/test_solver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
import numpy as np
import pytest

import pyfixest as pf
from pyfixest.estimation.feiv_ import Feiv
from pyfixest.estimation.feols_ import Feols
from pyfixest.estimation.fepois_ import Fepois


@pytest.fixture()
def data_feols():
data = pf.get_data().dropna()
Y = data["Y"].to_numpy().reshape((-1, 1))
X = data[["X1", "X2"]].to_numpy()
Z = data[["Z1", "X2"]].to_numpy()
weights = (
data["weights"].to_numpy().reshape((-1, 1))
) # Define the variable "weights"
return X, Y, Z, weights


@pytest.fixture()
def data_fepois():
data = pf.get_data(model="Fepois").dropna()
Y = data["Y"].to_numpy().reshape((-1, 1))
X = data[["X1", "X2"]].to_numpy()
weights = (
data["weights"].to_numpy().reshape((-1, 1))
) # Define the variable "weights"
return X, Y, weights


def test_solver_fepois(data_fepois):
"""
Test the equivalence of different solvers for the fepois class.
This function initializes an object with test data and compares the results
obtained from two different solvers: np.linalg.lstsq
and np.linalg.solve. It asserts that
the results are identical or very close within a tolerance.
"""
X, Y, weights = data_fepois
obj = Fepois(
X=X,
Y=Y,
weights=weights,
coefnames=["X1", "X2"],
collin_tol=1e-08,
weights_name="weights",
weights_type="aweights",
solver="np.linalg.solve",
fe=None,
drop_singletons=False,
)
# Use np.linalg.lstsq solver
obj._solver = "np.linalg.lstsq"
obj.get_fit()
beta_hat_lstsq = obj._beta_hat.copy()

# Use np.linalg.solve solver
X, Y, weights = data_fepois
obj = Fepois(
X=X,
Y=Y,
weights=weights,
coefnames=["X1", "X2"],
collin_tol=1e-08,
weights_name="weights",
weights_type="aweights",
solver="np.linalg.solve",
fe=None,
drop_singletons=False,
)
obj._solver = "np.linalg.solve"
obj.get_fit()
beta_hat_solve = obj._beta_hat.copy()
# Assert that the results are identical or very close within a tolerance
np.testing.assert_array_almost_equal(
beta_hat_lstsq, beta_hat_solve, decimal=6, err_msg="Solvers' results differ"
)


def test_solver_feiv(data_feols):
"""
Test the equivalence of different solvers for the feiv class.
This function initializes an object with test data and compares the results
obtained from two different solvers: np.linalg.lstsq
and np.linalg.solve. It asserts that
the results are identical or very close within a tolerance.
"""
X, Y, Z, weights = data_feols

obj = Feiv(
Y=Y,
X=X,
Z=Z,
endogvar=X[:, 0].reshape((-1, 1)),
weights=weights,
coefnames_x=["X1", "X2"],
collin_tol=1e-08,
weights_name="weights",
weights_type="aweights",
solver="np.linalg.solve",
coefnames_z=["Z1"],
)

# Use np.linalg.lstsq solver
obj._solver = "np.linalg.lstsq"
obj.get_fit()
beta_hat_lstsq = obj._beta_hat.copy()

# Use np.linalg.solve solver
obj._solver = "np.linalg.solve"
obj.get_fit()
beta_hat_solve = obj._beta_hat.copy()

# Assert that the results are identical or very close within a tolerance
np.testing.assert_array_almost_equal(
beta_hat_lstsq, beta_hat_solve, decimal=6, err_msg="Solvers' results differ"
)


def test_solver_feols(data_feols):
"""
Test the equivalence of different solvers for the feols class.
This function initializes an object with test data and compares the results
obtained from two different solvers: np.linalg.lstsq
and np.linalg.solve. It asserts that
the results are identical or very close within a tolerance.
"""
X, Y, _, weights = data_feols

obj = Feols(
X=X,
Y=Y,
weights=weights,
collin_tol=1e-08,
coefnames=["X1", "X2"],
weights_name="weights",
weights_type="aweights",
solver="np.linalg.solve",
)

# Use np.linalg.lstsq solver
obj._solver = "np.linalg.lstsq"
obj.get_fit()
beta_hat_lstsq = obj._beta_hat.copy()

# Use np.linalg.solve solver
obj._solver = "np.linalg.solve"
obj.get_fit()
beta_hat_solve = obj._beta_hat.copy()

# scipy.sparse.linalg.lsqr solver
obj._solver = "scipy.sparse.linalg.lsqr"
obj.get_fit()
beta_hat_lsqr = obj._beta_hat.copy()

# Assert that the results are identical or very close within a tolerance
np.testing.assert_array_almost_equal(
beta_hat_lstsq, beta_hat_solve, decimal=6, err_msg="Solvers' results differ"
)

np.testing.assert_array_almost_equal(
beta_hat_lstsq, beta_hat_lsqr, decimal=6, err_msg="Solvers' results differ"
)