Skip to content

Commit

Permalink
Added solver to feols and created new test file for it (#513)
Browse files Browse the repository at this point in the history
* Added solver to feols and created new test file for it

* Updated test file and feos class with new solver

* Adding solver to feiv and fepois, and upding test_solver file

* update tests

* move solve_ols to Feols class, delete in other methods; add docstrings

* fix tests except for Attribute error Fepois has no attribute _N

* Update fepois_.py

add reshape to delta_new

* fix tests

* add scipy.sparse.linalg.lsqr solver

---------

Co-authored-by: Alexander Fischer <[email protected]>
  • Loading branch information
saidamir and s3alfisc authored Jul 2, 2024
1 parent 88c87cd commit 56c4c6e
Show file tree
Hide file tree
Showing 5 changed files with 220 additions and 13 deletions.
Binary file modified .coverage
Binary file not shown.
9 changes: 6 additions & 3 deletions pyfixest/estimation/feiv_.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ class Feiv(Feols):
Names of the coefficients of Z.
collin_tol : float
Tolerance for collinearity check.
solver: str, default is 'np.linalg.solve'
Solver to use for the estimation. Alternative is 'np.linalg.lstsq'.
weights_name : Optional[str]
Name of the weights variable.
weights_type : Optional[str]
Expand Down Expand Up @@ -103,6 +105,7 @@ def __init__(
collin_tol: float,
weights_name: Optional[str],
weights_type: Optional[str],
solver: str = "np.linalg.solve",
) -> None:
super().__init__(
Y=Y,
Expand All @@ -112,6 +115,7 @@ def __init__(
collin_tol=collin_tol,
weights_name=weights_name,
weights_type=weights_type,
solver=solver,
)

if self._has_weights:
Expand Down Expand Up @@ -144,6 +148,7 @@ def get_fit(self) -> None:
_Z = self._Z
_Y = self._Y
_endogvar = self._endogvar
_solver = self._solver

# Start First Stage

Expand Down Expand Up @@ -175,9 +180,7 @@ def get_fit(self) -> None:
H = self._tXZ @ self._tZZinv
A = H @ self._tZX
B = H @ self._tZy

# Estimate coefficients (beta_hat)
self._beta_hat = np.linalg.solve(A, B).flatten()
self._beta_hat = self.solve_ols(A, B, _solver)

# Predicted values and residuals
self._Y_hat_link = self._X @ self._beta_hat
Expand Down
49 changes: 40 additions & 9 deletions pyfixest/estimation/feols_.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pandas as pd
from formulaic import Formula
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import spsolve
from scipy.sparse.linalg import lsqr, spsolve
from scipy.stats import f, norm, t

from pyfixest.errors import VcovTypeNotSupportedError
Expand Down Expand Up @@ -62,6 +62,9 @@ class Feols:
weights_type : Optional[str]
Type of the weights variable. Either "aweights" for analytic weights or
"fweights" for frequency weights.
solver : str, optional.
The solver to use for the regression. Can be either "np.linalg.solve" or
"np.linalg.lstsq". Defaults to "np.linalg.solve".
Attributes
----------
Expand All @@ -86,6 +89,8 @@ class Feols:
Indices of collinear variables.
_Z : np.ndarray
Alias for the _X array, used for calculations.
_solver: str
The solver used for the regression.
_weights : np.ndarray
Array of weights for each observation.
_N : int
Expand Down Expand Up @@ -166,7 +171,8 @@ class Feols:
Adjusted R-squared value of the model.
_adj_r2_within : float
Adjusted R-squared value computed on demeaned dependent variable.
_solver: str
The solver used to fit the normal equation.
"""

def __init__(
Expand All @@ -178,6 +184,7 @@ def __init__(
coefnames: list[str],
weights_name: Optional[str],
weights_type: Optional[str],
solver: str = "np.linalg.solve",
) -> None:
self._method = "feols"
self._is_iv = False
Expand All @@ -198,7 +205,7 @@ def __init__(
self._X = X

self.get_nobs()

self._solver = solver
_feols_input_checks(Y, X, weights)

if self._X.shape[1] == 0:
Expand Down Expand Up @@ -295,6 +302,34 @@ def __init__(
self.summary = functools.partial(_tmp, models=[self])
self.summary.__doc__ = _tmp.__doc__

def solve_ols(self, tZX: np.ndarray, tZY: np.ndarray, solver: str):
"""
Solve the ordinary least squares problem using the specified solver.
Parameters
----------
tZX (array-like): Z'X.
tZY (array-like): Z'Y.
solver (str): The solver to use. Supported solvers are "np.linalg.lstsq",
"np.linalg.solve", and "scipy.sparse.linalg.lsqr".
Returns
-------
array-like: The solution to the ordinary least squares problem.
Raises
------
ValueError: If the specified solver is not supported.
"""
if solver == "np.linalg.lstsq":
return np.linalg.lstsq(tZX, tZY, rcond=None)[0].flatten()
elif solver == "np.linalg.solve":
return np.linalg.solve(tZX, tZY).flatten()
elif solver == "scipy.sparse.linalg.lsqr":
return lsqr(tZX, tZY)[0].flatten()
else:
raise ValueError(f"Solver {solver} not supported.")

def get_fit(self) -> None:
"""
Fit an OLS model.
Expand All @@ -306,15 +341,11 @@ def get_fit(self) -> None:
_X = self._X
_Y = self._Y
_Z = self._Z

_solver = self._solver
self._tZX = _Z.T @ _X
self._tZy = _Z.T @ _Y

# self._tZXinv = np.linalg.inv(self._tZX)
self._beta_hat = np.linalg.solve(self._tZX, self._tZy).flatten()
# self._beta_hat, _, _, _ = lstsq(self._tZX, self._tZy, lapack_driver='gelsy')

# self._beta_hat = (self._tZXinv @ self._tZy).flatten()
self._beta_hat = self.solve_ols(self._tZX, self._tZy, _solver)

self._Y_hat_link = self._X @ self._beta_hat
self._u_hat = self._Y.flatten() - self._Y_hat_link.flatten()
Expand Down
10 changes: 9 additions & 1 deletion pyfixest/estimation/fepois_.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,11 @@ class Fepois(Feols):
Maximum number of iterations for the IRLS algorithm.
tol : Optional[float], default=1e-08
Tolerance level for the convergence of the IRLS algorithm.
solver: str, default is 'np.linalg.solve'
Solver to use for the estimation. Alternative is 'np.linalg.lstsq'.
fixef_tol: float, default = 1e-08.
Tolerance level for the convergence of the demeaning algorithm.
solver:
weights_name : Optional[str]
Name of the weights variable.
weights_type : Optional[str]
Expand All @@ -68,6 +71,7 @@ def __init__(
maxiter: int = 25,
tol: float = 1e-08,
fixef_tol: float = 1e-08,
solver: str = "np.linalg.solve",
weights_name: Optional[str] = None,
weights_type: Optional[str] = None,
):
Expand All @@ -79,6 +83,7 @@ def __init__(
collin_tol=collin_tol,
weights_name=weights_name,
weights_type=weights_type,
solver=solver,
)

# input checks
Expand Down Expand Up @@ -151,6 +156,7 @@ def get_fit(self) -> None:
_iwls_maxiter = 25
_tol = self.tol
_fixef_tol = self.fixef_tol
_solver = self._solver

def compute_deviance(_Y: np.ndarray, mu: np.ndarray):
with warnings.catch_warnings():
Expand Down Expand Up @@ -215,7 +221,9 @@ def compute_deviance(_Y: np.ndarray, mu: np.ndarray):
XWX = WX.transpose() @ WX
XWZ = WX.transpose() @ WZ

delta_new = np.linalg.solve(XWX, XWZ) # eq (10), delta_new -> reg_z
delta_new = self.solve_ols(XWX, XWZ, _solver).reshape(
(-1, 1)
) # eq (10), delta_new -> reg_z
resid = Z_resid - X_resid @ delta_new

mu_old = mu.copy()
Expand Down
165 changes: 165 additions & 0 deletions tests/test_solver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
import numpy as np
import pytest

import pyfixest as pf
from pyfixest.estimation.feiv_ import Feiv
from pyfixest.estimation.feols_ import Feols
from pyfixest.estimation.fepois_ import Fepois


@pytest.fixture()
def data_feols():
data = pf.get_data().dropna()
Y = data["Y"].to_numpy().reshape((-1, 1))
X = data[["X1", "X2"]].to_numpy()
Z = data[["Z1", "X2"]].to_numpy()
weights = (
data["weights"].to_numpy().reshape((-1, 1))
) # Define the variable "weights"
return X, Y, Z, weights


@pytest.fixture()
def data_fepois():
data = pf.get_data(model="Fepois").dropna()
Y = data["Y"].to_numpy().reshape((-1, 1))
X = data[["X1", "X2"]].to_numpy()
weights = (
data["weights"].to_numpy().reshape((-1, 1))
) # Define the variable "weights"
return X, Y, weights


def test_solver_fepois(data_fepois):
"""
Test the equivalence of different solvers for the fepois class.
This function initializes an object with test data and compares the results
obtained from two different solvers: np.linalg.lstsq
and np.linalg.solve. It asserts that
the results are identical or very close within a tolerance.
"""
X, Y, weights = data_fepois
obj = Fepois(
X=X,
Y=Y,
weights=weights,
coefnames=["X1", "X2"],
collin_tol=1e-08,
weights_name="weights",
weights_type="aweights",
solver="np.linalg.solve",
fe=None,
drop_singletons=False,
)
# Use np.linalg.lstsq solver
obj._solver = "np.linalg.lstsq"
obj.get_fit()
beta_hat_lstsq = obj._beta_hat.copy()

# Use np.linalg.solve solver
X, Y, weights = data_fepois
obj = Fepois(
X=X,
Y=Y,
weights=weights,
coefnames=["X1", "X2"],
collin_tol=1e-08,
weights_name="weights",
weights_type="aweights",
solver="np.linalg.solve",
fe=None,
drop_singletons=False,
)
obj._solver = "np.linalg.solve"
obj.get_fit()
beta_hat_solve = obj._beta_hat.copy()
# Assert that the results are identical or very close within a tolerance
np.testing.assert_array_almost_equal(
beta_hat_lstsq, beta_hat_solve, decimal=6, err_msg="Solvers' results differ"
)


def test_solver_feiv(data_feols):
"""
Test the equivalence of different solvers for the feiv class.
This function initializes an object with test data and compares the results
obtained from two different solvers: np.linalg.lstsq
and np.linalg.solve. It asserts that
the results are identical or very close within a tolerance.
"""
X, Y, Z, weights = data_feols

obj = Feiv(
Y=Y,
X=X,
Z=Z,
endogvar=X[:, 0].reshape((-1, 1)),
weights=weights,
coefnames_x=["X1", "X2"],
collin_tol=1e-08,
weights_name="weights",
weights_type="aweights",
solver="np.linalg.solve",
coefnames_z=["Z1"],
)

# Use np.linalg.lstsq solver
obj._solver = "np.linalg.lstsq"
obj.get_fit()
beta_hat_lstsq = obj._beta_hat.copy()

# Use np.linalg.solve solver
obj._solver = "np.linalg.solve"
obj.get_fit()
beta_hat_solve = obj._beta_hat.copy()

# Assert that the results are identical or very close within a tolerance
np.testing.assert_array_almost_equal(
beta_hat_lstsq, beta_hat_solve, decimal=6, err_msg="Solvers' results differ"
)


def test_solver_feols(data_feols):
"""
Test the equivalence of different solvers for the feols class.
This function initializes an object with test data and compares the results
obtained from two different solvers: np.linalg.lstsq
and np.linalg.solve. It asserts that
the results are identical or very close within a tolerance.
"""
X, Y, _, weights = data_feols

obj = Feols(
X=X,
Y=Y,
weights=weights,
collin_tol=1e-08,
coefnames=["X1", "X2"],
weights_name="weights",
weights_type="aweights",
solver="np.linalg.solve",
)

# Use np.linalg.lstsq solver
obj._solver = "np.linalg.lstsq"
obj.get_fit()
beta_hat_lstsq = obj._beta_hat.copy()

# Use np.linalg.solve solver
obj._solver = "np.linalg.solve"
obj.get_fit()
beta_hat_solve = obj._beta_hat.copy()

# scipy.sparse.linalg.lsqr solver
obj._solver = "scipy.sparse.linalg.lsqr"
obj.get_fit()
beta_hat_lsqr = obj._beta_hat.copy()

# Assert that the results are identical or very close within a tolerance
np.testing.assert_array_almost_equal(
beta_hat_lstsq, beta_hat_solve, decimal=6, err_msg="Solvers' results differ"
)

np.testing.assert_array_almost_equal(
beta_hat_lstsq, beta_hat_lsqr, decimal=6, err_msg="Solvers' results differ"
)

0 comments on commit 56c4c6e

Please sign in to comment.