Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added solver to feols and created new test file for it #513

Merged
merged 11 commits into from
Jul 2, 2024
31 changes: 30 additions & 1 deletion pyfixest/estimation/feiv_.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def __init__(
collin_tol: float,
weights_name: Optional[str],
weights_type: Optional[str],
solver: str = "np.linalg.solve",
) -> None:
super().__init__(
Y=Y,
Expand All @@ -103,6 +104,7 @@ def __init__(
collin_tol=collin_tol,
weights_name=weights_name,
weights_type=weights_type,
solver=solver,
)

if self._has_weights:
Expand All @@ -127,6 +129,32 @@ def __init__(
self._support_iid_inference = True
self._supports_cluster_causal_variance = False

def solve_ols(self, tZX, tZY, solver):
"""
Solve the ordinary least squares problem using the specified solver.

Parameters
----------
tZX (array-like): The design matrix.
tZY (array-like): The response variable.
solver (str): The solver to use. Supported solvers are "np.linalg.lstsq"
and "np.linalg.solve".

Returns
-------
array-like: The solution to the ordinary least squares problem.

Raises
------
ValueError: If the specified solver is not supported.
"""
if solver == "np.linalg.lstsq":
return np.linalg.lstsq(tZX, tZY, rcond=None)[0]
elif solver == "np.linalg.solve":
return np.linalg.solve(tZX, tZY).flatten()
else:
raise ValueError(f"Solver {solver} not supported.")

def get_fit(self) -> None:
"""Fit a IV model using a 2SLS estimator."""
_X = self._X
Expand All @@ -141,8 +169,9 @@ def get_fit(self) -> None:
H = self._tXZ @ self._tZZinv
A = H @ self._tZX
B = H @ self._tZy
solver = self._solver

self._beta_hat = np.linalg.solve(A, B).flatten()
self._beta_hat = self.solve_ols(A, B, solver)

self._Y_hat_link = self._X @ self._beta_hat
self._u_hat = self._Y.flatten() - self._Y_hat_link.flatten()
Expand Down
31 changes: 30 additions & 1 deletion pyfixest/estimation/fepois_.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def __init__(
maxiter: int = 25,
tol: float = 1e-08,
fixef_tol: float = 1e-08,
solver: str = "np.linalg.solve",
weights_name: Optional[str] = None,
weights_type: Optional[str] = None,
):
Expand All @@ -79,6 +80,7 @@ def __init__(
collin_tol=collin_tol,
weights_name=weights_name,
weights_type=weights_type,
solver=solver,
)

# input checks
Expand Down Expand Up @@ -113,6 +115,32 @@ def __init__(
self.deviance = None
self._Xbeta = np.array([])

def solve_ols(self, tZX, tZY, solver):
s3alfisc marked this conversation as resolved.
Show resolved Hide resolved
"""
Solve the ordinary least squares problem using the specified solver.

Parameters
----------
tZX (array-like): The design matrix.
tZY (array-like): The response variable.
solver (str): The solver to use. Supported solvers are "np.linalg.lstsq"
and "np.linalg.solve".

Returns
-------
array-like: The solution to the ordinary least squares problem.

Raises
------
ValueError: If the specified solver is not supported.
"""
if solver == "np.linalg.lstsq":
return np.linalg.lstsq(tZX, tZY, rcond=None)[0]
elif solver == "np.linalg.solve":
return np.linalg.solve(tZX, tZY).flatten()
else:
raise ValueError(f"Solver {solver} not supported.")

def get_fit(self) -> None:
"""
Fit a Poisson Regression Model via Iterated Weighted Least Squares (IWLS).
Expand Down Expand Up @@ -151,6 +179,7 @@ def get_fit(self) -> None:
_iwls_maxiter = 25
_tol = self.tol
_fixef_tol = self.fixef_tol
solver = self._solver

def compute_deviance(_Y: np.ndarray, mu: np.ndarray):
with warnings.catch_warnings():
Expand Down Expand Up @@ -215,7 +244,7 @@ def compute_deviance(_Y: np.ndarray, mu: np.ndarray):
XWX = WX.transpose() @ WX
XWZ = WX.transpose() @ WZ

delta_new = np.linalg.solve(XWX, XWZ) # eq (10), delta_new -> reg_z
delta_new = self.solve_ols(XWX, XWZ, solver) # eq (10), delta_new -> reg_z
resid = Z_resid - X_resid @ delta_new

mu_old = mu.copy()
Expand Down
82 changes: 79 additions & 3 deletions tests/test_solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,95 @@
import pytest

import pyfixest as pf
from pyfixest.estimation.feiv_ import Feiv
from pyfixest.estimation.feols_ import Feols
from pyfixest.estimation.fepois_ import Fepois


@pytest.fixture()
def data():
data = pf.get_data()
Y = data["Y"]
Y = data["Y"].to_numpy()
X = data[["X1", "X2"]]
Z = data[["Z1"]]
weights = data["weights"] # Define the variable "weights"
Z = data[["Z1"]].to_numpy()
weights = data["weights"].to_numpy() # Define the variable "weights"
return X, Y, Z, weights


def test_solver_fepois(data):
"""
Test the equivalence of different solvers for the fepois class.
This function initializes an object with test data and compares the results
obtained from two different solvers: np.linalg.lstsq
and np.linalg.solve. It asserts that
the results are identical or very close within a tolerance.
"""
X, Y, Z, weights = data
obj = Fepois(
X=X,
Y=Y,
weights=weights,
coefnames=["X1", "X2"],
collin_tol=1e-08,
weights_name="weights",
weights_type=None,
solver="np.linalg.solve",
fe=None,
drop_singletons=False,
)
# Use np.linalg.lstsq solver
obj._solver = "np.linalg.lstsq"
obj.get_fit()
beta_hat_lstsq = obj._beta_hat.copy()
# Use np.linalg.solve solver
obj._solver = "np.linalg.solve"
obj.get_fit()
beta_hat_solve = obj._beta_hat.copy()
# Assert that the results are identical or very close within a tolerance
np.testing.assert_array_almost_equal(
beta_hat_lstsq, beta_hat_solve, decimal=6, err_msg="Solvers' results differ"
)


def test_solver_feiv(data):
"""
Test the equivalence of different solvers for the feiv class.
This function initializes an object with test data and compares the results
obtained from two different solvers: np.linalg.lstsq
and np.linalg.solve. It asserts that
the results are identical or very close within a tolerance.
"""
X, Y, Z, weights = data

obj = Feiv(
Y=Y,
X=X,
Z=Z,
weights=weights,
coefnames_x=["X1", "X2"],
collin_tol=1e-08,
weights_name="weights",
weights_type=None,
solver="np.linalg.solve",
coefnames_z=["Z1"],
)

# Use np.linalg.lstsq solver
obj._solver = "np.linalg.lstsq"
obj.get_fit()
beta_hat_lstsq = obj._beta_hat.copy()

# Use np.linalg.solve solver
obj._solver = "np.linalg.solve"
obj.get_fit()
beta_hat_solve = obj._beta_hat.copy()

# Assert that the results are identical or very close within a tolerance
np.testing.assert_array_almost_equal(
beta_hat_lstsq, beta_hat_solve, decimal=6, err_msg="Solvers' results differ"
)


def test_solver_equivalence(data):
"""
Test the equivalence of different solvers for the feols class.
Expand Down