From 212a3f000252fd60c6dc4bbce814383193474b3a Mon Sep 17 00:00:00 2001 From: lstimpfl Date: Sun, 20 Oct 2024 07:32:22 +0200 Subject: [PATCH 01/12] small updates to documentation --- tests/test_poisson.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_poisson.py b/tests/test_poisson.py index 43f2b2cf..829e9027 100644 --- a/tests/test_poisson.py +++ b/tests/test_poisson.py @@ -53,7 +53,7 @@ def test_separation(): ): fepois("Y ~ X1 + X2", data=example2, vcov="hetero", separation_check=["ir"]) # noqa: F841 - # ppmlhdfe test data sets: + # ppmlhdfe test data sets (check readme in data/ppmlhdfe_separation_examples) path = os.path.dirname(os.path.abspath(__file__)) folder = r"data/ppmlhdfe_separation_examples" fns = sorted([fn for fn in os.listdir(os.path.join(path, folder)) if fn.endswith(".csv")]) @@ -78,7 +78,7 @@ def test_separation(): fml += f" ~ {' + '.join(regressors)}" if fixed_effects.empty: - # separation checks are currently disabled if no fixed effects are specified + # TODO: separation checks are currently disabled if no fixed effects are specified; enable tests once we run separation check without fixed effects continue else: fml += f" | {' + '.join(fixed_effects)}" From 63a8a36cafb906a506fb47d67c7755da7ad8abe1 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sun, 20 Oct 2024 15:26:46 +0200 Subject: [PATCH 02/12] rename example1 -> df --- tests/test_poisson.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_poisson.py b/tests/test_poisson.py index 1003eb70..174ed51b 100644 --- a/tests/test_poisson.py +++ b/tests/test_poisson.py @@ -31,7 +31,7 @@ def test_separation(): with pytest.warns( UserWarning, match="2 observations removed because of separation." 
): - fepois("Y ~ X | fe1", data=example1, vcov="hetero", separation_check=["fe"]) # noqa: F841 + fepois("Y ~ X | fe1", data=df, vcov="hetero", separation_check=["fe"]) # noqa: F841 if False: # this example is taken from ppmlhdfe's primer on separation https://github.com/sergiocorreia/ppmlhdfe/blob/master/guides/separation_primer.md From a62b03345c7cea6ab942078ce2e65392b7a12f47 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 20 Oct 2024 13:29:50 +0000 Subject: [PATCH 03/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_poisson.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/test_poisson.py b/tests/test_poisson.py index 174ed51b..4668204c 100644 --- a/tests/test_poisson.py +++ b/tests/test_poisson.py @@ -52,7 +52,9 @@ def test_separation(): # ppmlhdfe test data sets (check readme in data/ppmlhdfe_separation_examples) path = os.path.dirname(os.path.abspath(__file__)) folder = r"data/ppmlhdfe_separation_examples" - fns = sorted([fn for fn in os.listdir(os.path.join(path, folder)) if fn.endswith(".csv")]) + fns = sorted( + [fn for fn in os.listdir(os.path.join(path, folder)) if fn.endswith(".csv")] + ) for fn in fns: if fn == "07.csv": # this case fails but is not tested in ppmlhdfe @@ -61,9 +63,13 @@ def test_separation(): data = pd.read_csv(os.path.join(path, folder, fn)) # build formula dynamically from dataframe # datasets have fixed structure of the form (y, x1, ..., xN, id1, ..., idM, separated) - fml = "y" # dependent variable y - regressors = data.columns[data.columns.str.startswith("x")] # regressors x1,...,xN - fixed_effects = data.columns[data.columns.str.startswith("id")] # fixed effects id1,...,id2 + fml = "y" # dependent variable y + regressors = data.columns[ + data.columns.str.startswith("x") + ] # regressors x1,...,xN + fixed_effects = data.columns[ + 
data.columns.str.startswith("id") + ] # fixed effects id1,...,id2 if data.separated.sum() == 0: # TODO: do not skip but update pytest.warn to confirm that no warning is produced continue From d30205fcec9a2e50bb0d0fb8477fc91fa04f62fe Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sun, 20 Oct 2024 15:33:34 +0200 Subject: [PATCH 04/12] add import os --- tests/test_poisson.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_poisson.py b/tests/test_poisson.py index 4668204c..b2341786 100644 --- a/tests/test_poisson.py +++ b/tests/test_poisson.py @@ -1,3 +1,4 @@ +import os import numpy as np import pandas as pd import pytest From 2307df820a1c5acf30d6f064ad5e347dcd3b2af6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 20 Oct 2024 13:34:16 +0000 Subject: [PATCH 05/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_poisson.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_poisson.py b/tests/test_poisson.py index b2341786..ae8ca42d 100644 --- a/tests/test_poisson.py +++ b/tests/test_poisson.py @@ -1,4 +1,5 @@ import os + import numpy as np import pandas as pd import pytest From 29143a796143197999ac23c62b236443dfcb2ad7 Mon Sep 17 00:00:00 2001 From: lstimpfl Date: Mon, 21 Oct 2024 07:44:50 +0200 Subject: [PATCH 06/12] fix bug after merge --- tests/test_poisson.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tests/test_poisson.py b/tests/test_poisson.py index ae8ca42d..e059c242 100644 --- a/tests/test_poisson.py +++ b/tests/test_poisson.py @@ -23,21 +23,23 @@ def test_separation(): """Test separation detection.""" - y = np.array([0, 0, 0, 1, 2, 3]) - df1 = np.array(["a", "a", "b", "b", "b", "c"]) - df2 = np.array(["c", "c", "d", "d", "d", "e"]) - x = np.random.normal(0, 1, 6) - - df = pd.DataFrame({"Y": y, "fe1": df1, "fe2": df2, "x": x}) - + example1 = 
pd.DataFrame.from_dict( + { + "Y": [0, 0, 0, 1, 2, 3], + "fe1": ["a", "a", "b", "b", "b", "c"], + "fe2": ["c", "c", "d", "d", "d", "e"], + "X": np.random.normal(0, 1, 6) + } + ) with pytest.warns( UserWarning, match="2 observations removed because of separation." ): - fepois("Y ~ X | fe1", data=df, vcov="hetero", separation_check=["fe"]) # noqa: F841 + fepois("Y ~ X | fe1", data=example1, vcov="hetero", separation_check=["fe"]) # noqa: F841 if False: # this example is taken from ppmlhdfe's primer on separation https://github.com/sergiocorreia/ppmlhdfe/blob/master/guides/separation_primer.md # disabled because we currently do not perform separation checks if no fixed effects are provided + # TODO: enable once separation checks without fixed effects are enabled example2 = pd.DataFrame.from_dict( { "Y": [0, 0, 0, 1, 2, 3], From 3261c6ec274c326ce9c5db0ea0d30ca0ce26e02b Mon Sep 17 00:00:00 2001 From: lstimpfl Date: Mon, 21 Oct 2024 07:59:28 +0200 Subject: [PATCH 07/12] add updates to separation checks back in --- pyfixest/estimation/fepois_.py | 233 +++++++++++++++++++++++++++++++-- 1 file changed, 222 insertions(+), 11 deletions(-) diff --git a/pyfixest/estimation/fepois_.py b/pyfixest/estimation/fepois_.py index 22c98f4d..687a36c3 100644 --- a/pyfixest/estimation/fepois_.py +++ b/pyfixest/estimation/fepois_.py @@ -1,5 +1,6 @@ import warnings -from typing import Optional, Union +from importlib import import_module +from typing import Optional, Protocol, Union import numpy as np import pandas as pd @@ -83,6 +84,7 @@ def __init__( lean: bool = False, sample_split_var: Optional[str] = None, sample_split_value: Optional[Union[str, int]] = None, + separation_check: Optional[list[str]] = None, ): super().__init__( FixestFormula, @@ -110,6 +112,7 @@ def __init__( self.tol = tol self._method = "fepois" self.convergence = False + self.separation_check = separation_check self._support_crv3_inference = True self._support_iid_inference = True @@ -133,12 +136,19 @@ def 
prepare_model_matrix(self): # check for separation na_separation: list[int] = [] - if self._fe is not None: - na_separation = _check_for_separation(Y=self._Y, fe=self._fe) - if na_separation: - warnings.warn( - f"{str(len(na_separation))} observations removed because of separation." - ) + if ( + self._fe is not None + and self.separation_check is not None + and self.separation_check # not an empty list + ): + na_separation = _check_for_separation( + Y=self._Y, + X=self._X, + fe=self._fe, + fml=self._fml, + data=self._data, + methods=self.separation_check, + ) if na_separation: self._Y.drop(na_separation, axis=0, inplace=True) @@ -405,27 +415,127 @@ def predict( raise ValueError("type must be one of 'response' or 'link'.") -def _check_for_separation(Y: pd.DataFrame, fe: pd.DataFrame) -> list[int]: +def _check_for_separation( + fml: str, + data: pd.DataFrame, + Y: pd.DataFrame, + X: pd.DataFrame, + fe: pd.DataFrame, + methods: Optional[list[str]] = None, +) -> list[int]: """ Check for separation. Check for separation of Poisson Regression. For details, see the ppmlhdfe - documentation on separation checks. Currently, only the "fe" check is implemented. + documentation on separation checks. Parameters ---------- + fml : str + The formula used for estimation. + data : pd.DataFrame + The data used for estimation. Y : pd.DataFrame Dependent variable. + X : pd.DataFrame + Independent variables. fe : pd.DataFrame Fixed effects. + methods: list[str], optional + Methods used to check for separation. One of fixed effects ("fe") or + iterative rectifier ("ir"). Executes all methods by default. Returns ------- list List of indices of observations that are removed due to separation. 
""" + valid_methods: dict[str, _SeparationMethod] = { + "fe": _check_for_separation_fe, + "ir": _check_for_separation_ir, + } + if methods is None: + methods = list(valid_methods) + + invalid_methods = [method for method in methods if method not in valid_methods] + if invalid_methods: + raise ValueError( + f"Invalid separation method. Expecting {list(valid_methods)}. Received {invalid_methods}" + ) + separation_na: set[int] = set() - if not (Y > 0).all(axis=0).all(): + for method in methods: + separation_na = separation_na.union( + valid_methods[method](fml=fml, data=data, Y=Y, X=X, fe=fe) + ) + + if separation_na: + warnings.warn( + f"{str(len(separation_na))} observations removed because of separation." + ) + + return list(separation_na) + + +class _SeparationMethod(Protocol): + def __call__( + self, + fml: str, + data: pd.DataFrame, + Y: pd.DataFrame, + X: pd.DataFrame, + fe: pd.DataFrame, + ) -> set[int]: + """ + Check for separation. + + Parameters + ---------- + fml : str + The formula used for estimation. + data : pd.DataFrame + The data used for estimation. + Y : pd.DataFrame + Dependent variable. + X : pd.DataFrame + Independent variables. + fe : pd.DataFrame + Fixed effects. + + Returns + ------- + set + Set of indices of separated observations. + """ + ... + + +def _check_for_separation_fe( + fml: str, data: pd.DataFrame, Y: pd.DataFrame, X: pd.DataFrame, fe: pd.DataFrame +) -> set[int]: + """ + Check for separation using the "fe" check. + + Parameters + ---------- + fml : str + The formula used for estimation. + data : pd.DataFrame + The data used for estimation. + Y : pd.DataFrame + Dependent variable. + X : pd.DataFrame + Independent variables. + fe : pd.DataFrame + Fixed effects. + + Returns + ------- + set + Set of indices of separated observations. 
+ """ + separation_na: set[int] = set() + if fe is not None and not (Y > 0).all(axis=0).all(): Y_help = (Y > 0).astype(int).squeeze() # loop over all elements of fe @@ -446,7 +556,108 @@ def _check_for_separation(Y: pd.DataFrame, fe: pd.DataFrame) -> list[int]: dropset = set(fe[x][fe_in_droplist].index) separation_na = separation_na.union(dropset) - return list(separation_na) + return separation_na + + +def _check_for_separation_ir( + fml: str, + data: pd.DataFrame, + Y: pd.DataFrame, + X: pd.DataFrame, + fe: pd.DataFrame, + tol: float = 1e-4, + maxiter: int = 100, +) -> set[int]: + """ + Check for separation using the "iterative rectifier" algorithm + proposed by Correia et al. (2021). For details see http://arxiv.org/abs/1903.01633. + + Parameters + ---------- + fml : str + The formula used for estimation. + data : pd.DataFrame + The data used for estimation. + Y : pd.DataFrame + Dependent variable. + X : pd.DataFrame + Independent variables. + fe : pd.DataFrame + Fixed effects. + tol : float + Tolerance to detect separated observation. Defaults to 1e-4. + maxiter : int + Maximum number of iterations. Defaults to 100. + + Returns + ------- + set + Set of indices of separated observations. 
+ """ + # lazy load to avoid circular import + fixest_module = import_module("pyfixest.estimation") + feols = getattr(fixest_module, "feols") + # initialize + separation_na: set[int] = set() + tmp_suffix = "_separationTmp" + # build formula + name_dependent, rest = fml.split("~") + name_dependent_separation = "U" + if name_dependent_separation in data.columns: + name_dependent_separation += tmp_suffix + + fml_separation = f"{name_dependent_separation} ~ {rest}" + + dependent: pd.Series = data[name_dependent] + is_interior = dependent > 0 + if is_interior.all(): + # no boundary sample, can exit + return separation_na + + # initialize variables + tmp: pd.DataFrame = pd.DataFrame(index=data.index) + tmp["U"] = (dependent == 0).astype(float).rename("U") + # weights + N0 = (dependent > 0).sum() + K = N0 / tol**2 + tmp["omega"] = pd.Series( + np.where(dependent > 0, K, 1), name="omega", index=data.index + ) + # combine data + # TODO: avoid create new object? + tmp = data.join(tmp, how="left", validate="one_to_one", rsuffix=tmp_suffix) + # TODO: need to ensure that join doesn't create duplicated columns + # assert not tmp.columns.duplicated().any() + + iteration = 0 + has_converged = False + while iteration < maxiter: + iteration += 1 + # regress U on X + # TODO: check acceleration in ppmlhdfe's implementation: https://github.com/sergiocorreia/ppmlhdfe/blob/master/src/ppmlhdfe_separation_relu.mata#L135 + fitted = feols(fml_separation, data=tmp, weights="omega") + tmp["Uhat"] = pd.Series(fitted.predict(), index=fitted._data.index, name="Uhat") + Uhat = tmp["Uhat"] + # update when within tolerance of zero + # need to be more strict below zero to avoid false positives + within_zero = (Uhat > -0.1 * tol) & (Uhat < tol) + Uhat.where(~(is_interior | within_zero.fillna(True)), 0, inplace=True) + if (Uhat >= 0).all(): + # all separated observations have been identified + has_converged = True + break + tmp.loc[~is_interior, "U"] = np.fmax( + Uhat[~is_interior], 0 + ) # rectified 
linear unit (ReLU) + + if has_converged: + separation_na = set(dependent[Uhat > 0].index) + else: + warnings.warn( + "iterative rectivier separation check: maximum number of iterations reached before convergence" + ) + + return separation_na def _fepois_input_checks(drop_singletons: bool, tol: float, maxiter: int): From a481bc8efff42eb3edcaa2a089908924d8f6539d Mon Sep 17 00:00:00 2001 From: lstimpfl Date: Mon, 21 Oct 2024 08:17:17 +0200 Subject: [PATCH 08/12] add separation_check kwarg back in --- pyfixest/estimation/FixestMulti_.py | 10 ++++++++++ pyfixest/estimation/estimation.py | 19 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/pyfixest/estimation/FixestMulti_.py b/pyfixest/estimation/FixestMulti_.py index 8d62e91e..004e2dc7 100644 --- a/pyfixest/estimation/FixestMulti_.py +++ b/pyfixest/estimation/FixestMulti_.py @@ -28,6 +28,7 @@ def __init__( seed: Optional[int], split: Optional[str], fsplit: Optional[str], + separation_check: Optional[list[str]] = None, ) -> None: """ Initialize a class for multiple fixed effect estimations. @@ -57,6 +58,9 @@ def __init__( seed : Optional[int] Option to provide a random seed. Default is None. Only relevant for wild cluster bootstrap for use_compression=True. + separation_check: list[str], optional + Only used in "fepois". Methods to identify and drop separated observations. + Either "fe" or "ir". Executes both by default. Returns ------- @@ -70,6 +74,7 @@ def __init__( self._use_compression = use_compression self._reps = reps if use_compression else None self._seed = seed if use_compression else None + self._separation_check = separation_check self._run_split = split is not None or fsplit is not None self._run_full = not (split and not fsplit) @@ -185,6 +190,7 @@ def _estimate_all_models( collin_tol: float = 1e-6, iwls_maxiter: int = 25, iwls_tol: float = 1e-08, + separation_check: Optional[list[str]] = None, ) -> None: """ Estimate multiple regression models. 
@@ -205,6 +211,9 @@ def _estimate_all_models( iwls_tol : float, optional The tolerance level for the IWLS algorithm. Default is 1e-8. Only relevant for non-linear estimation strategies. + separation_check: list[str], optional + Only used in "fepois". Methods to identify and drop separated observations. + Either "fe" or "ir". Executes both by default. Returns ------- @@ -312,6 +321,7 @@ def _estimate_all_models( lean=_lean, sample_split_value=sample_split_value, sample_split_var=_splitvar, + separation_check=separation_check, # solver=_solver ) FIT.prepare_model_matrix() diff --git a/pyfixest/estimation/estimation.py b/pyfixest/estimation/estimation.py index ed3b3827..b6e05c0d 100644 --- a/pyfixest/estimation/estimation.py +++ b/pyfixest/estimation/estimation.py @@ -414,6 +414,7 @@ def fepois( iwls_tol: float = 1e-08, iwls_maxiter: int = 25, collin_tol: float = 1e-10, + separation_check: Optional[list[str]] = ["fe"], drop_intercept: bool = False, i_ref1=None, copy_data: bool = True, @@ -465,6 +466,10 @@ def fepois( collin_tol : float, optional Tolerance for collinearity check, by default 1e-10. + separation_check: list[str], optional + Methods to identify and drop separated observations. + Either "fe" or "ir". Executes "fe" by default. + drop_intercept : bool, optional Whether to drop the intercept from the model, by default False. 
@@ -559,6 +564,7 @@ def fepois( seed=None, split=split, fsplit=fsplit, + separation_check=separation_check, ) fixest = FixestMulti( @@ -588,6 +594,7 @@ def fepois( iwls_tol=iwls_tol, iwls_maxiter=iwls_maxiter, collin_tol=collin_tol, + separation_check=separation_check, ) if fixest._is_multiple_estimation: @@ -614,6 +621,7 @@ def _estimation_input_checks( seed: Optional[int], split: Optional[str], fsplit: Optional[str], + separation_check: Optional[list[str]] = None, ): if not isinstance(fml, str): raise TypeError("fml must be a string") @@ -715,3 +723,14 @@ def _estimation_input_checks( if isinstance(fsplit, str) and fsplit not in data.columns: raise KeyError(f"Column '{fsplit}' not found in data.") + + if separation_check is not None: + if not isinstance(separation_check, list): + raise TypeError( + "The function argument `separation_check` must be of type list." + ) + + if not all(x in ["fe", "ir"] for x in separation_check): + raise ValueError( + "The function argument `separation_check` must be a list of strings containing 'fe' and/or 'ir'." 
+ ) From 0990d5bc545ead4731b5b238f5e4cb94f998e5ca Mon Sep 17 00:00:00 2001 From: lstimpfl Date: Mon, 21 Oct 2024 08:19:12 +0200 Subject: [PATCH 09/12] add ppmlhdfe_separation_examples --- .../data/ppmlhdfe_separation_examples/01.csv | 101 +++++ .../data/ppmlhdfe_separation_examples/02.csv | 6 + .../data/ppmlhdfe_separation_examples/03.csv | 17 + .../data/ppmlhdfe_separation_examples/04.csv | 101 +++++ .../data/ppmlhdfe_separation_examples/05.csv | 10 + .../data/ppmlhdfe_separation_examples/06.csv | 12 + .../data/ppmlhdfe_separation_examples/07.csv | 19 + .../data/ppmlhdfe_separation_examples/08.csv | 10 + .../data/ppmlhdfe_separation_examples/09.csv | 8 + .../data/ppmlhdfe_separation_examples/10.csv | 11 + .../data/ppmlhdfe_separation_examples/11.csv | 359 ++++++++++++++++++ .../data/ppmlhdfe_separation_examples/12.csv | 19 + .../data/ppmlhdfe_separation_examples/13.csv | 19 + .../data/ppmlhdfe_separation_examples/14.csv | 15 + .../data/ppmlhdfe_separation_examples/15.csv | 24 ++ .../data/ppmlhdfe_separation_examples/16.csv | 101 +++++ .../data/ppmlhdfe_separation_examples/17.csv | 101 +++++ .../data/ppmlhdfe_separation_examples/18.csv | 101 +++++ .../ppmlhdfe_separation_examples/readme.md | 4 + 19 files changed, 1038 insertions(+) create mode 100644 tests/data/ppmlhdfe_separation_examples/01.csv create mode 100644 tests/data/ppmlhdfe_separation_examples/02.csv create mode 100644 tests/data/ppmlhdfe_separation_examples/03.csv create mode 100644 tests/data/ppmlhdfe_separation_examples/04.csv create mode 100644 tests/data/ppmlhdfe_separation_examples/05.csv create mode 100644 tests/data/ppmlhdfe_separation_examples/06.csv create mode 100644 tests/data/ppmlhdfe_separation_examples/07.csv create mode 100644 tests/data/ppmlhdfe_separation_examples/08.csv create mode 100644 tests/data/ppmlhdfe_separation_examples/09.csv create mode 100644 tests/data/ppmlhdfe_separation_examples/10.csv create mode 100644 tests/data/ppmlhdfe_separation_examples/11.csv create mode 100644 
tests/data/ppmlhdfe_separation_examples/12.csv create mode 100644 tests/data/ppmlhdfe_separation_examples/13.csv create mode 100644 tests/data/ppmlhdfe_separation_examples/14.csv create mode 100644 tests/data/ppmlhdfe_separation_examples/15.csv create mode 100644 tests/data/ppmlhdfe_separation_examples/16.csv create mode 100644 tests/data/ppmlhdfe_separation_examples/17.csv create mode 100644 tests/data/ppmlhdfe_separation_examples/18.csv create mode 100644 tests/data/ppmlhdfe_separation_examples/readme.md diff --git a/tests/data/ppmlhdfe_separation_examples/01.csv b/tests/data/ppmlhdfe_separation_examples/01.csv new file mode 100644 index 00000000..0639065c --- /dev/null +++ b/tests/data/ppmlhdfe_separation_examples/01.csv @@ -0,0 +1,101 @@ +y,x1,x2,id1,id2,separated +0.0000000000,-0.9303550124,1,1,4,1 +0.0000000000,0.1835959703,1,2,1,1 +0.0000000000,-0.6371972561,0,2,6,0 +0.0000000000,-0.4237562418,0,2,7,0 +0.1527670026,-1.1799178123,0,8,4,0 +0.1553160399,0.8860545158,0,1,7,0 +0.1734523475,1.0502026081,0,8,3,0 +0.2217264324,-0.2490162849,0,9,1,0 +0.2260344625,0.9635434151,0,7,6,0 +0.2283350676,0.5023207068,0,3,5,0 +0.2368061543,0.9141282439,0,10,1,0 +0.2410950512,-1.3616287708,0,4,5,0 +0.2541858852,0.5753656030,0,3,3,0 +0.2637400925,-0.6333113909,0,3,10,0 +0.2677916288,1.0411013365,0,6,9,0 +0.2768439949,-1.1694648266,0,8,10,0 +0.2934476137,-0.7940499187,0,4,6,0 +0.3290584087,0.5041465163,0,2,4,0 +0.3606268466,-3.0584282875,0,3,8,0 +0.4013363719,0.6099517941,0,7,5,0 +0.4354907870,0.9624704719,0,6,1,0 +0.4908127189,-0.7442333698,0,5,2,0 +0.4976674914,-0.5138924718,0,6,4,0 +0.5012444854,-1.3591595888,0,9,7,0 +0.5456602573,0.0567612983,0,5,5,0 +0.5634447336,1.2903038263,0,7,8,0 +0.5983847380,-0.6872945428,0,6,5,0 +0.6183075905,0.7253564000,0,1,5,0 +0.6413634419,1.6118478775,0,4,3,0 +0.6482065916,1.2488127947,0,7,1,0 +0.6522977948,-0.4748489261,0,6,6,0 +0.6631931663,0.4219789803,0,4,8,0 +0.6953295469,-1.0251801014,0,10,6,0 +0.6986964941,-0.3038678169,0,9,9,0 
+0.8503285050,1.8723217249,0,8,2,0 +0.9026033878,-1.0245078802,0,10,10,0 +0.9204394221,0.4229967892,0,6,10,0 +0.9228412509,0.4940861166,0,1,1,0 +0.9359286427,1.3081433773,0,9,2,0 +0.9685080647,1.2934249640,0,2,10,0 +0.9945486188,-0.5332730412,0,5,1,0 +1.0105472803,-0.1284428090,0,9,3,0 +1.0721468925,-1.5399883986,0,6,8,0 +1.1205748320,0.6894677877,0,8,5,0 +1.1252909899,-1.2204582691,0,1,10,0 +1.1561176777,-0.9787744284,0,9,8,0 +1.1946246624,-0.0799055845,0,10,8,0 +1.2046658993,-0.8231971860,0,6,7,0 +1.2189750671,0.5437637568,0,3,4,0 +1.2277959585,1.3177309036,0,9,10,0 +1.2413842678,0.6673717499,0,8,9,0 +1.2569460869,-0.0167010967,0,6,3,0 +1.2587834597,-0.4196293950,0,1,8,0 +1.2782599926,-0.6420007348,0,8,1,0 +1.2911227942,1.1136496067,0,9,4,0 +1.2973045111,-0.3824758530,0,7,9,0 +1.3675237894,1.2361305952,0,5,9,0 +1.3778325319,-1.0304020643,0,5,4,0 +1.3857760429,0.3235974312,0,2,3,0 +1.3960508108,-0.4157371819,0,2,5,0 +1.4190907478,0.9920675159,0,1,2,0 +1.4420653582,-0.9114651084,0,4,1,0 +1.5038720369,-1.0453398228,0,3,2,0 +1.5394419432,-0.1935533732,0,4,4,0 +1.5747014284,0.0698969364,0,9,6,0 +1.6199581623,1.3169367313,0,4,2,0 +1.6392902136,-0.3978092670,0,7,10,0 +1.6421631575,-0.7466211319,0,5,8,0 +1.6952790022,-0.0158907417,0,5,6,0 +1.7640979290,1.0598815680,0,7,4,0 +1.9505974054,0.0092241317,0,10,7,0 +2.0685675144,0.1434842199,0,8,8,0 +2.1190843582,0.6173521280,0,3,1,0 +2.1889939308,-1.9780639410,0,3,7,0 +2.2176725864,-1.5379956961,0,7,3,0 +2.2831020355,0.5082080960,0,2,2,0 +2.3055832386,1.0296376944,0,7,2,0 +2.3692295551,2.1091823578,0,10,2,0 +2.7510018349,0.2632481158,0,2,9,0 +2.7675759792,-0.0022486539,0,8,6,0 +2.7777233124,-1.3771806955,0,10,4,0 +2.7846245766,0.1415781677,0,10,5,0 +2.7860391140,-2.2442505360,0,4,9,0 +2.9671635628,0.2927849889,0,5,3,0 +2.9819300175,-0.8325243592,0,4,10,0 +3.1186814308,-0.4090226293,0,2,8,0 +3.2802021503,-0.4062994719,0,4,7,0 +3.4179122448,0.0959109142,0,8,7,0 +3.6803083420,2.3073217869,0,1,9,0 
+3.7194297314,0.3930145800,0,3,9,0 +3.7777581215,0.2952103019,0,6,2,0 +4.1290211678,-1.4121559858,0,5,7,0 +4.2730326653,-0.5140260458,0,10,9,0 +4.2883334160,-1.0160779953,0,7,7,0 +4.3528537750,0.7201576829,0,10,3,0 +4.9981565475,0.2207358032,0,3,6,0 +5.0979351997,0.7166025043,0,9,5,0 +7.3969793320,2.1998977661,0,5,10,0 +8.4651517868,0.1178035960,0,1,6,0 +9.8326959610,0.7707119584,0,1,3,0 diff --git a/tests/data/ppmlhdfe_separation_examples/02.csv b/tests/data/ppmlhdfe_separation_examples/02.csv new file mode 100644 index 00000000..a7823886 --- /dev/null +++ b/tests/data/ppmlhdfe_separation_examples/02.csv @@ -0,0 +1,6 @@ +y,id1,id2,separated +0,1,1,0 +1,1,1,0 +0,2,1,1 +0,2,2,0 +1,2,2,0 diff --git a/tests/data/ppmlhdfe_separation_examples/03.csv b/tests/data/ppmlhdfe_separation_examples/03.csv new file mode 100644 index 00000000..0592255b --- /dev/null +++ b/tests/data/ppmlhdfe_separation_examples/03.csv @@ -0,0 +1,17 @@ +y,id1,id2,id3,separated +0,5,3,6,1 +0,5,5,7,1 +1,1,3,1,0 +1,1,5,2,0 +1,2,4,1,0 +1,2,6,2,0 +1,3,1,3,0 +1,3,7,4,0 +1,4,2,3,0 +1,4,8,4,0 +1,5,1,5,0 +1,5,7,8,0 +1,6,2,5,0 +1,6,4,6,0 +1,6,6,7,0 +1,6,8,8,0 diff --git a/tests/data/ppmlhdfe_separation_examples/04.csv b/tests/data/ppmlhdfe_separation_examples/04.csv new file mode 100644 index 00000000..6cf9a107 --- /dev/null +++ b/tests/data/ppmlhdfe_separation_examples/04.csv @@ -0,0 +1,101 @@ +y,id1,id2,separated +0,1,1,1 +0,1,1,1 +0,1,34,1 +0,2,1,1 +0,2,2,1 +0,2,2,1 +0,3,3,1 +0,3,3,1 +0,4,3,1 +0,5,4,1 +0,5,5,1 +0,5,5,1 +0,6,5,1 +0,6,6,0 +0,7,6,1 +0,7,7,0 +0,8,7,1 +0,8,8,1 +0,8,8,1 +0,9,8,1 +0,9,9,1 +0,9,9,1 +0,10,10,1 +0,10,10,1 +0,11,10,1 +0,11,11,1 +0,11,11,1 +0,12,11,1 +0,12,12,1 +0,12,12,1 +0,13,12,1 +0,13,13,1 +0,13,13,1 +0,14,13,1 +0,14,14,1 +0,14,14,1 +0,15,14,1 +0,15,15,1 +0,15,15,1 +0,16,15,1 +0,16,16,1 +0,16,16,1 +0,17,16,1 +0,17,17,1 +0,17,17,1 +0,18,17,1 +0,18,18,0 +0,19,18,1 +0,19,19,1 +0,19,19,1 +0,20,19,1 +0,20,20,0 +0,21,20,1 +0,21,21,1 +0,21,21,1 +0,22,21,1 +0,22,22,1 +0,22,22,1 
+0,23,22,1 +0,23,23,1 +0,23,23,1 +0,24,23,1 +0,24,24,1 +0,24,24,1 +0,25,24,1 +0,25,25,1 +0,25,25,1 +0,26,25,1 +0,26,26,1 +0,26,26,1 +0,27,26,1 +0,27,27,1 +0,27,27,1 +0,28,27,1 +0,28,28,0 +0,29,28,1 +0,29,29,1 +0,29,29,1 +0,30,29,1 +0,30,30,0 +0,31,30,1 +0,31,31,1 +0,31,31,1 +0,32,31,1 +0,32,32,1 +0,32,32,1 +0,33,32,1 +0,33,33,1 +0,33,33,1 +1,3,2,0 +1,4,4,0 +1,4,4,0 +1,6,6,0 +1,7,7,0 +1,10,9,0 +1,18,18,0 +1,20,20,0 +1,28,28,0 +1,30,30,0 +1,34,33,0 diff --git a/tests/data/ppmlhdfe_separation_examples/05.csv b/tests/data/ppmlhdfe_separation_examples/05.csv new file mode 100644 index 00000000..23ca85a3 --- /dev/null +++ b/tests/data/ppmlhdfe_separation_examples/05.csv @@ -0,0 +1,10 @@ +y,x1,x2,x3,x4,separated +0,5,11,2,2,0 +0,5,2,11,2,0 +0,5,2,2,11,0 +0,0,-1,-1,-1,1 +1,5,5,5,5,0 +2,4,4,4,4,0 +3,3,3,3,3,0 +4,2,2,2,2,0 +5,1,1,1,1,0 diff --git a/tests/data/ppmlhdfe_separation_examples/06.csv b/tests/data/ppmlhdfe_separation_examples/06.csv new file mode 100644 index 00000000..e2d9e014 --- /dev/null +++ b/tests/data/ppmlhdfe_separation_examples/06.csv @@ -0,0 +1,12 @@ +y,x1,x2,x3,x4,separated +0,0,0,0,0,0 +0,0,0,0,0,0 +0,5,11,2,2,0 +0,5,2,11,2,0 +0,5,2,2,11,0 +0,0,-1,-1,-1,1 +1,5,5,5,5,0 +2,4,4,4,4,0 +3,3,3,3,3,0 +4,2,2,2,2,0 +5,1,1,1,1,0 diff --git a/tests/data/ppmlhdfe_separation_examples/07.csv b/tests/data/ppmlhdfe_separation_examples/07.csv new file mode 100644 index 00000000..6fef9727 --- /dev/null +++ b/tests/data/ppmlhdfe_separation_examples/07.csv @@ -0,0 +1,19 @@ +y,x1,x2,id1,id2,separated +0,0,0,1,1,0 +0,0,0,1,1,0 +0,0,0,1,2,0 +0,0,0,1,3,0 +0,2,0,1,3,0 +0,0,1,2,2,1 +0,0,0,2,2,0 +0,0,0,2,2,0 +0,1,0,2,2,0 +0,1,2,2,3,1 +0,0,1,2,4,1 +0,0,0,4,2,0 +0,0,1,5,2,1 +1,0,0,1,1,0 +1,1,0,4,3,0 +1,0,0,5,4,0 +2,0,0,1,2,0 +2,0,0,2,1,0 diff --git a/tests/data/ppmlhdfe_separation_examples/08.csv b/tests/data/ppmlhdfe_separation_examples/08.csv new file mode 100644 index 00000000..631be7cf --- /dev/null +++ b/tests/data/ppmlhdfe_separation_examples/08.csv @@ -0,0 +1,10 @@ 
+y,x1,x2,id1,id2,separated +0,1,0,1,2,1 +0,1,0,2,2,0 +1,2,0,1,3,0 +1,0,0,2,1,0 +1,1,0,2,2,0 +1,0,1,2,3,0 +2,0,0,2,1,0 +2,0,1,2,2,0 +2,1,0,2,3,0 diff --git a/tests/data/ppmlhdfe_separation_examples/09.csv b/tests/data/ppmlhdfe_separation_examples/09.csv new file mode 100644 index 00000000..6d407e40 --- /dev/null +++ b/tests/data/ppmlhdfe_separation_examples/09.csv @@ -0,0 +1,8 @@ +y,x1,x2,x3,separated +0.0000000000,0,0,1,0 +0.0000000000,0,0,2,0 +0.0000000000,0,0,1,0 +0.9788354000,3,2,1,0 +2.2596662000,1,1,2,0 +2.4177196000,2,2,2,0 +2.6114680000,4,2,1,0 diff --git a/tests/data/ppmlhdfe_separation_examples/10.csv b/tests/data/ppmlhdfe_separation_examples/10.csv new file mode 100644 index 00000000..249d7c0e --- /dev/null +++ b/tests/data/ppmlhdfe_separation_examples/10.csv @@ -0,0 +1,11 @@ +y,x1,x2,x3,separated +0.0000000000,-5,0,1,1 +0.0000000000,-2,0,1,1 +0.0000000000,0,2,1,1 +0.0000000000,0,0,2,0 +0.0000000000,0,0,3,1 +0.0000000000,2,3,4,1 +0.5000000000,2,2,2,0 +1.1000000000,1,1,2,0 +2.1000000000,4,2,4,0 +3.3000000000,3,3,2,0 diff --git a/tests/data/ppmlhdfe_separation_examples/11.csv b/tests/data/ppmlhdfe_separation_examples/11.csv new file mode 100644 index 00000000..58bacfcc --- /dev/null +++ b/tests/data/ppmlhdfe_separation_examples/11.csv @@ -0,0 +1,359 @@ +y,x1,separated +0.0000000000,-4.7028898,0 +0.0000000000,-4.6912823,0 +0.0000000000,-4.6912823,0 +0.0000000000,-4.4849668,0 +0.0000000000,-4.4849668,0 +0.0000000000,-4.4845267,0 +0.0000000000,-4.4830111,0 +0.0000000000,-4.2672952,0 +0.0000000000,-4.2615407,0 +0.0000000000,-4.2591059,0 +0.0000000000,-4.2567867,0 +0.0000000000,-4.2522058,0 +0.0000000000,-4.1484823,0 +0.0000000000,-4.0571199,0 +0.0000000000,-4.0423191,0 +0.0000000000,-3.9856916,0 +0.0000000000,-3.9597617,0 +0.0000000000,-3.8967899,0 +0.0000000000,-3.7462106,0 +0.0000000000,-3.5773098,0 +0.0000000000,-3.5668417,0 +0.0000000000,-3.5650773,0 +0.0000000000,-3.560908,0 +0.0000000000,-3.5599101,0 +0.0000000000,-3.5495663,0 +0.0000000000,-3.5495663,0 
+0.0000000000,-3.528095,0 +0.0000000000,-3.528095,0 +0.0000000000,-3.5224364,0 +0.0000000000,-3.5224364,0 +0.0000000000,-3.5173011,0 +0.0000000000,-3.5135848,0 +0.0000000000,-3.4672173,0 +0.0000000000,-3.4652553,0 +0.0000000000,-3.45652,0 +0.0000000000,-3.4512085,0 +0.0000000000,-3.4512085,0 +0.0000000000,-3.3686349,0 +0.0000000000,-3.3686349,0 +0.0000000000,-3.349309,0 +0.0000000000,-3.3492445,0 +0.0000000000,-3.2919041,0 +0.0000000000,-3.2878591,0 +0.0000000000,-3.2586597,0 +0.0000000000,-3.2522087,0 +0.0000000000,-3.248818,0 +0.0000000000,-3.2035356,0 +0.0000000000,-3.1508029,0 +0.0000000000,-3.1352705,0 +0.0000000000,-3.1008907,0 +0.0000000000,-3.0952001,0 +0.0000000000,-3.0935383,0 +0.0000000000,-3.0733587,0 +0.0000000000,-3.0199775,0 +0.0000000000,-2.912453,0 +0.0000000000,-2.8678961,0 +0.0000000000,-2.8620788,0 +0.0000000000,-2.8603259,0 +0.0000000000,-2.8390441,0 +0.0000000000,-2.8000342,0 +0.0000000000,-2.7374803,0 +0.0000000000,-2.7209113,0 +0.0000000000,-2.5716397,0 +0.0000000000,-2.5705625,0 +0.0000000000,-2.5329383,0 +0.0000000000,-2.5209519,0 +0.0000000000,-2.5100772,0 +0.0000000000,-2.4988627,0 +0.0000000000,-2.4806327,0 +0.0000000000,-2.4716664,0 +0.0000000000,-2.4136075,0 +0.0000000000,-2.380103,0 +0.0000000000,-2.3790771,0 +0.0000000000,-2.3757199,0 +0.0000000000,-2.0949204,0 +0.0000000000,-2.0817463,0 +0.0000000000,-2.007045,0 +0.0000000000,-1.9941156,0 +0.0000000000,-1.9785869,0 +0.0000000000,-1.9785869,0 +1.0000000000,-4.6912823,0 +1.0000000000,-4.4849668,0 +1.0000000000,-4.4845267,0 +1.0000000000,-4.2672952,0 +1.0000000000,-4.2591059,0 +1.0000000000,-4.2567867,0 +1.0000000000,-4.2522058,0 +1.0000000000,-4.2453677,0 +1.0000000000,-4.1879296,0 +1.0000000000,-4.0423191,0 +1.0000000000,-4.0196778,0 +1.0000000000,-3.9856916,0 +1.0000000000,-3.9597617,0 +1.0000000000,-3.8967899,0 +1.0000000000,-3.5865002,0 +1.0000000000,-3.5773098,0 +1.0000000000,-3.5695546,0 +1.0000000000,-3.5695546,0 +1.0000000000,-3.5656664,0 +1.0000000000,-3.5656664,0 
+1.0000000000,-3.528095,0 +1.0000000000,-3.528095,0 +1.0000000000,-3.5224364,0 +1.0000000000,-3.4672221,0 +1.0000000000,-3.4672221,0 +1.0000000000,-3.4672173,0 +1.0000000000,-3.4652553,0 +1.0000000000,-3.4634475,0 +1.0000000000,-3.4634475,0 +1.0000000000,-3.4602808,0 +1.0000000000,-3.4602808,0 +1.0000000000,-3.45652,0 +1.0000000000,-3.45652,0 +1.0000000000,-3.4556436,0 +1.0000000000,-3.4556436,0 +1.0000000000,-3.4512085,0 +1.0000000000,-3.3035746,0 +1.0000000000,-3.2919041,0 +1.0000000000,-3.2910526,0 +1.0000000000,-3.2910526,0 +1.0000000000,-3.2887994,0 +1.0000000000,-3.2887994,0 +1.0000000000,-3.2878591,0 +1.0000000000,-3.2877296,0 +1.0000000000,-3.2877296,0 +1.0000000000,-3.2863158,0 +1.0000000000,-3.248818,0 +1.0000000000,-3.2232136,0 +1.0000000000,-3.2232136,0 +1.0000000000,-3.1584606,0 +1.0000000000,-3.1508029,0 +1.0000000000,-3.1080069,0 +1.0000000000,-3.1008907,0 +1.0000000000,-3.0199775,0 +1.0000000000,-3.013773,0 +1.0000000000,-2.9881708,0 +1.0000000000,-2.8961343,0 +1.0000000000,-2.8678961,0 +1.0000000000,-2.8620788,0 +1.0000000000,-2.8603259,0 +1.0000000000,-2.855998,0 +1.0000000000,-2.8242449,0 +1.0000000000,-2.8014585,0 +1.0000000000,-2.8013567,0 +1.0000000000,-2.8013567,0 +1.0000000000,-2.7763037,0 +1.0000000000,-2.7695435,0 +1.0000000000,-2.7695435,0 +1.0000000000,-2.7689878,0 +1.0000000000,-2.7374803,0 +1.0000000000,-2.7186925,0 +1.0000000000,-2.6648361,0 +1.0000000000,-2.6563301,0 +1.0000000000,-2.6110737,0 +1.0000000000,-2.6109593,0 +1.0000000000,-2.5994501,0 +1.0000000000,-2.5994501,0 +1.0000000000,-2.581458,0 +1.0000000000,-2.5724507,0 +1.0000000000,-2.5716397,0 +1.0000000000,-2.5655839,0 +1.0000000000,-2.5558162,0 +1.0000000000,-2.5255902,0 +1.0000000000,-2.5136308,0 +1.0000000000,-2.5051369,0 +1.0000000000,-2.4136075,0 +1.0000000000,-2.4025292,0 +1.0000000000,-2.3804496,0 +1.0000000000,-2.3757199,0 +1.0000000000,-2.3704439,0 +1.0000000000,-2.3264626,0 +1.0000000000,-2.3264626,0 +1.0000000000,-2.3244218,0 +1.0000000000,-2.3232603,0 
+1.0000000000,-2.2708158,0 +1.0000000000,-2.2199187,0 +1.0000000000,-2.1986774,0 +1.0000000000,-2.1904458,0 +1.0000000000,-2.1751882,0 +1.0000000000,-2.1369462,0 +1.0000000000,-2.1277875,0 +1.0000000000,-2.0965511,0 +1.0000000000,-2.0817463,0 +1.0000000000,-2.0534024,0 +1.0000000000,-2.0534024,0 +1.0000000000,-2.0441704,0 +1.0000000000,-2.0409411,0 +1.0000000000,-2.0291046,0 +1.0000000000,-1.9690826,0 +1.0000000000,-1.9561721,0 +1.0000000000,-1.8814413,0 +1.0000000000,-1.7268887,0 +1.0000000000,-1.0845564,0 +2.0000000000,-715.89342,0 +2.0000000000,-4.1484823,0 +2.0000000000,-3.5865002,0 +2.0000000000,-3.560908,0 +2.0000000000,-3.5599101,0 +2.0000000000,-3.3686349,0 +2.0000000000,-3.3492445,0 +2.0000000000,-3.2035356,0 +2.0000000000,-3.0952001,0 +2.0000000000,-3.0935001,0 +2.0000000000,-3.0375258,0 +2.0000000000,-2.968822,0 +2.0000000000,-2.8242449,0 +2.0000000000,-2.6023833,0 +2.0000000000,-2.5724507,0 +2.0000000000,-2.4268614,0 +2.0000000000,-2.41156,0 +2.0000000000,-2.2486649,0 +2.0000000000,-2.1999828,0 +2.0000000000,-2.1663907,0 +2.0000000000,-2.0555019,0 +2.0000000000,-1.9756922,0 +2.0000000000,-1.9445848,0 +2.0000000000,-1.9176316,0 +2.0000000000,-1.8930948,0 +2.0000000000,-1.8714506,0 +2.0000000000,-1.7522567,0 +2.0000000000,-1.6761822,0 +3.0000000000,-2.5957845,0 +3.0000000000,-2.5920771,0 +3.0000000000,-2.5920771,0 +3.0000000000,-2.5255902,0 +3.0000000000,-2.5136308,0 +3.0000000000,-1.5210485,0 +3.0000000000,-1.3909759,0 +3.0000000000,-1.0981068,0 +5.0000000000,-2.8014585,0 +5.0000000000,-2.5957845,0 +5.0000000000,-2.2708158,0 +5.0000000000,-2.1999828,0 +5.0000000000,-2.0555019,0 +5.0000000000,-1.9445848,0 +5.0000000000,-1.8869597,0 +5.0000000000,-1.8110781,0 +5.0000000000,-1.7933729,0 +5.0000000000,-1.7522567,0 +5.0000000000,-1.6901488,0 +5.0000000000,-1.6569215,0 +5.0000000000,-1.6236716,0 +5.0000000000,-1.6028269,0 +5.0000000000,-1.6028269,0 +5.0000000000,-1.6019071,0 +5.0000000000,-1.4646116,0 +5.0000000000,-1.4414754,0 +5.0000000000,-1.4047494,0 
+5.0000000000,-1.2623378,0 +5.0000000000,-1.2616706,0 +5.0000000000,-1.2616706,0 +5.0000000000,-1.2616706,0 +5.0000000000,-1.1432591,0 +5.0000000000,-1.0120547,0 +6.0000000000,-2.4806327,0 +6.0000000000,-2.3437126,0 +6.0000000000,-2.0594384,0 +6.0000000000,-2.0337943,0 +6.0000000000,-2.0291046,0 +6.0000000000,-1.9561721,0 +6.0000000000,-1.9176316,0 +6.0000000000,-1.8814413,0 +6.0000000000,-1.8245799,0 +6.0000000000,-1.7933729,0 +6.0000000000,-1.7268887,0 +6.0000000000,-1.6837359,0 +6.0000000000,-1.6518409,0 +6.0000000000,-1.6170926,0 +6.0000000000,-1.5999804,0 +6.0000000000,-1.3853858,0 +6.0000000000,-1.332609,0 +6.0000000000,-1.2041231,0 +6.0000000000,-1.2041231,0 +6.0000000000,-1.1847808,0 +6.0000000000,-1.0171419,0 +6.0000000000,-.9524537,0 +6.0000000000,-.9365643,0 +6.0000000000,-.9365643,0 +7.0000000000,-1.8842221,0 +7.0000000000,-1.7928098,0 +7.0000000000,-1.7579167,0 +7.0000000000,-1.7182541,0 +7.0000000000,-1.6430042,0 +7.0000000000,-1.6430042,0 +7.0000000000,-1.5900827,0 +7.0000000000,-1.5291547,0 +7.0000000000,-1.4975146,0 +7.0000000000,-1.4788243,0 +7.0000000000,-1.4646116,0 +7.0000000000,-1.4620534,0 +7.0000000000,-1.1847808,0 +7.0000000000,-1.0287132,0 +7.0000000000,-1.0240032,0 +7.0000000000,-1.0171419,0 +7.0000000000,-1.0120547,0 +8.0000000000,-1.9785869,0 +8.0000000000,-1.5005933,0 +8.0000000000,-1.2477662,0 +8.0000000000,-1.1432591,0 +8.0000000000,-1.0265733,0 +8.0000000000,-.97210824,0 +8.0000000000,-.89774135,0 +9.0000000000,-1.9581937,0 +9.0000000000,-1.6902209,0 +9.0000000000,-1.6837359,0 +9.0000000000,-1.6430768,0 +9.0000000000,-1.4719776,0 +9.0000000000,-1.2583967,0 +9.0000000000,-1.1179997,0 +9.0000000000,-1.0287132,0 +9.0000000000,-1.0240032,0 +9.0000000000,-.97210824,0 +9.0000000000,-.93326825,0 +9.0000000000,-.82596957,0 +9.0000000000,-.32737491,0 +9.0000000000,-.23161159,0 +10.0000000000,-1.5995624,0 +10.0000000000,-1.5452971,0 +10.0000000000,-1.4637348,0 +10.0000000000,-1.3294306,0 +10.0000000000,-1.1891538,0 +10.0000000000,-.82596957,0 
+10.0000000000,-.79784067,0 +11.0000000000,-1.3909759,0 +11.0000000000,-1.3510464,0 +11.0000000000,-1.2596354,0 +11.0000000000,-1.2583967,0 +11.0000000000,-1.2041231,0 +12.0000000000,-1.6493104,0 +12.0000000000,-1.2596354,0 +12.0000000000,-1.249949,0 +12.0000000000,-1.0265733,0 +12.0000000000,-1.010663,0 +12.0000000000,-.79784067,0 +13.0000000000,-1.4577824,0 +13.0000000000,-1.0845564,0 +13.0000000000,-.77647397,0 +14.0000000000,-.93326825,0 +14.0000000000,-.77647397,0 +14.0000000000,-.1916807,0 +15.0000000000,-1.2893075,0 +15.0000000000,-1.1717176,0 +15.0000000000,-1.0814776,0 +15.0000000000,-.76265301,0 +16.0000000000,-.89774135,0 +16.0000000000,-.76265301,0 +16.0000000000,-.34020503,0 +16.0000000000,-.34020503,0 +16.0000000000,-.23161159,0 +17.0000000000,-.49557516,0 +18.0000000000,-1.3510464,0 +19.0000000000,-.1916807,0 +21.0000000000,-.49557516,0 +22.0000000000,-1.23056,0 +23.0000000000,-1.0981068,0 +25.0000000000,.31510079,0 +35.0000000000,.31510079,0 +62.0000000000,-1.010663,0 diff --git a/tests/data/ppmlhdfe_separation_examples/12.csv b/tests/data/ppmlhdfe_separation_examples/12.csv new file mode 100644 index 00000000..92fadf60 --- /dev/null +++ b/tests/data/ppmlhdfe_separation_examples/12.csv @@ -0,0 +1,19 @@ +y,id1,id2,separated +0,3,3,0 +0,4,3,1 +0,4,4,0 +0,6,6,0 +0,7,7,0 +0,8,8,0 +0,9,9,0 +1,1,1,0 +1,2,2,0 +1,2,2,0 +1,3,3,0 +1,4,4,0 +1,5,5,0 +1,6,6,0 +1,7,7,0 +1,8,8,0 +1,9,9,0 +1,10,10,0 diff --git a/tests/data/ppmlhdfe_separation_examples/13.csv b/tests/data/ppmlhdfe_separation_examples/13.csv new file mode 100644 index 00000000..92fadf60 --- /dev/null +++ b/tests/data/ppmlhdfe_separation_examples/13.csv @@ -0,0 +1,19 @@ +y,id1,id2,separated +0,3,3,0 +0,4,3,1 +0,4,4,0 +0,6,6,0 +0,7,7,0 +0,8,8,0 +0,9,9,0 +1,1,1,0 +1,2,2,0 +1,2,2,0 +1,3,3,0 +1,4,4,0 +1,5,5,0 +1,6,6,0 +1,7,7,0 +1,8,8,0 +1,9,9,0 +1,10,10,0 diff --git a/tests/data/ppmlhdfe_separation_examples/14.csv b/tests/data/ppmlhdfe_separation_examples/14.csv new file mode 100644 index 
00000000..38dda0d0 --- /dev/null +++ b/tests/data/ppmlhdfe_separation_examples/14.csv @@ -0,0 +1,15 @@ +y,x1,x2,separated +0,1,0,0 +0,1,0,0 +0,-1,0,0 +0,0,1,0 +0,0,1,0 +0,0,1,0 +0,0,-1,0 +0,0,-1,0 +1,0,0,0 +2,0,0,0 +3,0,0,0 +4,0,0,0 +5,0,0,0 +6,0,0,0 diff --git a/tests/data/ppmlhdfe_separation_examples/15.csv b/tests/data/ppmlhdfe_separation_examples/15.csv new file mode 100644 index 00000000..a520162f --- /dev/null +++ b/tests/data/ppmlhdfe_separation_examples/15.csv @@ -0,0 +1,24 @@ +y,x1,x2,x3,separated +0,1,0,0,0 +0,1,0,0,0 +0,-1,0,0,0 +0,0,1,0,0 +0,0,1,0,0 +0,0,1,0,0 +0,0,-1,0,0 +0,0,-1,0,0 +0,0,0,1,1 +0,0,0,2,1 +0,0,0,3,1 +0,0,0,4,1 +0,0,0,5,1 +0,0,0,6,1 +0,0,0,7,1 +0,0,0,8,1 +0,0,0,1000,1 +1,0,0,0,0 +2,0,0,0,0 +3,0,0,0,0 +4,0,0,0,0 +5,0,0,0,0 +6,0,0,0,0 diff --git a/tests/data/ppmlhdfe_separation_examples/16.csv b/tests/data/ppmlhdfe_separation_examples/16.csv new file mode 100644 index 00000000..4e50b5c4 --- /dev/null +++ b/tests/data/ppmlhdfe_separation_examples/16.csv @@ -0,0 +1,101 @@ +y,x1,x2,x3,separated +0,3,-1,2,1 +0,5,2,-9,1 +0,5,4,0,1 +0,4.5,3,0,1 +0,-3,-13,1,1 +0,6.5,6,2,1 +0,5,2,10,1 +0,1.5,-3,-2,1 +0,1,-4,1,1 +0,4,0,-7,1 +0,8.5,11,5,1 +0,4,2,9,1 +0,.5,-6,7,1 +0,2,-3,-3,1 +0,4.5,3,7,1 +0,.5,-5,-29,1 +0,4,2,7,1 +0,6,6,-19,1 +0,3.5,-1,2,1 +0,2.5,-1,-17,1 +0,2.5,0,3,1 +0,.5,-6,-44,1 +0,5,2,1,1 +0,3.5,0,-8,1 +0,1,-4,0,1 +0,7.5,8,-36,1 +0,2,-3,-6,1 +0,3,-3,-41,1 +0,4.5,1,1,1 +0,0,-4,-5,0 +0,1.5,-1,8,0 +0,.5,-3,-28,0 +0,-2,-8,-5,0 +0,4.5,5,4,0 +0,-1,-6,1,0 +0,4.5,5,-31,0 +0,4,4,-2,0 +0,3.5,3,-35,0 +0,-2.5,-9,-9,0 +0,-1.5,-7,-17,0 +0,3.5,3,-2,0 +0,3.5,3,-25,0 +0,3,2,-2,0 +0,0,-4,-25,0 +0,3.5,3,-5,0 +0,-.5,-5,-33,0 +0,3,2,-4,0 +0,0,-4,-23,0 +0,4.5,5,-6,0 +0,6,8,-54,0 +0,0,-4,-4,0 +0,4.5,5,-30,0 +0,2,0,-12,0 +0,-.5,-5,-14,0 +0,-2,-8,8,0 +0,1.5,-1,-3,0 +0,-1,-6,-7,0 +0,2.5,1,-51,0 +0,2.5,1,8,0 +0,-1,-6,-28,0 +0,1,-2,-2,0 +0,1,-2,-27,0 +0,2.5,1,4,0 +0,1,-2,-33,0 +0,2.5,1,-3,0 +0,.5,-3,-36,0 +0,2,0,-4,0 +0,5,6,-25,0 +0,-4.5,-13,9,0 +0,5,6,-30,0 +0,1,-2,6,0 
+0,1,-2,-44,0 +0,1,-2,-3,0 +0,-.5,-5,-13,0 +0,-1.5,-7,-4,0 +0,0,-4,-18,0 +0,0,-4,4,0 +0,-1,-6,-9,0 +0,1,-2,-3,0 +0,0,-4,-24,0 +0,3.5,3,-2,0 +0,-2,-8,-29,0 +0,2.5,1,-4,0 +0,3,2,-10,0 +0,0,-4,-2,0 +.149277,.5,-3,-33,0 +.190858,4,4,4,0 +.341605,3,2,1,0 +.3791031,2,0,-4,0 +.3860548,4,4,-30,0 +.4186196,7,10,4,0 +.535357,2.5,1,-53,0 +.7474937,8,12,-3,0 +1.188081,2,0,-36,0 +1.461509,4.5,5,11,0 +1.809239,2,0,-40,0 +1.940061,5.5,7,-6,0 +2.535522,2.5,1,-23,0 +2.784749,1.5,-1,2,0 +5.984444,7,10,-45,0 diff --git a/tests/data/ppmlhdfe_separation_examples/17.csv b/tests/data/ppmlhdfe_separation_examples/17.csv new file mode 100644 index 00000000..39256f57 --- /dev/null +++ b/tests/data/ppmlhdfe_separation_examples/17.csv @@ -0,0 +1,101 @@ +y,x1,x2,x3,separated +0,-3,5,4,1 +0,4,3.5,0,1 +0,-5,5,5,1 +0,-2,3,-3,1 +0,5,6,3,1 +0,3,2.5,-2,1 +0,5,5.5,5,1 +0,-25,1.5,-3,1 +0,4,4,0,1 +0,-16,6,6,1 +0,-3,5,2,1 +0,-25,6.5,8,1 +0,3,2,-4,1 +0,-44,-1,-8,1 +0,0,3.5,1,1 +0,-2,7,6,1 +0,-6,3.5,1,1 +0,-14,7,6,1 +0,-1,1,-5,1 +0,-20,5,3,1 +0,-4,6.5,4,1 +0,-13,6,3,1 +0,-6,5,3,1 +0,-3,5.5,3,1 +0,2,6.5,6,1 +0,-17,3.5,0,1 +0,1,3,0,1 +0,-24,4.5,4,1 +0,-5,4.5,3,1 +0,-36,1.5,-1,0 +0,6,0,-4,0 +0,-33,1,-2,0 +0,-4,1.5,-1,0 +0,-17,4,4,0 +0,4,3.5,3,0 +0,-6,.5,-3,0 +0,-8,-3,-10,0 +0,-34,-3.5,-11,0 +0,12,-.5,-5,0 +0,-22,1.5,-1,0 +0,4,3,2,0 +0,-3,.5,-3,0 +0,8,.5,-3,0 +0,-37,3,2,0 +0,1,-1,-6,0 +0,-24,3,2,0 +0,2,-2,-8,0 +0,2,-2,-8,0 +0,2,4.5,5,0 +0,-41,6,8,0 +0,-5,4,4,0 +0,-23,1.5,-1,0 +0,-6,-1,-6,0 +0,-42,-1,-6,0 +0,-3,3,2,0 +0,-45,1.5,-1,0 +0,0,4,4,0 +0,2,2.5,1,0 +0,-6,5,6,0 +0,-34,1.5,-1,0 +0,-4,4.5,5,0 +0,-15,3.5,3,0 +0,6,2,0,0 +0,-16,1.5,-1,0 +0,6,2,0,0 +0,-37,1.5,-1,0 +0,0,1.5,-1,0 +0,-9,-6.5,-17,0 +0,6,5.5,7,0 +0,-10,1,-2,0 +0,1,5.5,7,0 +0,-35,.5,-3,0 +0,-7,5.5,7,0 +0,-39,2.5,1,0 +0,-16,3.5,3,0 +0,-12,1,-2,0 +0,-5,2.5,1,0 +0,-37,2.5,1,0 +0,-8,-1,-6,0 +0,-19,7,10,0 +0,2,0,-4,0 +0,-43,3,2,0 +0,2,4.5,5,0 +0,-15,-2,-8,0 +0,-2,1.5,-1,0 +0,-19,2,0,0 +0,1,3,2,0 +0,-17,0,-4,0 +0,-8,-2,-8,0 +0,-37,1,-2,0 +0,3,4.5,5,0 
+0,-45,3,2,0 +0,1,1,-2,0 +0,-29,1.5,-1,0 +0,2,0,-4,0 +.3131518,-8,-.5,-5,0 +.3476705,-6,0,-4,0 +.8587969,-19,1,-2,0 +3.179766,-4,-.5,-5,0 +4.394069,-22,1.5,-1,0 diff --git a/tests/data/ppmlhdfe_separation_examples/18.csv b/tests/data/ppmlhdfe_separation_examples/18.csv new file mode 100644 index 00000000..f191b713 --- /dev/null +++ b/tests/data/ppmlhdfe_separation_examples/18.csv @@ -0,0 +1,101 @@ +y,x1,x2,x3,separated +0,4.5,3,-8,1 +0,5,3,4,1 +0,5.5,3,-1,1 +0,0,-6,-1,1 +0,1.5,-4,-4,1 +0,-1.5,-10,7,1 +0,3.5,-1,-8,1 +0,-1,-8,-2,1 +0,7.5,8,4,1 +0,-2,-9,-6,1 +0,2.5,-1,3,1 +0,4.5,1,-40,1 +0,3,-2,3,1 +0,0,-8,-22,1 +0,4.5,0,-2,1 +0,3.5,-1,-33,1 +0,3.5,1,-1,1 +0,2,-2,-56,1 +0,3,0,-2,1 +0,4,2,-39,1 +0,5.5,4,6,1 +0,4.5,2,2,1 +0,1.5,-4,-9,1 +0,-.5,-8,-34,1 +0,5.5,2,-9,1 +0,1,-4,-5,1 +0,1.5,-5,-9,1 +0,8,8,-34,1 +0,2.5,-3,0,1 +0,.5,-3,-43,0 +0,2.5,1,-1,0 +0,2,0,-6,0 +0,2,0,11,0 +0,1.5,-1,-44,0 +0,2.5,1,4,0 +0,2.5,1,-37,0 +0,.5,-3,0,0 +0,2.5,1,-52,0 +0,.5,-3,-4,0 +0,0,-4,-25,0 +0,2.5,1,2,0 +0,3.5,3,-45,0 +0,4.5,5,-1,0 +0,4.5,5,-47,0 +0,2.5,1,9,0 +0,-2,-8,-44,0 +0,4,4,-3,0 +0,5,6,-34,0 +0,0,-4,-7,0 +0,3,2,-32,0 +0,4,4,-1,0 +0,5,6,-37,0 +0,-1,-6,-3,0 +0,.5,-3,-48,0 +0,3,2,-1,0 +0,3,2,-14,0 +0,0,-4,0,0 +0,6,8,-14,0 +0,1.5,-1,5,0 +0,2.5,1,-11,0 +0,.5,-3,-2,0 +0,-.5,-5,-21,0 +0,1,-2,-5,0 +0,-.5,-5,-52,0 +0,-1.5,-7,7,0 +0,-1,-6,-56,0 +0,2.5,1,-8,0 +0,3,2,-39,0 +0,1.5,-1,-5,0 +0,-2.5,-9,-48,0 +0,.5,-3,7,0 +0,4,4,-48,0 +0,2,0,2,0 +0,4,4,0,0 +0,1,-2,0,0 +0,1.5,-1,-42,0 +0,2.5,1,-2,0 +0,1,-2,-13,0 +0,5.5,7,6,0 +0,6.5,9,-54,0 +0,5.5,7,-4,0 +0,4,4,-24,0 +0,1.5,-1,1,0 +0,1.5,-1,-27,0 +0,4.5,5,3,0 +0,6.5,9,-36,0 +0,3,2,12,0 +0,-3,-10,-9,0 +0,1.5,-1,-9,0 +0,4.5,5,-14,0 +.2704671,3,2,3,0 +.3969807,1,-2,-28,0 +.4520356,-2,-8,9,0 +.6291217,1,-2,-42,0 +.7097536,0,-4,7,0 +1.218813,-2,-8,-7,0 +1.41596,0,-4,1,0 +1.53977,2.5,1,-46,0 +2.074442,-.5,-5,-1,0 +3.495168,-1,-6,-46,0 diff --git a/tests/data/ppmlhdfe_separation_examples/readme.md b/tests/data/ppmlhdfe_separation_examples/readme.md new file 
mode 100644 index 00000000..9989f224 --- /dev/null +++ b/tests/data/ppmlhdfe_separation_examples/readme.md @@ -0,0 +1,4 @@ +## Separation Data Sets + +All files in this directory stem from the [ppmlhdfe test suite](https://github.com/sergiocorreia/ppmlhdfe/tree/master/test/separation_datasets), +published under the MIT license. From de016dc61557789ed90b745ab352347ae6f7b050 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Mon, 21 Oct 2024 21:29:31 +0200 Subject: [PATCH 10/12] catch linalg error after successful separation check for 0.8csv --- tests/test_poisson.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tests/test_poisson.py b/tests/test_poisson.py index e059c242..7ef1004e 100644 --- a/tests/test_poisson.py +++ b/tests/test_poisson.py @@ -1,3 +1,4 @@ +import contextlib import os import numpy as np @@ -28,7 +29,7 @@ def test_separation(): "Y": [0, 0, 0, 1, 2, 3], "fe1": ["a", "a", "b", "b", "b", "c"], "fe2": ["c", "c", "d", "d", "d", "e"], - "X": np.random.normal(0, 1, 6) + "X": np.random.normal(0, 1, 6), } ) with pytest.warns( @@ -60,7 +61,7 @@ def test_separation(): [fn for fn in os.listdir(os.path.join(path, folder)) if fn.endswith(".csv")] ) for fn in fns: - if fn == "07.csv": + if fn in ["07.csv"]: # this case fails but is not tested in ppmlhdfe # https://github.com/sergiocorreia/ppmlhdfe/blob/master/test/validate_tagsep.do#L27 continue @@ -89,11 +90,15 @@ def test_separation(): else: fml += f" | {' + '.join(fixed_effects)}" - with pytest.warns( - UserWarning, - match=f"{data.separated.sum()} observations removed because of separation.", + print("Testing separation check for", fn) + with ( + pytest.warns( + UserWarning, + match=f"{data.separated.sum()} observations removed because of separation.", + ), + contextlib.suppress(Exception), ): - pf.fepois(fml, data=data, separation_check=["ir"]) + pf.fepois(fml, data=data, separation_check=["ir"]) # noqa: F841 @pytest.mark.parametrize("fml", ["Y ~ X1", "Y ~ X1 | f1"]) 
From f68b3e7c4d53ce7a0ee381deb5ac31f383a90ef6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 21 Oct 2024 19:59:50 +0000 Subject: [PATCH 11/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pyfixest/estimation/fepois_.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyfixest/estimation/fepois_.py b/pyfixest/estimation/fepois_.py index 687a36c3..699b0deb 100644 --- a/pyfixest/estimation/fepois_.py +++ b/pyfixest/estimation/fepois_.py @@ -137,9 +137,9 @@ def prepare_model_matrix(self): # check for separation na_separation: list[int] = [] if ( - self._fe is not None - and self.separation_check is not None - and self.separation_check # not an empty list + self._fe is not None + and self.separation_check is not None + and self.separation_check # not an empty list ): na_separation = _check_for_separation( Y=self._Y, From 5cc064da62e49a5f51e5ebb60f0ff52fc40530ff Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Mon, 21 Oct 2024 22:20:51 +0200 Subject: [PATCH 12/12] test for syntax errors --- tests/test_errors.py | 27 +++++++++++++++++++++++++++ tests/test_poisson.py | 12 ++++++------ 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/tests/test_errors.py b/tests/test_errors.py index b96d5c2e..8d6ded1f 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -657,3 +657,30 @@ def test_errors_panelview(): def test_split_fsplit_errors(data, split, fsplit, expected_exception, error_message): with pytest.raises(expected_exception, match=error_message): pf.feols("Y~X1", data=data, split=split, fsplit=fsplit) + + +def test_separation_check_validations(): + data = pd.DataFrame( + { + "Y": [1, 2, 3], + "X1": [4, 5, 6], + } + ) + + with pytest.raises( + ValueError, + match="The function argument `separation_check` must be a list of strings containing 'fe' and/or 'ir'.", + ): + pf.fepois("Y ~ X1", 
data=data, separation_check=["a"]) + + with pytest.raises( + TypeError, + match="The function argument `separation_check` must be of type list.", + ): + pf.fepois("Y ~ X1", data=data, separation_check="fe") + + with pytest.raises( + ValueError, + match="The function argument `separation_check` must be a list of strings containing 'fe' and/or 'ir'.", + ): + pf.fepois("Y ~ X1", data=data, separation_check=["fe", "invalid"]) diff --git a/tests/test_poisson.py b/tests/test_poisson.py index 7ef1004e..3c00f5ce 100644 --- a/tests/test_poisson.py +++ b/tests/test_poisson.py @@ -75,9 +75,6 @@ def test_separation(): fixed_effects = data.columns[ data.columns.str.startswith("id") ] # fixed effects id1,...,id2 - if data.separated.sum() == 0: - # TODO: do not skip but update pytest.warn to confirm that no warning is produced - continue if regressors.empty: # TODO: formulae with just a constant term and fixed effects throw error in FIT.get_fit(), e.g., for 03.csv and Y ~ 1 | id1 + id2 + id3? @@ -90,15 +87,18 @@ def test_separation(): else: fml += f" | {' + '.join(fixed_effects)}" - print("Testing separation check for", fn) with ( pytest.warns( UserWarning, match=f"{data.separated.sum()} observations removed because of separation.", - ), + ) as record, contextlib.suppress(Exception), ): - pf.fepois(fml, data=data, separation_check=["ir"]) # noqa: F841 + pf.fepois(fml, data=data, separation_check=["ir"]) + + # if no separation, no warning is raised + if data.separated.sum() == 0: + assert len(record) == 0 @pytest.mark.parametrize("fml", ["Y ~ X1", "Y ~ X1 | f1"])