Skip to content

Commit

Permalink
Merge pull request #74 from s3alfisc/dev-iv
Browse files Browse the repository at this point in the history
IV Functionality
  • Loading branch information
s3alfisc authored May 13, 2023
2 parents 822637e + ee23bca commit cfc6bf9
Show file tree
Hide file tree
Showing 16 changed files with 568 additions and 129 deletions.
1 change: 1 addition & 0 deletions docs/tutorial.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Supported covariance types are "iid", "HC1-3", CRV1 and CRV3 (one-way clustering
`.vcov()` method:

```py

fixest.vcov({'CRV1':'group_id'}).summary()
# >>> fixest.vcov({'CRV1':'group_id'}).summary()
#
Expand Down
49 changes: 27 additions & 22 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

126 changes: 103 additions & 23 deletions pyfixest/FormulaParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,50 @@ def __init__(self, fml):
fml_split = fml.split('|')
depvars, covars = fml_split[0].split("~")

if len(fml_split) > 1:
fevars = fml_split[1]
else:
if len(fml_split) == 1:
fevars = "0"
endogvars = None
instruments = None
elif len(fml_split) == 2:
if "~" in fml_split[1]:
fevars = "0"
endogvars, instruments = fml_split[1].split("~")
else:
fevars = fml_split[1]
endogvars = None
instruments = None
elif len(fml_split) == 3:
fevars = fml_split[1]
endogvars, instruments = fml_split[2].split("~")

if endogvars is not None:
if len(endogvars) > len(instruments):
raise ValueError("The IV system is underdetermined. Only fully determined systems are allowed. Please provide as many instruments as endogenous variables.")
elif len(endogvars) < len(instruments):
raise ValueError("The IV system is overdetermined. Only fully determined systems are allowed. Please provide as many instruments as endogenous variables.")
else:
pass

# Parse all individual formula components into lists
self.depvars = depvars.split("+")
self.covars = _unpack_fml(covars)
self.fevars = _unpack_fml(fevars)
# no fancy formula syntax is allowed for endogvars or instruments
self.endogvars = endogvars
self.instruments = instruments

if instruments is not None:
self.is_iv = True
# all rhs variables for the first stage (endog variable replaced with instrument)
first_stage_covars_list = covars.split("+")
first_stage_covars_list[first_stage_covars_list.index(endogvars)] = instruments
self.first_stage_covars_list = "+".join(first_stage_covars_list)
self.covars_first_stage = _unpack_fml(self.first_stage_covars_list)
self.depvars_first_stage = endogvars
else:
self.is_iv = False
self.covars_first_stage = None
self.depvars_first_stage = None

if self.covars.get("i") is not None:
self.ivars = dict()
Expand All @@ -81,7 +116,10 @@ def __init__(self, fml):
# Pack the formula components back into strings
self.covars_fml = _pack_to_fml(self.covars)
self.fevars_fml = _pack_to_fml(self.fevars)

if instruments is not None:
self.covars_first_stage_fml = _pack_to_fml(self.covars_first_stage)
else:
self.covars_first_stage_fml = None
#if "^" in self.covars:
# raise CovariateInteractionError("Please use 'i()' or ':' syntax to interact covariates.")

Expand All @@ -93,63 +131,105 @@ def __init__(self, fml):



def get_fml_dict(self):
def get_fml_dict(self, iv = False):

"""
Returns a dictionary of all fevars & formula without fevars. The keys are the fixed effect variable combinations.
The values are lists of formula strings that do not include the fixed effect variables.
Args:
iv (bool): If True, the formula dictionary will be returned for the first stage of an IV regression.
If False, the formula dictionary will be returned for the second stage of an IV regression / OLS regression.
Returns:
dict: A dictionary of the form {"fe1+fe2": ['Y1 ~ X', 'Y2~X'], "fe1+fe3": ['Y1 ~ X', 'Y2~X']} where
the keys are the fixed effect variable combinations and the values are lists of formula strings
that do not include the fixed effect variables.
If iv is True, the result is stored in the instance attribute fml_dict_iv. Otherwise, it is stored in the instance attribute fml_dict.
"""

self.fml_dict = dict()

fml_dict = dict()
for fevar in self.fevars_fml:
res = []
for depvar in self.depvars:
for covar in self.covars_fml:
res.append(depvar + '~' + covar)
self.fml_dict[fevar] = res
if iv:
for covar in self.covars_first_stage_fml:
res.append(depvar + '~' + covar)
else:
for covar in self.covars_fml:
res.append(depvar + '~' + covar)
fml_dict[fevar] = res

if iv:
self.fml_dict_iv = fml_dict
else:
self.fml_dict = fml_dict

def _transform_fml_dict(self):
def _transform_fml_dict(self, iv = False):

fml_dict2 = dict()

for fe in self.fml_dict.keys():
if iv:

fml_dict2[fe] = dict()
for fe in self.fml_dict_iv.keys():

for fml in self.fml_dict.get(fe):
depvars, covars = fml.split("~")
if fml_dict2[fe].get(depvars) is None:
fml_dict2[fe][depvars] = [covars]
else:
fml_dict2[fe][depvars].append(covars)
fml_dict2[fe] = dict()

self.fml_dict2 = fml_dict2
for fml in self.fml_dict_iv.get(fe):
depvars, covars = fml.split("~")
if fml_dict2[fe].get(depvars) is None:
fml_dict2[fe][depvars] = [covars]
else:
fml_dict2[fe][depvars].append(covars)
else:

for fe in self.fml_dict.keys():

fml_dict2[fe] = dict()

for fml in self.fml_dict.get(fe):
depvars, covars = fml.split("~")
if fml_dict2[fe].get(depvars) is None:
fml_dict2[fe][depvars] = [covars]
else:
fml_dict2[fe][depvars].append(covars)

if iv:
self.fml_dict2_iv = fml_dict2
else:
self.fml_dict2 = fml_dict2



def get_var_dict(self):
def get_var_dict(self, iv = False):

"""
Create a dictionary of all fevars and list of covars and depvars used in regression with those fevars.
The keys are the fixed effect variable combinations. The values are lists of variables (dependent variables and covariates) of
the respective regressions.
Args:
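iv (bool): If True, the variable dictionary is built for the first stage of an IV regression and stored in var_dict_iv.
If False, it is built for the second stage of an IV regression / OLS regression and stored in var_dict.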
Returns:
dict: A dictionary of the form {"fe1+fe2": ['Y1', 'X1', 'X2'], "fe1+fe3": ['Y1', 'X1', 'X2']} where
the keys are the fixed effect variable combinations and the values are lists of variables
(dependent variables and covariates) used in the regression with those fixed effect variables.
"""
self.var_dict = dict()
for fevar in self.fevars_fml:
self.var_dict[fevar] = _flatten_list(self.depvars) + _flatten_list(list(self.covars.values()))
var_dict = dict()
if iv:
for fevar in self.fevars_fml:
var_dict[fevar] = _flatten_list(self.depvars) + _flatten_list(list(self.covars_first_stage.values()))

else:
for fevar in self.fevars_fml:
var_dict[fevar] = _flatten_list(self.depvars) + _flatten_list(list(self.covars.values()))

if iv:
self.var_dict_iv = var_dict
else:
self.var_dict = var_dict


def _unpack_fml(x):
Expand Down
Binary file modified pyfixest/__pycache__/FormulaParser.cpython-310.pyc
Binary file not shown.
Binary file modified pyfixest/__pycache__/feols.cpython-310.pyc
Binary file not shown.
Binary file modified pyfixest/__pycache__/fixest.cpython-310.pyc
Binary file not shown.
Binary file modified pyfixest/__pycache__/utils.cpython-310.pyc
Binary file not shown.
Loading

0 comments on commit cfc6bf9

Please sign in to comment.