Skip to content

Commit

Permalink
Merge pull request #268 from DoubleML/s-update-aggregation
Browse files Browse the repository at this point in the history
Update Var aggregation
  • Loading branch information
SvenKlaassen authored Sep 9, 2024
2 parents a043d8b + 6445c3e commit 31f7388
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 28 deletions.
14 changes: 9 additions & 5 deletions doubleml/utils/_estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,15 +255,19 @@ def abs_ipw_score(theta):


def _aggregate_coefs_and_ses(all_coefs, all_ses, var_scaling_factors):
if var_scaling_factors.shape == (all_coefs.shape[0],):
scaling_factors = np.repeat(var_scaling_factors[:, np.newaxis], all_coefs.shape[1], axis=1)
else:
scaling_factors = var_scaling_factors
# aggregation is done over dimension 1, such that the coefs and ses have to be of shape (n_coefs, n_rep)
coefs = np.median(all_coefs, 1)
coefs_deviations = np.square(all_coefs - coefs.reshape(-1, 1))

rescaled_variances = np.multiply(np.square(all_ses), var_scaling_factors.reshape(-1, 1))

var = np.median(rescaled_variances + coefs_deviations, 1)
ses = np.sqrt(np.divide(var, var_scaling_factors))
coefs_deviations = np.square(all_coefs - coefs.reshape(-1, 1))
scaled_coef_deviations = np.divide(coefs_deviations, scaling_factors)
all_variances = np.square(all_ses) + scaled_coef_deviations

var = np.median(all_variances, 1)
ses = np.sqrt(var)
return coefs, ses


Expand Down
82 changes: 59 additions & 23 deletions doubleml/utils/tests/test_var_est_and_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,43 +10,71 @@ def n_rep(request):
return request.param


@pytest.fixture(scope='module',
params=[1, 5])
def n_coefs(request):
return request.param


@pytest.fixture(scope='module')
def test_var_est_and_aggr_fixture(n_rep):
n_obs = 100
psi = np.random.normal(size=(n_obs, n_rep))
psi_deriv = np.ones((n_obs, n_rep))

all_thetas = np.mean(psi, axis=0).reshape((-1, n_rep))
all_ses = np.zeros(n_rep).reshape((-1, n_rep))
all_var_scaling_factors = np.zeros(n_rep)

for i_rep in range(n_rep):
var_estimate, var_scaling_factor = _var_est(
psi=psi[:, i_rep],
psi_deriv=psi_deriv[:, i_rep],
smpls=None,
is_cluster_data=False
)
all_ses[0, i_rep] = np.sqrt(var_estimate)
all_var_scaling_factors[i_rep] = var_scaling_factor
def test_var_est_and_aggr_fixture(n_rep, n_coefs):
np.random.seed(42)

all_thetas = np.full((n_coefs, n_rep), np.nan)
all_ses = np.full((n_coefs, n_rep), np.nan)
expected_all_var = np.full((n_coefs, n_rep), np.nan)
all_var_scaling_factors = np.full((n_coefs, n_rep), np.nan)

for i_coef in range(n_coefs):
n_obs = np.random.randint(100, 200)
for i_rep in range(n_rep):

psi = np.random.normal(size=(n_obs))
psi_deriv = np.ones((n_obs))

all_thetas[i_coef, i_rep] = np.mean(psi)

var_estimate, var_scaling_factor = _var_est(
psi=psi,
psi_deriv=psi_deriv,
smpls=None,
is_cluster_data=False
)

all_ses[i_coef, i_rep] = np.sqrt(var_estimate)
all_var_scaling_factors[i_coef, i_rep] = var_scaling_factor

expected_theta = np.median(all_thetas, axis=1)
for i_coef in range(n_coefs):
for i_rep in range(n_rep):
theta_deviation = np.square(all_thetas[i_coef, i_rep] - expected_theta[i_coef])
expected_all_var[i_coef, i_rep] = np.square(all_ses[i_coef, i_rep]) + \
np.divide(theta_deviation, all_var_scaling_factors[i_coef, i_rep])

expected_se = np.sqrt(np.median(expected_all_var, axis=1))

# without n_rep
theta, se = _aggregate_coefs_and_ses(
all_coefs=all_thetas,
all_ses=all_ses,
var_scaling_factors=np.full(1, n_obs),
var_scaling_factors=all_var_scaling_factors[:, 0],
)

expected_theta = np.median(all_thetas)
expected_se = np.sqrt(np.median(
np.square(all_ses) + np.square(all_thetas - expected_theta) / n_obs
)
# with n_rep
theta_2, se_2 = _aggregate_coefs_and_ses(
all_coefs=all_thetas,
all_ses=all_ses,
var_scaling_factors=all_var_scaling_factors,
)

result_dict = {
'theta': theta,
'se': se,
'theta_2': theta_2,
'se_2': se_2,
'expected_theta': expected_theta,
'expected_se': expected_se,
'all_var_scaling_factors': all_var_scaling_factors,
}
return result_dict

Expand All @@ -57,6 +85,10 @@ def test_aggregate_theta(test_var_est_and_aggr_fixture):
test_var_est_and_aggr_fixture['theta'],
test_var_est_and_aggr_fixture['expected_theta']
)
assert np.allclose(
test_var_est_and_aggr_fixture['theta_2'],
test_var_est_and_aggr_fixture['expected_theta']
)


@pytest.mark.ci
Expand All @@ -65,3 +97,7 @@ def test_aggregate_se(test_var_est_and_aggr_fixture):
test_var_est_and_aggr_fixture['se'],
test_var_est_and_aggr_fixture['expected_se']
)
assert np.allclose(
test_var_est_and_aggr_fixture['se_2'],
test_var_est_and_aggr_fixture['expected_se']
)

0 comments on commit 31f7388

Please sign in to comment.