Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[switch] ridge modeling version to better match convergence for initial release #1173

Merged
merged 4 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,5 @@ python/src/tutorials/data/R/*
python/src/tutorials/data/*
*.pkl
python/src/robyn/_deprecate/*
python/src/robyn/tutorials/output/*
python/src/robyn/tutorials/output/*
python/src/robyn/debug/*
7 changes: 5 additions & 2 deletions python/src/robyn/modeling/ridge/ridge_data_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,16 +166,19 @@ def _hyper_collector(
return hyper_collect

def _geometric_adstock(self, x: pd.Series, theta: float) -> pd.Series:
# print(f"Before adstock: {x.head()}")
y = x.copy()
for i in range(1, len(x)):
y.iloc[i] += theta * y.iloc[i - 1]
# print(f"After adstock: {y.head()}")
return y

def _hill_transformation(
self, x: pd.Series, alpha: float, gamma: float
) -> pd.Series:

# Add debug self.logger.debugs
# print(f"Before hill: {x.head()}")
x_scaled = (x - x.min()) / (x.max() - x.min())
result = x_scaled**alpha / (x_scaled**alpha + gamma**alpha)

# print(f"After hill: {result.head()}")
return result
2 changes: 1 addition & 1 deletion python/src/robyn/modeling/ridge/ridge_evaluate_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def _evaluate_model(
# Calculate metrics using R-style calculations
y_train_pred = model.predict(x_norm)
metrics["rsq_train"] = self.ridge_metrics_calculator.calculate_r2_score(
y_norm, y_train_pred, x_norm.shape[1], debug=debug, iteration=iter_ng
y_norm, y_train_pred, x_norm.shape[1]
)
metrics["nrmse_train"] = self.ridge_metrics_calculator.calculate_nrmse(
y_norm, y_train_pred, debug=debug, iteration=iter_ng
Expand Down
155 changes: 79 additions & 76 deletions python/src/robyn/modeling/ridge/ridge_metrics_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,13 @@ def _calculate_decomp_spend_dist(
"iterPar",
"Elapsed",
]
# self.logger.debug(f"Decomp spend distribution debug:")
# self.logger.debug(f"Total media spend: {total_media_spend}")
# self.logger.debug(f"Total effect: {total_effect}")
# for col in paid_media_cols:
# self.logger.debug(f"{col} - effect: {all_effects[col]}, spend: {all_spends[col]}")
self.logger.debug(f"Decomp spend distribution debug:")
self.logger.debug(f"Total media spend: {total_media_spend}")
self.logger.debug(f"Total effect: {total_effect}")
for col in paid_media_cols:
self.logger.debug(
f"{col} - effect: {all_effects[col]}, spend: {all_spends[col]}"
)
return df[required_cols]

def _calculate_lambda(
Expand All @@ -163,44 +165,45 @@ def _calculate_lambda(
iteration: int = 0,
) -> Tuple[float, float]:
"""Match R's glmnet lambda calculation exactly"""
n_samples = len(y_norm)

# Standardize first before scale factor calculation
def r_scale(x):
mean = np.mean(x)
sd = np.sqrt(np.sum((x - mean) ** 2) / (len(x) - 1))
return (x - mean) / (sd if sd > 1e-10 else 1.0)

# Scale X and y first
x_scaled = np.apply_along_axis(r_scale, 0, x_norm)
y_scaled = r_scale(y_norm)

# Now calculate scale factor using scaled data
scale_factor = np.mean(np.abs(x_scaled)) * np.mean(np.abs(y_scaled))

if debug and (iteration == 0 or iteration % 25 == 0):
self.logger.debug(f"\nLambda Calculation Debug (iteration {iteration}):")
self.logger.debug(f"n_samples: {n_samples}")
self.logger.debug(f"x_scaled mean abs: {np.mean(np.abs(x_scaled)):.6f}")
self.logger.debug(f"y_scaled mean abs: {np.mean(np.abs(y_scaled)):.6f}")
self.logger.debug(f"Scale factor: {scale_factor:.6f}")
self.logger.debug(f"lambda_hp: {lambda_hp:.6f}")

def mysd(y: np.ndarray) -> float:
"""R's implementation of sd"""
return np.sqrt(np.sum((y - np.mean(y)) ** 2) / len(y))

# Scale X using R's approach
x_means = np.mean(x_norm, axis=0)
x_sds = np.apply_along_axis(mysd, 0, x_norm)
sx = (x_norm - x_means) / x_sds[:, np.newaxis].T

# Handle NaN values like R does
check_nan = np.all(np.isnan(sx), axis=0)
sx = np.where(check_nan, 0, sx)

# R does not scale y in this case
sy = y_norm

# Calculate lambda_max using R's approach
# 0.001 is glmnet's default alpha value for ridge regression
lambda_max = np.max(np.abs(np.sum(sx * sy[:, np.newaxis], axis=0))) / (
0.001 * len(x_norm)
)
# R's lambda calculation
alpha = 0.001
gram = x_scaled.T @ x_scaled / n_samples
ctx = np.abs(x_scaled.T @ y_scaled) / n_samples

# Calculate lambda using lambda_hp
lambda_min = lambda_max * 0.0001 # R's default lambda_min_ratio
lambda_max = np.max(ctx) / alpha
lambda_min = lambda_max * 0.0001
lambda_ = lambda_min + lambda_hp * (lambda_max - lambda_min)

if debug and (iteration % 10 == 0):
self.logger.debug(f"\nLambda Debug (iter {iteration}):")
self.logger.debug(f"x_means: {np.mean(np.abs(x_means)):.6f}")
self.logger.debug(f"x_sds mean: {np.mean(x_sds):.6f}")
self.logger.debug(f"sx mean: {np.mean(np.abs(sx)):.6f}")
self.logger.debug(f"sy mean: {np.mean(np.abs(sy)):.6f}")
if debug and (iteration == 0 or iteration % 25 == 0):
self.logger.debug(f"max ctx: {np.max(ctx):.6f}")
self.logger.debug(f"lambda_max: {lambda_max:.6f}")
self.logger.debug(f"lambda_: {lambda_:.6f}")
self.logger.debug(f"lambda_hp: {lambda_hp:.6f}")
self.logger.debug(f"lambda_min: {lambda_min:.6f}")
self.logger.debug(f"final lambda_: {lambda_:.6f}")

return float(lambda_), float(lambda_max)
return lambda_, lambda_max

def _calculate_rssd(
self,
Expand All @@ -225,11 +228,11 @@ def _calculate_rssd(
effects.append(effect)
spends.append(raw_spend)

# if debug and (iteration % 50 == 0):
# self.logger.debug(f"{col}:")
# self.logger.debug(f" coefficient: {coef:.6f}")
# self.logger.debug(f" raw spend: {raw_spend:.6f}")
# self.logger.debug(f" effect: {effect:.6f}")
if debug and (iteration == 0 or iteration % 25 == 0):
self.logger.debug(f"{col}:")
self.logger.debug(f" coefficient: {coef:.6f}")
self.logger.debug(f" raw spend: {raw_spend:.6f}")
self.logger.debug(f" effect: {effect:.6f}")

# Convert to numpy arrays
effects = np.array(effects)
Expand All @@ -244,12 +247,12 @@ def _calculate_rssd(
effects_norm = effects / total_effect
spends_norm = spends / total_spend

# if debug and (iteration % 50 == 0):
# self.logger.debug("\nNormalized values:")
# self.logger.debug("Effects:", effects_norm)
# self.logger.debug("Spends:", spends_norm)
# self.logger.debug("Effect total (check=1):", np.sum(effects_norm))
# self.logger.debug("Spend total (check=1):", np.sum(spends_norm))
if debug and (iteration == 0 or iteration % 25 == 0):
self.logger.debug("\nNormalized values:")
self.logger.debug("Effects:", effects_norm)
self.logger.debug("Spends:", spends_norm)
self.logger.debug("Effect total (check=1):", np.sum(effects_norm))
self.logger.debug("Spend total (check=1):", np.sum(spends_norm))

# Calculate RSSD
squared_diff = (effects_norm - spends_norm) ** 2
Expand Down Expand Up @@ -427,18 +430,12 @@ def _calculate_x_decomp_agg(
return df

def calculate_r2_score(
self,
y_true: np.ndarray,
y_pred: np.ndarray,
n_features: int,
df_int: int = 1,
debug: bool = True,
iteration: int = 0,
self, y_true: np.ndarray, y_pred: np.ndarray, n_features: int, df_int: int = 1
) -> float:
"""Match R's R² calculation exactly"""
n = len(y_true)

# Calculate R's version of R²
# Scale data like R
y_mean = np.mean(y_true)
ss_tot = np.sum((y_true - y_mean) ** 2)
ss_res = np.sum((y_true - y_pred) ** 2)
Expand All @@ -449,17 +446,10 @@ def calculate_r2_score(
# R's adjustment formula
adj_r2 = 1 - ((1 - r2) * (n - df_int) / (n - n_features - df_int))

# Match R's scale
if adj_r2 < 0:
adj_r2 = -np.sqrt(np.abs(adj_r2)) # R-style negative scaling

# if debug and (iteration % 50 == 0):
# self.logger.debug(f"R² Calculation Debug:")
# self.logger.debug(f"n: {n}")
# self.logger.debug(f"ss_tot: {ss_tot:.6f}")
# self.logger.debug(f"ss_res: {ss_res:.6f}")
# self.logger.debug(f"R²: {r2:.6f}")
# self.logger.debug(f"Adjusted R²: {adj_r2:.6f}")

return float(adj_r2)

def calculate_nrmse(
Expand All @@ -469,7 +459,7 @@ def calculate_nrmse(
debug: bool = True,
iteration: int = 0,
) -> float:
"""Calculate NRMSE with R-matching normalization"""
"""Calculate NRMSE with detailed debugging"""
n = len(y_true)
y_true = np.asarray(y_true)
y_pred = np.asarray(y_pred)
Expand All @@ -478,18 +468,31 @@ def calculate_nrmse(
residuals = y_true - y_pred
rss = np.sum(residuals**2)

# Calculate range same as R
y_range = np.max(y_true) - np.min(y_true)
# Calculate range from true values
y_min = np.min(y_true)
y_max = np.max(y_true)
scale = y_max - y_min

# Calculate RMSE
# Calculate RMSE first
rmse = np.sqrt(rss / n)
nrmse = rmse / y_range if y_range > 0 else rmse

# if debug and (iteration % 50 == 0):
# self.logger.debug(f"\nNRMSE Calculation Debug (iteration {iteration}):")
# self.logger.debug(f"RSS: {rss:.6f}")
# self.logger.debug(f"RMSE: {rmse:.6f}")
# self.logger.debug(f"Range: {y_range:.6f}")
# self.logger.debug(f"NRMSE: {nrmse:.6f}")

if debug and (iteration == 0 or iteration % 25 == 0):
self.logger.debug(f"\nNRMSE Calculation Debug (iteration {iteration}):")
self.logger.debug(f"n: {n}")
self.logger.debug(f"RSS: {rss:.6f}")
self.logger.debug(f"RMSE: {rmse:.6f}")
self.logger.debug(f"y_true range: [{y_min:.6f}, {y_max:.6f}]")
self.logger.debug(f"scale: {scale:.6f}")
self.logger.debug("First 5 pairs (true, pred, residual):")
for i in range(min(5, len(y_true))):
self.logger.debug(
f" {y_true[i]:.6f}, {y_pred[i]:.6f}, {residuals[i]:.6f}"
)

# Calculate final NRMSE
nrmse = rmse / scale if scale > 0 else rmse

if debug and (iteration == 0 or iteration % 25 == 0):
self.logger.debug(f"Final NRMSE: {nrmse:.6f}")

return float(nrmse)
Loading