v0.7.4: code following PEP8
jhwjhw0123 authored Jul 24, 2019
1 parent 1393190 commit 6c1c49f
Showing 4 changed files with 71 additions and 48 deletions.
8 changes: 1 addition & 7 deletions imxgboost/__init__.py
@@ -1,8 +1,2 @@
import sys
-sys.path.append("..")
-import imxgboost.weighted_loss
-from imxgboost.weighted_loss import Weight_Binary_Cross_Entropy
-import imxgboost.focal_loss
-from imxgboost.focal_loss import Focal_Binary_Loss
-import imxgboost.imbalance_xgb
-from imxgboost.imbalance_xgb import imbalance_xgboost
+sys.path.append("..")
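With the re-export lines dropped from __init__.py, downstream code pulls the classes straight from the submodules. A minimal sketch of the resulting import surface (my own usage example, not part of the diff):

from imxgboost.imbalance_xgb import imbalance_xgboost
from imxgboost.weighted_loss import Weight_Binary_Cross_Entropy
from imxgboost.focal_loss import Focal_Binary_Loss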
11 changes: 6 additions & 5 deletions imxgboost/focal_loss.py
@@ -31,13 +31,14 @@ def focal_binary_object(self, pred, dtrain):
g4 = 1 - label - ((-1) ** label) * sigmoid_pred
g5 = label + ((-1) ** label) * sigmoid_pred
# combine the gradient
-grad = gamma_indct * g3 * self.robust_pow(g2, gamma_indct) * np.log(g4 + 1e-9) + ((-1) ** label) * self.robust_pow(g5, (
-    gamma_indct + 1))
+grad = gamma_indct * g3 * self.robust_pow(g2, gamma_indct) * np.log(g4 + 1e-9) + \
+       ((-1) ** label) * self.robust_pow(g5, (gamma_indct + 1))
# combine the gradient parts to get hessian components
-hess_1 = self.robust_pow(g2, gamma_indct) + gamma_indct * ((-1) ** label) * g3 * self.robust_pow(g2, (gamma_indct - 1))
+hess_1 = self.robust_pow(g2, gamma_indct) + \
+         gamma_indct * ((-1) ** label) * g3 * self.robust_pow(g2, (gamma_indct - 1))
hess_2 = ((-1) ** label) * g3 * self.robust_pow(g2, gamma_indct) / g4
# get the final 2nd order derivative
-hess = ((hess_1 * np.log(g4 + 1e-9) - hess_2) * gamma_indct + (gamma_indct + 1) * self.robust_pow(g5,
-    gamma_indct)) * g1
+hess = ((hess_1 * np.log(g4 + 1e-9) - hess_2) * gamma_indct +
+        (gamma_indct + 1) * self.robust_pow(g5, gamma_indct)) * g1

return grad, hess
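The rewrapped lines above compute the gradient and Hessian of the binary focal loss with respect to the raw margin. As a quick standalone check of that algebra (my own sketch, not part of the commit: plain ** stands in for robust_pow, and g2 is assumed to match g5's definition since its line sits above the expanded hunk), the closed-form gradient can be compared against a finite difference of the loss L = -y(1-p)^gamma*log(p) - (1-y)p^gamma*log(1-p) with p = sigmoid(x):

import numpy as np

def focal_loss(x, y, gamma):
    # binary focal loss evaluated on the raw margin x
    p = 1.0 / (1.0 + np.exp(-x))
    return -y * (1 - p) ** gamma * np.log(p) - (1 - y) * p ** gamma * np.log(1 - p)

def focal_grad(x, y, gamma):
    # same algebra as the diff above
    p = 1.0 / (1.0 + np.exp(-x))
    g2 = y + ((-1) ** y) * p   # assumed identical to g5, as in the visible lines
    g3 = p + y - 1
    g4 = 1 - y - ((-1) ** y) * p
    g5 = y + ((-1) ** y) * p
    return gamma * g3 * g2 ** gamma * np.log(g4 + 1e-9) + ((-1) ** y) * g5 ** (gamma + 1)

x, y, gamma, eps = 0.37, 1.0, 2.0, 1e-6
numeric = (focal_loss(x + eps, y, gamma) - focal_loss(x - eps, y, gamma)) / (2 * eps)
print(numeric, focal_grad(x, y, gamma))  # the two values should agree to ~1e-6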
97 changes: 62 additions & 35 deletions imxgboost/imbalance_xgb.py
@@ -1,39 +1,43 @@
import sys
sys.path.append("..")
import numpy as np
import xgboost as xgb
from imxgboost.weighted_loss import Weight_Binary_Cross_Entropy
from imxgboost.focal_loss import Focal_Binary_Loss
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef


def evalerror(preds, dtrain):
labels = dtrain.get_label()
# return a pair metric_name, result
# since preds are margins (before the logistic transformation), cut off at 0

return 'error', float(sum(labels != (preds > 0.0))) / len(labels)


def two_class_encoding(flat_prediction):
-if len(np.shape(flat_prediction))==2:
+if len(np.shape(flat_prediction)) == 2:
return flat_prediction
else:
# class 1 probability
class_one_prob = 1.0 / (1.0 + np.exp(-flat_prediction))
-class_one_prob = np.reshape(class_one_prob,[-1,1])
+class_one_prob = np.reshape(class_one_prob, [-1, 1])
# class 0 probability
class_zero_prob = 1 - class_one_prob
-class_zero_prob = np.reshape(class_zero_prob,[-1,1])
+class_zero_prob = np.reshape(class_zero_prob, [-1, 1])
# concatenate the probabilities to get the final prediction
-sigmoid_two_class_pred = np.concatenate((class_zero_prob,class_one_prob),axis=1)
+sigmoid_two_class_pred = np.concatenate((class_zero_prob, class_one_prob), axis=1)

return sigmoid_two_class_pred
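For illustration (my own example, not in the commit), a flat margin vector comes back as an [nData, 2] probability array:

import numpy as np
margins = np.array([-2.0, 0.0, 3.0])   # raw boosting margins
probs = two_class_encoding(margins)
# column 1 is sigmoid(margin), column 0 its complement:
# approximately [[0.881, 0.119], [0.5, 0.5], [0.047, 0.953]]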

-class imbalance_xgboost(BaseEstimator,ClassifierMixin):
+
+class imbalance_xgboost(BaseEstimator, ClassifierMixin):
"""Data in the form of [nData * nDim], where nDim stands for the number of features.
This wrapper provides an Xgboost interface with the sklearn estimator structure, so it can be stacked in other sklearn pipelines
"""
-def __init__(self,num_round=10,max_depth=10,eta=0.3,silent_mode=True,objective_func='binary:logitraw',eval_metric='logloss',booster='gbtree',special_objective=None, imbalance_alpha=None, focal_gamma=None):

+def __init__(self, num_round=10, max_depth=10, eta=0.3, silent_mode=True, objective_func='binary:logitraw',
+             eval_metric='logloss', booster='gbtree', special_objective=None, imbalance_alpha=None,
+             focal_gamma=None):
"""
Parameters to initialize an Xgboost estimator
:param num_round: the number of boosting rounds used to train the model
@@ -60,35 +64,34 @@ def __init__(self,num_round=10,max_depth=10,eta=0.3,silent_mode=True,objective_f
self.imbalance_alpha = imbalance_alpha
self.focal_gamma = focal_gamma


-def fit(self,data_x,data_y):
+def fit(self, data_x, data_y):
if self.special_objective is None:
# get the parameter list
self.para_dict = {'max_depth': self.max_depth,
-    'eta': self.eta,
-    'silent': self.silent_mode,
+                  'eta': self.eta,
+                  'silent': self.silent_mode,
                   'objective': self.objective_func,
                   'eval_metric': self.eval_metric,
                   'booster': self.booster}
else:
# get the parameter list, without stating the objective function
self.para_dict = {'max_depth': self.max_depth,
-    'eta': self.eta,
+                  'eta': self.eta,
                   'silent': self.silent_mode,
                   'eval_metric': self.eval_metric,
                   'booster': self.booster}
# make sure data is in [nData * nDim] format
-assert len(data_x.shape)==2
+assert len(data_x.shape) == 2
# check if data length is the same
-if data_x.shape[0]!=data_y.shape[0]:
+if data_x.shape[0] != data_y.shape[0]:
    raise ValueError('The number of instances for x and y data should be the same!')
# data_x is in [nData*nDim]
nData = data_x.shape[0]
nDim = data_x.shape[1]
# split the data into train and validation
holistic_ind = np.random.permutation(nData)
-train_ind = holistic_ind[0:nData*3//4]
-valid_ind = holistic_ind[nData*3//4:nData]
+train_ind = holistic_ind[0:nData * 3 // 4]
+valid_ind = holistic_ind[nData * 3 // 4:nData]
# indexing and get the data
train_data = data_x[train_ind]
train_label = data_y[train_ind]
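So fit holds out the last quarter of a random permutation as a validation watchlist. With my own toy numbers, nData = 8 and a permutation [5, 2, 7, 0, 3, 6, 1, 4] give:

# train_ind = holistic_ind[0:8 * 3 // 4] -> [5, 2, 7, 0, 3, 6]  (6 rows)
# valid_ind = holistic_ind[8 * 3 // 4:8] -> [1, 4]              (2 rows)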
@@ -108,46 +111,50 @@ def fit(self,data_x,data_y):
# construct the object with imbalanced alpha value
weighted_loss_obj = Weight_Binary_Cross_Entropy(imbalance_alpha=self.imbalance_alpha)
# fit the classifier
-self.boosting_model = xgb.train(self.para_dict, dtrain, self.num_round, self.eval_list, obj=weighted_loss_obj.weighted_binary_cross_entropy, feval=evalerror, verbose_eval=False)
+self.boosting_model = xgb.train(self.para_dict, dtrain, self.num_round, self.eval_list,
+                                obj=weighted_loss_obj.weighted_binary_cross_entropy, feval=evalerror,
+                                verbose_eval=False)
elif self.special_objective == 'focal':
# if the gamma value is None then raise an error
if self.focal_gamma is None:
raise ValueError('Argument focal_gamma must have a value when the objective is \'focal\'!')
# construct the object with focal gamma value
focal_loss_obj = Focal_Binary_Loss(gamma_indct=self.focal_gamma)
# fit the classifier
-self.boosting_model = xgb.train(self.para_dict, dtrain, self.num_round, self.eval_list, obj=focal_loss_obj.focal_binary_object, feval=evalerror, verbose_eval=False)
+self.boosting_model = xgb.train(self.para_dict, dtrain, self.num_round, self.eval_list,
+                                obj=focal_loss_obj.focal_binary_object, feval=evalerror, verbose_eval=False)
else:
-raise ValueError('The input special objective mode not recognized! Could only be \'weighted\' or \'focal\', but got '+str(self.special_objective))
+raise ValueError(
+    'The input special objective mode not recognized! Could only be \'weighted\' or \'focal\', but got ' + str(
+        self.special_objective))
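Both branches hand xgb.train a callable through obj=..., using xgboost's custom-objective contract: the callable receives (preds, dtrain) and returns per-row first and second derivatives. A minimal sketch of that contract with plain squared error (my own example, not one of the losses in this commit):

import numpy as np
import xgboost as xgb

def squared_error_obj(preds, dtrain):
    labels = dtrain.get_label()
    grad = preds - labels        # d/dpred of 0.5 * (pred - label)^2
    hess = np.ones_like(preds)   # second derivative is the constant 1
    return grad, hess

# booster = xgb.train(params, dtrain, num_boost_round=10, obj=squared_error_obj)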


-def predict(self,data_x,y=None):
+def predict(self, data_x, y=None):
# convert the input to an xgboost DMatrix
if y is not None:
try:
-dtest = xgb.DMatrix(data_x,label=y)
+dtest = xgb.DMatrix(data_x, label=y)
except:
raise ValueError('Test data invalid!')
else:
dtest = xgb.DMatrix(data_x)

prediction_output = self.boosting_model.predict(dtest)

return prediction_output

-def predict_sigmoid(self,data_x, y=None):
+def predict_sigmoid(self, data_x, y=None):
# sigmoid output: the predicted probability of class 1

raw_output = self.predict(data_x, y)
sigmoid_output = 1. / (1. + np.exp(-raw_output))

return sigmoid_output
-def predict_determine(self,data_x,y=None):
+
+def predict_determine(self, data_x, y=None):
# deterministic output
sigmoid_output = self.predict_sigmoid(data_x, y)
prediction_output = np.round(sigmoid_output)

return prediction_output
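Putting the pieces together, an end-to-end sketch of the wrapper (my own usage example; the synthetic data and numbers are purely illustrative):

import numpy as np
from imxgboost.imbalance_xgb import imbalance_xgboost

rng = np.random.RandomState(0)
data_x = rng.randn(200, 5)                     # 200 rows, 5 features
data_y = (rng.rand(200) < 0.1).astype(float)   # roughly 10% positives

clf = imbalance_xgboost(special_objective='focal', focal_gamma=2.0)
clf.fit(data_x, data_y)
hard_labels = clf.predict_determine(data_x)    # 0/1 labels via round(sigmoid(margin))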

def predict_two_class(self, data_x, y=None):
@@ -161,11 +168,11 @@ def score(self, X, y, sample_weight=None):
score_pred = accuracy_score(y_true=y, y_pred=label_pred)

return score_pred

def score_eval_func(self, y_true, y_pred, mode='accuracy'):
prob_pred = two_class_encoding(y_pred)
label_pred = np.argmax(prob_pred, axis=1)
-if mode=='accuracy':
+if mode == 'accuracy':
score_pred = accuracy_score(y_true=y_true, y_pred=label_pred)
elif mode == 'precision':
score_pred = precision_score(y_true=y_true, y_pred=label_pred)
@@ -178,5 +185,25 @@ def score_eval_func(self, y_true, y_pred, mode='accuracy'):
else:
raise ValueError('Score function mode unrecognized! Must be one from the list '
                 '[\'accuracy\', \'precision\', \'recall\', \'f1\', \'MCC\']')

-return score_pred

+return score_pred

+def correct_eval_func(self, y_true, y_pred, mode='TP'):
+    # get the predictions first
+    prob_pred = two_class_encoding(y_pred)
+    label_pred = np.argmax(prob_pred, axis=1)
+    # logical-not for the true-negative predictions
+    y_true_negative = np.logical_not(y_true)
+    y_pred_negative = np.logical_not(label_pred)
+    # return the count for the requested confusion-matrix cell
+    if mode == 'TP':
+        return np.sum(np.logical_and(y_true, label_pred))
+    elif mode == 'TN':
+        return np.sum(np.logical_and(y_true_negative, y_pred_negative))
+    elif mode == 'FP':
+        return np.sum(np.logical_and(y_true_negative, label_pred))
+    elif mode == 'FN':
+        return np.sum(np.logical_and(y_true, y_pred_negative))
+    else:
+        raise ValueError('Corrective evaluation mode not recognized! '
+                         'Must be one of \'TP\', \'TN\', \'FP\', or \'FN\'')
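The four counts recover the usual confusion-matrix scores. A usage sketch (my own, with clf, data_x and data_y as in the example above and raw margins from predict):

raw_pred = clf.predict(data_x)
tp = clf.correct_eval_func(data_y, raw_pred, mode='TP')
fp = clf.correct_eval_func(data_y, raw_pred, mode='FP')
fn = clf.correct_eval_func(data_y, raw_pred, mode='FN')
precision = tp / (tp + fp)   # same quantity score_eval_func(mode='precision') reports
recall = tp / (tp + fn)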
3 changes: 2 additions & 1 deletion imxgboost/weighted_loss.py
@@ -1,5 +1,6 @@
import numpy as np


class Weight_Binary_Cross_Entropy:
'''
The class of weighted binary cross-entropy loss; it lets users set the weight parameter
@@ -22,4 +23,4 @@ def weighted_binary_cross_entropy(self, pred, dtrain):
grad = -(imbalance_alpha ** label) * (label - sigmoid_pred)
hess = (imbalance_alpha ** label) * sigmoid_pred * (1.0 - sigmoid_pred)

-return grad, hess
+return grad, hess
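These closed forms are the first and second derivatives of the alpha-weighted cross-entropy alpha**y * BCE(y, sigmoid(x)) with respect to the raw margin x. A quick finite-difference check (my own sketch, not part of the commit):

import numpy as np

def weighted_bce(x, y, alpha):
    p = 1.0 / (1.0 + np.exp(-x))
    return -(alpha ** y) * (y * np.log(p) + (1 - y) * np.log(1 - p))

x, y, alpha, eps = 0.8, 1.0, 4.0, 1e-6
p = 1.0 / (1.0 + np.exp(-x))
grad = -(alpha ** y) * (y - p)   # closed form from the diff
numeric = (weighted_bce(x + eps, y, alpha) - weighted_bce(x - eps, y, alpha)) / (2 * eps)
print(grad, numeric)             # should agree to ~1e-6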
