v0.7.4: code following PEP8
jhwjhw0123 authored Jul 24, 2019
1 parent 1393190 commit 6c1c49f
Showing 4 changed files with 71 additions and 48 deletions.
8 changes: 1 addition & 7 deletions imxgboost/__init__.py
@@ -1,8 +1,2 @@
import sys
-sys.path.append("..")
-import imxgboost.weighted_loss
-from imxgboost.weighted_loss import Weight_Binary_Cross_Entropy
-import imxgboost.focal_loss
-from imxgboost.focal_loss import Focal_Binary_Loss
-import imxgboost.imbalance_xgb
-from imxgboost.imbalance_xgb import imbalance_xgboost
+sys.path.append("..")
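With the re-export lines dropped from __init__.py, downstream code pulls the classes straight from the submodules. A minimal sketch of the resulting import surface (my own usage example, not part of the diff):

from imxgboost.imbalance_xgb import imbalance_xgboost
from imxgboost.weighted_loss import Weight_Binary_Cross_Entropy
from imxgboost.focal_loss import Focal_Binary_Loss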
11 changes: 6 additions & 5 deletions imxgboost/focal_loss.py
@@ -31,13 +31,14 @@ def focal_binary_object(self, pred, dtrain):
g4 = 1 - label - ((-1) ** label) * sigmoid_pred
g5 = label + ((-1) ** label) * sigmoid_pred
# combine the gradient
-grad = gamma_indct * g3 * self.robust_pow(g2, gamma_indct) * np.log(g4 + 1e-9) + ((-1) ** label) * self.robust_pow(g5, (
-    gamma_indct + 1))
+grad = gamma_indct * g3 * self.robust_pow(g2, gamma_indct) * np.log(g4 + 1e-9) + \
+       ((-1) ** label) * self.robust_pow(g5, (gamma_indct + 1))
# combine the gradient parts to get hessian components
-hess_1 = self.robust_pow(g2, gamma_indct) + gamma_indct * ((-1) ** label) * g3 * self.robust_pow(g2, (gamma_indct - 1))
+hess_1 = self.robust_pow(g2, gamma_indct) + \
+         gamma_indct * ((-1) ** label) * g3 * self.robust_pow(g2, (gamma_indct - 1))
hess_2 = ((-1) ** label) * g3 * self.robust_pow(g2, gamma_indct) / g4
# get the final 2nd order derivative
-hess = ((hess_1 * np.log(g4 + 1e-9) - hess_2) * gamma_indct + (gamma_indct + 1) * self.robust_pow(g5,
-    gamma_indct)) * g1
+hess = ((hess_1 * np.log(g4 + 1e-9) - hess_2) * gamma_indct +
+        (gamma_indct + 1) * self.robust_pow(g5, gamma_indct)) * g1

return grad, hess
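The rewrapped lines above compute the gradient and Hessian of the binary focal loss with respect to the raw margin. As a quick standalone check of that algebra (my own sketch, not part of the commit: plain ** stands in for robust_pow, and g2 is assumed to match g5's definition since its line sits above the expanded hunk), the closed-form gradient can be compared against a finite difference of the loss L = -y(1-p)^gamma*log(p) - (1-y)p^gamma*log(1-p) with p = sigmoid(x):

import numpy as np

def focal_loss(x, y, gamma):
    # binary focal loss evaluated on the raw margin x
    p = 1.0 / (1.0 + np.exp(-x))
    return -y * (1 - p) ** gamma * np.log(p) - (1 - y) * p ** gamma * np.log(1 - p)

def focal_grad(x, y, gamma):
    # same algebra as the diff above
    p = 1.0 / (1.0 + np.exp(-x))
    g2 = y + ((-1) ** y) * p   # assumed identical to g5, as in the visible lines
    g3 = p + y - 1
    g4 = 1 - y - ((-1) ** y) * p
    g5 = y + ((-1) ** y) * p
    return gamma * g3 * g2 ** gamma * np.log(g4 + 1e-9) + ((-1) ** y) * g5 ** (gamma + 1)

x, y, gamma, eps = 0.37, 1.0, 2.0, 1e-6
numeric = (focal_loss(x + eps, y, gamma) - focal_loss(x - eps, y, gamma)) / (2 * eps)
print(numeric, focal_grad(x, y, gamma))  # the two values should agree to ~1e-6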
97 changes: 62 additions & 35 deletions imxgboost/imbalance_xgb.py
@@ -1,39 +1,43 @@
import sys
sys.path.append("..")
import numpy as np
import xgboost as xgb
from imxgboost.weighted_loss import Weight_Binary_Cross_Entropy
from imxgboost.focal_loss import Focal_Binary_Loss
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef


def evalerror(preds, dtrain):
labels = dtrain.get_label()
# return a pair metric_name, result
# since preds are margins (before the logistic transformation), cut off at 0

return 'error', float(sum(labels != (preds > 0.0))) / len(labels)


def two_class_encoding(flat_prediction):
-if len(np.shape(flat_prediction))==2:
+if len(np.shape(flat_prediction)) == 2:
return flat_prediction
else:
# class 1 probability
class_one_prob = 1.0 / (1.0 + np.exp(-flat_prediction))
-class_one_prob = np.reshape(class_one_prob,[-1,1])
+class_one_prob = np.reshape(class_one_prob, [-1, 1])
# class 0 probability
class_zero_prob = 1 - class_one_prob
-class_zero_prob = np.reshape(class_zero_prob,[-1,1])
+class_zero_prob = np.reshape(class_zero_prob, [-1, 1])
# concatenate the probabilities to get the final prediction
-sigmoid_two_class_pred = np.concatenate((class_zero_prob,class_one_prob),axis=1)
+sigmoid_two_class_pred = np.concatenate((class_zero_prob, class_one_prob), axis=1)

return sigmoid_two_class_pred
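For illustration (my own example, not in the commit), a flat margin vector comes back as an [nData, 2] probability array:

import numpy as np
margins = np.array([-2.0, 0.0, 3.0])   # raw boosting margins
probs = two_class_encoding(margins)
# column 1 is sigmoid(margin), column 0 its complement:
# approximately [[0.881, 0.119], [0.5, 0.5], [0.047, 0.953]]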

-class imbalance_xgboost(BaseEstimator,ClassifierMixin):
+
+class imbalance_xgboost(BaseEstimator, ClassifierMixin):
"""Data in the form of [nData * nDim], where nDim stands for the number of features.
This wrapper provides an Xgboost interface with the sklearn estimator structure, so it can be stacked in other sklearn pipelines
"""
-def __init__(self,num_round=10,max_depth=10,eta=0.3,silent_mode=True,objective_func='binary:logitraw',eval_metric='logloss',booster='gbtree',special_objective=None, imbalance_alpha=None, focal_gamma=None):

+def __init__(self, num_round=10, max_depth=10, eta=0.3, silent_mode=True, objective_func='binary:logitraw',
+             eval_metric='logloss', booster='gbtree', special_objective=None, imbalance_alpha=None,
+             focal_gamma=None):
"""
Parameters to initialize an Xgboost estimator
:param num_round: the number of boosting rounds used to train the model
@@ -60,35 +64,34 @@ def __init__(self,num_round=10,max_depth=10,eta=0.3,silent_mode=True,objective_f
self.imbalance_alpha = imbalance_alpha
self.focal_gamma = focal_gamma


-def fit(self,data_x,data_y):
+def fit(self, data_x, data_y):
if self.special_objective is None:
# get the parameter list
self.para_dict = {'max_depth': self.max_depth,
-    'eta': self.eta,
-    'silent': self.silent_mode,
+                  'eta': self.eta,
+                  'silent': self.silent_mode,
                   'objective': self.objective_func,
                   'eval_metric': self.eval_metric,
                   'booster': self.booster}
else:
# get the parameter list, without stating the objective function
self.para_dict = {'max_depth': self.max_depth,
-    'eta': self.eta,
+                  'eta': self.eta,
                   'silent': self.silent_mode,
                   'eval_metric': self.eval_metric,
                   'booster': self.booster}
# make sure data is in [nData * nDim] format
-assert len(data_x.shape)==2
+assert len(data_x.shape) == 2
# check if data length is the same
-if data_x.shape[0]!=data_y.shape[0]:
+if data_x.shape[0] != data_y.shape[0]:
    raise ValueError('The number of instances for x and y data should be the same!')
# data_x is in [nData*nDim]
nData = data_x.shape[0]
nDim = data_x.shape[1]
# split the data into train and validation
holistic_ind = np.random.permutation(nData)
-train_ind = holistic_ind[0:nData*3//4]
-valid_ind = holistic_ind[nData*3//4:nData]
+train_ind = holistic_ind[0:nData * 3 // 4]
+valid_ind = holistic_ind[nData * 3 // 4:nData]
# indexing and get the data
train_data = data_x[train_ind]
train_label = data_y[train_ind]
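So fit holds out the last quarter of a random permutation as a validation watchlist. With my own toy numbers, nData = 8 and a permutation [5, 2, 7, 0, 3, 6, 1, 4] give:

# train_ind = holistic_ind[0:8 * 3 // 4] -> [5, 2, 7, 0, 3, 6]  (6 rows)
# valid_ind = holistic_ind[8 * 3 // 4:8] -> [1, 4]              (2 rows)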
@@ -108,46 +111,50 @@ def fit(self,data_x,data_y):
# construct the object with imbalanced alpha value
weighted_loss_obj = Weight_Binary_Cross_Entropy(imbalance_alpha=self.imbalance_alpha)
# fit the classifier
-self.boosting_model = xgb.train(self.para_dict, dtrain, self.num_round, self.eval_list, obj=weighted_loss_obj.weighted_binary_cross_entropy, feval=evalerror, verbose_eval=False)
+self.boosting_model = xgb.train(self.para_dict, dtrain, self.num_round, self.eval_list,
+                                obj=weighted_loss_obj.weighted_binary_cross_entropy, feval=evalerror,
+                                verbose_eval=False)
elif self.special_objective == 'focal':
# if the gamma value is None then raise an error
if self.focal_gamma is None:
raise ValueError('Argument focal_gamma must have a value when the objective is \'focal\'!')
# construct the object with focal gamma value
focal_loss_obj = Focal_Binary_Loss(gamma_indct=self.focal_gamma)
# fit the classifier
-self.boosting_model = xgb.train(self.para_dict, dtrain, self.num_round, self.eval_list, obj=focal_loss_obj.focal_binary_object, feval=evalerror, verbose_eval=False)
+self.boosting_model = xgb.train(self.para_dict, dtrain, self.num_round, self.eval_list,
+                                obj=focal_loss_obj.focal_binary_object, feval=evalerror, verbose_eval=False)
else:
-raise ValueError('The input special objective mode not recognized! Could only be \'weighted\' or \'focal\', but got '+str(self.special_objective))
+raise ValueError(
+    'The input special objective mode not recognized! Could only be \'weighted\' or \'focal\', but got ' + str(
+        self.special_objective))
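Both branches hand xgb.train a callable through obj=..., using xgboost's custom-objective contract: the callable receives (preds, dtrain) and returns per-row first and second derivatives. A minimal sketch of that contract with plain squared error (my own example, not one of the losses in this commit):

import numpy as np
import xgboost as xgb

def squared_error_obj(preds, dtrain):
    labels = dtrain.get_label()
    grad = preds - labels        # d/dpred of 0.5 * (pred - label)^2
    hess = np.ones_like(preds)   # second derivative is the constant 1
    return grad, hess

# booster = xgb.train(params, dtrain, num_boost_round=10, obj=squared_error_obj)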


-def predict(self,data_x,y=None):
+def predict(self, data_x, y=None):
# convert the input to an xgboost DMatrix
if y is not None:
try:
-dtest = xgb.DMatrix(data_x,label=y)
+dtest = xgb.DMatrix(data_x, label=y)
except:
raise ValueError('Test data invalid!')
else:
dtest = xgb.DMatrix(data_x)

prediction_output = self.boosting_model.predict(dtest)

return prediction_output

-def predict_sigmoid(self,data_x, y=None):
+def predict_sigmoid(self, data_x, y=None):
# sigmoid output: the predicted probability of class 1

raw_output = self.predict(data_x, y)
sigmoid_output = 1. / (1. + np.exp(-raw_output))

return sigmoid_output
-def predict_determine(self,data_x,y=None):
+
+def predict_determine(self, data_x, y=None):
# deterministic output
sigmoid_output = self.predict_sigmoid(data_x, y)
prediction_output = np.round(sigmoid_output)

return prediction_output
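Putting the pieces together, an end-to-end sketch of the wrapper (my own usage example; the synthetic data and numbers are purely illustrative):

import numpy as np
from imxgboost.imbalance_xgb import imbalance_xgboost

rng = np.random.RandomState(0)
data_x = rng.randn(200, 5)                     # 200 rows, 5 features
data_y = (rng.rand(200) < 0.1).astype(float)   # roughly 10% positives

clf = imbalance_xgboost(special_objective='focal', focal_gamma=2.0)
clf.fit(data_x, data_y)
hard_labels = clf.predict_determine(data_x)    # 0/1 labels via round(sigmoid(margin))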

def predict_two_class(self, data_x, y=None):
@@ -161,11 +168,11 @@ def score(self, X, y, sample_weight=None):
score_pred = accuracy_score(y_true=y, y_pred=label_pred)

return score_pred

def score_eval_func(self, y_true, y_pred, mode='accuracy'):
prob_pred = two_class_encoding(y_pred)
label_pred = np.argmax(prob_pred, axis=1)
-if mode=='accuracy':
+if mode == 'accuracy':
score_pred = accuracy_score(y_true=y_true, y_pred=label_pred)
elif mode == 'precision':
score_pred = precision_score(y_true=y_true, y_pred=label_pred)
@@ -178,5 +185,25 @@ def score_eval_func(self, y_true, y_pred, mode='accuracy'):
else:
raise ValueError('Score function mode unrecognized! Must be one from the list '
                 '[\'accuracy\', \'precision\', \'recall\', \'f1\', \'MCC\']')

-return score_pred

+return score_pred

+def correct_eval_func(self, y_true, y_pred, mode='TP'):
+    # get the predictions first
+    prob_pred = two_class_encoding(y_pred)
+    label_pred = np.argmax(prob_pred, axis=1)
+    # logical-not for the true-negative predictions
+    y_true_negative = np.logical_not(y_true)
+    y_pred_negative = np.logical_not(label_pred)
+    # return the count for the requested confusion-matrix cell
+    if mode == 'TP':
+        return np.sum(np.logical_and(y_true, label_pred))
+    elif mode == 'TN':
+        return np.sum(np.logical_and(y_true_negative, y_pred_negative))
+    elif mode == 'FP':
+        return np.sum(np.logical_and(y_true_negative, label_pred))
+    elif mode == 'FN':
+        return np.sum(np.logical_and(y_true, y_pred_negative))
+    else:
+        raise ValueError('Corrective evaluation mode not recognized! '
+                         'Must be one of \'TP\', \'TN\', \'FP\', or \'FN\'')
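The four counts recover the usual confusion-matrix scores. A usage sketch (my own, with clf, data_x and data_y as in the example above and raw margins from predict):

raw_pred = clf.predict(data_x)
tp = clf.correct_eval_func(data_y, raw_pred, mode='TP')
fp = clf.correct_eval_func(data_y, raw_pred, mode='FP')
fn = clf.correct_eval_func(data_y, raw_pred, mode='FN')
precision = tp / (tp + fp)   # same quantity score_eval_func(mode='precision') reports
recall = tp / (tp + fn)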
3 changes: 2 additions & 1 deletion imxgboost/weighted_loss.py
@@ -1,5 +1,6 @@
import numpy as np


class Weight_Binary_Cross_Entropy:
'''
The class of weighted binary cross-entropy loss; it lets users set the weight parameter
@@ -22,4 +23,4 @@ def weighted_binary_cross_entropy(self, pred, dtrain):
grad = -(imbalance_alpha ** label) * (label - sigmoid_pred)
hess = (imbalance_alpha ** label) * sigmoid_pred * (1.0 - sigmoid_pred)

-return grad, hess
+return grad, hess
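These closed forms are the first and second derivatives of the alpha-weighted cross-entropy alpha**y * BCE(y, sigmoid(x)) with respect to the raw margin x. A quick finite-difference check (my own sketch, not part of the commit):

import numpy as np

def weighted_bce(x, y, alpha):
    p = 1.0 / (1.0 + np.exp(-x))
    return -(alpha ** y) * (y * np.log(p) + (1 - y) * np.log(1 - p))

x, y, alpha, eps = 0.8, 1.0, 4.0, 1e-6
p = 1.0 / (1.0 + np.exp(-x))
grad = -(alpha ** y) * (y - p)   # closed form from the diff
numeric = (weighted_bce(x + eps, y, alpha) - weighted_bce(x - eps, y, alpha)) / (2 * eps)
print(grad, numeric)             # should agree to ~1e-6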
