# Aequitas_Random_Sklearn.py (forked from sakshiudeshi/Aequitas)

from __future__ import division
from random import seed, shuffle
import math
import os
import sys
import random
import time
from collections import defaultdict

import numpy as np
from scipy.optimize import basinhopping
from sklearn import svm
from sklearn.externals import joblib

sys.path.insert(0, './fair_classification/')  # the code for fair classification is in this directory
import loss_funcs as lf  # loss funcs that can be optimized subject to various constraints
import config

random.seed(time.time())
start_time = time.time()

init_prob = 0.5
params = config.params
direction_probability = [init_prob] * params
direction_probability_change_size = 0.001

sensitive_param = config.sensitive_param
name = 'sex'
cov = 0

perturbation_unit = config.perturbation_unit
threshold = config.threshold

global_disc_inputs = set()
global_disc_inputs_list = []
local_disc_inputs = set()
local_disc_inputs_list = []
tot_inputs = set()

global_iteration_limit = 1000
local_iteration_limit = 1000

input_bounds = config.input_bounds
classifier_name = config.classifier_name
model = joblib.load(classifier_name)
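
# The `config` module loaded above is assumed to supply the dataset- and
# model-specific knobs used throughout this script. A minimal sketch of a
# config.py for a 13-feature census-style dataset follows; the concrete
# values are illustrative assumptions, not the shipped Aequitas configuration:
#
#   params = 13                   # number of input features
#   sensitive_param = 9           # 1-indexed position of the protected feature ('sex')
#   perturbation_unit = 1         # granularity of a local perturbation step
#   threshold = 0                 # tolerated difference between the two predictions
#   input_bounds = [[1, 9], [0, 7], ...]        # one [lo, hi] pair per feature
#   classifier_name = 'classifier.pkl'          # hypothetical path to the pickled sklearn model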


class Local_Perturbation(object):
    """basinhopping step: nudge one randomly chosen feature by +/-1."""

    def __init__(self, stepsize=1):
        self.stepsize = stepsize

    def __call__(self, x):
        s = self.stepsize
        val = random.randint(0, params - 1)  # index of the feature to perturb
        act = [-1, 1]
        x[val] = x[val] + random.choice(act)
        # clamp the perturbed value back into its legal range
        x[val] = max(input_bounds[val][0], x[val])
        x[val] = min(input_bounds[val][1], x[val])
        return x


class Global_Discovery(object):
    """basinhopping step: jump to a uniformly random point in the input space."""

    def __init__(self, stepsize=1):
        self.stepsize = stepsize

    def __call__(self, x):
        s = self.stepsize
        for i in xrange(params):
            x[i] = random.randint(input_bounds[i][0], input_bounds[i][1])
        x[sensitive_param - 1] = 0
        return x
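
# Both step classes implement scipy's take_step protocol: basinhopping calls
# the object as x_new = take_step(x) once per hop, in place of the default
# uniform random displacement. A minimal illustration of the call pattern
# (the seed point here is arbitrary):
#
#   step = Global_Discovery()
#   x = step([0] * params)  # a random point with the sensitive feature zeroed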


def evaluate_global(inp):
    # Build two copies of the input that differ only in the sensitive feature.
    inp0 = [int(i) for i in inp]
    inp1 = [int(i) for i in inp]
    inp0[sensitive_param - 1] = 0
    inp1[sensitive_param - 1] = 1

    inp0 = np.reshape(np.asarray(inp0), (1, -1))
    inp1 = np.reshape(np.asarray(inp1), (1, -1))

    out0 = model.predict(inp0)
    out1 = model.predict(inp1)

    tot_inputs.add(tuple(map(tuple, inp0)))

    # With class labels in {-1, +1}, out0 + out1 == 0 means the two predictions
    # disagree, i.e. the input is discriminatory.
    if abs(out0 + out1) == 0 and tuple(map(tuple, inp0)) not in global_disc_inputs:
        global_disc_inputs.add(tuple(map(tuple, inp0)))
        global_disc_inputs_list.append(inp0.tolist()[0])

    # basinhopping minimizes this objective, so returning False (0) marks a
    # discriminatory input as an optimum.
    return not abs(out0 - out1) > threshold
    # for binary classification, we have found that the
    # following optimization function gives better results
    # return abs(out1 + out0)


def evaluate_local(inp):
    # Same discrimination check as evaluate_global, but newly found inputs are
    # recorded in the local sets instead.
    inp0 = [int(i) for i in inp]
    inp1 = [int(i) for i in inp]
    inp0[sensitive_param - 1] = 0
    inp1[sensitive_param - 1] = 1

    inp0 = np.reshape(np.asarray(inp0), (1, -1))
    inp1 = np.reshape(np.asarray(inp1), (1, -1))

    out0 = model.predict(inp0)
    out1 = model.predict(inp1)

    tot_inputs.add(tuple(map(tuple, inp0)))

    if (abs(out0 + out1) == 0 and tuple(map(tuple, inp0)) not in global_disc_inputs
            and tuple(map(tuple, inp0)) not in local_disc_inputs):
        local_disc_inputs.add(tuple(map(tuple, inp0)))
        local_disc_inputs_list.append(inp0.tolist()[0])

    return not abs(out0 - out1) > threshold
    # for binary classification, we have found that the
    # following optimization function gives better results
    # return abs(out1 + out0)
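
# A single candidate can also be probed by hand, outside basinhopping, using
# the same protected-attribute flip the evaluators perform. A small sketch
# (is_discriminatory is a hypothetical helper, not part of Aequitas):
#
#   def is_discriminatory(inp):
#       a, b = list(inp), list(inp)
#       a[sensitive_param - 1], b[sensitive_param - 1] = 0, 1
#       pa = model.predict(np.reshape(np.asarray(a), (1, -1)))
#       pb = model.predict(np.reshape(np.asarray(b), (1, -1)))
#       return pa[0] != pb[0]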


initial_input = [7, 4, 26, 1, 4, 4, 0, 0, 0, 1, 5, 73, 1]
minimizer = {"method": "L-BFGS-B"}

global_discovery = Global_Discovery()
local_perturbation = Local_Perturbation()

# Phase 1: global search for discriminatory inputs across the whole input space.
basinhopping(evaluate_global, initial_input, stepsize=1.0, take_step=global_discovery,
             minimizer_kwargs=minimizer, niter=global_iteration_limit)

print "Finished Global Search"
print "Percentage discriminatory inputs - " + str(
    float(len(global_disc_inputs_list) + len(local_disc_inputs_list)) / float(len(tot_inputs)) * 100)
print ""
print "Starting Local Search"

# Phase 2: local search seeded with each discriminatory input found globally.
for inp in global_disc_inputs_list:
    basinhopping(evaluate_local, inp, stepsize=1.0, take_step=local_perturbation,
                 minimizer_kwargs=minimizer, niter=local_iteration_limit)
    print "Percentage discriminatory inputs - " + str(
        float(len(global_disc_inputs_list) + len(local_disc_inputs_list)) / float(len(tot_inputs)) * 100)

print ""
print "Local Search Finished"
print "Percentage discriminatory inputs - " + str(
    float(len(global_disc_inputs_list) + len(local_disc_inputs_list)) / float(len(tot_inputs)) * 100)
print ""
print "Total Inputs are " + str(len(tot_inputs))
print "Number of discriminatory inputs are " + str(len(global_disc_inputs_list) + len(local_disc_inputs_list))