-
Notifications
You must be signed in to change notification settings - Fork 0
/
bias.py
100 lines (91 loc) · 2.88 KB
/
bias.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from utils import find_distance
import torch
# Target word sets, one per category name; bias_category() iterates the list
# that matches the requested category.
# (The "weat_" prefix presumably refers to the Word Embedding Association
# Test -- TODO confirm against the project docs.)
weat_career = [
    "executive",
    "management",
    "professional",
    "corporation",
    "salary",
    "office",
    "business",
    "career",
]
weat_family = [
    "home",
    "parents",
    "children",
    "family",
    "cousins",
    "marriage",
    "wedding",
    "relatives",
]
weat_arts = [
    "poetry",
    "art",
    "dance",
    "literature",
    "novel",
    "symphony",
    "drama",
    "sculpture",
    "shakespeare",
]
weat_science = [
    "science",
    "technology",
    "physics",
    "chemistry",
    "einstein",
    "nasa",
    "experiment",
    "astronomy",
]

# Gendered attribute word sets; bias() measures a word against each of them.
weat_male = [
    "he",
    "his",
    "man",
    "male",
    "boy",
    "son",
    "brother",
    "father",
    "uncle",
    "gentleman",
]
weat_female = [
    "she",
    "her",
    "woman",
    "female",
    "girl",
    "daughter",
    "sister",
    "mother",
    "aunt",
    "lady",
]

# Category names accepted by bias_category().
categories = ["career", "family", "arts", "science"]
def _add_pair_distances(total, word, other_word, emb_list, c):
    """Return ``total`` plus the word-to-other_word distance in every embedding.

    Both vectors are looked up in *all* embeddings before any distance is
    added. A word missing from a later embedding therefore raises
    ``LookupError`` without a partial contribution having been accumulated.
    """
    pairs = [(emb[word], emb[other_word]) for emb in emb_list]
    for word_vec, other_vec in pairs:
        total = total + find_distance(word_vec, other_vec, c)
    return total


def bias(word, emb_list, c):
    """Compute the male-minus-female distance gap for one word.

    For every word in ``weat_male`` (respectively ``weat_female``), the value
    ``find_distance(emb[word], emb[attribute_word], c)`` is summed over all
    embeddings in ``emb_list``. Each side's total is divided by the number of
    attribute words that were present in every embedding, and the female mean
    is subtracted from the male mean. Note that distances are summed, not
    averaged, across the embeddings.

    Args:
        word: Key looked up in each embedding.
        emb_list: Sequence of mappings from word to vector. It is iterated
            several times, so it must not be a one-shot iterator.
        c: Passed through unchanged to ``find_distance``; its meaning is
            defined there.

    Returns:
        The male mean minus the female mean. How to read the sign depends on
        what ``find_distance`` measures.

    Raises:
        Exception: ``word`` is missing from at least one embedding.
        ZeroDivisionError: No male, or no female, attribute word is present
            in every embedding. The type is kept from the original
            implementation, now with an explanatory message.
    """
    # Fail fast if the word itself cannot be looked up everywhere.
    try:
        for emb in emb_list:
            emb[word]
    except LookupError as err:
        raise Exception("word not in embedding") from err

    male_similarity = 0
    male_count = 0
    for male_word in weat_male:
        try:
            male_similarity = _add_pair_distances(
                male_similarity, word, male_word, emb_list, c
            )
        except LookupError:
            # Attribute word absent from some embedding: skip it entirely.
            print("male except")
            continue
        male_count += 1

    female_similarity = 0
    female_count = 0
    for female_word in weat_female:
        try:
            female_similarity = _add_pair_distances(
                female_similarity, word, female_word, emb_list, c
            )
        except LookupError:
            print(female_word)
            continue
        female_count += 1

    if male_count == 0 or female_count == 0:
        raise ZeroDivisionError(
            "no male or no female attribute word is present in every embedding"
        )

    return (male_similarity / male_count) - (female_similarity / female_count)
def bias_category(category, emb_list, c):
    """Average ``bias()`` over the target words of one category.

    Args:
        category: One of ``"career"``, ``"family"``, ``"arts"`` or
            ``"science"``; selects the matching ``weat_*`` word list.
        emb_list: Forwarded unchanged to ``bias()``.
        c: Forwarded unchanged to ``bias()``.

    Returns:
        The mean of ``bias(word, emb_list, c)`` over the category's words for
        which ``bias()`` succeeded. Words for which it raised are printed and
        left out of the mean.

    Raises:
        Exception: ``category`` is not one of the four known names.
        ZeroDivisionError: ``bias()`` failed for every word of the category.
            The type is kept from the original implementation, now with an
            explanatory message.
    """
    if category == "career":
        target_words = weat_career
    elif category == "family":
        target_words = weat_family
    elif category == "arts":
        target_words = weat_arts
    elif category == "science":
        target_words = weat_science
    else:
        raise Exception("category not valid")

    num_valid = 0
    total_bias = 0
    for word in target_words:
        try:
            word_bias = bias(word, emb_list, c)
        except Exception:
            # Best effort: report the word that could not be scored, move on.
            # Catching Exception (not a bare except) lets Ctrl-C through.
            print(word)
            continue
        total_bias = total_bias + word_bias
        num_valid += 1

    if num_valid == 0:
        raise ZeroDivisionError(
            "no word of category %r could be scored" % (category,)
        )
    return total_bias / num_valid
def all_bias(emb_list, categories, c):
    """Compute ``bias_category()`` for each requested category.

    Args:
        emb_list: Forwarded unchanged to ``bias_category()``.
        categories: Iterable of category names.
        c: Forwarded unchanged to ``bias_category()``.

    Returns:
        A list with one bias value per category that could be computed, in
        input order. Categories for which ``bias_category()`` raised are
        reported on stdout and omitted, so the list may be shorter than
        ``categories`` and positions then no longer line up with it.
    """
    biases = []
    for category in categories:
        try:
            category_bias = bias_category(category, emb_list, c)
        except Exception as err:
            # Still best effort, but no longer silent, and Ctrl-C /
            # SystemExit are not swallowed as they were by a bare except.
            print("skipping category:", category, "-", err)
            continue
        biases.append(category_bias)
    return biases
if __name__ == "__main__":
    # Smoke run: three independent random embeddings over a tiny vocabulary,
    # each word mapped to a 20-element torch.rand vector.
    # NOTE(review): none of these keys occur in the career/family/arts/science
    # word lists, so bias() rejects every target word and all_bias() returns
    # an empty list. The return value is also discarded -- confirm whether the
    # vocabulary should cover the weat_* words and the result be printed.
    demo_vocab = ("king", "queen", "man", "women", "a", "women2", "women3")
    emb_list = [
        {token: torch.rand(20) for token in demo_vocab}
        for _ in range(3)
    ]
    c = 0
    all_bias(emb_list, categories, c)