-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbreeder.py
156 lines (127 loc) · 4.41 KB
/
breeder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
from __future__ import division
from gene import Gene
from gene_creator import GeneCreator
from titanic_boost_regressor import TitanicBoostRegressor
from titanic_boost_classifier import TitanicBoostClassifier
import numpy as np
import random
import math
from data_reader import DataReader
from various_forests import VariousForests
'''
how does a genetic algorithm work?
https://en.wikipedia.org/wiki/Genetic_algorithm
'''
class Breeder:
    """Genetic-algorithm driver that evolves a population of genes.

    A gene carries the model configuration attributes listed in ``_TRAITS``
    plus a fitness ``level`` used for ranking. The breeder creates
    generations, evaluates them (``run``) and breeds the next generation
    from the fittest individuals (``get_new_generation``).
    """

    # Gene attributes inherited by a son, in the positional order in which
    # get_son passes them to the Gene constructor.
    _TRAITS = (
        "col_by_tree",
        "subsample",
        "min_child_weight",
        "max_depth",
        "n_estimators",
        "learning_rate",
        "way",
        "n_neighbors",
    )

    def get_new_generation(self, old, n):
        """Create a new generation from an already evaluated one.

        The new generation is made of:
          1) the best elements of the old generation (at most 3),
          2) sons generated from the best ``ceil(n / 2.5)`` old elements,
          3) 2 brand-new random elements, to add some variability.

        NOTE: the 2 random elements are always added, so for ``n < 5`` the
        returned list is longer than ``n``.

        Args:
            old: genes of the previous generation, each with its ``level`` set.
            n: target size of the new generation.

        Returns:
            A new list of genes.
        """
        gene_creator = GeneCreator()
        strongest_n = min(3, n)
        # math.ceil returns a float on Python 2; the value is later used as a
        # slice bound in take_goods, so it has to be an integer.
        reprods = int(math.ceil(n / 2.5))
        random_adds = 2

        goods = self.take_goods(old, strongest_n)
        reproducible = self.take_goods(old, reprods)

        # Keep the old best genes in the genetic pool.
        new_generation = list(goods)
        # Add sons generated by the reproducible genes.
        for _ in range(n - strongest_n - random_adds):
            new_generation.append(self.get_son(reproducible))
        # Add some random newborns.
        for _ in range(random_adds):
            new_generation.append(gene_creator.random_create())
        return new_generation

    def _inherit_traits(self, parents):
        """Pick every trait in ``_TRAITS`` from an independently drawn parent.

        Args:
            parents: non-empty sequence of parent genes.

        Returns:
            A tuple of trait values, ordered like ``_TRAITS``.

        Raises:
            ValueError: if ``parents`` is empty.
        """
        if not parents:
            raise ValueError("at least one parent is required to generate a son")
        last = len(parents) - 1
        # One random draw per trait, in _TRAITS order.
        return tuple(
            getattr(parents[random.randint(0, last)], trait)
            for trait in self._TRAITS
        )

    def get_son(self, parents):
        """Generate a new gene whose attributes are taken randomly from parents.

        There are not necessarily 2 parents: any number is accepted, but at
        least 1 is required.

        Args:
            parents: non-empty sequence of parent genes.

        Returns:
            A new ``Gene``.

        Raises:
            ValueError: if ``parents`` is empty.
        """
        # n_neighbors is read from the parent's own n_neighbors attribute;
        # it used to be copied from `way` by mistake.
        # NOTE(review): assumes Gene exposes `n_neighbors` -- confirm in gene.py.
        return Gene(*self._inherit_traits(parents))

    def get_first_generation(self, n):
        """Return ``n`` completely random genes (the first generation)."""
        creator = GeneCreator()
        return [creator.random_create() for _ in range(n)]

    def order_genes(self, genes):
        """Return the distinct genes ordered from the best to the worst.

        Genes are ranked by their ``level`` attribute (the fitness), highest
        first. Duplicates are dropped; genes must therefore be hashable.
        Genes with the same level keep their input order.

        Args:
            genes: iterable of evaluated genes.

        Returns:
            A new list, sorted by descending ``level``, without duplicates.
        """
        seen = set()
        unique = []
        # Order-preserving de-duplication: together with the stable sort below
        # it makes the ranking of equally fit genes deterministic.
        for gene in genes:
            if gene not in seen:
                seen.add(gene)
                unique.append(gene)
        return sorted(unique, key=lambda g: g.level, reverse=True)

    def take_goods(self, genes, n):
        """Return the best ``n`` distinct genes, best first.

        If fewer than ``n`` distinct genes are available, all of them are
        returned.
        """
        goods = self.order_genes(genes)
        if len(goods) > n:
            goods = goods[:n]
        return goods

    def take_best(self, genes):
        """Return the single best gene.

        Raises:
            IndexError: if ``genes`` is empty.
        """
        return self.take_goods(genes, 1)[0]

    def run(self, generation):
        """Evaluate every gene of a generation and store its fitness level.

        For each gene a runner is chosen from ``gene.way``, configured with
        the datasets and the gene, and executed; the value returned by the
        runner is stored on the gene through ``set_fitness_level``.

        Meaning of ``way`` (which algorithm will be used):
          0 is XGBoost Classifier
          1 is XGBoost regressor
          2 is SVC
          3 is DecisionTreeClassifier
          4 is AdaBoost applied to DecisionTreeClassifier
          5 is GradientBoosting
          6 is KNeighbors
          7 is RandomForest
          8 is RandomForest but simplified (more defaults, less configuration)

        Args:
            generation: the population of genes of this iteration.

        Returns:
            A new list with the same gene objects, now evaluated.
        """
        runned_generation = []
        data_reader = DataReader.getInstance()
        X, Y, X_test, X_output = data_reader.read_data()
        for this_gene in generation:
            if this_gene.way == 0:
                runner = TitanicBoostClassifier()
            elif this_gene.way == 1:
                runner = TitanicBoostRegressor()
            else:
                # Every other value of `way` is delegated to VariousForests.
                runner = VariousForests()
            runner.set_datasets(X, Y, X_test, X_output)
            runner.set_gene_to_model(this_gene)  # here we configure the model
            this_gene.set_fitness_level(runner.run())
            runned_generation.append(this_gene)
        return runned_generation