exp3.py
"""Simulate an EXP3-style selection rule with UCB quality estimates over N=5 agents,
for a range of horizons L, averaging the per-round reward over 100 runs per horizon."""
import numpy as np
from scipy.stats import bernoulli
from math import log, sqrt

# Horizons (total units / rounds) to evaluate; each entry is averaged over 100 runs.
Totalunits = [1000, 10000, 20000, 30000, 40000, 50000, 60000, 70000, 80000, 90000, 100000]
total_reward = np.zeros(len(Totalunits))
k = 0
for L in Totalunits:
    for x in range(0, 100):
        N = 5                    # number of agents / arms
        eta = sqrt(2 * N / L)    # EXP3 learning rate
        # L = 1000
        mu = 0.1                 # resampling probability parameter
        R = 30                   # reward scaling constant

        def get_ber_data(p):
            # Single Bernoulli(p) sample.
            result = bernoulli.rvs(p, size=1)
            return result[0]

        def recursive(c):
            # With probability 1-mu return c; otherwise redraw uniformly from (c, 1) and recurse.
            a = get_ber_data(1 - mu)
            if a == 1:
                return c
            else:
                temp = np.random.rand(1)[0] * (1 - c) + c
                return recursive(temp)

        def resampling(c):
            # Returns [alpha, beta]: with probability 1-mu both equal c; otherwise beta is drawn
            # uniformly from (c, 1) and alpha is a recursive redraw above beta.
            a = get_ber_data(1 - mu)
            alpha = 0.0
            beta = 0.0
            if a == 1:
                alpha = c
                beta = c
            else:
                beta = np.random.rand(1)[0] * (1 - c) + c
                alpha = recursive(beta)
            return [alpha, beta]

        class Agents(object):
            """Population of N agents with random costs and capacities and fixed Bernoulli qualities."""
            def __init__(self):
                super(Agents, self).__init__()
                self.Costs = np.random.rand(5)
                self.Capacities = np.ones(5) * L / N + 4 * L / N * np.random.rand(5)
                self.Qualities = np.array([0.85742449, 0.84950945, 0.67021904, 0.8082646, 0.71201124])

            def getBids(self):
                bids = np.vstack((self.Costs, self.Capacities)).T
                return bids

            def reward(self, i):
                quality = self.Qualities[i]
                return get_ber_data(quality)

            def rewards(self):
                a = np.ones(N)
                for i in range(N):
                    a[i] = self.reward(i)
                return a

            def getCapacitiesFulfilled(self, numberOfTimesPlayed):
                return numberOfTimesPlayed < self.Capacities

            def compareBeta(self, beta):
                return self.Costs < beta

        a = Agents()
        bids = a.getBids()
        modifiedBids = []
        for bid in bids:
            b = resampling(bid[0])
            modifiedBids.append(b)

        s = np.full(N, 1.0)                   # cumulative importance-weighted scores
        weights = np.full(N, 1.0)
        probabilities = np.full(N, 1.0)
        numberOfTimesPlayed = np.full(N, 0)
        empericalQuality = np.full(N, 0.0)    # must be float: an int array would truncate the running mean
        qualityUpperBound = np.full(N, 1.0)
        t = 1
        modifiedBids = np.array(modifiedBids)
        # print(modifiedBids)

        while t < L:
            # step 7: H[i] is twice the resampled bid (alpha) of agent i
            H = 2 * modifiedBids[:, 0]
            # step 8: exponential weights, zeroed for agents whose capacity is exhausted
            weights = np.exp(eta * s)
            temp2 = 1 * a.getCapacitiesFulfilled(numberOfTimesPlayed)
            weights = weights * temp2
            probabilities = weights / np.sum(weights)
            i = np.random.choice(np.arange(0, N), p=probabilities)
            gi = R * qualityUpperBound[i] - H[i]
            # step 9: continue only while the optimistic gain for the sampled agent is positive
            if gi > 0:
                # steps 10, 11: observe the reward and update statistics for agent i
                reward = a.reward(i)
                # print(reward)
                total_reward[k] += R * reward - H[i]
                empericalQuality[i] = (empericalQuality[i] * numberOfTimesPlayed[i] + reward) / (numberOfTimesPlayed[i] + 1)
                numberOfTimesPlayed[i] += 1
                qualityUpperBound[i] = empericalQuality[i] + sqrt(log(t) / (2 * numberOfTimesPlayed[i]))
                s[i] = s[i] + reward / probabilities[i]
            # steps 12, 13: otherwise stop
            else:
                break
            t = t + 1

        # print(bids[:, 1])
        P = 1 / mu * numberOfTimesPlayed * (1 - bids[:, 0])
        temp = 1 * a.compareBeta(modifiedBids[:, 1])
        P = P * temp
        T = bids[:, 0] * numberOfTimesPlayed + P
        # print(T)
    print(total_reward[k] / (L * 100))
    k = k + 1
# print(modifiedBids)
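
The selection step inside the while loop above is the standard EXP3 exponential-weighting update with an importance-weighted score, restricted to agents with remaining capacity. Below is a minimal, self-contained sketch of just that update; the values of eta, s, the capacity mask, and the observed reward are placeholder numbers for illustration, not taken from the script.

import numpy as np

# Sketch of the EXP3-style selection used in the main loop (placeholder values).
eta = 0.03                                   # learning rate; the script uses sqrt(2*N/L)
s = np.array([1.0, 2.5, 0.7, 1.9, 1.2])      # cumulative importance-weighted scores
capacity_left = np.array([1, 1, 0, 1, 1])    # 1 while an agent still has capacity

weights = np.exp(eta * s) * capacity_left    # exponential weights, masked by capacity
probabilities = weights / weights.sum()      # sampling distribution over agents
i = np.random.choice(len(s), p=probabilities)

reward = 1.0                                 # placeholder for the observed Bernoulli reward of agent i
s[i] += reward / probabilities[i]            # importance-weighted update keeps the score estimate unbiased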